From 9aa773aa2bba694c691d1ea7b18e16a64fe7695e Mon Sep 17 00:00:00 2001 From: Joshua Chin Date: Tue, 7 Jul 2015 15:03:26 -0400 Subject: [PATCH] changed default to minimum for word_frequency --- tests/test.py | 2 +- wordfreq/__init__.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test.py b/tests/test.py index 795d533..358abd4 100644 --- a/tests/test.py +++ b/tests/test.py @@ -46,7 +46,7 @@ def test_twitter(): def test_defaults(): eq_(word_frequency('esquivalience', 'en'), 0) - eq_(word_frequency('esquivalience', 'en', default=1e-6), 1e-6) + eq_(word_frequency('esquivalience', 'en', minimum=1e-6), 1e-6) def test_most_common_words(): diff --git a/wordfreq/__init__.py b/wordfreq/__init__.py index 6430ee2..f7a1948 100644 --- a/wordfreq/__init__.py +++ b/wordfreq/__init__.py @@ -243,7 +243,7 @@ def half_harmonic_mean(a, b): @lru_cache(maxsize=CACHE_SIZE) -def word_frequency(word, lang, wordlist='combined', default=0.): +def word_frequency(word, lang, wordlist='combined', minimum=0.): """ Get the frequency of `word` in the language with code `lang`, from the specified `wordlist`. The default wordlist is 'combined', built from @@ -261,7 +261,7 @@ def word_frequency(word, lang, wordlist='combined', default=0.): Words that we believe occur at least once per million tokens, based on the average of these lists, will appear in the word frequency list. - If you look up a word that's not in the list, you'll get the `default` + If you look up a word that's not in the list, you'll get the `minimum` value, which itself defaults to 0. If a word decomposes into multiple tokens, we'll return a smoothed estimate @@ -273,12 +273,12 @@ def word_frequency(word, lang, wordlist='combined', default=0.): tokens = tokenize(word, lang) if len(tokens) == 0: - return default + return minimum for token in tokens: if token not in freqs: # If any word is missing, just return the default value - return default + return minimum value = freqs[token] if combined_value is None: combined_value = value