Renamed the `default` parameter of word_frequency to `minimum`

2024-12-23 17:31:41 +00:00 · 2015-07-07 15:03:26 -04:00 · 2015-07-07 15:03:26 -04:00 · 9aa773aa2b
commit 9aa773aa2b
parent 0b25caaf24
2 changed files with 5 additions and 5 deletions
--- a/tests/test.py
+++ b/tests/test.py
@@ -46,7 +46,7 @@ def test_twitter():

 def test_defaults():
    eq_(word_frequency('esquivalience', 'en'), 0)
-    eq_(word_frequency('esquivalience', 'en', default=1e-6), 1e-6)
+    eq_(word_frequency('esquivalience', 'en', minimum=1e-6), 1e-6)


 def test_most_common_words():
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -243,7 +243,7 @@ def half_harmonic_mean(a, b):


@lru_cache(maxsize=CACHE_SIZE)
-def word_frequency(word, lang, wordlist='combined', default=0.):
+def word_frequency(word, lang, wordlist='combined', minimum=0.):
    """
    Get the frequency of `word` in the language with code `lang`, from the
    specified `wordlist`. The default wordlist is 'combined', built from
@@ -261,7 +261,7 @@ def word_frequency(word, lang, wordlist='combined', default=0.):

    Words that we believe occur at least once per million tokens, based on
    the average of these lists, will appear in the word frequency list.
-    If you look up a word that's not in the list, you'll get the `default`
+    If you look up a word that's not in the list, you'll get the `minimum`
    value, which itself defaults to 0.

    If a word decomposes into multiple tokens, we'll return a smoothed estimate
@@ -273,12 +273,12 @@ def word_frequency(word, lang, wordlist='combined', default=0.):
    tokens = tokenize(word, lang)

    if len(tokens) == 0:
-        return default
+        return minimum

    for token in tokens:
        if token not in freqs:
            # If any word is missing, just return the `minimum` value
-            return default
+            return minimum
        value = freqs[token]
        if combined_value is None:
            combined_value = value