updated minimum

Former-commit-id: 59c03e2411
2024-12-23 17:31:41 +00:00 · 2015-07-07 15:46:33 -04:00 · 2015-07-07 15:46:33 -04:00 · 4b398fac65
commit 4b398fac65
parent 4389422958
2 changed files with 5 additions and 5 deletions
--- a/tests/test.py
+++ b/tests/test.py
@@ -44,10 +44,10 @@ def test_twitter():
                       word_frequency('rt', lang, 'combined'))


-def test_defaults():
+def test_minimums():
    eq_(word_frequency('esquivalience', 'en'), 0)
    eq_(word_frequency('esquivalience', 'en', minimum=1e-6), 1e-6)
-
+    eq_(word_frequency('the', 'en', minimum=1), 1)

 def test_most_common_words():
    # If something causes the most common words in well-supported languages to
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -234,8 +234,8 @@ def word_frequency(word, lang, wordlist='combined', minimum=0.):

    Words that we believe occur at least once per million tokens, based on
    the average of these lists, will appear in the word frequency list.
-    If you look up a word that's not in the list, you'll get the `minimum`
-    value, which itself defaults to 0.
+
+    The value returned will always be at least as large as `minimum`.

    If a word decomposes into multiple tokens, we'll return a smoothed estimate
    of the word frequency that is no greater than the frequency of any of its
@@ -259,7 +259,7 @@ def word_frequency(word, lang, wordlist='combined', minimum=0.):
            # Combine word values using the half-harmonic-mean formula,
            # (a * b) / (a + b). This operation is associative.
            combined_value = half_harmonic_mean(combined_value, value)
-    return combined_value
+    return max(combined_value, minimum)


@lru_cache(maxsize=100)