Updated minimum: word_frequency now always returns at least `minimum`

This commit is contained in:
Joshua Chin 2015-07-07 15:46:33 -04:00
parent f04ca8fc9e
commit 59c03e2411
2 changed files with 5 additions and 5 deletions

View File

@@ -44,10 +44,10 @@ def test_twitter():
word_frequency('rt', lang, 'combined')) word_frequency('rt', lang, 'combined'))
def test_defaults(): def test_minimums():
eq_(word_frequency('esquivalience', 'en'), 0) eq_(word_frequency('esquivalience', 'en'), 0)
eq_(word_frequency('esquivalience', 'en', minimum=1e-6), 1e-6) eq_(word_frequency('esquivalience', 'en', minimum=1e-6), 1e-6)
eq_(word_frequency('the', 'en', minimum=1), 1)
def test_most_common_words(): def test_most_common_words():
# If something causes the most common words in well-supported languages to # If something causes the most common words in well-supported languages to

View File

@@ -234,8 +234,8 @@ def word_frequency(word, lang, wordlist='combined', minimum=0.):
Words that we believe occur at least once per million tokens, based on Words that we believe occur at least once per million tokens, based on
the average of these lists, will appear in the word frequency list. the average of these lists, will appear in the word frequency list.
If you look up a word that's not in the list, you'll get the `minimum`
value, which itself defaults to 0. The value returned will always be at least as large as `minimum`.
If a word decomposes into multiple tokens, we'll return a smoothed estimate If a word decomposes into multiple tokens, we'll return a smoothed estimate
of the word frequency that is no greater than the frequency of any of its of the word frequency that is no greater than the frequency of any of its
@@ -259,7 +259,7 @@ def word_frequency(word, lang, wordlist='combined', minimum=0.):
# Combine word values using the half-harmonic-mean formula, # Combine word values using the half-harmonic-mean formula,
# (a * b) / (a + b). This operation is associative. # (a * b) / (a + b). This operation is associative.
combined_value = half_harmonic_mean(combined_value, value) combined_value = half_harmonic_mean(combined_value, value)
return combined_value return max(combined_value, minimum)
@lru_cache(maxsize=100) @lru_cache(maxsize=100)