1
0
mirror of https://github.com/rspeer/wordfreq.git synced 2025-01-13 20:56:00 +00:00

updated minimum

This commit is contained in:
Joshua Chin 2015-07-07 15:46:33 -04:00
parent f04ca8fc9e
commit 59c03e2411
2 changed files with 5 additions and 5 deletions
tests
wordfreq

View File

@ -44,10 +44,10 @@ def test_twitter():
word_frequency('rt', lang, 'combined'))
def test_defaults():
def test_minimums():
eq_(word_frequency('esquivalience', 'en'), 0)
eq_(word_frequency('esquivalience', 'en', minimum=1e-6), 1e-6)
eq_(word_frequency('the', 'en', minimum=1), 1)
def test_most_common_words():
# If something causes the most common words in well-supported languages to

View File

@ -234,8 +234,8 @@ def word_frequency(word, lang, wordlist='combined', minimum=0.):
Words that we believe occur at least once per million tokens, based on
the average of these lists, will appear in the word frequency list.
If you look up a word that's not in the list, you'll get the `minimum`
value, which itself defaults to 0.
The value returned will always be at least as large as `minimum`.
If a word decomposes into multiple tokens, we'll return a smoothed estimate
of the word frequency that is no greater than the frequency of any of its
@ -259,7 +259,7 @@ def word_frequency(word, lang, wordlist='combined', minimum=0.):
# Combine word values using the half-harmonic-mean formula,
# (a * b) / (a + b). This operation is associative.
combined_value = half_harmonic_mean(combined_value, value)
return combined_value
return max(combined_value, minimum)
@lru_cache(maxsize=100)