mirror of
https://github.com/rspeer/wordfreq.git
synced 2025-01-14 13:15:59 +00:00
updated minimum
This commit is contained in:
parent
f04ca8fc9e
commit
59c03e2411
@ -44,10 +44,10 @@ def test_twitter():
|
|||||||
word_frequency('rt', lang, 'combined'))
|
word_frequency('rt', lang, 'combined'))
|
||||||
|
|
||||||
|
|
||||||
def test_defaults():
|
def test_minimums():
|
||||||
eq_(word_frequency('esquivalience', 'en'), 0)
|
eq_(word_frequency('esquivalience', 'en'), 0)
|
||||||
eq_(word_frequency('esquivalience', 'en', minimum=1e-6), 1e-6)
|
eq_(word_frequency('esquivalience', 'en', minimum=1e-6), 1e-6)
|
||||||
|
eq_(word_frequency('the', 'en', minimum=1), 1)
|
||||||
|
|
||||||
def test_most_common_words():
|
def test_most_common_words():
|
||||||
# If something causes the most common words in well-supported languages to
|
# If something causes the most common words in well-supported languages to
|
||||||
|
@ -234,8 +234,8 @@ def word_frequency(word, lang, wordlist='combined', minimum=0.):
|
|||||||
|
|
||||||
Words that we believe occur at least once per million tokens, based on
|
Words that we believe occur at least once per million tokens, based on
|
||||||
the average of these lists, will appear in the word frequency list.
|
the average of these lists, will appear in the word frequency list.
|
||||||
If you look up a word that's not in the list, you'll get the `minimum`
|
|
||||||
value, which itself defaults to 0.
|
The value returned will always be at least as large as `minimum`.
|
||||||
|
|
||||||
If a word decomposes into multiple tokens, we'll return a smoothed estimate
|
If a word decomposes into multiple tokens, we'll return a smoothed estimate
|
||||||
of the word frequency that is no greater than the frequency of any of its
|
of the word frequency that is no greater than the frequency of any of its
|
||||||
@ -259,7 +259,7 @@ def word_frequency(word, lang, wordlist='combined', minimum=0.):
|
|||||||
# Combine word values using the half-harmonic-mean formula,
|
# Combine word values using the half-harmonic-mean formula,
|
||||||
# (a * b) / (a + b). This operation is associative.
|
# (a * b) / (a + b). This operation is associative.
|
||||||
combined_value = half_harmonic_mean(combined_value, value)
|
combined_value = half_harmonic_mean(combined_value, value)
|
||||||
return combined_value
|
return max(combined_value, minimum)
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=100)
|
@lru_cache(maxsize=100)
|
||||||
|
Loading…
Reference in New Issue
Block a user