from __future__ import unicode_literals

from nose.tools import eq_, assert_almost_equal, assert_greater

from wordfreq.query import (word_frequency, average_frequency, wordlist_size,
                            get_wordlists, metanl_word_frequency)
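

# Spot checks on specific words: their frequencies in particular wordlists,
# and the behavior of the optional fourth argument, which is added to the
# result (so a word missing from the list gets just that value back).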
def test_freq_examples():
    # 'normalization' has a known frequency in the Google Books wordlist.
    assert_almost_equal(
        word_frequency('normalization', 'en', 'google-books'),
        1.767e-6, places=9
    )
    # The fourth argument is an offset added to the frequency.
    assert_almost_equal(
        word_frequency('normalization', 'en', 'google-books', 1e-6),
        2.767e-6, places=9
    )
    assert_almost_equal(
        word_frequency('normalisation', 'fr', 'leeds-internet'),
        4.162e-6, places=9
    )
    # 'lol' should be more frequent in the Twitter wordlist than in
    # Google Books English.
    assert_greater(
        word_frequency('lol', 'xx', 'twitter'),
        word_frequency('lol', 'en', 'google-books')
    )
    # A word missing from the wordlist returns just the offset.
    eq_(
        word_frequency('totallyfakeword', 'en', 'multi', .5),
        .5
    )
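

# Compatibility with the metanl interface: the word and language arrive as a
# single 'word|lang' string, and the optional offset is added to the result.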
def test_compatibility():
    eq_(metanl_word_frequency(':|en'), 1e9)
    eq_(metanl_word_frequency(':|en', offset=1e9), 2e9)
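

# Every wordlist should be normalized: its average frequency times the number
# of words in it should be 1, meaning the frequencies sum to 1.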
def _check_normalized_frequencies(wordlist, lang):
    assert_almost_equal(
        average_frequency(wordlist, lang) * wordlist_size(wordlist, lang),
        1.0, places=6
    )
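

# Nose test generator: yield one normalization check per available wordlist.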
def test_normalized_frequencies():
    for list_info in get_wordlists():
        wordlist = list_info['wordlist']
        lang = list_info['lang']
        yield _check_normalized_frequencies, wordlist, lang
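
# These are nose-style tests: run them with `nosetests`, which expands the
# yield-based generator above into one test case per wordlist.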