wordfreq/tests/test_queries.py
Rob Speer 3063b3915a Revise the build test to compare lengths of wordlists.
The test currently fails on Python 3, for some strange reason.
2013-10-30 13:22:56 -04:00

47 lines
1.3 KiB
Python

from __future__ import unicode_literals
from nose.tools import eq_, assert_almost_equal, assert_greater
from wordfreq.query import (word_frequency, average_frequency, wordlist_size,
wordlist_info, metanl_word_frequency)
def test_freq_examples():
assert_almost_equal(
word_frequency('normalization', 'en', 'google-books'),
1.767e-6, places=9
)
assert_almost_equal(
word_frequency('normalization', 'en', 'google-books', 1e-6),
2.767e-6, places=9
)
assert_almost_equal(
word_frequency('normalisation', 'fr', 'leeds-internet'),
4.162e-6, places=9
)
assert_greater(
word_frequency('lol', 'xx', 'twitter'),
word_frequency('lol', 'en', 'google-books')
)
eq_(
word_frequency('totallyfakeword', 'en', 'multi', .5),
.5
)
def test_compatibility():
eq_(metanl_word_frequency(':|en'), 1e9)
eq_(metanl_word_frequency(':|en', offset=1e9), 2e9)
def _check_normalized_frequencies(wordlist, lang):
assert_almost_equal(
average_frequency(wordlist, lang) * wordlist_size(wordlist, lang),
1.0, places=6
)
def test_normalized_frequencies():
for list_info in wordlist_info():
wordlist = list_info['wordlist']
lang = list_info['lang']
yield _check_normalized_frequencies, wordlist, lang