wordfreq/tests/test_queries.py

from __future__ import unicode_literals
from nose.tools import eq_, assert_almost_equal, assert_greater
from wordfreq.query import (word_frequency, average_frequency, wordlist_size,
                            wordlist_info, metanl_word_frequency)


def test_freq_examples():
    """Spot-check `word_frequency` against a handful of expected values.

    Covers three near-equality lookups (compared to 9 decimal places), one
    ordering check between two wordlists, and one lookup of a word that is
    expected to yield exactly the extra fourth argument that was passed in.
    """
    # (positional arguments for word_frequency, expected result).
    # NOTE(review): the fourth positional argument looks like an additive
    # offset (1.767e-6 + 1e-6 == 2.767e-6) -- confirm against wordfreq.query.
    approx_cases = (
        (('normalization', 'en', 'google-books'), 1.767e-6),
        (('normalization', 'en', 'google-books', 1e-6), 2.767e-6),
        (('normalisation', 'fr', 'leeds-internet'), 4.162e-6),
    )
    for query_args, expected in approx_cases:
        assert_almost_equal(word_frequency(*query_args), expected, places=9)

    # 'lol' must score higher in the twitter list than in google-books.
    twitter_freq = word_frequency('lol', 'xx', 'twitter')
    books_freq = word_frequency('lol', 'en', 'google-books')
    assert_greater(twitter_freq, books_freq)

    # A made-up word should come back as exactly the value supplied.
    fallback = .5
    eq_(word_frequency('totallyfakeword', 'en', 'multi', fallback), fallback)


def test_compatibility():
    """Check `metanl_word_frequency` with and without an explicit offset.

    Both results are compared to 3 decimal places: 1e9 for the bare
    'the|en' lookup, and 2e9 when `offset=1e9` is passed.
    """
    without_offset = metanl_word_frequency('the|en')
    assert_almost_equal(without_offset, 1e9, places=3)

    with_offset = metanl_word_frequency('the|en', offset=1e9)
    assert_almost_equal(with_offset, 2e9, places=3)


def _check_normalized_frequencies(wordlist, lang):
    """Assert that average frequency times wordlist size is 1.0.

    The product is compared to 1.0 to 6 decimal places for the given
    `wordlist` and `lang`, both of which are passed straight through to
    `average_frequency` and `wordlist_size`.
    """
    mean_freq = average_frequency(wordlist, lang)
    entry_count = wordlist_size(wordlist, lang)
    # presumably mean * count equals the sum of all frequencies -- verify
    # against the definitions in wordfreq.query.
    assert_almost_equal(mean_freq * entry_count, 1.0, places=6)


def test_normalized_frequencies():
    """Yield one normalization check per entry returned by `wordlist_info`.

    Each yielded item is a `(check_function, wordlist, lang)` tuple, built
    from the 'wordlist' and 'lang' keys of the entry.
    """
    for entry in wordlist_info():
        check_args = (entry['wordlist'], entry['lang'])
        yield (_check_normalized_frequencies,) + check_args
now this package has tests 2013-10-29 21:21:55 +00:00			`from __future__ import unicode_literals`
			`from nose.tools import eq_, assert_almost_equal, assert_greater`
			`from wordfreq.query import (word_frequency, average_frequency, wordlist_size,`
Revise the build test to compare lengths of wordlists. The test currently fails on Python 3, for some strange reason. 2013-10-30 17:22:56 +00:00			`wordlist_info, metanl_word_frequency)`
now this package has tests 2013-10-29 21:21:55 +00:00

			`def test_freq_examples():`
			`assert_almost_equal(`
			`word_frequency('normalization', 'en', 'google-books'),`
			`1.767e-6, places=9`
			`)`
Change default values to offsets. 2013-10-29 22:06:47 +00:00			`assert_almost_equal(`
			`word_frequency('normalization', 'en', 'google-books', 1e-6),`
			`2.767e-6, places=9`
			`)`
now this package has tests 2013-10-29 21:21:55 +00:00			`assert_almost_equal(`
			`word_frequency('normalisation', 'fr', 'leeds-internet'),`
			`4.162e-6, places=9`
			`)`
			`assert_greater(`
			`word_frequency('lol', 'xx', 'twitter'),`
			`word_frequency('lol', 'en', 'google-books')`
			`)`
			`eq_(`
Change default values to offsets. 2013-10-29 22:06:47 +00:00			`word_frequency('totallyfakeword', 'en', 'multi', .5),`
			`.5`
now this package has tests 2013-10-29 21:21:55 +00:00			`)`


Change default values to offsets. 2013-10-29 22:06:47 +00:00			`def test_compatibility():`
make the tests less picky about numerical exactness 2013-10-31 19:43:19 +00:00			`assert_almost_equal(metanl_word_frequency('the|en'), 1e9, places=3)`
			`assert_almost_equal(metanl_word_frequency('the|en', offset=1e9), 2e9, places=3)`
Change default values to offsets. 2013-10-29 22:06:47 +00:00

now this package has tests 2013-10-29 21:21:55 +00:00			`def _check_normalized_frequencies(wordlist, lang):`
			`assert_almost_equal(`
			`average_frequency(wordlist, lang) * wordlist_size(wordlist, lang),`
			`1.0, places=6`
			`)`


			`def test_normalized_frequencies():`
Revise the build test to compare lengths of wordlists. The test currently fails on Python 3, for some strange reason. 2013-10-30 17:22:56 +00:00			`for list_info in wordlist_info():`
now this package has tests 2013-10-29 21:21:55 +00:00			`wordlist = list_info['wordlist']`
			`lang = list_info['lang']`
			`yield _check_normalized_frequencies, wordlist, lang`