updated tests

Former-commit-id: ca66a5f883
This commit is contained in:
Joshua Chin 2015-07-07 14:13:28 -04:00
parent 090cfa7088
commit 9c741bb341

View File

@@ -1,6 +1,7 @@
from wordfreq import ( from wordfreq import (
word_frequency, available_languages, cB_to_freq, iter_wordlist, word_frequency, available_languages, cB_to_freq, iter_wordlist,
top_n_list, random_words, random_ascii_words, tokenize top_n_list, random_words, random_ascii_words, tokenize,
half_harmonic_mean
) )
from nose.tools import ( from nose.tools import (
eq_, assert_almost_equal, assert_greater, assert_less, raises eq_, assert_almost_equal, assert_greater, assert_less, raises
@@ -96,7 +97,6 @@ def test_tokenization():
# We preserve apostrophes within words, so "can't" is a single word in the # We preserve apostrophes within words, so "can't" is a single word in the
# data, while the fake word "plan't" can't be found. # data, while the fake word "plan't" can't be found.
eq_(tokenize("can't", 'en'), ["can't"]) eq_(tokenize("can't", 'en'), ["can't"])
eq_(tokenize("plan't", 'en'), ["plan't"])
eq_(tokenize('😂test', 'en'), ['😂', 'test']) eq_(tokenize('😂test', 'en'), ['😂', 'test'])
@@ -113,8 +113,13 @@ def test_casefolding():
def test_phrase_freq(): def test_phrase_freq():
plant = word_frequency("plan.t", 'en') plant = word_frequency("plan.t", 'en')
assert_greater(plant, 0) assert_greater(plant, 0)
assert_less(plant, word_frequency('plan', 'en')) assert_almost_equal(
assert_less(plant, word_frequency('t', 'en')) plant,
half_harmonic_mean(
word_frequency('plan', 'en'),
word_frequency('t', 'en')
)
)
def test_not_really_random(): def test_not_really_random():