wordfreq/tests/test_japanese.py

27 lines
742 B
Python

from nose.tools import eq_, assert_almost_equal
from wordfreq import tokenize, word_frequency, half_harmonic_mean
def test_tokens():
eq_(tokenize('おはようございます', 'ja'),
['おはよう', 'ござい', 'ます'])
def test_combination():
ohayou_freq = word_frequency('おはよう', 'ja')
gozai_freq = word_frequency('ござい', 'ja')
masu_freq = word_frequency('ます', 'ja')
assert_almost_equal(
word_frequency('おはようおはよう', 'ja'),
ohayou_freq / 2
)
assert_almost_equal(
word_frequency('おはようございます', 'ja'),
half_harmonic_mean(
half_harmonic_mean(ohayou_freq, gozai_freq),
masu_freq
)
)