wordfreq/tests/test_korean.py

19 lines
520 B
Python
Raw Normal View History

from wordfreq import tokenize, word_frequency
2018-06-01 20:40:51 +00:00
import pytest
def test_tokens():
    """Check that Korean text is split into the expected tokens.

    The single unspaced string '감사합니다' must come back from `tokenize`
    as the two-element list ['감사', '합니다'].
    """
    assert tokenize('감사합니다', 'ko') == ['감사', '합니다']
def test_combination():
    """Check the frequency reported for Korean text made of several tokens.

    The frequencies of '감사' and '합니다' are looked up individually and
    then compared, within a 1% relative tolerance, against the frequency
    reported for strings built from those pieces.
    """
    gamsa_freq = word_frequency('감사', 'ko')
    habnida_freq = word_frequency('합니다', 'ko')

    # The same piece written twice (presumably tokenized as '감사' twice --
    # confirm against the tokenizer) should be half as frequent as one copy.
    assert word_frequency('감사감사', 'ko') == pytest.approx(gamsa_freq / 2, rel=0.01)

    # For two different pieces, the reciprocal of the combined frequency
    # should equal the sum of the reciprocals of the individual frequencies.
    assert (
        1.0 / word_frequency('감사합니다', 'ko') ==
        pytest.approx(1.0 / gamsa_freq + 1.0 / habnida_freq, rel=0.01)
    )