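"""Tests for Korean tokenization and phrase frequency combination in wordfreq."""
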
from wordfreq import tokenize, word_frequency
import pytest


def test_tokens():
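    # 감사합니다 ("thank you") is written as one unbroken Hangul string;
    # the Korean tokenizer should split it into 감사 and 합니다.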
    assert tokenize('감사합니다', 'ko') == ['감사', '합니다']


def test_combination():
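    # word_frequency() should estimate a phrase's frequency from the
    # frequencies of its individual tokens, looked up here for comparison.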
    gamsa_freq = word_frequency('감사', 'ko')
    habnida_freq = word_frequency('합니다', 'ko')
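    # 감사감사 should tokenize as 감사 + 감사; combining the same token with
    # itself gives 1/f = 2/gamsa_freq, i.e. half the single-token frequency.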
    assert word_frequency('감사감사', 'ko') == pytest.approx(gamsa_freq / 2)
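    # For distinct tokens the inverse frequencies should add, making the
    # combined frequency half the harmonic mean of the token frequencies: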
    assert (
        1.0 / word_frequency('감사합니다', 'ko') ==
        pytest.approx(1.0 / gamsa_freq + 1.0 / habnida_freq)
    )