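"""
Tests for Korean tokenization and word frequency lookup in wordfreq.

Korean is tokenized into words with a MeCab-based tokenizer, and the
frequency of a phrase that splits into multiple tokens is inferred by
combining the frequencies of those tokens.
"""
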
import pytest
from wordfreq import tokenize, word_frequency


def test_tokens():
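    """
    The greeting "감사합니다" ("thank you") should be tokenized into the
    noun "감사" ("thanks") and the polite verb form "합니다".
    """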
    assert tokenize("감사합니다", "ko") == ["감사", "합니다"]


def test_combination():
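    """
    The frequency of a phrase that tokenizes into multiple words is
    estimated from the words' individual frequencies: the reciprocal of
    the phrase frequency should be the sum of the tokens' reciprocal
    frequencies.
    """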
    gamsa_freq = word_frequency("감사", "ko")
    habnida_freq = word_frequency("합니다", "ko")
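    # Repeating the same token doubles the reciprocal, halving the
    # frequency: 1 / (1/f + 1/f) == f / 2.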
    assert word_frequency("감사감사", "ko") == pytest.approx(gamsa_freq / 2, rel=0.01)
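    # For distinct tokens, the reciprocal of the combined frequency is
    # the sum of the tokens' reciprocal frequencies.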
    assert 1.0 / word_frequency("감사합니다", "ko") == pytest.approx(
        1.0 / gamsa_freq + 1.0 / habnida_freq, rel=0.01
    )
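

# A minimal sketch of the combination rule that test_combination exercises.
# `combined_frequency` is a hypothetical illustration written for this file;
# it is not part of wordfreq's public API.
def combined_frequency(token_freqs):
    # Reciprocal of the sum of reciprocals of the per-token frequencies,
    # so combined_frequency([f, f]) == f / 2.
    return 1.0 / sum(1.0 / f for f in token_freqs)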