wordfreq/tests/test_korean.py
2022-03-10 18:33:42 -05:00

17 lines
509 B
Python

from wordfreq import tokenize, word_frequency
import pytest
def test_tokens():
    """Tokenizing '감사합니다' as Korean ('ko') yields two tokens."""
    expected_tokens = ["감사", "합니다"]
    actual_tokens = tokenize("감사합니다", "ko")
    assert actual_tokens == expected_tokens
def test_combination():
    """Check the frequency reported for Korean strings made of several tokens.

    Two relationships are asserted, each within a 1% relative tolerance:

    * '감사감사' has about half the frequency of '감사' alone.
    * The reciprocal of the frequency of '감사합니다' is about the sum of the
      reciprocals of the frequencies of '감사' and '합니다'.
    """
    freq_gamsa = word_frequency("감사", "ko")
    freq_habnida = word_frequency("합니다", "ko")

    # The same word repeated twice: expected to come out at half the
    # frequency of the single word.
    freq_doubled = word_frequency("감사감사", "ko")
    assert freq_doubled == pytest.approx(freq_gamsa / 2, rel=0.01)

    # Two different words: compare reciprocals, so the expected value is
    # simply the sum of the individual reciprocal frequencies.
    inverse_combined = 1.0 / word_frequency("감사합니다", "ko")
    inverse_sum_of_parts = 1.0 / freq_gamsa + 1.0 / freq_habnida
    assert inverse_combined == pytest.approx(inverse_sum_of_parts, rel=0.01)