from wordfreq import tokenize, word_frequency
import pytest


def test_tokens():
    """Tokenizing '감사합니다' as Korean should yield exactly two tokens."""
    expected_tokens = ['감사', '합니다']
    assert tokenize('감사합니다', 'ko') == expected_tokens


def test_combination():
    """Check how the frequency of a multi-token Korean string is derived.

    Two relationships are asserted, each within a 1% relative tolerance:

    * the frequency of '감사감사' is half the frequency of '감사';
    * the reciprocal of the frequency of '감사합니다' equals the sum of the
      reciprocals of the frequencies of '감사' and '합니다'.
    """
    first_freq = word_frequency('감사', 'ko')
    second_freq = word_frequency('합니다', 'ko')

    # Repeated token: the combined frequency should be half of the single one.
    repeated_freq = word_frequency('감사감사', 'ko')
    assert repeated_freq == pytest.approx(first_freq / 2, rel=0.01)

    # Two different tokens: compare in reciprocal space, where the
    # per-token values are expected to add up.
    combined_reciprocal = 1.0 / word_frequency('감사합니다', 'ko')
    summed_reciprocals = 1.0 / first_freq + 1.0 / second_freq
    assert combined_reciprocal == pytest.approx(summed_reciprocals, rel=0.01)