diff --git a/tests/test.py b/tests/test.py index 470d4fe..2d11e35 100644 --- a/tests/test.py +++ b/tests/test.py @@ -23,7 +23,9 @@ def test_languages(): # Laughter is the universal language for lang in avail: - if lang != 'zh': # we don't have enough Chinese data yet + if lang not in {'zh', 'ja'}: + # we do not have enough Chinese data + # Japanese people do not lol assert_greater(word_frequency('lol', lang), 0) # Make up a weirdly verbose language code and make sure @@ -109,7 +111,7 @@ def test_not_really_random(): # This not only tests random_ascii_words, it makes sure we didn't end # up with 'eos' as a very common Japanese word eq_(random_ascii_words(nwords=4, lang='ja', bits_per_word=0), - 'rt rt rt rt') + 'e e e e') @raises(ValueError) diff --git a/wordfreq/data/combined_ar.msgpack.gz b/wordfreq/data/combined_ar.msgpack.gz index 5ce708f..b5ccd9c 100644 Binary files a/wordfreq/data/combined_ar.msgpack.gz and b/wordfreq/data/combined_ar.msgpack.gz differ diff --git a/wordfreq/data/combined_de.msgpack.gz b/wordfreq/data/combined_de.msgpack.gz index 35384fc..43c6aab 100644 Binary files a/wordfreq/data/combined_de.msgpack.gz and b/wordfreq/data/combined_de.msgpack.gz differ diff --git a/wordfreq/data/combined_el.msgpack.gz b/wordfreq/data/combined_el.msgpack.gz index bc0beab..4b93696 100644 Binary files a/wordfreq/data/combined_el.msgpack.gz and b/wordfreq/data/combined_el.msgpack.gz differ diff --git a/wordfreq/data/combined_en.msgpack.gz b/wordfreq/data/combined_en.msgpack.gz index 673b9ca..dd98c45 100644 Binary files a/wordfreq/data/combined_en.msgpack.gz and b/wordfreq/data/combined_en.msgpack.gz differ diff --git a/wordfreq/data/combined_es.msgpack.gz b/wordfreq/data/combined_es.msgpack.gz index 9a4b475..b9967d6 100644 Binary files a/wordfreq/data/combined_es.msgpack.gz and b/wordfreq/data/combined_es.msgpack.gz differ diff --git a/wordfreq/data/combined_fr.msgpack.gz b/wordfreq/data/combined_fr.msgpack.gz index bbcfc60..0eafa2b 100644 Binary files a/wordfreq/data/combined_fr.msgpack.gz and b/wordfreq/data/combined_fr.msgpack.gz differ diff --git a/wordfreq/data/combined_id.msgpack.gz b/wordfreq/data/combined_id.msgpack.gz index 4ff43c3..a8fc1d0 100644 Binary files a/wordfreq/data/combined_id.msgpack.gz and b/wordfreq/data/combined_id.msgpack.gz differ diff --git a/wordfreq/data/combined_it.msgpack.gz b/wordfreq/data/combined_it.msgpack.gz index f02c507..15e33eb 100644 Binary files a/wordfreq/data/combined_it.msgpack.gz and b/wordfreq/data/combined_it.msgpack.gz differ diff --git a/wordfreq/data/combined_ja.msgpack.gz b/wordfreq/data/combined_ja.msgpack.gz index 1c38166..1b498dc 100644 Binary files a/wordfreq/data/combined_ja.msgpack.gz and b/wordfreq/data/combined_ja.msgpack.gz differ diff --git a/wordfreq/data/combined_ko.msgpack.gz b/wordfreq/data/combined_ko.msgpack.gz index 5469582..d3fd802 100644 Binary files a/wordfreq/data/combined_ko.msgpack.gz and b/wordfreq/data/combined_ko.msgpack.gz differ diff --git a/wordfreq/data/combined_ms.msgpack.gz b/wordfreq/data/combined_ms.msgpack.gz index 3c63beb..42af825 100644 Binary files a/wordfreq/data/combined_ms.msgpack.gz and b/wordfreq/data/combined_ms.msgpack.gz differ diff --git a/wordfreq/data/combined_nl.msgpack.gz b/wordfreq/data/combined_nl.msgpack.gz index eed5525..3885c91 100644 Binary files a/wordfreq/data/combined_nl.msgpack.gz and b/wordfreq/data/combined_nl.msgpack.gz differ diff --git a/wordfreq/data/combined_pt.msgpack.gz b/wordfreq/data/combined_pt.msgpack.gz index b8251b3..5e73e44 100644 Binary files a/wordfreq/data/combined_pt.msgpack.gz and b/wordfreq/data/combined_pt.msgpack.gz differ diff --git a/wordfreq/data/combined_ru.msgpack.gz b/wordfreq/data/combined_ru.msgpack.gz index 6a05d2b..b85eab7 100644 Binary files a/wordfreq/data/combined_ru.msgpack.gz and b/wordfreq/data/combined_ru.msgpack.gz differ diff --git a/wordfreq/data/combined_zh.msgpack.gz b/wordfreq/data/combined_zh.msgpack.gz index c988cb8..edfea8d 100644 Binary files a/wordfreq/data/combined_zh.msgpack.gz and b/wordfreq/data/combined_zh.msgpack.gz differ