Merge pull request #12 from LuminosoInsight/split-emoji

Added the results of the new wordfreq_builder that splits emoji.
2024-12-23 17:31:41 +00:00 · 2015-06-30 11:32:40 -04:00 · 2015-06-30 11:32:40 -04:00 · 7d25627e43
commit 7d25627e43
parent 6c76942da2 bbf7b9de34
16 changed files with 4 additions and 2 deletions
--- a/tests/test.py
+++ b/tests/test.py
@ -23,7 +23,9 @@ def test_languages():

    # Laughter is the universal language
    for lang in avail:
-        if lang != 'zh':  # we don't have enough Chinese data yet
+        if lang not in {'zh', 'ja'}:
+            # we do not have enough Chinese data
+            # Japanese people do not lol
            assert_greater(word_frequency('lol', lang), 0)

            # Make up a weirdly verbose language code and make sure
@ -109,7 +111,7 @@ def test_not_really_random():
    # This not only tests random_ascii_words, it makes sure we didn't end
    # up with 'eos' as a very common Japanese word
    eq_(random_ascii_words(nwords=4, lang='ja', bits_per_word=0),
-        'rt rt rt rt')
+        'e e e e')


@raises(ValueError)
--- a/wordfreq/data/combined_ar.msgpack.gz
+++ b/wordfreq/data/combined_ar.msgpack.gz
--- a/wordfreq/data/combined_de.msgpack.gz
+++ b/wordfreq/data/combined_de.msgpack.gz
--- a/wordfreq/data/combined_el.msgpack.gz
+++ b/wordfreq/data/combined_el.msgpack.gz
--- a/wordfreq/data/combined_en.msgpack.gz
+++ b/wordfreq/data/combined_en.msgpack.gz
--- a/wordfreq/data/combined_es.msgpack.gz
+++ b/wordfreq/data/combined_es.msgpack.gz
--- a/wordfreq/data/combined_fr.msgpack.gz
+++ b/wordfreq/data/combined_fr.msgpack.gz
--- a/wordfreq/data/combined_id.msgpack.gz
+++ b/wordfreq/data/combined_id.msgpack.gz
--- a/wordfreq/data/combined_it.msgpack.gz
+++ b/wordfreq/data/combined_it.msgpack.gz
--- a/wordfreq/data/combined_ja.msgpack.gz
+++ b/wordfreq/data/combined_ja.msgpack.gz
--- a/wordfreq/data/combined_ko.msgpack.gz
+++ b/wordfreq/data/combined_ko.msgpack.gz
--- a/wordfreq/data/combined_ms.msgpack.gz
+++ b/wordfreq/data/combined_ms.msgpack.gz
--- a/wordfreq/data/combined_nl.msgpack.gz
+++ b/wordfreq/data/combined_nl.msgpack.gz
--- a/wordfreq/data/combined_pt.msgpack.gz
+++ b/wordfreq/data/combined_pt.msgpack.gz
--- a/wordfreq/data/combined_ru.msgpack.gz
+++ b/wordfreq/data/combined_ru.msgpack.gz
--- a/wordfreq/data/combined_zh.msgpack.gz
+++ b/wordfreq/data/combined_zh.msgpack.gz