diff --git a/README.md b/README.md index 7204409..4555008 100644 --- a/README.md +++ b/README.md @@ -167,41 +167,50 @@ least 3 different sources of word frequencies: Language Code # Large? WP Subs News Books Web Twit. Redd. Misc. ──────────────────────────────┼──────────────────────────────────────────────── Arabic ar 5 Yes │ Yes Yes Yes - Yes Yes - - - Bengali bn 3 - │ Yes - Yes - - Yes - - + Bangla bn 5 Yes │ Yes Yes Yes - Yes Yes - - Bosnian bs [1] 3 - │ Yes Yes - - - Yes - - - Bulgarian bg 3 - │ Yes Yes - - - Yes - - - Catalan ca 4 - │ Yes Yes Yes - - Yes - - + Bulgarian bg 4 - │ Yes Yes - - Yes Yes - - + Catalan ca 5 Yes │ Yes Yes Yes - Yes Yes - - Chinese zh [3] 7 Yes │ Yes Yes Yes Yes Yes Yes - Jieba Croatian hr [1] 3 │ Yes Yes - - - Yes - - Czech cs 5 Yes │ Yes Yes Yes - Yes Yes - - - Danish da 3 - │ Yes Yes - - - Yes - - + Danish da 4 - │ Yes Yes - - Yes Yes - - Dutch nl 5 Yes │ Yes Yes Yes - Yes Yes - - English en 7 Yes │ Yes Yes Yes Yes Yes Yes Yes - Finnish fi 6 Yes │ Yes Yes Yes - Yes Yes Yes - French fr 7 Yes │ Yes Yes Yes Yes Yes Yes Yes - German de 7 Yes │ Yes Yes Yes Yes Yes Yes Yes - - Greek el 3 - │ Yes Yes - - Yes - - - - Hebrew he 4 - │ Yes Yes - Yes - Yes - - - Hindi hi 3 - │ Yes - - - - Yes Yes - - Hungarian hu 3 - │ Yes Yes - - Yes - - - + Greek el 4 - │ Yes Yes - - Yes Yes - - + Hebrew he 5 Yes │ Yes Yes - Yes Yes Yes - - + Hindi hi 4 Yes │ Yes - - - Yes Yes Yes - + Hungarian hu 4 - │ Yes Yes - - Yes Yes - - + Icelandic is 3 - │ Yes Yes - - Yes - - - Indonesian id 3 - │ Yes Yes - - - Yes - - Italian it 7 Yes │ Yes Yes Yes Yes Yes Yes Yes - Japanese ja 5 Yes │ Yes Yes - - Yes Yes Yes - Korean ko 4 - │ Yes Yes - - - Yes Yes - Latvian lv 4 - │ Yes Yes - - Yes Yes - - - Macedonian mk 3 - │ Yes Yes Yes - - - - - + Lithuanian lt 3 - │ Yes Yes - - Yes - - - + Macedonian mk 5 Yes │ Yes Yes Yes - Yes Yes - - Malay ms 3 - │ Yes Yes - - - Yes - - - Norwegian nb [2] 4 - │ Yes Yes - - - Yes Yes - - Persian fa 3 - │ Yes Yes - - - Yes - - + Malayalam ml 3 
- │ Yes Yes - - - Yes - - + Norwegian nb [2] 5 Yes │ Yes Yes - - Yes Yes Yes - + Persian fa 4 - │ Yes Yes - - Yes Yes - - Polish pl 6 Yes │ Yes Yes Yes - Yes Yes Yes - Portuguese pt 5 Yes │ Yes Yes Yes - Yes Yes - - - Romanian ro 4 - │ Yes Yes - - Yes Yes - - + Romanian ro 3 - │ Yes Yes - - Yes - - - Russian ru 6 Yes │ Yes Yes Yes Yes Yes Yes - - + Slovak sk 3 - │ Yes Yes - - Yes - - - + Slovenian sl 3 - │ Yes Yes - - Yes - - - Serbian sr [1] 3 - │ Yes Yes - - - Yes - - Spanish es 7 Yes │ Yes Yes Yes Yes Yes Yes Yes - - Swedish sv 4 - │ Yes Yes - - - Yes Yes - - Turkish tr 3 - │ Yes Yes - - - Yes - - - Ukrainian uk 4 - │ Yes Yes - - - Yes Yes - + Swedish sv 5 Yes │ Yes Yes - - Yes Yes Yes - + Tagalog fil 3 - │ Yes Yes - - Yes - - - + Tamil ta 3 - │ Yes - - - Yes Yes - - + Turkish tr 4 - │ Yes Yes - - Yes Yes - - + Ukrainian uk 5 Yes │ Yes Yes - - Yes Yes Yes - + Urdu ur 3 - │ Yes - - - Yes Yes - - + Vietnamese vi 3 - │ Yes Yes - - Yes - - - [1] Bosnian, Croatian, and Serbian use the same underlying word list, because they share most of their vocabulary and grammar, they were once considered the diff --git a/tests/test_general.py b/tests/test_general.py index a5b68bd..5628299 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -72,7 +72,6 @@ def test_most_common_words(): assert get_most_common('fi') == 'ja' assert get_most_common('fil') == 'sa' assert get_most_common('fr') == 'de' - assert get_most_common('gl') == 'de' assert get_most_common('he') == 'את' assert get_most_common('hi') == 'के' assert get_most_common('hu') == 'a' diff --git a/wordfreq/data/small_gl.msgpack.gz b/wordfreq/data/small_gl.msgpack.gz deleted file mode 100644 index bbb89ee..0000000 Binary files a/wordfreq/data/small_gl.msgpack.gz and /dev/null differ