From 4704131e134ff5d41b58498e30acb9ff4e67331e Mon Sep 17 00:00:00 2001 From: Robyn Speer Date: Fri, 4 Sep 2015 16:40:11 -0400 Subject: [PATCH] add tests for Turkish Former-commit-id: fc93c8dc9c66a786914137729c42209be0c4acd0 --- tests/test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test.py b/tests/test.py index 0a8e212..21dd9ad 100644 --- a/tests/test.py +++ b/tests/test.py @@ -19,7 +19,7 @@ def test_freq_examples(): def test_languages(): # Make sure the number of available languages doesn't decrease avail = available_languages() - assert_greater(len(avail), 14) + assert_greater(len(avail), 15) # Laughter is the universal language for lang in avail: @@ -36,7 +36,7 @@ def test_languages(): def test_twitter(): avail = available_languages('twitter') - assert_greater(len(avail), 12) + assert_greater(len(avail), 14) for lang in avail: assert_greater(word_frequency('rt', lang, 'twitter'), @@ -68,6 +68,7 @@ def test_most_common_words(): eq_(get_most_common('nl'), 'de') eq_(get_most_common('pt'), 'de') eq_(get_most_common('ru'), 'в') + eq_(get_most_common('tr'), 'bir') eq_(get_most_common('zh'), '的') @@ -111,6 +112,8 @@ def test_tokenization(): def test_casefolding(): eq_(tokenize('WEISS', 'de'), ['weiss']) eq_(tokenize('weiß', 'de'), ['weiss']) + eq_(tokenize('İstanbul', 'tr'), ['istanbul']) + eq_(tokenize('SIKISINCA', 'tr'), ['sıkısınca']) def test_phrase_freq():