From c2eab6881edc534c6345bafd47b5ef90ec9d4f5e Mon Sep 17 00:00:00 2001 From: Rob Speer Date: Thu, 10 Mar 2016 11:56:04 -0500 Subject: [PATCH] move Thai test to where it makes more sense Former-commit-id: 4ec6b56faab4bc5a698e48cca1493ed45c9de6ea --- tests/test.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test.py b/tests/test.py index 177ebf4..07f8bef 100644 --- a/tests/test.py +++ b/tests/test.py @@ -116,12 +116,6 @@ def test_tokenization(): eq_(tokenize('this text has... punctuation :)', 'en', include_punctuation=True), ['this', 'text', 'has', '...', 'punctuation', ':)']) - # Test that we leave Thai letters stuck together. If we had better Thai support, - # we would actually split this into a three-word phrase. - eq_(tokenize('การเล่นดนตรี', 'th'), ['การเล่นดนตรี']) - eq_(tokenize('"การเล่นดนตรี" means "playing music"', 'en'), - ['การเล่นดนตรี', 'means', 'playing', 'music']) - def test_casefolding(): eq_(tokenize('WEISS', 'de'), ['weiss']) @@ -186,3 +180,10 @@ def test_ideographic_fallback(): tokenize(ja_text, 'en'), ['ひらがな', 'カタカナ', 'romaji'] ) + + # Test that we leave Thai letters stuck together. If we had better Thai support, + # we would actually split this into a three-word phrase. + eq_(tokenize('การเล่นดนตรี', 'th'), ['การเล่นดนตรี']) + eq_(tokenize('"การเล่นดนตรี" means "playing music"', 'en'), + ['การเล่นดนตรี', 'means', 'playing', 'music']) +