updated comments

This commit is contained in:
Joshua Chin 2015-07-17 14:50:12 -04:00
parent b0a9a2980f
commit 131b916c57

View File

@@ -94,7 +94,7 @@ def test_failed_cB_conversion():
def test_tokenization(): def test_tokenization():
# We preserve apostrophes within words, so "can't" is a single word in the # We preserve apostrophes within words, so "can't" is a single word in the
# data, while the fake word "plan't" can't be found. # data
eq_(tokenize("can't", 'en'), ["can't"]) eq_(tokenize("can't", 'en'), ["can't"])
eq_(tokenize('😂test', 'en'), ['😂', 'test']) eq_(tokenize('😂test', 'en'), ['😂', 'test'])