mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
parent
4c7910246e
commit
e8fa25cb73
@ -94,7 +94,7 @@ def test_failed_cB_conversion():
|
|||||||
|
|
||||||
def test_tokenization():
|
def test_tokenization():
|
||||||
# We preserve apostrophes within words, so "can't" is a single word in the
|
# We preserve apostrophes within words, so "can't" is a single word in the
|
||||||
# data, while the fake word "plan't" can't be found.
|
# data
|
||||||
eq_(tokenize("can't", 'en'), ["can't"])
|
eq_(tokenize("can't", 'en'), ["can't"])
|
||||||
|
|
||||||
eq_(tokenize('😂test', 'en'), ['😂', 'test'])
|
eq_(tokenize('😂test', 'en'), ['😂', 'test'])
|
||||||
|
Loading…
Reference in New Issue
Block a user