mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 01:41:39 +00:00
13 lines
505 B
Python
13 lines
505 B
Python
from nose.tools import eq_
|
|
from wordfreq import tokenize
|
|
|
|
|
|
def test_transliteration():
    # The same sentence is tokenized twice with language code 'sr': once
    # written in Cyrillic script and once in Latin script. Both calls are
    # expected to produce the identical list of lowercase Latin-script tokens.
    #
    # The sentence means "Well, there's a lot of things you do not
    # understand." (from somewhere in OpenSubtitles).
    expected_tokens = [
        'pa', 'ima', 'tu', 'mnogo', 'stvari', 'koje', 'ne', 'shvataš',
    ]

    cyrillic_sentence = "Па, има ту много ствари које не схваташ."
    eq_(tokenize(cyrillic_sentence, 'sr'), expected_tokens)

    latin_sentence = "Pa, ima tu mnogo stvari koje ne shvataš."
    eq_(tokenize(latin_sentence, 'sr'), expected_tokens)
|
|
|