test that number-smashing still happens in freq lookups

This commit is contained in:
Robyn Speer 2017-01-06 19:20:41 -05:00
parent 573ecc53d0
commit 9a6beb0089

View File

@@ -155,6 +155,7 @@ def test_number_smashing():
 eq_(tokenize('1', 'en', combine_numbers=True), ['1'])
 eq_(tokenize('3.14', 'en', combine_numbers=True), ['0.00'])
 eq_(tokenize('24601', 'en', combine_numbers=True), ['00000'])
+eq_(word_frequency('24601', 'en'), word_frequency('90210', 'en'))
 def test_phrase_freq():