revert to using global mecab_tokenize variable

Former-commit-id: 189a5b9cd6
This commit is contained in:
Joshua Chin 2015-07-07 15:47:37 -04:00
parent 927aaae920
commit 993bc4da15

View File

@@ -43,7 +43,7 @@ def simple_tokenize(text):
"""
return [token.casefold() for token in TOKEN_RE.findall(text)]
mecab_tokenize = None
def tokenize(text, lang):
"""
Tokenize this text in a way that's straightforward but appropriate for
@@ -57,7 +57,9 @@ def tokenize(text, lang):
first, so that they can be expected to match the data.
"""
if lang == 'ja':
from wordfreq.mecab import mecab_tokenize
global mecab_tokenize
if mecab_tokenize is None:
from wordfreq.mecab import mecab_tokenize
return mecab_tokenize(text)
if lang == 'ar':