Remove the manual mecab_tokenize import cache; Python already caches imports

Former-commit-id: b1cd2e01d3
This commit is contained in:
Joshua Chin 2015-07-07 14:44:50 -04:00
parent 93681e43b3
commit c5135edd88

View File

@ -73,7 +73,6 @@ def simple_tokenize(text):
""" """
return [token.casefold() for token in TOKEN_RE.findall(text)] return [token.casefold() for token in TOKEN_RE.findall(text)]
mecab_tokenize = None
def tokenize(text, lang): def tokenize(text, lang):
""" """
Tokenize this text in a way that's straightforward but appropriate for Tokenize this text in a way that's straightforward but appropriate for
@ -87,10 +86,7 @@ def tokenize(text, lang):
first, so that they can be expected to match the data. first, so that they can be expected to match the data.
""" """
if lang == 'ja': if lang == 'ja':
global mecab_tokenize from wordfreq.mecab import mecab_tokenize
if mecab_tokenize is None:
from wordfreq.mecab import mecab_tokenize
return mecab_tokenize(text)
if lang == 'ar': if lang == 'ar':
text = COMBINING_MARK_RE.sub('', text.replace('ـ', '')) text = COMBINING_MARK_RE.sub('', text.replace('ـ', ''))