mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 18:01:38 +00:00
changed mecab_tokenize to a global variable
Former-commit-id: 5fc448bc60
This commit is contained in:
parent
d372f29757
commit
6765bead18
@@ -124,7 +124,7 @@ def simple_tokenize(text):
     """
     return [token.lower() for token in TOKEN_RE.findall(text)]


+mecab_tokenize = None

 def tokenize(text, lang):
     """
     Tokenize this text in a way that's straightforward but appropriate for
@@ -138,11 +138,10 @@ def tokenize(text, lang):
     first, so that they can be expected to match the data.
     """
     if lang == 'ja':
-        try:
-            return mecab_tokenize(text)
-        except NameError:
-            from wordfreq.mecab import mecab_tokenize
-            return mecab_tokenize(text)
+        global mecab_tokenize
+        if mecab_tokenize is None:
+            from wordfreq.mecab import mecab_tokenize
+        return mecab_tokenize(text)
     elif lang == 'ar':
         tokens = simple_tokenize(text)
         tokens = [token.replace('ـ', '') for token in tokens] # remove arabic commas
|
||||
|
Loading…
Reference in New Issue
Block a user