revert to using global mecab_tokenize variable

Former-commit-id: 189a5b9cd6
2024-12-23 17:31:41 +00:00 · 2015-07-07 15:47:37 -04:00 · 2015-07-07 15:47:37 -04:00 · a72b4abb48
commit a72b4abb48
parent 4b398fac65
1 changed file with 4 additions and 2 deletions
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -43,7 +43,7 @@ def simple_tokenize(text):
    """
    return [token.casefold() for token in TOKEN_RE.findall(text)]

-
+mecab_tokenize = None
 def tokenize(text, lang):
    """
    Tokenize this text in a way that's straightforward but appropriate for
@@ -57,6 +57,8 @@ def tokenize(text, lang):
    first, so that they can be expected to match the data.
    """
    if lang == 'ja':
+        global mecab_tokenize
+        if mecab_tokenize is None:
-        from wordfreq.mecab import mecab_tokenize
+            from wordfreq.mecab import mecab_tokenize
        return mecab_tokenize(text)