only import mecab once

Former-commit-id: 6e1f7e30c6
2024-12-23 17:31:41 +00:00 · 2015-06-25 11:41:19 -04:00 · 2015-06-25 11:41:19 -04:00 · 78bff813e3
commit 78bff813e3
parent a0b7211451
1 changed files with 7 additions and 3 deletions
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -106,14 +106,18 @@ def tokenize(text, lang):
    the language.

    So far, this means that Japanese is handled by mecab_tokenize, and
-    everything else is handled by simple_tokenize.
+    everything else is handled by simple_tokenize. Additionally, Arabic commas
+    are removed.

    Strings that are looked up in wordfreq will be run through this function
    first, so that they can be expected to match the data.
    """
    if lang == 'ja':
-        from wordfreq.mecab import mecab_tokenize
-        return mecab_tokenize(text)
+        try:
+            return mecab_tokenize(text)
+        except NameError:
+            from wordfreq.mecab import mecab_tokenize
+            return mecab_tokenize(text)
    else:
        return simple_tokenize(text)