remove unnecessary delayed loads in wordfreq.chinese

2024-12-23 17:31:41 +00:00 · 2015-09-22 16:42:13 -04:00 · 2015-09-22 16:42:13 -04:00 · 4a87890afd
commit 4a87890afd
parent 6cf4210187
1 changed files with 4 additions and 10 deletions
--- a/wordfreq/chinese.py
+++ b/wordfreq/chinese.py
@@ -3,22 +3,16 @@ import jieba
 import msgpack
 import gzip
-jieba_tokenizer = None
-simplified_map = None
 DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh.txt')
 SIMP_MAP_FILENAME = resource_filename('wordfreq', 'data/_chinese_mapping.msgpack.gz')
+SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), encoding='utf-8')
+JIEBA_TOKENIZER = jieba.Tokenizer(dictionary=DICT_FILENAME)
 def simplify_chinese(text):
-    global simplified_map
+    return text.translate(SIMPLIFIED_MAP).casefold()
-    if simplified_map is None:
-        simplified_map = msgpack.load(gzip.open(SIMP_MAP_FILENAME), encoding='utf-8')
-    return text.translate(simplified_map).casefold()
 def jieba_tokenize(text):
-    global jieba_tokenizer
+    return JIEBA_TOKENIZER.lcut(simplify_chinese(text), HMM=False)
-    if jieba_tokenizer is None:
-        jieba_tokenizer = jieba.Tokenizer(dictionary=DICT_FILENAME)
-    return jieba_tokenizer.lcut(simplify_chinese(text), HMM=False)