Mirror of https://github.com/rspeer/wordfreq.git, synced 2024-12-23 17:31:41 +00:00
remove unnecessary delayed loads in wordfreq.chinese
This commit is contained in:
parent 6cf4210187
commit 4a87890afd
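The "delayed load" being removed is the usual lazy-initialization idiom: a module-level global starts out as None and is filled in on first use, so that importing the module stays cheap. Since wordfreq appears to import this module only when Chinese text actually needs handling, that extra indirection bought nothing. A minimal generic sketch of the two patterns, with hypothetical names (load_resource is a stand-in, not wordfreq code):

```python
# Generic sketch of the pattern this commit removes, with hypothetical
# names; load_resource() stands in for the real msgpack/jieba loading.

def load_resource():
    # Placeholder for expensive work: reading a data file, building a
    # tokenizer, and so on.
    return {}

# Delayed load (removed): importing is free, the first call pays the
# loading cost, and every call pays an is-it-loaded check.
_resource = None

def use_delayed():
    global _resource
    if _resource is None:
        _resource = load_resource()
    return _resource

# Eager load (introduced): the cost moves to import time and the calls
# become unconditional. This is a win when the module itself is only
# imported on demand, so the import already marks the first use.
RESOURCE = load_resource()

def use_eager():
    return RESOURCE
```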
@@ -3,22 +3,16 @@ import jieba
 import msgpack
 import gzip
 
-jieba_tokenizer = None
-simplified_map = None
 DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh.txt')
 SIMP_MAP_FILENAME = resource_filename('wordfreq', 'data/_chinese_mapping.msgpack.gz')
+SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), encoding='utf-8')
+JIEBA_TOKENIZER = jieba.Tokenizer(dictionary=DICT_FILENAME)
 
 
 def simplify_chinese(text):
-    global simplified_map
-    if simplified_map is None:
-        simplified_map = msgpack.load(gzip.open(SIMP_MAP_FILENAME), encoding='utf-8')
-    return text.translate(simplified_map).casefold()
+    return text.translate(SIMPLIFIED_MAP).casefold()
 
 
 def jieba_tokenize(text):
-    global jieba_tokenizer
-    if jieba_tokenizer is None:
-        jieba_tokenizer = jieba.Tokenizer(dictionary=DICT_FILENAME)
-    return jieba_tokenizer.lcut(simplify_chinese(text), HMM=False)
+    return JIEBA_TOKENIZER.lcut(simplify_chinese(text), HMM=False)
 
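From the caller's side nothing changes except when the cost is paid: simplify_chinese and jieba_tokenize keep their signatures, but the data files now load as soon as wordfreq.chinese is imported. A usage sketch, assuming wordfreq is installed with its bundled data files; the sample strings are illustrative:

```python
# Usage of the two public functions touched by this commit, assuming
# wordfreq is installed with its bundled data files.
from wordfreq.chinese import simplify_chinese, jieba_tokenize

# Maps Traditional characters to Simplified and casefolds the result,
# via str.translate over the precomputed mapping.
simplified = simplify_chinese('這是一個測試')

# Runs jieba's lcut over the simplified text; HMM=False turns off
# jieba's HMM-based discovery of out-of-vocabulary words, so only
# dictionary words are segmented.
tokens = jieba_tokenize('这是一个测试')
print(simplified, tokens)
```

One side note: the encoding='utf-8' argument to msgpack.load in the diff belongs to older msgpack-python releases; in current versions it has been removed in favor of raw=False.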