mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
Update msgpack.load calls: replace encoding='utf-8' with raw=False
This commit is contained in:
parent
4cd7b4bada
commit
c7a14cd4ab
@@ -74,7 +74,7 @@ def read_cBpack(filename):
|
|||||||
]
|
]
|
||||||
"""
|
"""
|
||||||
with gzip.open(filename, 'rb') as infile:
|
with gzip.open(filename, 'rb') as infile:
|
||||||
data = msgpack.load(infile, encoding='utf-8')
|
data = msgpack.load(infile, raw=False)
|
||||||
header = data[0]
|
header = data[0]
|
||||||
if (
|
if (
|
||||||
not isinstance(header, dict) or header.get('format') != 'cB'
|
not isinstance(header, dict) or header.get('format') != 'cB'
|
||||||
|
@@ -6,7 +6,7 @@ import gzip
|
|||||||
DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh.txt')
|
DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh.txt')
|
||||||
ORIG_DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh_orig.txt')
|
ORIG_DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh_orig.txt')
|
||||||
SIMP_MAP_FILENAME = resource_filename('wordfreq', 'data/_chinese_mapping.msgpack.gz')
|
SIMP_MAP_FILENAME = resource_filename('wordfreq', 'data/_chinese_mapping.msgpack.gz')
|
||||||
SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), encoding='utf-8')
|
SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False)
|
||||||
jieba_tokenizer = None
|
jieba_tokenizer = None
|
||||||
jieba_orig_tokenizer = None
|
jieba_orig_tokenizer = None
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user