update encoding='utf-8' to raw=False

This commit is contained in:
Robyn Speer 2019-02-04 14:57:38 -05:00
parent 4cd7b4bada
commit c7a14cd4ab
2 changed files with 2 additions and 2 deletions

View File

@@ -74,7 +74,7 @@ def read_cBpack(filename):
]
"""
with gzip.open(filename, 'rb') as infile:
-data = msgpack.load(infile, encoding='utf-8')
+data = msgpack.load(infile, raw=False)
header = data[0]
if (
not isinstance(header, dict) or header.get('format') != 'cB'

View File

@@ -6,7 +6,7 @@ import gzip
DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh.txt')
ORIG_DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh_orig.txt')
SIMP_MAP_FILENAME = resource_filename('wordfreq', 'data/_chinese_mapping.msgpack.gz')
-SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), encoding='utf-8')
+SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False)
jieba_tokenizer = None
jieba_orig_tokenizer = None