Update msgpack.load calls: replace encoding='utf-8' with raw=False

This commit is contained in:
Robyn Speer 2019-02-04 14:57:38 -05:00
parent 4cd7b4bada
commit c7a14cd4ab
2 changed files with 2 additions and 2 deletions

View File

@@ -74,7 +74,7 @@ def read_cBpack(filename):
 ]
 """
 with gzip.open(filename, 'rb') as infile:
-data = msgpack.load(infile, encoding='utf-8')
+data = msgpack.load(infile, raw=False)
 header = data[0]
 if (
 not isinstance(header, dict) or header.get('format') != 'cB'

View File

@@ -6,7 +6,7 @@ import gzip
 DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh.txt')
 ORIG_DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh_orig.txt')
 SIMP_MAP_FILENAME = resource_filename('wordfreq', 'data/_chinese_mapping.msgpack.gz')
-SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), encoding='utf-8')
+SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False)
 jieba_tokenizer = None
 jieba_orig_tokenizer = None