From 61a1604b38dce0c2927a05086b8e0549f4016ccb Mon Sep 17 00:00:00 2001 From: Robyn Speer <rspeer@luminoso.com> Date: Mon, 4 Feb 2019 14:57:38 -0500 Subject: [PATCH] update encoding='utf-8' to raw=False --- wordfreq/__init__.py | 2 +- wordfreq/chinese.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wordfreq/__init__.py b/wordfreq/__init__.py index 6a993f1..a72770f 100644 --- a/wordfreq/__init__.py +++ b/wordfreq/__init__.py @@ -74,7 +74,7 @@ def read_cBpack(filename): ] """ with gzip.open(filename, 'rb') as infile: - data = msgpack.load(infile, encoding='utf-8') + data = msgpack.load(infile, raw=False) header = data[0] if ( not isinstance(header, dict) or header.get('format') != 'cB' diff --git a/wordfreq/chinese.py b/wordfreq/chinese.py index 9f7b95a..c8215fc 100644 --- a/wordfreq/chinese.py +++ b/wordfreq/chinese.py @@ -6,7 +6,7 @@ import gzip DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh.txt') ORIG_DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh_orig.txt') SIMP_MAP_FILENAME = resource_filename('wordfreq', 'data/_chinese_mapping.msgpack.gz') -SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), encoding='utf-8') +SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False) jieba_tokenizer = None jieba_orig_tokenizer = None