From 45a002c1e1057c3122bf9126d9780ba8589e2a1a Mon Sep 17 00:00:00 2001
From: Lance Nathan
Date: Fri, 28 Feb 2020 12:51:18 -0500
Subject: [PATCH] Fix code affected by a breaking change in msgpack 1.0

The msgpack readme explains: "Default value of strict_map_key is changed
to True to avoid hashdos. You need to pass strict_map_key=False if you
have data which contain map keys which type is not bytes or str."

chinese.py loads SIMPLIFIED_MAP from disk. Since it is a str.translate
dictionary, its keys are numbers. And since it's a dictionary we created
ourselves, there's no hashdos concern, so we can load it with
strict_map_key=False.
---
 CHANGELOG.md        | 6 ++++++
 setup.py            | 2 +-
 wordfreq/chinese.py | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 153d171..2f96e40 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+## Version 2.2.2 (2020-02-28)
+
+Library change:
+
+- Fixed an incompatibility with newly-released `msgpack 1.0`.
+
 ## Version 2.2.1 (2019-02-05)
 
 Library changes:
diff --git a/setup.py b/setup.py
index 7717c64..014624f 100755
--- a/setup.py
+++ b/setup.py
@@ -35,7 +35,7 @@ if sys.version_info < (3, 4):
 
 setup(
     name="wordfreq",
-    version='2.2.1',
+    version='2.2.2',
     maintainer='Robyn Speer',
     maintainer_email='rspeer@luminoso.com',
     url='http://github.com/LuminosoInsight/wordfreq/',
diff --git a/wordfreq/chinese.py b/wordfreq/chinese.py
index c8215fc..61e931a 100644
--- a/wordfreq/chinese.py
+++ b/wordfreq/chinese.py
@@ -6,7 +6,7 @@ import gzip
 DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh.txt')
 ORIG_DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh_orig.txt')
 SIMP_MAP_FILENAME = resource_filename('wordfreq', 'data/_chinese_mapping.msgpack.gz')
-SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False)
+SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False, strict_map_key=False)
 
 jieba_tokenizer = None
 jieba_orig_tokenizer = None