mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 09:21:37 +00:00
Fix code affected by a breaking change in msgpack 1.0
The msgpack readme explains: "Default value of strict_map_key is changed to True to avoid hashdos. You need to pass strict_map_key=False if you have data which contain map keys which type is not bytes or str." chinese.py loads SIMPLIFIED_MAP from disk with msgpack. Since it is a str.translate dictionary, its keys are integers (Unicode code points) rather than bytes or str, so msgpack 1.0 refuses to load it under the new default. And since it's a dictionary we created ourselves, there's no hashdos concern, so we can safely load it with strict_map_key=False.
This commit is contained in:
parent
401889d7c8
commit
86e988b838
@ -1,3 +1,9 @@
|
||||
## Version 2.2.2 (2020-02-28)
|
||||
|
||||
Library change:
|
||||
|
||||
- Fixed an incompatibility with newly-released `msgpack 1.0`.
|
||||
|
||||
## Version 2.2.1 (2019-02-05)
|
||||
|
||||
Library changes:
|
||||
|
2
setup.py
2
setup.py
@ -35,7 +35,7 @@ if sys.version_info < (3, 4):
|
||||
|
||||
setup(
|
||||
name="wordfreq",
|
||||
version='2.2.1',
|
||||
version='2.2.2',
|
||||
maintainer='Robyn Speer',
|
||||
maintainer_email='rspeer@luminoso.com',
|
||||
url='http://github.com/LuminosoInsight/wordfreq/',
|
||||
|
@ -6,7 +6,7 @@ import gzip
|
||||
DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh.txt')
|
||||
ORIG_DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh_orig.txt')
|
||||
SIMP_MAP_FILENAME = resource_filename('wordfreq', 'data/_chinese_mapping.msgpack.gz')
|
||||
SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False)
|
||||
SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False, strict_map_key=False)
|
||||
jieba_tokenizer = None
|
||||
jieba_orig_tokenizer = None
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user