From d30183a7d736df8ddec38a8d7364d797c74e4d37 Mon Sep 17 00:00:00 2001
From: Robyn Speer <rspeer@luminoso.com>
Date: Thu, 25 Oct 2018 11:07:55 -0400
Subject: [PATCH 1/3] Allow a wider range of 'regex' versions

The behavior of segmentation shouldn't change within this range, and the
range includes the version currently used by spaCy.
---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index d5352df..7717c64 100755
--- a/setup.py
+++ b/setup.py
@@ -28,14 +28,14 @@ README_contents = open(os.path.join(current_dir, 'README.md'),
                        encoding='utf-8').read()
 doclines = README_contents.split("\n")
 dependencies = [
-    'msgpack', 'langcodes >= 1.4.1', 'regex == 2018.02.21'
+    'msgpack', 'langcodes >= 1.4.1', 'regex >= 2017.07.11, <= 2018.02.21'
 ]
 if sys.version_info < (3, 4):
     dependencies.append('pathlib')
 
 setup(
     name="wordfreq",
-    version='2.2.0',
+    version='2.2.1',
     maintainer='Robyn Speer',
     maintainer_email='rspeer@luminoso.com',
     url='http://github.com/LuminosoInsight/wordfreq/',

From 61a1604b38dce0c2927a05086b8e0549f4016ccb Mon Sep 17 00:00:00 2001
From: Robyn Speer <rspeer@luminoso.com>
Date: Mon, 4 Feb 2019 14:57:38 -0500
Subject: [PATCH 2/3] update encoding='utf-8' to raw=False

---
 wordfreq/__init__.py | 2 +-
 wordfreq/chinese.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/wordfreq/__init__.py b/wordfreq/__init__.py
index 6a993f1..a72770f 100644
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -74,7 +74,7 @@ def read_cBpack(filename):
         ]
     """
     with gzip.open(filename, 'rb') as infile:
-        data = msgpack.load(infile, encoding='utf-8')
+        data = msgpack.load(infile, raw=False)
     header = data[0]
     if (
         not isinstance(header, dict) or header.get('format') != 'cB'
diff --git a/wordfreq/chinese.py b/wordfreq/chinese.py
index 9f7b95a..c8215fc 100644
--- a/wordfreq/chinese.py
+++ b/wordfreq/chinese.py
@@ -6,7 +6,7 @@ import gzip
 DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh.txt')
 ORIG_DICT_FILENAME = resource_filename('wordfreq', 'data/jieba_zh_orig.txt')
 SIMP_MAP_FILENAME = resource_filename('wordfreq', 'data/_chinese_mapping.msgpack.gz')
-SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), encoding='utf-8')
+SIMPLIFIED_MAP = msgpack.load(gzip.open(SIMP_MAP_FILENAME), raw=False)
 jieba_tokenizer = None
 jieba_orig_tokenizer = None
 

From dd72051929123571eb5b8b5c7dd65f231d4ec5b2 Mon Sep 17 00:00:00 2001
From: Robyn Speer <rspeer@luminoso.com>
Date: Tue, 5 Feb 2019 11:16:22 -0500
Subject: [PATCH 3/3] update msgpack call in scripts/make_chinese_mapping

---
 scripts/make_chinese_mapping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/make_chinese_mapping.py b/scripts/make_chinese_mapping.py
index 4a17d4f..3d78708 100644
--- a/scripts/make_chinese_mapping.py
+++ b/scripts/make_chinese_mapping.py
@@ -36,7 +36,7 @@ def make_hanzi_converter(table_in, msgpack_out):
             if chr(codept) != char:
                 table[codept] = char
     with gzip.open(msgpack_out, 'wb') as outfile:
-        msgpack.dump(table, outfile, encoding='utf-8')
+        msgpack.dump(table, outfile)
 
 
 def build():