caches non_punct regex in non_punct.txt

Former-commit-id: f576ca58ae
2024-12-25 02:05:24 +00:00 · 2015-06-24 17:11:50 -04:00 · 2015-06-24 17:11:50 -04:00 · d48a44b4e3
commit d48a44b4e3
parent f98c6c4401
2 changed files with 13 additions and 3 deletions
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -29,10 +29,19 @@ EMOJI_RANGE = _emoji_char_class()
 # FIXME: Find a better way to get a list of all non punctuation unicodes
 def _non_punct_class():
-    non_punct = [chr(x) for x in range(0x110000)
+    try:
-                    if unicodedata.category(chr(x))[0] not in 'PSZMC']
+        with open('non_punct.txt') as file:
+            return file.read()
+    except FileNotFoundError:
+        non_punct = [chr(x) for x in range(0x110000)
+                        if unicodedata.category(chr(x))[0] not in 'PSZMC']
-    return '[%s]' % ''.join(non_punct)
+        out = '[%s]' % ''.join(non_punct)
+        with open('non_punct.txt', mode='w') as file:
+            file.write(out)
+        return out
 NON_PUNCT_RANGE = _non_punct_class()
--- a/wordfreq/non_punct.txt
+++ b/wordfreq/non_punct.txt