mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-25 02:05:24 +00:00
parent
f98c6c4401
commit
d48a44b4e3
@ -29,10 +29,19 @@ EMOJI_RANGE = _emoji_char_class()
|
|||||||
|
|
||||||
# FIXME: Find a better way to get a list of all non punctuation unicodes
|
# FIXME: Find a better way to get a list of all non punctuation unicodes
|
||||||
def _non_punct_class():
|
def _non_punct_class():
|
||||||
non_punct = [chr(x) for x in range(0x110000)
|
try:
|
||||||
if unicodedata.category(chr(x))[0] not in 'PSZMC']
|
with open('non_punct.txt') as file:
|
||||||
|
return file.read()
|
||||||
|
except FileNotFoundError:
|
||||||
|
non_punct = [chr(x) for x in range(0x110000)
|
||||||
|
if unicodedata.category(chr(x))[0] not in 'PSZMC']
|
||||||
|
|
||||||
return '[%s]' % ''.join(non_punct)
|
out = '[%s]' % ''.join(non_punct)
|
||||||
|
|
||||||
|
with open('non_punct.txt', mode='w') as file:
|
||||||
|
file.write(out)
|
||||||
|
|
||||||
|
return out
|
||||||
|
|
||||||
NON_PUNCT_RANGE = _non_punct_class()
|
NON_PUNCT_RANGE = _non_punct_class()
|
||||||
|
|
||||||
|
1
wordfreq/non_punct.txt
Normal file
1
wordfreq/non_punct.txt
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user