mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
updated emoji parser
This commit is contained in:
parent
9b851f3afe
commit
f04ca8fc9e
@ -15,18 +15,13 @@ def _emoji_char_class():
|
|||||||
"""
|
"""
|
||||||
emoji_file = DATA_PATH / 'emoji.txt'
|
emoji_file = DATA_PATH / 'emoji.txt'
|
||||||
|
|
||||||
ranges = []
|
def accept(c):
|
||||||
for i, c in enumerate(chardata.CHAR_CLASS_STRING):
|
x = ord(c)
|
||||||
# c represents the character class (3 corresponds to emoji)
|
return chardata.CHAR_CLASS_STRING[x] == '3' and \
|
||||||
if c == '3' and i >= 0x2600 and i != 0xfffd:
|
x >= 0x2600 and x != 0xfffd
|
||||||
if ranges and i == ranges[-1][1] + 1:
|
|
||||||
ranges[-1][1] = i
|
|
||||||
else:
|
|
||||||
ranges.append([i, i])
|
|
||||||
out = '[%s]' % ''.join(chr(a) + '-' + chr(b) for a, b in ranges)
|
|
||||||
|
|
||||||
with emoji_file.open(mode='w') as file:
|
with (DATA_PATH / 'emoji.txt').open(mode='w') as file:
|
||||||
file.write(out)
|
file.write(func_to_regex(accept))
|
||||||
|
|
||||||
|
|
||||||
def _non_punct_class():
|
def _non_punct_class():
|
||||||
|
@ -1 +1 @@
|
|||||||
[☀-♮♰-❧➔-➿⠀-⣿⬀-⬯⭅-⭆⭍-⭳⭶-⮕⮘-⮹⮽-⯈⯊-⯑⳥-⳪⺀-⺙⺛-⻳⼀-⿕⿰-⿻〄-〄〒-〓〠-〠〶-〷〾-〿㆐-㆑㆖-㆟㇀-㇣㈀-㈞㈪-㉇㉐-㉐㉠-㉿㊊-㊰㋀-㋾㌀-㏿䷀-䷿꒐-꓆꠨-꠫꠶-꠷꠹-꠹꩷-꩹﷽-﷽¦-¦│-│■-○-𐄷-𐄿𐅹-𐆉𐆌-𐆌𐆐-𐆛𐆠-𐆠𐇐-𐇼𐡷-𐡸𐫈-𐫈𖬼-𖬿𖭅-𖭅𛲜-𛲜𝀀-𝃵𝄀-𝄦𝄩-𝅘𝅥𝅲𝅪-𝅬𝆃-𝆄𝆌-𝆩𝆮-𝇝𝈀-𝉁𝉅-𝉅𝌀-𝍖🀀-🄍-]
|
[☀-♮♰-❧➔-➿⠀-⣿⬀-⬯⭅-⭆⭍-⯿⳥-⳪⸼-〄-〄〒-〓〠-〠〶-〷〾--㆑㆖-㆟ㆻ-㈀-㈪-㉇㉐-㉐㉠-㉿㊊-㊰㋀-㏿䶶-䷿-꠨-꠶-꠷꠹-꩷-꩹﷽-﷿¦-¦-│■---𐄿𐅹-𐆉𐆋-𐇼𐡠-𐣿𐪀--𛀂-𝅘𝅥𝅲𝅪-𝅬𝆃-𝆄𝆌-𝆩𝆮-𝉁𝉅--🄋-]
|
Loading…
Reference in New Issue
Block a user