updated emoji parser

Former-commit-id: f04ca8fc9e
This commit is contained in:
Joshua Chin 2015-07-07 15:43:34 -04:00
parent 94ba6e650f
commit 4389422958
2 changed files with 7 additions and 12 deletions

View File

@@ -15,18 +15,13 @@ def _emoji_char_class():
""" """
emoji_file = DATA_PATH / 'emoji.txt' emoji_file = DATA_PATH / 'emoji.txt'
ranges = [] def accept(c):
for i, c in enumerate(chardata.CHAR_CLASS_STRING): x = ord(c)
# c represents the character class (3 corresponds to emoji) return chardata.CHAR_CLASS_STRING[x] == '3' and \
if c == '3' and i >= 0x2600 and i != 0xfffd: x >= 0x2600 and x != 0xfffd
if ranges and i == ranges[-1][1] + 1:
ranges[-1][1] = i
else:
ranges.append([i, i])
out = '[%s]' % ''.join(chr(a) + '-' + chr(b) for a, b in ranges)
with emoji_file.open(mode='w') as file: with (DATA_PATH / 'emoji.txt').open(mode='w') as file:
file.write(out) file.write(func_to_regex(accept))
def _non_punct_class(): def _non_punct_class():

View File

@@ -1 +1 @@
[☀-♮♰-❧➔-➿⠀-⣿⬀-⬯⭅-⭆⭍-⭳⭶-⮕⮘-⮹⮽-⯈⯊-⯑⳥-⳪⺀-⺙⺛-⻳⼀-⿕⿰-⿻〄-〄〒-〓〠-〠〶-〷〾-〿㆐-㆑㆖-㆟㇀-㇣㈀-㈞㈪-㉇㉐-㉐㉠-㉿㊊-㊰㋀-㋾㌀-㏿䷀-䷿꒐-꓆꠨-꠫꠶-꠷꠹-꠹꩷-꩹﷽-﷽¦-¦│-│■-○-𐄷-𐄿𐅹-𐆉𐆌-𐆌𐆐-𐆛𐆠-𐆠𐇐-𐇼𐡷-𐡸𐫈-𐫈𖬼-𖬿𖭅-𖭅𛲜-𛲜𝀀-𝃵𝄀-𝄦𝄩-𝅘𝅥𝅲𝅪-𝅬𝆃-𝆄𝆌-𝆩𝆮-𝇝𝈀-𝉁𝉅-𝉅𝌀-𝍖🀀-🃿🄍-🣿] [☀-♮♰-❧➔-➿⠀-⣿⬀-⬯⭅-⭆⭍-⯿⳥-⳪⸼-⿿〄-〄〒-〓〠-〠〶-〷〾-぀㆏-㆑㆖-㆟ㆻ-㇯㈀-㈟㈪-㉇㉐-㉐㉠-㉿㊊-㊰㋀-㏿䶶-䷿꒍-꓏꠨-꠯꠶-꠷꠹-꠿꩷-꩹﷽-﷿¦-¦￧-│■--𐄴-𐄿𐅹-𐆉𐆋-𐇼𐡠-𐣿𐪀-𐫿𖨹-𖻿𛀂-𝅘𝅥𝅲𝅪-𝅬𝆃-𝆄𝆌-𝆩𝆮-𝉁𝉅-𝍟𞻲-🃿🄋-🿿]