mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
removes combining marks from arabic words instead of treating them as punctuation
Former-commit-id: cebca52ea3
This commit is contained in:
parent
60782d3796
commit
7c8266aeb7
@ -1,3 +1,3 @@
|
|||||||
recursive-include wordfreq/data *.gz
|
recursive-include wordfreq/data *.gz
|
||||||
include README.md
|
include README.md
|
||||||
include data/non_punct.txt
|
recursive-include wordfreq/data *.txt
|
||||||
|
@ -41,29 +41,51 @@ def _non_punct_class():
|
|||||||
- P: punctuation
|
- P: punctuation
|
||||||
- S: symbols
|
- S: symbols
|
||||||
- Z: separators
|
- Z: separators
|
||||||
- M: combining marks
|
|
||||||
- C: control characters
|
- C: control characters
|
||||||
This will classify symbols, including emoji, as punctuation; callers that
|
This will classify symbols, including emoji, as punctuation; callers that
|
||||||
want to treat emoji separately should filter them out first.
|
want to treat emoji separately should filter them out first.
|
||||||
"""
|
"""
|
||||||
non_punct = DATA_PATH / 'non_punct.txt'
|
non_punct_file = DATA_PATH / 'non_punct.txt'
|
||||||
try:
|
try:
|
||||||
with non_punct.open() as file:
|
with non_punct_file.open() as file:
|
||||||
return file.read()
|
return file.read()
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
non_punct = [x for x in range(0x110000)
|
non_punct = [x for x in range(0x110000)
|
||||||
if unicodedata.category(chr(x))[0] not in 'PSZMC']
|
if unicodedata.category(chr(x))[0] not in 'PSZC']
|
||||||
|
|
||||||
non_punct_ranges = to_ranges(non_punct)
|
non_punct_ranges = to_ranges(non_punct)
|
||||||
|
|
||||||
out = '[%s]' % ''.join("%s-%s" % (chr(start), chr(end))
|
out = '[%s]' % ''.join("%s-%s" % (chr(start), chr(end))
|
||||||
for start, end in non_punct_ranges)
|
for start, end in non_punct_ranges)
|
||||||
|
|
||||||
with non_punct.open(mode='w') as file:
|
with non_punct_file.open(mode='w') as file:
|
||||||
file.write(out)
|
file.write(out)
|
||||||
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
def _combining_mark_class():
|
||||||
|
"""
|
||||||
|
Builds a regex that matches anything that is a combining mark
|
||||||
|
"""
|
||||||
|
_combining_mark_file = DATA_PATH / 'combining_mark.txt'
|
||||||
|
try:
|
||||||
|
with _combining_mark_file.open() as file:
|
||||||
|
return file.read()
|
||||||
|
except FileNotFoundError:
|
||||||
|
combining_mark = [x for x in range(0x110000)
|
||||||
|
if unicodedata.category(chr(x))[0] == 'M']
|
||||||
|
|
||||||
|
combining_mark_ranges = to_ranges(combining_mark)
|
||||||
|
|
||||||
|
out = '[%s]' % ''.join("%s-%s" % (chr(start), chr(end))
|
||||||
|
for start, end in combining_mark_ranges)
|
||||||
|
|
||||||
|
with _combining_mark_file.open(mode='w') as file:
|
||||||
|
file.write(out)
|
||||||
|
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def to_ranges(seq):
|
def to_ranges(seq):
|
||||||
"""
|
"""
|
||||||
Converts a sequence of int's into a list of inclusives ranges
|
Converts a sequence of int's into a list of inclusives ranges
|
||||||
@ -78,7 +100,7 @@ def to_ranges(seq):
|
|||||||
return ranges
|
return ranges
|
||||||
|
|
||||||
|
|
||||||
|
COMBINING_MARK_RE = re.compile(_combining_mark_class())
|
||||||
NON_PUNCT_RANGE = _non_punct_class()
|
NON_PUNCT_RANGE = _non_punct_class()
|
||||||
|
|
||||||
TOKEN_RE = re.compile("{0}|{1}+(?:'{1}+)*".format(EMOJI_RANGE, NON_PUNCT_RANGE))
|
TOKEN_RE = re.compile("{0}|{1}+(?:'{1}+)*".format(EMOJI_RANGE, NON_PUNCT_RANGE))
|
||||||
@ -107,7 +129,7 @@ def tokenize(text, lang):
|
|||||||
|
|
||||||
So far, this means that Japanese is handled by mecab_tokenize, and
|
So far, this means that Japanese is handled by mecab_tokenize, and
|
||||||
everything else is handled by simple_tokenize. Additionally, Arabic commas
|
everything else is handled by simple_tokenize. Additionally, Arabic commas
|
||||||
are removed.
|
and combining marks are removed.
|
||||||
|
|
||||||
Strings that are looked up in wordfreq will be run through this function
|
Strings that are looked up in wordfreq will be run through this function
|
||||||
first, so that they can be expected to match the data.
|
first, so that they can be expected to match the data.
|
||||||
@ -120,7 +142,8 @@ def tokenize(text, lang):
|
|||||||
return mecab_tokenize(text)
|
return mecab_tokenize(text)
|
||||||
elif lang == 'ar':
|
elif lang == 'ar':
|
||||||
tokens = simple_tokenize(text)
|
tokens = simple_tokenize(text)
|
||||||
tokens = [token.replace('ـ', '') for token in tokens]
|
tokens = [token.replace('ـ', '') for token in tokens] # remove arabic commas
|
||||||
|
tokens = [COMBINING_MARK_RE.sub('', token) for token in tokens]
|
||||||
return [token for token in tokens if token] # remove empty strings
|
return [token for token in tokens if token] # remove empty strings
|
||||||
else:
|
else:
|
||||||
return simple_tokenize(text)
|
return simple_tokenize(text)
|
||||||
|
1
wordfreq/data/combining_mark.txt
Normal file
1
wordfreq/data/combining_mark.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
[̀-ͯ҃-҉֑-ֽֿ-ֿׁ-ׂׄ-ׇׅ-ׇؐ-ًؚ-ٰٟ-ٰۖ-ۜ۟-ۤۧ-۪ۨ-ܑۭ-ܑܰ-݊ަ-ް߫-߳ࠖ-࠙ࠛ-ࠣࠥ-ࠧࠩ-࡙࠭-࡛ࣤ-ࣾऀ-ःऺ-़ा-ॏ॑-ॗॢ-ॣঁ-ঃ়-়া-ৄে-ৈো-্ৗ-ৗৢ-ৣਁ-ਃ਼-਼ਾ-ੂੇ-ੈੋ-੍ੑ-ੑੰ-ੱੵ-ੵઁ-ઃ઼-઼ા-ૅે-ૉો-્ૢ-ૣଁ-ଃ଼-଼ା-ୄେ-ୈୋ-୍ୖ-ୗୢ-ୣஂ-ஂா-ூெ-ைொ-்ௗ-ௗఁ-ఃా-ౄె-ైొ-్ౕ-ౖౢ-ౣಂ-ಃ಼-಼ಾ-ೄೆ-ೈೊ-್ೕ-ೖೢ-ೣം-ഃാ-ൄെ-ൈൊ-്ൗ-ൗൢ-ൣං-ඃ්-්ා-ුූ-ූෘ-ෟෲ-ෳั-ัิ-ฺ็-๎ັ-ັິ-ູົ-ຼ່-ໍ༘-༙༵-༵༷-༹༷-༹༾-༿ཱ-྄྆-྇ྍ-ྗྙ-ྼ࿆-࿆ါ-ှၖ-ၙၞ-ၠၢ-ၤၧ-ၭၱ-ၴႂ-ႍႏ-ႏႚ-ႝ፝-፟ᜒ-᜔ᜲ-᜴ᝒ-ᝓᝲ-ᝳ឴-៓៝-៝᠋-᠍ᢩ-ᢩᤠ-ᤫᤰ-᤻ᦰ-ᧀᧈ-ᧉᨗ-ᨛᩕ-ᩞ᩠-᩿᩼-᩿ᬀ-ᬄ᬴-᭄᭫-᭳ᮀ-ᮂᮡ-ᮭ᯦-᯳ᰤ-᰷᳐-᳔᳒-᳨᳭-᳭ᳲ-᳴᷀-ᷦ᷼-᷿⃐-⃰⳯-⵿⳱-⵿ⷠ-〪ⷿ-゙〯-゚꙯-꙲ꙴ-꙽ꚟ-ꚟ꛰-꛱ꠂ-ꠂ꠆-꠆ꠋ-ꠋꠣ-ꠧꢀ-ꢁꢴ-꣄꣠-꣱ꤦ-꤭ꥇ-꥓ꦀ-ꦃ꦳-꧀ꨩ-ꨶꩃ-ꩃꩌ-ꩍꩻ-ꩻꪰ-ꪰꪲ-ꪴꪷ-ꪸꪾ-꪿꫁-꫁ꫫ-ꫯꫵ-꫶ꯣ-ꯪ꯬-꯭ﬞ-ﬞ︀-️︠-𐇽︦-𐇽𐨁-𐨃𐨅-𐨆𐨌-𐨏𐨸-𐨿𐨺-𐨿𑀀-𑀂𑀸-𑁆𑂀-𑂂𑂰-𑂺𑄀-𑄂𑄧-𑄴𑆀-𑆂𑆳-𑇀𑚫-𑚷𖽑-𖽾𖾏-𖾒𝅥-𝅩𝅭-𝅲𝅻-𝆂𝆅-𝆋𝆪-𝆭𝉂-𝉄󠄀-󠇯]
|
@ -1 +1 @@
|
|||||||
[0-9A-Za-zª-ª²-³µ-µ¹-º¼-¾À-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԧԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-ي٠-٩ٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱ߀-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢠࢢ-ࢬऄ-हऽ-ऽॐ-ॐक़-ॡ०-९ॱ-ॷॹ-ॿঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡ০-ৱ৴-৹ਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼੦-੯ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡ૦-૯ଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡ୦-୯ୱ-୷ஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐ௦-௲అ-ఌఎ-ఐఒ-నప-ళవ-హఽ-ఽౘ-ౙౠ-ౡ౦-౯౸-౾ಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡ೦-೯ೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൠ-ൡ൦-൵ൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆ๐-๙ກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆ໐-໙ໜ-ໟༀ-ༀ༠-༳ཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-၉ၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎ႐-႙Ⴀ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚ፩-፼ᎀ-ᎏᎠ-Ᏼᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛮ-ᛰᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜ០-៩៰-៹᠐-᠙ᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤜ᥆-ᥭᥰ-ᥴᦀ-ᦫᧁ-ᧇ᧐-᧚ᨀ-ᨖᨠ-ᩔ᪀-᪉᪐-᪙ᪧ-ᪧᬅ-ᬳᭅ-ᭋ᭐-᭙ᮃ-ᮠᮮ-ᯥᰀ-ᰣ᱀-᱉ᱍ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼ⁰-ⁱ⁴-⁹ⁿ-₉ₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎ⅐-↉①-⒛⓪-⓿❶-➓Ⰰ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳ⳽-⳽ⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〇〡-〩〱-〵〸-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎ㆒-㆕ㆠ-ㆺㇰ-ㇿ㈠-㈩㉈-㉏㉑-㉟㊀-㊉㊱-㊿㐀-䶵一-鿌ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘫꙀ-ꙮꙿ-ꚗꚠ-ꛯꜗ-ꜟꜢ-ꞈꞋ-ꞎꞐ-ꞓꞠ-Ɦꟸ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢ꠰-꠵ꡀ-ꡳꢂ-ꢳ꣐-꣙ꣲ-ꣷꣻ-ꣻ꤀-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-꧙ꨀ-ꨨꩀ-ꩂꩄ-ꩋ꩐-꩙ꩠ-ꩶꩺ-ꩺꪀ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꯀ-ꯢ꯰-꯹가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼ0-9A-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐄇-𐄳𐅀-𐅸𐆊-𐆊𐊀-𐊜𐊠-𐋐𐌀-𐌞𐌠-𐌣𐌰-𐍊𐎀-𐎝𐎠-𐏃𐏈-𐏏𐏑-𐏕𐐀-𐒝𐒠-𐒩𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡘-𐡟𐤀-𐤛𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩀-𐩇𐩠-𐩾𐬀-𐬵𐭀-𐭕𐭘-𐭲𐭸-𐭿𐰀-𐱈𐹠-𐹾𑀃-𑀷𑁒-𑁯𑂃-𑂯𑃐-𑃨𑃰-𑃹𑄃-𑄦𑄶-𑄿𑆃-𑆲𑇁-𑇄𑇐-𑇙𑚀-𑚪𑛀-𑛉𒀀-𒍮𒐀-𒑢𓀀-𓐮𖠀-𖨸𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𝍠-𝍱𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𝟎-𝟿𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻🄀-🄊𠀀-𪛖𪜀-𫜴𫝀-𫠝丽-𪘀]
|
[0-9A-Za-zª-ª²-³µ-µ¹-º¼-¾À-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮ̀-ʹͶ-ͷͺ-ͽΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁ҃-ԧԱ-Ֆՙ-ՙա-և֑-ֽֿ-ֿׁ-ׂׄ-ׇׅ-ׇא-תװ-ײؐ-ؚؠ-٩ٮ-ۓە-ۜ۟-۪ۨ-ۼۿ-ۿܐ-݊ݍ-ޱ߀-ߵߺ-ߺࠀ-࠭ࡀ-࡛ࢠ-ࢠࢢ-ࢬࣤ-ࣾऀ-ॣ०-९ॱ-ॷॹ-ॿঁ-ঃঅ-ঌএ-ঐও-নপ-রল-লশ-হ়-ৄে-ৈো-ৎৗ-ৗড়-ঢ়য়-ৣ০-ৱ৴-৹ਁ-ਃਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹ਼-਼ਾ-ੂੇ-ੈੋ-੍ੑ-ੑਖ਼-ੜਫ਼-ਫ਼੦-ੵઁ-ઃઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હ઼-ૅે-ૉો-્ૐ-ૐૠ-ૣ૦-૯ଁ-ଃଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହ଼-ୄେ-ୈୋ-୍ୖ-ୗଡ଼-ଢ଼ୟ-ୣ୦-୯ୱ-୷ஂ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹா-ூெ-ைொ-்ௐ-ௐௗ-ௗ௦-௲ఁ-ఃఅ-ఌఎ-ఐఒ-నప-ళవ-హఽ-ౄె-ైొ-్ౕ-ౖౘ-ౙౠ-ౣ౦-౯౸-౾ಂ-ಃಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹ಼-ೄೆ-ೈೊ-್ೕ-ೖೞ-ೞೠ-ೣ೦-೯ೱ-ೲം-ഃഅ-ഌഎ-ഐഒ-ഺഽ-ൄെ-ൈൊ-ൎൗ-ൗൠ-ൣ൦-൵ൺ-ൿං-ඃඅ-ඖක-නඳ-රල-ලව-ෆ්-්ා-ුූ-ූෘ-ෟෲ-ෳก-ฺเ-๎๐-๙ກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ູົ-ຽເ-ໄໆ-ໆ່-ໍ໐-໙ໜ-ໟༀ-ༀ༘-༙༠-༳༵-༵༷-༹༷-༹༾-ཇཉ-ཬཱ-྄྆-ྗྙ-ྼ࿆-࿆က-၉ၐ-ႝႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚ፝-፟፩-፼ᎀ-ᎏᎠ-Ᏼᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛮ-ᛰᜀ-ᜌᜎ-᜔ᜠ-᜴ᝀ-ᝓᝠ-ᝬᝮ-ᝰᝲ-ᝳក-៓ៗ-ៗៜ-៝០-៩៰-៹᠋-᠍᠐-᠙ᠠ-ᡷᢀ-ᢪᢰ-ᣵᤀ-ᤜᤠ-ᤫᤰ-᤻᥆-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉ᧐-᧚ᨀ-ᨛᨠ-ᩞ᩠-᩿᩼-᪉᪐-᪙ᪧ-ᪧᬀ-ᭋ᭐-᭙᭫-᭳ᮀ-᯳ᰀ-᰷᱀-᱉ᱍ-ᱽ᳐-᳔᳒-ᳶᴀ-ᷦ᷼-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼ⁰-ⁱ⁴-⁹ⁿ-₉ₐ-ₜ⃐-⃰ℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎ⅐-↉①-⒛⓪-⓿❶-➓Ⰰ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳳ⳽-⳽ⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯ⵿-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⷠ-ⷿⸯ-ⸯ々-〇〡-〯〱-〵〸-〼ぁ-ゖ゙-゚ゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎ㆒-㆕ㆠ-ㆺㇰ-ㇿ㈠-㈩㉈-㉏㉑-㉟㊀-㊉㊱-㊿㐀-䶵一-鿌ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘫꙀ-꙲ꙴ-꙽ꙿ-ꚗꚟ-꛱ꜗ-ꜟꜢ-ꞈꞋ-ꞎꞐ-ꞓꞠ-Ɦꟸ-ꠧ꠰-꠵ꡀ-ꡳꢀ-꣄꣐-꣙꣠-ꣷꣻ-ꣻ꤀-꤭ꤰ-꥓ꥠ-ꥼꦀ-꧀ꧏ-꧙ꨀ-ꨶꩀ-ꩍ꩐-꩙ꩠ-ꩶꩺ-ꩻꪀ-ꫂꫛ-ꫝꫠ-ꫯꫲ-꫶ꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꯀ-ꯪ꯬-꯭꯰-꯹가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻ︀-️︠-︦ﹰ-ﹴﹶ-ﻼ0-9A-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐄇-𐄳𐅀-𐅸𐆊-𐆊𐇽-𐇽𐊀-𐊜𐊠-𐋐𐌀-𐌞𐌠-𐌣𐌰-𐍊𐎀-𐎝𐎠-𐏃𐏈-𐏏𐏑-𐏕𐐀-𐒝𐒠-𐒩𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡘-𐡟𐤀-𐤛𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨃𐨅-𐨆𐨌-𐨓𐨕-𐨗𐨙-𐨳𐨸-𐨿𐨺-𐩇𐩠-𐩾𐬀-𐬵𐭀-𐭕𐭘-𐭲𐭸-𐭿𐰀-𐱈𐹠-𐹾𑀀-𑁆𑁒-𑁯𑂀-𑂺𑃐-𑃨𑃰-𑃹𑄀-𑄴𑄶-𑄿𑆀-𑇄𑇐-𑇙𑚀-𑚷𑛀-𑛉𒀀-𒍮𒐀-𒑢𓀀-𓐮𖠀-𖨸𖼀-𖽄𖽐-𖽾𖾏-𖾟𛀀-𛀁𝅥-𝅩𝅭-𝅲𝅻-𝆂𝆅-𝆋𝆪-𝆭𝉂-𝉄𝍠-𝍱𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𝟎-𝟿𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻🄀-🄊𠀀-𪛖𪜀-𫜴𫝀-𫠝丽-𪘀󠄀-󠇯]
|
Loading…
Reference in New Issue
Block a user