Mirror of https://github.com/rspeer/wordfreq.git
Merge pull request #16 from LuminosoInsight/more-tweaking

More tweaking

Commit 2c573b5a0e

README.md
@@ -21,6 +21,12 @@ install them on Ubuntu:
     sudo apt-get install mecab-ipadic-utf8 libmecab-dev
     pip3 install mecab-python3
 
+## Unicode data
+
+The tokenizers used to split non-Japanese phrases use regexes built using the
+`unicodedata` module from Python 3.4, which uses Unicode version 6.3.0. To
+update these regexes, run `scripts/gen_regex.py`.
+
 ## License
 
 `wordfreq` is freely redistributable under the MIT license (see
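
The new Unicode data section above says to regenerate the cached regexes with `scripts/gen_regex.py`; the README does not spell out the invocation, but presumably it is just a matter of running the script from a source checkout, along the lines of:

    python3 scripts/gen_regex.py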

scripts/gen_regex.py
@@ -4,9 +4,34 @@ import pathlib
 from pkg_resources import resource_filename
 
 
+CATEGORIES = [unicodedata.category(chr(i)) for i in range(0x110000)]
 DATA_PATH = pathlib.Path(resource_filename('wordfreq', 'data'))
 
 
+def func_to_regex(accept_func):
+    """
+    Given a function that returns True or False for a numerical codepoint,
+    return a regex character class accepting the characters resulting in True.
+    Ranges separated only by unassigned characters are merged for efficiency.
+    """
+    # parsing_range is True if the current codepoint might be in a range that
+    # the regex will accept
+    parsing_range = False
+    ranges = []
+
+    for codepoint, category in enumerate(CATEGORIES):
+        if accept_func(codepoint):
+            if not parsing_range:
+                ranges.append([codepoint, codepoint])
+                parsing_range = True
+            else:
+                ranges[-1][1] = codepoint
+        elif category != 'Cn':
+            parsing_range = False
+
+    return '[%s]' % ''.join('%c-%c' % tuple(r) for r in ranges)
+
+
 def cache_regex_from_func(filename, func):
     """
     Generates a regex from a function that accepts a single unicode character,
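
To illustrate what the new generator produces, here is a standalone sketch that restates `func_to_regex` so it runs on its own; the toy accept function and the printed result are illustrative and not part of the repository:

    import unicodedata

    CATEGORIES = [unicodedata.category(chr(i)) for i in range(0x110000)]

    def func_to_regex(accept_func):
        # Grow [start, end] ranges of accepted codepoints. Unassigned ('Cn')
        # codepoints neither extend nor close the current range, so ranges
        # separated only by unassigned characters end up merged.
        parsing_range = False
        ranges = []
        for codepoint, category in enumerate(CATEGORIES):
            if accept_func(codepoint):
                if not parsing_range:
                    ranges.append([codepoint, codepoint])
                    parsing_range = True
                else:
                    ranges[-1][1] = codepoint
            elif category != 'Cn':
                parsing_range = False
        return '[%s]' % ''.join('%c-%c' % tuple(r) for r in ranges)

    # Toy accept function: ASCII decimal digits only.
    print(func_to_regex(lambda i: 0x30 <= i <= 0x39))   # prints: [0-9]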

@@ -16,77 +41,36 @@ def cache_regex_from_func(filename, func):
         file.write(func_to_regex(func))
 
 
-def _emoji_char_class():
+def _is_emoji_codepoint(i):
     """
-    Build a regex for emoji substitution. We create a regex character set
-    (like "[a-cv-z]") matching characters we consider emoji.
+    Report whether a numerical codepoint is (likely) an emoji: a Unicode 'So'
+    character (as future-proofed by the ftfy chardata module) but excluding
+    symbols like © and ™ below U+2600 and the replacement character U+FFFD.
     """
-    cache_regex_from_func(
-        'emoji.txt',
-        lambda c:
-            chardata.CHAR_CLASS_STRING[ord(c)] == '3' and
-            c >= '\u2600' and c != '\ufffd'
-    )
+    return chardata.CHAR_CLASS_STRING[i] == '3' and i >= 0x2600 and i != 0xfffd
 
 
-def _non_punct_class():
+def _is_non_punct_codepoint(i):
     """
-    Builds a regex that matches anything that is not one of the following
-    classes:
+    Report whether a numerical codepoint is not one of the following classes:
     - P: punctuation
     - S: symbols
     - Z: separators
     - C: control characters
-    This will classify symbols, including emoji, as punctuation; callers that
-    want to treat emoji separately should filter them out first.
+    This will classify symbols, including emoji, as punctuation; users that
+    want to accept emoji should add them separately.
     """
-    cache_regex_from_func(
-        'non_punct.txt',
-        lambda c: unicodedata.category(c)[0] not in 'PSZC'
-    )
+    return CATEGORIES[i][0] not in 'PSZC'
 
 
-def _combining_mark_class():
+def _is_combining_mark_codepoint(i):
     """
-    Builds a regex that matches anything that is a combining mark
+    Report whether a numerical codepoint is a combining mark (Unicode 'M').
     """
-    cache_regex_from_func(
-        'combining_mark.txt',
-        lambda c: unicodedata.category(c)[0] == 'M'
-    )
-
-
-def func_to_regex(accept):
-    """
-    Converts a function that accepts a single unicode character into a regex.
-    Unassigned unicode characters are treated like their neighbors.
-    """
-    ranges = []
-    start = None
-    has_accepted = False
-    for x in range(0x110000):
-        c = chr(x)
-
-        if accept(c):
-            has_accepted = True
-            if start is None:
-                start = c
-        elif unicodedata.category(c) == 'Cn':
-            if start is None:
-                start = c
-        elif start is not None:
-            if has_accepted:
-                ranges.append('-'.join([start, chr(x-1)]))
-                has_accepted = False
-            start = None
-    else:
-        if has_accepted and start is not None:
-            ranges.append('-'.join([start, chr(x-1)]))
-
-    return '[%s]' % ''.join(ranges)
+    return CATEGORIES[i][0] == 'M'
 
 
 if __name__ == '__main__':
-    _combining_mark_class()
-    _non_punct_class()
-    _emoji_char_class()
+    cache_regex_from_func('emoji.txt', _is_emoji_codepoint)
+    cache_regex_from_func('non_punct.txt', _is_non_punct_codepoint)
+    cache_regex_from_func('combining_mark.txt', _is_combining_mark_codepoint)
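
Only the final write of `cache_regex_from_func` appears as context above; the rest of its body sits outside this hunk. A minimal sketch of how it presumably fits together (the open mode and docstring wording here are assumptions, not lines from the diff):

    def cache_regex_from_func(filename, func):
        """
        Generate a character-class regex with func_to_regex() and cache it in
        the data directory, where the wordfreq package reads it back later.
        """
        # Assumed shape; only the write call below is visible in the diff.
        with (DATA_PATH / filename).open(mode='w') as file:
            file.write(func_to_regex(func))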

@@ -1,7 +1,6 @@
 from wordfreq import (
     word_frequency, available_languages, cB_to_freq,
-    top_n_list, random_words, random_ascii_words, tokenize,
-    half_harmonic_mean
+    top_n_list, random_words, random_ascii_words, tokenize
 )
 from nose.tools import (
     eq_, assert_almost_equal, assert_greater, raises

@@ -114,11 +113,8 @@ def test_phrase_freq():
     plant = word_frequency("plan.t", 'en')
     assert_greater(plant, 0)
     assert_almost_equal(
-        plant,
-        half_harmonic_mean(
-            word_frequency('plan', 'en'),
-            word_frequency('t', 'en')
-        )
+        1.0 / plant,
+        1.0 / word_frequency('plan', 'en') + 1.0 / word_frequency('t', 'en')
     )
 
 
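
As a worked example of the relationship the updated assertion checks (the numbers are made up for illustration, not real wordfreq values):

    f_plan, f_t = 1e-4, 1e-3                      # hypothetical token frequencies
    combined = 1.0 / (1.0 / f_plan + 1.0 / f_t)   # the 1/f = 1/f1 + 1/f2 rule
    print(combined)                               # ~9.09e-05, below both inputs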

@@ -1,5 +1,5 @@
 from nose.tools import eq_, assert_almost_equal
-from wordfreq import tokenize, word_frequency, half_harmonic_mean
+from wordfreq import tokenize, word_frequency
 
 
 def test_tokens():

@@ -17,10 +17,7 @@ def test_combination():
         ohayou_freq / 2
     )
     assert_almost_equal(
-        word_frequency('おはようございます', 'ja'),
-        half_harmonic_mean(
-            half_harmonic_mean(ohayou_freq, gozai_freq),
-            masu_freq
-        )
+        1.0 / word_frequency('おはようございます', 'ja'),
+        1.0 / ohayou_freq + 1.0 / gozai_freq + 1.0 / masu_freq
     )
 

(removed file: the half_harmonic_mean property tests)
@@ -1,30 +0,0 @@
-from nose.tools import assert_less_equal, assert_almost_equal
-from wordfreq import half_harmonic_mean
-from functools import reduce
-import random
-
-
-def check_hm_properties(inputs):
-    # I asserted that the half-harmonic-mean formula is associative,
-    # commutative, monotonic, and less than or equal to its inputs.
-    # (Less if its inputs are strictly positive, in fact.)
-    #
-    # So let's test that what I said is true.
-    hm1 = reduce(half_harmonic_mean, inputs)
-    random.shuffle(inputs)
-    hm2 = reduce(half_harmonic_mean, inputs)
-    assert_almost_equal(hm1, hm2)
-
-    inputs[0] *= 2
-    hm3 = reduce(half_harmonic_mean, inputs)
-    assert_less_equal(hm2, hm3)
-
-
-def test_half_harmonic_mean():
-    for count in range(2, 6):
-        for rep in range(10):
-            # get some strictly positive arbitrary numbers
-            inputs = [random.expovariate(0.01)
-                      for i in range(count)]
-            yield check_hm_properties, inputs

@@ -10,13 +10,13 @@ import random
 import logging
 logger = logging.getLogger(__name__)
 
-DATA_PATH = pathlib.Path(resource_filename('wordfreq', 'data'))
 CACHE_SIZE = 100000
+DATA_PATH = pathlib.Path(resource_filename('wordfreq', 'data'))
 
 
 def load_range(filename):
     """
-    Loads a file from the data path
+    Load a file from the data path.
     """
     with (DATA_PATH / filename).open() as file:
         return file.read()

@@ -26,7 +26,6 @@ NON_PUNCT_RANGE = load_range('non_punct.txt')
 COMBINING_MARK_RANGE = load_range('combining_mark.txt')
-
 COMBINING_MARK_RE = re.compile(COMBINING_MARK_RANGE)
 
 TOKEN_RE = re.compile("{0}|{1}+(?:'{1}+)*".format(EMOJI_RANGE, NON_PUNCT_RANGE))
 
 
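
TOKEN_RE therefore matches either a single emoji-class character or a run of non-punctuation characters with optional internal apostrophes. A rough, self-contained sketch of the same pattern using simplified stand-in classes (the real classes come from the cached data files, not these placeholders):

    import re

    EMOJI_RANGE = '[\u2600-\u27bf]'     # stand-in for the emoji.txt class
    NON_PUNCT_RANGE = '[0-9A-Za-z]'     # stand-in for the non_punct.txt class
    TOKEN_RE = re.compile("{0}|{1}+(?:'{1}+)*".format(EMOJI_RANGE, NON_PUNCT_RANGE))

    print(TOKEN_RE.findall("don't stop ☀"))   # ["don't", 'stop', '☀']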

@@ -46,6 +45,7 @@ def simple_tokenize(text):
     """
     return [token.casefold() for token in TOKEN_RE.findall(text)]
 
+
 mecab_tokenize = None
 def tokenize(text, lang):
     """

@@ -209,18 +209,30 @@ def iter_wordlist(lang, wordlist='combined'):
     return itertools.chain(*get_frequency_list(lang, wordlist))
 
 
-def half_harmonic_mean(a, b):
-    """
-    An associative, commutative, monotonic function that returns a value
-    less than or equal to both a and b.
+# This dict and inner function are used to implement a "drop everything" cache
+# for word_frequency(); the overheads of lru_cache() are comparable to the time
+# it takes to look up frequencies from scratch, so something faster is needed.
+_wf_cache = {}
 
-    Used for estimating the frequency of terms made of multiple tokens, given
-    the assumption that the tokens very frequently appear together.
-    """
-    return (a * b) / (a + b)
+def _word_frequency(word, lang, wordlist, minimum):
+    tokens = tokenize(word, lang)
+    if not tokens:
+        return minimum
 
+    # Frequencies for multiple tokens are combined using the formula
+    # 1 / f = 1 / f1 + 1 / f2 + ...
+    # Thus the resulting frequency is less than any individual frequency, and
+    # the smallest frequency dominates the sum.
+    freqs = get_frequency_dict(lang, wordlist)
+    one_over_result = 0.0
+    for token in tokens:
+        if token not in freqs:
+            # If any word is missing, just return the default value
+            return minimum
+        one_over_result += 1.0 / freqs[token]
+
+    return max(1.0 / one_over_result, minimum)
 
 
-@lru_cache(maxsize=CACHE_SIZE)
 def word_frequency(word, lang, wordlist='combined', minimum=0.):
     """
     Get the frequency of `word` in the language with code `lang`, from the
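
The comment above gives the motivation; the mechanism itself is just a module-level dict that is wiped once it grows past CACHE_SIZE. A self-contained sketch of the same "drop everything" pattern with a toy function and size (not the wordfreq code):

    _cache = {}
    MAX_ENTRIES = 4              # toy limit; wordfreq uses CACHE_SIZE = 100000

    def expensive(n):
        return n * n             # stand-in for the real frequency lookup

    def cached(n):
        try:
            return _cache[n]
        except KeyError:
            if len(_cache) >= MAX_ENTRIES:
                _cache.clear()   # drop everything rather than evicting LRU-style
            _cache[n] = expensive(n)
            return _cache[n]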

@@ -246,25 +258,14 @@ def word_frequency(word, lang, wordlist='combined', minimum=0.):
     of the word frequency that is no greater than the frequency of any of its
     individual tokens.
     """
-    freqs = get_frequency_dict(lang, wordlist)
-    combined_value = None
-    tokens = tokenize(word, lang)
-
-    if len(tokens) == 0:
-        return minimum
-
-    for token in tokens:
-        if token not in freqs:
-            # If any word is missing, just return the default value
-            return minimum
-        value = freqs[token]
-        if combined_value is None:
-            combined_value = value
-        else:
-            # Combine word values using the half-harmonic-mean formula,
-            # (a * b) / (a + b). This operation is associative.
-            combined_value = half_harmonic_mean(combined_value, value)
-    return max(combined_value, minimum)
+    args = (word, lang, wordlist, minimum)
+    try:
+        return _wf_cache[args]
+    except KeyError:
+        if len(_wf_cache) >= CACHE_SIZE:
+            _wf_cache.clear()
+        _wf_cache[args] = _word_frequency(*args)
+        return _wf_cache[args]
 
 
 @lru_cache(maxsize=100)
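
A short usage sketch of the cached entry point (illustrative calls; the actual numbers depend on the bundled wordlists):

    from wordfreq import word_frequency

    word_frequency('the', 'en')                    # computed once, then cached
    word_frequency('the', 'en')                    # now answered from _wf_cache
    word_frequency('qwxzk', 'en', minimum=1e-9)    # presumably absent from the
                                                   # wordlist, so the minimum
                                                   # value is returned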

@@ -305,8 +306,7 @@ def random_words(lang='en', wordlist='combined', nwords=5, bits_per_word=12,
             "There aren't enough words in the wordlist to provide %d bits of "
             "entropy per word." % bits_per_word
         )
-    selected = [random.choice(choices) for i in range(nwords)]
-    return ' '.join(selected)
+    return ' '.join([random.choice(choices) for i in range(nwords)])
 
 
 def random_ascii_words(lang='en', wordlist='combined', nwords=5,

combining_mark.txt (regenerated character class; the old line, then the new one):
@@ -1 +1 @@
[̀-ͯ҃-҉-ֽֿ-ֿׁ-ׂׄ-ׇׅ-ؐ-ًؚ-ٰٟ-ٰۖ-ۜ۟-ۤۧ-۪ۨ-ܑۭ-ܑܰ-ަ-ް߫-߳ࠖ-࠙ࠛ-ࠣࠥ-ࠧࠩ-࡙-ࢭ-ःऺ-़ा-ॏ॑-ॗॢ-ॣঀ--়া-্-ৢ-ৼ--ੰ-ੱੵ--઼ા-ૢ---଼ା-ୢ--ஂ---ఄా-ౢ-ಀ-಄-಼ಾ-ೝೢ-ೳ-ഄാ-്൏-ൟൢ---ෳั-ัิ-็-๎ັ-ັິ-ຼ-༘-༙༵-༵༷-༹༷-༹༾-༿-྄྆-྇ྍ-࿆-࿆ါ-ှၖ-ၙၞ-ၠၢ-ၤၧ-ၭၱ-ၴႂ-ႍႏ-ႏႚ-ႝ-፟ᜒ-ᜟᜲ-᜴ᝒ--឴-៓៝-᠋-᠍ᢩ-ᢩᤝ--ᧀᧈ-ᨗ-ᩕ-᩿-ᬄ᬴-᭄᭫-᭳᭽-ᮂᮡ-ᮭ᯦-ᰤ--᳔᳒-᳨᳭-᳭ᳲ-᳴᷀-᷿₻-⳯-⳱-⵿-〪ⷿ-〯-゚꙯-꙲ꙴ-꙽Ꚙ-ꚟ꛰-꛱ꠂ-ꠂ꠆-꠆ꠋ-ꠋꠣ-ꠧ-ꢁꢴ--꣱ꤦ-꤭ꥇ--ꦃ꦳-꧀ꨩ-ꩃ-ꩃꩌ-ꩻ-ꩿꪰ-ꪰꪲ-ꪴꪷ-ꪸꪾ-꪿꫁-꫁ꫫ-ꫯꫵ-ꯣ-ꯪ꯬-ﬞ-ﬞ﷾-️-𐇽︯-𐨁-𐨏𐨴-𐨿-𑀂𑀸-𑁆𑁰-𑂂𑂰-𑂺-𑄂𑄧-𑅄-𑆂𑆳-𑇀𑚫-𖽑-𖾒𝅥-𝅩𝅭-𝅲𝅻-𝆂𝆅-𝆋𝆪-𝆭𝉂-𝉄-]
[̀-ͯ҃-҉֑-ֽֿ-ֿׁ-ׂׄ-ׇׅ-ׇؐ-ًؚ-ٰٟ-ٰۖ-ۜ۟-ۤۧ-۪ۨ-ܑۭ-ܑܰ-݊ަ-ް߫-߳ࠖ-࠙ࠛ-ࠣࠥ-ࠧࠩ-࡙࠭-࡛ࣤ-ःऺ-़ा-ॏ॑-ॗॢ-ॣঁ-ঃ়-়া-্ৗ-ৗৢ-ৣਁ-ਃ਼-ੑੰ-ੱੵ-ઃ઼-઼ા-્ૢ-ૣଁ-ଃ଼-଼ା-ୗୢ-ୣஂ-ஂா-்ௗ-ௗఁ-ఃా-ౖౢ-ౣಂ-ಃ಼-಼ಾ-ೖೢ-ೣം-ഃാ-്ൗ-ൗൢ-ൣං-ඃ්-ෳั-ัิ-ฺ็-๎ັ-ັິ-ຼ່-ໍ༘-༙༵-༵༷-༹༷-༹༾-༿ཱ-྄྆-྇ྍ-ྼ࿆-࿆ါ-ှၖ-ၙၞ-ၠၢ-ၤၧ-ၭၱ-ၴႂ-ႍႏ-ႏႚ-ႝ፝-፟ᜒ-᜔ᜲ-᜴ᝒ-ᝓᝲ-ᝳ឴-៓៝-៝᠋-᠍ᢩ-ᢩᤠ-᤻ᦰ-ᧀᧈ-ᧉᨗ-ᨛᩕ-᩿ᬀ-ᬄ᬴-᭄᭫-᭳ᮀ-ᮂᮡ-ᮭ᯦-᯳ᰤ-᰷᳐-᳔᳒-᳨᳭-᳭ᳲ-᳴᷀-᷿⃐-⃰⳯-⵿⳱-⵿ⷠ-〪ⷿ-゙〯-゚꙯-꙲ꙴ-꙽ꚟ-ꚟ꛰-꛱ꠂ-ꠂ꠆-꠆ꠋ-ꠋꠣ-ꠧꢀ-ꢁꢴ-꣄꣠-꣱ꤦ-꤭ꥇ-꥓ꦀ-ꦃ꦳-꧀ꨩ-ꨶꩃ-ꩃꩌ-ꩍꩻ-ꩻꪰ-ꪰꪲ-ꪴꪷ-ꪸꪾ-꪿꫁-꫁ꫫ-ꫯꫵ-꫶ꯣ-ꯪ꯬-꯭ﬞ-ﬞ︀-️︠-𐇽︦-𐇽𐨁-𐨏𐨸-𐨿𑀀-𑀂𑀸-𑁆𑂀-𑂂𑂰-𑂺𑄀-𑄂𑄧-𑄴𑆀-𑆂𑆳-𑇀𑚫-𑚷𖽑-𖾒𝅥-𝅩𝅭-𝅲𝅻-𝆂𝆅-𝆋𝆪-𝆭𝉂-𝉄󠄀-󠇯]

emoji.txt (regenerated character class; the old line, then the new one):
@@ -1 +1 @@
[☀-♮♰-❧➔-➿⠀-⣿⬀-⬯⭅-⭆⭍-⯿⳥-⳪⸼-〄-〄〒-〓〠-〠〶-〷〾--㆑㆖-㆟ㆻ-㈀-㈪-㉇㉐-㉐㉠-㉿㊊-㊰㋀-㏿䶶-䷿-꠨-꠶-꠷꠹-꩷-꩹﷽-﷿¦-¦-│■---𐄿𐅹-𐆉𐆋-𐇼𐡠-𐣿𐪀--𛀂-𝅘𝅥𝅲𝅪-𝅬𝆃-𝆄𝆌-𝆩𝆮-𝉁𝉅--🄋-]
[☀-♮♰-❧➔-➿⠀-⣿⬀-⬯⭅-⭆⭍-⯑⳥-⳪⺀-⿻〄-〄〒-〓〠-〠〶-〷〾-〿㆐-㆑㆖-㆟㇀-㇣㈀-㈞㈪-㉇㉐-㉐㉠-㉿㊊-㊰㋀-㏿䷀-䷿꒐-꓆꠨-꠫꠶-꠷꠹-꠹꩷-꩹﷽-﷽¦-¦│-│■-○-𐄷-𐄿𐅹-𐆉𐆌-𐇼𐡷-𐡸𐫈-𐫈𖬼-𖭅𛲜-𝅘𝅥𝅲𝅪-𝅬𝆃-𝆄𝆌-𝆩𝆮-𝉁𝉅-𝍖🀀-🄍-]

non_punct.txt (regenerated character class; the old line, then the new one):
@@ -1 +1 @@
[0-9A-Za-zª-ª²-³µ-µ¹-º¼-¾À-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮ̀-ʹͶ-ͽΆ-ΆΈ-ϵϷ-ҁ҃-ՙՠ-ֈ-ֽֿ-ֿׁ-ׂׄ-ׇׅ-ײؐ-ؚؠ-٩ٮ-ۓە-ۜ۟-۪ۨ-ۼۿ-ۿܐ-ߵߺ---ॣ०-९ॱ-ৱ৴-৹ৼ-૯-୯ୱ-௲-౾ಀ-൸ൺ-ෳ-เ-๎๐-๙-ༀ༘-༙༠-༳༵-༵༷-༹༷-༹༾-྄྆-࿆-࿆-၉ၐ-ႝႠ-ჺჼ-፟፩-ᎏ-ᐁ-ᙬᙯ-ᙿᚁ-ᚚ-ᛪᛮ-᜴-៓ៗ-ៗៜ-᠋-᠍᠏-᥆-ᨀ-ᨠ-ᪧ-ᪧ-᭙᭫-᭳᭽-ᰀ-᱀-ᱽ-᳔᳒-ᾼι-ιῂ-ῌῐ-ῠ-Ῥ-ῼ⁰-⁹ⁿ-₉-₻-ℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎ⅐--⒛⓪-⓿❶-➓⭚-ⳤⳫ-⳽-⳽ⴀ-ⵯ-ⷿⸯ-ⸯ々-〇〡-〯〱-〵〸-〼-゚ゝ-ゟァ-ヺー-㆒-㆕ㆠ-ㆿ-ㇿ-㈩㉈-㉏㉑-㉟㊀-㊉㊱-㊿㐀-䶿一--ꓽꔀ-ꘌꘐ-꙲ꙴ-꙽ꙿ-꛱ꜗ-ꜟꜢ-ꞈꞋ-ꠧ꠬-꠵-ꡳ-꣐-ꣷꣻ-꤭ꤰ-ꥠ-꧀-ꧠ-ꩠ-ꩶꩺ-ꫝꫠ-ꫯꫲ-ꯪ꯬-豈-ﬨשׁ-ﮱ﯂-ﴽ﵀-ﷻ﷾-️-︯-0-9A-Za-zヲ---𐅀-𐅸𐆊-𐇽-𐎠-𐏏𐏑-𐡘-𐤠---𐩾𐪀-𐭀-𑁆-𑂺𑃂-𑄿𑅄-𑇄𑇉-𒑴-𝅥-𝅩𝅭-𝅲𝅻-𝆂𝆅-𝆋𝆪-𝆭𝉂-𝉄-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-🃠-🄏🝴--]
[0-9A-Za-zª-ª²-³µ-µ¹-º¼-¾À-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮ̀-ʹͶ-ͽΆ-ΆΈ-ϵϷ-ҁ҃-ՙա-և֑-ֽֿ-ֿׁ-ׂׄ-ׇׅ-ײؐ-ؚؠ-٩ٮ-ۓە-ۜ۟-۪ۨ-ۼۿ-ۿܐ-ߵߺ-࠭ࡀ-࡛ࢠ-ॣ०-९ॱ-ৱ৴-৹ਁ-૯ଁ-୯ୱ-௲ఁ-౾ಂ-൵ൺ-ෳก-ฺเ-๎๐-๙ກ-ༀ༘-༙༠-༳༵-༵༷-༹༷-༹༾-྄྆-ྼ࿆-࿆က-၉ၐ-ႝႠ-ჺჼ-፟፩-ᎏᎠ-Ᏼᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛮ-᜴ᝀ-៓ៗ-ៗៜ-៹᠋-᠍᠐-᤻᥆-᧚ᨀ-ᨛᨠ-᪙ᪧ-ᪧᬀ-᭙᭫-᭳ᮀ-᯳ᰀ-᰷᱀-ᱽ᳐-᳔᳒-ᾼι-ιῂ-ῌῐ-Ίῠ-Ῥῲ-ῼ⁰-⁹ⁿ-₉ₐ-ₜ⃐-⃰ℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎ⅐-↉①-⒛⓪-⓿❶-➓Ⰰ-ⳤⳫ-ⳳ⳽-⳽ⴀ-ⵯ⵿-ⷿⸯ-ⸯ々-〇〡-〯〱-〵〸-〼ぁ-゚ゝ-ゟァ-ヺー-ㆎ㆒-㆕ㆠ-ㆺㇰ-ㇿ㈠-㈩㉈-㉏㉑-㉟㊀-㊉㊱-㊿㐀-䶵一-ꒌꓐ-ꓽꔀ-ꘌꘐ-꙲ꙴ-꙽ꙿ-꛱ꜗ-ꜟꜢ-ꞈꞋ-ꠧ꠰-꠵ꡀ-ꡳꢀ-꣄꣐-ꣷꣻ-꤭ꤰ-꥓ꥠ-꧀ꧏ-꧙ꨀ-꩙ꩠ-ꩶꩺ-ꫝꫠ-ꫯꫲ-ꯪ꯬-ퟻ豈-ﬨשׁ-ﮱﯓ-ﴽﵐ-ﷻ︀-️︠-︦ﹰ-ﻼ0-9A-Za-zヲ-ᅵ𐀀-𐃺𐄇-𐄳𐅀-𐅸𐆊-𐆊𐇽-𐎝𐎠-𐏏𐏑-𐡕𐡘-𐤛𐤠-𐤹𐦀-𐩇𐩠-𐩾𐬀-𐬵𐭀-𑁆𑁒-𑂺𑃐-𑄿𑆀-𑇄𑇐-𒑢𓀀-𛀁𝅥-𝅩𝅭-𝅲𝅻-𝆂𝆅-𝆋𝆪-𝆭𝉂-𝉄𝍠-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𞺻🄀-🄊𠀀-𪘀󠄀-󠇯]