mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 09:21:37 +00:00
parent
3621eba0b2
commit
8cbcef9bef
@ -14,22 +14,18 @@ def func_to_regex(accept):
|
|||||||
return a regex character class accepting the characters resulting in True.
|
return a regex character class accepting the characters resulting in True.
|
||||||
Ranges separated only by unassigned characters are merged for efficiency.
|
Ranges separated only by unassigned characters are merged for efficiency.
|
||||||
"""
|
"""
|
||||||
# start and end of the range we are currently parsing. `start` is None if
|
parsing_range = False
|
||||||
# we are not parsing a range.
|
|
||||||
start = end = None
|
|
||||||
ranges = []
|
ranges = []
|
||||||
|
|
||||||
for codepoint, category in enumerate(CATEGORIES):
|
for codepoint, category in enumerate(CATEGORIES):
|
||||||
if accept(codepoint):
|
if accept(codepoint):
|
||||||
if start is None:
|
if not parsing_range:
|
||||||
start = codepoint
|
ranges.append([codepoint, codepoint])
|
||||||
end = codepoint
|
parsing_range = True
|
||||||
elif category != 'Cn' and start is not None:
|
else:
|
||||||
ranges.append((start, end))
|
ranges[-1][1] = codepoint
|
||||||
start = end = None
|
elif category != 'Cn':
|
||||||
|
parsing_range = False
|
||||||
if start is not None:
|
|
||||||
ranges.append((start, end))
|
|
||||||
|
|
||||||
return '[%s]' % ''.join('%s-%s' % (chr(r[0]), chr(r[1])) for r in ranges)
|
return '[%s]' % ''.join('%s-%s' % (chr(r[0]), chr(r[1])) for r in ranges)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user