mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 09:51:38 +00:00
Merge pull request #17 from LuminosoInsight/alternative-func-to-regex
created alternate implementation of func-to-regex
Former-commit-id: 6efdaa308c
This commit is contained in:
commit
8439f14595
@ -14,23 +14,22 @@ def func_to_regex(accept_func):
|
|||||||
return a regex character class accepting the characters resulting in True.
|
return a regex character class accepting the characters resulting in True.
|
||||||
Ranges separated only by unassigned characters are merged for efficiency.
|
Ranges separated only by unassigned characters are merged for efficiency.
|
||||||
"""
|
"""
|
||||||
# Where the last range would end if it also included unassigned codepoints.
|
# parsing_range is True if the current codepoint might be in a range that
|
||||||
# If we need to add a codepoint right after this point, we extend the
|
# the regex will accept
|
||||||
# range; otherwise we start a new one.
|
parsing_range = False
|
||||||
tentative_end = None
|
|
||||||
ranges = []
|
ranges = []
|
||||||
|
|
||||||
for codepoint, category in enumerate(CATEGORIES):
|
for codepoint, category in enumerate(CATEGORIES):
|
||||||
if accept_func(codepoint):
|
if accept_func(codepoint):
|
||||||
if tentative_end == codepoint - 1:
|
if not parsing_range:
|
||||||
ranges[-1][1] = codepoint
|
|
||||||
else:
|
|
||||||
ranges.append([codepoint, codepoint])
|
ranges.append([codepoint, codepoint])
|
||||||
tentative_end = codepoint
|
parsing_range = True
|
||||||
elif category == 'Cn' and tentative_end == codepoint - 1:
|
else:
|
||||||
tentative_end = codepoint
|
ranges[-1][1] = codepoint
|
||||||
|
elif category != 'Cn':
|
||||||
|
parsing_range = False
|
||||||
|
|
||||||
return '[%s]' % ''.join(chr(r[0]) + '-' + chr(r[1]) for r in ranges)
|
return '[%s]' % ''.join('%c-%c' % tuple(r) for r in ranges)
|
||||||
|
|
||||||
|
|
||||||
def cache_regex_from_func(filename, func):
|
def cache_regex_from_func(filename, func):
|
||||||
|
Loading…
Reference in New Issue
Block a user