updated func_to_regex to remove end check

Former-commit-id: 87830d138b
This commit is contained in:
Joshua Chin 2015-07-10 14:10:26 -04:00
parent 3621eba0b2
commit 8cbcef9bef

View File

@@ -14,22 +14,18 @@ def func_to_regex(accept):
return a regex character class accepting the characters resulting in True. return a regex character class accepting the characters resulting in True.
Ranges separated only by unassigned characters are merged for efficiency. Ranges separated only by unassigned characters are merged for efficiency.
""" """
# start and end of the range we are currently parsing. `start` is None if parsing_range = False
# we are not parsing a range.
start = end = None
ranges = [] ranges = []
for codepoint, category in enumerate(CATEGORIES): for codepoint, category in enumerate(CATEGORIES):
if accept(codepoint): if accept(codepoint):
if start is None: if not parsing_range:
start = codepoint ranges.append([codepoint, codepoint])
end = codepoint parsing_range = True
elif category != 'Cn' and start is not None: else:
ranges.append((start, end)) ranges[-1][1] = codepoint
start = end = None elif category != 'Cn':
parsing_range = False
if start is not None:
ranges.append((start, end))
return '[%s]' % ''.join('%s-%s' % (chr(r[0]), chr(r[1])) for r in ranges) return '[%s]' % ''.join('%s-%s' % (chr(r[0]), chr(r[1])) for r in ranges)