Update func_to_regex to remove the post-loop check that appended the final range

Former-commit-id: 87830d138b
2024-12-23 09:21:37 +00:00 · 2015-07-10 14:10:26 -04:00 · 2015-07-10 14:10:26 -04:00 · 8cbcef9bef
commit 8cbcef9bef
parent 3621eba0b2
1 changed file with 8 additions and 12 deletions
--- a/scripts/gen_regex.py
+++ b/scripts/gen_regex.py
@@ -14,22 +14,18 @@ def func_to_regex(accept):
    return a regex character class accepting the characters resulting in True.
    Ranges separated only by unassigned characters are merged for efficiency.
    """
-    # start and end of the range we are currently parsing. `start` is None if
-    # we are not parsing a range.
-    start = end = None
+    parsing_range = False
    ranges = []

    for codepoint, category in enumerate(CATEGORIES):
        if accept(codepoint):
-            if start is None:
-                start = codepoint
-            end = codepoint
-        elif category != 'Cn' and start is not None:
-            ranges.append((start, end))
-            start = end = None
-
-    if start is not None:
-        ranges.append((start, end))
+            if not parsing_range:
+                ranges.append([codepoint, codepoint])
+                parsing_range = True
+            else:
+                ranges[-1][1] = codepoint
+        elif category != 'Cn':
+            parsing_range = False

    return '[%s]' % ''.join('%s-%s' % (chr(r[0]), chr(r[1])) for r in ranges)