From 2262088b5fbf604192761e771d0ac1c606570772 Mon Sep 17 00:00:00 2001 From: Andrew Lin Date: Fri, 10 Jul 2015 14:02:33 -0400 Subject: [PATCH] Improve variable names. Former-commit-id: 95da6985d466276aad850926188fa0f6b05a3d1f --- scripts/gen_regex.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/gen_regex.py b/scripts/gen_regex.py index 6e780bf..5233482 100644 --- a/scripts/gen_regex.py +++ b/scripts/gen_regex.py @@ -8,7 +8,7 @@ CATEGORIES = [unicodedata.category(chr(i)) for i in range(0x110000)] DATA_PATH = pathlib.Path(resource_filename('wordfreq', 'data')) -def func_to_regex(func): +def func_to_regex(accept_func): """ Given a function that returns True or False for a numerical codepoint, return a regex character class accepting the characters resulting in True. @@ -20,15 +20,15 @@ def func_to_regex(func): tentative_end = None ranges = [] - for i, cat in enumerate(CATEGORIES): - if func(i): - if tentative_end == i - 1: - ranges[-1][1] = i + for codepoint, category in enumerate(CATEGORIES): + if accept_func(codepoint): + if tentative_end == codepoint - 1: + ranges[-1][1] = codepoint else: - ranges.append([i, i]) - tentative_end = i - elif cat == 'Cn' and tentative_end == i - 1: - tentative_end = i + ranges.append([codepoint, codepoint]) + tentative_end = codepoint + elif category == 'Cn' and tentative_end == codepoint - 1: + tentative_end = codepoint return '[%s]' % ''.join(chr(r[0]) + '-' + chr(r[1]) for r in ranges)