mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
parent
9a513a2224
commit
93681e43b3
39
scripts/gen_regex.py
Normal file
39
scripts/gen_regex.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import argparse
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
|
def func_to_regex(accept):
    """
    Converts a function that accepts a single unicode character into a regex.

    Every code point from U+0000 to U+10FFFF is passed to `accept` as a
    one-character string. Runs of accepted characters are collapsed into
    `start-end` ranges, and the ranges are concatenated into a single
    character class such as `[a-cx-z]`, which is returned as a string.

    Unassigned unicode characters (category 'Cn') are treated like their
    neighbors: they never break a run, and unassigned characters directly
    before or after accepted ones are absorbed into the emitted range. A run
    that consists only of unassigned characters is discarded.

    Range endpoints that have a special meaning inside a character class
    (backslash, brackets, '^' and '-') are backslash-escaped.

    If `accept` rejects every character, the result is the string '[]'.
    """
    def escaped(char):
        # Keep the emitted class well-formed when an endpoint is one of the
        # characters that are syntax inside [...].
        return '\\' + char if char in '\\]^-[' else char

    def as_range(first, last):
        return '-'.join([escaped(first), escaped(last)])

    ranges = []
    start = None          # first character of the run currently being built
    has_accepted = False  # True once that run contains an accepted character

    for x in range(0x110000):
        c = chr(x)

        if accept(c):
            has_accepted = True
            if start is None:
                # An accepted character opens a new run.
                start = c
        elif unicodedata.category(c) == 'Cn':
            # Unassigned: may open a tentative run, and never closes one.
            if start is None:
                start = c
        elif start is not None:
            # An assigned, rejected character closes the current run. Only
            # runs that contain at least one accepted character are kept.
            if has_accepted:
                ranges.append(as_range(start, chr(x - 1)))
                has_accepted = False
            start = None

    # A run still open after the loop extends through the last code point,
    # which is `x` itself (not `x - 1`). `has_accepted` implies that `start`
    # is set.
    if has_accepted:
        ranges.append(as_range(start, chr(x)))

    return '[%s]' % ''.join(ranges)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: takes one positional argument containing a
    # Python expression that evaluates to a single-character predicate, and
    # prints the character class built from it by func_to_regex.
    # argparse is already imported at the top of the file.
    parser = argparse.ArgumentParser(description='Generate a regex matching a function')
    parser.add_argument('acceptor', help='a Python function that accepts a single char')
    args = parser.parse_args()

    # SECURITY: eval() executes the argument as arbitrary Python code. This
    # is only acceptable because the expression is typed by the person
    # running the script; never feed it untrusted input.
    print(func_to_regex(eval(args.acceptor)))
|
Loading…
Reference in New Issue
Block a user