mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
parent
9a513a2224
commit
93681e43b3
39
scripts/gen_regex.py
Normal file
39
scripts/gen_regex.py
Normal file
@ -0,0 +1,39 @@
|
||||
import argparse
|
||||
import unicodedata
|
||||
|
||||
def func_to_regex(accept):
    """
    Converts a function that accepts a single unicode character into a regex.
    Unassigned unicode characters are treated like their neighbors.

    `accept` is called once with every code point from U+0000 to U+10FFFF
    (as a one-character string) and should return a true value for the
    characters the regex must match.

    Returns a string containing a single regex character class, such as
    '[a-cx-z]', made of one 'first-last' range per run of accepted
    characters. Unassigned characters (Unicode category 'Cn') that are
    adjacent to a run are absorbed into it, which keeps the number of
    ranges small. Range endpoints that are special inside a character
    class are backslash-escaped.
    """
    last_codepoint = 0x10FFFF

    def escape(char):
        # Only these characters have a special meaning inside [...].
        return '\\' + char if char in '\\]^-[' else char

    def make_range(first, last):
        return '-'.join([escape(first), escape(last)])

    ranges = []
    # First character of the run currently being tracked, or None when no
    # run is open. A run may begin with unassigned characters, so it is only
    # emitted if `has_accepted` says it contains a really accepted character.
    start = None
    has_accepted = False
    for x in range(last_codepoint + 1):
        c = chr(x)

        if accept(c):
            has_accepted = True
            if start is None:
                start = c
        elif unicodedata.category(c) == 'Cn':
            # Unassigned: extends the current run, or tentatively opens one.
            if start is None:
                start = c
        elif start is not None:
            # An assigned, rejected character closes the run just before it.
            if has_accepted:
                ranges.append(make_range(start, chr(x - 1)))
                has_accepted = False
            start = None

    # A run still open after the loop extends through the last code point.
    if has_accepted and start is not None:
        ranges.append(make_range(start, chr(last_codepoint)))

    return '[%s]' % ''.join(ranges)
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: evaluate the given Python expression to get
    # an acceptor function, then print the regex character class for it.
    parser = argparse.ArgumentParser(description='Generate a regex matching a function')
    parser.add_argument('acceptor', help='a python function that accepts a single char')
    args = parser.parse_args()

    # NOTE: eval() runs arbitrary code from the command line. That is the
    # purpose of this developer script, but it must never be given
    # untrusted input.
    acceptor = eval(args.acceptor)
    if not callable(acceptor):
        parser.error('acceptor must evaluate to a callable, got %r' % (acceptor,))

    print(func_to_regex(acceptor))
|
Loading…
Reference in New Issue
Block a user