mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-27 02:48:51 +00:00
6deced5244
Former-commit-id: 476a909e4d
40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
import argparse
|
|
import unicodedata
|
|
|
|
def func_to_regex(accept):
    """
    Converts a function that accepts a single unicode character into a regex.

    Every code point from U+0000 through U+10FFFF is passed to `accept`.
    Consecutive accepted characters are collapsed into one `start-end` range,
    and all ranges are concatenated inside a single character class.

    Unassigned unicode characters (category 'Cn') are treated like their
    neighbors: unassigned code points adjacent to an accepted run are folded
    into that run's range, and runs made up only of unassigned characters are
    dropped.

    Range endpoints that have a special meaning inside a character class
    (backslash, '[', ']', '^' and '-') are backslash-escaped.

    :param accept: callable that takes a one-character string and returns a
        truthy value when that character should be matched.
    :return: a string such as '[a-cx-z]'; '[]' when nothing was accepted.
    """
    def escape(char):
        # These characters would otherwise close the set, negate it, start an
        # escape sequence, or be read as a range operator.
        return '\\' + char if char in '\\][^-' else char

    last_codepoint = 0x10FFFF
    ranges = []
    start = None          # first character of the run currently being built
    has_accepted = False  # True once the current run holds an accepted char

    for x in range(last_codepoint + 1):
        c = chr(x)

        if accept(c):
            has_accepted = True
            if start is None:
                # An accepted character opens a new run.
                start = c
        elif unicodedata.category(c) == 'Cn':
            # Unassigned characters may open a run, but the run is only
            # emitted if an accepted character joins it later.
            if start is None:
                start = c
        elif start is not None:
            # An assigned, rejected character closes the current run, which
            # therefore ended on the previous code point.
            if has_accepted:
                ranges.append(escape(start) + '-' + escape(chr(x - 1)))
                has_accepted = False
            start = None

    # A run still open after the loop extends through the very last code
    # point, so it ends at last_codepoint itself rather than one before it.
    if has_accepted and start is not None:
        ranges.append(escape(start) + '-' + escape(chr(last_codepoint)))

    return '[%s]' % ''.join(ranges)
|
|
|
|
# Command-line entry point: evaluate the given Python expression to obtain an
# acceptor function, then print the regex character class that matches it.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Generate a regex matching a function')
    parser.add_argument('acceptor', help='a Python function that accepts a single char')
    args = parser.parse_args()

    # SECURITY NOTE: eval() runs the argument as arbitrary Python code. This is
    # intended for a developer passing an expression such as
    # "lambda c: c.isdigit()" on their own command line; never feed it input
    # from an untrusted source.
    print(func_to_regex(eval(args.acceptor)))
|