mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
Unhoisted the `if lang == 'ar'` statement into the per-token loop, and skip empty tokens
This commit is contained in:
parent
accb7e398c
commit
298d3c1d24
@@ -17,14 +17,16 @@ def count_tokens(filename, lang):
|
||||
"""
|
||||
counts = defaultdict(int)
|
||||
with open(filename, encoding='utf-8', errors='replace') as infile:
|
||||
if lang == 'ar':
|
||||
for line in infile:
|
||||
for token in simple_tokenize(line):
|
||||
counts[standardize_arabic(token)] += 1
|
||||
else:
|
||||
for line in infile:
|
||||
for token in simple_tokenize(line):
|
||||
counts[token] += 1
|
||||
for line in infile:
|
||||
for token in simple_tokenize(line):
|
||||
if lang == 'ar':
|
||||
token = standardize_arabic(token)
|
||||
if not token:
|
||||
# skip empty strings
|
||||
continue
|
||||
|
||||
counts[token] += 1
|
||||
|
||||
return counts
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user