1
0
mirror of https://github.com/rspeer/wordfreq.git synced 2025-01-14 21:25:58 +00:00

automatically closes input file in tokenize_file

This commit is contained in:
Joshua Chin 2015-06-17 11:42:34 -04:00
parent 2fbfbfcc98
commit 7fc0ba9092

View File

@@ -62,7 +62,8 @@ def tokenize_file(in_filename, out_prefix, tokenizer, line_reader=last_tab):
to mark the token boundaries.
"""
out_files = {}
-for line in open(in_filename, encoding='utf-8'):
+with open(in_filename, encoding='utf-8') as in_file:
+for line in in_file:
text = line_reader(line)
tokens, language = tokenizer(text)
tokenized = '\n'.join(tokens)