mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 09:21:37 +00:00
tokenize_file: don't join tokens if language is None
This commit is contained in:
parent
7fc0ba9092
commit
b5bc39c893
@@ -66,8 +66,8 @@ def tokenize_file(in_filename, out_prefix, tokenizer, line_reader=last_tab):
     for line in in_file:
         text = line_reader(line)
         tokens, language = tokenizer(text)
-        tokenized = '\n'.join(tokens)
         if language is not None:
+            tokenized = '\n'.join(tokens)
             out_filename = '%s.%s.txt' % (out_prefix, language)
             if out_filename in out_files:
                 out_file = out_files[out_filename]
Loading…
Reference in New Issue
Block a user