diff --git a/wordfreq_builder/wordfreq_builder/tokenizers.py b/wordfreq_builder/wordfreq_builder/tokenizers.py index fec5928..653fa7e 100644 --- a/wordfreq_builder/wordfreq_builder/tokenizers.py +++ b/wordfreq_builder/wordfreq_builder/tokenizers.py @@ -66,8 +66,8 @@ def tokenize_file(in_filename, out_prefix, tokenizer, line_reader=last_tab): for line in in_file: text = line_reader(line) tokens, language = tokenizer(text) - tokenized = '\n'.join(tokens) if language is not None: + tokenized = '\n'.join(tokens) out_filename = '%s.%s.txt' % (out_prefix, language) if out_filename in out_files: out_file = out_files[out_filename]