Stylistic cleanups to word_counts.py.

Former-commit-id: 6d40912ef9
This commit is contained in:
Andrew Lin 2015-07-31 19:26:18 -04:00
parent b0fac15f98
commit 77610f57e1

View File

@@ -39,20 +39,16 @@ def read_freqs(filename, cutoff=0, lang=None):
raw_counts = defaultdict(float)
total = 0.
with open(filename, encoding='utf-8', newline='') as infile:
reader = csv.reader(infile)
for key, strval in reader:
for key, strval in csv.reader(infile):
val = float(strval)
if val < cutoff:
break
tokens = tokenize(key, lang) if lang is not None else simple_tokenize(key)
for token in tokens:
token = fix_text(token)
total += val
# Use += so that, if we give the reader concatenated files with
# duplicates, it does the right thing
raw_counts[token] += val
raw_counts[fix_text(token)] += val
total += val
for word in raw_counts:
raw_counts[word] /= total