Stylistic cleanups to word_counts.py.

2024-12-23 17:31:41 +00:00 · 2015-07-31 19:26:18 -04:00 · 2015-07-31 19:26:18 -04:00 · 6d40912ef9
commit 6d40912ef9
parent 66c69e6fac
1 changed file with 3 additions and 7 deletions
--- a/wordfreq_builder/wordfreq_builder/word_counts.py
+++ b/wordfreq_builder/wordfreq_builder/word_counts.py
@@ -39,20 +39,16 @@ def read_freqs(filename, cutoff=0, lang=None):
    raw_counts = defaultdict(float)
    total = 0.
    with open(filename, encoding='utf-8', newline='') as infile:
-        reader = csv.reader(infile)
+        for key, strval in csv.reader(infile):
-        for key, strval in reader:
            val = float(strval)
            if val < cutoff:
                break
            tokens = tokenize(key, lang) if lang is not None else simple_tokenize(key)
            for token in tokens:
-                token = fix_text(token)
-                total += val
                # Use += so that, if we give the reader concatenated files with
                # duplicates, it does the right thing
-                raw_counts[token] += val
+                raw_counts[fix_text(token)] += val
+                total += val
    for word in raw_counts:
        raw_counts[word] /= total