Stylistic cleanups to word_counts.py.

Former-commit-id: 6d40912ef9
2024-12-23 09:21:37 +00:00 · 2015-07-31 19:26:18 -04:00 · 2015-07-31 19:26:18 -04:00 · 77610f57e1
commit 77610f57e1
parent b0fac15f98
1 changed files with 3 additions and 7 deletions
--- a/wordfreq_builder/wordfreq_builder/word_counts.py
+++ b/wordfreq_builder/wordfreq_builder/word_counts.py
@@ -39,20 +39,16 @@ def read_freqs(filename, cutoff=0, lang=None):
    raw_counts = defaultdict(float)
    total = 0.
    with open(filename, encoding='utf-8', newline='') as infile:
-        reader = csv.reader(infile)
-        for key, strval in reader:
-
+        for key, strval in csv.reader(infile):
            val = float(strval)
            if val < cutoff:
                break
-
            tokens = tokenize(key, lang) if lang is not None else simple_tokenize(key)
            for token in tokens:
-                token = fix_text(token)
-                total += val
                # Use += so that, if we give the reader concatenated files with
                # duplicates, it does the right thing
-                raw_counts[token] += val
+                raw_counts[fix_text(token)] += val
+                total += val

    for word in raw_counts:
        raw_counts[word] /= total