From a2bc90e430c7b153f760ddb85e7de32b49d58921 Mon Sep 17 00:00:00 2001
From: Robyn Speer <rspeer@luminoso.com>
Date: Thu, 31 Mar 2016 12:55:18 -0400
Subject: [PATCH] rename max_size to max_words consistently

Former-commit-id: 16059d3b9a3ef8b1c332d75df6137bcd09fe83a2
---
 wordfreq_builder/wordfreq_builder/cli/merge_counts.py | 7 +++----
 wordfreq_builder/wordfreq_builder/word_counts.py      | 8 ++++----
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/wordfreq_builder/wordfreq_builder/cli/merge_counts.py b/wordfreq_builder/wordfreq_builder/cli/merge_counts.py
index 2e740cf..6413024 100644
--- a/wordfreq_builder/wordfreq_builder/cli/merge_counts.py
+++ b/wordfreq_builder/wordfreq_builder/cli/merge_counts.py
@@ -2,10 +2,10 @@ from wordfreq_builder.word_counts import read_values, merge_counts, write_wordli
 import argparse
 
 
-def merge_lists(input_names, output_name, cutoff=0, max_size=1000000):
+def merge_lists(input_names, output_name, cutoff=0, max_words=1000000):
     count_dicts = []
     for input_name in input_names:
-        values, total = read_values(input_name, cutoff=cutoff, max_size=max_size)
+        values, total = read_values(input_name, cutoff=cutoff, max_words=max_words)
         count_dicts.append(values)
     merged = merge_counts(count_dicts)
     write_wordlist(merged, output_name)
@@ -22,5 +22,4 @@ if __name__ == '__main__':
     parser.add_argument('inputs', nargs='+',
                         help='names of input files to merge')
     args = parser.parse_args()
-    merge_lists(args.inputs, args.output, cutoff=args.cutoff, max_size=args.max_words)
-
+    merge_lists(args.inputs, args.output, cutoff=args.cutoff, max_words=args.max_words)
diff --git a/wordfreq_builder/wordfreq_builder/word_counts.py b/wordfreq_builder/wordfreq_builder/word_counts.py
index ded334a..65baf72 100644
--- a/wordfreq_builder/wordfreq_builder/word_counts.py
+++ b/wordfreq_builder/wordfreq_builder/word_counts.py
@@ -36,15 +36,15 @@ def count_tokens(filename):
     return counts
 
 
-def read_values(filename, cutoff=0, max_size=1e8, lang=None):
+def read_values(filename, cutoff=0, max_words=1e8, lang=None):
     """
     Read words and their frequency or count values from a CSV file. Returns
     a dictionary of values and the total of all values.
 
     Only words with a value greater than or equal to `cutoff` are returned.
-    In addition, only up to `max_size` words are read.
+    In addition, only up to `max_words` words are read.
 
-    If `cutoff` is greater than 0 or `max_size` is smaller than the list,
+    If `cutoff` is greater than 0 or `max_words` is smaller than the list,
     the csv file must be sorted by value in descending order, so that the
     most frequent words are kept.
 
@@ -57,7 +57,7 @@ def read_values(filename, cutoff=0, max_size=1e8, lang=None):
         for key, strval in csv.reader(infile):
             val = float(strval)
             key = fix_text(key)
-            if val < cutoff or len(values) >= max_size:
+            if val < cutoff or len(values) >= max_words:
                 break
             tokens = tokenize(key, lang) if lang is not None else simple_tokenize(key)
             for token in tokens: