From f224b8dbbaf4eb30988def5c35c4f8b204e5823e Mon Sep 17 00:00:00 2001
From: Rob Speer
Date: Tue, 22 Sep 2015 17:22:38 -0400
Subject: [PATCH] describe the use of `lang` in `read_values`

---
 wordfreq_builder/wordfreq_builder/word_counts.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/wordfreq_builder/wordfreq_builder/word_counts.py b/wordfreq_builder/wordfreq_builder/word_counts.py
index 1ba7214..a3bf0ae 100644
--- a/wordfreq_builder/wordfreq_builder/word_counts.py
+++ b/wordfreq_builder/wordfreq_builder/word_counts.py
@@ -42,6 +42,9 @@ def read_values(filename, cutoff=0, lang=None):
 
     If `cutoff` is greater than 0, the csv file must be sorted by value in
     descending order.
+
+    If `lang` is given, it will apply language-specific tokenization to the
+    words that it reads.
     """
     values = defaultdict(float)
     total = 0.