From 4304a400f78af6bd44508fb25ae9e4af5502e5a4 Mon Sep 17 00:00:00 2001
From: Joshua Chin
Date: Tue, 7 Jul 2015 14:56:12 -0400
Subject: [PATCH] updated word_frequency docstring

---
 wordfreq/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/wordfreq/__init__.py b/wordfreq/__init__.py
index a895fbc..800fcee 100644
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -247,13 +247,14 @@ def word_frequency(word, lang, wordlist='combined', default=0.):
     """
     Get the frequency of `word` in the language with code `lang`, from the
     specified `wordlist`. The default wordlist is 'combined', built from
-    whichever of these four sources have sufficient data for the language:
+    whichever of these five sources have sufficient data for the language:
 
       - Full text of Wikipedia
       - A sample of 72 million tweets collected from Twitter in 2014,
         divided roughly into languages using automatic language detection
       - Frequencies extracted from OpenSubtitles
       - The Leeds Internet Corpus
+      - Google Books Ngrams and Google Books Syntactic Ngrams
 
     Another available wordlist is 'twitter', which uses only the data from
     Twitter.