1
0
mirror of https://github.com/rspeer/wordfreq.git synced 2025-01-14 05:05:59 +00:00

Sometimes you need some random words.

Former-commit-id: 3447ae732e
This commit is contained in:
Robyn Speer 2014-01-06 15:50:49 -05:00
parent 181e8e08fa
commit 207defe6ff

View File

@ -120,3 +120,32 @@ def wordlist_info(connection=None):
for wordlist, lang, count in results:
yield {'wordlist': wordlist, 'lang': lang, 'count': count}
def random_words(nwords=4, bits_per_word=12, wordlist='google-books',
                 lang='en'):
    """
    Return `nwords` randomly chosen words from a wordlist, joined by spaces.

    There are a few reasons you might want to see a sample of words in a
    wordlist:

    - Generating test cases
    - Getting a feel for what a wordlist contains
    - Generating passwords as in https://xkcd.com/936/

    Parameters:

    - `nwords` is the number of words to select.
    - `bits_per_word` indicates how many bits of randomness per word you
      want, up to log2(wordlist_size). As you increase it, the words get
      obscure.
    - `wordlist` and `lang` specify the wordlist to use.

    The candidates are the `2 ** bits_per_word` highest-frequency words of
    the requested wordlist (fewer, if the wordlist is smaller than that).
    Words are drawn without repetition, using the operating system's
    randomness source so that the result is usable for the password case.

    Raises ValueError if fewer than `nwords` candidate words are available
    (for example, an unknown wordlist/language) or if `nwords` is negative.
    """
    import random

    limit = 2 ** bits_per_word
    c = CONN.cursor()
    results = c.execute(
        "SELECT word from words where wordlist = ? and lang = ? "
        "ORDER BY freq DESC LIMIT ?",
        (wordlist, lang, limit)
    )
    words = [row[0] for row in results]

    # Fail with an explanatory message instead of random.sample's generic
    # "Sample larger than population" when the query came back short.
    if nwords > len(words):
        raise ValueError(
            "Cannot select %d words: only %d candidate words were found for "
            "wordlist %r, lang %r with bits_per_word=%r"
            % (nwords, len(words), wordlist, lang, bits_per_word)
        )

    # The default `random` generator is a predictable PRNG; passwords are a
    # documented use case, so draw from the OS entropy source instead.
    selected = random.SystemRandom().sample(words, nwords)
    return u' '.join(selected)