mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 09:51:38 +00:00
Merge pull request #8 from LuminosoInsight/newbuild-refactor
Refactored the newbuild branch, in response to the preliminary review notes
This commit is contained in:
commit
13988f8e3d
@ -82,8 +82,7 @@ def read_dBpack(filename):
|
|||||||
[[], [], [], ['fish'], [], [], ['blue', 'red']]
|
[[], [], [], ['fish'], [], [], ['blue', 'red']]
|
||||||
"""
|
"""
|
||||||
with gzip.open(filename, 'rb') as infile:
|
with gzip.open(filename, 'rb') as infile:
|
||||||
got = msgpack.load(infile, encoding='utf-8')
|
return msgpack.load(infile, encoding='utf-8')
|
||||||
return got
|
|
||||||
|
|
||||||
|
|
||||||
def available_languages(wordlist='combined'):
|
def available_languages(wordlist='combined'):
|
||||||
@ -96,7 +95,7 @@ def available_languages(wordlist='combined'):
|
|||||||
list_name = path.name.split('.')[0]
|
list_name = path.name.split('.')[0]
|
||||||
name, lang = list_name.split('_')
|
name, lang = list_name.split('_')
|
||||||
if name == wordlist:
|
if name == wordlist:
|
||||||
available[lang] = path
|
available[lang] = str(path)
|
||||||
return available
|
return available
|
||||||
|
|
||||||
|
|
||||||
@ -124,11 +123,16 @@ def get_frequency_list(lang, wordlist='combined', match_cutoff=30):
|
|||||||
% (lang, best, langcodes.get(best).language_name('en'))
|
% (lang, best, langcodes.get(best).language_name('en'))
|
||||||
)
|
)
|
||||||
|
|
||||||
filepath = available[str(best)]
|
return read_dBpack(available[best])
|
||||||
return read_dBpack(str(filepath))
|
|
||||||
|
|
||||||
|
|
||||||
def dB_to_freq(dB):
|
def dB_to_freq(dB):
|
||||||
|
"""
|
||||||
|
Decibels are a logarithmic scale of frequency. 0dB represents a frequency
|
||||||
|
of 1 (it happens every time). -10dB represents a frequency of 1/10, or
|
||||||
|
1 in every 10. -20dB represents a frequency of 1/100. In general x dB
|
||||||
|
represents a frequency of 10 ** (x/10)
|
||||||
|
"""
|
||||||
if dB > 0:
|
if dB > 0:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"A frequency cannot be a positive number of decibels."
|
"A frequency cannot be a positive number of decibels."
|
||||||
@ -160,8 +164,7 @@ def iter_wordlist(lang, wordlist='combined'):
|
|||||||
each band.
|
each band.
|
||||||
"""
|
"""
|
||||||
for sublist in get_frequency_list(lang, wordlist):
|
for sublist in get_frequency_list(lang, wordlist):
|
||||||
for word in sublist:
|
yield from sublist
|
||||||
yield word
|
|
||||||
|
|
||||||
|
|
||||||
def half_harmonic_mean(a, b):
|
def half_harmonic_mean(a, b):
|
||||||
@ -227,6 +230,15 @@ def top_n_list(lang, n, wordlist='combined', ascii_only=False):
|
|||||||
|
|
||||||
def random_words(lang='en', wordlist='combined', nwords=4, bits_per_word=12,
|
def random_words(lang='en', wordlist='combined', nwords=4, bits_per_word=12,
|
||||||
ascii_only=False):
|
ascii_only=False):
|
||||||
|
"""
|
||||||
|
Returns a string of random, space-separated words.
|
||||||
|
|
||||||
|
These words are of the given language and from the given wordlist.
|
||||||
|
There are a total of nwords words in the string.
|
||||||
|
bits_per_word is an estimate of the entropy provided by each word.
|
||||||
|
You can restrict the selection of words to those written in ASCII
|
||||||
|
characters by setting ascii_only to True.
|
||||||
|
"""
|
||||||
n_choices = 2 ** bits_per_word
|
n_choices = 2 ** bits_per_word
|
||||||
choices = top_n_list(lang, n_choices, wordlist, ascii_only=ascii_only)
|
choices = top_n_list(lang, n_choices, wordlist, ascii_only=ascii_only)
|
||||||
if len(choices) < n_choices:
|
if len(choices) < n_choices:
|
||||||
@ -240,4 +252,11 @@ def random_words(lang='en', wordlist='combined', nwords=4, bits_per_word=12,
|
|||||||
|
|
||||||
def random_ascii_words(lang='en', wordlist='combined', nwords=4,
|
def random_ascii_words(lang='en', wordlist='combined', nwords=4,
|
||||||
bits_per_word=12):
|
bits_per_word=12):
|
||||||
|
"""
|
||||||
|
Returns a string of random, space-separated ASCII words.
|
||||||
|
|
||||||
|
These words are of the given language and from the given wordlist.
|
||||||
|
There are a total of nwords words in the string.
|
||||||
|
bits_per_word is an estimate of the entropy provided by each word.
|
||||||
|
"""
|
||||||
return random_words(lang, wordlist, nwords, bits_per_word, ascii_only=True)
|
return random_words(lang, wordlist, nwords, bits_per_word, ascii_only=True)
|
||||||
|
Loading…
Reference in New Issue
Block a user