Merge pull request #8 from LuminosoInsight/newbuild-refactor

Refactored the newbuild branch, in response to the preliminary review notes
2024-12-24 09:51:38 +00:00 · 2015-06-17 14:35:37 -04:00 · 2015-06-17 14:35:37 -04:00 · 13988f8e3d
commit 13988f8e3d
parent ed19d79c5a 68b1c121bd
1 changed files with 26 additions and 7 deletions
--- a/wordfreq/init.py
+++ b/wordfreq/init.py
@ -82,8 +82,7 @@ def read_dBpack(filename):
        [[], [], [], ['fish'], [], [], ['blue', 'red']]
    """
    with gzip.open(filename, 'rb') as infile:
-        got = msgpack.load(infile, encoding='utf-8')
+        return msgpack.load(infile, encoding='utf-8')
    return got
 def available_languages(wordlist='combined'):
@ -96,7 +95,7 @@ def available_languages(wordlist='combined'):
        list_name = path.name.split('.')[0]
        name, lang = list_name.split('_')
        if name == wordlist:
-            available[lang] = path
+            available[lang] = str(path)
    return available
@ -124,11 +123,16 @@ def get_frequency_list(lang, wordlist='combined', match_cutoff=30):
            % (lang, best, langcodes.get(best).language_name('en'))
        )
-    filepath = available[str(best)]
+    return read_dBpack(available[best])
    return read_dBpack(str(filepath))
 def dB_to_freq(dB):
    """
    Decibels are a logarithmic scale of frequency. 0dB represents a frequency
    of 1 (it happens every time). -10dB represents a frequency of 1/10, or
    1 in every 10. -20dB represents a frequency of 1/100. In general x dB
    represents a frequency of 10 ** (x/10)
    """
    if dB > 0:
        raise ValueError(
            "A frequency cannot be a positive number of decibels."
@ -160,8 +164,7 @@ def iter_wordlist(lang, wordlist='combined'):
    each band.
    """
    for sublist in get_frequency_list(lang, wordlist):
-        for word in sublist:
+        yield from sublist
            yield word
 def half_harmonic_mean(a, b):
@ -227,6 +230,15 @@ def top_n_list(lang, n, wordlist='combined', ascii_only=False):
 def random_words(lang='en', wordlist='combined', nwords=4, bits_per_word=12,
                 ascii_only=False):
    """
    Returns a string of random, space-separated words.
    These words are of the given language and from the given wordlist.
    There are a total of nwords words in the string.
    bits_per_word is an estimate of the entropy provided by each word.
    You can restrict the selection of words to those written in ASCII
    characters by setting ascii_only to True.
    """
    n_choices = 2 ** bits_per_word
    choices = top_n_list(lang, n_choices, wordlist, ascii_only=ascii_only)
    if len(choices) < n_choices:
@ -240,4 +252,11 @@ def random_words(lang='en', wordlist='combined', nwords=4, bits_per_word=12,
 def random_ascii_words(lang='en', wordlist='combined', nwords=4,
                       bits_per_word=12):
    """
    Returns a string of random, space-separated ASCII words.

    This is a shortcut for calling `random_words` with `ascii_only=True`;
    all other arguments are passed through unchanged.

    These words are of the given language and from the given wordlist.
    There are a total of `nwords` words in the string.
    `bits_per_word` is an estimate of the entropy provided by each word.
    """
    return random_words(lang, wordlist, nwords, bits_per_word, ascii_only=True)