code review fixes to __init__

2024-12-23 09:21:37 +00:00 · 2018-03-14 15:04:59 -04:00 · 2018-03-14 15:04:59 -04:00 · de81a23b9d
commit de81a23b9d
parent 8656688b0b
1 changed files with 4 additions and 6 deletions
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -18,12 +18,6 @@ logger = logging.getLogger(__name__)
 CACHE_SIZE = 100000
 DATA_PATH = pathlib.Path(resource_filename('wordfreq', 'data'))
 # Chinese and Japanese are written without spaces. In Chinese, in particular,
 # we have to infer word boundaries from the frequencies of the words they
 # would create. When this happens, we should adjust the resulting frequency
 # to avoid creating a bias toward improbable word combinations.
 INFERRED_SPACE_LANGUAGES = {'zh'}
 # We'll divide the frequency by 10 for each token boundary that was inferred.
 # (We determined the factor of 10 empirically by looking at words in the
 # Chinese wordlist that weren't common enough to be identified by the
@@ -269,6 +263,10 @@ def word_frequency(word, lang, wordlist='best', minimum=0.):
    - 'small': a wordlist built from at least 3 sources, containing word
      frequencies of 10^-6 and higher
    - 'best': uses 'large' if available, and 'small' otherwise
    The value returned will always be at least as large as `minimum`.
    You could set this value to 10^-8, for example, to return 10^-8 for
    unknown words in the 'large' list instead of 0, avoiding a discontinuity.
    """
    args = (word, lang, wordlist, minimum)
    try: