From 13642d6a4d53d8a00860e7d4bd5b6599c65197cd Mon Sep 17 00:00:00 2001
From: Robyn Speer <rspeer@luminoso.com>
Date: Tue, 22 Sep 2015 16:46:07 -0400
Subject: [PATCH] replace the literal 10 with the constant
 INFERRED_SPACE_FACTOR

Former-commit-id: 7a3ea2bf796c3f31fdf7d1c441b12b8ec52acf50
---
 wordfreq/__init__.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/wordfreq/__init__.py b/wordfreq/__init__.py
index 4790282..85e4711 100644
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -21,6 +21,14 @@ DATA_PATH = pathlib.Path(resource_filename('wordfreq', 'data'))
 # for the fact that token boundaries were inferred.
 SPACELESS_LANGUAGES = {'zh', 'ja'}
 
+# Divide the frequency by this factor for each token boundary that was
+# inferred. (We determined the factor of 10 empirically by looking at words
+# in the Chinese wordlist that weren't common enough to be identified by the
+# tokenizer. These words would get split into multiple tokens, and their
+# inferred frequency would be on average 9.77 times higher than their actual
+# frequency.)
+INFERRED_SPACE_FACTOR = 10.0
+
 # simple_tokenize is imported so that other things can import it from here.
 # Suppress the pyflakes warning.
 simple_tokenize = simple_tokenize
@@ -190,13 +198,7 @@ def _word_frequency(word, lang, wordlist, minimum):
     freq = 1.0 / one_over_result
 
     if lang in SPACELESS_LANGUAGES:
-        # Divide the frequency by 10 for each token boundary that was inferred.
-        # (We determined the factor of 10 empirically by looking at words in
-        # the Chinese wordlist that weren't common enough to be identified by
-        # the tokenizer. These words would get split into multiple tokens, and
-        # their inferred frequency would be on average 9.77 times higher than
-        # their actual frequency.)
-        freq /= 10 ** (len(tokens) - 1)
+        freq /= INFERRED_SPACE_FACTOR ** (len(tokens) - 1)
 
     return max(freq, minimum)