Revert "code review and pep8 fixes"

This reverts commit ae6e03fa06 [formerly b4b8ba8be7].

Conflicts:
	wordfreq/transfer.py

Former-commit-id: 5c8ba34492
2024-12-23 09:21:37 +00:00 · 2013-11-01 17:33:39 -04:00 · 2013-11-01 17:33:39 -04:00 · 5fc933495f
commit 5fc933495f
parent 4d904a3bae
3 changed files with 31 additions and 30 deletions
--- a/wordfreq/build.py
+++ b/wordfreq/build.py
@ -36,6 +36,7 @@ def read_multilingual_csv(filename):
    raw_freqs = _read_csv_basic(filename)
    for wordlang in raw_freqs:
        word, lang = wordlang.rsplit('|', 1)
        word = standardize_word(word)
        unscaled[lang][word] = raw_freqs[wordlang]
    scaled = {}
@ -87,7 +88,10 @@ def _scale_freqs(counts):
    """
    freqs = {}
    total = sum(counts.values())
-    return {word: count / total for word, count in freqs.items()}
+    for word in counts:
        freqs[word] = counts[word] / total
    return freqs
 def save_wordlist_to_db(conn, listname, lang, freqs):
@ -115,11 +119,11 @@ def create_db(filename):
    This should be safe to run (and have no effect) if the database already
    exists.
    """
    conn = get_db_connection(filename)
    base_dir = os.path.dirname(filename)
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    conn = get_db_connection(filename)
    conn.execute(schema.SCHEMA)
    for index_definition in schema.INDICES:
        conn.execute(index_definition)
--- a/wordfreq/config.py
+++ b/wordfreq/config.py
@ -17,7 +17,7 @@ CACHE_SIZE = 100000
 # Where can the data be downloaded from?
 DOWNLOAD_URL = (os.environ.get('WORDFREQ_URL')
                or 'http://ferret.lumi/dist/wordfreq/')
-RAW_DATA_URL = '/'.join([DOWNLOAD_URL, MINOR_VERSION, 'wordfreq-data.tar.gz'])
+RAW_DATA_URL = os.path.join(DOWNLOAD_URL, MINOR_VERSION, 'wordfreq-data.tar.gz')
 DB_URL = os.path.join(DOWNLOAD_URL, MINOR_VERSION,
                      'wordfreq-%s.db' % MINOR_VERSION)
--- a/wordfreq/transfer.py
+++ b/wordfreq/transfer.py
@ -79,7 +79,7 @@ def download_and_extract_raw_data(url=None, root_dir=None):
    ensure_dir_exists(dest_filename)
    download(url, dest_filename)
-    logger.info("Extracting %s", dest_filename)
+    logger.info("Extracting %s" % dest_filename)
    with tarfile.open(dest_filename, 'r') as tarf:
        tarf.extractall(root_dir)
@ -106,36 +106,33 @@ def upload_data(upload_path=None):
    This requires that it's running in a reasonable Unix environment,
    and more notably, that it has the proper SSH keys to upload to that
    server.
    It should also only be run in Python 3, because otherwise you're probably
    uploading the wrong data. We can even ensure this by using features that
    are specific to Python 3.
    """
    from tempfile import TemporaryDirectory
    if upload_path is None:
        upload_path = config.UPLOAD_PATH
    build_dir = tempfile.mkdtemp('.wordfreq')
    version_dir = os.path.join(build_dir, config.MINOR_VERSION)
    os.makedirs(version_dir)
-    with TemporaryDirectory('.wordfreq') as build_tmp:
+    source_filename = os.path.join(version_dir, 'wordfreq-data.tar.gz')
-        build_dir = build_tmp.name
+    logger.info("Creating %s" % source_filename)
-        version_dir = os.path.join(build_dir, config.MINOR_VERSION)
+    with tarfile.open(source_filename, 'w:gz') as tarf:
-        os.makedirs(version_dir)
+        tarf.add(config.RAW_DATA_DIR)
-        source_filename = os.path.join(version_dir, 'wordfreq-data.tar.gz')
+    logger.info("Copying database file %s" % config.DB_FILENAME)
-        logger.info("Creating %s", source_filename)
+    subprocess.call([
-        with tarfile.open(source_filename, 'w:gz') as tarf:
+        '/bin/cp',
-            tarf.add(config.RAW_DATA_DIR)
+        config.DB_FILENAME,
        version_dir
    ])
-        logger.info("Copying database file %s", config.DB_FILENAME)
+    logger.info("Uploading to %s" % upload_path)
-        subprocess.call([
+    subprocess.call([
-            '/bin/cp',
+        '/usr/bin/rsync',
-            config.DB_FILENAME,
+        '-avz',
-            version_dir
+        version_dir,
-        ])
+        upload_path
    ])
-        logger.info("Uploading to %s", upload_path)
+    logger.info("Removing build directory %s" % build_dir)
-        subprocess.call([
+    shutil.rmtree(build_dir)
            '/usr/bin/rsync',
            '-avz',
            version_dir,
            upload_path
        ])