From 5fc933495f947e422cec42f2cedd7b4c56830170 Mon Sep 17 00:00:00 2001 From: Robyn Speer Date: Fri, 1 Nov 2013 17:33:39 -0400 Subject: [PATCH] Revert "code review and pep8 fixes" This reverts commit ae6e03fa06bcda2aebc791742f283746300ed8de [formerly b4b8ba8be7771c183fad83d75b22019d8ee761ca]. Conflicts: wordfreq/transfer.py Former-commit-id: 5c8ba3449246d26073039acb235cd05e23a53a67 --- wordfreq/build.py | 8 +++++-- wordfreq/config.py | 2 +- wordfreq/transfer.py | 51 +++++++++++++++++++++----------------------- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/wordfreq/build.py b/wordfreq/build.py index b52eafc..d5f1609 100644 --- a/wordfreq/build.py +++ b/wordfreq/build.py @@ -36,6 +36,7 @@ def read_multilingual_csv(filename): raw_freqs = _read_csv_basic(filename) for wordlang in raw_freqs: word, lang = wordlang.rsplit('|', 1) + word = standardize_word(word) unscaled[lang][word] = raw_freqs[wordlang] scaled = {} @@ -87,7 +88,10 @@ def _scale_freqs(counts): """ freqs = {} total = sum(counts.values()) - return {word: count / total for word, count in freqs.items()} + for word in counts: + freqs[word] = counts[word] / total + + return freqs def save_wordlist_to_db(conn, listname, lang, freqs): @@ -115,11 +119,11 @@ def create_db(filename): This should be safe to run (and have no effect) if the database already exists. """ + conn = get_db_connection(filename) base_dir = os.path.dirname(filename) if not os.path.exists(base_dir): os.makedirs(base_dir) - conn = get_db_connection(filename) conn.execute(schema.SCHEMA) for index_definition in schema.INDICES: conn.execute(index_definition) diff --git a/wordfreq/config.py b/wordfreq/config.py index e58a9f7..86c225f 100644 --- a/wordfreq/config.py +++ b/wordfreq/config.py @@ -17,7 +17,7 @@ CACHE_SIZE = 100000 # Where can the data be downloaded from? DOWNLOAD_URL = (os.environ.get('WORDFREQ_URL') or 'http://ferret.lumi/dist/wordfreq/') -RAW_DATA_URL = '/'.join([DOWNLOAD_URL, MINOR_VERSION, 'wordfreq-data.tar.gz']) +RAW_DATA_URL = os.path.join(DOWNLOAD_URL, MINOR_VERSION, 'wordfreq-data.tar.gz') DB_URL = os.path.join(DOWNLOAD_URL, MINOR_VERSION, 'wordfreq-%s.db' % MINOR_VERSION) diff --git a/wordfreq/transfer.py b/wordfreq/transfer.py index b8317c8..0050876 100644 --- a/wordfreq/transfer.py +++ b/wordfreq/transfer.py @@ -79,7 +79,7 @@ def download_and_extract_raw_data(url=None, root_dir=None): ensure_dir_exists(dest_filename) download(url, dest_filename) - logger.info("Extracting %s", dest_filename) + logger.info("Extracting %s" % dest_filename) with tarfile.open(dest_filename, 'r') as tarf: tarf.extractall(root_dir) @@ -106,36 +106,33 @@ def upload_data(upload_path=None): This requires that it's running in a reasonable Unix environment, and more notably, that it has the proper SSH keys to upload to that server. - - It should also only be run in Python 3, because otherwise you're probably - uploading the wrong data. We can even ensure this by using features that - are specific to Python 3. """ - from tempfile import TemporaryDirectory if upload_path is None: upload_path = config.UPLOAD_PATH + + build_dir = tempfile.mkdtemp('.wordfreq') + version_dir = os.path.join(build_dir, config.MINOR_VERSION) + os.makedirs(version_dir) - with TemporaryDirectory('.wordfreq') as build_tmp: - build_dir = build_tmp.name - version_dir = os.path.join(build_dir, config.MINOR_VERSION) - os.makedirs(version_dir) + source_filename = os.path.join(version_dir, 'wordfreq-data.tar.gz') + logger.info("Creating %s" % source_filename) + with tarfile.open(source_filename, 'w:gz') as tarf: + tarf.add(config.RAW_DATA_DIR) - source_filename = os.path.join(version_dir, 'wordfreq-data.tar.gz') - logger.info("Creating %s", source_filename) - with tarfile.open(source_filename, 'w:gz') as tarf: - tarf.add(config.RAW_DATA_DIR) + logger.info("Copying database file %s" % config.DB_FILENAME) + subprocess.call([ + '/bin/cp', + config.DB_FILENAME, + version_dir + ]) - logger.info("Copying database file %s", config.DB_FILENAME) - subprocess.call([ - '/bin/cp', - config.DB_FILENAME, - version_dir - ]) + logger.info("Uploading to %s" % upload_path) + subprocess.call([ + '/usr/bin/rsync', + '-avz', + version_dir, + upload_path + ]) - logger.info("Uploading to %s", upload_path) - subprocess.call([ - '/usr/bin/rsync', - '-avz', - version_dir, - upload_path - ]) + logger.info("Removing build directory %s" % build_dir) + shutil.rmtree(build_dir)