Revert "code review and pep8 fixes"

This reverts commit ae6e03fa06 [formerly b4b8ba8be7].

Conflicts:
	wordfreq/transfer.py

Former-commit-id: 5c8ba34492
This commit is contained in:
Robyn Speer 2013-11-01 17:33:39 -04:00
parent 4d904a3bae
commit 5fc933495f
3 changed files with 31 additions and 30 deletions

View File

@@ -36,6 +36,7 @@ def read_multilingual_csv(filename):
raw_freqs = _read_csv_basic(filename)
for wordlang in raw_freqs:
word, lang = wordlang.rsplit('|', 1)
word = standardize_word(word)
unscaled[lang][word] = raw_freqs[wordlang]
scaled = {}
@@ -87,7 +88,10 @@ def _scale_freqs(counts):
"""
freqs = {}
total = sum(counts.values())
return {word: count / total for word, count in freqs.items()}
for word in counts:
freqs[word] = counts[word] / total
return freqs
def save_wordlist_to_db(conn, listname, lang, freqs):
@@ -115,11 +119,11 @@ def create_db(filename):
This should be safe to run (and have no effect) if the database already
exists.
"""
conn = get_db_connection(filename)
base_dir = os.path.dirname(filename)
if not os.path.exists(base_dir):
os.makedirs(base_dir)
conn = get_db_connection(filename)
conn.execute(schema.SCHEMA)
for index_definition in schema.INDICES:
conn.execute(index_definition)

View File

@@ -17,7 +17,7 @@ CACHE_SIZE = 100000
# Where can the data be downloaded from?
DOWNLOAD_URL = (os.environ.get('WORDFREQ_URL')
or 'http://ferret.lumi/dist/wordfreq/')
RAW_DATA_URL = '/'.join([DOWNLOAD_URL, MINOR_VERSION, 'wordfreq-data.tar.gz'])
RAW_DATA_URL = os.path.join(DOWNLOAD_URL, MINOR_VERSION, 'wordfreq-data.tar.gz')
DB_URL = os.path.join(DOWNLOAD_URL, MINOR_VERSION,
'wordfreq-%s.db' % MINOR_VERSION)

View File

@@ -79,7 +79,7 @@ def download_and_extract_raw_data(url=None, root_dir=None):
ensure_dir_exists(dest_filename)
download(url, dest_filename)
logger.info("Extracting %s", dest_filename)
logger.info("Extracting %s" % dest_filename)
with tarfile.open(dest_filename, 'r') as tarf:
tarf.extractall(root_dir)
@@ -106,36 +106,33 @@ def upload_data(upload_path=None):
This requires that it's running in a reasonable Unix environment,
and more notably, that it has the proper SSH keys to upload to that
server.
It should also only be run in Python 3, because otherwise you're probably
uploading the wrong data. We can even ensure this by using features that
are specific to Python 3.
"""
from tempfile import TemporaryDirectory
if upload_path is None:
upload_path = config.UPLOAD_PATH
with TemporaryDirectory('.wordfreq') as build_tmp:
build_dir = build_tmp.name
version_dir = os.path.join(build_dir, config.MINOR_VERSION)
os.makedirs(version_dir)
build_dir = tempfile.mkdtemp('.wordfreq')
version_dir = os.path.join(build_dir, config.MINOR_VERSION)
os.makedirs(version_dir)
source_filename = os.path.join(version_dir, 'wordfreq-data.tar.gz')
logger.info("Creating %s", source_filename)
with tarfile.open(source_filename, 'w:gz') as tarf:
tarf.add(config.RAW_DATA_DIR)
source_filename = os.path.join(version_dir, 'wordfreq-data.tar.gz')
logger.info("Creating %s" % source_filename)
with tarfile.open(source_filename, 'w:gz') as tarf:
tarf.add(config.RAW_DATA_DIR)
logger.info("Copying database file %s", config.DB_FILENAME)
subprocess.call([
'/bin/cp',
config.DB_FILENAME,
version_dir
])
logger.info("Copying database file %s" % config.DB_FILENAME)
subprocess.call([
'/bin/cp',
config.DB_FILENAME,
version_dir
])
logger.info("Uploading to %s", upload_path)
subprocess.call([
'/usr/bin/rsync',
'-avz',
version_dir,
upload_path
])
logger.info("Uploading to %s" % upload_path)
subprocess.call([
'/usr/bin/rsync',
'-avz',
version_dir,
upload_path
])
logger.info("Removing build directory %s" % build_dir)
shutil.rmtree(build_dir)