Deal with database connections more consistently

This commit is contained in:
Robyn Speer 2013-10-29 16:43:58 -04:00
parent 4fc1971b0f
commit 91a62dbee5

View File

@@ -94,6 +94,13 @@ def _scale_freqs(counts):
def save_wordlist_to_db(conn, listname, lang, freqs): def save_wordlist_to_db(conn, listname, lang, freqs):
"""
Save a dictionary of word frequencies to a database.
The dictionary `freqs` should be properly scaled (run it through
`_scale_freqs`). It will be saved as language `lang` in wordlist
`listname`.
"""
rows = [(listname, lang, word, freq) rows = [(listname, lang, word, freq)
for word, freq in freqs.items()] for word, freq in freqs.items()]
conn.executemany( conn.executemany(
@@ -104,7 +111,7 @@ def save_wordlist_to_db(conn, listname, lang, freqs):
conn.commit() conn.commit()
def create_db(conn, filename): def create_db(conn):
""" """
Create a wordlist database, at the filename specified by `wordfreq.config`. Create a wordlist database, at the filename specified by `wordfreq.config`.
@@ -121,6 +128,10 @@ def create_db(conn, filename):
conn.commit() conn.commit()
def get_db_connection(filename):
return sqlite3.connect(filename)
LEEDS_LANGUAGES = ('ar', 'de', 'el', 'es', 'fr', 'it', 'ja', 'pt', 'ru', 'zh') LEEDS_LANGUAGES = ('ar', 'de', 'el', 'es', 'fr', 'it', 'ja', 'pt', 'ru', 'zh')
def load_all_data(source_dir=None, filename=None): def load_all_data(source_dir=None, filename=None):
""" """
@@ -132,9 +143,9 @@ def load_all_data(source_dir=None, filename=None):
if filename is None: if filename is None:
filename = config.DB_FILENAME filename = config.DB_FILENAME
conn = sqlite3.connect(filename) conn = get_db_connection(filename)
logger.info("Creating database") logger.info("Creating database")
create_db(conn, filename) create_db(conn)
logger.info("Loading Leeds internet corpus:") logger.info("Loading Leeds internet corpus:")
for lang in LEEDS_LANGUAGES: for lang in LEEDS_LANGUAGES: