mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
parent
03fac20b1b
commit
f4280dcad0
@ -189,6 +189,14 @@ def load_all_data(source_dir=None, filename=None, do_it_anyway=False):
|
|||||||
)
|
)
|
||||||
save_wordlist_to_db(conn, 'twitter', 'xx', twitter_wordlist)
|
save_wordlist_to_db(conn, 'twitter', 'xx', twitter_wordlist)
|
||||||
|
|
||||||
|
logger.info("Loading stemmed Twitter corpus.")
|
||||||
|
twitter_stems_wordlist = read_multilingual_csv(
|
||||||
|
os.path.join(source_dir, 'luminoso', 'twitter-stems-2014.csv')
|
||||||
|
)
|
||||||
|
for lang in twitter_stems_wordlist:
|
||||||
|
logger.info("\tLanguage: %s" % lang)
|
||||||
|
save_wordlist_to_db(conn, 'twitter-stems', lang, twitter_stems_wordlist[lang])
|
||||||
|
|
||||||
logger.info("Done loading.")
|
logger.info("Done loading.")
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ DB_DIR = (os.environ.get('WORDFREQ_DATA')
|
|||||||
or os.path.expanduser('~/.cache/wordfreq'))
|
or os.path.expanduser('~/.cache/wordfreq'))
|
||||||
|
|
||||||
# When the minor version number increments, the data may change.
|
# When the minor version number increments, the data may change.
|
||||||
VERSION = '0.4.1'
|
VERSION = '0.5.0'
|
||||||
MINOR_VERSION = '.'.join(VERSION.split('.')[:2])
|
MINOR_VERSION = '.'.join(VERSION.split('.')[:2])
|
||||||
|
|
||||||
# Put these options together to make a database filename.
|
# Put these options together to make a database filename.
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
3710e65f27753facc699fe56269c9631d5ba6aba
|
Loading…
Reference in New Issue
Block a user