add surface forms from Twitter 2014 data

Former-commit-id: ffdaa82b11
This commit is contained in:
Robyn Speer 2015-02-17 15:06:11 -05:00
parent 8d57b39a7b
commit ad22387a53
2 changed files with 9 additions and 0 deletions

View File

@ -197,6 +197,14 @@ def load_all_data(source_dir=None, filename=None, do_it_anyway=False):
logger.info("\tLanguage: %s" % lang)
save_wordlist_to_db(conn, 'twitter-stems', lang, twitter_stems_wordlist[lang])
logger.info("Loading unstemmed Twitter corpus.")
twitter_stems_wordlist = read_multilingual_csv(
os.path.join(source_dir, 'luminoso', 'twitter-surfaces-2014.csv')
)
for lang in twitter_stems_wordlist:
logger.info("\tLanguage: %s" % lang)
save_wordlist_to_db(conn, 'twitter-surfaces', lang, twitter_stems_wordlist[lang])
logger.info("Done loading.")

View File

@ -0,0 +1 @@
8ba8230ca42d8e9e622afee772b3a96c34126e23