mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 09:21:37 +00:00
add surface forms from Twitter 2014 data
This commit is contained in:
parent
b6f246ecbb
commit
ffdaa82b11
@ -197,6 +197,14 @@ def load_all_data(source_dir=None, filename=None, do_it_anyway=False):
|
||||
logger.info("\tLanguage: %s" % lang)
|
||||
save_wordlist_to_db(conn, 'twitter-stems', lang, twitter_stems_wordlist[lang])
|
||||
|
||||
logger.info("Loading unstemmed Twitter corpus.")
|
||||
twitter_stems_wordlist = read_multilingual_csv(
|
||||
os.path.join(source_dir, 'luminoso', 'twitter-surfaces-2014.csv')
|
||||
)
|
||||
for lang in twitter_stems_wordlist:
|
||||
logger.info("\tLanguage: %s" % lang)
|
||||
save_wordlist_to_db(conn, 'twitter-surfaces', lang, twitter_stems_wordlist[lang])
|
||||
|
||||
logger.info("Done loading.")
|
||||
|
||||
|
||||
|
2370454
wordfreq_data/luminoso/twitter-surfaces-2014.csv
Normal file
2370454
wordfreq_data/luminoso/twitter-surfaces-2014.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user