Start a new multilingual wordlist called 'stems'

So far, this wordlist is only in Dutch.


Former-commit-id: af5f65b328
This commit is contained in:
Rob Speer 2015-03-31 15:59:30 -04:00
parent 58da7797da
commit d3c41fd8d8
5 changed files with 44611 additions and 5 deletions

View File

@@ -193,11 +193,10 @@ def load_all_data(source_dir=None, filename=None, do_it_anyway=False):
logger.info("Loading combined multilingual corpus:")
multi_wordlist = read_multilingual_csv(wordlist_path('luminoso', 'multilingual.csv'))
for lang in multi_wordlist:
if lang != 'nl':
logger.info("\tLanguage: %s" % lang)
save_wordlist_to_db(conn, 'multi', lang, multi_wordlist[lang])
logger.info("\tLanguage: %s" % lang)
save_wordlist_to_db(conn, 'multi', lang, multi_wordlist[lang])
# Load Dutch from a separate source. We may end up with more languages like this.
read_wordlist_into_db(conn, wordlist_path('luminoso', 'nl-combined-201503.csv'), 'multi', '*')
read_wordlist_into_db(conn, wordlist_path('luminoso', 'nl-combined-201503.csv'), 'stems', '*')
logger.info("Done loading.")

View File

@@ -1 +1 @@
acde5e04c02b070c9f5c90b1da73ce6cb91bf937
9b29de132c82bd7287c08c2937e3c4821525e356

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
b9d52d81bbe078a7de17519ed3494eb4771f0f69