mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 09:51:38 +00:00
start a new multilingual wordlist called 'stems'
So far, this wordlist is only in Dutch.
Former-commit-id: af5f65b328
This commit is contained in:
parent
58da7797da
commit
d3c41fd8d8
@ -193,11 +193,10 @@ def load_all_data(source_dir=None, filename=None, do_it_anyway=False):
|
||||
logger.info("Loading combined multilingual corpus:")
|
||||
multi_wordlist = read_multilingual_csv(wordlist_path('luminoso', 'multilingual.csv'))
|
||||
for lang in multi_wordlist:
|
||||
if lang != 'nl':
|
||||
logger.info("\tLanguage: %s" % lang)
|
||||
save_wordlist_to_db(conn, 'multi', lang, multi_wordlist[lang])
|
||||
logger.info("\tLanguage: %s" % lang)
|
||||
save_wordlist_to_db(conn, 'multi', lang, multi_wordlist[lang])
|
||||
# Load Dutch from a separate source. We may end up with more languages like this.
|
||||
read_wordlist_into_db(conn, wordlist_path('luminoso', 'nl-combined-201503.csv'), 'multi', '*')
|
||||
read_wordlist_into_db(conn, wordlist_path('luminoso', 'nl-combined-201503.csv'), 'stems', '*')
|
||||
logger.info("Done loading.")
|
||||
|
||||
|
||||
|
@ -1 +1 @@
|
||||
acde5e04c02b070c9f5c90b1da73ce6cb91bf937
|
||||
9b29de132c82bd7287c08c2937e3c4821525e356
|
21282
wordfreq_data/luminoso/twitter-stems-2014-nl.csv
Normal file
21282
wordfreq_data/luminoso/twitter-stems-2014-nl.csv
Normal file
File diff suppressed because it is too large
Load Diff
23324
wordfreq_data/luminoso/twitter-surfaces-2014-nl.csv
Normal file
23324
wordfreq_data/luminoso/twitter-surfaces-2014-nl.csv
Normal file
File diff suppressed because it is too large
Load Diff
1
wordfreq_data/wikipedia/stems-nl.csv.REMOVED.git-id
Normal file
1
wordfreq_data/wikipedia/stems-nl.csv.REMOVED.git-id
Normal file
@ -0,0 +1 @@
|
||||
b9d52d81bbe078a7de17519ed3494eb4771f0f69
|
Loading…
Reference in New Issue
Block a user