add twitter-stems-2014 wordlist data

Former-commit-id: 6ab72201cd
2024-12-23 17:31:41 +00:00 · 2015-02-11 13:29:32 -05:00 · 2015-02-11 13:29:32 -05:00 · f4280dcad0
commit f4280dcad0
parent 03fac20b1b
3 changed files with 10 additions and 1 deletion
--- a/wordfreq/build.py
+++ b/wordfreq/build.py
@@ -189,6 +189,14 @@ def load_all_data(source_dir=None, filename=None, do_it_anyway=False):
    )
    save_wordlist_to_db(conn, 'twitter', 'xx', twitter_wordlist)
    logger.info("Loading stemmed Twitter corpus.")
    twitter_stems_wordlist = read_multilingual_csv(
        os.path.join(source_dir, 'luminoso', 'twitter-stems-2014.csv')
    )
    for lang in twitter_stems_wordlist:
        logger.info("\tLanguage: %s" % lang)
        save_wordlist_to_db(conn, 'twitter-stems', lang, twitter_stems_wordlist[lang])
    logger.info("Done loading.")
--- a/wordfreq/config.py
+++ b/wordfreq/config.py
@@ -5,7 +5,7 @@ DB_DIR = (os.environ.get('WORDFREQ_DATA')
          or os.path.expanduser('~/.cache/wordfreq'))
 # When the minor version number increments, the data may change.
-VERSION = '0.4.1'
+VERSION = '0.5.0'
 MINOR_VERSION = '.'.join(VERSION.split('.')[:2])
 # Put these options together to make a database filename.
--- a/wordfreq_data/luminoso/twitter-stems-2014.csv.REMOVED.git-id
+++ b/wordfreq_data/luminoso/twitter-stems-2014.csv.REMOVED.git-id
@@ -0,0 +1 @@
+3710e65f27753facc699fe56269c9631d5ba6aba