update comments in wordfreq_builder.config; remove unused 'version'

2024-12-23 17:31:41 +00:00 · 2015-09-08 16:15:08 -04:00 · 2015-09-08 16:15:08 -04:00 · bc323eccaf
commit bc323eccaf
parent 0ab23f8a28
1 changed files with 2 additions and 4 deletions
--- a/wordfreq_builder/wordfreq_builder/config.py
+++ b/wordfreq_builder/wordfreq_builder/config.py
@@ -1,19 +1,17 @@
 import os
 CONFIG = {
-   'version': '1.0b',
    # data_dir is a relative or absolute path to where the wordlist data
    # is stored
    'data_dir': 'data',
    'sources': {
-        # A list of language codes (possibly un-standardized) that we'll
+        # A list of language codes that we'll look up in filenames for these
-        # look up in filenames for these various data sources.
+        # various data sources.
        #
        # Consider adding:
        # 'th' when we get tokenization for it
        # 'hi' when we stop messing up its tokenization
        # 'tl' with one more data source
        # 'sv' because its data sources are ready
        'twitter': [
            'ar', 'de', 'el', 'en', 'es', 'fr', 'id', 'it', 'ja', 'ko', 'ms', 'nl',
            'pl', 'pt', 'ru', 'sv', 'tr'