configuration that builds some larger lists

This commit is contained in:
Rob Speer 2016-01-22 14:20:12 -05:00
parent 9907948d11
commit c1a12cebec
2 changed files with 5 additions and 2 deletions

View File

@@ -60,7 +60,8 @@ CONFIG = {
'twitter-dist': 'dist/twitter_{lang}.{ext}', 'twitter-dist': 'dist/twitter_{lang}.{ext}',
'jieba-dist': 'dist/jieba_{lang}.{ext}' 'jieba-dist': 'dist/jieba_{lang}.{ext}'
}, },
'min_sources': 2 'min_sources': 2,
'big-lists': ['en', 'fr', 'es', 'pt']
} }

View File

@@ -353,9 +353,11 @@ def combine_lists(languages):
params={'lang': language, 'buckets': 600}) params={'lang': language, 'buckets': 600})
add_dep(lines, 'freqs2cB', output_file, output_cBpack_big, add_dep(lines, 'freqs2cB', output_file, output_cBpack_big,
extra='wordfreq_builder/word_counts.py', extra='wordfreq_builder/word_counts.py',
params={'lang': language, 'buckets': 900}) params={'lang': language, 'buckets': 800})
lines.append('default {}'.format(output_cBpack)) lines.append('default {}'.format(output_cBpack))
if language in CONFIG['big-lists']:
lines.append('default {}'.format(output_cBpack_big))
# Write standalone lists for Twitter frequency # Write standalone lists for Twitter frequency
if language in CONFIG['sources']['twitter']: if language in CONFIG['sources']['twitter']: