mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 18:01:38 +00:00
Add 'twitter' as a final build, and add a new build directory
The `data/dist` directory is now a convenient place to find the final built files that can be copied into wordfreq.
This commit is contained in:
parent
58c8bda21b
commit
3eb3e7c388
@ -41,7 +41,9 @@ CONFIG = {
|
|||||||
'opensubtitles': 'generated/opensubtitles/opensubtitles_{lang}.{ext}',
|
'opensubtitles': 'generated/opensubtitles/opensubtitles_{lang}.{ext}',
|
||||||
'leeds': 'generated/leeds/leeds_internet_{lang}.{ext}',
|
'leeds': 'generated/leeds/leeds_internet_{lang}.{ext}',
|
||||||
'google-books': 'generated/google-books/google_books_{lang}.{ext}',
|
'google-books': 'generated/google-books/google_books_{lang}.{ext}',
|
||||||
'combined': 'generated/combined/combined_{lang}.{ext}'
|
'combined': 'generated/combined/combined_{lang}.{ext}',
|
||||||
|
'combined-dist': 'dist/combined_{lang}.{ext}',
|
||||||
|
'twitter-dist': 'dist/twitter_{lang}.{ext}'
|
||||||
},
|
},
|
||||||
'min_sources': 2
|
'min_sources': 2
|
||||||
}
|
}
|
||||||
|
@ -205,11 +205,21 @@ def combine_lists(languages):
|
|||||||
add_dep(lines, 'merge', input_files, output_file,
|
add_dep(lines, 'merge', input_files, output_file,
|
||||||
extra='wordfreq_builder/word_counts.py')
|
extra='wordfreq_builder/word_counts.py')
|
||||||
|
|
||||||
output_cBpack = wordlist_filename('combined', language, 'msgpack.gz')
|
output_cBpack = wordlist_filename('combined-dist', language, 'msgpack.gz')
|
||||||
add_dep(lines, 'freqs2cB', output_file, output_cBpack,
|
add_dep(lines, 'freqs2cB', output_file, output_cBpack,
|
||||||
extra='wordfreq_builder/word_counts.py')
|
extra='wordfreq_builder/word_counts.py')
|
||||||
|
|
||||||
lines.append('default {}'.format(output_cBpack))
|
lines.append('default {}'.format(output_cBpack))
|
||||||
|
|
||||||
|
# Write standalone lists for Twitter frequency
|
||||||
|
if language in CONFIG['sources']['twitter']:
|
||||||
|
input_file = wordlist_filename('twitter', language, 'counts.txt')
|
||||||
|
output_cBpack = wordlist_filename('twitter-dist', language, 'msgpack.gz')
|
||||||
|
add_dep(lines, 'freqs2cB', input_file, output_cBpack,
|
||||||
|
extra='wordfreq_builder/word_counts.py')
|
||||||
|
|
||||||
|
lines.append('default {}'.format(output_cBpack))
|
||||||
|
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user