diff --git a/wordfreq_builder/wordfreq_builder/ninja.py b/wordfreq_builder/wordfreq_builder/ninja.py
index ec59716..89aee37 100644
--- a/wordfreq_builder/wordfreq_builder/ninja.py
+++ b/wordfreq_builder/wordfreq_builder/ninja.py
@@ -10,10 +10,6 @@ HEADER = """# This file is automatically generated. Do not edit it.
 TMPDIR = data_filename('tmp')
 
 
-# Set this to True to rebuild the Twitter tokenization (which takes days)
-TOKENIZE_TWITTER = True
-
-
 def add_dep(lines, rule, input, output, extra=None, params=None):
     if isinstance(output, list):
         output = ' '.join(output)
@@ -48,16 +44,14 @@ def make_ninja_deps(rules_filename, out=sys.stdout):
     # The first dependency is to make sure the build file is up to date.
     add_dep(lines, 'build_deps', 'rules.ninja', 'build.ninja',
             extra='wordfreq_builder/ninja.py')
-
-    if TOKENIZE_TWITTER:
-        lines.extend(
-            twitter_deps(
-                data_filename('raw-input/twitter/all-2014.txt'),
-                slice_prefix=data_filename('slices/twitter/tweets-2014'),
-                combined_prefix=data_filename('generated/twitter/tweets-2014'),
-                slices=40,
-                languages=CONFIG['sources']['twitter']
-            )
+    lines.extend(
+        twitter_deps(
+            data_filename('raw-input/twitter/all-2014.txt'),
+            slice_prefix=data_filename('slices/twitter/tweets-2014'),
+            combined_prefix=data_filename('generated/twitter/tweets-2014'),
+            slices=40,
+            languages=CONFIG['sources']['twitter']
         )
+    )
     lines.extend(
         wikipedia_deps(