removed TOKENIZE_TWITTER option

This commit is contained in:
Joshua Chin 2015-07-17 14:40:49 -04:00
parent 772c0cddd1
commit 00e18b7d4b

View File

@@ -10,10 +10,6 @@ HEADER = """# This file is automatically generated. Do not edit it.
 TMPDIR = data_filename('tmp')
 
-# Set this to True to rebuild the Twitter tokenization (which takes days)
-TOKENIZE_TWITTER = True
-
-
 def add_dep(lines, rule, input, output, extra=None, params=None):
     if isinstance(output, list):
         output = ' '.join(output)
@@ -48,16 +44,13 @@ def make_ninja_deps(rules_filename, out=sys.stdout):
     # The first dependency is to make sure the build file is up to date.
     add_dep(lines, 'build_deps', 'rules.ninja', 'build.ninja',
             extra='wordfreq_builder/ninja.py')
-    if TOKENIZE_TWITTER:
-        lines.extend(
-            twitter_deps(
-                data_filename('raw-input/twitter/all-2014.txt'),
-                slice_prefix=data_filename('slices/twitter/tweets-2014'),
-                combined_prefix=data_filename('generated/twitter/tweets-2014'),
-                slices=40,
-                languages=CONFIG['sources']['twitter']
-            )
-        )
+    lines.extend(
+        twitter_deps(
+            data_filename('raw-input/twitter/all-2014.txt'),
+            slice_prefix=data_filename('slices/twitter/tweets-2014'),
+            combined_prefix=data_filename('generated/twitter/tweets-2014'),
+            slices=40,
+            languages=CONFIG['sources']['twitter']
+        )
+    )
     lines.extend(
         wikipedia_deps(