mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 18:01:38 +00:00
removed TOKENIZE_TWITTER option
This commit is contained in:
parent
772c0cddd1
commit
00e18b7d4b
@ -10,10 +10,6 @@ HEADER = """# This file is automatically generated. Do not edit it.
|
|||||||
TMPDIR = data_filename('tmp')
|
TMPDIR = data_filename('tmp')
|
||||||
|
|
||||||
|
|
||||||
# Set this to True to rebuild the Twitter tokenization (which takes days)
|
|
||||||
TOKENIZE_TWITTER = True
|
|
||||||
|
|
||||||
|
|
||||||
def add_dep(lines, rule, input, output, extra=None, params=None):
|
def add_dep(lines, rule, input, output, extra=None, params=None):
|
||||||
if isinstance(output, list):
|
if isinstance(output, list):
|
||||||
output = ' '.join(output)
|
output = ' '.join(output)
|
||||||
@ -48,16 +44,13 @@ def make_ninja_deps(rules_filename, out=sys.stdout):
|
|||||||
# The first dependency is to make sure the build file is up to date.
|
# The first dependency is to make sure the build file is up to date.
|
||||||
add_dep(lines, 'build_deps', 'rules.ninja', 'build.ninja',
|
add_dep(lines, 'build_deps', 'rules.ninja', 'build.ninja',
|
||||||
extra='wordfreq_builder/ninja.py')
|
extra='wordfreq_builder/ninja.py')
|
||||||
|
lines.extend(
|
||||||
if TOKENIZE_TWITTER:
|
twitter_deps(
|
||||||
lines.extend(
|
data_filename('raw-input/twitter/all-2014.txt'),
|
||||||
twitter_deps(
|
slice_prefix=data_filename('slices/twitter/tweets-2014'),
|
||||||
data_filename('raw-input/twitter/all-2014.txt'),
|
combined_prefix=data_filename('generated/twitter/tweets-2014'),
|
||||||
slice_prefix=data_filename('slices/twitter/tweets-2014'),
|
slices=40,
|
||||||
combined_prefix=data_filename('generated/twitter/tweets-2014'),
|
languages=CONFIG['sources']['twitter']
|
||||||
slices=40,
|
|
||||||
languages=CONFIG['sources']['twitter']
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
lines.extend(
|
lines.extend(
|
||||||
wikipedia_deps(
|
wikipedia_deps(
|
||||||
|
Loading…
Reference in New Issue
Block a user