mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
Fix extraneous dot in intermediate Reddit filenames: pass 'txt' instead of '.txt' as the extension argument to wordlist_filename
Former-commit-id: 6feae99381
This commit is contained in:
parent
460fbb84fd
commit
034d8f540b
@ -253,7 +253,7 @@ def reddit_deps(dirname_in, languages):
|
|||||||
# .txt.gz files
|
# .txt.gz files
|
||||||
for filepath in path_in.glob('*/*.bz2'):
|
for filepath in path_in.glob('*/*.bz2'):
|
||||||
base = reddit_base_filename(filepath)
|
base = reddit_base_filename(filepath)
|
||||||
transformed_file = wordlist_filename('reddit', base + '.all', '.txt')
|
transformed_file = wordlist_filename('reddit', base + '.all', 'txt')
|
||||||
slices[base] = transformed_file
|
slices[base] = transformed_file
|
||||||
add_dep(lines, 'extract_reddit', str(filepath), transformed_file)
|
add_dep(lines, 'extract_reddit', str(filepath), transformed_file)
|
||||||
|
|
||||||
@ -261,7 +261,7 @@ def reddit_deps(dirname_in, languages):
|
|||||||
transformed_file = slices[base]
|
transformed_file = slices[base]
|
||||||
language_outputs = []
|
language_outputs = []
|
||||||
for language in languages:
|
for language in languages:
|
||||||
filename = wordlist_filename('reddit', base + '.' + language, '.txt')
|
filename = wordlist_filename('reddit', base + '.' + language, 'txt')
|
||||||
language_outputs.append(filename)
|
language_outputs.append(filename)
|
||||||
|
|
||||||
count_filename = wordlist_filename('reddit', base + '.' + language, 'counts.txt')
|
count_filename = wordlist_filename('reddit', base + '.' + language, 'counts.txt')
|
||||||
@ -270,7 +270,7 @@ def reddit_deps(dirname_in, languages):
|
|||||||
|
|
||||||
# find the prefix by constructing a filename, then stripping off
|
# find the prefix by constructing a filename, then stripping off
|
||||||
# '.xx.txt' from the end
|
# '.xx.txt' from the end
|
||||||
prefix = wordlist_filename('reddit', base + '.xx', '.txt')[:-7]
|
prefix = wordlist_filename('reddit', base + '.xx', 'txt')[:-7]
|
||||||
add_dep(lines, 'tokenize_reddit', transformed_file, language_outputs,
|
add_dep(lines, 'tokenize_reddit', transformed_file, language_outputs,
|
||||||
params={'prefix': prefix},
|
params={'prefix': prefix},
|
||||||
extra='wordfreq_builder/tokenizers.py')
|
extra='wordfreq_builder/tokenizers.py')
|
||||||
|
Loading…
Reference in New Issue
Block a user