remove reddit_base_filename function

Former-commit-id: ced15d6eff
This commit is contained in:
Rob Speer 2016-03-31 13:39:13 -04:00
parent d924c8e2a5
commit 6f11256ed1

View File

@@ -236,13 +236,6 @@ def jieba_deps(dirname_in, languages):
return lines return lines
def reddit_base_filename(path):
    """
    Get the base name of a Reddit input file, without its directory or its
    final extension.

    `path` may be a `pathlib` path object or a plain string. Only the last
    suffix is removed, so 'RC_2015-01.bz2' becomes 'RC_2015-01', while
    'RC_2015-01.txt.bz2' becomes 'RC_2015-01.txt'.
    """
    # Wrapping in PurePath lets callers pass a str as well as a Path;
    # for an existing path object this is a cheap, lossless conversion.
    return pathlib.PurePath(path).stem
def reddit_deps(dirname_in, languages): def reddit_deps(dirname_in, languages):
lines = [] lines = []
path_in = pathlib.Path(dirname_in) path_in = pathlib.Path(dirname_in)
@@ -252,7 +245,7 @@ def reddit_deps(dirname_in, languages):
# Extract text from the Reddit comment dumps, and write them to # Extract text from the Reddit comment dumps, and write them to
# .txt.gz files # .txt.gz files
for filepath in path_in.glob('*/*.bz2'): for filepath in path_in.glob('*/*.bz2'):
base = reddit_base_filename(filepath) base = filepath.stem
transformed_file = wordlist_filename('reddit', base + '.all', 'txt') transformed_file = wordlist_filename('reddit', base + '.all', 'txt')
slices[base] = transformed_file slices[base] = transformed_file
add_dep(lines, 'extract_reddit', str(filepath), transformed_file) add_dep(lines, 'extract_reddit', str(filepath), transformed_file)