From 5a37cc22c7d57db2e9335c0a5248ac2c04b961db Mon Sep 17 00:00:00 2001
From: Robyn Speer
Date: Thu, 31 Mar 2016 13:39:13 -0400
Subject: [PATCH] remove reddit_base_filename function

Former-commit-id: ced15d6eff16337b310134d4ddd74c47c60f7482
---
 wordfreq_builder/wordfreq_builder/ninja.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/wordfreq_builder/wordfreq_builder/ninja.py b/wordfreq_builder/wordfreq_builder/ninja.py
index 0dbcd82..3091a8e 100644
--- a/wordfreq_builder/wordfreq_builder/ninja.py
+++ b/wordfreq_builder/wordfreq_builder/ninja.py
@@ -236,13 +236,6 @@ def jieba_deps(dirname_in, languages):
     return lines
 
 
-def reddit_base_filename(path):
-    """
-    Get the base name of a Reddit input file, without its path or extension.
-    """
-    return path.stem
-
-
 def reddit_deps(dirname_in, languages):
     lines = []
     path_in = pathlib.Path(dirname_in)
@@ -252,7 +245,7 @@ def reddit_deps(dirname_in, languages):
     # Extract text from the Reddit comment dumps, and write them to
     # .txt.gz files
     for filepath in path_in.glob('*/*.bz2'):
-        base = reddit_base_filename(filepath)
+        base = filepath.stem
         transformed_file = wordlist_filename('reddit', base + '.all', 'txt')
         slices[base] = transformed_file
         add_dep(lines, 'extract_reddit', str(filepath), transformed_file)