From 2840ca55aabf305739a060f964f464f7b08ad210 Mon Sep 17 00:00:00 2001
From: Robyn Speer
Date: Thu, 24 Mar 2016 18:05:13 -0400
Subject: [PATCH] filter out downvoted Reddit posts

Former-commit-id: 5b98794b86e07edff3e057ab69386e4755a0ff96
---
Review notes (text in this section is ignored by git-am and is not part of
the commit):
  - The only functional change is the jq filter: 'select(.score > 0) | .body'
    emits .body only for input objects whose .score is greater than 0,
    where the old filter '.body' emitted it for every object.
  - The sed stages turn the HTML entities &gt;, &lt; and &amp; back into
    literal characters. The last one uses '\&' because a bare '&' in a sed
    replacement stands for "the matched text".
  - NOTE(review): the entity names and the trailing '> $out' were restored
    from an entity-decoded copy of this patch; confirm against the repository
    blob 0263c7b before applying.

 wordfreq_builder/rules.ninja | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wordfreq_builder/rules.ninja b/wordfreq_builder/rules.ninja
index 0263c7b..b43ebeb 100644
--- a/wordfreq_builder/rules.ninja
+++ b/wordfreq_builder/rules.ninja
@@ -104,4 +104,4 @@ rule cat
   command = cat $in > $out
 
 rule extract_reddit
-  command = bunzip2 -c $in | $JQ -r '.body' | fgrep -v '[deleted]' | sed 's/&gt;/>/g' | sed 's/&lt;/</g' | sed 's/&amp;/\&/g' > $out
+  command = bunzip2 -c $in | $JQ -r 'select(.score > 0) | .body' | fgrep -v '[deleted]' | sed 's/&gt;/>/g' | sed 's/&lt;/</g' | sed 's/&amp;/\&/g' > $out