extract_reddit: keep only Reddit comments whose score is greater than 0
(jq `select(.score > 0)`) before extracting `.body`.
NOTE(review): the sed stages were reconstructed as HTML-entity replacements
(&gt; and &lt;) from an entity-mangled copy of this patch; verify against
the original commit, which may contain additional stages before `> $out`.

diff --git a/wordfreq_builder/rules.ninja b/wordfreq_builder/rules.ninja
index 0263c7b..b43ebeb 100644
--- a/wordfreq_builder/rules.ninja
+++ b/wordfreq_builder/rules.ninja
@@ -104,4 +104,4 @@ rule cat
   command = cat $in > $out
 
 rule extract_reddit
-  command = bunzip2 -c $in | $JQ -r '.body' | fgrep -v '[deleted]' | sed 's/&gt;/>/g' | sed 's/&lt;/</g' > $out
+  command = bunzip2 -c $in | $JQ -r 'select(.score > 0) | .body' | fgrep -v '[deleted]' | sed 's/&gt;/>/g' | sed 's/&lt;/</g' > $out