filter out downvoted Reddit posts

Former-commit-id: 5b98794b86
This commit is contained in:
Robyn Speer 2016-03-24 18:05:13 -04:00
parent 16841d4b0c
commit 2840ca55aa

View File

@@ -104,4 +104,4 @@ rule cat
command = cat $in > $out
rule extract_reddit
command = bunzip2 -c $in | $JQ -r '.body' | fgrep -v '[deleted]' | sed 's/&gt;/>/g' | sed 's/&lt;/</g' | sed 's/&amp;/\&/g' > $out
command = bunzip2 -c $in | $JQ -r 'select(.score > 0) | .body' | fgrep -v '[deleted]' | sed 's/&gt;/>/g' | sed 's/&lt;/</g' | sed 's/&amp;/\&/g' > $out