mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 09:21:37 +00:00
filter out downvoted Reddit posts
This commit is contained in:
parent
cfe68893fa
commit
5b98794b86
@@ -104,4 +104,4 @@ rule cat
|
|||||||
command = cat $in > $out
|
command = cat $in > $out
|
||||||
|
|
||||||
rule extract_reddit
|
rule extract_reddit
|
||||||
command = bunzip2 -c $in | $JQ -r '.body' | fgrep -v '[deleted]' | sed 's/&gt;/>/g' | sed 's/&lt;/</g' | sed 's/&amp;/\&/g' > $out
|
command = bunzip2 -c $in | $JQ -r 'select(.score > 0) | .body' | fgrep -v '[deleted]' | sed 's/&gt;/>/g' | sed 's/&lt;/</g' | sed 's/&amp;/\&/g' > $out
|
||||||
|
Loading…
Reference in New Issue
Block a user