From 5d14d24738e49d78d48dea9dd0839902b0100c5d Mon Sep 17 00:00:00 2001
From: Rob Speer
Date: Wed, 29 Apr 2015 15:17:00 -0400
Subject: [PATCH] always use surface forms

---
 wordfreq_builder/wordfreq_builder/cmd_count_twitter.py   | 5 ++---
 wordfreq_builder/wordfreq_builder/cmd_count_wikipedia.py | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/wordfreq_builder/wordfreq_builder/cmd_count_twitter.py b/wordfreq_builder/wordfreq_builder/cmd_count_twitter.py
index 7613d2d..1086f1b 100644
--- a/wordfreq_builder/wordfreq_builder/cmd_count_twitter.py
+++ b/wordfreq_builder/wordfreq_builder/cmd_count_twitter.py
@@ -4,7 +4,7 @@ from pathlib import Path
 import argparse
 
 
-def count_twitter(pathname, offset=0, nsplit=1, surface=False):
+def count_twitter(pathname, offset=0, nsplit=1, surface=True):
     path = Path(pathname)
     if surface == True:
         tokenizer = rosette_surface_tokenizer
@@ -22,7 +22,6 @@ if __name__ == '__main__':
     parser.add_argument('filename', help='filename of input file containing one tweet per line')
     parser.add_argument('offset', type=int)
     parser.add_argument('nsplit', type=int)
-    parser.add_argument('-s', '--surface', action='store_true', help='Use surface text instead of stems')
     args = parser.parse_args()
 
-    count_twitter(args.filename, args.offset, args.nsplit, surface=args.surface)
+    count_twitter(args.filename, args.offset, args.nsplit, surface=True)
diff --git a/wordfreq_builder/wordfreq_builder/cmd_count_wikipedia.py b/wordfreq_builder/wordfreq_builder/cmd_count_wikipedia.py
index da51519..c362f03 100644
--- a/wordfreq_builder/wordfreq_builder/cmd_count_wikipedia.py
+++ b/wordfreq_builder/wordfreq_builder/cmd_count_wikipedia.py
@@ -4,7 +4,7 @@ from pathlib import Path
 import argparse
 
 
-def count_wikipedia(filename, surface=False):
+def count_wikipedia(filename, surface=True):
     path = Path(filename)
     if surface == True:
         tokenizer = rosette_surface_tokenizer
@@ -18,7 +18,6 @@ def count_wikipedia(filename, surface=False):
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('filename', help='flat text file containing extracted Wikipedia text')
-    parser.add_argument('-s', '--surface', action='store_true', help='Use surface text instead of stems')
     args = parser.parse_args()
 
-    count_wikipedia(args.filename, surface=args.surface)
+    count_wikipedia(args.filename, surface=True)