always use surface forms

This commit is contained in:
Rob Speer 2015-04-29 15:17:00 -04:00
parent 70c9e99ee4
commit 5d14d24738
2 changed files with 4 additions and 6 deletions

View File

@@ -4,7 +4,7 @@ from pathlib import Path
import argparse
def count_twitter(pathname, offset=0, nsplit=1, surface=False):
def count_twitter(pathname, offset=0, nsplit=1, surface=True):
path = Path(pathname)
if surface == True:
tokenizer = rosette_surface_tokenizer
@@ -22,7 +22,6 @@ if __name__ == '__main__':
parser.add_argument('filename', help='filename of input file containing one tweet per line')
parser.add_argument('offset', type=int)
parser.add_argument('nsplit', type=int)
parser.add_argument('-s', '--surface', action='store_true', help='Use surface text instead of stems')
args = parser.parse_args()
count_twitter(args.filename, args.offset, args.nsplit, surface=args.surface)
count_twitter(args.filename, args.offset, args.nsplit, surface=True)

View File

@@ -4,7 +4,7 @@ from pathlib import Path
import argparse
def count_wikipedia(filename, surface=False):
def count_wikipedia(filename, surface=True):
path = Path(filename)
if surface == True:
tokenizer = rosette_surface_tokenizer
@@ -18,7 +18,6 @@ def count_wikipedia(filename, surface=False):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('filename', help='flat text file containing extracted Wikipedia text')
parser.add_argument('-s', '--surface', action='store_true', help='Use surface text instead of stems')
args = parser.parse_args()
count_wikipedia(args.filename, surface=args.surface)
count_wikipedia(args.filename, surface=True)