mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 09:51:38 +00:00
always use surface forms
This commit is contained in:
parent
70c9e99ee4
commit
5d14d24738
@@ -4,7 +4,7 @@ from pathlib import Path
|
||||
import argparse
|
||||
|
||||
|
||||
def count_twitter(pathname, offset=0, nsplit=1, surface=False):
|
||||
def count_twitter(pathname, offset=0, nsplit=1, surface=True):
|
||||
path = Path(pathname)
|
||||
if surface == True:
|
||||
tokenizer = rosette_surface_tokenizer
|
||||
@@ -22,7 +22,6 @@ if __name__ == '__main__':
|
||||
parser.add_argument('filename', help='filename of input file containing one tweet per line')
|
||||
parser.add_argument('offset', type=int)
|
||||
parser.add_argument('nsplit', type=int)
|
||||
parser.add_argument('-s', '--surface', action='store_true', help='Use surface text instead of stems')
|
||||
args = parser.parse_args()
|
||||
count_twitter(args.filename, args.offset, args.nsplit, surface=args.surface)
|
||||
count_twitter(args.filename, args.offset, args.nsplit, surface=True)
|
||||
|
||||
|
@@ -4,7 +4,7 @@ from pathlib import Path
|
||||
import argparse
|
||||
|
||||
|
||||
def count_wikipedia(filename, surface=False):
|
||||
def count_wikipedia(filename, surface=True):
|
||||
path = Path(filename)
|
||||
if surface == True:
|
||||
tokenizer = rosette_surface_tokenizer
|
||||
@@ -18,7 +18,6 @@ def count_wikipedia(filename, surface=False):
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('filename', help='flat text file containing extracted Wikipedia text')
|
||||
parser.add_argument('-s', '--surface', action='store_true', help='Use surface text instead of stems')
|
||||
args = parser.parse_args()
|
||||
count_wikipedia(args.filename, surface=args.surface)
|
||||
count_wikipedia(args.filename, surface=True)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user