mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 18:01:38 +00:00
parent
117e06d5a4
commit
09dff0186c
@@ -17,7 +17,7 @@ CLD2_BAD_CHAR_RANGE = "[%s]" % "".join(
|
||||
CLD2_BAD_CHARS_RE = re.compile(CLD2_BAD_CHAR_RANGE)
|
||||
|
||||
TWITTER_HANDLE_RE = re.compile('@{0}+'.format(NON_PUNCT_RANGE))
|
||||
-TCO_RE = re.compile('http(?:s)?://t.co/[a-zA-Z0-9]+'.format(NON_PUNCT_RANGE))
|
||||
+TCO_RE = re.compile('http(?:s)?://t.co/[a-zA-Z0-9]+')
|
||||
|
||||
|
||||
def cld2_surface_tokenizer(text):
|
||||
|
Loading…
Reference in New Issue
Block a user