Mirror of https://github.com/rspeer/wordfreq.git (synced 2024-12-23 09:21:37 +00:00)
add docstrings to chinese_ and japanese_tokenize
Former-commit-id: e1f7a1ccf3
This commit is contained in:
parent f4d865c0be
commit eb08c0a951
@@ -86,6 +86,9 @@ def turkish_tokenize(text, include_punctuation=False):
 
 mecab_tokenize = None
 def japanese_tokenize(text, include_punctuation=False):
+    """
+    Tokenize Japanese text, initializing the MeCab tokenizer if necessary.
+    """
     global mecab_tokenize
     if mecab_tokenize is None:
         from wordfreq.japanese import mecab_tokenize
@@ -96,6 +99,9 @@ def japanese_tokenize(text, include_punctuation=False):
 
 jieba_tokenize = None
 def chinese_tokenize(text, include_punctuation=False, external_wordlist=False):
+    """
+    Tokenize Chinese text, initializing the Jieba tokenizer if necessary.
+    """
     global jieba_tokenize
     if jieba_tokenize is None:
         from wordfreq.chinese import jieba_tokenize
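Both hunks document the same lazy-initialization pattern: the module-level tokenizer name starts as None, and the heavyweight backend (MeCab for Japanese, Jieba for Chinese) is imported only the first time the function runs, with the `global` declaration making the deferred import rebind the module-level name. Below is a minimal, runnable sketch of that pattern; the names are illustrative stand-ins, not part of wordfreq, and a compiled regex plays the role of the expensive external backend.

    # Sketch of the lazy-initialization pattern used by japanese_tokenize
    # and chinese_tokenize. Names here are hypothetical.

    _tokenizer = None


    def tokenize_lazily(text):
        """
        Tokenize `text`, building the (pretend-expensive) backend only on
        first use, the way wordfreq defers importing MeCab and Jieba.
        """
        global _tokenizer
        if _tokenizer is None:
            # Deferred setup: nothing heavy is loaded until some text in
            # this language actually needs to be tokenized.
            import re
            _tokenizer = re.compile(r"\w+", re.UNICODE).findall
        return _tokenizer(text)


    if __name__ == "__main__":
        print(tokenize_lazily("lazy initialization keeps startup cheap"))
        # -> ['lazy', 'initialization', 'keeps', 'startup', 'cheap']

The payoff of the pattern, as used here, is that MeCab and Jieba stay optional: users who never tokenize Japanese or Chinese text never pay the import cost and don't need those packages installed at all.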