Mirror of https://github.com/rspeer/wordfreq.git, synced 2024-12-23 17:31:41 +00:00
add docstrings to chinese_ and japanese_tokenize
Former-commit-id: e1f7a1ccf3
parent f4d865c0be
commit eb08c0a951
@@ -86,6 +86,9 @@ def turkish_tokenize(text, include_punctuation=False):
 mecab_tokenize = None

 def japanese_tokenize(text, include_punctuation=False):
+    """
+    Tokenize Japanese text, initializing the MeCab tokenizer if necessary.
+    """
     global mecab_tokenize
     if mecab_tokenize is None:
         from wordfreq.japanese import mecab_tokenize
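For readers unfamiliar with the pattern documented here: the module keeps mecab_tokenize as None and only imports the real MeCab-backed tokenizer on the first call, so importing wordfreq never pulls in MeCab unless Japanese text actually shows up. Below is a minimal, self-contained sketch of that lazy-initialization idiom; japanese_tokenize_sketch and its whitespace-splitting stand-in are illustrative names only, not part of wordfreq's API.

mecab_like_tokenize = None  # stays None until the first call needs it

def japanese_tokenize_sketch(text, include_punctuation=False):
    """Tokenize text, initializing the (stand-in) tokenizer if necessary."""
    global mecab_like_tokenize
    if mecab_like_tokenize is None:
        # Stand-in for `from wordfreq.japanese import mecab_tokenize`,
        # which would load MeCab only when Japanese text first appears.
        mecab_like_tokenize = lambda t: t.split()
    return mecab_like_tokenize(text)

print(japanese_tokenize_sketch("konnichiwa sekai"))  # ['konnichiwa', 'sekai']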
@@ -96,6 +99,9 @@ def japanese_tokenize(text, include_punctuation=False):
 jieba_tokenize = None

 def chinese_tokenize(text, include_punctuation=False, external_wordlist=False):
+    """
+    Tokenize Chinese text, initializing the Jieba tokenizer if necessary.
+    """
     global jieba_tokenize
     if jieba_tokenize is None:
         from wordfreq.chinese import jieba_tokenize
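As a hedged usage sketch, these language-specific tokenizers are normally reached through wordfreq's public tokenize() function rather than called directly. The example assumes the optional MeCab and Jieba dependencies are installed, and the token outputs shown in the comments are indicative, not exact.

from wordfreq import tokenize

# Japanese goes through the MeCab-backed path, Chinese through Jieba;
# each heavyweight tokenizer is imported lazily on its first use.
print(tokenize('おはようございます', 'ja'))  # e.g. ['おはよう', 'ござい', 'ます']
print(tokenize('谢谢你', 'zh'))              # e.g. ['谢谢', '你']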