add docstrings to chinese_ and japanese_tokenize

Former-commit-id: e1f7a1ccf3
Robyn Speer 2015-10-27 13:23:56 -04:00
parent f4d865c0be
commit eb08c0a951


@@ -86,6 +86,9 @@ def turkish_tokenize(text, include_punctuation=False):
 mecab_tokenize = None
 def japanese_tokenize(text, include_punctuation=False):
+    """
+    Tokenize Japanese text, initializing the MeCab tokenizer if necessary.
+    """
     global mecab_tokenize
     if mecab_tokenize is None:
         from wordfreq.japanese import mecab_tokenize
@@ -96,6 +99,9 @@ def japanese_tokenize(text, include_punctuation=False):
 jieba_tokenize = None
 def chinese_tokenize(text, include_punctuation=False, external_wordlist=False):
+    """
+    Tokenize Chinese text, initializing the Jieba tokenizer if necessary.
+    """
     global jieba_tokenize
     if jieba_tokenize is None:
         from wordfreq.chinese import jieba_tokenize
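
Both functions documented here follow the same lazy-initialization pattern: the module-level tokenizer name starts as None, and the heavyweight backend (MeCab or Jieba) is only imported on the first call. Below is a minimal, self-contained sketch of that pattern; the names heavy_tokenize and tokenize_lazily are placeholders for illustration, not part of wordfreq's API, and the inline lambda stands in for the real deferred import.

# Sketch of the lazy-initialization pattern shown in the diff above.
# heavy_tokenize plays the role of mecab_tokenize / jieba_tokenize:
# it stays None until the first call actually needs it.

heavy_tokenize = None

def tokenize_lazily(text):
    """
    Tokenize text, initializing the heavyweight tokenizer if necessary.
    """
    global heavy_tokenize
    if heavy_tokenize is None:
        # In wordfreq this line is an import, e.g.
        # from wordfreq.japanese import mecab_tokenize
        # Here we substitute a trivial whitespace splitter as a stand-in.
        heavy_tokenize = lambda t: t.split()
    return heavy_tokenize(text)

if __name__ == "__main__":
    print(tokenize_lazily("hello lazy world"))  # ['hello', 'lazy', 'world']

The benefit of this structure is that importing wordfreq stays cheap: users who never tokenize Japanese or Chinese text never pay the cost of loading MeCab or Jieba and their dictionaries.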