From eb08c0a951978470c243ec05e89f77e8c94d85db Mon Sep 17 00:00:00 2001
From: Robyn Speer
Date: Tue, 27 Oct 2015 13:23:56 -0400
Subject: [PATCH] add docstrings to chinese_ and japanese_tokenize

Former-commit-id: e1f7a1ccf38c05f1b6a771ab25b207586f1d4fa8
---
 wordfreq/tokens.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/wordfreq/tokens.py b/wordfreq/tokens.py
index ef7d145..f4d1339 100644
--- a/wordfreq/tokens.py
+++ b/wordfreq/tokens.py
@@ -86,6 +86,9 @@ def turkish_tokenize(text, include_punctuation=False):
 
 mecab_tokenize = None
 def japanese_tokenize(text, include_punctuation=False):
+    """
+    Tokenize Japanese text, initializing the MeCab tokenizer if necessary.
+    """
     global mecab_tokenize
     if mecab_tokenize is None:
         from wordfreq.japanese import mecab_tokenize
@@ -96,6 +99,9 @@
 
 jieba_tokenize = None
 def chinese_tokenize(text, include_punctuation=False, external_wordlist=False):
+    """
+    Tokenize Chinese text, initializing the Jieba tokenizer if necessary.
+    """
     global jieba_tokenize
    if jieba_tokenize is None:
         from wordfreq.chinese import jieba_tokenize
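
For reference, a minimal usage sketch (not part of the patch) of the two functions touched above. It assumes wordfreq is installed together with its optional MeCab and Jieba dependencies; the sample sentences are illustrative only.

# Minimal sketch, not part of the patch: exercising the lazily-initialized
# tokenizers whose docstrings are added above. Assumes wordfreq is installed
# with its optional MeCab and Jieba dependencies; the sample sentences are
# illustrative only.
from wordfreq.tokens import japanese_tokenize, chinese_tokenize

# The first call imports wordfreq.japanese.mecab_tokenize and caches it
# in the module-level `mecab_tokenize` variable shown in the diff.
print(japanese_tokenize("猫が好きです"))

# Likewise, the first call imports wordfreq.chinese.jieba_tokenize and
# caches it in the module-level `jieba_tokenize` variable.
print(chinese_tokenize("我喜欢猫", external_wordlist=False))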