Mirror of https://github.com/rspeer/wordfreq.git (synced 2024-12-23 09:21:37 +00:00)
add docstrings to chinese_ and japanese_tokenize
Former-commit-id: e1f7a1ccf3
This commit is contained in:
parent f4d865c0be
commit eb08c0a951
@@ -86,6 +86,9 @@ def turkish_tokenize(text, include_punctuation=False):
 
 mecab_tokenize = None
 def japanese_tokenize(text, include_punctuation=False):
+    """
+    Tokenize Japanese text, initializing the MeCab tokenizer if necessary.
+    """
     global mecab_tokenize
     if mecab_tokenize is None:
         from wordfreq.japanese import mecab_tokenize
@@ -96,6 +99,9 @@ def japanese_tokenize(text, include_punctuation=False):
 
 jieba_tokenize = None
 def chinese_tokenize(text, include_punctuation=False, external_wordlist=False):
+    """
+    Tokenize Chinese text, initializing the Jieba tokenizer if necessary.
+    """
     global jieba_tokenize
     if jieba_tokenize is None:
         from wordfreq.chinese import jieba_tokenize
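Both hunks document the same lazy-initialization pattern: the module-level tokenizer name starts as None, and the heavyweight backend (MeCab for Japanese, Jieba for Chinese) is imported only the first time the function runs, with the `global` declaration making the deferred import rebind the module-level name. Below is a minimal, runnable sketch of that pattern; the names are illustrative stand-ins, not part of wordfreq, and a compiled regex plays the role of the expensive external backend.

    # Sketch of the lazy-initialization pattern used by japanese_tokenize
    # and chinese_tokenize. Names here are hypothetical.

    _tokenizer = None


    def tokenize_lazily(text):
        """
        Tokenize `text`, building the (pretend-expensive) backend only on
        first use, the way wordfreq defers importing MeCab and Jieba.
        """
        global _tokenizer
        if _tokenizer is None:
            # Deferred setup: nothing heavy is loaded until some text in
            # this language actually needs to be tokenized.
            import re
            _tokenizer = re.compile(r"\w+", re.UNICODE).findall
        return _tokenizer(text)


    if __name__ == "__main__":
        print(tokenize_lazily("lazy initialization keeps startup cheap"))
        # -> ['lazy', 'initialization', 'keeps', 'startup', 'cheap']

The payoff of the pattern, as used here, is that MeCab and Jieba stay optional: users who never tokenize Japanese or Chinese text never pay the import cost and don't need those packages installed at all.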