From f7babea35201385f52bdf7315ede660354e077c8 Mon Sep 17 00:00:00 2001
From: Rob Speer
Date: Wed, 26 Aug 2015 13:54:50 -0400
Subject: [PATCH] correct the simple_tokenize docstring

---
 wordfreq/tokens.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/wordfreq/tokens.py b/wordfreq/tokens.py
index 0936220..d0b160d 100644
--- a/wordfreq/tokens.py
+++ b/wordfreq/tokens.py
@@ -55,9 +55,8 @@ def simple_tokenize(text):
       ideograms and hiragana) relatively untokenized, instead of splitting
       each character into its own token.
 
-    - It excludes punctuation, many classes of symbols, and "extenders" with
-      nothing to extend, from being tokens, but it allows miscellaneous symbols
-      such as emoji.
+    - It outputs only the tokens that start with a word-like character, or
+      miscellaneous symbols such as emoji.
 
     - It breaks on all spaces, even the "non-breaking" ones.
     """
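
For context (not part of the patch): a minimal usage sketch of the behavior the
revised docstring describes, assuming wordfreq is installed. The function name,
module path, and signature come from the diff above; the sample strings and the
comments about expected behavior are illustrative, and exact token output may
vary by wordfreq version.

    from wordfreq.tokens import simple_tokenize

    samples = [
        "this text has\u00a0non-breaking spaces",        # breaks on all spaces
        "emoji survive \U0001F60A but bare (!!!) do not", # word-like tokens and symbols kept
        "\u65e5\u672c\u8a9e\u306e\u30c6\u30ad\u30b9\u30c8",  # ideograms/hiragana left relatively untokenized
    ]

    for text in samples:
        # Per the docstring: only tokens that start with a word-like character,
        # plus miscellaneous symbols such as emoji, are emitted.
        print(simple_tokenize(text))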