diff --git a/tests/test_chinese.py b/tests/test_chinese.py
index 25e6fe1..58df4a1 100644
--- a/tests/test_chinese.py
+++ b/tests/test_chinese.py
@@ -55,10 +55,19 @@ def test_tokens():
         ]
     )
 
-    # You match the same tokens if you look it up in Traditional Chinese.
-    eq_(tokenize(fact_simplified, 'zh'), tokenize(fact_traditional, 'zh'))
+    # Check that Traditional Chinese works at all
     assert_greater(word_frequency(fact_traditional, 'zh'), 0)
 
+    # You get the same token lengths if you look it up in Traditional Chinese,
+    # but the words are different
+    simp_tokens = tokenize(fact_simplified, 'zh', include_punctuation=True)
+    trad_tokens = tokenize(fact_traditional, 'zh', include_punctuation=True)
+    eq_(''.join(simp_tokens), fact_simplified)
+    eq_(''.join(trad_tokens), fact_traditional)
+    simp_lengths = [len(token) for token in simp_tokens]
+    trad_lengths = [len(token) for token in trad_tokens]
+    eq_(simp_lengths, trad_lengths)
+
 
 def test_combination():
     xiexie_freq = word_frequency('谢谢', 'zh')  # "Thanks"
diff --git a/wordfreq/chinese.py b/wordfreq/chinese.py
index c57e937..9f7b95a 100644
--- a/wordfreq/chinese.py
+++ b/wordfreq/chinese.py
@@ -49,4 +49,11 @@ def jieba_tokenize(text, external_wordlist=False):
     else:
        if jieba_tokenizer is None:
            jieba_tokenizer = jieba.Tokenizer(dictionary=DICT_FILENAME)
-        return jieba_tokenizer.lcut(simplify_chinese(text), HMM=False)
+
+        # Tokenize the Simplified Chinese version of the text, but return
+        # those spans from the original text, even if it's in Traditional
+        # Chinese
+        tokens = []
+        for _token, start, end in jieba_tokenizer.tokenize(simplify_chinese(text), HMM=False):
+            tokens.append(text[start:end])
+        return tokens
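
For context, the span-mapping trick in the `jieba_tokenize` change can be sketched on its own. The snippet below is a minimal illustration, not part of the diff: `tokenize_preserving_original` and the tiny `SIMPLIFY` table are hypothetical stand-ins for wordfreq's `jieba_tokenize` and `simplify_chinese`. It relies on the same property the patch does: simplification maps one character to one character, so token spans found in the Simplified text line up exactly with the original Traditional text.

```python
import jieba

# Hypothetical, partial stand-in for wordfreq's simplify_chinese, which maps
# each Traditional character to a Simplified one, one character to one
# character, so string length and offsets are preserved.
SIMPLIFY = str.maketrans('漢語個們這來', '汉语个们这来')


def tokenize_preserving_original(text):
    simplified = text.translate(SIMPLIFY)
    # jieba.tokenize yields (word, start, end) spans over the string it was
    # given; because simplification is one-to-one, the same spans index the
    # original (possibly Traditional) text.
    return [text[start:end]
            for _word, start, end in jieba.tokenize(simplified, HMM=False)]


print(tokenize_preserving_original('這個漢語'))  # tokens are Traditional spans
```

This is also what the updated test is checking: Simplified and Traditional inputs produce different token strings, but the same token lengths, and concatenating the tokens reproduces the original input exactly.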