diff --git a/tests/test.py b/tests/test.py
index 2d11e35..397ce97 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -95,6 +95,12 @@ def test_tokenization():
     # apply.
     eq_(tokenize("can.t", 'en'), ['can', 't'])
 
+
+def test_casefolding():
+    eq_(tokenize('WEISS', 'de'), ['weiss'])
+    eq_(tokenize('weiß', 'de'), ['weiss'])
+
+
 def test_phrase_freq():
     plant = word_frequency("plan.t", 'en')
     assert_greater(plant, 0)
diff --git a/wordfreq/__init__.py b/wordfreq/__init__.py
index f861c89..7f441ca 100644
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -149,7 +149,7 @@ def simple_tokenize(text):
     sequence, but they are if they appear internally. "cats'" is not a
     token, but "cat's" is.
     """
-    return [token.lower() for token in TOKEN_RE.findall(text)]
+    return [token.casefold() for token in TOKEN_RE.findall(text)]
 
 mecab_tokenize = None
 def tokenize(text, lang):
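
Why casefold() rather than lower(): str.lower() performs simple lowercasing, while str.casefold() applies full Unicode case folding, which additionally normalizes characters like the German sharp s ('ß') to 'ss'. A quick illustration of the standard-library behavior that the new test_casefolding test relies on (plain Python, independent of wordfreq):

    >>> 'weiß'.lower()      # simple lowercasing leaves 'ß' unchanged
    'weiß'
    >>> 'weiß'.casefold()   # full case folding maps 'ß' to 'ss'
    'weiss'
    >>> 'WEISS'.casefold() == 'weiß'.casefold()
    True

This is why both tokenize('WEISS', 'de') and tokenize('weiß', 'de') are expected to yield ['weiss']: casefolding gives both spellings the same caseless form, which lower() alone would not.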