diff --git a/wordfreq_builder/wordfreq_builder/tokenizers.py b/wordfreq_builder/wordfreq_builder/tokenizers.py
index 8e9f192..997c0c8 100644
--- a/wordfreq_builder/wordfreq_builder/tokenizers.py
+++ b/wordfreq_builder/wordfreq_builder/tokenizers.py
@@ -1,7 +1,7 @@
 from html.entities import name2codepoint
-from wordfreq import tokenize, TOKEN_RE, NON_PUNCT_RANGE
+from wordfreq import tokenize
 from ftfy.fixes import unescape_html
-import re
+import regex
 import pycld2
 
 CLD2_BAD_CHAR_RANGE = "[%s]" % "".join(
@@ -18,10 +18,10 @@ CLD2_BAD_CHAR_RANGE = "[%s]" % "".join(
     ] +
     [chr(65534+65536*x+y) for x in range(17) for y in range(2)]
 )
-CLD2_BAD_CHARS_RE = re.compile(CLD2_BAD_CHAR_RANGE)
+CLD2_BAD_CHARS_RE = regex.compile(CLD2_BAD_CHAR_RANGE)
 
-TWITTER_HANDLE_RE = re.compile('@{0}+'.format(NON_PUNCT_RANGE))
-TCO_RE = re.compile('http(?:s)?://t.co/[a-zA-Z0-9]+')
+TWITTER_HANDLE_RE = regex.compile(r'@[\S--\p{punct}]+')
+TCO_RE = regex.compile('http(?:s)?://t.co/[a-zA-Z0-9]+')
 
 def cld2_surface_tokenizer(text):
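
Note on the new handle pattern: instead of interpolating wordfreq's NON_PUNCT_RANGE, it uses a character-class difference, "non-whitespace minus punctuation", which the stdlib re module cannot express directly; that appears to be the motivation for switching to the third-party regex module here. The sketch below is illustrative only and is not part of the patch: the sample text and variable names are made up, and regex.V1 is passed explicitly because set operations belong to the regex module's V1 behaviour.

import regex

# Set difference inside a character class: one or more characters that are
# non-whitespace AND not punctuation. Compiled with V1 behaviour, which is
# what enables the "--" operator.
handle_re = regex.compile(r'@[\S--\p{punct}]+', regex.V1)
tco_re = regex.compile(r'http(?:s)?://t.co/[a-zA-Z0-9]+')

sample = 'thanks @ConceptNet! details at http://t.co/abc123'
print(handle_re.findall(sample))                   # ['@ConceptNet']
print(tco_re.sub('', handle_re.sub('', sample)))   # 'thanks ! details at '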