diff --git a/wordfreq_builder/wordfreq_builder/tokenizers.py b/wordfreq_builder/wordfreq_builder/tokenizers.py
index 6c21e92..b1a8c27 100644
--- a/wordfreq_builder/wordfreq_builder/tokenizers.py
+++ b/wordfreq_builder/wordfreq_builder/tokenizers.py
@@ -47,17 +47,6 @@ def last_tab(line):
     return line.split('\t')[-1].strip()
 
 
-def lowercase_text_filter(token):
-    """
-    If this looks like a token that we want to count, return it, lowercased.
-    If not, filter it out by returning None.
-    """
-    if TOKEN_RE.search(token):
-        return token.lower()
-    else:
-        return None
-
-
 def tokenize_file(in_filename, out_prefix, tokenizer, line_reader=last_tab):
     """
     Process a file by running it through the given tokenizer, sorting the
@@ -91,27 +80,3 @@ def fix_entities(text):
     def replace_entity(match):
         return chr(name2codepoint[match.group(1)])
     return ENTITY_RE.sub(replace_entity, text)
-
-
-def monolingual_tokenize_file(in_filename, out_filename, language,
-                              tokenizer, line_reader=last_tab,
-                              sample_proportion=1):
-    """
-    Process a file by running it through the given tokenizer, only keeping
-    lines of the language we're asking for, and inserting newlines
-    to mark the token boundaries.
-
-    `line_reader` is applied to each line before it given to the tokenizer
-
-    Only the first line out of every `sample_proportion` lines are run through
-    then tokenizer.
-    """
-    with open(in_filename, encoding='utf-8', errors='replace') as in_file:
-        with open(out_filename, 'w', encoding='utf-8') as out_file:
-            for i, line in enumerate(in_file):
-                if i % sample_proportion == 0:
-                    text = line_reader(line)
-                    tokens, line_language = tokenizer(text)
-                    if line_language == language:
-                        for token in tokens:
-                            print(token, file=out_file)