diff --git a/tests/test.py b/tests/test.py
index cabb8ac..9e1dca7 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -3,7 +3,7 @@ from wordfreq import (
     top_n_list, random_words, random_ascii_words, tokenize, lossy_tokenize
 )
 from nose.tools import (
-    eq_, assert_almost_equal, assert_greater, raises
+    eq_, assert_almost_equal, assert_greater, raises, assert_not_equal
 )
 
 
@@ -41,9 +41,24 @@ LAUGHTER_WORDS = {
 
 
 def test_languages():
-    # Make sure the number of available languages doesn't decrease
+    # Make sure we get all the languages when looking for the default
+    # 'best' wordlist
     avail = available_languages()
-    assert_greater(len(avail), 26)
+    assert_greater(len(avail), 32)
+
+    # 'small' covers the same languages, but with some different lists
+    avail_small = available_languages('small')
+    eq_(len(avail_small), len(avail))
+    assert_not_equal(avail_small, avail)
+
+    # 'combined' is the same as 'small'
+    avail_old_name = available_languages('combined')
+    eq_(avail_old_name, avail_small)
+
+    # 'large' covers fewer languages
+    avail_large = available_languages('large')
+    assert_greater(len(avail_large), 12)
+    assert_greater(len(avail), len(avail_large))
 
     # Look up the digit '2' in the main word list for each language
     for lang in avail:
@@ -55,17 +70,6 @@ def test_languages():
         assert_greater(word_frequency('2', new_lang_code), 0, new_lang_code)
 
 
-def test_twitter():
-    avail = available_languages('twitter')
-    assert_greater(len(avail), 15)
-
-    for lang in avail:
-        assert_greater(word_frequency('rt', lang, 'twitter'),
-                       word_frequency('rt', lang, 'combined'))
-        text = LAUGHTER_WORDS.get(lang, 'haha')
-        assert_greater(word_frequency(text, lang, wordlist='twitter'), 0, (text, lang))
-
-
 def test_minimums():
     eq_(word_frequency('esquivalience', 'en'), 0)
     eq_(word_frequency('esquivalience', 'en', minimum=1e-6), 1e-6)
diff --git a/wordfreq/__init__.py b/wordfreq/__init__.py
index 4f56003..99eba75 100644
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -90,11 +90,21 @@ def read_cBpack(filename):
     return data[1:]
 
 
-def available_languages(wordlist='combined'):
+def available_languages(wordlist='best'):
     """
-    List the languages (as language-code strings) that the wordlist of a given
-    name is available in.
+    Given a wordlist name, return a dictionary of language codes to filenames,
+    representing all the languages in which that wordlist is available.
     """
+    if wordlist == 'best':
+        available = available_languages('small')
+        available.update(available_languages('large'))
+        return available
+    elif wordlist == 'combined':
+        logger.warning(
+            "The 'combined' wordlists have been renamed to 'small'."
+        )
+        wordlist = 'small'
+
     available = {}
     for path in DATA_PATH.glob('*.msgpack.gz'):
         if not path.name.startswith('_'):
@@ -106,7 +116,7 @@ def available_languages(wordlist='combined'):
 
 
 @lru_cache(maxsize=None)
-def get_frequency_list(lang, wordlist='combined', match_cutoff=30):
+def get_frequency_list(lang, wordlist='best', match_cutoff=30):
     """
     Read the raw data from a wordlist file, returning it as a list of
     lists. (See `read_cBpack` for what this represents.)
@@ -187,7 +197,7 @@ def freq_to_zipf(freq):
 
 
 @lru_cache(maxsize=None)
-def get_frequency_dict(lang, wordlist='combined', match_cutoff=30):
+def get_frequency_dict(lang, wordlist='best', match_cutoff=30):
     """
     Get a word frequency list as a dictionary, mapping tokens to
     frequencies as floating-point probabilities.
@@ -201,7 +211,7 @@ def get_frequency_dict(lang, wordlist='combined', match_cutoff=30):
     return freqs
 
 
-def iter_wordlist(lang, wordlist='combined'):
+def iter_wordlist(lang, wordlist='best'):
     """
     Yield the words in a wordlist in approximate descending order of
     frequency.
@@ -247,33 +257,18 @@ def _word_frequency(word, lang, wordlist, minimum):
     return max(freq, minimum)
 
 
-def word_frequency(word, lang, wordlist='combined', minimum=0.):
+def word_frequency(word, lang, wordlist='best', minimum=0.):
     """
     Get the frequency of `word` in the language with code `lang`, from the
-    specified `wordlist`. The default wordlist is 'combined', built from
-    whichever of these five sources have sufficient data for the language:
+    specified `wordlist`.
 
-      - Full text of Wikipedia
-      - A sample of 72 million tweets collected from Twitter in 2014,
-        divided roughly into languages using automatic language detection
-      - Frequencies extracted from OpenSubtitles
-      - The Leeds Internet Corpus
-      - Google Books Syntactic Ngrams 2013
+    These wordlists can be specified:
 
-    Another available wordlist is 'twitter', which uses only the data from
-    Twitter.
-
-    Words that we believe occur at least once per million tokens, based on
-    the average of these lists, will appear in the word frequency list.
-
-    The value returned will always be at least as large as `minimum`.
-
-    If a word decomposes into multiple tokens, we'll return a smoothed estimate
-    of the word frequency that is no greater than the frequency of any of its
-    individual tokens.
-
-    It should be noted that the current tokenizer does not support
-    multi-word Chinese phrases.
+    - 'large': a wordlist built from at least 5 sources, containing word
+      frequencies of 10^-8 and higher
+    - 'small': a wordlist built from at least 3 sources, containing word
+      frequencies of 10^-6 and higher
+    - 'best': uses 'large' if available, and 'small' otherwise
     """
     args = (word, lang, wordlist, minimum)
     try:
@@ -285,7 +280,7 @@ def word_frequency(word, lang, wordlist='combined', minimum=0.):
         return _wf_cache[args]
 
 
-def zipf_frequency(word, lang, wordlist='combined', minimum=0.):
+def zipf_frequency(word, lang, wordlist='best', minimum=0.):
     """
     Get the frequency of `word`, in the language with code `lang`, on the Zipf
     scale.
@@ -313,7 +308,7 @@ def zipf_frequency(word, lang, wordlist='combined', minimum=0.):
 
 
 @lru_cache(maxsize=100)
-def top_n_list(lang, n, wordlist='combined', ascii_only=False):
+def top_n_list(lang, n, wordlist='best', ascii_only=False):
     """
     Return a frequency list of length `n` in descending order of frequency.
     This list contains words from `wordlist`, of the given language.
@@ -328,7 +323,7 @@ def top_n_list(lang, n, wordlist='combined', ascii_only=False):
     return results
 
 
-def random_words(lang='en', wordlist='combined', nwords=5, bits_per_word=12,
+def random_words(lang='en', wordlist='best', nwords=5, bits_per_word=12,
                  ascii_only=False):
     """
     Returns a string of random, space separated words.
@@ -353,7 +348,7 @@ def random_words(lang='en', wordlist='combined', nwords=5, bits_per_word=12,
     return ' '.join([random.choice(choices) for i in range(nwords)])
 
 
-def random_ascii_words(lang='en', wordlist='combined', nwords=5,
+def random_ascii_words(lang='en', wordlist='best', nwords=5,
                        bits_per_word=12):
     """
     Returns a string of random, space separated, ASCII words.
diff --git a/wordfreq/data/combined_ar.msgpack.gz b/wordfreq/data/small_ar.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_ar.msgpack.gz
rename to wordfreq/data/small_ar.msgpack.gz
diff --git a/wordfreq/data/combined_bg.msgpack.gz b/wordfreq/data/small_bg.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_bg.msgpack.gz
rename to wordfreq/data/small_bg.msgpack.gz
diff --git a/wordfreq/data/combined_bn.msgpack.gz b/wordfreq/data/small_bn.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_bn.msgpack.gz
rename to wordfreq/data/small_bn.msgpack.gz
diff --git a/wordfreq/data/combined_ca.msgpack.gz b/wordfreq/data/small_ca.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_ca.msgpack.gz
rename to wordfreq/data/small_ca.msgpack.gz
diff --git a/wordfreq/data/combined_cs.msgpack.gz b/wordfreq/data/small_cs.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_cs.msgpack.gz
rename to wordfreq/data/small_cs.msgpack.gz
diff --git a/wordfreq/data/combined_da.msgpack.gz b/wordfreq/data/small_da.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_da.msgpack.gz
rename to wordfreq/data/small_da.msgpack.gz
diff --git a/wordfreq/data/combined_de.msgpack.gz b/wordfreq/data/small_de.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_de.msgpack.gz
rename to wordfreq/data/small_de.msgpack.gz
diff --git a/wordfreq/data/combined_el.msgpack.gz b/wordfreq/data/small_el.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_el.msgpack.gz
rename to wordfreq/data/small_el.msgpack.gz
diff --git a/wordfreq/data/combined_en.msgpack.gz b/wordfreq/data/small_en.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_en.msgpack.gz
rename to wordfreq/data/small_en.msgpack.gz
diff --git a/wordfreq/data/combined_es.msgpack.gz b/wordfreq/data/small_es.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_es.msgpack.gz
rename to wordfreq/data/small_es.msgpack.gz
diff --git a/wordfreq/data/combined_fa.msgpack.gz b/wordfreq/data/small_fa.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_fa.msgpack.gz
rename to wordfreq/data/small_fa.msgpack.gz
diff --git a/wordfreq/data/combined_fi.msgpack.gz b/wordfreq/data/small_fi.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_fi.msgpack.gz
rename to wordfreq/data/small_fi.msgpack.gz
diff --git a/wordfreq/data/combined_fr.msgpack.gz b/wordfreq/data/small_fr.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_fr.msgpack.gz
rename to wordfreq/data/small_fr.msgpack.gz
diff --git a/wordfreq/data/combined_he.msgpack.gz b/wordfreq/data/small_he.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_he.msgpack.gz
rename to wordfreq/data/small_he.msgpack.gz
diff --git a/wordfreq/data/combined_hi.msgpack.gz b/wordfreq/data/small_hi.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_hi.msgpack.gz
rename to wordfreq/data/small_hi.msgpack.gz
diff --git a/wordfreq/data/combined_hu.msgpack.gz b/wordfreq/data/small_hu.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_hu.msgpack.gz
rename to wordfreq/data/small_hu.msgpack.gz
diff --git a/wordfreq/data/combined_id.msgpack.gz b/wordfreq/data/small_id.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_id.msgpack.gz
rename to wordfreq/data/small_id.msgpack.gz
diff --git a/wordfreq/data/combined_it.msgpack.gz b/wordfreq/data/small_it.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_it.msgpack.gz
rename to wordfreq/data/small_it.msgpack.gz
diff --git a/wordfreq/data/combined_ja.msgpack.gz b/wordfreq/data/small_ja.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_ja.msgpack.gz
rename to wordfreq/data/small_ja.msgpack.gz
diff --git a/wordfreq/data/combined_ko.msgpack.gz b/wordfreq/data/small_ko.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_ko.msgpack.gz
rename to wordfreq/data/small_ko.msgpack.gz
diff --git a/wordfreq/data/combined_mk.msgpack.gz b/wordfreq/data/small_mk.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_mk.msgpack.gz
rename to wordfreq/data/small_mk.msgpack.gz
diff --git a/wordfreq/data/combined_ms.msgpack.gz b/wordfreq/data/small_ms.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_ms.msgpack.gz
rename to wordfreq/data/small_ms.msgpack.gz
diff --git a/wordfreq/data/combined_nb.msgpack.gz b/wordfreq/data/small_nb.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_nb.msgpack.gz
rename to wordfreq/data/small_nb.msgpack.gz
diff --git a/wordfreq/data/combined_nl.msgpack.gz b/wordfreq/data/small_nl.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_nl.msgpack.gz
rename to wordfreq/data/small_nl.msgpack.gz
diff --git a/wordfreq/data/combined_pl.msgpack.gz b/wordfreq/data/small_pl.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_pl.msgpack.gz
rename to wordfreq/data/small_pl.msgpack.gz
diff --git a/wordfreq/data/combined_pt.msgpack.gz b/wordfreq/data/small_pt.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_pt.msgpack.gz
rename to wordfreq/data/small_pt.msgpack.gz
diff --git a/wordfreq/data/combined_ro.msgpack.gz b/wordfreq/data/small_ro.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_ro.msgpack.gz
rename to wordfreq/data/small_ro.msgpack.gz
diff --git a/wordfreq/data/combined_ru.msgpack.gz b/wordfreq/data/small_ru.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_ru.msgpack.gz
rename to wordfreq/data/small_ru.msgpack.gz
diff --git a/wordfreq/data/combined_sh.msgpack.gz b/wordfreq/data/small_sh.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_sh.msgpack.gz
rename to wordfreq/data/small_sh.msgpack.gz
diff --git a/wordfreq/data/combined_sv.msgpack.gz b/wordfreq/data/small_sv.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_sv.msgpack.gz
rename to wordfreq/data/small_sv.msgpack.gz
diff --git a/wordfreq/data/combined_tr.msgpack.gz b/wordfreq/data/small_tr.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_tr.msgpack.gz
rename to wordfreq/data/small_tr.msgpack.gz
diff --git a/wordfreq/data/combined_uk.msgpack.gz b/wordfreq/data/small_uk.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_uk.msgpack.gz
rename to wordfreq/data/small_uk.msgpack.gz
diff --git a/wordfreq/data/combined_zh.msgpack.gz b/wordfreq/data/small_zh.msgpack.gz
similarity index 100%
rename from wordfreq/data/combined_zh.msgpack.gz
rename to wordfreq/data/small_zh.msgpack.gz
diff --git a/wordfreq/data/twitter_ar.msgpack.gz b/wordfreq/data/twitter_ar.msgpack.gz
deleted file mode 100644
index cb5833f..0000000
Binary files a/wordfreq/data/twitter_ar.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_bg.msgpack.gz b/wordfreq/data/twitter_bg.msgpack.gz
deleted file mode 100644
index dca1a53..0000000
Binary files a/wordfreq/data/twitter_bg.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_bn.msgpack.gz b/wordfreq/data/twitter_bn.msgpack.gz
deleted file mode 100644
index 2d9c2e9..0000000
Binary files a/wordfreq/data/twitter_bn.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_ca.msgpack.gz b/wordfreq/data/twitter_ca.msgpack.gz
deleted file mode 100644
index 0ac8477..0000000
Binary files a/wordfreq/data/twitter_ca.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_cs.msgpack.gz b/wordfreq/data/twitter_cs.msgpack.gz
deleted file mode 100644
index a79cb61..0000000
Binary files a/wordfreq/data/twitter_cs.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_da.msgpack.gz b/wordfreq/data/twitter_da.msgpack.gz
deleted file mode 100644
index 392b1d9..0000000
Binary files a/wordfreq/data/twitter_da.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_de.msgpack.gz b/wordfreq/data/twitter_de.msgpack.gz
deleted file mode 100644
index 04b0f55..0000000
Binary files a/wordfreq/data/twitter_de.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_en.msgpack.gz b/wordfreq/data/twitter_en.msgpack.gz
deleted file mode 100644
index 604df8e..0000000
Binary files a/wordfreq/data/twitter_en.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_es.msgpack.gz b/wordfreq/data/twitter_es.msgpack.gz
deleted file mode 100644
index 03ad4d9..0000000
Binary files a/wordfreq/data/twitter_es.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_fa.msgpack.gz b/wordfreq/data/twitter_fa.msgpack.gz
deleted file mode 100644
index 008098a..0000000
Binary files a/wordfreq/data/twitter_fa.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_fi.msgpack.gz b/wordfreq/data/twitter_fi.msgpack.gz
deleted file mode 100644
index 221d599..0000000
Binary files a/wordfreq/data/twitter_fi.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_fr.msgpack.gz b/wordfreq/data/twitter_fr.msgpack.gz
deleted file mode 100644
index 5f59122..0000000
Binary files a/wordfreq/data/twitter_fr.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_he.msgpack.gz b/wordfreq/data/twitter_he.msgpack.gz
deleted file mode 100644
index 2bb0363..0000000
Binary files a/wordfreq/data/twitter_he.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_hi.msgpack.gz b/wordfreq/data/twitter_hi.msgpack.gz
deleted file mode 100644
index ee8df85..0000000
Binary files a/wordfreq/data/twitter_hi.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_hu.msgpack.gz b/wordfreq/data/twitter_hu.msgpack.gz
deleted file mode 100644
index cddde5d..0000000
Binary files a/wordfreq/data/twitter_hu.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_id.msgpack.gz b/wordfreq/data/twitter_id.msgpack.gz
deleted file mode 100644
index a7f020e..0000000
Binary files a/wordfreq/data/twitter_id.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_it.msgpack.gz b/wordfreq/data/twitter_it.msgpack.gz
deleted file mode 100644
index 2c2ee97..0000000
Binary files a/wordfreq/data/twitter_it.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_ja.msgpack.gz b/wordfreq/data/twitter_ja.msgpack.gz
deleted file mode 100644
index 2d398de..0000000
Binary files a/wordfreq/data/twitter_ja.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_ko.msgpack.gz b/wordfreq/data/twitter_ko.msgpack.gz
deleted file mode 100644
index 07ab5ce..0000000
Binary files a/wordfreq/data/twitter_ko.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_ms.msgpack.gz b/wordfreq/data/twitter_ms.msgpack.gz
deleted file mode 100644
index 31a12ad..0000000
Binary files a/wordfreq/data/twitter_ms.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_nb.msgpack.gz b/wordfreq/data/twitter_nb.msgpack.gz
deleted file mode 100644
index 542ba22..0000000
Binary files a/wordfreq/data/twitter_nb.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_nl.msgpack.gz b/wordfreq/data/twitter_nl.msgpack.gz
deleted file mode 100644
index d6821af..0000000
Binary files a/wordfreq/data/twitter_nl.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_pl.msgpack.gz b/wordfreq/data/twitter_pl.msgpack.gz
deleted file mode 100644
index dde20bb..0000000
Binary files a/wordfreq/data/twitter_pl.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_pt.msgpack.gz b/wordfreq/data/twitter_pt.msgpack.gz
deleted file mode 100644
index f6e3f7e..0000000
Binary files a/wordfreq/data/twitter_pt.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_ro.msgpack.gz b/wordfreq/data/twitter_ro.msgpack.gz
deleted file mode 100644
index d91122c..0000000
Binary files a/wordfreq/data/twitter_ro.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_ru.msgpack.gz b/wordfreq/data/twitter_ru.msgpack.gz
deleted file mode 100644
index b60cf65..0000000
Binary files a/wordfreq/data/twitter_ru.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_sh.msgpack.gz b/wordfreq/data/twitter_sh.msgpack.gz
deleted file mode 100644
index b2d4b88..0000000
Binary files a/wordfreq/data/twitter_sh.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_sv.msgpack.gz b/wordfreq/data/twitter_sv.msgpack.gz
deleted file mode 100644
index c4b1103..0000000
Binary files a/wordfreq/data/twitter_sv.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_tr.msgpack.gz b/wordfreq/data/twitter_tr.msgpack.gz
deleted file mode 100644
index 824a1d4..0000000
Binary files a/wordfreq/data/twitter_tr.msgpack.gz and /dev/null differ
diff --git a/wordfreq/data/twitter_uk.msgpack.gz b/wordfreq/data/twitter_uk.msgpack.gz
deleted file mode 100644
index 62b9ef8..0000000
Binary files a/wordfreq/data/twitter_uk.msgpack.gz and /dev/null differ