diff --git a/tests/test.py b/tests/test.py
index abc33c3..91f990a 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -1,5 +1,5 @@
 from wordfreq import (
-    word_frequency, available_languages, dB_to_freq, iter_wordlist,
+    word_frequency, available_languages, cB_to_freq, iter_wordlist,
     top_n_list, random_words, random_ascii_words
 )
 from nose.tools import (
@@ -48,7 +48,7 @@ def test_most_common_words():
         return top_n_list(lang, 1)[0]
 
     eq_(get_most_common('ar'), 'في')
-    eq_(get_most_common('de'), 'der')
+    eq_(get_most_common('de'), 'die')
     eq_(get_most_common('en'), 'the')
     eq_(get_most_common('es'), 'de')
     eq_(get_most_common('fr'), 'de')
@@ -70,15 +70,15 @@ def test_language_matching():
     eq_(word_frequency('的', 'cmn'), freq)
 
 
-def test_dB_conversion():
-    eq_(dB_to_freq(0), 1.)
-    assert_almost_equal(dB_to_freq(-10), 0.1)
-    assert_almost_equal(dB_to_freq(-60), 1e-6)
+def test_cB_conversion():
+    eq_(cB_to_freq(0), 1.)
+    assert_almost_equal(cB_to_freq(-100), 0.1)
+    assert_almost_equal(cB_to_freq(-600), 1e-6)
 
 
 @raises(ValueError)
-def test_failed_dB_conversion():
-    dB_to_freq(1)
+def test_failed_cB_conversion():
+    cB_to_freq(1)
 
 
 def test_tokenization():
diff --git a/wordfreq/__init__.py b/wordfreq/__init__.py
index 3858b98..dd26811 100644
--- a/wordfreq/__init__.py
+++ b/wordfreq/__init__.py
@@ -53,36 +53,56 @@ def tokenize(text, lang):
         return simple_tokenize(text)
 
 
-def read_dBpack(filename):
+def read_cBpack(filename):
     """
     Read a file from an idiosyncratic format that we use for storing
-    approximate word frequencies, called "dBpack".
+    approximate word frequencies, called "cBpack".
 
-    The dBpack format is as follows:
+    The cBpack format is as follows:
 
     - The file on disk is a gzipped file in msgpack format, which decodes to a
-      list of lists of words.
+      list whose first element is a header, and whose remaining elements are
+      lists of words.
+
+    - The header is a dictionary with 'format' and 'version' keys that make
+      sure that we're reading the right thing.
 
     - Each inner list of words corresponds to a particular word frequency,
-      rounded to the nearest decibel. 0 dB represents a word that occurs with
-      probability 1, so it is the only word in the data (this of course doesn't
-      happen). -20 dB represents a word that occurs once per 100 tokens, -30 dB
-      represents a word that occurs once per 1000 tokens, and so on.
+      rounded to the nearest centibel -- that is, one tenth of a decibel, or
+      a factor of 10 ** .01.
 
-    - The index of each list within the overall list is the negative of its
-      frequency in decibels.
+      0 cB represents a word that occurs with probability 1, so it is the only
+      word in the data (this of course doesn't happen). -200 cB represents a
+      word that occurs once per 100 tokens, -300 cB represents a word that
+      occurs once per 1000 tokens, and so on.
+
+    - The index of each list within the overall list (without the header) is
+      the negative of its frequency in centibels.
 
     - Each inner list is sorted in alphabetical order.
 
     As an example, consider a corpus consisting only of the words "red fish
-    blue fish". The word "fish" occurs as 50% of tokens (-3 dB), while "red"
-    and "blue" occur as 25% of tokens (-6 dB). The dBpack file of their word
-    frequencies would decode to this list:
+    blue fish". The word "fish" occurs as 50% of tokens (-30 cB), while "red"
+    and "blue" occur as 25% of tokens (-60 cB). The cBpack file of their word
+    frequencies would decode to this:
 
-        [[], [], [], ['fish'], [], [], ['blue', 'red']]
+        [
+            {'format': 'cB', 'version': 1},
+            [], [], [], ...    # 30 empty lists
+            ['fish'],
+            [], [], [], ...    # 29 more empty lists
+            ['blue', 'red']
+        ]
     """
     with gzip.open(filename, 'rb') as infile:
-        return msgpack.load(infile, encoding='utf-8')
+        data = msgpack.load(infile, encoding='utf-8')
+        header = data[0] if data else None
+        if (
+            not isinstance(header, dict) or header.get('format') != 'cB'
+            or header.get('version') != 1
+        ):
+            raise ValueError("Unexpected header: %r" % header)
+        return data[1:]
 
 
 def available_languages(wordlist='combined'):
@@ -103,7 +123,7 @@ def available_languages(wordlist='combined'):
 def get_frequency_list(lang, wordlist='combined', match_cutoff=30):
     """
     Read the raw data from a wordlist file, returning it as a list of
-    lists. (See `read_dBpack` for what this represents.)
+    lists. (See `read_cBpack` for what this represents.)
 
     Because we use the `langcodes` module, we can handle slight
     variations in language codes. For example, looking for 'pt-BR',
@@ -123,25 +143,25 @@ def get_frequency_list(lang, wordlist='combined', match_cutoff=30):
             % (lang, best, langcodes.get(best).language_name('en'))
         )
 
-    return read_dBpack(available[best])
+    return read_cBpack(available[best])
 
 
-def dB_to_freq(dB):
+def cB_to_freq(cB):
     """
-    Convert a word frequency from the logarithmic decibel scale that we use
+    Convert a word frequency from the logarithmic centibel scale that we use
     internally, to a proportion from 0 to 1.
 
-    On this scale, 0 dB represents the maximum possible frequency of
-    1.0. -10 dB represents a word that happens 1 in 10 times,
-    -20 dB represents something that happens 1 in 100 times, and so on.
+    On this scale, 0 cB represents the maximum possible frequency of
+    1.0. -100 cB represents a word that happens 1 in 10 times,
+    -200 cB represents something that happens 1 in 100 times, and so on.
 
-    In general, x dB represents a frequency of 10 ** (x/10).
+    In general, x cB represents a frequency of 10 ** (x/100).
     """
-    if dB > 0:
+    if cB > 0:
         raise ValueError(
-            "A frequency cannot be a positive number of decibels."
+            "A frequency cannot be a positive number of centibels."
         )
-    return 10 ** (dB / 10)
+    return 10 ** (cB / 100)
 
 
 @lru_cache(maxsize=None)
@@ -154,7 +174,7 @@ def get_frequency_dict(lang, wordlist='combined', match_cutoff=30):
     pack = get_frequency_list(lang, wordlist, match_cutoff)
     for index, bucket in enumerate(pack):
         for word in bucket:
-            freqs[word] = dB_to_freq(-index)
+            freqs[word] = cB_to_freq(-index)
     return freqs
 
 
diff --git a/wordfreq/data/combined_ar.msgpack.gz b/wordfreq/data/combined_ar.msgpack.gz
index c97a420..5ce708f 100644
Binary files a/wordfreq/data/combined_ar.msgpack.gz and b/wordfreq/data/combined_ar.msgpack.gz differ
diff --git a/wordfreq/data/combined_de.msgpack.gz b/wordfreq/data/combined_de.msgpack.gz
index eec9c65..35384fc 100644
Binary files a/wordfreq/data/combined_de.msgpack.gz and b/wordfreq/data/combined_de.msgpack.gz differ
diff --git a/wordfreq/data/combined_el.msgpack.gz b/wordfreq/data/combined_el.msgpack.gz
index 107e3b5..bc0beab 100644
Binary files a/wordfreq/data/combined_el.msgpack.gz and b/wordfreq/data/combined_el.msgpack.gz differ
diff --git a/wordfreq/data/combined_en.msgpack.gz b/wordfreq/data/combined_en.msgpack.gz
index 842df21..673b9ca 100644
Binary files a/wordfreq/data/combined_en.msgpack.gz and b/wordfreq/data/combined_en.msgpack.gz differ
diff --git a/wordfreq/data/combined_es.msgpack.gz b/wordfreq/data/combined_es.msgpack.gz
index ff030ca..9a4b475 100644
Binary files a/wordfreq/data/combined_es.msgpack.gz and b/wordfreq/data/combined_es.msgpack.gz differ
diff --git a/wordfreq/data/combined_fr.msgpack.gz b/wordfreq/data/combined_fr.msgpack.gz
index 33140ee..bbcfc60 100644
Binary files a/wordfreq/data/combined_fr.msgpack.gz and b/wordfreq/data/combined_fr.msgpack.gz differ
diff --git a/wordfreq/data/combined_id.msgpack.gz b/wordfreq/data/combined_id.msgpack.gz
index 7da4279..4ff43c3 100644
Binary files a/wordfreq/data/combined_id.msgpack.gz and b/wordfreq/data/combined_id.msgpack.gz differ
diff --git a/wordfreq/data/combined_it.msgpack.gz b/wordfreq/data/combined_it.msgpack.gz
index 6460266..f02c507 100644
Binary files a/wordfreq/data/combined_it.msgpack.gz and b/wordfreq/data/combined_it.msgpack.gz differ
diff --git a/wordfreq/data/combined_ja.msgpack.gz b/wordfreq/data/combined_ja.msgpack.gz
index 0b515b1..1c38166 100644
Binary files a/wordfreq/data/combined_ja.msgpack.gz and b/wordfreq/data/combined_ja.msgpack.gz differ
diff --git a/wordfreq/data/combined_ko.msgpack.gz b/wordfreq/data/combined_ko.msgpack.gz
index f23a604..5469582 100644
Binary files a/wordfreq/data/combined_ko.msgpack.gz and b/wordfreq/data/combined_ko.msgpack.gz differ
diff --git a/wordfreq/data/combined_ms.msgpack.gz b/wordfreq/data/combined_ms.msgpack.gz
index ca7e111..3c63beb 100644
Binary files a/wordfreq/data/combined_ms.msgpack.gz and b/wordfreq/data/combined_ms.msgpack.gz differ
diff --git a/wordfreq/data/combined_nl.msgpack.gz b/wordfreq/data/combined_nl.msgpack.gz
index eaaef0b..eed5525 100644
Binary files a/wordfreq/data/combined_nl.msgpack.gz and b/wordfreq/data/combined_nl.msgpack.gz differ
diff --git a/wordfreq/data/combined_pt.msgpack.gz b/wordfreq/data/combined_pt.msgpack.gz
index c7fb843..b8251b3 100644
Binary files a/wordfreq/data/combined_pt.msgpack.gz and b/wordfreq/data/combined_pt.msgpack.gz differ
diff --git a/wordfreq/data/combined_ru.msgpack.gz b/wordfreq/data/combined_ru.msgpack.gz
index f2d848c..6a05d2b 100644
Binary files a/wordfreq/data/combined_ru.msgpack.gz and b/wordfreq/data/combined_ru.msgpack.gz differ
diff --git a/wordfreq/data/combined_zh.msgpack.gz b/wordfreq/data/combined_zh.msgpack.gz
index 84cf890..c988cb8 100644
Binary files a/wordfreq/data/combined_zh.msgpack.gz and b/wordfreq/data/combined_zh.msgpack.gz differ