diff --git a/wordfreq_builder/rules.ninja b/wordfreq_builder/rules.ninja
index 8303663..0b7e57f 100644
--- a/wordfreq_builder/rules.ninja
+++ b/wordfreq_builder/rules.ninja
@@ -55,6 +55,17 @@ rule convert_leeds
 rule convert_opensubtitles
     command = mkdir -p $$(dirname $out) && tr ' ' ',' < $in > $out
 
+# Convert and clean up the Google Books Syntactic N-grams data. Concatenate all
+# the input files, keep only the single words and their counts, and only keep
+# lines with counts of 100 or more.
+#
+# (These will still be repeated as the word appears in different grammatical
+# roles, information that the source data provides that we're discarding. The
+# source data was already filtered to only show words in roles with at least
+# two-digit counts of occurrences.)
+rule convert_google_syntactic_ngrams
+    command = mkdir -p $$(dirname $out) && zcat $in | cut -f 1,3 | grep -v '[,"]' | sed -rn 's/(.*)\s(...+)/\1,\2/p' > $out
+
 rule count
     command = mkdir -p $$(dirname $out) && python -m wordfreq_builder.cli.count_tokens $in $out
 
diff --git a/wordfreq_builder/wordfreq_builder/config.py b/wordfreq_builder/wordfreq_builder/config.py
index b6af74d..72f5967 100644
--- a/wordfreq_builder/wordfreq_builder/config.py
+++ b/wordfreq_builder/wordfreq_builder/config.py
@@ -16,6 +16,7 @@ CONFIG = {
         'wikipedia': [
             'ar', 'de', 'en', 'es', 'fr', 'id', 'it', 'ja', 'ko', 'ms', 'nl',
             'pt', 'ru'
+            # many more can be added
         ],
         'opensubtitles': [
             # All languages where the most common word in OpenSubtitles
@@ -27,6 +28,11 @@ CONFIG = {
         ],
         'leeds': [
             'ar', 'de', 'el', 'en', 'es', 'fr', 'it', 'ja', 'pt', 'ru', 'zh'
+        ],
+        'google-books': [
+            'en',
+            # Using the 2012 data, we could get French, German, Italian,
+            # Russian, Spanish, and (Simplified) Chinese.
         ]
     },
     'wordlist_paths': {
@@ -34,6 +40,7 @@ CONFIG = {
         'wikipedia': 'generated/wikipedia/wikipedia_{lang}.{ext}',
         'opensubtitles': 'generated/opensubtitles/opensubtitles_{lang}.{ext}',
         'leeds': 'generated/leeds/leeds_internet_{lang}.{ext}',
+        'google-books': 'generated/google-books/google_books_{lang}.{ext}',
         'combined': 'generated/combined/combined_{lang}.{ext}'
     },
     'min_sources': 2
diff --git a/wordfreq_builder/wordfreq_builder/ninja.py b/wordfreq_builder/wordfreq_builder/ninja.py
index 3dcc9ee..243b3ef 100644
--- a/wordfreq_builder/wordfreq_builder/ninja.py
+++ b/wordfreq_builder/wordfreq_builder/ninja.py
@@ -71,6 +71,11 @@ def make_ninja_deps(rules_filename, out=sys.stdout):
             CONFIG['sources']['wikipedia']
         )
     )
+    lines.extend(
+        google_books_deps(
+            data_filename('raw-input/google-books')
+        )
+    )
     lines.extend(
         leeds_deps(
             data_filename('source-lists/leeds'),
@@ -106,6 +111,22 @@ def wikipedia_deps(dirname_in, languages):
     return lines
 
 
+def google_books_deps(dirname_in):
+    # Get English data from the split-up files of the Google Syntactic N-grams
+    # 2013 corpus.
+    lines = []
+
+    # Yes, the files are numbered 00 through 98 of 99. This is not an
+    # off-by-one error. Not on my part, anyway.
+    input_files = [
+        '{}/nodes.{:>02d}-of-99.gz'.format(dirname_in, i)
+        for i in range(99)
+    ]
+    output_file = wordlist_filename('google-books', 'en', 'counts.txt')
+    add_dep(lines, 'convert_google_syntactic_ngrams', input_files, output_file)
+    return lines
+
+
 def twitter_preprocess_deps(input_filename, slice_prefix,
                             combined_prefix, slices, languages):
     lines = []
@@ -192,7 +213,7 @@ def combine_lists(languages):
         output_dBpack = wordlist_filename('combined', language, 'msgpack.gz')
         add_dep(lines, 'freqs2dB', output_file, output_dBpack,
                 extra='wordfreq_builder/word_counts.py')
-        
+
         lines.append('default {}'.format(output_dBpack))
 
     return lines