From 06f8b299712a5952c5847ece8b13c1a18fcc2ed0 Mon Sep 17 00:00:00 2001
From: Rob Speer <rob@luminoso.com>
Date: Tue, 22 Sep 2015 15:31:27 -0400
Subject: [PATCH] document what this file is for

---
 scripts/make_chinese_mapping.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/scripts/make_chinese_mapping.py b/scripts/make_chinese_mapping.py
index 19b7826..9855e18 100644
--- a/scripts/make_chinese_mapping.py
+++ b/scripts/make_chinese_mapping.py
@@ -1,3 +1,16 @@
+"""
+Generate a Python file, _chinese_mapping.py, that maps Traditional Chinese
+characters to their Simplified Chinese equivalents.
+
+This is meant to be a normalization of text, somewhat like case-folding -- not
+actual translation, a task for which this method would be unsuitable. We
+store word frequencies using Simplified Chinese characters so that, in the large
+number of cases where a Traditional Chinese word has an obvious Simplified Chinese
+mapping, we can get a frequency for it that's the same in Simplified and Traditional
+Chinese.
+
+Generating this mapping requires the external Chinese conversion tool OpenCC.
+"""
 import unicodedata
 import itertools
 import os