From 5b9b2d2d0281e59186841c06389910e86f91c852 Mon Sep 17 00:00:00 2001
From: Robyn Speer
Date: Fri, 4 Sep 2015 16:32:37 -0400
Subject: [PATCH] add Polish and Swedish, which have sufficient data

Former-commit-id: 447d7e5134d9e64e55318e6e4f6880c182384153
---
 wordfreq_builder/wordfreq_builder/config.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/wordfreq_builder/wordfreq_builder/config.py b/wordfreq_builder/wordfreq_builder/config.py
index dc61bc6..42fccff 100644
--- a/wordfreq_builder/wordfreq_builder/config.py
+++ b/wordfreq_builder/wordfreq_builder/config.py
@@ -12,15 +12,15 @@ CONFIG = {
     # Consider adding:
     # 'th' when we get tokenization for it
     # 'hi' when we stop messing up its tokenization
-    # 'tl' because it's probably ready right now
-    # 'pl' because we have 3 sources for it
+    # 'tl' with one more data source
+    # 'sv' because its data sources are ready
     'twitter': [
         'ar', 'de', 'el', 'en', 'es', 'fr', 'id', 'it', 'ja', 'ko', 'ms', 'nl',
-        'pt', 'ru', 'tr'
+        'pl', 'pt', 'ru', 'sv', 'tr'
     ],
     'wikipedia': [
         'ar', 'de', 'en', 'el', 'es', 'fr', 'id', 'it', 'ja', 'ko', 'ms', 'nl',
-        'pt', 'ru', 'tr'
+        'pl', 'pt', 'ru', 'sv', 'tr'
     ],
     'opensubtitles': [
         # This list includes languages where the most common word in