mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 01:41:39 +00:00
update comments in wordfreq_builder.config; remove unused 'version'
This commit is contained in:
parent
0ab23f8a28
commit
bc323eccaf
@ -1,19 +1,17 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
CONFIG = {
|
CONFIG = {
|
||||||
'version': '1.0b',
|
|
||||||
# data_dir is a relative or absolute path to where the wordlist data
|
# data_dir is a relative or absolute path to where the wordlist data
|
||||||
# is stored
|
# is stored
|
||||||
'data_dir': 'data',
|
'data_dir': 'data',
|
||||||
'sources': {
|
'sources': {
|
||||||
# A list of language codes (possibly un-standardized) that we'll
|
# A list of language codes that we'll look up in filenames for these
|
||||||
# look up in filenames for these various data sources.
|
# various data sources.
|
||||||
#
|
#
|
||||||
# Consider adding:
|
# Consider adding:
|
||||||
# 'th' when we get tokenization for it
|
# 'th' when we get tokenization for it
|
||||||
# 'hi' when we stop messing up its tokenization
|
# 'hi' when we stop messing up its tokenization
|
||||||
# 'tl' with one more data source
|
# 'tl' with one more data source
|
||||||
# 'sv' because its data sources are ready
|
|
||||||
'twitter': [
|
'twitter': [
|
||||||
'ar', 'de', 'el', 'en', 'es', 'fr', 'id', 'it', 'ja', 'ko', 'ms', 'nl',
|
'ar', 'de', 'el', 'en', 'es', 'fr', 'id', 'it', 'ja', 'ko', 'ms', 'nl',
|
||||||
'pl', 'pt', 'ru', 'sv', 'tr'
|
'pl', 'pt', 'ru', 'sv', 'tr'
|
||||||
|
Loading…
Reference in New Issue
Block a user