mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 01:41:39 +00:00
add Polish and Swedish, which have sufficient data
Former-commit-id: 447d7e5134
This commit is contained in:
parent
f7a4e2c444
commit
5b9b2d2d02
@ -12,15 +12,15 @@ CONFIG = {
|
|||||||
# Consider adding:
|
# Consider adding:
|
||||||
# 'th' when we get tokenization for it
|
# 'th' when we get tokenization for it
|
||||||
# 'hi' when we stop messing up its tokenization
|
# 'hi' when we stop messing up its tokenization
|
||||||
# 'tl' because it's probably ready right now
|
# 'tl' with one more data source
|
||||||
# 'pl' because we have 3 sources for it
|
# 'sv' because its data sources are ready
|
||||||
'twitter': [
|
'twitter': [
|
||||||
'ar', 'de', 'el', 'en', 'es', 'fr', 'id', 'it', 'ja', 'ko', 'ms', 'nl',
|
'ar', 'de', 'el', 'en', 'es', 'fr', 'id', 'it', 'ja', 'ko', 'ms', 'nl',
|
||||||
'pt', 'ru', 'tr'
|
'pl', 'pt', 'ru', 'sv', 'tr'
|
||||||
],
|
],
|
||||||
'wikipedia': [
|
'wikipedia': [
|
||||||
'ar', 'de', 'en', 'el', 'es', 'fr', 'id', 'it', 'ja', 'ko', 'ms', 'nl',
|
'ar', 'de', 'en', 'el', 'es', 'fr', 'id', 'it', 'ja', 'ko', 'ms', 'nl',
|
||||||
'pt', 'ru', 'tr'
|
'pl', 'pt', 'ru', 'sv', 'tr'
|
||||||
],
|
],
|
||||||
'opensubtitles': [
|
'opensubtitles': [
|
||||||
# This list includes languages where the most common word in
|
# This list includes languages where the most common word in
|
||||||
|
Loading…
Reference in New Issue
Block a user