mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
take out OpenSubtitles for Chinese
This commit is contained in:
parent
bc323eccaf
commit
d9c44d5fcc
@@ -146,7 +146,7 @@ at least 3 different sources of word frequencies:
|
||||
Russian ru │ - Yes Yes Yes Yes -
|
||||
Swedish sv │ - Yes - Yes Yes -
|
||||
Turkish tr │ - Yes - Yes Yes -
|
||||
Chinese zh │ Yes Yes Yes - - Jieba
|
||||
Chinese zh │ Yes - Yes - - Jieba
|
||||
|
||||
|
||||
Additionally, Korean is marginally supported. You can look up frequencies in
|
||||
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -23,11 +23,12 @@ CONFIG = {
|
||||
'opensubtitles': [
|
||||
# This list includes languages where the most common word in
|
||||
# OpenSubtitles appears at least 5000 times. However, we exclude
|
||||
# German, where SUBTLEX has done better processing of the same data.
|
||||
# languages where SUBTLEX has apparently done a better job,
|
||||
# specifically German and Chinese.
|
||||
'ar', 'bg', 'bs', 'ca', 'cs', 'da', 'el', 'en', 'es', 'et',
|
||||
'fa', 'fi', 'fr', 'he', 'hr', 'hu', 'id', 'is', 'it', 'lt', 'lv',
|
||||
'mk', 'ms', 'nb', 'nl', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sq',
|
||||
'sr', 'sv', 'tr', 'uk', 'zh'
|
||||
'sr', 'sv', 'tr', 'uk'
|
||||
],
|
||||
'leeds': [
|
||||
'ar', 'de', 'el', 'en', 'es', 'fr', 'it', 'ja', 'pt', 'ru', 'zh'
|
||||
|
Loading…
Reference in New Issue
Block a user