mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-24 01:41:39 +00:00
take out OpenSubtitles for Chinese
This commit is contained in:
parent
bc323eccaf
commit
d9c44d5fcc
@ -146,7 +146,7 @@ at least 3 different sources of word frequencies:
|
|||||||
Russian ru │ - Yes Yes Yes Yes -
|
Russian ru │ - Yes Yes Yes Yes -
|
||||||
Swedish sv │ - Yes - Yes Yes -
|
Swedish sv │ - Yes - Yes Yes -
|
||||||
Turkish tr │ - Yes - Yes Yes -
|
Turkish tr │ - Yes - Yes Yes -
|
||||||
Chinese zh │ Yes Yes Yes - - Jieba
|
Chinese zh │ Yes - Yes - - Jieba
|
||||||
|
|
||||||
|
|
||||||
Additionally, Korean is marginally supported. You can look up frequencies in
|
Additionally, Korean is marginally supported. You can look up frequencies in
|
||||||
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -23,11 +23,12 @@ CONFIG = {
|
|||||||
'opensubtitles': [
|
'opensubtitles': [
|
||||||
# This list includes languages where the most common word in
|
# This list includes languages where the most common word in
|
||||||
# OpenSubtitles appears at least 5000 times. However, we exclude
|
# OpenSubtitles appears at least 5000 times. However, we exclude
|
||||||
# German, where SUBTLEX has done better processing of the same data.
|
# languages where SUBTLEX has apparently done a better job,
|
||||||
|
# specifically German and Chinese.
|
||||||
'ar', 'bg', 'bs', 'ca', 'cs', 'da', 'el', 'en', 'es', 'et',
|
'ar', 'bg', 'bs', 'ca', 'cs', 'da', 'el', 'en', 'es', 'et',
|
||||||
'fa', 'fi', 'fr', 'he', 'hr', 'hu', 'id', 'is', 'it', 'lt', 'lv',
|
'fa', 'fi', 'fr', 'he', 'hr', 'hu', 'id', 'is', 'it', 'lt', 'lv',
|
||||||
'mk', 'ms', 'nb', 'nl', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sq',
|
'mk', 'ms', 'nb', 'nl', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sq',
|
||||||
'sr', 'sv', 'tr', 'uk', 'zh'
|
'sr', 'sv', 'tr', 'uk'
|
||||||
],
|
],
|
||||||
'leeds': [
|
'leeds': [
|
||||||
'ar', 'de', 'el', 'en', 'es', 'fr', 'it', 'ja', 'pt', 'ru', 'zh'
|
'ar', 'de', 'el', 'en', 'es', 'fr', 'it', 'ja', 'pt', 'ru', 'zh'
|
||||||
|
Loading…
Reference in New Issue
Block a user