mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-23 17:31:41 +00:00
parent
ab8c2e2331
commit
064ee22a33
@ -48,8 +48,8 @@ def cld2_surface_tokenizer(text):
|
|||||||
# Low-frequency languages tend to be detected incorrectly. Keep a limited
|
# Low-frequency languages tend to be detected incorrectly. Keep a limited
|
||||||
# list of languages we're allowed to use here.
|
# list of languages we're allowed to use here.
|
||||||
KEEP_THESE_LANGUAGES = {
|
KEEP_THESE_LANGUAGES = {
|
||||||
'ar', 'de', 'el', 'en', 'es', 'fr', 'hr', 'id', 'ja', 'ko', 'ms', 'nl',
|
'ar', 'de', 'el', 'en', 'es', 'fr', 'hr', 'id', 'it', 'ja', 'ko', 'ms',
|
||||||
'pl', 'pt', 'ro', 'ru', 'sv', 'th'
|
'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'th'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user