mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-26 02:28:50 +00:00
transliterate: organize the 'borrowed letters' better
This commit is contained in:
parent
99eac54b31
commit
e4f40a0ce9
@ -32,10 +32,15 @@ SR_CYRL_TO_LATN_DICT = {
|
|||||||
ord('Џ'): 'Dž', ord('џ'): 'dž',
|
ord('Џ'): 'Dž', ord('џ'): 'dž',
|
||||||
ord('Ш'): 'Š', ord('ш'): 'š',
|
ord('Ш'): 'Š', ord('ш'): 'š',
|
||||||
|
|
||||||
# Handle borrowed letters from Russian
|
# Handle Cyrillic letters from other languages. We hope these cases don't
|
||||||
|
# come up often when we're trying to transliterate Serbian, but if these
|
||||||
|
# letters show up in loan-words or code-switching text, we can at least
|
||||||
|
# transliterate them approximately instead of leaving them as Cyrillic
|
||||||
|
# letters surrounded by Latin.
|
||||||
|
|
||||||
|
# Russian letters
|
||||||
ord('Ё'): 'Jo', ord('ё'): 'Jo',
|
ord('Ё'): 'Jo', ord('ё'): 'Jo',
|
||||||
ord('Й'): 'J', ord('й'): 'j',
|
ord('Й'): 'J', ord('й'): 'j',
|
||||||
ord('Ў'): 'U', ord('ў'): 'u',
|
|
||||||
ord('Щ'): 'Šč', ord('щ'): 'šč',
|
ord('Щ'): 'Šč', ord('щ'): 'šč',
|
||||||
ord('Ъ'): '', ord('ъ'): '',
|
ord('Ъ'): '', ord('ъ'): '',
|
||||||
ord('Ы'): 'Y', ord('ы'): 'y',
|
ord('Ы'): 'Y', ord('ы'): 'y',
|
||||||
@ -44,13 +49,16 @@ SR_CYRL_TO_LATN_DICT = {
|
|||||||
ord('Ю'): 'Ju', ord('ю'): 'ju',
|
ord('Ю'): 'Ju', ord('ю'): 'ju',
|
||||||
ord('Я'): 'Ja', ord('я'): 'ja',
|
ord('Я'): 'Ja', ord('я'): 'ja',
|
||||||
|
|
||||||
# Handle borrowed letters from Ukrainian
|
# Belarusian letter
|
||||||
|
ord('Ў'): 'Ŭ', ord('ў'): 'ŭ',
|
||||||
|
|
||||||
|
# Ukrainian letters
|
||||||
ord('Є'): 'Je', ord('є'): 'je',
|
ord('Є'): 'Je', ord('є'): 'je',
|
||||||
ord('І'): 'I', ord('і'): 'i',
|
ord('І'): 'I', ord('і'): 'i',
|
||||||
ord('Ї'): 'Ji', ord('ї'): 'ji',
|
ord('Ї'): 'Ji', ord('ї'): 'ji',
|
||||||
ord('Ґ'): 'G', ord('ґ'): 'g',
|
ord('Ґ'): 'G', ord('ґ'): 'g',
|
||||||
|
|
||||||
# Handle borrowed letters from Macedonian
|
# Macedonian letters
|
||||||
ord('Ѕ'): 'Dz', ord('ѕ'): 'dz',
|
ord('Ѕ'): 'Dz', ord('ѕ'): 'dz',
|
||||||
ord('Ѓ'): 'Ǵ', ord('ѓ'): 'ǵ',
|
ord('Ѓ'): 'Ǵ', ord('ѓ'): 'ǵ',
|
||||||
ord('Ќ'): 'Ḱ', ord('ќ'): 'ḱ',
|
ord('Ќ'): 'Ḱ', ord('ќ'): 'ḱ',
|
||||||
|
Loading…
Reference in New Issue
Block a user