-- Module:tt-translit
-- Documentation for this module may be created at Module:tt-translit/doc
-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Short alias for mw.ustring.gsub. It returns more than one value, which is
-- why the rsub() wrapper below exists.
local rsubn = mw.ustring.gsub
-- Wrapper around rsubn() that hands back only the substituted string and
-- drops any further values rsubn() returns.
local function rsub(term, foo, bar)
	-- The surrounding parentheses truncate the call to its first result.
	return (rsubn(term, foo, bar))
end
-- Run rsub() with the same pattern and replacement over and over, stopping
-- as soon as a pass leaves the string untouched, and return that result.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- Cyrillic -> Latin lookup table. export.tr applies it one character at a
-- time as its final substitution (for tt-Arab input, after the text has been
-- converted to Cyrillic). Characters with no entry here are left as they are.
-- Besides ordinary Cyrillic letters it also covers the internal placeholder
-- letters that export.tr produces along the way: ҡ/ғ (hardened к/г, see
-- consonants_soft2hard) and Ӹ/ӹ (the collapsed "ый" sequence).
local tt = {
['б']='b',['Б']='B', ['в']='w',['В']='W', ['г']='g',['Г']='G', ['д']='d',['Д']='D',
['з']='z',['З']='Z', ['й']='y',['Й']='Y', ['к']='k',['К']='K', ['л']='l',['Л']='L',
['м']='m',['М']='M', ['н']='n',['Н']='N', ['п']='p',['П']='P', ['р']='r',['Р']='R',
['с']='s',['С']='S', ['т']='t',['Т']='T', ['ф']='f',['Ф']='F', ['х']='x',['Х']='X',
['ч']='ç',['Ч']='Ç', ['ш']='ş',['Ш']='Ş',
['җ']='c',['Җ']='C', ['ң']='ñ',['Ң']='Ñ', ['һ']='h',['Һ']='H',
['ж']='j',['Ж']='J', ['ц']='ts',['Ц']='Ts', ['щ']='şç',['Щ']='Şç', ['ё']='yo',['Ё']='Yo',
['а']='a',['А']='A', ['ы']='ı',['Ы']='I', ['о']='o',['О']='O', ['у']='u',['У']='U',
['ә']='ä',['Ә']='Ä', ['э']='e',['Э']='E', ['и']='i',['И']='İ', ['ө']='ö',['Ө']='Ö', ['ү']='ü',['Ү']='Ü',
-- The iotated vowels я/е/ю have no active entries: the two lines below are
-- disabled. NOTE(review): presumably because export.tr rewrites them into
-- й + vowel (or э) before this table is consulted — confirm.
--['я']='ya',['Я']='Ya', ['е']='ye',['Е']='Ye', ['ю']='yu',['Ю']='Yu',
--['е']='e',['Е']='E',
-- Soft and hard signs map to modifier prime / double prime in either case.
['ь']='ʹ',['Ь']='ʹ', ['ъ']='ʺ',['Ъ']='ʺ',
-- Internal placeholder letters (see the header comment above).
['ҡ']='q',['Ҡ']='Q', ['ғ']='ğ',['Ғ']='Ğ', ['Ӹ']='Iy',['ӹ']='ıy',
}
-- Character inventories. These strings are spliced into [...] pattern sets
-- by export.tr, so every character in them is treated as a set member.
-- Consonant letters, including the internal hardened forms Ҡ/Ғ/ҡ/ғ.
local consonants = 'БВГДЗЙКЛМНПРСТФХЧШҖҢҺбвгдзйклмнпрстфхчшҗңһЖЦЩжцщҠҒҡғ'
-- Hard vowels, including the internal Ӹ/ӹ placeholder for "ый".
local vowels_hard = 'АЫӸОУаыӹоу'
-- Soft vowels.
local vowels_soft = 'ӘЭИӨҮәэиөү'
local vowels_iotated = 'ЯЕЮяею' -- ё is only in loans
-- к/г -> internal letters that the tt table transliterates as q/ğ.
local consonants_soft2hard = {['К']='Ҡ', ['Г']='Ғ', ['к']='ҡ', ['г']='ғ'}
-- Each hard vowel paired with its soft counterpart (single-character keys only).
local vowels_hard2soft = {['А']='Ә', ['Ы']='Э', ['Ӹ']='И', ['О']='Ө', ['У']='Ү', ['а']='ә', ['ы']='э', ['ӹ']='и', ['о']='ө', ['у']='ү'}
-- Expansion of an iotated vowel into й + plain vowel: the first table yields
-- hard vowels, the second soft vowels.
local vowels_iotated_expanded_hard = {['Я']='Йа', ['Е']='Йы', ['Ю']='Йу', ['я']='йа', ['е']='йы', ['ю']='йу'}
local vowels_iotated_expanded_soft = {['Я']='Йә', ['Е']='Йэ', ['Ю']='Йү', ['я']='йә', ['е']='йэ', ['ю']='йү'}
-- Arabic-script -> Cyrillic lookup table used by the 'tt-Arab' branch of
-- export.tr. The Cyrillic output is an intermediate form that is further
-- adjusted and then run through the tt table. Characters with no entry are
-- left unchanged by the substitution.
local tt_Arab_New = {
-- Source reference (wiki link, title kept verbatim):
-- [[s:mul:Рус мәктәпләре өчен татар теле дәреслеге/13]]
-- XXX: need to investigate the most appropriate Unicode codepoints to use for tt-Arab
['ا']='а', ['ە']='ә',
['ب']='б', ['پ']='п', ['ت']='т',
['ج']='җ', ['چ']='ч', ['ح']='х',
['د']='д',
['ر']='р', ['ز']='з', ['ژ']='ж',
['س']='с', ['ش']='ш',
['ع']='ғ',
['ف']='ф', ['ق']='ҡ', ['ک']='к', ['گ']='г', ['ڭ']='ң',
['ل']='л',
['م']='м',
['ن']='н',
['ۇ']='ө', ['و']='ү', ['ۋ']='в',
['ه']='һ',
-- The first key on the next line is a two-code-point sequence (base letter
-- plus combining mark); export.tr substitutes it in a separate pass before
-- the per-character pass.
['ىُ']='э', ['ی']='и',
-- Additional Arabic letters mapped onto Cyrillic letters already produced above.
['ث']='с', ['خ']='х', ['ذ']='з', ['ص']='с', ['ض']='з', ['ط']='т', ['ظ']='з', ['غ']='ғ',
-- These two produce ь/ъ, which export.tr consumes as word-level markers.
['ئ']='ь',
['ࢭ']='ъ',
-- Two sets of digit glyphs, both mapped to ASCII digits.
['۱']='1', ['۲']='2', ['۳']='3', ['۴']='4', ['۵']='5',
['۶']='6', ['۷']='7', ['۸']='8', ['۹']='9', ['۰']='0',
['١']='1', ['٢']='2', ['٣']='3', ['٤']='4', ['٥']='5',
['٦']='6', ['٧']='7', ['٨']='8', ['٩']='9', ['٠']='0',
-- Punctuation.
['،']=',', ['؟']='?',
}
-- Soft -> hard vowel map, applied by the 'tt-Arab' branch of export.tr to
-- words that contain a hardness marker.
-- ә/а are excluded: the new orthography (yaña imlâ) has separate letters for them.
--local vowels_soft2hard = {['э']='ы', ['и']='ӹ', ['ө']='о', ['ү']='у'}
-- XXX: keep и for now. It is less unsightly and more common than ый. Can и vs. ый even be predicted accurately?
local vowels_soft2hard = {['э']='ы', ['и']='и', ['ө']='о', ['ү']='у'}
-- Replace every hard vowel in `vowels` with its soft counterpart.
-- The q/ğ rules in export.tr capture a *run* of hard vowels, while
-- vowels_hard2soft only has single-character keys; mapping character by
-- character keeps multi-vowel runs from producing a nil lookup (and thus a
-- "attempt to concatenate a nil value" error). Characters without an entry
-- are left unchanged.
local function soften_vowels(vowels)
	return rsub(vowels, '.', vowels_hard2soft)
end

-- Transliterate Tatar text into Latin script.
--
-- text: the string to transliterate.
-- lang: accepted for interface compatibility; not used by this function.
-- sc:   script code. 'tt-Arab' selects the Arabic-script path; any other
--       value (including nil) is handled as Cyrillic.
--
-- Returns the transliterated string. Both paths finish by mapping each
-- character through the `tt` table; characters without an entry pass through
-- unchanged.
function export.tr(text, lang, sc)
	if sc == 'tt-Arab' then
		-- New orthography (yaña imlâ).
		-- Automatic insertion of э/ы would be nice, but maybe we don't have
		-- to worry about that, since Yañalif also omits them.
		-- Visualize the continuity between the two.
		-- Also, insertion would wreak havoc on the old orthography (iske imlâ).
		local zwnj = mw.ustring.char(0x200C) -- ZERO WIDTH NON-JOINER
		local shadda = mw.ustring.char(0x0651) -- SHADDA

		-- Quick fixes for the old orthography: rewrite initial vowel
		-- spellings, first at the very start of the text...
		text = rsub(text, '^او', 'ئو')
		text = rsub(text, '^ای', 'ئی')
		text = rsub(text, '^آ', 'ئا')
		text = rsub(text, '^ا', 'ئە')
		-- ...then after any punctuation or whitespace character.
		text = rsub(text, '([%p%s])او', '%1ئو')
		text = rsub(text, '([%p%s])ای', '%1ئی')
		text = rsub(text, '([%p%s])آ', '%1ئا')
		text = rsub(text, '([%p%s])ا', '%1ئە')
		-- Same for this letter at the end of the text, or before
		-- punctuation, whitespace or a ZWNJ.
		text = rsub(text, 'ه$', 'ە')
		text = rsub(text, 'ه([%p%s' .. zwnj .. '])', 'ە%1')
		text = rsub(text, zwnj, '')
		-- A shadda doubles the character it follows.
		text = rsub(text, '(.)' .. shadda, '%1%1')

		-- Arabic -> intermediate Cyrillic. The two-code-point key goes first
		-- because the per-character pass could never match it.
		text = rsub(text, 'ىُ', tt_Arab_New) -- `э/ы` is not atomic in Unicode
		text = rsub(text, '.', tt_Arab_New)

		-- и and ү next to another vowel become the glides й and в.
		text = rsub(text, 'ии([әэөаү])', 'ий%1')
		text = rsub(text, 'и([әэөаү])', 'й%1')
		text = rsub(text, '([әэөаүи])и', '%1й')
		text = rsub(text, 'ү([әэөаи])', 'в%1')
		text = rsub(text, '([әэөаиү])ү', '%1в')

		-- Per-word fixes; a word is a maximal run of characters that are
		-- neither punctuation nor whitespace.
		text = rsub(text,
			'([^%p%s]+)',
			function(word)
				word = rsub(word, mw.ustring.format('^(ъ?)и([%s])', consonants), '%1й%2')
				word = rsub(word, mw.ustring.format('^(ь)([%s])', consonants), '%1э%2')
				-- Any of ъ, а, ҡ, ғ marks the word as hard: switch its soft
				-- vowels to their hard counterparts.
				if mw.ustring.match(word, '[ъаҡғ]') then
					word = rsub(word, mw.ustring.format('([%s])', vowels_soft), vowels_soft2hard)
				end
				-- A leading ъ/ь has done its job and is dropped.
				word = rsub(word, '^ъ', '')
				word = rsub(word, '^ь', '')
				return word
			end
		)
		text = rsub(text, '.', tt)
		return text
	end

	-- Cyrillic path --------------------------------------------------------

	-- normalize pure vocalic e (е directly after a consonant becomes э)
	local uniotated = {['Е']='Э', ['е']='э'}
	text = rsub(text,
		mw.ustring.format('([%s])([Ее])', consonants),
		function(consonant, e)
			return consonant .. uniotated[e]
		end
	)

	-- simplify handling ый by collapsing it into a single placeholder letter
	text = rsub(text, 'Ы[Йй]', 'Ӹ')
	text = rsub(text, 'ый', 'ӹ')

	-- Russian loan sounds
	-- XXX: an idea: identify Russian loans by adding an accent mark?
	--text = rsub(text, 'ия', 'ийә')

	-- process iotated soft vowels:
	-- (1) the next vowel, after any consonants, is soft
	text = rsub(text,
		mw.ustring.format('([%s])([%s]*[%s])', vowels_iotated, consonants, vowels_soft),
		function(vowel_iotated, following)
			return vowels_iotated_expanded_soft[vowel_iotated] .. following
		end
	)
	-- (2) a soft sign follows (after any consonants); the soft sign is dropped
	text = rsub(text,
		mw.ustring.format('([%s])([%s]*)([Ьь])', vowels_iotated, consonants),
		function(vowel_iotated, following)
			return vowels_iotated_expanded_soft[vowel_iotated] .. following
		end
	)
	-- (3) a soft vowel immediately precedes; repeated because each expansion
	-- can create a new soft vowel + iotated vowel pair
	text = rsub_repeatedly(text,
		mw.ustring.format('([%s])([%s])', vowels_soft, vowels_iotated),
		function(preceding, vowel_iotated)
			return preceding .. vowels_iotated_expanded_soft[vowel_iotated]
		end
	)

	-- process iotated hard vowels (everything still left is expanded as hard)
	text = rsub(text,
		mw.ustring.format('([%s])', vowels_iotated),
		function(vowel_iotated)
			return vowels_iotated_expanded_hard[vowel_iotated]
		end
	)

	-- verbal noun + 3rd person possessive
	text = rsub(text, 'үйэ', 'үвэ')

	-- q/ğ is indicated by using a hard vowel, even in soft vowel words.
	-- In the three rules below the hard-vowel run may be longer than one
	-- character, so it is softened with soften_vowels() rather than by a
	-- direct table lookup.
	text = rsub(text,
		mw.ustring.format('([КГкг])([%s]+)([%s])([Ъъ])', vowels_hard, consonants),
		function(kg, vowel, following)
			-- XXX: presumably this is what ъ means here
			return consonants_soft2hard[kg] .. soften_vowels(vowel) .. following .. 'ь'
		end
	)
	text = rsub(text,
		mw.ustring.format('([КГкг])([%s]+)([%s]+[%s])', vowels_hard, consonants, vowels_soft),
		function(kg, vowel, following)
			return consonants_soft2hard[kg] .. soften_vowels(vowel) .. following
		end
	)
	text = rsub(text,
		mw.ustring.format('([КГкг])([%s]+)([%s])([Ьь])', vowels_hard, consonants),
		function(kg, vowel, following)
			return consonants_soft2hard[kg] .. soften_vowels(vowel) .. following
		end
	)
	-- к/г with a hard vowel directly on either side is hardened.
	text = rsub(text,
		mw.ustring.format('([%s]?)([КГкг])([%s]?)', vowels_hard, vowels_hard),
		function(preceding, kg, following)
			if preceding ~= '' or following ~= '' then
				kg = consonants_soft2hard[kg]
			end
			return preceding .. kg .. following
		end
	)
	-- к/г followed by a hard sign is hardened and the sign is dropped.
	text = rsub(text, '([КГкг])([Ъъ])', function(kg) return consonants_soft2hard[kg] end)

	local any_vowel = '[' .. vowels_hard .. vowels_soft .. ']'
	-- excrescent y/w after i/u
	text = rsub_repeatedly(text, '([Ии])(' .. any_vowel .. ')', '%1й%2')
	text = rsub_repeatedly(text, '([УҮуү])(' .. any_vowel .. ')', '%1в%2')
	-- semivocalic w after vowels
	text = rsub(text, '(' .. any_vowel .. ')[УҮуү]', '%1в')
	-- glottal stop after vowels
	text = rsub(text, '(' .. any_vowel .. ')[Ээ]', '%1ь')

	text = rsub(text, '.', tt)
	return text
end
-- Hand the export table (holding tr) back as the module's value.
return export