Module:su-Latn-Sund-translit

From Linguifex
Jump to navigation Jump to search

This module will transliterate Sundanese language text written in the Latin script into the Sundanese script. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:su-Latn-Sund-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}

-- Sign definitions
local panyakra = "ᮢ"  -- medial -r-
local panglayar = "ᮁ" -- final -r-
local virama = "᮪"   -- cancels inherent vowel

-- Latin to Sundanese mappings
-- Latin spellings treated as consonants. Digraphs are listed before single
-- letters so that a scan taking the first matching entry prefers the longest
-- consonant ("ng" before "n", "kh" before "k", "sy" before "s").
local consonants = {"ny","ng","kh","sy","k","g","c","j","t","d","n","p","b","m","y","l","w","s","h","f","q","v","x","z"}
-- Latin spelling -> Sundanese letter (base consonant letters and independent vowels).
local mapping = {
  ["k"] = "ᮊ", ["g"] = "ᮌ", ["c"] = "ᮎ", ["j"] = "ᮏ", ["ny"] = "ᮑ", ["ng"] = "ᮍ",
  ["t"] = "ᮒ", ["d"] = "ᮓ", ["n"] = "ᮔ", ["p"] = "ᮕ", ["b"] = "ᮘ", ["m"] = "ᮙ",
  ["y"] = "ᮚ", ["l"] = "ᮜ", ["w"] = "ᮝ", ["s"] = "ᮞ", ["h"] = "ᮠ",
  ["f"] = "ᮖ", ["q"] = "ᮋ", ["v"] = "ᮗ", ["x"] = "ᮟ", ["z"] = "ᮐ",
  ["kh"] = "ᮮ", ["sy"] = "ᮯ",
  -- Vowels
  ["a"] = "ᮃ", ["é"] = "ᮆ", ["i"] = "ᮄ", ["o"] = "ᮇ", ["u"] = "ᮅ", ["e"] = "ᮈ", ["eu"] = "ᮉ",
}
-- Latin vowel set (single characters only; the digraph "eu" is not a member)
local latinVowels = {a=true, ["é"]=true, e=true, i=true, o=true, u=true}

-- Diacritics for vowels following a consonant
-- ("a" has no entry: it is the vowel a bare consonant letter already carries)
local diacritics = { ["i"]="ᮤ", ["u"]="ᮥ", ["é"]="ᮦ", ["o"]="ᮧ", ["e"]="ᮨ", ["eu"]="ᮩ" }

-- Finals (isolated after vowels)
local finals = { ["ng"]="ᮀ", ["r"]=panglayar, ["h"]="ᮂ", ["k"]="ᮾ", ["m"]="ᮿ" }

-- Cached mw.ustring helpers. All positions and lengths below are counted in
-- code points, never bytes, so multi-byte letters such as "é" index correctly.
local usub, ulen, ulower = mw.ustring.sub, mw.ustring.len, mw.ustring.lower

-- Latin consonant spelling -> Sundanese base letter. "r" has no entry in
-- `consonants`/`mapping`, so its letter is supplied here.
local consonant_letters = { ["r"] = "ᮛ" }
local max_consonant_len = 1
for _, c in ipairs(consonants) do
  consonant_letters[c] = mapping[c]
  max_consonant_len = math.max(max_consonant_len, ulen(c))
end

-- Syllable-closing consonants that are written as a sign on the preceding
-- syllable instead of as letter + virama.
-- NOTE(review): limited to ng/r/h on purpose; every other closing consonant
-- gets letter + virama -- confirm this matches the intended orthography.
local final_signs = { ["ng"] = "ᮀ", ["r"] = panglayar, ["h"] = "ᮂ" }

-- Returns the longest consonant spelling that starts at code point `pos`,
-- or nil when the text there is not a consonant.
local function consonant_at(tok, pos)
  for len = max_consonant_len, 1, -1 do
    local cand = usub(tok, pos, pos + len - 1)
    if ulen(cand) == len and consonant_letters[cand] then
      return cand
    end
  end
  return nil
end

-- Returns the vowel spelling that starts at code point `pos` ("eu" is tried
-- before the single letters), or nil when the text there is not a vowel.
local function vowel_at(tok, pos)
  local two = usub(tok, pos, pos + 1)
  if ulen(two) == 2 and diacritics[two] then
    return two
  end
  local one = usub(tok, pos, pos)
  if latinVowels[one] then
    return one
  end
  return nil
end

-- Transliterate a single word.
-- @param tok  one Latin-script word (a run matched by the caller)
-- @return     the word in Sundanese script; characters that are neither a
--             known consonant nor a known vowel are copied through unchanged
--
-- Rules applied, scanning left to right:
--   * consonant + vowel     -> base letter, plus a vowel sign unless the
--                              vowel is "a" (a bare letter already reads
--                              with "a")
--   * consonant + r + vowel -> base letter + panyakra (+ vowel sign)
--   * ng / r / h closing a syllable that has a vowel -> final sign
--   * any other consonant with no vowel after it -> base letter + virama
--   * vowel not preceded by a consonant -> independent vowel letter
local function transliterate_word(tok)
  -- The mapping tables are keyed by lowercase spellings only.
  tok = ulower(tok)
  local total = ulen(tok)
  local out = {}
  local pos = 1
  -- True while the most recently emitted unit ends in a vowel, i.e. a final
  -- sign would have a syllable to attach to.
  local after_vowel = false

  while pos <= total do
    local cons = consonant_at(tok, pos)
    if cons then
      local nxt = pos + ulen(cons)
      local medial = ""
      local vowel = vowel_at(tok, nxt)
      if not vowel and cons ~= "r" and usub(tok, nxt, nxt) == "r" then
        -- Consonant + r only forms a cluster when a vowel follows the r;
        -- otherwise the r is handled on its own in the next iteration.
        local clustered = vowel_at(tok, nxt + 1)
        if clustered then
          medial = panyakra
          vowel = clustered
          nxt = nxt + 1
        end
      end

      if vowel then
        -- diacritics has no entry for "a": nothing is added for it.
        out[#out + 1] = consonant_letters[cons] .. medial .. (diacritics[vowel] or "")
        pos = nxt + ulen(vowel)
        after_vowel = true
      elseif after_vowel and final_signs[cons] then
        out[#out + 1] = final_signs[cons]
        pos = nxt
        after_vowel = false
      else
        out[#out + 1] = consonant_letters[cons] .. virama
        pos = nxt
        after_vowel = false
      end
    else
      local vowel = vowel_at(tok, pos)
      if vowel then
        out[#out + 1] = mapping[vowel]
        pos = pos + ulen(vowel)
        after_vowel = true
      else
        -- Digits and any other unmapped character pass through as-is.
        out[#out + 1] = usub(tok, pos, pos)
        pos = pos + 1
        after_vowel = false
      end
    end
  end

  return table.concat(out)
end

--- Transliterates Latin-script Sundanese text into Sundanese script.
-- Every run of alphanumeric characters (`%w+`) is converted with
-- transliterate_word; everything between those runs is left untouched.
-- Extra arguments passed by callers (such as a language or script code) are
-- ignored.
-- @param text  the text to transliterate
-- @return      the transliterated string, or nil when `text` is nil
function export.tr(text)
  if text == nil then
    return nil
  end
  -- gsub returns (string, substitution count); keep only the string so the
  -- count does not leak to callers as a second return value.
  local result = mw.ustring.gsub(text, '%w+', transliterate_word)
  return result
end

return export