Module:su-Latn-Sund-translit

Revision as of 12:46, 21 April 2026 by Sware (talk | contribs) (1 revision imported)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Documentation for this module may be created at Module:su-Latn-Sund-translit/doc

local export = {}

-- Sign definitions
local panyakra = "ᮢ"  -- medial -r- (attached to the preceding consonant)
local panglayar = "ᮁ" -- final -r
local virama = "᮪"   -- cancels inherent vowel

-- Latin to Sundanese mappings
-- Latin consonant spellings, scanned in order with first-match-wins.
-- Digraphs MUST come before their one-letter prefixes ("ng" before "n",
-- "kh" before "k", ...), otherwise the digraph letters can never be selected.
local consonants = {
  "ny", "ng", "kh", "sy",
  "k", "g", "c", "j", "t", "d", "n", "p", "b", "m",
  "y", "l", "w", "s", "h", "f", "q", "v", "x", "z",
}
-- Base letters: consonant letters and independent vowel letters.
local mapping = {
  ["k"] = "ᮊ", ["g"] = "ᮌ", ["c"] = "ᮎ", ["j"] = "ᮏ", ["ny"] = "ᮑ", ["ng"] = "ᮍ",
  ["t"] = "ᮒ", ["d"] = "ᮓ", ["n"] = "ᮔ", ["p"] = "ᮕ", ["b"] = "ᮘ", ["m"] = "ᮙ",
  ["y"] = "ᮚ", ["l"] = "ᮜ", ["w"] = "ᮝ", ["s"] = "ᮞ", ["h"] = "ᮠ",
  ["f"] = "ᮖ", ["q"] = "ᮋ", ["v"] = "ᮗ", ["x"] = "ᮟ", ["z"] = "ᮐ",
  ["kh"] = "ᮮ", ["sy"] = "ᮯ",
  -- Vowels
  ["a"] = "ᮃ", ["é"] = "ᮆ", ["i"] = "ᮄ", ["o"] = "ᮇ", ["u"] = "ᮅ", ["e"] = "ᮈ", ["eu"] = "ᮉ",
}
-- Latin vowel set (single characters only; the digraph "eu" is not listed here)
local latinVowels = {a=true, ["é"]=true, e=true, i=true, o=true, u=true}

-- Diacritics for vowels following a consonant.
-- There is deliberately no entry for "a": it is the inherent vowel.
local diacritics = { ["i"]="ᮤ", ["u"]="ᮥ", ["é"]="ᮦ", ["o"]="ᮧ", ["e"]="ᮨ", ["eu"]="ᮩ" }

-- Finals (isolated after vowels)
-- NOTE(review): nothing in the visible module reads this table; kept as data
-- pending a decision on whether/how final signs should be applied.
local finals = { ["ng"]="ᮀ", ["r"]=panglayar, ["h"]="ᮂ", ["k"]="ᮾ", ["m"]="ᮿ" }

-- Base letter RA. "r" is resolved inside transliterate_word because it can
-- surface as a base letter, as panyakra (medial) or as panglayar (final).
local ra = "ᮛ"

-- Set of Latin consonant spellings (including "r"), built lazily from
-- `consonants` on first use so that loading the module does no extra work.
local consonant_set

--- Transliterate a single Latin-script word into Sundanese script.
--
-- All positions are code-point indices (mw.ustring), so non-ASCII input such
-- as "é" is handled consistently. Rules applied, in order of precedence:
--   * consonants are matched longest-first ("ng" wins over "n"), regardless
--     of the order of the `consonants` list;
--   * consonant + "r" + vowel becomes base letter + panyakra;
--   * a vowel after a consonant becomes a vowel sign; "a" is the inherent
--     vowel and produces no sign; "eu" is matched before "e";
--   * a consonant that is not followed by a vowel receives the virama,
--     word-internally as well as word-finally;
--   * "r" directly after a vowel sound and not followed by a vowel becomes
--     panglayar;
--   * a vowel that does not follow a consonant becomes an independent letter;
--   * any other character is copied through unchanged.
-- NOTE(review): final -ng / -h are written as base letter + virama here; the
-- dedicated final signs are not applied by this function.
--
-- @param tok string: one word (a run of alphanumeric characters)
-- @return string: the transliterated word
local function transliterate_word(tok)
  local usub = mw.ustring.sub
  local len = mw.ustring.len(tok)

  if not consonant_set then
    consonant_set = { r = true }
    for _, c in ipairs(consonants) do
      consonant_set[c] = true
    end
  end

  -- Returns the Latin vowel starting at `pos` and its length in characters,
  -- or nil, 0 when no vowel starts there (including past the end of `tok`).
  local function vowel_at(pos)
    if usub(tok, pos, pos + 1) == "eu" then
      return "eu", 2
    end
    local ch = usub(tok, pos, pos)
    if latinVowels[ch] then
      return ch, 1
    end
    return nil, 0
  end

  -- Returns the longest Latin consonant starting at `pos` and its length in
  -- characters, or nil, 0 when no consonant starts there.
  local function consonant_at(pos)
    local one = usub(tok, pos, pos)
    local nxt = usub(tok, pos + 1, pos + 1)
    if nxt ~= "" and consonant_set[one .. nxt] then
      return one .. nxt, 2
    end
    if consonant_set[one] then
      return one, 1
    end
    return nil, 0
  end

  local out = {}
  -- True while the output so far ends in a vowel sound (inherent or written).
  local after_vowel = false
  local i = 1
  while i <= len do
    local c, clen = consonant_at(i)
    if c then
      local glyph = (c == "r") and ra or mapping[c]
      local pos = i + clen
      -- Medial r: only when a vowel follows it, so the cluster forms one
      -- syllable and no unwritten inherent vowel is implied.
      if c ~= "r" and usub(tok, pos, pos) == "r" and vowel_at(pos + 1) then
        glyph = glyph .. panyakra
        pos = pos + 1
      end
      local v, vlen = vowel_at(pos)
      if v then
        -- diacritics has no entry for "a": the inherent vowel is unwritten.
        glyph = glyph .. (diacritics[v] or "")
        pos = pos + vlen
        after_vowel = true
      elseif c == "r" and after_vowel then
        -- Syllable-final r is a sign on the preceding syllable.
        glyph = panglayar
        after_vowel = false
      else
        -- Bare consonant: suppress the inherent vowel.
        glyph = glyph .. virama
        after_vowel = false
      end
      out[#out + 1] = glyph
      i = pos
    else
      local v, vlen = vowel_at(i)
      if v then
        -- Vowel with no consonant to attach to: independent vowel letter.
        out[#out + 1] = mapping[v]
        i = i + vlen
        after_vowel = true
      else
        -- Digits, capitals and anything else unmapped pass through as-is.
        out[#out + 1] = usub(tok, i, i)
        i = i + 1
        after_vowel = false
      end
    end
  end
  return table.concat(out)
end

--- Transliterate Latin-script text into Sundanese script.
-- Every run of alphanumeric characters (`%w+`) is passed to
-- transliterate_word; everything between the runs (spaces, punctuation) is
-- left untouched.
-- @param text string: the text to transliterate
-- @return string: the transliterated text (exactly one return value)
function export.tr(text)
  -- The parentheses discard gsub's second result (the substitution count),
  -- which would otherwise leak to callers as an extra return value and
  -- break uses such as table.insert(t, export.tr(s)).
  return (mw.ustring.gsub(text, '%w+', transliterate_word))
end

return export