Module:mn-common: Difference between revisions

No edit summary
Tag: Manual revert
No edit summary
 
Line 1: Line 1:
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local export = {}
local export = {}
local lang = require("Module:languages").getByCode( "mn" )
local links = require( "Module:links" )
table["reverse"] = require( "Module:table" ).reverse
table["removeDuplicates"] = require( "Module:table" ).removeDuplicates


local char = mw.ustring.char
local concat = table.concat
local find = mw.ustring.find
local find = m_str_utils.find
local len = mw.ustring.len
local insert = table.insert
local match = mw.ustring.match
local len = m_str_utils.len
local gmatch = mw.ustring.gmatch
local match = m_str_utils.match
local sub = mw.ustring.sub
local gmatch = m_str_utils.gmatch
local gsub = mw.ustring.gsub
local sub = m_str_utils.sub
local lower = mw.ustring.lower
local gsub = m_str_utils.gsub
local reverse = require( "Module:string" ).reverse
local lower = m_str_utils.lower
local reverse = m_str_utils.reverse
local reverse_array = m_table.reverse
local remove_duplicates = m_table.removeDuplicates
local sort = table.sort
local u = m_str_utils.char


-- Named code points stored on the `export` table. Each value is built with
-- `u`, this file's alias for m_str_utils.char. The Unicode character names
-- below are those assigned to the respective code points.
-- Every field was previously assigned twice in a row (once via `char`, once
-- via `u`); only the second, effective assignment is kept.
export.FVS1 = u( 0x180B ) -- U+180B MONGOLIAN FREE VARIATION SELECTOR ONE
export.FVS2 = u( 0x180C ) -- U+180C MONGOLIAN FREE VARIATION SELECTOR TWO
export.FVS3 = u( 0x180D ) -- U+180D MONGOLIAN FREE VARIATION SELECTOR THREE
export.FVS4 = u( 0x180F ) -- U+180F MONGOLIAN FREE VARIATION SELECTOR FOUR
export.MVS = u( 0x180E ) -- U+180E MONGOLIAN VOWEL SEPARATOR
export.NNBSP = u( 0x202F ) -- U+202F NARROW NO-BREAK SPACE
-- U+F000 lies in the Unicode Private Use Area. The syllable-splitting code
-- in this file keeps this character when stripping diacritics and places a
-- break right after it; a comment there says it is used by the inflection
-- templates.
export.stem_barrier = u( 0xF000 )


--- Wrap text in an HTML span tagged with class "Mong" and lang "mn".
-- The text is concatenated in verbatim; no HTML escaping is applied here.
-- @param text The string (or number) to wrap.
-- @return The opening span tag, followed by `text`, followed by `</span>`.
local function format_Mongolian_text(text)
	return "<span class=\"Mong\" lang=\"mn\">" .. text .. "</span>"
end
Line 32: Line 36:
local args = require("Module:parameters").process(frame:getParent().args, params)
local args = require("Module:parameters").process(frame:getParent().args, params)
local title = args[1]
local title = args[1]
local curr_title = mw.title.getCurrentTitle().subpageText
local curr_title = mw.loadData("Module:headword/data").pagename
local content = mw.title.new(title):getContent()
local content = mw.title.new(title):getContent()
local senses = {}
local senses = {}
Line 45: Line 49:
if not match(content, "==Mongolian==") then
if not match(content, "==Mongolian==") then
categories = categories .. "[[Category:Mongolian redlinks/mn-see]]"
categories = categories .. "[[Category:Mongolian redlinks/mn-see]]"
elseif not match(content, "mn%-IPA") and not match(content, "mn%-see") then
require("Module:debug").track("mn-see/unidirectional reference to variant")
elseif not match(content, curr_title) then
require("Module:debug").track("mn-see/unidirectional reference variant→orthodox")
end
end
end
end
Line 77: Line 85:
if not sense:match("rfdef") and not sense:match("defn") then
if not sense:match("rfdef") and not sense:match("defn") then
sense_id = sense_id + 1
sense_id = sense_id + 1
table.insert(senses, sense)
insert(senses, sense)
end
end
end
end
table.insert(text, table.concat(senses, "\n# "))
insert(text, concat(senses, "\n# "))
table.insert(text, "</div>")
return frame:preprocess( table.concat(text) )
return frame:preprocess( concat(text) )
end
end
Line 95: Line 101:
local args = require("Module:parameters").process(frame:getParent().args, params)
local args = require("Module:parameters").process(frame:getParent().args, params)
local title = args[1]
local title = args[1]
local curr_title = mw.title.getCurrentTitle().subpageText
local curr_title = mw.loadData("Module:headword/data").pagename
local content = mw.title.new(title):getContent()
local content = mw.title.new(title):getContent()
local senses = {}
local senses = {}
Line 108: Line 114:
if not match(content, "==Mongolian==") then
if not match(content, "==Mongolian==") then
categories = categories .. "[[Category:Mongolian redlinks/mn-IPA-see]]"
categories = categories .. "[[Category:Mongolian redlinks/mn-IPA-see]]"
elseif not match(content, "mn%-IPA") and not match(content, "mn%-see") and not match(content, "mn%-IPA-see") then
require("Module:debug").track("mn-IPA-see/unidirectional reference to variant")
elseif not match(content, curr_title) then
require("Module:debug").track("mn-IPA-see/unidirectional reference variant→orthodox")
end
end
end
end
Line 158: Line 168:
for _, v in pairs( switchers ) do
for _, v in pairs( switchers ) do
v = gsub( v, "(.)(.)", "%1" .. char( 0x301 ) .. "?" .. char( 0x300 ) .. "?%2" )
v = gsub( v, "(.)(.)", "%1" .. u( 0x301 ) .. "?" .. u( 0x300 ) .. "?%2" )
local c = 0
local c = 0
while c ~= nil do
while c ~= nil do
c = find( lower( text ), v, c + 1 )
c = find( lower( text ), v, c + 1 )
if c ~= nil and c ~= 1 then
if c ~= nil and c ~= 1 then
table.insert( breaks, c )
insert( breaks, c )
end
end
end
end
Line 171: Line 181:
for _,v in pairs( switchers2 ) do
for _,v in pairs( switchers2 ) do
if match( text, v .. "$" ) then
if match( text, v .. "$" ) then
table.insert( breaks, len( text ) - 1 )
insert( breaks, len( text ) - 1 )
end
end
end
end
end
end
table.sort( breaks )
sort( breaks )
for i, b in ipairs( breaks ) do
for i, b in ipairs( breaks ) do
table.insert( vh, { Cyrl = {}, Mong = {} } )
insert( vh, { Cyrl = {}, Mong = {} } )
if i == #breaks then
if i == #breaks then
vh[i].substring = sub( text, b, len( text ) )
vh[i].substring = sub( text, b, len( text ) )
Line 193: Line 203:
s.substring = lower( s.substring )
s.substring = lower( s.substring )
if params.bor == "Russian" then s.substring = gsub( s.substring, "у", "ү" ) end
if params.bor == "Russian" then s.substring = gsub( s.substring, "у", "ү" ) end
local substring_nostress = gsub( s.substring, "[" .. char( 0x301 ) .. char( 0x300 ) .. "]", "" )
local substring_nostress = gsub( s.substring, "[" .. u( 0x301 ) .. u( 0x300 ) .. "]", "" )
if match( s.substring, "кило" .. char( 0x301 ) .. "?$" ) then -- irregular
if match( s.substring, "кило" .. u( 0x301 ) .. "?$" ) then -- irregular
vh[i].Cyrl.a = "э"
vh[i].Cyrl.a = "э"
vh[i].location = find( s.substring, "[эүею]" )
vh[i].location = find( s.substring, "[эүею]" )
Line 209: Line 219:
vh[i].position = "front"
vh[i].position = "front"
vh[i].quality = "unrounded"
vh[i].quality = "unrounded"
elseif match( s.substring, "[ауяᠠᠣᠤ]" .. char( 0x301 ) ) then
elseif match( s.substring, "[ауяᠠᠣᠤ]" .. u( 0x301 ) ) then
vh[i].Cyrl.a = "а"
vh[i].Cyrl.a = "а"
vh[i].location = find( s.substring, char( 0x301 ) ) - 1
vh[i].location = find( s.substring, u( 0x301 ) ) - 1
vh[i].position = "back"
vh[i].position = "back"
vh[i].quality = "unrounded"
vh[i].quality = "unrounded"
elseif match( s.substring, "[оё]" .. char( 0x301 ) ) then
elseif match( s.substring, "[оё]" .. u( 0x301 ) ) then
vh[i].Cyrl.a = "о"
vh[i].Cyrl.a = "о"
vh[i].location = find( s.substring, char( 0x301 ) ) - 1
vh[i].location = find( s.substring, u( 0x301 ) ) - 1
vh[i].position = "back"
vh[i].position = "back"
vh[i].quality = "rounded"
vh[i].quality = "rounded"
Line 401: Line 411:
local punctuation = "[%s%p]"
local punctuation = "[%s%p]"
local final_clusters = require( "Module:mn/data" ).syll_final_cons
local final_clusters = require( "Module:mn/data" ).syll_final_cons
local stress = char( 0x301 ) .. char( 0x300 )
local stress = u( 0x301 ) .. u( 0x300 )
-- Strip diacritics.
-- Strip diacritics.
local chars = {}
local chars = {}
for v in gmatch( text, "[%w%s%p" .. stress .. export.stem_barrier .. "]" ) do
for v in gmatch( text, "[%w%s%p" .. stress .. export.stem_barrier .. "]" ) do
table.insert( chars, v )
insert( chars, v )
end
end
Line 413: Line 423:
-- First letter.
-- First letter.
if i == 1 or match( chars[i-1], punctuation ) then
if i == 1 or match( chars[i-1], punctuation ) then
table.insert( breaks, i )
insert( breaks, i )
-- Stem barrier is used by the inflection templates.
-- Stem barrier is used by the inflection templates.
elseif match( chars[i-1], export.stem_barrier ) then
elseif match( chars[i-1], export.stem_barrier ) then
table.insert( breaks, i )
insert( breaks, i )
-- If a vowel preceded by a hard sign or the temporary break character, then must be the break.
-- If a vowel preceded by a hard sign or the temporary break character, then must be the break.
elseif match( v, vowel ) and match( chars[i-1], "[Ъъ]" ) then
elseif match( v, vowel ) and match( chars[i-1], "[Ъъ]" ) then
table.insert( breaks, i )
insert( breaks, i )
-- If Е/е preceded by a soft sign, count backwards until vowel, punctuation/space or start of string is found; if a vowel is found first, then preceding sign must be medial, so is the break; if punctuation/start of string found first, letter is part of word-initial cluster, so is not the break (occurs in loanwords, e.g. Вьет|нам ("Vietnam")).
-- If Е/е preceded by a soft sign, count backwards until vowel, punctuation/space or start of string is found; if a vowel is found first, then preceding sign must be medial, so is the break; if punctuation/start of string found first, letter is part of word-initial cluster, so is not the break (occurs in loanwords, e.g. Вьет|нам ("Vietnam")).
elseif match( v, "[Ее]" ) and match( chars[i-1], "[Ьь]" ) then
elseif match( v, "[Ее]" ) and match( chars[i-1], "[Ьь]" ) then
Line 428: Line 438:
-- If break, replaces the consonant preceding the soft sign as the break.
-- If break, replaces the consonant preceding the soft sign as the break.
if breaks[#breaks] == i - 2 then breaks[#breaks] = nil end
if breaks[#breaks] == i - 2 then breaks[#breaks] = nil end
table.insert( breaks, i )
insert( breaks, i )
end
end
end
end
Line 441: Line 451:
-- If break, replaces the consonant preceding the soft sign as the break.
-- If break, replaces the consonant preceding the soft sign as the break.
if breaks[#breaks] == i - 2 then breaks[#breaks] = nil end
if breaks[#breaks] == i - 2 then breaks[#breaks] = nil end
table.insert( breaks, i )
insert( breaks, i )
end
end
break
break
Line 453: Line 463:
j = j - 1
j = j - 1
if match( chars[j], vowel ) then
if match( chars[j], vowel ) then
table.insert( breaks, i )
insert( breaks, i )
end
end
end
end
Line 463: Line 473:
while j > 1 and j > i - #final_clusters and stable > i - j and ( match( chars[j-1], consonant ) or match( chars[j-1], sign ) ) do
while j > 1 and j > i - #final_clusters and stable > i - j and ( match( chars[j-1], consonant ) or match( chars[j-1], sign ) ) do
j = j - 1
j = j - 1
table.insert( check, chars[j] )
insert( check, chars[j] )
for k,cluster in ipairs( final_clusters[#check] ) do
for k,cluster in ipairs( final_clusters[#check] ) do
if match( table.concat( table.reverse( check ) ), cluster ) then
if match( concat( reverse_array( check ) ), cluster ) then
stable = stable + 1
stable = stable + 1
break
break
Line 471: Line 481:
end
end
if stable == i - j then
if stable == i - j then
table.insert( breaks, j )
insert( breaks, j )
end
end
end
end
-- Iotated ("ya"-type) vowel after a vowel.
-- Iotated ("ya"-type) vowel after a vowel.
elseif match( v, iotated ) and ( match( chars[i-1], vowel ) or ( match( chars[i-1], "[".. stress .. "]" ) and match( chars[i-2], vowel ) ) ) then
elseif match( v, iotated ) and ( match( chars[i-1], vowel ) or ( match( chars[i-1], "[".. stress .. "]" ) and match( chars[i-2], vowel ) ) ) then
table.insert( breaks, i )
insert( breaks, i )
end
end
end
end
-- Reform text without diacritics.
-- Reform text without diacritics.
text = table.concat( chars )
text = concat( chars )
breaks = table.removeDuplicates( breaks )
breaks = remove_duplicates( breaks )
local syll = {}
local syll = {}
for i,v in ipairs( breaks ) do
for i,v in ipairs( breaks ) do
if i == #breaks then
if i == #breaks then
table.insert( syll, sub( text, v ) )
insert( syll, sub( text, v ) )
else
else
table.insert( syll, sub( text, v, breaks[i+1] - 1 ) )
insert( syll, sub( text, v, breaks[i+1] - 1 ) )
end
end
end
end
Line 513: Line 523:
match( text, "ө[влмн]бө$" ),
match( text, "ө[влмн]бө$" ),
match( text, "[эүе][влмн]бэ$" ),
match( text, "[эүе][влмн]бэ$" ),
match( text, "[бвглмнр]" .. vh.Cyrl.a .. char( 0x301 ) .. "?" .. char( 0x300 ) .. "?н" .. vh.Cyrl.a .. "$" ),
match( text, "[бвглмнр]" .. vh.Cyrl.a .. u( 0x301 ) .. "?" .. u( 0x300 ) .. "?н" .. vh.Cyrl.a .. "$" ),
match( text, "[ауя]нга$" ),
match( text, "[ауя]нга$" ),
match( text, "[оё]нго$" ),
match( text, "[оё]нго$" ),
Line 534: Line 544:
if not params then params = {} end
if not params then params = {} end
local vh = export.vowelharmony( text, params )[#export.vowelharmony( text, params )]
local vh = export.vowelharmony( text, params )[#export.vowelharmony( text, params )]
local syllables = table.reverse( export.syllables( text ) )
local syllables = reverse_array( export.syllables( text ) )
if not params.proper and ( vh.location ~= len( text ) - 1 or vh.violation == false ) and not params.bor then -- exclude proper nouns, loanwords and terms where the deleted vowel determines the vowel harmony
if not params.proper and ( vh.location ~= len( text ) - 1 or vh.violation == false ) and not params.bor then -- exclude proper nouns, loanwords and terms where the deleted vowel determines the vowel harmony
Line 574: Line 584:
end
end
end
end
return table.concat( table.reverse( syllables ) )
return concat( reverse_array( syllables ) )
end
end


Line 582: Line 592:
for _, v in ipairs( forms ) do
for _, v in ipairs( forms ) do
local val = gsub( v.form, "|", "<!>" )
local val = gsub( v.form, "|", "<!>" )
table.insert( new_vals, val )
insert( new_vals, val )
end
end
return table.concat( new_vals, "," )
return concat( new_vals, "," )
else
else
return nil
return nil