Module:mn-common: Difference between revisions
No edit summary Tag: Manual revert |
No edit summary |
||
| Line 1: | Line 1: | ||
local m_str_utils = require("Module:string utilities") | |||
local m_table = require("Module:table") | |||
local export = {} | local export = {} | ||
local | local concat = table.concat | ||
local find = | local find = m_str_utils.find | ||
local len = | local insert = table.insert | ||
local match = | local len = m_str_utils.len | ||
local gmatch = | local match = m_str_utils.match | ||
local sub = | local gmatch = m_str_utils.gmatch | ||
local gsub = | local sub = m_str_utils.sub | ||
local lower = | local gsub = m_str_utils.gsub | ||
local reverse = | local lower = m_str_utils.lower | ||
local reverse = m_str_utils.reverse | |||
local reverse_array = m_table.reverse | |||
local remove_duplicates = m_table.removeDuplicates | |||
local sort = table.sort | |||
local u = m_str_utils.char | |||
export.FVS1 = | export.FVS1 = u( 0x180B ) | ||
export.FVS2 = | export.FVS2 = u( 0x180C ) | ||
export.FVS3 = | export.FVS3 = u( 0x180D ) | ||
export.FVS4 = | export.FVS4 = u( 0x180F ) | ||
export.MVS = | export.MVS = u( 0x180E ) | ||
export.NNBSP = | export.NNBSP = u( 0x202F ) | ||
export.stem_barrier = | export.stem_barrier = u( 0xF000 ) | ||
local function format_Mongolian_text(text) return "<span class=\"Mong\" lang=\"mn\">" .. text .. "</span>" end | local function format_Mongolian_text(text) return "<span class=\"Mong\" lang=\"mn\">" .. text .. "</span>" end | ||
| Line 32: | Line 36: | ||
local args = require("Module:parameters").process(frame:getParent().args, params) | local args = require("Module:parameters").process(frame:getParent().args, params) | ||
local title = args[1] | local title = args[1] | ||
local curr_title = mw. | local curr_title = mw.loadData("Module:headword/data").pagename | ||
local content = mw.title.new(title):getContent() | local content = mw.title.new(title):getContent() | ||
local senses = {} | local senses = {} | ||
| Line 45: | Line 49: | ||
if not match(content, "==Mongolian==") then | if not match(content, "==Mongolian==") then | ||
categories = categories .. "[[Category:Mongolian redlinks/mn-see]]" | categories = categories .. "[[Category:Mongolian redlinks/mn-see]]" | ||
elseif not match(content, "mn%-IPA") and not match(content, "mn%-see") then | |||
require("Module:debug").track("mn-see/unidirectional reference to variant") | |||
elseif not match(content, curr_title) then | |||
require("Module:debug").track("mn-see/unidirectional reference variant→orthodox") | |||
end | end | ||
end | end | ||
| Line 77: | Line 85: | ||
if not sense:match("rfdef") and not sense:match("defn") then | if not sense:match("rfdef") and not sense:match("defn") then | ||
sense_id = sense_id + 1 | sense_id = sense_id + 1 | ||
insert(senses, sense) | |||
end | end | ||
end | end | ||
insert(text, concat(senses, "\n# ")) | |||
return frame:preprocess( | return frame:preprocess( concat(text) ) | ||
end | end | ||
| Line 95: | Line 101: | ||
local args = require("Module:parameters").process(frame:getParent().args, params) | local args = require("Module:parameters").process(frame:getParent().args, params) | ||
local title = args[1] | local title = args[1] | ||
local curr_title = mw. | local curr_title = mw.loadData("Module:headword/data").pagename | ||
local content = mw.title.new(title):getContent() | local content = mw.title.new(title):getContent() | ||
local senses = {} | local senses = {} | ||
| Line 108: | Line 114: | ||
if not match(content, "==Mongolian==") then | if not match(content, "==Mongolian==") then | ||
categories = categories .. "[[Category:Mongolian redlinks/mn-IPA-see]]" | categories = categories .. "[[Category:Mongolian redlinks/mn-IPA-see]]" | ||
elseif not match(content, "mn%-IPA") and not match(content, "mn%-see") and not match(content, "mn%-IPA-see") then | |||
require("Module:debug").track("mn-IPA-see/unidirectional reference to variant") | |||
elseif not match(content, curr_title) then | |||
require("Module:debug").track("mn-IPA-see/unidirectional reference variant→orthodox") | |||
end | end | ||
end | end | ||
| Line 158: | Line 168: | ||
for _, v in pairs( switchers ) do | for _, v in pairs( switchers ) do | ||
v = gsub( v, "(.)(.)", "%1" .. | v = gsub( v, "(.)(.)", "%1" .. u( 0x301 ) .. "?" .. u( 0x300 ) .. "?%2" ) | ||
local c = 0 | local c = 0 | ||
while c ~= nil do | while c ~= nil do | ||
c = find( lower( text ), v, c + 1 ) | c = find( lower( text ), v, c + 1 ) | ||
if c ~= nil and c ~= 1 then | if c ~= nil and c ~= 1 then | ||
insert( breaks, c ) | |||
end | end | ||
end | end | ||
| Line 171: | Line 181: | ||
for _,v in pairs( switchers2 ) do | for _,v in pairs( switchers2 ) do | ||
if match( text, v .. "$" ) then | if match( text, v .. "$" ) then | ||
insert( breaks, len( text ) - 1 ) | |||
end | end | ||
end | end | ||
end | end | ||
sort( breaks ) | |||
for i, b in ipairs( breaks ) do | for i, b in ipairs( breaks ) do | ||
insert( vh, { Cyrl = {}, Mong = {} } ) | |||
if i == #breaks then | if i == #breaks then | ||
vh[i].substring = sub( text, b, len( text ) ) | vh[i].substring = sub( text, b, len( text ) ) | ||
| Line 193: | Line 203: | ||
s.substring = lower( s.substring ) | s.substring = lower( s.substring ) | ||
if params.bor == "Russian" then s.substring = gsub( s.substring, "у", "ү" ) end | if params.bor == "Russian" then s.substring = gsub( s.substring, "у", "ү" ) end | ||
local substring_nostress = gsub( s.substring, "[" .. | local substring_nostress = gsub( s.substring, "[" .. u( 0x301 ) .. u( 0x300 ) .. "]", "" ) | ||
if match( s.substring, "кило" .. | if match( s.substring, "кило" .. u( 0x301 ) .. "?$" ) then -- irregular | ||
vh[i].Cyrl.a = "э" | vh[i].Cyrl.a = "э" | ||
vh[i].location = find( s.substring, "[эүею]" ) | vh[i].location = find( s.substring, "[эүею]" ) | ||
| Line 209: | Line 219: | ||
vh[i].position = "front" | vh[i].position = "front" | ||
vh[i].quality = "unrounded" | vh[i].quality = "unrounded" | ||
elseif match( s.substring, "[ауяᠠᠣᠤ]" .. | elseif match( s.substring, "[ауяᠠᠣᠤ]" .. u( 0x301 ) ) then | ||
vh[i].Cyrl.a = "а" | vh[i].Cyrl.a = "а" | ||
vh[i].location = find( s.substring, | vh[i].location = find( s.substring, u( 0x301 ) ) - 1 | ||
vh[i].position = "back" | vh[i].position = "back" | ||
vh[i].quality = "unrounded" | vh[i].quality = "unrounded" | ||
elseif match( s.substring, "[оё]" .. | elseif match( s.substring, "[оё]" .. u( 0x301 ) ) then | ||
vh[i].Cyrl.a = "о" | vh[i].Cyrl.a = "о" | ||
vh[i].location = find( s.substring, | vh[i].location = find( s.substring, u( 0x301 ) ) - 1 | ||
vh[i].position = "back" | vh[i].position = "back" | ||
vh[i].quality = "rounded" | vh[i].quality = "rounded" | ||
| Line 401: | Line 411: | ||
local punctuation = "[%s%p]" | local punctuation = "[%s%p]" | ||
local final_clusters = require( "Module:mn/data" ).syll_final_cons | local final_clusters = require( "Module:mn/data" ).syll_final_cons | ||
local stress = | local stress = u( 0x301 ) .. u( 0x300 ) | ||
-- Strip diacritics. | -- Strip diacritics. | ||
local chars = {} | local chars = {} | ||
for v in gmatch( text, "[%w%s%p" .. stress .. export.stem_barrier .. "]" ) do | for v in gmatch( text, "[%w%s%p" .. stress .. export.stem_barrier .. "]" ) do | ||
insert( chars, v ) | |||
end | end | ||
| Line 413: | Line 423: | ||
-- First letter. | -- First letter. | ||
if i == 1 or match( chars[i-1], punctuation ) then | if i == 1 or match( chars[i-1], punctuation ) then | ||
insert( breaks, i ) | |||
-- Stem barrier is used by the inflection templates. | -- Stem barrier is used by the inflection templates. | ||
elseif match( chars[i-1], export.stem_barrier ) then | elseif match( chars[i-1], export.stem_barrier ) then | ||
insert( breaks, i ) | |||
-- If a vowel preceded by a hard sign or the temporary break character, then must be the break. | -- If a vowel preceded by a hard sign or the temporary break character, then must be the break. | ||
elseif match( v, vowel ) and match( chars[i-1], "[Ъъ]" ) then | elseif match( v, vowel ) and match( chars[i-1], "[Ъъ]" ) then | ||
insert( breaks, i ) | |||
-- If Е/е preceded by a soft sign, count backwards until vowel, punctuation/space or start of string is found; if a vowel is found first, then preceding sign must be medial, so is the break; if punctuation/start of string found first, letter is part of word-initial cluster, so is not the break (occurs in loanwords, e.g. Вьет|нам ("Vietnam")). | -- If Е/е preceded by a soft sign, count backwards until vowel, punctuation/space or start of string is found; if a vowel is found first, then preceding sign must be medial, so is the break; if punctuation/start of string found first, letter is part of word-initial cluster, so is not the break (occurs in loanwords, e.g. Вьет|нам ("Vietnam")). | ||
elseif match( v, "[Ее]" ) and match( chars[i-1], "[Ьь]" ) then | elseif match( v, "[Ее]" ) and match( chars[i-1], "[Ьь]" ) then | ||
| Line 428: | Line 438: | ||
-- If break, replaces the consonant preceding the soft sign as the break. | -- If break, replaces the consonant preceding the soft sign as the break. | ||
if breaks[#breaks] == i - 2 then breaks[#breaks] = nil end | if breaks[#breaks] == i - 2 then breaks[#breaks] = nil end | ||
insert( breaks, i ) | |||
end | end | ||
end | end | ||
| Line 441: | Line 451: | ||
-- If break, replaces the consonant preceding the soft sign as the break. | -- If break, replaces the consonant preceding the soft sign as the break. | ||
if breaks[#breaks] == i - 2 then breaks[#breaks] = nil end | if breaks[#breaks] == i - 2 then breaks[#breaks] = nil end | ||
insert( breaks, i ) | |||
end | end | ||
break | break | ||
| Line 453: | Line 463: | ||
j = j - 1 | j = j - 1 | ||
if match( chars[j], vowel ) then | if match( chars[j], vowel ) then | ||
insert( breaks, i ) | |||
end | end | ||
end | end | ||
| Line 463: | Line 473: | ||
while j > 1 and j > i - #final_clusters and stable > i - j and ( match( chars[j-1], consonant ) or match( chars[j-1], sign ) ) do | while j > 1 and j > i - #final_clusters and stable > i - j and ( match( chars[j-1], consonant ) or match( chars[j-1], sign ) ) do | ||
j = j - 1 | j = j - 1 | ||
insert( check, chars[j] ) | |||
for k,cluster in ipairs( final_clusters[#check] ) do | for k,cluster in ipairs( final_clusters[#check] ) do | ||
if match( | if match( concat( reverse_array( check ) ), cluster ) then | ||
stable = stable + 1 | stable = stable + 1 | ||
break | break | ||
| Line 471: | Line 481: | ||
end | end | ||
if stable == i - j then | if stable == i - j then | ||
insert( breaks, j ) | |||
end | end | ||
end | end | ||
-- Iotated ("ya"-type) vowel after a vowel. | -- Iotated ("ya"-type) vowel after a vowel. | ||
elseif match( v, iotated ) and ( match( chars[i-1], vowel ) or ( match( chars[i-1], "[".. stress .. "]" ) and match( chars[i-2], vowel ) ) ) then | elseif match( v, iotated ) and ( match( chars[i-1], vowel ) or ( match( chars[i-1], "[".. stress .. "]" ) and match( chars[i-2], vowel ) ) ) then | ||
insert( breaks, i ) | |||
end | end | ||
end | end | ||
-- Reform text without diacritics. | -- Reform text without diacritics. | ||
text = | text = concat( chars ) | ||
breaks = | breaks = remove_duplicates( breaks ) | ||
local syll = {} | local syll = {} | ||
for i,v in ipairs( breaks ) do | for i,v in ipairs( breaks ) do | ||
if i == #breaks then | if i == #breaks then | ||
insert( syll, sub( text, v ) ) | |||
else | else | ||
insert( syll, sub( text, v, breaks[i+1] - 1 ) ) | |||
end | end | ||
end | end | ||
| Line 513: | Line 523: | ||
match( text, "ө[влмн]бө$" ), | match( text, "ө[влмн]бө$" ), | ||
match( text, "[эүе][влмн]бэ$" ), | match( text, "[эүе][влмн]бэ$" ), | ||
match( text, "[бвглмнр]" .. vh.Cyrl.a .. | match( text, "[бвглмнр]" .. vh.Cyrl.a .. u( 0x301 ) .. "?" .. u( 0x300 ) .. "?н" .. vh.Cyrl.a .. "$" ), | ||
match( text, "[ауя]нга$" ), | match( text, "[ауя]нга$" ), | ||
match( text, "[оё]нго$" ), | match( text, "[оё]нго$" ), | ||
| Line 534: | Line 544: | ||
if not params then params = {} end | if not params then params = {} end | ||
local vh = export.vowelharmony( text, params )[#export.vowelharmony( text, params )] | local vh = export.vowelharmony( text, params )[#export.vowelharmony( text, params )] | ||
local syllables = | local syllables = reverse_array( export.syllables( text ) ) | ||
if not params.proper and ( vh.location ~= len( text ) - 1 or vh.violation == false ) and not params.bor then -- exclude proper nouns, loanwords and terms where the deleted vowel determines the vowel harmony | if not params.proper and ( vh.location ~= len( text ) - 1 or vh.violation == false ) and not params.bor then -- exclude proper nouns, loanwords and terms where the deleted vowel determines the vowel harmony | ||
| Line 574: | Line 584: | ||
end | end | ||
end | end | ||
return | return concat( reverse_array( syllables ) ) | ||
end | end | ||
| Line 582: | Line 592: | ||
for _, v in ipairs( forms ) do | for _, v in ipairs( forms ) do | ||
local val = gsub( v.form, "|", "<!>" ) | local val = gsub( v.form, "|", "<!>" ) | ||
insert( new_vals, val ) | |||
end | end | ||
return | return concat( new_vals, "," ) | ||
else | else | ||
return nil | return nil | ||