Module:interlinear: Difference between revisions

No edit summary
No edit summary
 
(9 intermediate revisions by the same user not shown)
Line 1: Line 1:
local p = {}
local p = {}
local data = mw.loadData( 'Module:Interlinear/data' )
local data = mw.loadData( 'Module:interlinear/data' )
local gloss_override = {} -- for custom gloss abbreviations
local gloss_override = {} -- for custom gloss abbreviations
local getArgs = require('Module:Arguments').getArgs
local getArgs = require('Module:Arguments').getArgs
local yesno = require('Module:Yesno')
local yesno = require('Module:Yesno')
local lang_data = mw.loadData( 'Module:Lang/data' )
local languages = require('Module:languages') -- Switched from Module:Lang/data


--------------------------
--------------------------
Line 15: Line 15:
-------------------
-------------------
local conf = { --settings
local conf = { --settings
WordSeparator = " \n\r\t", -- Don't replace with %s as this would include non-breaking spaces
WordSeparator = " \n\r\t",  
GlossAbbrPattern = "^([Ø0-9A-Z]+)$", -- this isn't a full regex, but a Lua pattern
GlossAbbrPattern = "^([Ø0-9A-Z]+)$",
-- NOTE: The following characters must be formatted for use in a pattern set.
GlossAbbrBoundary = "-.,;:<>‹›⟨⟩/\\~+=%?%s%[%]()%_\127'",
GlossAbbrBoundary = "-.,;:<>‹›/\\~+=%?%s%[%]()%_\127'",
GlossExcludeTable = {I = true,},
GlossExcludeTable = {I = true,}, --strings not to be treated as glossing abbreviations
GlossExcludePattern = '^[0-9][0-9]+$',  
GlossExcludePattern = '^[0-9][0-9]+$', -- excludes strings consisting entirely of digits
GlossSmallCapsExclude = "^[AOPS]$",  
GlossSmallCapsExclude = "^[AOPS]$", -- glossing abbreviations matching this pattern will not be rendered in small caps
GlossingType = "label",  
GlossingType = "label", -- if set to "label" gloss abbreviations are formatted as an <abbr> with the "label" appearing in a tooltip
-- if set to "wikilink" the abbreviation is formatted as a wikilink to the relevant wikipedia article
-- if set to "none" abbreviations aren't formatted at all
ErrorCategory = "[[Category:Pages with errors in interlinear text]]",
ErrorCategory = "[[Category:Pages with errors in interlinear text]]",
AmbiguousGlossCategory = "[[Category:Articles with ambiguous glossing abbreviations]]",
AmbiguousGlossCategory = "[[Category:Articles with ambiguous glossing abbreviations]]",
MessageGlossingError = "Error(s) in interlinear glossing",
MessageGlossingError = "Error(s) in interlinear glossing",
combining_gender_numbers = "[0-9][0-9]?$", --e.g. G4 '4th gender' or CL7 'class 7'
combining_gender_numbers = "[0-9][0-9]?$",  
combining_gender_prefixes = {G = "gender", CL = "class"},
combining_gender_prefixes = {G = "gender", CL = "class"},
combining_person = {["1"] = "first person", ["2"] = "second person", ["3"] = "third person"},
combining_person = {["1"] = "first person", ["2"] = "second person", ["3"] = "third person",},
combining_number = {S = "singular", SG = "singular", P = "plural", PL = "plural", D = "dual", DU = "dual", TRI = "trial"},
combining_number = {
S = "singular", SG = "singular",
P = "plural", PL = "plural",
D = "dual", DU = "dual",
TRI = "trial", PAU = "paucal", COL = "collective",
IN = "inclusive", INC = "inclusive", INCL = "inclusive",
EX = "exclusive", EXC = "exclusive", EXCL = "exclusive"
},
combining_gender = {F = "feminine", M = "masculine", N = "neuter"},
combining_gender = {F = "feminine", M = "masculine", N = "neuter"},
LowerCaseGlosses = {["1sg"] = true, ["2sg"] = true, ["3sg"] = true, ["1du"] = true, ["2du"] = true, ["3du"] = true, ["1pl"] = true, ["2pl"] = true,
LowerCaseGlosses = {
["3pl"] = true, ["Fsg"] = true, ["Fpl"] = true, ["Msg"] = true, ["Mpl"] = true,}, -- these are the non-all-upper-case strings that will be recognised as glossing abbreviations
["1sg"] = true, ["2sg"] = true, ["3sg"] = true,
["1du"] = true, ["2du"] = true, ["3du"] = true,
["1pl"] = true, ["2pl"] = true, ["3pl"] = true,
["Fsg"] = true, ["Fpl"] = true, ["Msg"] = true, ["Mpl"] = true,
},  
ErrorHelpLocation = "Template:Interlinear",
ErrorHelpLocation = "Template:Interlinear",
}
}
Line 41: Line 49:
-- CSS styles and classes
-- CSS styles and classes
---------------------
---------------------
conf.style = { --CSS styles
conf.style = {  
WordDiv = "float: left; margin-bottom: 0.3em;",
WordDiv = "float: left; margin-bottom: 0.3em;",
WordMargin = "margin-right: 1em;",
WordMargin = "margin-right: 1em;",
WordP = "margin: 0px;", -- the style for the word <p> elements
WordP = "margin: 0px;",  
GlossAbbr = "font-variant: small-caps; font-variant-numeric: oldstyle-nums; text-transform: lowercase; ", -- won't be applied to gloss abbreviations containing lower-case characters
GlossAbbr = "font-variant: small-caps; font-variant-numeric: lining-nums; text-transform: lowercase; ",
HiddenText = "display: none;",
HiddenText = "display: none;",
EndDiv = "clear: left; display: block;", -- style of the <div> element at the end of the interlinear display
EndDiv = "clear: left; display: block;",  
ErrorMessage = "font-size: inherit",
ErrorMessage = "font-size: inherit",
}
}
conf.class = { --CSS classes
conf.class = {  
Interlinear = "interlinear",
Interlinear = "interlinear",
GlossAbbr  = "gloss-abbr",
GlossAbbr  = "gloss-abbr",
Line 57: Line 65:
ErrorMessage = "error",
ErrorMessage = "error",
}
}
---------------------
---------------------
-- Section transclusion
-- Section transclusion
---------------------
---------------------
local page_content = nil -- lazy initialization
local page_content = nil  
local function get_section(frame, section_name)
local function get_section(frame, section_name)
if page_content == nil then
if page_content == nil then
Line 73: Line 82:
return ''
return ''
end
end
---------------------
---------------------
-- Sundry small functions
-- Sundry small functions
Line 81: Line 91:


local function tidyCss(str)
local function tidyCss(str)
str = mw.ustring.gsub(str, '^[\"\']*(.-)[\"\']*$', "%1") -- trims quotation marks
str = mw.ustring.gsub(str, '^[\"\']*(.-)[\"\']*$', "%1")  
if mw.ustring.sub(str, -1) ~= ";" then str = str .. ";" end -- appends ";" if missing
if mw.ustring.sub(str, -1) ~= ";" then str = str .. ";" end  
return str
return str
end
end


local function highlight(text)
local function highlight(text)
if text then
if text then return '' .. text .. '' else return "" end
return '<span style="color:#C00;font-weight:bold;">' .. text .. '</span>'
else return "" end
end
end


Line 96: Line 104:
end
end


local function is_empty(str) -- returns "false" if its argument is a string containing chars other than spaces &c.
local function is_empty(str)  
if not str then return true end
if not str then return true end
if mw.ustring.find(str, "[^" .. conf.WordSeparator .. "]")
if mw.ustring.find(str, "[^" .. conf.WordSeparator .. "]")
Line 105: Line 113:
local function help_link (anchor)
local function help_link (anchor)
if anchor then
if anchor then
return " ([[" .. conf.ErrorHelpLocation .. "#" .. anchor .. "|help]])"
return " ([[w:" .. conf.ErrorHelpLocation .. "#" .. anchor .. "|help]])"
else return "" end
else return "" end
end
end


-- the following is part of a trial implementation of automatic transliteration:
-- Trial implementation of automatic transliteration using Module:languages:
local function transliterate (str, lang_from, lang_to,  scheme)
local function transliterate (str, lang_code)
if not lang_code then
msg:add("error", "Source language for transliteration is not set")
return ""
end
-- Integration with Module:languages
local lang_obj = languages.getByCode(lang_code)
if lang_obj and type(lang_obj.transliterate) == "function" then
return lang_obj:transliterate(str) or ""
end
-- Fallback for specific modules if getByCode doesn't provide it
local lookup = {grc = {module = 'Module:Ancient Greek', funct = "transliterate", } }
local lookup = {grc = {module = 'Module:Ancient Greek', funct = "transliterate", } }
if not lang_from then
local t = lookup[lang_code]
msg:add("error", "Source language for transliteration is not set")
if t then
else
local module = require(t.module)
local t = lookup[lang_from]
return module[t.funct](str)
if t then
else
local module = require(t.module)
msg:add("error", "Can't find transliterator for language '" .. lang_code .. "'")
return module[t.funct](str)
else msg:add("error", "Can't find transliterator for language '" .. lang_from .. "'")
end
end
end
return ""
return ""
end -- end of trial block
end  


--------------------
--------------------
-- The following two functions update the glossing settings based on the received
-- Glossing settings
-- template arguments. set_global_glossing_settings() updates the global settings
-- that are valid for all gloss abbreviations. set_glossing_type()
-- returns the glossing type, which can vary between the different lines.
--------------------
--------------------
local function set_global_glossing_settings(a)
local function set_global_glossing_settings(a)
local style = ""
local style = ""
if a.style then style = tidyCss(a.style) end
if a.style then style = tidyCss(a.style) end
if a.underline == "no" then
if a.underline == "no" then style = style .. "text-decoration: none;" end
style = style .. "text-decoration: none;" end
if a.small_caps == "no" then style = style .. "font-variant:normal; text-transform: none;" end
if a.small_caps == "no" then
style = style .. "font-variant:normal; text-transform: none;" end
if style ~= "" then conf.style.GlossAbbr = conf.style.GlossAbbr .. style end
if style ~= "" then conf.style.GlossAbbr = conf.style.GlossAbbr .. style end
end
end
Line 147: Line 159:
if mw.ustring.find(glossing, 'link') then
if mw.ustring.find(glossing, 'link') then
GlossingType = "wikilink"
GlossingType = "wikilink"
elseif mw.ustring.find(glossing, 'label')
elseif mw.ustring.find(glossing, 'label') or mw.ustring.find(glossing, 'no link') then
or mw.ustring.find(glossing, 'no link') then
GlossingType = 'label'
GlossingType = 'label'
elseif mw.ustring.find(glossing, 'no abbr') then
elseif mw.ustring.find(glossing, 'no abbr') then
Line 177: Line 188:


---------------------
---------------------
-- The UserMessages object contains and processes error messages and warnings
-- UserMessages object
---------------------
---------------------
local UserMessages = {errors = {}, warnings = {}, gloss_messages = {}}
local UserMessages = {errors = {}, warnings = {}, gloss_messages = {}}
function UserMessages:add(msgtype, text, gloss)
function UserMessages:add(msgtype, text, gloss)
if msgtype == "gloss_message" then
if msgtype == "gloss_message" then self.gloss_messages[gloss] = text
self.gloss_messages[gloss] = text
elseif msgtype == "warning" then table.insert(self.warnings, text)
elseif msgtype == "warning" then
elseif msgtype == "non-repeating error" then self.errors.nre = text
table.insert(self.warnings, text)
elseif msgtype == "ambiguous gloss" then self.if_ambiguous_glosses = true
elseif msgtype == "non-repeating error" then
elseif msgtype == "error" then table.insert(self.errors, text)
self.errors.nre = text
else return error("UserMessages:add(): unknown message type", 2) end
elseif msgtype == "ambiguous gloss" then
self.if_ambiguous_glosses = true
elseif msgtype == "error" then
table.insert(self.errors, text)
else return error("UserMessages:add(): unknown message type", 2)
end
end
end
function UserMessages:print_errors()
function UserMessages:print_errors()
Line 201: Line 206:
err_span:attr("style", conf.style.ErrorMessage)
err_span:attr("style", conf.style.ErrorMessage)
err_span:addClass(conf.class.ErrorMessage)
err_span:addClass(conf.class.ErrorMessage)
for _,v in pairs(self.errors) do
for _,v in pairs(self.errors) do err_span:wikitext(" " .. v .. ";") end
err_span:wikitext(" " .. v .. ";") end
if namespace % 2 == 0 and namespace ~= 2 then err_span:wikitext(conf.ErrorCategory) end
if namespace % 2 == 0 and namespace ~= 2 -- non-talk namespaces, excluding user pages; if modifying please update the description on the category page
then err_span:wikitext(conf.ErrorCategory)
end
out = tostring(err_span)
out = tostring(err_span)
mw.addWarning(conf.MessageGlossingError)
mw.addWarning(conf.MessageGlossingError)
end
end
if self.if_ambiguous_glosses then
if self.if_ambiguous_glosses then
if namespace == 0 -- article namespace
if namespace == 0 then out = out .. conf.AmbiguousGlossCategory end
then out = out .. conf.AmbiguousGlossCategory -- this category will only track articles
end
end
end
return out
return out
Line 218: Line 218:
function UserMessages:print_warnings()
function UserMessages:print_warnings()
local out = ""
local out = ""
-- Messages and warnings get displayed only if the page is being viewed in "preview" mode:
if displaying_messages and (next(self.gloss_messages) or next(self.warnings)) then
if displaying_messages and (next(self.gloss_messages) or next(self.warnings)) then
local div = mw.html.create("div")
local div = mw.html.create("div")
Line 233: Line 232:
if self.gloss_messages then
if self.gloss_messages then
div:wikitext("<p>  To change any of the following default expansions, see [[Template:Interlinear/doc#Custom abbreviations|the template's documentation]]:</p>")
div:wikitext("<p>  To change any of the following default expansions, see [[Template:Interlinear/doc#Custom abbreviations|the template's documentation]]:</p>")
end
end
for _,v in pairs(self.gloss_messages) do
for _,v in pairs(self.gloss_messages) do
div:wikitext("<p>" .. v .. "</p>")
div:wikitext("<p>" .. v .. "</p>")
Line 243: Line 242:


---------------------
---------------------
-- gloss_lookup() receives a gloss abbreviation and tries to uncover its meaning.
-- Gloss Lookup & Format
---------------------
---------------------
local function gloss_lookup(a, label, wikilink)
local function gloss_lookup(a, label, wikilink)
Line 256: Line 255:
local prefix = mw.ustring.sub(a,1,1)
local prefix = mw.ustring.sub(a,1,1)
local suffix = mw.ustring.sub(a,2)
local suffix = mw.ustring.sub(a,2)
if conf.combining_person[prefix] then -- is it of the form 1PL or 3FS?
if conf.combining_person[prefix] then  
_label = conf.combining_person[prefix]
_label = conf.combining_person[prefix]
local _suffix = conf.combining_number[suffix] or conf.combining_gender[suffix]
local _suffix = conf.combining_number[suffix] or conf.combining_gender[suffix]
Line 264: Line 263:
local suffix1 = mw.ustring.sub(suffix,1,1)
local suffix1 = mw.ustring.sub(suffix,1,1)
local suffix2 = mw.ustring.sub(suffix,2)
local suffix2 = mw.ustring.sub(suffix,2)
if conf.combining_gender[suffix1]
if conf.combining_gender[suffix1] and conf.combining_number[suffix2] then
and conf.combining_number[suffix2] then
_label = _label .. ", " .. conf.combining_gender[suffix1] .. ", " .. conf.combining_number[suffix2]
_label = _label .. ", " .. conf.combining_gender[suffix1] .. ", " .. conf.combining_number[suffix2]
else _label = nil end
else _label = nil end
end
end
elseif mw.ustring.match(suffix,conf.combining_gender_numbers) then -- cases like G4 = gender 4
elseif mw.ustring.match(suffix,conf.combining_gender_numbers) then  
local _i,_j = mw.ustring.find(a, conf.combining_gender_numbers)
local _i,_j = mw.ustring.find(a, conf.combining_gender_numbers)
local _pre = mw.ustring.sub(a, 1, _i - 1)
local _pre = mw.ustring.sub(a, 1, _i - 1)
Line 276: Line 274:
_label = conf.combining_gender_prefixes[_pre] .. " " .. _suff
_label = conf.combining_gender_prefixes[_pre] .. " " .. _suff
end
end
elseif prefix == "N" then -- dealing with cases like NPST = non-past
elseif prefix == "N" then  
local s = gloss_override[suffix] or data.abbreviations[suffix]
local s = gloss_override[suffix] or data.abbreviations[suffix]
if s ~= nil and not s.ExcludeNegation then
if s ~= nil and not s.ExcludeNegation then
Line 292: Line 290:
end
end


---------------------
-- format_gloss() calls gloss_lookup() to find the meaning of a gloss
-- abbreviation, which it then proceeds to format
---------------------
local function format_gloss(gloss, label, wikilink)
local function format_gloss(gloss, label, wikilink)
if string.sub(gloss,1,3) == "000" then -- checks for a common component of exposed strip markers (see [[:mw:Strip marker]])
if string.sub(gloss,1,3) == "000" then return gloss end
return gloss
local gloss2 = mw.ustring.gsub(gloss,"<.->","")  
end
gloss2 = mw.ustring.gsub(gloss2, "%'%'+", "")  
local gloss2 = mw.ustring.gsub(gloss,"<.->","") -- remove any html fluff
gloss2 = mw.ustring.gsub(gloss2, "%'%'+", "") -- remove wiki bold/italic formatting
gloss2 = mw.text.trim(mw.ustring.upper(gloss2))
gloss2 = mw.text.trim(mw.ustring.upper(gloss2))
if not (label or wikilink)
if not (label or wikilink) or (not label and glossing_type == "label") or (not wikilink  and glossing_type == "wikilink") then
or (not label and glossing_type == "label")
if glossing_type ~= "no abbr" then label, wikilink, source = gloss_lookup(gloss2, label, wikilink) end
or (not wikilink  and glossing_type == "wikilink")
then
if glossing_type ~= "no abbr"
then label, wikilink, source = gloss_lookup(gloss2, label, wikilink)
end
end
end
local gloss_node
local gloss_node
if glossing_type == "no abbr"
if glossing_type == "no abbr" then gloss_node = mw.html.create("span")
then gloss_node = mw.html.create("span")
else gloss_node = mw.html.create("abbr") end
else gloss_node = mw.html.create("abbr") end
gloss_node:addClass(conf.class.GlossAbbr)
gloss_node:addClass(conf.class.GlossAbbr)
if label or wikilink then
if label or wikilink then
if not mw.ustring.match(gloss, "%l") -- excluding glosses that contain lower-case characters
if not mw.ustring.match(gloss, "%l") and not mw.ustring.match(gloss,conf.GlossSmallCapsExclude) then
and not mw.ustring.match(gloss,conf.GlossSmallCapsExclude) -- and also excluding A, O etc. from rendering in small caps
gloss_node:attr("style", conf.style.GlossAbbr)
then gloss_node:attr("style", conf.style.GlossAbbr)
end
end
local abbr_label
local abbr_label = label and label or wikilink
if label then abbr_label = label
else abbr_label = wikilink end
gloss_node:attr("title", abbr_label)
gloss_node:attr("title", abbr_label)
if source ~= "local" and data.abbreviations[gloss2] then
if source ~= "local" and data.abbreviations[gloss2] then
if data.abbreviations[gloss2].ambiguous then
if data.abbreviations[gloss2].ambiguous then
gloss_node:addClass(conf.class.GlossAbbrAmb)
gloss_node:addClass(conf.class.GlossAbbrAmb)
msg:add("ambiguous gloss")
msg:add("ambiguous gloss")
end
end
end
end
if glossing_type == "wikilink" and wikilink
if glossing_type == "wikilink" and wikilink then gloss_node:wikitext("[[w:", wikilink, "|" , gloss, "]]")
then gloss_node:wikitext("[[", wikilink, "|" , gloss, "]]")
else gloss_node:wikitext(gloss) end
else gloss_node:wikitext(gloss) end
if source ~= "local" and displaying_messages then -- logging gloss lookups:
if source ~= "local" and displaying_messages then  
local message = ""
local message = ""
if label then
if label then message = "assuming " .. gloss2 .. " means \"" .. abbr_label .. "\";" end
message = "assuming " .. gloss2 .. " means \"" .. abbr_label .. "\";" end
if glossing_type == "wikilink" and wikilink then
if glossing_type == "wikilink" and wikilink then
message = message .. " linking to [[" .. wikilink .. "]];"
message = message .. " linking to [[w:" .. wikilink .. "|" .. wikilink .. "]];"
end
end
msg:add("gloss_message", message, gloss)
msg:add("gloss_message", message, gloss)
end
end
elseif glossing_type == "no abbr"
elseif glossing_type == "no abbr" then
then gloss_node
gloss_node:attr("style", conf.style.GlossAbbr):wikitext(gloss)
:attr("style", conf.style.GlossAbbr)
:wikitext(gloss)
else
else
if displaying_messages then
if displaying_messages then
Line 352: Line 333:
end
end
msg:add("non-repeating error", "Unknown glossing abbreviation(s)" .. help_link("gloss abbr"))
msg:add("non-repeating error", "Unknown glossing abbreviation(s)" .. help_link("gloss abbr"))
gloss_node
gloss_node:addClass(conf.class.GlossAbbrError):addClass("error"):css("font-size", "100%")
:addClass(conf.class.GlossAbbrError)
:addClass("error")
:css("font-size", "100%")
:attr("title", gloss2 .. ": glossing abbreviation not found")
:attr("title", gloss2 .. ": glossing abbreviation not found")
:attr("style", conf.style.ErrorMessage)
:attr("style", conf.style.ErrorMessage)
Line 363: Line 341:
end
end


---------------------
-- find_gloss() parses a word into morphemes, and it calls format_gloss()
-- for anything that looks like a glossing abbreviation.
---------------------
local function find_gloss(word)
local function find_gloss(word)
local function scan_gloss(boundary, gloss_abbr) -- checks a morpheme if it is a gloss abbreviation
local function scan_gloss(boundary, gloss_abbr)
if (mw.ustring.match(gloss_abbr, conf.GlossAbbrPattern)
if (mw.ustring.match(gloss_abbr, conf.GlossAbbrPattern) or conf.LowerCaseGlosses[gloss_abbr])
or conf.LowerCaseGlosses[gloss_abbr])
and not (conf.GlossExcludeTable[gloss_abbr] or mw.ustring.match(gloss_abbr, conf.GlossExcludePattern)) then
and not (conf.GlossExcludeTable[gloss_abbr]
gloss_abbr = format_gloss(gloss_abbr)
or mw.ustring.match(gloss_abbr, conf.GlossExcludePattern))
then gloss_abbr = format_gloss(gloss_abbr)
end
end
return boundary .. gloss_abbr
return boundary .. gloss_abbr
end
end
local word = mw.text.decode(word, true)
local word = mw.text.decode(word, true)
if word == "I" -- for the case of the English word "I", the 1SG pronoun
if word == "I" then return word end
then return word end
local pattern = "([" .. conf.GlossAbbrBoundary .. "]?)([^" .. conf.GlossAbbrBoundary .. "]+)"
local pattern = "([" .. conf.GlossAbbrBoundary .. "]?)([^" .. conf.GlossAbbrBoundary .. "]+)"
word = mw.ustring.gsub(word, pattern, scan_gloss) -- splits into morphemes
word = mw.ustring.gsub(word, pattern, scan_gloss)
return word
return word
end
end


---------------------
-- The main purpose of the bletcherous parse() is to split a line into words and then for each eligible word
-- to call find_gloss(). The parser outputs the individual words (with any gloss abbreviation formatting applied).
-- The simple job of splitting at whitespaces has been made complicated by a) the fact that the input can contain
-- whitespaces inside the various html elements that are the result of the application of various formatting templates;
-- and b) the need to be able to recognise the output of the template that formats custom gloss abbreviations
-- (and hence skip passing it on to find_gloss). See talk for a suggestion about its future.
---------------------
local function parse(cline, i, tags_found,ifglossing)
local function parse(cline, i, tags_found,ifglossing)
local function issue_error(message, culprit)
local function issue_error(message, culprit)
UserMessages:add("error",  message .. ": ''" .. mw.ustring.sub(cline.whole, 1, i-1) .. "'''" .. culprit  .. "'''''")
UserMessages:add("error",  message .. ": ''" .. mw.ustring.sub(cline.whole, 1, i-1) .. "'''" .. culprit  .. "'''''")
end
end
if i > cline.length then return i end --this will only be triggered if the current line has less words than line 1
if i > cline.length then return i end
local next_step, j, _, chunk
local next_step, j, _, chunk
local probe = mw.ustring.sub(cline.whole,i,i)
local probe = mw.ustring.sub(cline.whole,i,i)
if mw.ustring.match(probe,"[" .. conf.WordSeparator .. "]") and tags_found == 0
if mw.ustring.match(probe,"[" .. conf.WordSeparator .. "]") and tags_found == 0 then
then next_step =  i-1
next_step =  i-1
elseif probe == "[" then --Wikilink?
elseif probe == "[" then  
if mw.ustring.sub(cline.whole,i+1,i+1) == "[" then
if mw.ustring.sub(cline.whole,i+1,i+1) == "[" then
_,j,chunk = mw.ustring.find(cline.whole,"(%[%[.-%]%])", i)
_,j,chunk = mw.ustring.find(cline.whole,"(%[%[.-%]%])", i)
else chunk = "["; j = i end --not a wikilink then
else chunk = "["; j = i end  
buffer = buffer .. chunk
buffer = buffer .. chunk
next_step =  parse(cline, j+1,tags_found,ifglossing)
next_step =  parse(cline, j+1,tags_found,ifglossing)
elseif probe == "{"  and tags_found == 0 then --curly brackets enclose a sequence of words to be treated as a single unit
elseif probe == "{"  and tags_found == 0 then  
_,j,chunk = mw.ustring.find(cline.whole,"(.-)(})", i+1)
_,j,chunk = mw.ustring.find(cline.whole,"(.-)(})", i+1)
if not chunk then
if not chunk then
Line 421: Line 383:
buffer = buffer .. chunk
buffer = buffer .. chunk
next_step =  parse(cline, j+1,tags_found,ifglossing)
next_step =  parse(cline, j+1,tags_found,ifglossing)
elseif probe == "<" then -- We've encountered an HTML tag. What do we do now?
elseif probe == "<" then  
local _,j,chunk = mw.ustring.find(cline.whole,"(<.->)",i)
local _,j,chunk = mw.ustring.find(cline.whole,"(<.->)",i)
if not chunk then
if not chunk then
issue_error("Unclosed angle bracket", "<")
issue_error("Unclosed angle bracket", "<")
chunk = highlight("<"); j = i
chunk = highlight("<"); j = i
elseif mw.ustring.sub(cline.whole,i,i+1) == "</" then -- It's a CLOSING tag
elseif mw.ustring.sub(cline.whole,i,i+1) == "</" then  
if cline.glossing
if mw.ustring.find(chunk, "</abbr>", 1, true) then ifglossing=true end
and ifglossing==false
and mw.ustring.match(chunk,"</abbr>")
then ifglossing=true end
tags_found = tags_found - 1
tags_found = tags_found - 1
elseif not mw.ustring.match(chunk, "/>$") -- It's an OPENING tag, unless it opens a self-closing element (in which case the element is ignored)
elseif not mw.ustring.match(chunk, "/>$") then
then if ifglossing == true -- the following checks for the output of {{ggl}}:
if ifglossing == true and mw.ustring.find(chunk, conf.class.GlossAbbr, 1, true) then ifglossing = false end
and mw.ustring.find(chunk, conf.class.GlossAbbr, 1, true) -- it's important that the "find" function uses literal strings and not patterns
then ifglossing = false end
tags_found = tags_found + 1
tags_found = tags_found + 1
end
end
buffer = buffer .. chunk
buffer = buffer .. chunk
next_step = parse(cline, j+1,tags_found,ifglossing)
next_step = parse(cline, j+1,tags_found,ifglossing)
else -- No HTML tags, so we only need to find where the word ends
else  
local _,k,chunk = mw.ustring.find(cline.whole,"(..-)([ <[])",i)
local _,k,chunk = mw.ustring.find(cline.whole,"(..-)([ <[])",i)
if k then --ordinary text
if k then  
if ifglossing==true then
if ifglossing==true then buffer = buffer .. find_gloss(chunk)
buffer = buffer .. find_gloss(chunk)
else
else
if cline.tone_sup then chunk = tone_sup(chunk) end
if cline.tone_sup then chunk = tone_sup(chunk) end
Line 450: Line 406:
end
end
next_step = parse(cline, k, tags_found, ifglossing)
next_step = parse(cline, k, tags_found, ifglossing)
else -- reached end of string
else  
if ifglossing == true then
if ifglossing == true then chunk = find_gloss(mw.ustring.sub(cline.whole,i))
chunk = find_gloss(mw.ustring.sub(cline.whole,i))
else
else
chunk = mw.ustring.sub(cline.whole,i)
chunk = mw.ustring.sub(cline.whole,i)
Line 463: Line 418:
return next_step
return next_step
end
end
--------------------
 
-- The following function is called by Template:gcl and is used for formatting an individual glossing abbreviation
--------------------
function p.gcl(frame)
function p.gcl(frame)
local args = getArgs(frame,{
local args = getArgs(frame,{ trim = true, removeBlanks = false, parentOnly = true, wrappers = {'Template:Grammatical category label'}, })
trim = true,
removeBlanks = false,
parentOnly = true,
wrappers = {'Template:Gcl'},
})
msg = UserMessages
msg = UserMessages
set_global_glossing_settings{style = args.style, underline = args.underline, small_caps = args['small-caps']}
set_global_glossing_settings{style = args.style, underline = args.underline, small_caps = args['small-caps']}
if not args.glossing then
if not args.glossing then glossing_type = conf.GlossingType else glossing_type = set_glossing_type(args.glossing) end
glossing_type = conf.GlossingType -- a global variable
else glossing_type = set_glossing_type(args.glossing)
end
local gloss, label, wikilink = args[1], args[2], args[3]
local gloss, label, wikilink = args[1], args[2], args[3]
if not gloss then UserMessages:add("error", "No gloss supplied")
if not gloss then UserMessages:add("error", "No gloss supplied") return UserMessages:print() end
return UserMessages:print() end
if wikilink and not args.glossing then glossing_type = 'wikilink' end  
if wikilink and not args.glossing then -- if a wikilink is supplied and glossing isn't set to 'label'...
glossing_type = 'wikilink' end --    .. then the wikilink will be formatted as such
if label == "" then label = nil end
if label == "" then label = nil end
if wikilink == "" then wikilink = nil end
if wikilink == "" then wikilink = nil end
Line 490: Line 433:
end
end


--------------------
-- The following is the function called by Template:Interlinear.
-- It processes the template arguments, then calls parse() to split the input lines into words
-- and it then builds the output html.
--------------------
-- p.interlinearise(frame): entry point for the interlinear templates.
-- This fragment only covers the start of argument handling; the listing
-- interleaves two revisions and elides unchanged stretches of the body.
function p.interlinearise(frame)
function p.interlinearise(frame)
---------------------
-- Prepare arguments
---------------------
-- set to true further down only for 'Template:Fs interlinear'
local if_auto_translit = false
local if_auto_translit = false
local args = getArgs(frame, { -- configuration for Module:Arguments
local args = getArgs(frame, { trim = true, removeBlanks = false, parentFirst = true, wrappers = {'Template:Interlinear', 'Template:Fs interlinear'}, })
trim = true,
removeBlanks = false,
parentFirst = true,
wrappers = {'Template:Interlinear', 'Template:Fs interlinear'},
})
-- title of the calling template, compared against 'Template:Fs interlinear' below
-- NOTE(review): the result of frame:getParent() is used without a nil check
local template_name = frame:getParent():getTitle()
local template_name = frame:getParent():getTitle()
if template_name == 'Template:Fs interlinear' then
if template_name == 'Template:Fs interlinear' then
Line 514: Line 444:
-- (inside the 'Template:Fs interlinear' branch) transl= serves as the default for transl2=
if args.transl and not args.transl2 then args.transl2 = args.transl end
if args.transl and not args.transl2 then args.transl2 = args.transl end
-- enables the handling of lines whose content is "auto" further down
if_auto_translit = true
if_auto_translit = true
end
end
-- an empty {{REVISIONID}} expansion is what switches message display on
local revid = frame:preprocess( "{{REVISIONID}}" )
local revid = frame:preprocess( "{{REVISIONID}}" )
if revid == "" then
if revid == "" then
-- messages are shown when display-messages= is absent or yesno() returns a truthy value for it
if not args['display-messages'] or yesno(args['display-messages']) then
if not args['display-messages'] or yesno(args['display-messages']) then displaying_messages = true end
displaying_messages = true end-- messages will be displayed only in preview mode
end
end
-- assigned without `local` here; presumably declared elsewhere in the file -- confirm
msg = UserMessages
msg = UserMessages
Line 526: Line 454:
-- set_italics(n): appends italic styling to line n and turns on tone
-- superscripting for it, unless tone-superscripting= is supplied with a value
-- that yesno() does not read as true.
local function set_italics(n)
local function set_italics(n)
line[n].attr.style = line[n].attr.style .. "font-style: italic;"
line[n].attr.style = line[n].attr.style .. "font-style: italic;"
line[n].tone_sup = true -- single digits are assumed to be tone markers and will hence be superscripted
line[n].tone_sup = true 
if args['tone-superscripting'] and not yesno(args['tone-superscripting'])
if args['tone-superscripting'] and not yesno(args['tone-superscripting']) then line[n].tone_sup = false end
then line[n].tone_sup = false end
end
end


if args.glossing then -- the glossing= parameter sets the default glossing type
if args.glossing then 
local _gl = set_glossing_type(args.glossing)
local _gl = set_glossing_type(args.glossing)
-- the configured default is only replaced when set_glossing_type returns a value
if _gl then conf.GlossingType = _gl end
if _gl then conf.GlossingType = _gl end
end
end
--this looks for a list of glossing abbreviations on the page that transcludes the template:
local _ablist_section = get_section(frame, 'list-of-glossing-abbreviations')
local _ablist_section = get_section(frame, 'list-of-glossing-abbreviations')
if _ablist_section and _ablist_section ~= "" then
if _ablist_section and _ablist_section ~= "" then
local _a = mw.ustring.gsub(_ablist_section, '</?div [^\n]*>', '') -- strips off the div tags
-- NOTE(review): unlike the pattern on the previous line, '<div.->' cannot match a closing </div> tag
local _a = mw.ustring.gsub(_ablist_section, '<div.->', '') 
set_custom_glosses(_a)
set_custom_glosses(_a)
end
end
--and this looks for a list of abbreviations set within the template:
local _ablist = args.abbreviations
local _ablist = args.abbreviations
if _ablist and _ablist ~= ""
if _ablist and _ablist ~= "" then set_custom_glosses(_ablist) end
then set_custom_glosses(_ablist) end
-- ablist= is handled the same way as abbreviations=; the local is simply declared again
local _ablist = args.ablist
local _ablist = args.ablist
if _ablist and _ablist ~= ""
if _ablist and _ablist ~= "" then set_custom_glosses(_ablist) end
then set_custom_glosses(_ablist) end


-- spacing= (numeric, at most 20) sets the right margin after each word in em;
-- in every other case the configured WordMargin is appended instead
local _spacing = tonumber(args.spacing)
local _spacing = tonumber(args.spacing)
if _spacing and _spacing <= 20
if _spacing and _spacing <= 20 then conf.style.WordDiv = conf.style.WordDiv .. 'margin-right: ' .. _spacing .. 'em;'
then conf.style.WordDiv = conf.style.WordDiv .. 'margin-right: ' .. _spacing .. 'em;'
else conf.style.WordDiv = conf.style.WordDiv .. conf.style.WordMargin end
else conf.style.WordDiv = conf.style.WordDiv .. conf.style.WordMargin
end


-- last_line counts every unnamed parameter visited; offset counts the empty ones,
-- so that i = j - offset numbers only the non-empty lines
local offset, last_line = 0, 0
local offset, last_line = 0, 0
for j,v in ipairs(args) do -- iterates over the unnamed parameters from the template
for j,v in ipairs(args) do 
last_line = last_line +1
last_line = last_line + 1
if is_empty(v)
if is_empty(v) then offset = offset + 1
then offset = offset + 1
else
else
local i = j - offset
local i = j - offset
Line 565: Line 485:
v = normalise(v)
v = normalise(v)


-- the following is part of a trial implementation of automatic transliteration:
-- a line consisting of "auto" (not the first line) is replaced by a transliteration
-- of the previous line, using that line's language or, failing that, lang=
if if_auto_translit and v == "auto" and i > 1 then
if if_auto_translit and v == "auto" and i > 1 then
local source_line = line[i-1]
local source_line = line[i-1]
local src_lang = source_line.lang
local src_lang = source_line.lang or args.lang
if not src_lang then src_lang = args.lang end
if src_lang then
if src_lang then
v = transliterate(source_line.whole, src_lang)
v = transliterate(source_line.whole, src_lang)
-- with no language available the line is emptied and an error message is queued
else v = ""; msg:add("error", "No language specified for automatic transliteration")
else v = ""; msg:add("error", "No language specified for automatic transliteration") end
end
end 
end -- end of trial block


-- the complete text of the line is kept in .whole
line[i].whole = v
line[i].whole = v
Line 585: Line 502:
end
end


---prepare style arguments----
line[i].class = ""
line[i].class = ""
-- styleN= is passed through tidyCss and appended to the configured WordP style
local _style = args["style" .. i]
local _style = args["style" .. i]
if not _style then _style = ""
if not _style then _style = "" else _style = tidyCss(_style) end
else _style = tidyCss(_style) end
--line[i].attr holds the attributes for the <p> elements that enclose the words in line i
line[i].attr = {style = conf.style.WordP .. _style}
line[i].attr = {style = conf.style.WordP .. _style}


-- langN= takes precedence; values of fewer than two characters are ignored (#_lang > 1)
local _lang = args["lang" .. i]
local _lang = args["lang" .. i]
if _lang and #_lang > 1 then
if _lang and #_lang > 1 then line[i].lang = _lang
line[i].lang = _lang
else _lang = args.lang
else _lang = args.lang
if _lang and #_lang > 1 and i == 1 then -- if a lang= parameter is supplied, it's assumed to apply to line 1
if _lang and #_lang > 1 and i == 1 then line[i].lang = _lang end
line[i].lang = _lang
end
end
end
line[i].attr.lang = line[i].lang
line[i].attr.lang = line[i].lang
--the following emulates the behaviour of {{Bo-textonly}} (see Template talk:Fs interlinear#Tibetan):
if template_name == 'Template:Fs interlinear' then
if template_name == 'Template:Fs interlinear' then
if _lang == "bo" and i == 1 then
if _lang == "bo" and i == 1 then
line[1].class = line[1].class .. " uchen"
line[1].class = line[1].class .. " uchen"
line[1].attr.style = line[1].attr.style .. "font-size:1.25em; word-wrap:break-word;"
line[1].attr.style = line[1].attr.style .. "font-size:1.25em; word-wrap:break-word;"
end
end
-- NOTE(review): as listed, this 'Template:Interlinear' test sits inside the
-- 'Template:Fs interlinear' branch opened above, where it could never be true --
-- confirm against the actual revision text
if template_name == 'Template:Interlinear' then
if _lang == "nv" and i == 1 then
line[1].attr.style = line[1].attr.style .. "font-family: Aboriginal Sans, DejaVu Sans, Calibri, Arial Unicode MS, sans-serif;"
end
end
end
end


if yesno(args["italics" .. i]) then
if yesno(args["italics" .. i]) then set_italics(i) end
set_italics(i)
end


-- translN= names a transliteration scheme; its title ends up in line[i].attr.title.
-- NOTE(review): one revision looks the scheme up in lang_data.translit_title_table
-- (per-language entry, else .default, else an error message); the other asks
-- languages.getByCode(...) for an optional getTranslitTitle method and falls back to
-- "<scheme> transliteration". The two are interleaved line by line below, so the
-- block structure of either revision cannot be read off this listing.
local _transl = args["transl" .. i]
local _transl = args["transl" .. i]
if _transl and #_transl > 1 then
if _transl and #_transl > 1 then
_transl = mw.ustring.lower(_transl)
_transl = mw.ustring.lower(_transl)
local _lookup = lang_data.translit_title_table[_transl]
if _lookup then
-- Look up transliteration schema titles dynamically from Module:languages
if _lang and _lookup[_lang] then
local langObj = languages.getByCode(_lang or "en")
_transl = _lookup[_lang]
local t_title = _transl .. " transliteration" -- generic fallback
else _transl = _lookup.default
if langObj and type(langObj.getTranslitTitle) == "function" then
end
    t_title = langObj:getTranslitTitle(_transl) or t_title
if _transl then
line[i].attr.title = _transl
end
else  msg:add("error", "Transliteration scheme '" .. _transl .. "' not recognised")
end
end
line[i].attr.title = t_title
end
end


Line 633: Line 543:
-- _glossing is assigned in lines not shown in this listing
if _glossing then
if _glossing then
line[i].glossing = set_glossing_type(_glossing)
line[i].glossing = set_glossing_type(_glossing)
-- Do not treat default glossing settings as custom.
-- (the excluded cases are a yesno-false value on line 1 and a yesno-true value on line 2)
if not ((i == 1 and not yesno(_glossing)) or (i == 2 and yesno(_glossing))) then
if not ((i == 1 and not yesno(_glossing)) or (i == 2 and yesno(_glossing))) then
line.HasCustomGlossing = true
line.HasCustomGlossing = true
Line 640: Line 549:


-- ipaN= (when yesno() reads it as true) replaces whatever class has been
-- accumulated for the line so far with "IPA"
local _ipa = args['ipa' .. i]
local _ipa = args['ipa' .. i]
if yesno(_ipa) then
if yesno(_ipa) then line[i].class = "IPA" end
line[i].class = "IPA"
end

-- classN= is appended to the line's class after a space
local _class = args['class' .. i]
local _class = args['class' .. i]
if _class then
if _class then line[i].class = line[i].class .. " " .. _class end
line[i].class = line[i].class .. " " .. _class
-- an empty class string is turned into nil
if line[i].class == "" then line[i].class = nil end
end
end 

end
if line[i].class == ""
then line[i].class = nil end
end -- ends the first if-statement in the loop
end -- ends the FOR cycle


-- number of line entries collected by the loop above
local line_count = #line
local line_count = #line
Line 666: Line 568:
set_italics(n)
set_italics(n)
elseif not (_italics and not yesno(_italics)) and not (args["italics1"] and not yesno(args["italics1"])) then
elseif not (_italics and not yesno(_italics)) and not (args["italics1"] and not yesno(args["italics1"])) then
set_italics(1) -- by default, the first line will get italicised, unless italics=no or italics1=no
set_italics(1) 
end
end
-- the last unnamed parameter is assumed to be the free translation:
-- (free_translation is assigned without `local` here)
free_translation = args[last_line]
free_translation = args[last_line]
if not is_empty(free_translation) then
if not is_empty(free_translation) then line [line_count] = nil end
line [line_count] = nil   end --... and is thus excluded from interlinearising
end
end


-- If glossing isn't specified for any line, then it's chosen by default to occur
if yesno(args.glossing) == false then line.HasCustomGlossing = true end
-- in the second line, unless only a single line has been supplied, in which case
-- the assumption is that it is the one containing grammatical glosses
-- (glossing=no counts as a custom setting, so the default choice below is skipped)
if yesno(args.glossing) == false then
line.HasCustomGlossing = true
end
if not line.HasCustomGlossing then
if not line.HasCustomGlossing then
if line_count == 1 then
if line_count == 1 then line[1].glossing = conf.GlossingType
line[1].glossing = conf.GlossingType
elseif line[2] then line[2].glossing = conf.GlossingType end
elseif line[2] then
line[2].glossing = conf.GlossingType
end
end
end
set_global_glossing_settings{style = args['glossing-style'], underline = args.underline, small_caps = args['small-caps']}
set_global_glossing_settings{style = args['glossing-style'], underline = args.underline, small_caps = args['small-caps']}


---------------------
-- Segment lines into words
---------------------
for i,v in ipairs(line) do
for i,v in ipairs(line) do
local ifglossing = false
local ifglossing = false
if line[i].glossing then
if line[i].glossing then
ifglossing = true -- if true the parser will attempt to format gloss abbreviations in the current line
ifglossing = true 
glossing_type = line[i].glossing -- necessarily a global variable
glossing_type = line[i].glossing 
end
end
local wc, n = 1, 1
local wc, n = 1, 1
Line 708: Line 597:
end
end


----Check for mismatches in number of words across lines----
-- number_of_words ends up as the largest word count of any line; a mismatch is
-- flagged when a line other than the first has a non-zero count that differs
-- from the running maximum
local number_of_words, mismatch_found = 0, false
local number_of_words, mismatch_found = 0, false
for i,v in ipairs(line) do -- find the maximum number of words in any line
for i,v in ipairs(line) do 
local wc = #line[i].words
local wc = #line[i].words
if wc ~= number_of_words then
if wc ~= number_of_words then
if i ~= 1 and wc ~= 0 then
if i ~= 1 and wc ~= 0 then mismatch_found = true end
mismatch_found = true
if wc > number_of_words then number_of_words = wc end
end
if wc > number_of_words then
number_of_words = wc
end
end
end
end
end
----Deal with mismatches---

if mismatch_found then
if mismatch_found then
-- the per-line counts are appended to this text in the lines that follow
local error_text = "Mismatch in the number of words between lines: "
local error_text = "Mismatch in the number of words between lines: "
Line 728: Line 612:
error_text = error_text .. wc .. " word(s) in line " .. i .. ", "
error_text = error_text .. wc .. " word(s) in line " .. i .. ", "
-- shorter lines are padded up to number_of_words so that every word position has an entry
-- NOTE(review): the one-line form below appears to pad with " " where the
-- multi-line form pads with "&nbsp;" -- confirm which is intended
if wc ~= number_of_words then
if wc ~= number_of_words then
for current_word = wc+1, number_of_words do
for current_word = wc+1, number_of_words do line[i].words[current_word] = " " end
line[i].words[current_word] = "&nbsp;"
end
end
end
end
end
-- the trailing ", " left by the loop is replaced with a single space before the help link
if string.sub(error_text, -2) == ", "
if string.sub(error_text, -2) == ", " then error_text = string.sub(error_text, 1, #error_text - 2) .. " " end
then error_text = string.sub(error_text, 1, #error_text - 2) .. " "
end
error_text = error_text .. help_link("mismatch")
error_text = error_text .. help_link("mismatch")
UserMessages:add("error", error_text)
UserMessages:add("error", error_text)
end
end


---------------------
-- Build the HTML
---------------------
---- If just a single line was supplied, format it as inline text
if line_count == 1 then
if line_count == 1 then
-- the words of line 1 go into one <span>, separated by single spaces
local span = mw.html.create('span')
local span = mw.html.create('span')
span:attr(line[1].attr)
span:attr(line[1].attr)
for wi = 1, number_of_words do
for wi = 1, number_of_words do
local space
local space = (wi < number_of_words) and " " or ""
if wi < number_of_words then space = " " else space = "" end
span:wikitext(line[1].words[wi] .. space)
span:wikitext(line[1].words[wi] .. space)
end
end
Line 755: Line 630:
end
end


---- More than one line supplied, so we'll produce interlinear display
local div = mw.html.create("div")
local div = mw.html.create("div")
div:addClass(conf.class.Interlinear)
div:addClass(conf.class.Interlinear)


-- For stuff to be displayed in the left margin, like example numbering
-- indent= sets the left margin in em (default "4" when only number= is given);
-- the number= text goes into an absolutely positioned <div>
local number, indent = nil, nil
local number, indent = nil, nil
if args.number and args.number ~= ""
if args.number and args.number ~= "" then number = args.number end
then number = args.number end
if args.indent and args.indent ~="" then indent = args.indent end
if args.indent and args.indent ~=""
then indent = args.indent end
if indent or number then
if indent or number then
if not indent then indent = "4" end --default value
if not indent then indent = "4" end 
div:css("margin-left", indent .. 'em')
div:css("margin-left", indent .. 'em')
if number then
if number then
div:tag("div")
div:tag("div"):css("position", "absolute"):css("left", "1em"):wikitext(args.number)
:css("position", "absolute")
:css("left", "1em")
:wikitext(args.number)
end
end
end
end


-- any non-empty box= adds a background colour, a border and padding
if args.box and args.box ~= "" then
if args.box and args.box ~= "" then
div:css("background-color", "#f8f9fa")
div:css("background-color", "#f8f9fa"):css("border", "1px solid #eaecf0"):css("padding", "1em") end
:css("border", "1px solid #eaecf0")
if args.top and args.top ~= "" then div:tag("div"):wikitext(args.top) end
:css("padding", "1em") end
if args.top and args.top ~= "" then --lines to display above the interlinear block
div:tag("div")
:wikitext(args.top)
end


-- Producing the interlinear block
-- one <div> per word position, holding one <p> per non-empty line
for wi = 1, number_of_words do
for wi = 1, number_of_words do
local div2 = div:tag("div")
local div2 = div:tag("div"):attr("style", conf.style.WordDiv)
:attr("style", conf.style.WordDiv)
for i,_ in ipairs (line) do
for i,_ in ipairs (line) do
if line[i].whole ~= "" then -- skipping empty lines
if line[i].whole ~= "" then 
local p = div2:tag("p")
local p = div2:tag("p")
p:attr(line[i].attr)
p:attr(line[i].attr)
if line[i].class then
if line[i].class then p:addClass(line[i].class) end
p:addClass(line[i].class)
end
local _text = line[i].words[wi]
local _text = line[i].words[wi]
-- NOTE(review): the one-line form below appears to substitute " " where the
-- multi-line form substitutes "&nbsp;" -- confirm which is intended
if _text == "" or _text == " "
if _text == "" or _text == " " then _text = " " end  
then _text = "&nbsp;" end -- <p> elements without content mess up the interlinear display
p:wikitext(_text)
p:wikitext(_text)
end
end
Line 804: Line 662:
end
end


--- If any "comments" have been specified, add them at the end of each line
-- (line.hasComments and line[i].c are set in lines not shown in this listing)
if line.hasComments then
if line.hasComments then
local divc = div:tag("div")
local divc = div:tag("div"):attr("style", conf.style.WordDiv)
:attr("style", conf.style.WordDiv)
for i,_ in ipairs (line) do
for i,_ in ipairs (line) do
local p = divc:tag("p")
local p = divc:tag("p"):attr("style", conf.style.WordP)
p:attr("style", conf.style.WordP)
-- NOTE(review): the one-line form below appears to fall back to " " where the
-- multi-line form falls back to "&nbsp;" -- confirm which is intended
if line[i].c then p:wikitext(line[i].c) else p:wikitext(" ") end
if line[i].c then
p:wikitext(line[i].c)
else p:wikitext("&nbsp;")
end
end
end
end
end


--Add hidden lines containing the content of each line of interlinear text: this is for accessibility
for i,v in ipairs(line) do
for i,v in ipairs(line) do
local hidden_line = div:tag("p")
local hidden_line = div:tag("p")
hidden_line:attr("style", conf.style.HiddenText)
hidden_line:attr("style", conf.style.HiddenText):wikitext(v.whole)
:wikitext(v.whole)
end
end


-- Format the free translation
-- the <p> is created even when there is no free translation
local ft_line = div:tag("p")
local ft_line = div:tag("p")
if free_translation and free_translation ~= "" then
if free_translation and free_translation ~= "" then
Line 831: Line 680:
ft_line:wikitext(free_translation)
ft_line:wikitext(free_translation)
end
end
-- bottom= text goes into its own <p> with the top margin set to 0
if args.bottom and args.bottom ~= ""
if args.bottom and args.bottom ~= "" then
then local bottom = div:tag('p')
local bottom = div:tag('p')
bottom:css('margin-top', '0')
bottom:css('margin-top', '0')
bottom:wikitext(args.bottom)
bottom:wikitext(args.bottom)
end
end
ft_line:node(msg:print_errors()) -- for error messages
ft_line:node(msg:print_errors()) 


local end_div = div:tag("div")
local end_div = div:tag("div")
end_div:attr("style", conf.style.EndDiv)
end_div:attr("style", conf.style.EndDiv)
div:newline()
div:newline()
-- tracking categories keyed on the number of unnamed parameters (last_line)
local temp_track = ""
local temp_track = ""
if last_line == 2
if last_line == 2 then temp_track = "[[Category:Pages with interlinear glosses using two unnamed parameters]]" end
then temp_track = "[[Category:Pages with interlinear glosses using two unnamed parameters]]"
if last_line > 3 and template_name ~= 'Template:Fs interlinear' then  temp_track = "[[Category:Pages with interlinear glosses using more than three unnamed parameters]]" end
end
if last_line > 3 and template_name ~= 'Template:Fs interlinear'
then  temp_track = "[[Category:Pages with interlinear glosses using more than three unnamed parameters]]"
end
-- the output is the rendered <div>, then the tracking category, then any warnings
return tostring(div) .. temp_track .. msg:print_warnings()
return tostring(div) .. temp_track .. msg:print_warnings()
end
end


-- export the module table that p.gcl and p.interlinearise were attached to
return p
return p