|
|
Line 1: |
Line 1: |
| local export = {} | | local export = {} |
|
| |
-- Lua pattern matching a run of whitespace and/or punctuation characters.
-- Every such run is a candidate word break.
local spacingPunctuation = "[%s%p]+"

-- Punctuation or spacing characters that are found inside of words.
-- They are spliced into the negated character class below, so a run made up
-- only of these characters is not treated as a word break.
local wordPunc = "-־׳״'.·*’་"
local notWordPunc = "[^" .. wordPunc .. "]+"

-- Lookup from a part-of-speech category name to the part of speech handed to
-- the gender formatter in format_genders(); names missing here yield nil.
local pos_for_gender_number_cat = {
	nouns = "nouns",
	["proper nouns"] = "nouns",
	-- Verbs are listed because impf and pf are valid "genders".
	verbs = "verbs",
}
| |
|
| |
-- Format one inflection group: an italic label followed by its forms.
--
-- data:  headword data table; data.lang is the fallback language of a form.
-- parts: list of forms plus a `label` field. A form is either a plain string
--        (the term) or a table with the fields read below: term, alt, lang,
--        genders, nolink, hypothetical.
--
-- Every list entry of `parts` is overwritten in place with its formatted
-- link. Returns "<i>label</i>" followed by the forms joined with
-- " <i>or</i> " (or just the label when there are no forms).
local function format_inflection_parts(data, parts)
	local m_links = require("Module:links")

	for key, part in ipairs(parts) do
		if type(part) ~= "table" then
			part = {term = part}
		end

		-- These three names used to be read as undeclared globals; they were
		-- always nil, and the concatenation with `qualifiers` raised an error
		-- for every form. They are now proper per-form locals.
		-- NOTE(review): the "hypothetical"/"bold" faces and the nolink rule
		-- are assumed to match what Module:links on this wiki accepts.
		local nolink = part.hypothetical or part.nolink
		local face = part.hypothetical and "hypothetical" or "bold"
		-- No qualifier formatter is available in this file, so the prefix is
		-- empty; part.qualifiers (if any) is not rendered.
		local qualifiers = ""

		-- Convert the term into a full link.
		-- No transliteration is requested for the link.
		part = m_links.full_link(
			{
				term = not nolink and part.term or nil,
				alt = part.alt or (nolink and part.term or nil),
				lang = part.lang or data.lang,
				genders = part.genders,
			},
			face,
			false
		)

		parts[key] = qualifiers .. part
	end

	local parts_output = ""
	if #parts > 0 then
		parts_output = " " .. table.concat(parts, " <i>or</i> ")
	end

	return "<i>" .. parts.label .. "</i>" .. parts_output
end
| |
|
| |
| -- Format the inflections following the headword
| |
-- Build the parenthesised inflection list shown after the headword.
--
-- Returns "" when data.inflections is missing or empty. Otherwise each entry
-- of data.inflections is replaced in place by its formatted text and the
-- entries are returned joined with ", " inside " (" ... ")".
local function format_inflections(data)
	local inflections = data.inflections
	if not inflections or #inflections == 0 then
		return ""
	end

	-- Format every inflection group individually, overwriting the input.
	for index, group in ipairs(inflections) do
		inflections[index] = format_inflection_parts(data, group)
	end

	local joined = table.concat(inflections, ", ")
	return " (" .. joined .. ")"
end
| |
|
| |
-- Format the gender annotations of the headword.
--
-- Returns "" when data.genders is missing or empty. Otherwise delegates to
-- format_genders() of Module:getn, appends the categories it returns to
-- data.categories (creating that table if needed) and returns the formatted
-- text prefixed with a non-breaking space entity.
local function format_genders(data)
	local genders = data.genders
	if not genders or #genders == 0 then
		return ""
	end

	local pos_for_cat
	if not data.nogendercat and data.pos_category then
		-- Read the part of speech from `data`; the previous code indexed an
		-- undeclared global `pos_category`, which is nil and raised an error.
		-- Only the first gsub result (the string) is kept.
		local pos_category = data.pos_category:gsub("^reconstructed ", "")
		pos_for_cat = pos_for_gender_number_cat[pos_category]
	end

	local gen = require("Module:getn")
	local text, cats = gen.format_genders(genders, data.lang, pos_for_cat)

	if cats and #cats > 0 then
		-- data.categories may be absent when the caller passed none.
		data.categories = data.categories or {}
		for _, cat in ipairs(cats) do
			table.insert(data.categories, cat)
		end
	end

	return "&nbsp;" .. text
end
| |
|
| |
|
| -- The main entry point. | | -- The main entry point. |
Line 192: |
Line 112: |
| end | | end |
| end | | end |
|
| |
|
| |
|
| |
|
| |
| -- Format a headword with transliterations
| |
-- Join the headword forms in data.heads into one display string.
--
-- data.heads must be a list of strings. Heads are emitted unchanged: a head
-- that already contains wikitext links ("[[...]]") is left as wikitext.
-- The previous code replaced such a head with a {term, lang} table, which
-- made table.concat raise because it only accepts strings and numbers.
-- NOTE(review): if linked heads are meant to be passed through a link
-- formatter, that call has to be added here and must return a string.
--
-- Returns the heads separated by " <i>or</i> ".
local function format_headword(data)
	return table.concat(data.heads, " <i>or</i> ")
end
| |
|
| |
| -- Add links to a multiword head.
| |
-- Add links to a multiword head.
--
-- Each run of spacing/punctuation characters that contains at least one
-- character outside `wordPunc` closes the current link and opens a new one,
-- so every word ends up inside its own "[[Contionary:...]]" link.
-- Returns the linked head as a single string.
function export.add_multiword_links(head)
	local link_open = "[[Contionary:"
	local link_close = "]]"

	-- Replacement callback for one spacing/punctuation run: only the parts of
	-- the run that are not word-internal punctuation split the link.
	-- The extra parentheses drop gsub's second result (the match count).
	local function workaround_to_exclude_chars(s)
		return (mw.ustring.gsub(s, notWordPunc, link_close .. "%1" .. link_open))
	end

	local linked = mw.ustring.gsub(head, spacingPunctuation, workaround_to_exclude_chars)
	linked = link_open .. linked .. link_close

	-- Remove empty links left behind by leading, trailing or adjacent breaks.
	-- The opener carries the "Contionary:" prefix, so the empty link to strip
	-- is "[[Contionary:]]"; the old cleanup only looked for "[[]]" and
	-- therefore never removed it. "[[]]" is still stripped as before.
	linked = mw.ustring.gsub(linked, "%[%[Contionary:%]%]", "")
	linked = mw.ustring.gsub(linked, "%[%[%]%]", "")

	return linked
end
| |
|
| |
| -- Return true if the given head is multiword according to the algorithm used
| |
| -- in full_headword().
| |
-- Report whether `head` counts as multiword.
--
-- Scans every run of spacing/punctuation characters in `head`; the head is
-- multiword as soon as one run contains a character that is not listed in
-- `wordPunc`. Returns true or false.
function export.head_is_multiword(head)
	local ustring = mw.ustring

	for candidate_break in ustring.gmatch(head, spacingPunctuation) do
		local has_real_break = ustring.find(candidate_break, notWordPunc)
		if has_real_break then
			return true
		end
	end

	return false
end
| |
|
| |
-- Normalise data.heads before formatting.
--
-- data.heads may be nil, a single value or a list. Afterwards it is always a
-- non-empty list, and every empty-string head has been replaced by the
-- default head: the current page title, with per-word links added when the
-- title is multiword. `postype` is accepted for compatibility and not used.
local function preprocess(data, postype)
	local heads = data.heads

	-- Wrap a single value; a nil value yields an empty list.
	if type(heads) ~= "table" then
		heads = {heads}
	end

	-- No heads at all: request the default head.
	if #heads == 0 then
		heads = {""}
	end

	local default_head = mw.title.getCurrentTitle().text

	-- Add links to multi-word page names when appropriate.
	if export.head_is_multiword(default_head) then
		default_head = export.add_multiword_links(default_head)
	end

	-- An empty string stands for "use the default head".
	for i, head in ipairs(heads) do
		if head == "" then
			heads[i] = default_head
		end
	end

	data.heads = heads
end
| |
|
| |
-- Assemble the visible headword line.
--
-- Unless data.noposcat is set, the part-of-speech category is inserted at
-- the front of data.categories. The heads are then normalised and the
-- result is the concatenation of the formatted heads, genders, inflections
-- and the tracking categories.
local function show_headword_line(data)
	-- Previously an undeclared global (nil) was passed to format_categories.
	local tracking_categories = {}

	if not data.noposcat then
		-- The category used to be built into a block-scoped local and then
		-- discarded, and was prefixed with the pattern text "[sS]iwa ".
		-- NOTE(review): the canonical spelling "Siwa" is assumed here;
		-- confirm against the category names used on the wiki.
		data.categories = data.categories or {}
		table.insert(data.categories, 1, "Siwa " .. data.pos_category)
	end

	-- Preprocess
	preprocess(data)

	-- Format and return all the gathered information.
	-- NOTE(review): test_force_categories is not declared in the visible part
	-- of this file; it is nil unless defined elsewhere.
	return format_headword(data)
		.. format_genders(data)
		.. format_inflections(data)
		.. require("Module:utilities").format_categories(
			tracking_categories, data.lang, data.sort_key, nil,
			data.force_cat_output or test_force_categories, data.sc
		)
end
| |
|
| |
-- Produce the full headword line, including category wikitext.
--
-- data.pos_category may be given directly. Otherwise it is taken from the
-- first entry of data.categories when that entry starts with "Siwa"/"siwa";
-- the entry is then removed from the list. Raises an error when no part of
-- speech can be determined. Returns the headword line followed by the
-- formatted categories.
function full_headword(data)
	local tracking_categories = {}

	-- Later steps append to and format this list, so make sure it exists.
	data.categories = data.categories or {}
	local categories = data.categories

	-- Were any categories specified? If so, the first one may carry the
	-- part of speech.
	if #categories > 0
		and not data.pos_category
		and mw.ustring.find(categories[1], "^[sS]iwa")
	then
		-- Only the first gsub result (the string) is stored.
		data.pos_category = mw.ustring.gsub(categories[1], "^[sS]iwa ", "")
		table.remove(categories, 1)
	end

	if not data.pos_category then
		error(
			'No valid part-of-speech categories were found in the list '
			.. 'of categories passed to the function "full_headword". '
			.. 'The part-of-speech category should consist of a language\'s '
			.. 'canonical name plus a part of speech.'
		)
	end

	-- This may add more categories (e.g. gender categories), so make sure it
	-- gets evaluated first.
	local text = show_headword_line(data)
	local m_utilities = require("Module:utilities")

	return text
		.. m_utilities.format_categories(data.categories, nil, data.force_cat_output)
		.. m_utilities.format_categories(tracking_categories, nil, data.force_cat_output)
end

-- The function above is a global, so it was not reachable through the table
-- this module returns. Expose it there as well, without replacing an entry
-- that may already have been defined elsewhere in the file.
export.full_headword = export.full_headword or full_headword
| |
|
| |
| return export
| |