Module:IPA: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
Tag: Reverted
Line 1: Line 1:
local export = {}
local export = {}


local force_cat = false -- for testing
local m_IPA = require("Module:IPA")
local parameter_utilities_module = "Module:parameter utilities"


local m_data = mw.loadData("Module:IPA/data")
-- Used for [[Template:IPA]].
local m_str_utils = require("Module:string utilities")
function export.IPA(frame)
local m_symbols = mw.loadData("Module:IPA/data/symbols")
local parent_args = frame:getParent().args
local pron_qualifier_module = "Module:pron qualifier"
local include_langname = frame.args.include_langname
local qualifier_module = "Module:qualifier"
local compat = parent_args.lang
local references_module = "Module:references"
local offset = compat and 0 or 1
local syllables_module = "Module:syllables"
local lang_arg = compat and "lang" or 1
local utilities_module = "Module:utilities"
local m_syllables -- [[Module:syllables]]; loaded below if needed


local concat = table.concat
local params = {
local find = string.find
[lang_arg] = {required = true, type = "language", default = "en"},
local gmatch = m_str_utils.gmatch
[1 + offset] = {list = true, disallow_holes = true},
local gsub = string.gsub
-- Deprecated; don't use in new code.
local insert = table.insert
["qual"] = {list = true, allow_holes = true, separate_no_index = true, alias_of = "q"},
local len = m_str_utils.len
["nocount"] = {type = "boolean"},
local listToText = mw.text.listToText
["nocat"] = {type = "boolean"},
local match = string.match
["sort"] = {},
local sub = string.sub
}
local u = m_str_utils.char
local ufind = m_str_utils.find
local ugsub = m_str_utils.gsub
local umatch = m_str_utils.match
local usub = m_str_utils.sub


local namespace = mw.title.getCurrentTitle().namespace
local m_param_utils = require(parameter_utilities_module)
local is_content_page = namespace == 0 or namespace == 118


local function process_maybe_split_categories(split_output, categories, prontext, lang, errtext)
local param_mods = m_param_utils.construct_param_mods {
if split_output ~= "raw" then
{group = {"ref", "a", "q"}},
if categories[1] then
{group = "link", include = {"t", "gloss", "pos"}},
categories = require(utilities_module).format_categories(categories, lang, nil, nil, force_cat)
}
else
categories = ""
end
end
if split_output then -- for use of IPA in links, etc.
if errtext then
return prontext, categories, errtext
else
return prontext, categories
end
else
return prontext .. (errtext or "") .. categories
end
end


--[==[
local items, args = m_param_utils.process_list_arguments {
Format a line of one or more IPA pronunciations as {{tl|IPA}} would do it, i.e. with a preceding {"IPA:"} followed by
params = params,
the word {"key"} linking to an Appendix page describing the language's phonology, and with an added category
param_mods = param_mods,
{{cd|<var>lang</var> terms with IPA pronunciation}}. Other than the extra preceding text and category, this is identical
raw_args = parent_args,
to {format_IPA_multiple()}, and the considerations described there in the documentation apply here as well. There is a
termarg = 1 + offset,
single parameter `data`, an object with the following fields:
term_dest = "pron",
* `lang`: Object representing the language of the pronunciations, which is used when adding cleanup categories for
track_module = "IPA",
  pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
}
  add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); for adding a category
  {{cd|<var>lang</var> terms with IPA pronunciation}}; and for determining the proper sort keys for categories. Unlike
  for {format_IPA_multiple()}, `lang` may not be {nil}.
* `items`: List of pronunciations, in exactly the same format as for {format_IPA_multiple()}.
* `err`: If not {nil}, a string containing an error message to use in place of the link to the language's phonology.
* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
  first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
  `items`.
* `sort_key`: Explicit sort key used for categories.
* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
  only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
  given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
  or other categories. If you need them suppressed, use `split_output` to return the categories separately and ignore
  them.
* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
  categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
  the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
  strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
  `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
* `include_langname`: If specified, prefix the result with the language name, followed by a colon.
* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
  pronunciations and preceding {"IPA:"}.
* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
  pronunciations and preceding {"IPA:"}.
* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
]==]
function export.format_IPA_full(data)
if type(data) ~= "table" or data.getCode then
error("Must now supply a table of arguments to format_IPA_full(); first argument should be that table, not a language object")
end
local lang = data.lang
local items = data.items
local err = data.err
local separator = data.separator
local sort_key = data.sort_key
local no_count = data.no_count
local split_output = data.split_output
local q = data.q
local qq = data.qq
local a = data.a
local aa = data.aa
local include_langname = data.include_langname


local hasKey = m_data.langs_with_infopages
local lang = args[lang_arg]


if not lang or not lang.getCode then
local data = {
error("Must specify language to format_IPA_full()")
lang = lang,
end
items = items,
local langname = lang:getCanonicalName()
no_count = args.nocount,
 
nocat = args.nocat,
local prefix_text
sort_key = args.sort,
if err then
include_langname = include_langname,
prefix_text = '<span class="error">' .. err .. '</span>'
q = args.q.default,
else
qq = args.qq.default,
prefix_text = langname .. " pronunciation"
a = args.a.default,
prefix_text = "[[" .. prefix_text .. "|key]]"
aa = args.aa.default,
end
}
 
local prefix = "[[Wiktionary:International Phonetic Alphabet|IPA]]<sup>(" .. prefix_text .. ")</sup>:&#32;"
 
local IPAs, categories = export.format_IPA_multiple(lang, items, separator, no_count, "raw")


if is_content_page then
return m_IPA.format_IPA_full(data)
insert(categories, {
cat = langname .. " terms with IPA pronunciation",
sort_key = sort_key
})
end
 
local prontext = prefix .. IPAs
if q and q[1] or qq and qq[1] or a and a[1] or aa and aa[1] then
prontext = require(pron_qualifier_module).format_qualifiers {
lang = lang,
text = prontext,
q = q,
qq = qq,
a = a,
aa = aa,
}
end
if include_langname then
prontext = langname .. ": " .. prontext
end
return process_maybe_split_categories(split_output, categories, prontext, lang)
end
end


local function split_phonemic_phonetic(pron)
-- Used for [[Template:IPAchar]].
local reconstructed, phonemic, phonetic = match(pron, "^(%*?)(/.-/)%s+(%[.-%])$")
function export.IPAchar(frame)
if reconstructed then
local parent_args = frame.getParent and frame:getParent().args or frame
return reconstructed .. phonemic, reconstructed .. phonetic
else
return pron, nil
end
end


local function determine_repr(pron)
local params = {
local repr_mark = {}
[1] = {list = true, disallow_holes = true},
local repr, reconstructed
-- FIXME, remove this.
["lang"] = {}, -- This parameter is not used and does nothing, but is allowed for futureproofing.
}


-- remove initial asterisk before representation marks, used on some Reconstruction pages
local m_param_utils = require(parameter_utilities_module)
if sub(pron, 1, 1) == "*" then
reconstructed = true
pron = sub(pron, 2)
end


local representation_types = {
local param_mods = m_param_utils.construct_param_mods {
['/'] = { right = '/', type = 'phonemic', },
-- It doesn't really make sense to have separate overall a=/aa=/q=/qq= for {{IPAchar}}, which doesn't format a
['['] = { right = ']', type = 'phonetic', },
-- whole line but just individual pronunciations. Instead they are associated with the first item.
['⟨'] = { right = '⟩', type = 'orthographic', },
{group = {"ref", "a", "q"}, separate_no_index = false},
['-'] = { type = 'rhyme' },
-- Deprecated; don't use in new code.
{param = "qual", alias_of = "q"},
}
}


repr_mark.i, repr_mark.f, repr_mark.left, repr_mark.right = ufind(pron, '^(.).-(.)$')
local items, args = m_param_utils.process_list_arguments {
params = params,
param_mods = param_mods,
raw_args = parent_args,
termarg = 1,
term_dest = "pron",
track_module = "IPAchar",
}


local representation_type = representation_types[repr_mark.left]
-- Format
return m_IPA.format_IPA_multiple(nil, items)
end


if representation_type then
function export.XSAMPA(frame)
if representation_type.right then
local params = {
if repr_mark.right == representation_type.right then
[1] = { required = true },
repr = representation_type.type
}
end
local args = require("Module:parameters").process(frame:getParent().args, params)
else
repr = representation_type.type
return m_IPA.XSAMPA_to_IPA(args[1] or "[Eg'zA:mp5=]")
end
else
repr = nil
end
 
return repr, reconstructed
end
end


local function hasInvalidSeparators(transcription)
-- Used by [[Template:X2IPA]]
if match(transcription, "%.\203[\136\140]") then -- [ˈˌ]
function export.X2IPAtemplate(frame)
return true
local parent_args = frame.getParent and frame:getParent().args or frame
else
local compat = parent_args["lang"]
return false
local offset = compat and 0 or 1
end
end


--[==[
local params = {
Format a line of one or more bare IPA pronunciations (i.e. without any preceding {"IPA:"} and without adding to a
[compat and "lang" or 1] = {required = true, default = "und"},
category {{cd|<var>lang</var> terms with IPA pronunciation}}). Individual pronunciations are formatted using
[1 + offset] = {list = true, allow_holes = true},
{format_IPA()} and are combined with separators, qualifiers, pre-text, post-text, etc. to form a line of pronunciations.
["ref"] = {list = true, allow_holes = true},
Parameters accepted are:
["a"] = {list = true, allow_holes = true, separate_no_index = true},
* `lang` is an object representing the language of the pronunciations, which is used when adding cleanup categories for
["aa"] = {list = true, allow_holes = true, separate_no_index = true},
  pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
["q"] = {list = true, allow_holes = true, separate_no_index = true},
  add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); and for computing the
["qq"] = {list = true, allow_holes = true, separate_no_index = true},
  proper sort keys for categories. `lang` may be {nil}.
["qual"] = {list = true, allow_holes = true},
* `items` is a list of pronunciations, each of which is an object with the following properties:
["nocount"] = {type = "boolean"},
** `pron`: the pronunciation, in the same format as is accepted by {format_IPA()}, i.e. it should be either phonemic
["sort"] = {},
    (surrounded by {/.../}), phonetic (surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}) or a rhyme
}
(beginning with a hyphen);
** `pretext`: text to display directly before the formatted pronunciation, inside of any qualifiers or accent
local args = require("Module:parameters").process(parent_args, params)
    qualifiers;
** `posttext`: text to display directly after the formatted pronunciation, inside of any qualifiers or accent
local m_XSAMPA = require("Module:IPA/X-SAMPA")
    qualifiers;
** `q` or `qualifiers`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted
local pronunciations, refs, a, aa, q, qq, qual, lang =
    pronunciation; note that `qualifiers` is deprecated;
args[1 + offset], args.ref, args.a, args.aa, args.q, args.qq, args.qual, args[compat and "lang" or 1]
** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
local output = {}
** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation;
table.insert(output, "{{IPA")
** `refs`: {nil} or a list of references or reference specs to add after the pronunciation and any posttext and
    qualifiers; the value of a list item is either a string containing the reference text (typically a call to a
table.insert(output, "|" .. lang)
citation template such as {{tl|cite-book}}, or a template wrapping such a call), or an object with fields `text`
(the reference text), `name` (the name of the reference, as in {{cd|<nowiki><ref name="foo">...</ref></nowiki>}}
or {{cd|<nowiki><ref name="foo" /></nowiki>}}) and/or `group` (the group of the reference, as in
{{cd|<nowiki><ref name="foo" group="bar">...</ref></nowiki>}} or
{{cd|<nowiki><ref name="foo" group="bar"/></nowiki>}}); this uses a parser function to format the reference
appropriately and insert a footnote number that hyperlinks to the actual reference, located in the
{{cd|<nowiki><references /></nowiki>}} section;
** `gloss`: {nil} or a gloss (definition) for this item, if different definitions have different pronunciations;
** `pos`: {nil} or a part of speech for this item, if different parts of speech have different pronunciations;
** `separator`: the separator text to insert directly before the formatted pronunciation and all qualifiers, accent
  qualifiers and pre-text; defaults to the outer `separator` parameter.
* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
  first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
  `items`.
* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
  only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
  given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
  categories. If you need them suppressed, use `split_output` to return the categories separately and ignore them.
* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
  categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
  the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
  strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
  `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
]==]
function export.format_IPA_multiple(lang, items, separator, no_count, split_output)
local categories = {}
separator = separator or ", "


-- Format
if a.default then
if not items[1] then
table.insert(output, "|a=" .. a.default)
if namespace == 10 then -- Template
end
insert(items, {pron = "/aɪ piː ˈeɪ/"})
if q.default then
else
table.insert(output, "|q=" .. q.default)
insert(categories, "Pronunciation templates without a pronunciation")
end
end
end
 
for i = 1, math.max(pronunciations.maxindex, refs.maxindex, a.maxindex, aa.maxindex, q.maxindex, qq.maxindex,
local bits = {}
qual.maxindex) do
 
if pronunciations[i] then
for i, item in ipairs(items) do
table.insert(output, "|" .. m_XSAMPA.XSAMPA_to_IPA(pronunciations[i]))
local bit
 
-- If the pronunciation is entirely empty, allow this and don't do anything, so that e.g. the pretext and/or
-- posttext can be specified to force something like ''unknown'' to appear in place of the pronunciation
-- (as happens e.g. when ? is used as a respelling in [[Module:ca-IPA]]; see [[guèiser]] for an example).
if item.pron == "" then
bit = ""
else
local item_categories, errtext
bit, item_categories, errtext = export.format_IPA(lang, item.pron, "raw")
bit = bit .. errtext
for _, cat in ipairs(item_categories) do
insert(categories, cat)
end
end
end
 
if a[i] then
if item.pretext then
table.insert(output, "|a" .. i .. "=" .. a[i])
bit = item.pretext .. bit
end
end
 
if aa[i] then
if item.posttext then
table.insert(output, "|aa" .. i .. "=" .. aa[i])
bit = bit .. item.posttext
end
end
 
if q[i] then
local has_qualifiers = item.q and item.q[1] or item.qq and item.qq[1] or item.qualifiers and item.qualifiers[1]
table.insert(output, "|q" .. i .. "=" .. q[i])
or item.a and item.a[1] or item.aa and item.aa[1]
local has_gloss_or_pos = item.gloss or item.pos
if has_qualifiers or has_gloss_or_pos then
-- FIXME: Currently we tack the gloss and POS (in that order) onto the end of the regular left qualifiers.
-- Should we do something different?
local q = item.q
if has_gloss_or_pos then
q = mw.clone(item.q) or {}
if item.gloss then
local m_qualifier = require(qualifier_module)
insert(q, m_qualifier.wrap_qualifier_css("“", "quote") .. item.gloss ..
m_qualifier.wrap_qualifier_css("", "quote"))
end
if item.pos then
-- FIXME: Consider expanding aliases as found in [[Module:headword/data]] or similar.
insert(q, item.pos)
end
end
 
bit = require("Module:pron qualifier").format_qualifiers {
lang = lang,
text = bit,
q = q,
qq = item.qq,
qualifiers = item.qualifiers,
a = item.a,
aa = item.aa,
}
end
end
 
if qq[i] then
if item.note then
table.insert(output, "|qq" .. i .. "=" .. qq[i])
-- Support removed on 2024-06-15.
error("Support for `.note` has been removed; switch to `.refs` (which must be a list)")
end
end
if item.refs then
if refs[i] then
local refspecs = item.refs
table.insert(output, "|ref" .. i .. "=" .. refs[i])
if #refspecs > 0 then
bit = bit .. require(references_module).format_references(refspecs)
end
end
end
 
if qual[i] then
bit = (item.separator or (i == 1 and "" or separator)) .. bit
table.insert(output, "|qual" .. i .. "=" .. qual[i])
 
insert(bits, bit)
 
if lang then
-- Add syllable count if the language's diphthongs are listed in [[Module:syllables]].
-- Don't do this if the term has spaces, a liaison mark (‿) or isn't in mainspace.
if not no_count and namespace == 0 then
m_syllables = m_syllables or require(syllables_module)
local langcode = lang:getCode()
if m_data.langs_to_generate_syllable_count_categories[langcode] then
local phonemic, phonetic = split_phonemic_phonetic(item.pron)
local use_it
if not phonetic then -- not a '/.../ [...]' combined pronunciation
local repr = determine_repr(phonemic)
if m_data.langs_to_use_phonetic_notation[langcode] then
use_it = repr == "phonetic" and phonemic or nil
else
use_it = repr == "phonemic" and phonemic or nil
end
elseif repr == "phonetic" then
use_it = phonetic
elseif repr == "phonemic" then
use_it = phonemic
end
-- Note: two uses of find with plain patterns are much faster than umatch with [ ‿].
if use_it and not (find(use_it, " ") or find(use_it, "‿")) then
local syllable_count = m_syllables.getVowels(use_it, lang)
if syllable_count then
insert(categories, lang:getCanonicalName() .. " " .. syllable_count ..
"-syllable words")
end
end
end
end
 
-- The nature of hasInvalidSeparators() is such that we don't have to split a combined '/.../ [...]' spec
-- into its parts in order to process.
if lang:getCode() == "en" and hasInvalidSeparators(item.pron) then
insert(categories, "IPA for English using .ˈ or .ˌ")
end
end
end
end
end
 
if aa.default then
return process_maybe_split_categories(split_output, categories, concat(bits), lang)
table.insert(output, "|aa=" .. aa.default)
end
end
 
if qq.default then
--[=[
table.insert(output, "|qq=" .. qq.default)
Format a single IPA pronunciation, which cannot be a combined spec (such as {/.../ [...]}). This has been extracted from
{format_IPA()} to allow the latter to handle such combined specs. This works like {format_IPA()} but requires that
pre-created {err} (for error messages) and {categories} lists be passed in, and adds any generated error messages and
categories to those lists. A single value is returned, the pronunciation, which is usually the same as passed in, but
may have HTML added surrounding invalid characters so they appear in red.
]=]
local function format_one_IPA(lang, pron, err, categories)
-- Remove wikilinks, so that wikilink brackets are not misinterpreted as indicating phonetic transcription
local without_links = gsub(pron, "%[%[[^|%]]+|([^%]]+)%]%]", "%1")
without_links = gsub(without_links, "%[%[[^%]]+%]%]", "%1")
 
-- Detect whether this is a phonemic or phonetic transcription
local repr, reconstructed = determine_repr(without_links)
 
if reconstructed then
pron = sub(pron, 2)
without_links = sub(without_links, 2)
end
end
 
if args.nocount then
-- If valid, strip the representation marks
table.insert(output, "|nocount=1")
if repr == "phonemic" then
pron = usub(pron, 2, -2)
without_links = usub(without_links, 2, -2)
elseif repr == "phonetic" then
pron = usub(pron, 2, -2)
without_links = usub(without_links, 2, -2)
elseif repr == "orthographic" then
pron = usub(pron, 2, -2)
without_links = usub(without_links, 2, -2)
elseif repr == "rhyme" then
pron = usub(pron, 2)
without_links = usub(without_links, 2)
else
insert(categories, "IPA pronunciations with invalid representation marks")
-- insert(err, "invalid representation marks")
-- Removed because it's annoying when previewing pronunciation pages.
end
end
 
if args.sort then
if pron == "" then
table.insert(output, "|sort=" .. args.sort)
insert(categories, "IPA pronunciations with no pronunciation present")
end
end
table.insert(output, "}}")


-- Check for obsolete and nonstandard symbols
return table.concat(output)
for i, symbol in ipairs(m_data.nonstandard) do
end
local result
for nonstandard in gmatch(pron, symbol) do
if not result then
result = {}
end
insert(result, nonstandard)
insert(categories,
{cat = "IPA pronunciations with obsolete or nonstandard characters", sort_key = nonstandard}
)
end


if result then
-- Used by [[Template:X2IPAchar]]
insert(err, "obsolete or nonstandard characters (" .. concat(result) .. ")")
function export.X2IPAchar(frame)
break
local params = {
end
[1] = { list = true, allow_holes = true },
end
["ref"] = {list = true, allow_holes = true},
["q"] = {list = true, allow_holes = true, require_index = true},
["qq"] = {list = true, allow_holes = true, require_index = true},
["qual"] = { list = true, allow_holes = true },
-- FIXME, remove this.
["lang"] = {},
}
local args = require("Module:parameters").process(frame:getParent().args, params)


--[[ Check for invalid symbols after removing the following:
local m_XSAMPA = require("Module:IPA/X-SAMPA")
1. wikilinks (handled above)
2. paired HTML tags
local pronunciations, refs, q, qq, qual, lang = args[1], args.ref, args.q, args.qq, args.qual, args.lang
3. bolding
4. italics
local output = {}
5. HTML entity for space
table.insert(output, "{{IPAchar")
6. asterisk at beginning of transcription
7. comma followed by spacing characters
for i = 1, math.max(pronunciations.maxindex, refs.maxindex, q.maxindex, qq.maxindex, qual.maxindex) do
8. superscripts enclosed in superscript parentheses ]]
if pronunciations[i] then
local found_HTML
table.insert(output, "|" .. m_XSAMPA.XSAMPA_to_IPA(pronunciations[i]))
local result = gsub(without_links, "<(%a+)[^>]*>([^<]+)</%1>",
function(tagName, content)
found_HTML = true
return content
end)
result = gsub(result, "'''([^']*)'''", "%1")
result = gsub(result, "''([^']*)''", "%1")
result = gsub(result, "&[^;]+;", "") -- This may catch things that are not valid character entities.
result = gsub(result, "^%*", "")
result = ugsub(result, ",%s+", "")
 
-- VS15
local vs15_class = "[" .. m_symbols.add_vs15 .. "]"
if umatch(pron, vs15_class) then
local vs15 = u(0xFE0E)
if find(result, vs15) then
result = gsub(result, vs15, "")
pron = gsub(pron, vs15, "")
end
end
pron = ugsub(pron, "(" .. vs15_class .. ")", "%1" .. vs15)
if q[i] then
end
table.insert(output, "|q" .. i .. "=" .. q[i])
 
if result ~= "" then
local suggestions = {}
for k, v in pairs(m_symbols.invalid) do
if find(result, k, 1, true) then
insert(suggestions, k .. " with " .. v)
end
end
end
if suggestions[1] then
if qq[i] then
suggestions = listToText(suggestions)
table.insert(output, "|qq" .. i .. "=" .. qq[i])
if is_content_page then
error("Invalid IPA: replace " .. suggestions)
else
insert(err, "replace " .. suggestions)
end
end
end
result = ugsub(result, "⁽[".. m_symbols.superscripts .. "]+⁾", "")
if qual[i] then
local per_lang_valid
table.insert(output, "|qual" .. i .. "=" .. qual[i])
if lang then
per_lang_valid = m_symbols.per_lang_valid[lang:getCode()]
end
end
per_lang_valid = per_lang_valid or ""
if refs[i] then
result = ugsub(result, "[" .. m_symbols.valid .. per_lang_valid .. "]", "")
table.insert(output, "|ref" .. i .. "=" .. refs[i])
if result ~= "" then
local category = "IPA pronunciations with invalid IPA characters"
if not is_content_page then
category = category .. "/non_mainspace"
end
insert(categories, category)
insert(err, "invalid IPA characters (" .. result .. ")")
end
end
end
end


if found_HTML then
if lang then
insert(categories, "IPA pronunciations with paired HTML tags")
table.insert(output, "|lang=" .. lang)
end
end
table.insert(output, "}}")
return table.concat(output)
end


if repr == "phonemic" or repr == "rhyme" then
-- Used by [[Template:x2rhymes]]
if lang and m_data.phonemes[lang:getCode()] then
function export.X2rhymes(frame)
local valid_phonemes = m_data.phonemes[lang:getCode()]
local parent_args = frame.getParent and frame:getParent().args or frame
local rest = pron
local compat = parent_args["lang"]
local phonemes = {}
local offset = compat and 0 or 1


while #rest > 0 do
local params = {
local longestmatch, longestmatch_len = "", 0
[compat and "lang" or 1] = {required = true, default = "und"},
[1 + offset] = {required = true, list = true, allow_holes = true},
}
local args = require("Module:parameters").process(parent_args, params)
local m_XSAMPA = require("Module:IPA/X-SAMPA")
pronunciations, lang = args[1 + offset], args[compat and "lang" or 1]
local output =  {}
table.insert(output, "{{rhymes")
table.insert(output, "|" .. lang)


local rest_init = sub(rest, 1, 1)
for i = 1, pronunciations.maxindex do
if rest_init == "(" or rest_init == ")" then
if pronunciations[i] then
longestmatch = rest_init
table.insert(output, "|" .. m_XSAMPA.XSAMPA_to_IPA(pronunciations[i]))
longestmatch_len = 1
else
for _, phoneme in ipairs(valid_phonemes) do
local phoneme_len = len(phoneme)
if phoneme_len > longestmatch_len and usub(rest, 1, phoneme_len) == phoneme then
longestmatch = phoneme
longestmatch_len = len(longestmatch)
end
end
end
 
if longestmatch_len > 0 then
insert(phonemes, longestmatch)
rest = usub(rest, longestmatch_len + 1)
else
local phoneme = usub(rest, 1, 1)
insert(phonemes, "<span style=\"color: red\">" .. phoneme .. "</span>")
rest = usub(rest, 2)
insert(categories, "IPA pronunciations with invalid phonemes/" .. lang:getCode())
end
end
 
pron = concat(phonemes)
end
 
if repr == "phonemic" then
pron = "/" .. pron .. "/"
else
pron = "-" .. pron
end
end
elseif repr == "phonetic" then
pron = "[" .. pron .. "]"
elseif repr == "orthographic" then
pron = "⟨" .. pron .. "⟩"
end
end
 
if reconstructed then
table.insert(output, "}}")
pron = "*" .. pron
end
return table.concat(output)
 
return pron
end
end


--[==[
-- Used for [[Template:enPR]].
Format an IPA pronunciation. This wraps the pronunciation in appropriate CSS classes and adds cleanup categories and
function export.enPR(frame)
error messages as needed. The pronunciation `pron` should be either phonemic (surrounded by {/.../}), phonetic
local parent_args = frame:getParent().args
(surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}), a rhyme (beginning with a hyphen) or a combined
phonemic/phonetic spec (of the form {/.../ [...]}). `lang` indicates the language of the pronunciation and can be {nil}.
If not {nil}, and the specified language has data in [[Module:IPA/data]] indicating the allowed phonemes, then the page
will be added to a cleanup category and an error message displayed next to the outputted pronunciation. Note that {lang}
also determines sort key processing in the added cleanup categories. If `split_output` is not given, the return value is
a concatenation of the formatted pronunciation, error messages and formatted cleanup categories. Otherwise, three values
are returned: the formatted pronunciation, the cleanup categories and the concatenated error messages. If `split_output`
is the value {"raw"}, the cleanup categories are returned in list form, where the list elements are a combination of
category strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]].
If `split_output` is any other value besides {nil}, the cleanup categories are returned as a pre-formatted concatenated
string.
]==]
function export.format_IPA(lang, pron, split_output)
local err = {}
local categories = {}


-- `pron` shouldn't contain ref tags.
local params = {
if match(pron, "\127'\"`UNIQ%-%-ref%-[%dA-F]+%-QINU`\"'\127") then
[1] = {list = true, disallow_holes = true},
error("<ref> tags found inside pronunciation parameter.")
}
end


local phonemic, phonetic = split_phonemic_phonetic(pron)
local m_param_utils = require(parameter_utilities_module)
pron = format_one_IPA(lang, phonemic, err, categories)
if phonetic then
phonetic = format_one_IPA(lang, phonetic, err, categories)
pron = pron .. " " .. phonetic
end


if err[1] then
local param_mods = m_param_utils.construct_param_mods {
err = '<span class="previewonly error" style="font-size: small;>&#32;' .. concat(err, ", ") .. "</span>"
{group = {"q", "a", "ref"}},
else
}
err = ""
end


return process_maybe_split_categories(split_output, categories, '<span class="IPA">' .. pron .. "</span>", lang,
local items, args = m_param_utils.process_list_arguments {
err)
params = params,
end
param_mods = param_mods,
raw_args = parent_args,
termarg = 1,
term_dest = "pron",
track_module = "enPR",
}


--[==[
local data = {
Format a line of one or more enPR pronunciations as {{tl|enPR}} would do it, i.e. with a preceding {"enPR:"} (linked to
items = items,
[[Appendix:English pronunciation]]) followed by one or more formatted, comma-separated enPR pronunciations. The
q = args.q.default,
pronunciations are formatted by wrapping them in the {{cd|AHD}} and {{cd|enPR}} CSS classes and adding any left and
qq = args.qq.default,
right regular and accent qualifiers. In addition, the overall result is wrapped in any overall left and right regular
a = args.a.default,
and accent qualifiers. There is a single parameter `data`, an object with the following fields:
aa = args.aa.default,
* `items` is a list of enPR pronunciations, each of which is an object with the following properties:
}
** `pron`: the enPR pronunciation;
** `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted pronunciation;
** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation.
* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
  pronunciations and preceding {"enPR:"}.
* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
  pronunciations and preceding {"enPR:"}.
* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
]==]
function export.format_enPR_full(data)
local prefix = "[[Appendix:English pronunciation|enPR]]: "
local lang = require("Module:languages").getByCode("en")
local parts = {}
 
for _, item in ipairs(data.items) do
local part = '<span class="AHD enPR">' .. item.pron .. "</span>"
 
if item.q and item.q[1] or item.qq and item.qq[1] or item.a and item.a[1] or item.aa and item.aa[1] then
part = require("Module:pron qualifier").format_qualifiers {
lang = lang,
text = part,
q = item.q,
qq = item.qq,
a = item.a,
aa = item.aa,
}
end
insert(parts, part)
end
 
local prontext = prefix .. concat(parts, ", ")
if data.q and data.q[1] or data.qq and data.qq[1] or data.a and data.a[1] or data.aa and data.aa[1] then
prontext = require(pron_qualifier_module).format_qualifiers {
lang = lang,
text = prontext,
q = data.q,
qq = data.qq,
a = data.a,
aa = data.aa,
}
end


return prontext
return m_IPA.format_enPR_full(data)
end
end


return export
return export

Revision as of 10:25, 8 January 2025



local export = {}

local m_IPA = require("Module:IPA")
local parameter_utilities_module = "Module:parameter utilities"

-- Entry point for [[Template:IPA]].
--
-- Arguments are read from the parent frame (the template call). If the named parameter `lang` is present, the
-- language is taken from it and pronunciations begin at positional parameter 1; otherwise the language is positional
-- parameter 1 and pronunciations begin at positional parameter 2. The processed arguments are bundled into a data
-- object and handed to `format_IPA_full` in [[Module:IPA]], whose result is returned.
function export.IPA(frame)
	local template_args = frame:getParent().args
	local show_langname = frame.args.include_langname

	-- Decide which keys hold the language and the first pronunciation.
	local lang_key, first_pron
	if template_args.lang then
		lang_key, first_pron = "lang", 1
	else
		lang_key, first_pron = 1, 2
	end

	local spec = {}
	spec[lang_key] = {required = true, type = "language", default = "en"}
	spec[first_pron] = {list = true, disallow_holes = true}
	-- Deprecated; don't use in new code.
	spec.qual = {list = true, allow_holes = true, separate_no_index = true, alias_of = "q"}
	spec.nocount = {type = "boolean"}
	spec.nocat = {type = "boolean"}
	spec.sort = {}

	local param_utils = require(parameter_utilities_module)

	local prons, args = param_utils.process_list_arguments({
		params = spec,
		param_mods = param_utils.construct_param_mods({
			{group = {"ref", "a", "q"}},
			{group = "link", include = {"t", "gloss", "pos"}},
		}),
		raw_args = template_args,
		termarg = first_pron,
		term_dest = "pron",
		track_module = "IPA",
	})

	-- The un-indexed q=/qq=/a=/aa= values apply to the whole line and are read from `.default`.
	return m_IPA.format_IPA_full({
		lang = args[lang_key],
		items = prons,
		no_count = args.nocount,
		nocat = args.nocat,
		sort_key = args.sort,
		include_langname = show_langname,
		q = args.q.default,
		qq = args.qq.default,
		a = args.a.default,
		aa = args.aa.default,
	})
end

-- Entry point for [[Template:IPAchar]].
--
-- `frame` is normally a frame object, in which case the arguments of its parent are used. A value without a
-- `getParent` field is treated as the argument table itself. The processed items are formatted by
-- `format_IPA_multiple` in [[Module:IPA]], called with no language (first argument is nil).
function export.IPAchar(frame)
	local raw_args
	if frame.getParent then
		raw_args = frame:getParent().args or frame
	else
		raw_args = frame
	end

	local spec = {
		[1] = {list = true, disallow_holes = true},
		-- FIXME, remove this.
		lang = {}, -- This parameter is not used and does nothing, but is allowed for futureproofing.
	}

	local param_utils = require(parameter_utilities_module)

	-- Only the item list is needed here; the processed-args table returned alongside it is not used.
	local prons = param_utils.process_list_arguments({
		params = spec,
		param_mods = param_utils.construct_param_mods({
			-- It doesn't really make sense to have separate overall a=/aa=/q=/qq= for {{IPAchar}}, which doesn't
			-- format a whole line but just individual pronunciations. Instead they are associated with the first
			-- item.
			{group = {"ref", "a", "q"}, separate_no_index = false},
			-- Deprecated; don't use in new code.
			{param = "qual", alias_of = "q"},
		}),
		raw_args = raw_args,
		termarg = 1,
		term_dest = "pron",
		track_module = "IPAchar",
	})

	return m_IPA.format_IPA_multiple(nil, prons)
end

-- Template entry point: converts the X-SAMPA string given as parameter 1 of the calling template into IPA via
-- `XSAMPA_to_IPA` in [[Module:IPA]]. If the processed parameter is missing, a built-in sample string is converted
-- instead.
function export.XSAMPA(frame)
	local processed = require("Module:parameters").process(frame:getParent().args, {
		[1] = {required = true},
	})

	local xsampa = processed[1]
	if not xsampa then
		xsampa = "[Eg'zA:mp5=]"
	end

	return m_IPA.XSAMPA_to_IPA(xsampa)
end

-- Used by [[Template:X2IPA]].
--
-- Builds the wikitext of an {{IPA}} call from X-SAMPA input: every pronunciation is converted with
-- `XSAMPA_to_IPA` from [[Module:IPA/X-SAMPA]], and the qualifier/reference parameters are copied through unchanged.
-- `frame` may be a frame object (the parent's arguments are used) or a plain argument table. The language is read
-- from `lang=` when present, otherwise from positional parameter 1; it is always emitted positionally.
function export.X2IPAtemplate(frame)
	local raw_args = frame.getParent and frame:getParent().args or frame

	-- Decide which keys hold the language and the pronunciation list.
	local lang_key, pron_key
	if raw_args["lang"] then
		lang_key, pron_key = "lang", 1
	else
		lang_key, pron_key = 1, 2
	end

	local spec = {}
	spec[lang_key] = {required = true, default = "und"}
	spec[pron_key] = {list = true, allow_holes = true}
	spec.ref = {list = true, allow_holes = true}
	spec.a = {list = true, allow_holes = true, separate_no_index = true}
	spec.aa = {list = true, allow_holes = true, separate_no_index = true}
	spec.q = {list = true, allow_holes = true, separate_no_index = true}
	spec.qq = {list = true, allow_holes = true, separate_no_index = true}
	spec.qual = {list = true, allow_holes = true}
	spec.nocount = {type = "boolean"}
	spec.sort = {}

	local args = require("Module:parameters").process(raw_args, spec)
	local to_ipa = require("Module:IPA/X-SAMPA").XSAMPA_to_IPA

	local prons = args[pron_key]
	local left_accent, right_accent = args.a, args.aa
	local left_qual, right_qual = args.q, args.qq

	-- Indexed parameters, in the order they are emitted after each pronunciation.
	local indexed = {
		{"a", left_accent},
		{"aa", right_accent},
		{"q", left_qual},
		{"qq", right_qual},
		{"ref", args.ref},
		{"qual", args.qual},
	}

	local out = {"{{IPA", "|" .. args[lang_key]}
	local function emit(text)
		out[#out + 1] = text
	end

	-- Overall left-hand qualifiers go before the pronunciations.
	if left_accent.default then
		emit("|a=" .. left_accent.default)
	end
	if left_qual.default then
		emit("|q=" .. left_qual.default)
	end

	local last = math.max(prons.maxindex, args.ref.maxindex, left_accent.maxindex, right_accent.maxindex,
		left_qual.maxindex, right_qual.maxindex, args.qual.maxindex)
	for i = 1, last do
		local xsampa = prons[i]
		if xsampa then
			emit("|" .. to_ipa(xsampa))
		end
		for _, entry in ipairs(indexed) do
			local value = entry[2][i]
			if value then
				emit("|" .. entry[1] .. i .. "=" .. value)
			end
		end
	end

	-- Overall right-hand qualifiers and the remaining flags go after the pronunciations.
	if right_accent.default then
		emit("|aa=" .. right_accent.default)
	end
	if right_qual.default then
		emit("|qq=" .. right_qual.default)
	end
	if args.nocount then
		emit("|nocount=1")
	end
	if args.sort then
		emit("|sort=" .. args.sort)
	end

	emit("}}")

	return table.concat(out)
end

-- Used by [[Template:X2IPAchar]].
--
-- Builds the wikitext of an {{IPAchar}} call from X-SAMPA input: each pronunciation is converted with
-- `XSAMPA_to_IPA` from [[Module:IPA/X-SAMPA]]; the indexed q/qq/qual/ref parameters and `lang=` are copied through.
function export.X2IPAchar(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {list = true, allow_holes = true},
		ref = {list = true, allow_holes = true},
		q = {list = true, allow_holes = true, require_index = true},
		qq = {list = true, allow_holes = true, require_index = true},
		qual = {list = true, allow_holes = true},
		-- FIXME, remove this.
		lang = {},
	})

	local to_ipa = require("Module:IPA/X-SAMPA").XSAMPA_to_IPA
	local prons = args[1]

	-- Indexed parameters, in the order they are emitted after each pronunciation.
	local indexed = {
		{"q", args.q},
		{"qq", args.qq},
		{"qual", args.qual},
		{"ref", args.ref},
	}

	local out = {"{{IPAchar"}
	local last = math.max(prons.maxindex, args.ref.maxindex, args.q.maxindex, args.qq.maxindex, args.qual.maxindex)
	for i = 1, last do
		local xsampa = prons[i]
		if xsampa then
			out[#out + 1] = "|" .. to_ipa(xsampa)
		end
		for _, entry in ipairs(indexed) do
			local value = entry[2][i]
			if value then
				out[#out + 1] = "|" .. entry[1] .. i .. "=" .. value
			end
		end
	end

	if args.lang then
		out[#out + 1] = "|lang=" .. args.lang
	end

	out[#out + 1] = "}}"

	return table.concat(out)
end

-- Used by [[Template:x2rhymes]].
--
-- Builds the wikitext of a {{rhymes}} call from X-SAMPA input: the language code is emitted first, followed by each
-- supplied pronunciation converted with `XSAMPA_to_IPA` from [[Module:IPA/X-SAMPA]]. `frame` may be a frame object
-- (the parent's arguments are used) or a plain argument table. The language is read from `lang=` when present,
-- otherwise from positional parameter 1, and the pronunciations follow it.
function export.X2rhymes(frame)
	local parent_args = frame.getParent and frame:getParent().args or frame
	local compat = parent_args["lang"]
	local offset = compat and 0 or 1

	local params = {
		[compat and "lang" or 1] = {required = true, default = "und"},
		[1 + offset] = {required = true, list = true, allow_holes = true},
	}
	
	local args = require("Module:parameters").process(parent_args, params)
	
	local m_XSAMPA = require("Module:IPA/X-SAMPA")
	
	-- These must be locals: a bare assignment here would create (and overwrite) global variables on every call.
	local pronunciations, lang = args[1 + offset], args[compat and "lang" or 1]
	
	local output =  {}
	table.insert(output, "{{rhymes")
	
	table.insert(output, "|" .. lang)

	-- The list may contain holes (allow_holes), so iterate up to maxindex and skip missing entries.
	for i = 1, pronunciations.maxindex do
		if pronunciations[i] then
			table.insert(output, "|" .. m_XSAMPA.XSAMPA_to_IPA(pronunciations[i]))
		end
	end
	
	table.insert(output, "}}")
	
	return table.concat(output)
end

-- Entry point for [[Template:enPR]].
--
-- Reads the arguments of the calling template (the parent frame), processes the positional pronunciations together
-- with their q/a/ref modifiers through [[Module:parameter utilities]], and returns the result of `format_enPR_full`
-- in [[Module:IPA]].
function export.enPR(frame)
	local template_args = frame:getParent().args

	local spec = {
		[1] = {list = true, disallow_holes = true},
	}

	local param_utils = require(parameter_utilities_module)

	local prons, args = param_utils.process_list_arguments({
		params = spec,
		param_mods = param_utils.construct_param_mods({
			{group = {"q", "a", "ref"}},
		}),
		raw_args = template_args,
		termarg = 1,
		term_dest = "pron",
		track_module = "enPR",
	})

	-- The un-indexed q=/qq=/a=/aa= values apply to the whole line and are read from `.default`.
	return m_IPA.format_enPR_full({
		items = prons,
		q = args.q.default,
		qq = args.qq.default,
		a = args.a.default,
		aa = args.aa.default,
	})
end

return export