Module:IPA: Difference between revisions

Line 1:

local export = {}

~~-- [[Module:IPA/data]]~~

local m_data = mw.loadData('Module:IPA/data') ~~-- [[~~Module:~~IPA/data]]~~

local force_cat = false -- for testing

local m_symbols = mw.loadData('Module:IPA/data/symbols') ~~-- [[~~Module:~~IPA/data/symbols]]~~

local m_data = mw.loadData("Module:IPA/data")

local m_str_utils = require("Module:string utilities")

local m_symbols = mw.loadData("Module:IPA/data/symbols")

local pron_qualifier_module = "Module:pron qualifier"

local qualifier_module = "Module:qualifier"

local references_module = "Module:references"

local syllables_module = "Module:syllables"

local utilities_module = "Module:utilities"

local m_syllables -- [[Module:syllables]]; loaded below if needed

local ~~sub~~ = mw.~~ustring~~.sub

local concat = table.concat

local ~~find~~ = mw.~~ustring~~.find

local find = string.find

local gsub = mw.~~ustring~~.~~gsub~~

local gmatch = m_str_utils.gmatch

local ~~match~~ = mw.~~ustring~~.~~match~~

local gsub = string.gsub

local ~~gmatch~~ = mw.~~ustring~~.~~gmatch~~

local insert = table.insert

local U = mw.~~ustring~~.~~char~~

local len = m_str_utils.len

local listToText = mw.text.listToText

local match = string.match

local sub = string.sub

local u = m_str_utils.char

local ufind = m_str_utils.find

local ugsub = m_str_utils.gsub

local umatch = m_str_utils.match

local usub = m_str_utils.sub

local namespace = mw.title.getCurrentTitle().namespace

local is_content_page = namespace == 0 or namespace == 118

local function process_maybe_split_categories(split_output, categories, prontext, lang, errtext)

if split_output ~= "raw" then

if categories[1] then

categories = require(utilities_module).format_categories(categories, lang, nil, nil, force_cat)

else

categories = ""

end

if split_output then -- for use of IPA in links, etc.

if errtext then

return prontext, categories, errtext

else

return prontext, categories

end

else

return prontext .. (errtext or "") .. categories

end

--[==[

Format a line of one or more IPA pronunciations as {{tl|IPA}} would do it, i.e. with a preceding {"IPA:"} followed by

the word {"key"} linking to an Appendix page describing the language's phonology, and with an added category

{{cd|<var>lang</var> terms with IPA pronunciation}}. Other than the extra preceding text and category, this is identical

to {format_IPA_multiple()}, and the considerations described there in the documentation apply here as well. There is a

single parameter `data`, an object with the following fields:

* `lang`: Object representing the language of the pronunciations, which is used when adding cleanup categories for

pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to

add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); for adding a category

{{cd|<var>lang</var> terms with IPA pronunciation}}; and for determining the proper sort keys for categories. Unlike

for {format_IPA_multiple()}, `lang` may not be {nil}.

* `items`: List of pronunciations, in exactly the same format as for {format_IPA_multiple()}.

* `err`: If not {nil}, a string containing an error message to use in place of the link to the language's phonology.

* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the

first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in

`items`.

* `sort_key`: Explicit sort key used for categories.

* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that

only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a

given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup

or other categories. If you need them suppressed, use `split_output` to return the categories separately and ignore

them.

* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted

categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is

the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category

strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If

`split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.

* `include_langname`: If specified, prefix the result with the language name, followed by a colon.

* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted

pronunciations and preceding {"IPA:"}.

* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.

* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted

pronunciations and preceding {"IPA:"}.

* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.

]==]

function export.format_IPA_full(data)

if type(data) ~= "table" or data.getCode then

error("Must now supply a table of arguments to format_IPA_full(); first argument should be that table, not a language object")

end

local lang = data.lang

local items = data.items

local err = data.err

local separator = data.separator

local sort_key = data.sort_key

local no_count = data.no_count

local split_output = data.split_output

local q = data.q

local qq = data.qq

local a = data.a

local aa = data.aa

local include_langname = data.include_langname

~~function export.format_IPA_full(lang, items, err, separator, sortKey, no_count)~~

~~local IPA_key, key_link, err_text, prefix, IPAs, category~~

local hasKey = m_data.langs_with_infopages

local ~~namespace~~ = ~~mw.title.getCurrentTitle~~()~~.nsText~~

if not lang or not lang.getCode then

error("Must specify language to format_IPA_full()")

end

local langname = lang:getCanonicalName()

local prefix_text

if err then

~~err_text~~ = '' .. err .. ''

prefix_text = '' .. err .. ''

else

~~key_link~~ = "[[~~IPA for~~ " .. ~~lang:getCanonicalName()~~ .. "|key]]"

if hasKey[lang:getCode()] then

prefix_text = "Appendix:" .. langname .. " pronunciation"

else

prefix_text = "wikipedia:" .. langname .. " phonology"

end

prefix_text = "[[" .. prefix_text .. "|key]]"

end

local prefix = "[[Wiktionary:International Phonetic Alphabet|IPA]](" .. prefix_text .. "): "

local prefix = "[[~~wikt:~~Wiktionary:International Phonetic Alphabet|IPA]](" .. ~~( key_link or err_text )~~ .. "): "

local IPAs, categories = export.format_IPA_multiple(lang, items, separator, no_count, "raw")

IPAs = export.format_IPA_multiple(lang, items, separator, no_count)

if is_content_page then

insert(categories, {

cat = langname .. " terms with IPA pronunciation",

sort_key = sort_key

})

end

~~return~~ prefix .. IPAs

local prontext = prefix .. IPAs

if q and q[1] or qq and qq[1] or a and a[1] or aa and aa[1] then

prontext = require(pron_qualifier_module).format_qualifiers {

lang = lang,

text = prontext,

q = q,

qq = qq,

a = a,

aa = aa,

}

end

if include_langname then

prontext = langname .. ": " .. prontext

end

return process_maybe_split_categories(split_output, categories, prontext, lang)

end

local function split_phonemic_phonetic(pron)

local reconstructed, phonemic, phonetic = match(pron, "^(%*?)(/.-/)%s+(%[.-%])$")

if reconstructed then

return reconstructed .. phonemic, reconstructed .. phonetic

else

return pron, nil

end

Line 35:

Line 161:

local repr_mark = {}

local repr, reconstructed

-- remove initial asterisk before representation marks, used on some Reconstruction pages

if ~~find~~(pron, "^%*") then

if sub(pron, 1, 1) == "*" then

reconstructed = true

pron = sub(pron, 2)

end

local representation_types = {

['/'] = { right = '/', type = 'phonemic', },

Line 48:

Line 174:

['-'] = { type = 'rhyme' },

}

repr_mark.i, repr_mark.f, repr_mark.left, repr_mark.right = ~~find~~(pron, '^(.).-(.)$')

repr_mark.i, repr_mark.f, repr_mark.left, repr_mark.right = ufind(pron, '^(.).-(.)$')

local representation_type = representation_types[repr_mark.left]

if representation_type then

if representation_type.right then

Line 64:

Line 190:

repr = nil

end

return repr, reconstructed

end

local function hasInvalidSeparators(transcription)

if ~~find~~(transcription, "%.[ˈˌ]") then

if match(transcription, "%.\203[\136\140]") then -- [ˈˌ]

return true

else

Line 76:

Line 202:

end

function export.format_IPA_multiple(lang, items, separator, no_count)

--[==[

Format a line of one or more bare IPA pronunciations (i.e. without any preceding {"IPA:"} and without adding to a

category {{cd|<var>lang</var> terms with IPA pronunciation}}). Individual pronunciations are formatted using

{format_IPA()} and are combined with separators, qualifiers, pre-text, post-text, etc. to form a line of pronunciations.

Parameters accepted are:

* `lang` is an object representing the language of the pronunciations, which is used when adding cleanup categories for

pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to

add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); and for computing the

proper sort keys for categories. `lang` may be {nil}.

* `items` is a list of pronunciations, each of which is an object with the following properties:

** `pron`: the pronunciation, in the same format as is accepted by {format_IPA()}, i.e. it should be either phonemic

(surrounded by {/.../}), phonetic (surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}) or a rhyme

(beginning with a hyphen);

** `pretext`: text to display directly before the formatted pronunciation, inside of any qualifiers or accent

qualifiers;

** `posttext`: text to display directly after the formatted pronunciation, inside of any qualifiers or accent

qualifiers;

** `q` or `qualifiers`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted

pronunciation; note that `qualifiers` is deprecated;

** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;

** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;

** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation;

** `refs`: {nil} or a list of references or reference specs to add after the pronunciation and any posttext and

qualifiers; the value of a list item is either a string containing the reference text (typically a call to a

citation template such as {{tl|cite-book}}, or a template wrapping such a call), or an object with fields `text`

(the reference text), `name` (the name of the reference, as in {{cd|<nowiki><ref name="foo">...</ref></nowiki>}}

or {{cd|<nowiki><ref name="foo" /></nowiki>}}) and/or `group` (the group of the reference, as in

{{cd|<nowiki><ref name="foo" group="bar">...</ref></nowiki>}} or

{{cd|<nowiki><ref name="foo" group="bar"/></nowiki>}}); this uses a parser function to format the reference

appropriately and insert a footnote number that hyperlinks to the actual reference, located in the

{{cd|<nowiki><references /></nowiki>}} section;

** `gloss`: {nil} or a gloss (definition) for this item, if different definitions have different pronunciations;

** `pos`: {nil} or a part of speech for this item, if different parts of speech have different pronunciations;

** `separator`: the separator text to insert directly before the formatted pronunciation and all qualifiers, accent

qualifiers and pre-text; defaults to the outer `separator` parameter.

* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the

first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in

`items`.

* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that

only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a

given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup

categories. If you need them suppressed, use `split_output` to return the categories separately and ignore them.

* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted

categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is

the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category

strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If

`split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.

]==]

function export.format_IPA_multiple(lang, items, separator, no_count, split_output)

local categories = {}

separator = separator or ', '

separator = separator or ", "

-- Format

if not items[1] then

if ~~mw.title.getCurrentTitle().nsText~~ == "Template~~" then~~

if namespace == 10 then -- Template

~~table.~~insert(items, {pron = "/aɪ piː ˈeɪ/"})

insert(items, {pron = "/aɪ piː ˈeɪ/"})

else

~~table.~~insert(categories, "~~[[Category:~~Pronunciation templates without a pronunciation]]")

insert(categories, "Pronunciation templates without a pronunciation")

end

local bits = {}

for _, item in ipairs(items) do

for i, item in ipairs(items) do

local bit = export.format_IPA(lang, item.pron)

local bit

-- If the pronunciation is entirely empty, allow this and don't do anything, so that e.g. the pretext and/or

-- posttext can be specified to force something like ''unknown'' to appear in place of the pronunciation

-- (as happens e.g. when ? is used as a respelling in [[Module:ca-IPA]]; see [[guèiser]] for an example).

if item.pron == "" then

bit = ""

else

local item_categories, errtext

bit, item_categories, errtext = export.format_IPA(lang, item.pron, "raw")

bit = bit .. errtext

for _, cat in ipairs(item_categories) do

insert(categories, cat)

end

if item.pretext then

bit = item.pretext .. bit

end

if item.posttext then

bit = bit .. item.posttext

end

if item.qualifiers and item.qualifiers[1] then

local has_qualifiers = item.q and item.q[1] or item.qq and item.qq[1] or item.qualifiers and item.qualifiers[1]

bit = require("Module:qualifier").~~format_qualifier(~~item.qualifiers) .. ~~" "~~ .~~. bit~~

or item.a and item.a[1] or item.aa and item.aa[1]

local has_gloss_or_pos = item.gloss or item.pos

if has_qualifiers or has_gloss_or_pos then

-- FIXME: Currently we tack the gloss and POS (in that order) onto the end of the regular left qualifiers.

-- Should we do something different?

local q = item.q

if has_gloss_or_pos then

q = mw.clone(item.q) or {}

if item.gloss then

local m_qualifier = require(qualifier_module)

insert(q, m_qualifier.wrap_qualifier_css("“", "quote") .. item.gloss ..

m_qualifier.wrap_qualifier_css("”", "quote"))

end

if item.pos then

-- FIXME: Consider expanding aliases as found in [[Module:headword/data]] or similar.

insert(q, item.pos)

end

bit = require("Module:pron qualifier").format_qualifiers {

lang = lang,

text = bit,

q = q,

qq = item.qq,

qualifiers = item.qualifiers,

a = item.a,

aa = item.aa,

}

end

if ~~item.refs or~~ item.note then

if item.note then

~~local refspecs~~

-- Support removed on 2024-06-15.

~~if item~~.note ~~then~~

error("Support for `.note` has been removed; switch to `.refs` (which must be a list)")

~~-- FIXME: eliminate item.note in favor of item~~.refs~~. Use tracking to find places~~

end

~~-- that use item.note.~~

if item.refs then

~~refspecs = {~~item.~~note}~~

local refspecs = item.refs

~~else~~

refspecs = item.refs

~~end~~

~~local refs = {}~~

if #refspecs > 0 then

~~for _, refspec in ipairs(refspecs) do~~

bit = bit .. require(references_module).format_references(refspecs)

~~if type(refspec) ~~~= ~~"table" then~~

~~refspec = {text = refspec}~~

~~end~~

~~local refargs~~

~~if refspec~~.~~name or refspec~~.~~group then~~

~~refargs = {name = refspec.name, group = refspec.group}~~

~~end~~

~~table.insert(refs, mw.getCurrentFrame~~()~~:extensionTag("ref", refspec~~.~~text, refargs))~~

~~end~~

~~bit = bit .. table.concat~~(~~refs~~)

end

~~table~~.insert(bits, bit)

bit = (item.separator or (i == 1 and "" or separator)) .. bit

insert(bits, bit)

if lang then

-- Add syllable count if the language's diphthongs are listed in [[Module:syllables]].

-- Don't do this if the term has spaces or a liaison mark (‿).

-- Don't do this if the term has spaces, a liaison mark (‿) or isn't in mainspace.

if not no_count and ~~mw.title.getCurrentTitle().~~namespace == 0 then

if not no_count and namespace == 0 then

m_syllables = m_syllables or require(~~'Module:syllables'~~)

m_syllables = m_syllables or require(syllables_module)

local langcode = lang:getCode()

if m_data.langs_to_generate_syllable_count_categories[langcode] then

local ~~repr~~ = ~~determine_repr~~(item.pron)

local phonemic, phonetic = split_phonemic_phonetic(item.pron)

local use_it

if m_data.langs_to_use_phonetic_notation[langcode] then

if not phonetic then -- not a '/.../ [...]' combined pronunciation

use_it = repr == "~~phonetic~~"

local repr = determine_repr(phonemic)

~~else~~

if m_data.langs_to_use_phonetic_notation[langcode] then

use_it = repr == "phonemic"

use_it = repr == "phonetic" and phonemic or nil

else

use_it = repr == "phonemic" and phonemic or nil

end

elseif repr == "phonetic" then

use_it = phonetic

elseif repr == "phonemic" then

use_it = phonemic

end

if use_it and not find(~~item.pron~~, "[ ‿]") then

-- Note: two uses of find with plain patterns is much faster than umatch with [ ‿].

local syllable_count = m_syllables.getVowels(~~item.pron~~, lang)

if use_it and not (find(use_it, " ") or find(use_it, "‿")) then

local syllable_count = m_syllables.getVowels(use_it, lang)

if syllable_count then

~~table.~~insert(categories, ~~"[[Category:" ..~~ lang:getCanonicalName() .. " " .. syllable_count .. "-syllable words]]")

insert(categories, lang:getCanonicalName() .. " " .. syllable_count ..

"-syllable words")

end

Line 156:

Line 368:

end

-- The nature of hasInvalidSeparators() is such that we don't have to split a combined '/.../ [...]' spec

-- into its parts in order to process.

if lang:getCode() == "en" and hasInvalidSeparators(item.pron) then

~~table.~~insert(categories, "~~[[Category:~~IPA for English using .ˈ or .ˌ]]")

insert(categories, "IPA for English using .ˈ or .ˌ")

end

return ~~table.~~concat(bits, ~~separator) .. table.concat(categories~~)

return process_maybe_split_categories(split_output, categories, concat(bits), lang)

end

-- ~~Takes an~~ IPA pronunciation and ~~formats it~~ and adds ~~cleanup~~ categories.

--[=[

function ~~export.format_IPA~~(lang, pron, ~~split_output)~~

Format a single IPA pronunciation, which cannot be a combined spec (such as {/.../ [...]}). This has been extracted from

~~local~~ err ~~= {}~~

{format_IPA()} to allow the latter to handle such combined specs. This works like {format_IPA()} but requires that

~~local~~ categories ~~= {}~~

pre-created {err} (for error messages) and {categories} lists be passed in, and adds any generated error messages and

categories to those lists. A single value is returned, the pronunciation, which is usually the same as passed in, but

-- Remove wikilinks, so that wikilink brackets are not misinterpreted as

may have HTML added surrounding invalid characters so they appear in red.

-- indicating ~~phonemic~~ transcription

]=]

local ~~str_gsub~~ = ~~string.~~gsub

local function format_one_IPA(lang, pron, err, categories)

~~local without_links = str_gsub~~(pron, '%[%[[^|%]]+|([^%]]+)%]%]', '%1')

-- Remove wikilinks, so that wikilink brackets are not misinterpreted as indicating phonetic transcription

without_links = ~~str_gsub~~(without_links, '%[%[[^%]]+%]%]', '%1')

local without_links = gsub(pron, "%[%[[^|%]]+|([^%]]+)%]%]", "%1")

without_links = gsub(without_links, "%[%[[^%]]+%]%]", "%1")

-- Detect whether this is a phonemic or phonetic transcription

local repr, reconstructed = determine_repr(without_links)

if reconstructed then

pron = sub(pron, 2)

without_links = sub(without_links, 2)

end

-- If valid, strip the representation marks

if repr == "phonemic" then

pron = ~~sub~~(pron, 2, -2)

pron = usub(pron, 2, -2)

without_links = ~~sub~~(without_links, 2, -2)

without_links = usub(without_links, 2, -2)

elseif repr == "phonetic" then

pron = ~~sub~~(pron, 2, -2)

pron = usub(pron, 2, -2)

without_links = ~~sub~~(without_links, 2, -2)

without_links = usub(without_links, 2, -2)

elseif repr == "orthographic" then

pron = ~~sub~~(pron, 2, -2)

pron = usub(pron, 2, -2)

without_links = ~~sub~~(without_links, 2, -2)

without_links = usub(without_links, 2, -2)

elseif repr == "rhyme" then

pron = ~~sub~~(pron, 2)

pron = usub(pron, 2)

without_links = ~~sub~~(without_links, 2)

without_links = usub(without_links, 2)

else

~~table.~~insert(categories, "~~[[Category:~~IPA pronunciations with invalid representation marks]]")

insert(categories, "IPA pronunciations with invalid representation marks")

-- ~~table.~~insert(err, "invalid representation marks")

-- insert(err, "invalid representation marks")

-- Removed because it's annoying when previewing pronunciation pages.

end

if pron == "" then

~~table.~~insert(categories, "~~[[Category:~~IPA pronunciations with no pronunciation present]]")

insert(categories, "IPA pronunciations with no pronunciation present")

end

-- Check for obsolete and nonstandard symbols

for i, symbol in ipairs(m_data.nonstandard) do

Line 213:

Line 429:

result = {}

end

~~table.~~insert(result, nonstandard)

insert(result, nonstandard)

~~table.~~insert(categories, "~~[[Category:~~IPA pronunciations with obsolete or nonstandard characters|" .. nonstandard ~~.. "]]"~~)

insert(categories,

{cat = "IPA pronunciations with obsolete or nonstandard characters", sort_key = nonstandard}

)

end

if result then

~~table.~~insert(err, "obsolete or nonstandard characters (" .~~. table~~.concat(result) .. ")")

insert(err, "obsolete or nonstandard characters (" .. concat(result) .. ")")

break

end

--[[ Check for invalid symbols after removing the following:

1. wikilinks (handled above)

Line 233:

Line 451:

8. superscripts enclosed in superscript parentheses ]]

local found_HTML

local result = ~~str_gsub~~(without_links, "<(%a+)[^>]*>([^<]+)</%1>",

local result = gsub(without_links, "<(%a+)[^>]*>([^<]+)</%1>",

function(tagName, content)

found_HTML = true

return content

end)

result = ~~str_gsub~~(result, "'''([^']*)'''", "%1")

result = gsub(result, "'''([^']*)'''", "%1")

result = ~~str_gsub~~(result, "''([^']*)''", "%1")

result = gsub(result, "''([^']*)''", "%1")

result = ~~str_gsub~~(result, "&[^;]+;", "") -- This may catch things that are not valid character entities.

result = gsub(result, "&[^;]+;", "") -- This may catch things that are not valid character entities.

result = ~~str_gsub~~(result, "^%*", "")

result = gsub(result, "^%*", "")

result = ~~gsub~~(result, ",%s+", "")

result = ugsub(result, ",%s+", "")

~~result = gsub(result, "⁽[".. m_symbols.superscripts .. "]+⁾", "")~~

~~result = gsub(result, '[' .. m_symbols.valid .. ']', '')~~

-- VS15

local vs15_class = "[" .. m_symbols.add_vs15 .. "]"

if ~~mw.ustring.find~~(pron, vs15_class) then

if umatch(pron, vs15_class) then

local vs15 = U(0xFE0E)

local vs15 = u(0xFE0E)

if ~~mw.ustring.~~find(result, vs15) then

if find(result, vs15) then

result = gsub(result, vs15, "")

pron = ~~mw.ustring.~~gsub(pron, vs15, "")

pron = gsub(pron, vs15, "")

end

pron = ~~mw.ustring.gsub~~(pron, "(" .. vs15_class .. ")", "%1" .. vs15)

pron = ugsub(pron, "(" .. vs15_class .. ")", "%1" .. vs15)

end

if result ~= '' then

if result ~= "" then

local suggestions = {}

mw.~~log~~(~~pron~~, ~~result~~)

for k, v in pairs(m_symbols.invalid) do

~~local namespace = mw~~.~~title~~.~~getCurrentTitle().namespace~~

if find(result, k, 1, true) then

~~local category~~

insert(suggestions, k .. " with " .. v)

~~if namespace == 0 then~~

end

~~-- main namespace~~

~~category =~~ "~~IPA pronunciations~~ with ~~invalid IPA characters~~"

~~elseif namespace == 118 then~~

~~-- reconstruction namespace~~

~~category = "IPA pronunciations with invalid IPA characters/reconstruction"~~

~~else~~

~~category = "IPA pronunciations with invalid IPA characters/non_mainspace"~~

end

~~for character in gmatch(result, ".") do~~

if suggestions[1] then

~~local suggestion~~ = ~~m_symbols.~~suggestions~~[character]~~

suggestions = listToText(suggestions)

if ~~suggestion~~ then

if is_content_page then

~~table~~.insert(~~suggestions~~, ~~character ..~~ " ~~with~~ " .. ~~suggestion~~)

error("Invalid IPA: replace " .. suggestions)

else

insert(err, "replace " .. suggestions)

end

~~table.insert(categories, "[[Category:" .. category .. "|" .. character .. "]]")~~

end

~~table.insert~~(~~err~~, "~~invalid IPA characters (~~" .. ~~result~~ .. ")")

result = ugsub(result, "⁽[".. m_symbols.superscripts .. "]+⁾", "")

if ~~suggestions~~[1] then

local per_lang_valid

~~table~~.insert(err, "~~replace~~ " .. ~~table~~.~~concat(suggestions,~~ ", "))

if lang then

per_lang_valid = m_symbols.per_lang_valid[lang:getCode()]

end

per_lang_valid = per_lang_valid or ""

result = ugsub(result, "[" .. m_symbols.valid .. per_lang_valid .. "]", "")

if result ~= "" then

local category = "IPA pronunciations with invalid IPA characters"

if not is_content_page then

category = category .. "/non_mainspace"

end

insert(categories, category)

insert(err, "invalid IPA characters (" .. result .. ")")

end

if found_HTML then

~~table.~~insert(categories, "~~[[Category:~~IPA pronunciations with paired HTML tags]]")

insert(categories, "IPA pronunciations with paired HTML tags")

end

~~-- Reference inside IPA template usage~~

~~-- FIXME: Doesn't work; you can't put HTML in module output.~~

~~--if mw.ustring.find(pron, '</ref>') then~~

~~-- table.insert(categories, "[[Category:IPA pronunciations with reference]]")~~

~~--end~~

if repr == "phonemic" or repr == "rhyme" then

if lang and m_data.phonemes[lang:getCode()] then

Line 299:

Line 514:

local rest = pron

local phonemes = {}

while ~~mw.ustring.len(~~rest) > 0 do

while #rest > 0 do

local longestmatch = ""

local longestmatch, longestmatch_len = "", 0

if sub(rest, 1, 1) == "(" or ~~sub(rest, 1, 1)~~ == ")" then

local rest_init = sub(rest, 1, 1)

longestmatch = ~~sub(rest,~~ 1~~, 1)~~

if rest_init == "(" or rest_init == ")" then

longestmatch = rest_init

longestmatch_len = 1

else

for _, phoneme in ipairs(valid_phonemes) do

~~if mw.ustring.~~len(phoneme) > ~~mw.ustring.len(longestmatch)~~ and ~~sub~~(rest, 1, ~~mw.ustring.len(phoneme)~~) == phoneme then

local phoneme_len = len(phoneme)

if phoneme_len > longestmatch_len and usub(rest, 1, phoneme_len) == phoneme then

longestmatch = phoneme

longestmatch_len = len(longestmatch)

end

if ~~mw.ustring.len(longestmatch)~~ > 0 then

if longestmatch_len > 0 then

~~table.~~insert(phonemes, longestmatch)

insert(phonemes, longestmatch)

rest = ~~sub~~(rest, ~~mw.ustring.len(longestmatch)~~ + 1)

rest = usub(rest, longestmatch_len + 1)

else

local phoneme = ~~sub~~(rest, 1, 1)

local phoneme = usub(rest, 1, 1)

~~table.~~insert(phonemes, "" .. phoneme .. "")

insert(phonemes, "" .. phoneme .. "")

rest = ~~sub~~(rest, 2)

rest = usub(rest, 2)

~~table.~~insert(categories, "~~[[Category:~~IPA pronunciations with invalid phonemes/" .. lang:getCode() ~~.. "]]"~~)

insert(categories, "IPA pronunciations with invalid phonemes/" .. lang:getCode())

end

pron = ~~table.~~concat(phonemes)

pron = concat(phonemes)

end

if repr == "phonemic" then

pron = "/" .. pron .. "/"

Line 337:

Line 556:

pron = "⟨" .. pron .. "⟩"

end

if reconstructed then

pron = "*" .. pron

end

return pron

end

--[==[

Format an IPA pronunciation. This wraps the pronunciation in appropriate CSS classes and adds cleanup categories and

error messages as needed. The pronunciation `pron` should be either phonemic (surrounded by {/.../}), phonetic

(surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}), a rhyme (beginning with a hyphen) or a combined

phonemic/phonetic spec (of the form {/.../ [...]}). `lang` indicates the language of the pronunciation and can be {nil}.

If not {nil}, and the specified language has data in [[Module:IPA/data]] indicating the allowed phonemes, then the page

will be added to a cleanup category and an error message displayed next to the outputted pronunciation. Note that {lang}

also determines sort key processing in the added cleanup categories. If `split_output` is not given, the return value is

a concatenation of the formatted pronunciation, error messages and formatted cleanup categories. Otherwise, three values

are returned: the formatted pronunciation, the cleanup categories and the concatenated error messages. If `split_output`

is the value {"raw"}, the cleanup categories are returned in list form, where the list elements are a combination of

category strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]].

If `split_output` is any other value besides {nil}, the cleanup categories are returned as a pre-formatted concatenated

string.

]==]

function export.format_IPA(lang, pron, split_output)

local err = {}

local categories = {}

-- `pron` shouldn't contain ref tags.

if match(pron, "\127'\"`UNIQ%-%-ref%-[%dA-F]+%-QINU`\"'\127") then

error("<ref> tags found inside pronunciation parameter.")

end

local phonemic, phonetic = split_phonemic_phonetic(pron)

pron = format_one_IPA(lang, phonemic, err, categories)

if phonetic then

phonetic = format_one_IPA(lang, phonetic, err, categories)

pron = pron .. " " .. phonetic

end

if err[1] then

err = ' ' .~~. table~~.concat(err, ', ') .. ''

err = ' ' .. concat(err, ", ") .. ""

else

err = ""

end

if split_output ~~then -- for use of IPA in links~~

return process_maybe_split_categories(split_output, categories, '' .. pron .. "", lang,

~~return~~ '' .. pron .. '', ~~table.concat(categories)~~, ~~err~~

err)

~~else~~

~~return '' .. pron .. '' ..~~ err ~~.. table.concat(categories~~)

~~end~~

end

~~function export~~.~~example~~(~~frame~~)

--[==[

~~local output =~~ {}

Format a line of one or more enPR pronunciations as {{tl|enPR}} would do it, i.e. with a preceding {"enPR:"} (linked to

[[Appendix:English pronunciation]]) followed by one or more formatted, comma-separated enPR pronunciations. The

~~local m_links = require~~(~~'Module~~:~~links'~~)

pronunciations are formatted by wrapping them in the {{cd|AHD}} and {{cd|enPR}} CSS classes and adding any left and

~~local m_languages = require~~(~~'Module:languages'~~)

right regular and accent qualifiers. In addition, the overall result is wrapped in any overall left and right regular

and accent qualifiers. There is a single parameter `data`, an object with the following fields:

~~table~~.~~insert~~(

* `items` is a list of enPR pronunciations, each of which is an object with the following properties:

~~output~~,

** `pron`: the enPR pronunciation;

[[

** `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted pronunciation;

{~~| class=~~"~~wikitable~~"

** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;

~~! Term !! IPA !! Generated X-SAMPA !! Regenerated IPA !! Matched?~~

** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;

]]

** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation.

)

* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted

local ~~row~~ =

pronunciations and preceding {"enPR:"}.

[[

* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.

|-

* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted

~~| link || IPA || XSAMPA~~ |~~| regenerated_IPA || matched~~

pronunciations and preceding {"enPR:"}.

]]

* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.

]==]

local ~~examples~~ = mw.~~text.split~~(~~frame.args[1],~~ ",%s*")

function export.format_enPR_full(data)

local prefix = "[[Appendix:English pronunciation|enPR]]: "

local ~~m_XSAMPA~~ = ~~require("Module:IPA/X-SAMPA")~~

local lang = require("Module:languages").getByCode("en")

local parts = {}

for _, ~~example~~ in ~~pairs~~(~~examples~~) do

local ~~lang, word~~ = ~~match(example,~~ "~~(%l%l%l?):(~~.~~+) [~~/~~%[]~~")

for _, item in ipairs(data.items) do

local part = '' .. item.pron .. ""

if ~~lang~~ then

~~lang~~ = ~~m_languages.getByCode~~(~~lang~~) ~~or error('"'~~ .. ~~lang~~ .. ~~'" is not~~ a ~~valid language code~~.')

if item.q and item.q[1] or item.qq and item.qq[1] or item.a and item.a[1] or item.aa and item.aa[1] then

part = require("Module:pron qualifier").format_qualifiers {

lang = lang,

text = part,

q = item.q,

qq = item.qq,

a = item.a,

aa = item.aa,

}

end

insert(parts, part)

local ~~IPA~~ = ~~match~~(~~example~~, "~~/[^/]+/~~")

end

or ~~match(example, "%[~~[~~^%]~~]~~+%]")~~

or ~~error('No IPA transcription found in "'~~ .. ~~example~~ .. ~~'".')~~

local prontext = prefix .. concat(parts, ", ")

~~local XSAMPA~~ = ~~m_XSAMPA.IPA_to_XSAMPA~~(~~IPA~~)

if data.q and data.q[1] or data.qq and data.qq[1] or data.a and data.a[1] or data.aa and data.aa[1] then

~~local regenerated_IPA = m_XSAMPA~~.~~XSAMPA_to_IPA(XSAMPA)~~

prontext = require(pron_qualifier_module).format_qualifiers {

lang = lang,

~~content =~~ {

text = prontext,

~~link = lang and word and m_links.full_link{ term = word,~~ lang = lang },

q = data.q,

~~matched~~ = ~~IPA == regenerated_IPA~~

qq = data.qq,

~~and 'yes'~~

a = data.a,

~~or 'no'~~,

aa = data.aa,

~~IPA~~ = ~~'' .. IPA .~~. ~~''~~,

~~XSAMPA~~ = ~~'<code>' .. XSAMPA .~~. ~~'</code>'~~,

~~regenerated_IPA~~ = ~~'' .. regenerated_IPA .. ''~~

}

~~local function add_content(item)~~

~~return content[item] or ""~~

~~end~~

~~local row = gsub(row, "[%a_]+", add_content)~~

~~table.insert(output, row)~~

end

~~table.insert(output, "|}")~~

return prontext

return ~~table.concat(output)~~

end

return export

@@ Line 1: / Line 1: @@
 local export = {}
--- [[Module:IPA/data]]
-local m_data = mw.loadData('Module:IPA/data') -- [[Module:IPA/data]]
+local force_cat = false -- for testing
-local m_symbols = mw.loadData('Module:IPA/data/symbols') -- [[Module:IPA/data/symbols]]
+local m_data = mw.loadData("Module:IPA/data")
+local m_str_utils = require("Module:string utilities")
+local m_symbols = mw.loadData("Module:IPA/data/symbols")
+local pron_qualifier_module = "Module:pron qualifier"
+local qualifier_module = "Module:qualifier"
+local references_module = "Module:references"
+local syllables_module = "Module:syllables"
+local utilities_module = "Module:utilities"
 local m_syllables -- [[Module:syllables]]; loaded below if needed
-local sub = mw.ustring.sub
+local concat = table.concat
-local find = mw.ustring.find
+local find = string.find
-local gsub = mw.ustring.gsub
+local gmatch = m_str_utils.gmatch
-local match = mw.ustring.match
+local gsub = string.gsub
-local gmatch = mw.ustring.gmatch
+local insert = table.insert
-local U = mw.ustring.char
+local len = m_str_utils.len
+local listToText = mw.text.listToText
+local match = string.match
+local sub = string.sub
+local u = m_str_utils.char
+local ufind = m_str_utils.find
+local ugsub = m_str_utils.gsub
+local umatch = m_str_utils.match
+local usub = m_str_utils.sub
+local namespace = mw.title.getCurrentTitle().namespace
+local is_content_page = namespace == 0 or namespace == 118
+local function process_maybe_split_categories(split_output, categories, prontext, lang, errtext)
+	if split_output ~= "raw" then
+		if categories[1] then
+			categories = require(utilities_module).format_categories(categories, lang, nil, nil, force_cat)
+		else
+			categories = ""
+		end
+	end
+	if split_output then -- for use of IPA in links, etc.
+		if errtext then
+			return prontext, categories, errtext
+		else
+			return prontext, categories
+		end
+	else
+		return prontext .. (errtext or "") .. categories
+	end
+end
+--[==[
+Format a line of one or more IPA pronunciations as {{tl|IPA}} would do it, i.e. with a preceding {"IPA:"} followed by
+the word {"key"} linking to an Appendix page describing the language's phonology, and with an added category
+{{cd|<var>lang</var> terms with IPA pronunciation}}. Other than the extra preceding text and category, this is identical
+to {format_IPA_multiple()}, and the considerations described there in the documentation apply here as well. There is a
+single parameter `data`, an object with the following fields:
+* `lang`: Object representing the language of the pronunciations, which is used when adding cleanup categories for
+   pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
+   add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); for adding a category
+   {{cd|<var>lang</var> terms with IPA pronunciation}}; and for determining the proper sort keys for categories. Unlike
+   for {format_IPA_multiple()}, `lang` may not be {nil}.
+* `items`: List of pronunciations, in exactly the same format as for {format_IPA_multiple()}.
+* `err`: If not {nil}, a string containing an error message to use in place of the link to the language's phonology.
+* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
+  first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
+  `items`.
+* `sort_key`: Explicit sort key used for categories.
+* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
+  only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
+  given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
+  or other categories. If you need them suppressed, use `split_output` to return the categories separately and ignore
+  them.
+* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
+  categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
+  the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
+  strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
+  `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
+* `include_langname`: If specified, prefix the result with the language name, followed by a colon.
+* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
+  pronunciations and preceding {"IPA:"}.
+* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
+* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
+  pronunciations and preceding {"IPA:"}.
+* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
+]==]
+function export.format_IPA_full(data)
+	if type(data) ~= "table" or data.getCode then
+		error("Must now supply a table of arguments to format_IPA_full(); first argument should be that table, not a language object")
+	end
+	local lang = data.lang
+	local items = data.items
+	local err = data.err
+	local separator = data.separator
+	local sort_key = data.sort_key
+	local no_count = data.no_count
+	local split_output = data.split_output
+	local q = data.q
+	local qq = data.qq
+	local a = data.a
+	local aa = data.aa
+	local include_langname = data.include_langname
-function export.format_IPA_full(lang, items, err, separator, sortKey, no_count)
-	local IPA_key, key_link, err_text, prefix, IPAs, category
 	local hasKey = m_data.langs_with_infopages
-	local namespace = mw.title.getCurrentTitle().nsText
+	if not lang or not lang.getCode then
+		error("Must specify language to format_IPA_full()")
+	end
+	local langname = lang:getCanonicalName()
+	local prefix_text
 	if err then
-		err_text = '<span class="error">' .. err .. '</span>'
+		prefix_text = '<span class="error">' .. err .. '</span>'
 	else
-		key_link = "[[IPA for " .. lang:getCanonicalName() .. "|key]]"
+		if hasKey[lang:getCode()] then
+			prefix_text = "Appendix:" .. langname .. " pronunciation"
+		else
+			prefix_text = "wikipedia:" .. langname .. " phonology"
+		end
+		prefix_text = "[[" .. prefix_text .. "|key]]"
 	end
+	local prefix = "[[Wiktionary:International Phonetic Alphabet|IPA]]<sup>(" .. prefix_text .. ")</sup>:&#32;"
-	local prefix = "[[wikt:Wiktionary:International Phonetic Alphabet|IPA]]<sup>(" .. ( key_link or err_text ) .. ")</sup>:&#32;"
+	local IPAs, categories = export.format_IPA_multiple(lang, items, separator, no_count, "raw")
-	IPAs = export.format_IPA_multiple(lang, items, separator, no_count)
+	if is_content_page then
+		insert(categories, {
+			cat = langname .. " terms with IPA pronunciation",
+			sort_key = sort_key
+		})
+	end
-	return prefix .. IPAs
+	local prontext = prefix .. IPAs
+	if q and q[1] or qq and qq[1] or a and a[1] or aa and aa[1] then
+		prontext = require(pron_qualifier_module).format_qualifiers {
+			lang = lang,
+			text = prontext,
+			q = q,
+			qq = qq,
+			a = a,
+			aa = aa,
+		}
+	end
+	if include_langname then
+		prontext = langname .. ": " .. prontext
+	end
+	return process_maybe_split_categories(split_output, categories, prontext, lang)
+end
+local function split_phonemic_phonetic(pron)
+	local reconstructed, phonemic, phonetic = match(pron, "^(%*?)(/.-/)%s+(%[.-%])$")
+	if reconstructed then
+		return reconstructed .. phonemic, reconstructed .. phonetic
+	else
+		return pron, nil
+	end
 end
@@ Line 35: / Line 161: @@
 	local repr_mark = {}
 	local repr, reconstructed
 	-- remove initial asterisk before representation marks, used on some Reconstruction pages
-	if find(pron, "^%*") then
+	if sub(pron, 1, 1) == "*" then
 		reconstructed = true
 		pron = sub(pron, 2)
 	end
 	local representation_types = {
 		['/'] = { right = '/', type = 'phonemic', },
@@ Line 48: / Line 174: @@
 		['-'] = { type = 'rhyme' },
 	}
-	repr_mark.i, repr_mark.f, repr_mark.left, repr_mark.right = find(pron, '^(.).-(.)$')
+	repr_mark.i, repr_mark.f, repr_mark.left, repr_mark.right = ufind(pron, '^(.).-(.)$')
 	local representation_type = representation_types[repr_mark.left]
 	if representation_type then
 		if representation_type.right then
@@ Line 64: / Line 190: @@
 		repr = nil
 	end
 	return repr, reconstructed
 end
 local function hasInvalidSeparators(transcription)
-	if find(transcription, "%.[ˈˌ]") then
+	if match(transcription, "%.\203[\136\140]") then -- [ˈˌ]
 		return true
 	else
@@ Line 76: / Line 202: @@
 end
-function export.format_IPA_multiple(lang, items, separator, no_count)
+--[==[
+Format a line of one or more bare IPA pronunciations (i.e. without any preceding {"IPA:"} and without adding to a
+category {{cd|<var>lang</var> terms with IPA pronunciation}}). Individual pronunciations are formatted using
+{format_IPA()} and are combined with separators, qualifiers, pre-text, post-text, etc. to form a line of pronunciations.
+Parameters accepted are:
+* `lang` is an object representing the language of the pronunciations, which is used when adding cleanup categories for
+   pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
+   add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); and for computing the
+   proper sort keys for categories. `lang` may be {nil}.
+* `items` is a list of pronunciations, each of which is an object with the following properties:
+** `pron`: the pronunciation, in the same format as is accepted by {format_IPA()}, i.e. it should be either phonemic
+     (surrounded by {/.../}), phonetic (surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}) or a rhyme
+	 (beginning with a hyphen);
+** `pretext`: text to display directly before the formatted pronunciation, inside of any qualifiers or accent
+     qualifiers;
+** `posttext`: text to display directly after the formatted pronunciation, inside of any qualifiers or accent
+     qualifiers;
+** `q` or `qualifiers`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted
+     pronunciation; note that `qualifiers` is deprecated;
+** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
+** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
+** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation;
+** `refs`: {nil} or a list of references or reference specs to add after the pronunciation and any posttext and
+     qualifiers; the value of a list item is either a string containing the reference text (typically a call to a
+	 citation template such as {{tl|cite-book}}, or a template wrapping such a call), or an object with fields `text`
+	 (the reference text), `name` (the name of the reference, as in {{cd|<nowiki><ref name="foo">...</ref></nowiki>}}
+	 or {{cd|<nowiki><ref name="foo" /></nowiki>}}) and/or `group` (the group of the reference, as in
+	 {{cd|<nowiki><ref name="foo" group="bar">...</ref></nowiki>}} or
+	 {{cd|<nowiki><ref name="foo" group="bar"/></nowiki>}}); this uses a parser function to format the reference
+	 appropriately and insert a footnote number that hyperlinks to the actual reference, located in the
+	 {{cd|<nowiki><references /></nowiki>}} section;
+** `gloss`: {nil} or a gloss (definition) for this item, if different definitions have different pronunciations;
+** `pos`: {nil} or a part of speech for this item, if different parts of speech have different pronunciations;
+** `separator`: the separator text to insert directly before the formatted pronunciation and all qualifiers, accent
+   qualifiers and pre-text; defaults to the outer `separator` parameter.
+* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
+  first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
+  `items`.
+* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
+  only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
+  given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
+  categories. If you need them suppressed, use `split_output` to return the categories separately and ignore them.
+* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
+  categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
+  the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
+  strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
+  `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
+]==]
+function export.format_IPA_multiple(lang, items, separator, no_count, split_output)
 	local categories = {}
-	separator = separator or ', '
+	separator = separator or ", "
 	-- Format
 	if not items[1] then
-		if mw.title.getCurrentTitle().nsText == "Template" then
+		if namespace == 10 then -- Template
-			table.insert(items, {pron = "/aɪ piː ˈeɪ/"})
+			insert(items, {pron = "/aɪ piː ˈeɪ/"})
 		else
-			table.insert(categories, "[[Category:Pronunciation templates without a pronunciation]]")
+			insert(categories, "Pronunciation templates without a pronunciation")
 		end
 	end
 	local bits = {}
-	for _, item in ipairs(items) do
+	for i, item in ipairs(items) do
-		local bit = export.format_IPA(lang, item.pron)
+		local bit
+		-- If the pronunciation is entirely empty, allow this and don't do anything, so that e.g. the pretext and/or
+		-- posttext can be specified to force something like ''unknown'' to appear in place of the pronunciation
+		-- (as happens e.g. when ? is used as a respelling in [[Module:ca-IPA]]; see [[guèiser]] for an example).
+		if item.pron == "" then
+			bit = ""
+		else
+			local item_categories, errtext
+			bit, item_categories, errtext = export.format_IPA(lang, item.pron, "raw")
+			bit = bit .. errtext
+			for _, cat in ipairs(item_categories) do
+				insert(categories, cat)
+			end
+		end
 		if item.pretext then
 			bit = item.pretext .. bit
 		end
 		if item.posttext then
 			bit = bit .. item.posttext
 		end
-		if item.qualifiers and item.qualifiers[1] then
+		local has_qualifiers = item.q and item.q[1] or item.qq and item.qq[1] or item.qualifiers and item.qualifiers[1]
-			bit = require("Module:qualifier").format_qualifier(item.qualifiers) .. " " .. bit
+			or item.a and item.a[1] or item.aa and item.aa[1]
+		local has_gloss_or_pos = item.gloss or item.pos
+		if has_qualifiers or has_gloss_or_pos then
+			-- FIXME: Currently we tack the gloss and POS (in that order) onto the end of the regular left qualifiers.
+			-- Should we do something different?
+			local q = item.q
+			if has_gloss_or_pos then
+				q = mw.clone(item.q) or {}
+				if item.gloss then
+					local m_qualifier = require(qualifier_module)
+					insert(q, m_qualifier.wrap_qualifier_css("“", "quote") .. item.gloss ..
+						m_qualifier.wrap_qualifier_css("”", "quote"))
+				end
+				if item.pos then
+					-- FIXME: Consider expanding aliases as found in [[Module:headword/data]] or similar.
+					insert(q, item.pos)
+				end
+			end
+			bit = require("Module:pron qualifier").format_qualifiers {
+				lang = lang,
+				text = bit,
+				q = q,
+				qq = item.qq,
+				qualifiers = item.qualifiers,
+				a = item.a,
+				aa = item.aa,
+			}
 		end
-		if item.refs or item.note then
+		if item.note then
-			local refspecs
+			-- Support removed on 2024-06-15.
-			if item.note then
+			error("Support for `.note` has been removed; switch to `.refs` (which must be a list)")
-				-- FIXME: eliminate item.note in favor of item.refs. Use tracking to find places
+		end
-				-- that use item.note.
+		if item.refs then
-				refspecs = {item.note}
+			local refspecs = item.refs
-			else
-				refspecs = item.refs
-			end
-			local refs = {}
 			if #refspecs > 0 then
-				for _, refspec in ipairs(refspecs) do
+				bit = bit .. require(references_module).format_references(refspecs)
-					if type(refspec) ~= "table" then
-						refspec = {text = refspec}
-					end
-					local refargs
-					if refspec.name or refspec.group then
-						refargs = {name = refspec.name, group = refspec.group}
-					end
-					table.insert(refs, mw.getCurrentFrame():extensionTag("ref", refspec.text, refargs))
-				end
-				bit = bit .. table.concat(refs)
 			end
 		end
-		table.insert(bits, bit)
+		bit = (item.separator or (i == 1 and "" or separator)) .. bit
+		insert(bits, bit)
 		if lang then
 			-- Add syllable count if the language's diphthongs are listed in [[Module:syllables]].
-			-- Don't do this if the term has spaces or a liaison mark (‿).
+			-- Don't do this if the term has spaces or a liaison mark (‿), or if the page isn't in mainspace.
-			if not no_count and mw.title.getCurrentTitle().namespace == 0 then
+			if not no_count and namespace == 0 then
-				m_syllables = m_syllables or require('Module:syllables')
+				m_syllables = m_syllables or require(syllables_module)
 				local langcode = lang:getCode()
 				if m_data.langs_to_generate_syllable_count_categories[langcode] then
-					local repr = determine_repr(item.pron)
+					local phonemic, phonetic = split_phonemic_phonetic(item.pron)
 					local use_it
-					if m_data.langs_to_use_phonetic_notation[langcode] then
+					if not phonetic then -- not a '/.../ [...]' combined pronunciation
-						use_it = repr == "phonetic"
+						local repr = determine_repr(phonemic)
-					else
+						if m_data.langs_to_use_phonetic_notation[langcode] then
-						use_it = repr == "phonemic"
+							use_it = repr == "phonetic" and phonemic or nil
+						else
+							use_it = repr == "phonemic" and phonemic or nil
+						end
+					elseif repr == "phonetic" then
+						use_it = phonetic
+					elseif repr == "phonemic" then
+						use_it = phonemic
 					end
-					if use_it and not find(item.pron, "[ ‿]") then
+					-- Note: two uses of find with plain patterns are much faster than umatch with [ ‿].
-						local syllable_count = m_syllables.getVowels(item.pron, lang)
+					if use_it and not (find(use_it, " ") or find(use_it, "‿")) then
+						local syllable_count = m_syllables.getVowels(use_it, lang)
 						if syllable_count then
-							table.insert(categories, "[[Category:" .. lang:getCanonicalName() .. " " .. syllable_count .. "-syllable words]]")
+							insert(categories, lang:getCanonicalName() .. " " .. syllable_count ..
+								"-syllable words")
 						end
 					end
@@ Line 156: / Line 368: @@
 			end
+			-- The nature of hasInvalidSeparators() is such that we don't have to split a combined '/.../ [...]' spec
+			-- into its parts in order to process it.
 			if lang:getCode() == "en" and hasInvalidSeparators(item.pron) then
-				table.insert(categories, "[[Category:IPA for English using .ˈ or .ˌ]]")
+				insert(categories, "IPA for English using .ˈ or .ˌ")
 			end
 		end
 	end
-	return table.concat(bits, separator) .. table.concat(categories)
+	return process_maybe_split_categories(split_output, categories, concat(bits), lang)
 end
--- Takes an IPA pronunciation and formats it and adds cleanup categories.
+--[=[
-function export.format_IPA(lang, pron, split_output)
+Format a single IPA pronunciation, which cannot be a combined spec (such as {/.../ [...]}). This has been extracted from
-	local err = {}
+{format_IPA()} to allow the latter to handle such combined specs. This works like {format_IPA()} but requires that
-	local categories = {}
+pre-created {err} (for error messages) and {categories} lists be passed in, and adds any generated error messages and
+categories to those lists. A single value is returned, the pronunciation, which is usually the same as passed in, but
-	-- Remove wikilinks, so that wikilink brackets are not misinterpreted as
+may have HTML added surrounding invalid characters so they appear in red.
-	-- indicating phonemic transcription
+]=]
-	local str_gsub = string.gsub
+local function format_one_IPA(lang, pron, err, categories)
-	local without_links = str_gsub(pron, '%[%[[^|%]]+|([^%]]+)%]%]', '%1')
+	-- Remove wikilinks, so that wikilink brackets are not misinterpreted as indicating phonetic transcription
-	without_links = str_gsub(without_links, '%[%[[^%]]+%]%]', '%1')
+	local without_links = gsub(pron, "%[%[[^|%]]+|([^%]]+)%]%]", "%1")
+	without_links = gsub(without_links, "%[%[[^%]]+%]%]", "%1")
 	-- Detect whether this is a phonemic or phonetic transcription
 	local repr, reconstructed = determine_repr(without_links)
 	if reconstructed then
 		pron = sub(pron, 2)
+		without_links = sub(without_links, 2)
 	end
 	-- If valid, strip the representation marks
 	if repr == "phonemic" then
-		pron = sub(pron, 2, -2)
+		pron = usub(pron, 2, -2)
-		without_links = sub(without_links, 2, -2)
+		without_links = usub(without_links, 2, -2)
 	elseif repr == "phonetic" then
-		pron = sub(pron, 2, -2)
+		pron = usub(pron, 2, -2)
-		without_links = sub(without_links, 2, -2)
+		without_links = usub(without_links, 2, -2)
 	elseif repr == "orthographic" then
-		pron = sub(pron, 2, -2)
+		pron = usub(pron, 2, -2)
-		without_links = sub(without_links, 2, -2)
+		without_links = usub(without_links, 2, -2)
 	elseif repr == "rhyme" then
-		pron = sub(pron, 2)
+		pron = usub(pron, 2)
-		without_links = sub(without_links, 2)
+		without_links = usub(without_links, 2)
 	else
-		table.insert(categories, "[[Category:IPA pronunciations with invalid representation marks]]")
+		insert(categories, "IPA pronunciations with invalid representation marks")
-		-- table.insert(err, "invalid representation marks")
+		-- insert(err, "invalid representation marks")
 		-- Removed because it's annoying when previewing pronunciation pages.
 	end
 	if pron == "" then
-		table.insert(categories, "[[Category:IPA pronunciations with no pronunciation present]]")
+		insert(categories, "IPA pronunciations with no pronunciation present")
 	end
 	-- Check for obsolete and nonstandard symbols
 	for i, symbol in ipairs(m_data.nonstandard) do
@@ Line 213: / Line 429: @@
 				result = {}
 			end
-			table.insert(result, nonstandard)
+			insert(result, nonstandard)
-			table.insert(categories, "[[Category:IPA pronunciations with obsolete or nonstandard characters|" .. nonstandard .. "]]")
+			insert(categories,
+				{cat = "IPA pronunciations with obsolete or nonstandard characters", sort_key = nonstandard}
+			)
 		end
 		if result then
-			table.insert(err, "obsolete or nonstandard characters (" .. table.concat(result) .. ")")
+			insert(err, "obsolete or nonstandard characters (" .. concat(result) .. ")")
 			break
 		end
 	end
 	--[[ Check for invalid symbols after removing the following:
 . wikilinks (handled above)
@@ Line 233: / Line 451: @@
 . superscripts enclosed in superscript parentheses		]]
 	local found_HTML
-	local result = str_gsub(without_links, "<(%a+)[^>]*>([^<]+)</%1>",
+	local result = gsub(without_links, "<(%a+)[^>]*>([^<]+)</%1>",
 		function(tagName, content)
 			found_HTML = true
 			return content
 		end)
-	result = str_gsub(result, "'''([^']*)'''", "%1")
+	result = gsub(result, "'''([^']*)'''", "%1")
-	result = str_gsub(result, "''([^']*)''", "%1")
+	result = gsub(result, "''([^']*)''", "%1")
-	result = str_gsub(result, "&[^;]+;", "") -- This may catch things that are not valid character entities.
+	result = gsub(result, "&[^;]+;", "") -- This may catch things that are not valid character entities.
-	result = str_gsub(result, "^%*", "")
+	result = gsub(result, "^%*", "")
-	result = gsub(result, ",%s+", "")
+	result = ugsub(result, ",%s+", "")
-	result = gsub(result, "⁽[".. m_symbols.superscripts .. "]+⁾", "")
-	result = gsub(result, '[' .. m_symbols.valid .. ']', '')
 	-- VS15
 	local vs15_class = "[" .. m_symbols.add_vs15 .. "]"
-	if mw.ustring.find(pron, vs15_class) then
+	if umatch(pron, vs15_class) then
-		local vs15 = U(0xFE0E)
+		local vs15 = u(0xFE0E)
-		if mw.ustring.find(result, vs15) then
+		if find(result, vs15) then
 			result = gsub(result, vs15, "")
-			pron = mw.ustring.gsub(pron, vs15, "")
+			pron = gsub(pron, vs15, "")
 		end
-		pron = mw.ustring.gsub(pron, "(" .. vs15_class .. ")", "%1" .. vs15)
+		pron = ugsub(pron, "(" .. vs15_class .. ")", "%1" .. vs15)
 	end
-	if result ~= '' then
+	if result ~= "" then
 		local suggestions = {}
-		mw.log(pron, result)
+		for k, v in pairs(m_symbols.invalid) do
-		local namespace = mw.title.getCurrentTitle().namespace
+			if find(result, k, 1, true) then
-		local category
+				insert(suggestions, k .. " with " .. v)
-		if namespace == 0 then
+			end
-			-- main namespace
-			category = "IPA pronunciations with invalid IPA characters"
-		elseif namespace == 118 then
-			-- reconstruction namespace
-			category = "IPA pronunciations with invalid IPA characters/reconstruction"
-		else
-			category = "IPA pronunciations with invalid IPA characters/non_mainspace"
 		end
-		for character in gmatch(result, ".") do
+		if suggestions[1] then
-			local suggestion = m_symbols.suggestions[character]
+			suggestions = listToText(suggestions)
-			if suggestion then
+			if is_content_page then
-				table.insert(suggestions, character .. " with " .. suggestion)
+				error("Invalid IPA: replace " .. suggestions)
+			else
+				insert(err, "replace " .. suggestions)
 			end
-			table.insert(categories, "[[Category:" .. category .. "|" .. character .. "]]")
 		end
-		table.insert(err, "invalid IPA characters (" .. result .. ")")
+		result = ugsub(result, "⁽[".. m_symbols.superscripts .. "]+⁾", "")
-		if suggestions[1] then
+		local per_lang_valid
-			table.insert(err, "replace " .. table.concat(suggestions, ", "))
+		if lang then
+			per_lang_valid = m_symbols.per_lang_valid[lang:getCode()]
+		end
+		per_lang_valid = per_lang_valid or ""
+		result = ugsub(result, "[" .. m_symbols.valid .. per_lang_valid .. "]", "")
+		if result ~= "" then
+			local category = "IPA pronunciations with invalid IPA characters"
+			if not is_content_page then
+				category = category .. "/non_mainspace"
+			end
+			insert(categories, category)
+			insert(err, "invalid IPA characters (" .. result .. ")")
 		end
 	end
 	if found_HTML then
-		table.insert(categories, "[[Category:IPA pronunciations with paired HTML tags]]")
+		insert(categories, "IPA pronunciations with paired HTML tags")
 	end
-	-- Reference inside IPA template usage
-	-- FIXME: Doesn't work; you can't put HTML in module output.
-	--if mw.ustring.find(pron, '</ref>') then
-	--	table.insert(categories, "[[Category:IPA pronunciations with reference]]")
-	--end
 	if repr == "phonemic" or repr == "rhyme" then
 		if lang and m_data.phonemes[lang:getCode()] then
@@ Line 299: / Line 514: @@
 			local rest = pron
 			local phonemes = {}
-			while mw.ustring.len(rest) > 0 do
+			while #rest > 0 do
-				local longestmatch = ""
+				local longestmatch, longestmatch_len = "", 0
-				if sub(rest, 1, 1) == "(" or sub(rest, 1, 1) == ")" then
+				local rest_init = sub(rest, 1, 1)
-					longestmatch = sub(rest, 1, 1)
+				if rest_init == "(" or rest_init == ")" then
+					longestmatch = rest_init
+					longestmatch_len = 1
 				else
 					for _, phoneme in ipairs(valid_phonemes) do
-						if mw.ustring.len(phoneme) > mw.ustring.len(longestmatch) and sub(rest, 1, mw.ustring.len(phoneme)) == phoneme then
+						local phoneme_len = len(phoneme)
+						if phoneme_len > longestmatch_len and usub(rest, 1, phoneme_len) == phoneme then
 							longestmatch = phoneme
+							longestmatch_len = len(longestmatch)
 						end
 					end
 				end
-				if mw.ustring.len(longestmatch) > 0 then
+				if longestmatch_len > 0 then
-					table.insert(phonemes, longestmatch)
+					insert(phonemes, longestmatch)
-					rest = sub(rest, mw.ustring.len(longestmatch) + 1)
+					rest = usub(rest, longestmatch_len + 1)
 				else
-					local phoneme = sub(rest, 1, 1)
+					local phoneme = usub(rest, 1, 1)
-					table.insert(phonemes, "<span style=\"color: red\">" .. phoneme .. "</span>")
+					insert(phonemes, "<span style=\"color: red\">" .. phoneme .. "</span>")
-					rest = sub(rest, 2)
+					rest = usub(rest, 2)
-					table.insert(categories, "[[Category:IPA pronunciations with invalid phonemes/" .. lang:getCode() .. "]]")
+					insert(categories, "IPA pronunciations with invalid phonemes/" .. lang:getCode())
 				end
 			end
-			pron = table.concat(phonemes)
+			pron = concat(phonemes)
 		end
 		if repr == "phonemic" then
 			pron = "/" .. pron .. "/"
@@ Line 337: / Line 556: @@
 		pron = "⟨" .. pron .. "⟩"
 	end
 	if reconstructed then
 		pron = "*" .. pron
 	end
+	return pron
+end
+--[==[
+Format an IPA pronunciation. This wraps the pronunciation in appropriate CSS classes and adds cleanup categories and
+error messages as needed. The pronunciation `pron` should be either phonemic (surrounded by {/.../}), phonetic
+(surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}), a rhyme (beginning with a hyphen) or a combined
+phonemic/phonetic spec (of the form {/.../ [...]}). `lang` indicates the language of the pronunciation and can be {nil}.
+If not {nil}, and the specified language has data in [[Module:IPA/data]] indicating the allowed phonemes, then the page
+will be added to a cleanup category and an error message displayed next to the outputted pronunciation. Note that {lang}
+also determines sort key processing in the added cleanup categories. If `split_output` is not given, the return value is
+a concatenation of the formatted pronunciation, error messages and formatted cleanup categories. Otherwise, three values
+are returned: the formatted pronunciation, the cleanup categories and the concatenated error messages. If `split_output`
+is the value {"raw"}, the cleanup categories are returned in list form, where the list elements are a combination of
+category strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]].
+If `split_output` is any other value besides {nil}, the cleanup categories are returned as a pre-formatted concatenated
+string.
+]==]
+function export.format_IPA(lang, pron, split_output)
+	-- Accumulators handed to format_one_IPA() for each half of the pronunciation.
+	local err = {}
+	local categories = {}
+	-- `pron` shouldn't contain ref tags.
+	if match(pron, "\127'\"`UNIQ%-%-ref%-[%dA-F]+%-QINU`\"'\127") then
+		error("<ref> tags found inside pronunciation parameter.")
+	end
+	-- A combined spec is split in two; `phonetic` is nil when there is only one part.
+	local phonemic, phonetic = split_phonemic_phonetic(pron)
+	pron = format_one_IPA(lang, phonemic, err, categories)
+	if phonetic then
+		phonetic = format_one_IPA(lang, phonetic, err, categories)
+		pron = pron .. " " .. phonetic
+	end
 	if err[1] then
-		err = '<span class="previewonly error" style="font-size: small;>&#32;' .. table.concat(err, ', ') .. '</span>'
+		-- The closing quote of the style attribute must come before `>`; otherwise the attribute value is
+		-- unterminated and the emitted HTML is malformed.
+		err = '<span class="previewonly error" style="font-size: small;">&#32;' .. concat(err, ", ") .. "</span>"
 	else
 		err = ""
 	end
-	if split_output then -- for use of IPA in links
+	-- process_maybe_split_categories() picks the return shape (concatenated vs. split) from `split_output`.
+	return process_maybe_split_categories(split_output, categories, '<span class="IPA">' .. pron .. "</span>", lang,
-		return '<span style=\"font-size:110%;font-family:Gentium,\'DejaVu Sans\',\'Segoe UI\',sans-serif>' .. pron .. '</span>', table.concat(categories), err
+		err)
-	else
-		return '<span style=\"font-size:110%;font-family:Gentium,\'DejaVu Sans\',\'Segoe UI\',sans-serif>' .. pron .. '</span>' .. err .. table.concat(categories)
-	end
 end
-function export.example(frame)
+--[==[
-	local output = {}
+Format a line of one or more enPR pronunciations as {{tl|enPR}} would do it, i.e. with a preceding {"enPR:"} (linked to
+[[Appendix:English pronunciation]]) followed by one or more formatted, comma-separated enPR pronunciations. The
-	local m_links = require('Module:links')
+pronunciations are formatted by wrapping them in the {{cd|AHD}} and {{cd|enPR}} CSS classes and adding any left and
-	local m_languages = require('Module:languages')
+right regular and accent qualifiers. In addition, the overall result is wrapped in any overall left and right regular
+and accent qualifiers. There is a single parameter `data`, an object with the following fields:
-	table.insert(
+* `items` is a list of enPR pronunciations, each of which is an object with the following properties:
-		output,
+** `pron`: the enPR pronunciation;
-[[
+** `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted pronunciation;
-{| class="wikitable"
+** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
-! Term !! IPA !! Generated X-SAMPA !! Regenerated IPA !! Matched?
+** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
-]]
+** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation.
-	)
+* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
-	local row =
+  pronunciations and preceding {"enPR:"}.
-[[
+* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
-|-
+* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
-| link || IPA || XSAMPA || regenerated_IPA || matched
+  pronunciations and preceding {"enPR:"}.
-]]
+* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
+]==]
-	local examples = mw.text.split(frame.args[1], ",%s*")
+function export.format_enPR_full(data)
+	local prefix = "[[Appendix:English pronunciation|enPR]]: "
-	local m_XSAMPA = require("Module:IPA/X-SAMPA")
+	local lang = require("Module:languages").getByCode("en")
+	local parts = {}
-	for _, example in pairs(examples) do
-		local lang, word = match(example, "(%l%l%l?):(.+) [/%[]")
+	-- Wrap each pronunciation in the enPR CSS classes; per-item qualifiers are attached only when at least one
+	-- of the four qualifier lists is non-empty.
+	for _, item in ipairs(data.items) do
+		local part = '<span class="AHD enPR">' .. item.pron .. "</span>"
-		if lang then
-			lang = m_languages.getByCode(lang) or error('"' .. lang .. '" is not a valid language code.')
+		if item.q and item.q[1] or item.qq and item.qq[1] or item.a and item.a[1] or item.aa and item.aa[1] then
+			-- Use the shared module-name constant, as the overall-qualifier branch below does.
+			part = require(pron_qualifier_module).format_qualifiers {
+				lang = lang,
+				text = part,
+				q = item.q,
+				qq = item.qq,
+				a = item.a,
+				aa = item.aa,
+			}
 		end
+		insert(parts, part)
-		local IPA = match(example, "/[^/]+/")
+	end
-			or match(example, "%[[^%]]+%]")
-			or error('No IPA transcription found in "' .. example .. '".')
+	local prontext = prefix .. concat(parts, ", ")
-		local XSAMPA = m_XSAMPA.IPA_to_XSAMPA(IPA)
+	-- Overall qualifiers wrap the whole line, including the "enPR:" prefix.
+	if data.q and data.q[1] or data.qq and data.qq[1] or data.a and data.a[1] or data.aa and data.aa[1] then
-		local regenerated_IPA = m_XSAMPA.XSAMPA_to_IPA(XSAMPA)
+		prontext = require(pron_qualifier_module).format_qualifiers {
+			lang = lang,
-		content = {
+			text = prontext,
-			link = lang and word and m_links.full_link{ term = word, lang = lang },
+			q = data.q,
-			matched = IPA == regenerated_IPA
+			qq = data.qq,
-				and '<span style="color: green;">yes</span>'
+			a = data.a,
-				or '<span style="color: red;">no</span>',
+			aa = data.aa,
-			IPA = '<span style=\"font-size:110%;font-family:Gentium,\'DejaVu Sans\',\'Segoe UI\',sans-serif>' .. IPA .. '</span>',
-			XSAMPA = '<code>' .. XSAMPA .. '</code>',
-			regenerated_IPA = '<span style=\"font-size:110%;font-family:Gentium,\'DejaVu Sans\',\'Segoe UI\',sans-serif>' .. regenerated_IPA .. '</span>'
 		}
-		local function add_content(item)
-			return content[item] or ""
-		end
-		local row = gsub(row, "[%a_]+", add_content)
-		table.insert(output, row)
 	end
-	table.insert(output, "|}")
+	return prontext
-	return table.concat(output)
 end
 return export