Module:IPA: Difference between revisions

(23 intermediate revisions by the same user not shown)

Line 1:

local export = {}

~~-- [[Module:IPA/data]]~~

local ~~m_data~~ = ~~mw.loadData('~~Module:~~IPA/data') -- [[~~Module:~~IPA/data]]~~

local force_cat = false -- for testing

local ~~m_symbols~~ = mw.loadData('Module:IPA/data~~/symbols'~~) ~~-- [[Module:IPA/data/symbols]]~~

local pages_module = "Module:pages"

local pron_qualifier_module = "Module:pron qualifier"

local qualifier_module = "Module:qualifier"

local references_module = "Module:references"

local string_utilities_module = "Module:string utilities"

local syllables_module = "Module:syllables"

local utilities_module = "Module:utilities"

local m_data = mw.loadData("Module:IPA/data")

local m_str_utils = require(string_utilities_module)

local m_syllables -- [[Module:syllables]]; loaded below if needed

local m_symbols = mw.loadData("Module:IPA/data/symbols")

local concat = table.concat

local decode_entities = m_str_utils.decode_entities

local find = string.find

local gmatch = m_str_utils.gmatch

local gsub = string.gsub

local insert = table.insert

local is_preview = require(pages_module).is_preview

local len = m_str_utils.len

local listToText = mw.text.listToText

local match = string.match

local pattern_escape = m_str_utils.pattern_escape

local sub = string.sub

local u = m_str_utils.char

local ugsub = m_str_utils.gsub

local umatch = m_str_utils.match

local usub = m_str_utils.sub

local ~~sub~~ = mw.~~ustring~~.~~sub~~

local namespace = mw.title.getCurrentTitle().namespace

local ~~find~~ = ~~mw.ustring.find~~

local is_content_page = namespace == 0 or namespace == 120

~~local gsub~~ = ~~mw.ustring.gsub~~

~~local match~~ = ~~mw.ustring.match~~

~~local gmatch~~ = ~~mw.ustring.gmatch~~

~~local U~~ = ~~mw.ustring.char~~

function ~~export.format_IPA_full~~(~~lang~~, ~~items~~, ~~err~~, ~~separator, sortKey~~, ~~no_count~~)

local function process_maybe_split_categories(split_output, categories, prontext, lang, errtext)

~~local IPA_key~~, ~~key_link~~, ~~err_text~~, ~~prefix~~, ~~IPAs, category~~

if split_output ~= "raw" then

~~local hasKey~~ = ~~m_data.langs_with_infopages~~

if categories[1] then

~~local namespace = mw.title.getCurrentTitle().nsText~~

categories = require(utilities_module).format_categories(categories, lang, nil, nil, force_cat)

else

if ~~err~~ then

categories = ""

~~err_text = ''~~ .~~. err .. ''~~

end

~~else~~

end

if ~~hasKey[lang:getCode()]~~ then

if split_output then -- for use of IPA in links, etc.

~~IPA_key = "wikt:Appendix:" .. lang:getCanonicalName() .. " pronunciation"~~

if errtext then

return prontext, categories, errtext

else

~~IPA_key = "wikipedia:" .. lang:getCanonicalName() .. " phonology"~~

return prontext, categories

end

else

~~key_link~~ = "[[" .. ~~IPA_key~~ .. "|~~key~~]]"

return prontext .. (errtext or "") .. categories

end

--[==[

Format a line of one or more IPA pronunciations as {{tl|IPA}} would do it, i.e. with a preceding {"IPA:"} followed by

the word {"key"} linking to an Appendix page describing the language's phonology, and with an added category

` ``lang`` terms with IPA pronunciation`. Other than the extra preceding text and category, this is identical

to {format_IPA_multiple()}, and the considerations described there in the documentation apply here as well. There is a

single parameter `data`, an object with the following fields:

* `lang`: Object representing the language of the pronunciations, which is used when adding cleanup categories for

pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to

add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); for adding a category

` ``lang`` terms with IPA pronunciation`; and for determining the proper sort keys for categories. Unlike

for {format_IPA_multiple()}, `lang` may not be {nil}.

* `items`: List of pronunciations, in exactly the same format as for {format_IPA_multiple()}.

* `err`: If not {nil}, a string containing an error message to use in place of the link to the language's phonology.

* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the

first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in

`items`.

* `sort_key`: Explicit sort key used for categories.

* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that

only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a

given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup

or other categories. If you need them suppressed, use `split_output` to return the categories separately and ignore

them.

* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted

categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is

the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category

strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If

`split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.

* `include_langname`: If specified, prefix the result with the language name, followed by a colon.

* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted

pronunciations and preceding {"IPA:"}.

* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.

* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted

pronunciations and preceding {"IPA:"}.

* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.

]==]

function export.format_IPA_full(data)

if type(data) ~= "table" or data.getCode then

error("Must now supply a table of arguments to format_IPA_full(); first argument should be that table, not a language object")

end

local lang = data.lang

local items = data.items

local err = data.err

local separator = data.separator

local sort_key = data.sort_key

local no_count = data.no_count

local split_output = data.split_output

local q = data.q

local qq = data.qq

local a = data.a

local aa = data.aa

local include_langname = data.include_langname

local key = data.key

if not lang or not lang.getCode then

error("Must specify language to format_IPA_full()")

end

local langname = lang:getCanonicalName()

local prefix_text

local ~~prefix = "[[wikt:Wiktionary:International Phonetic Alphabet|IPA]](" .. ( key_link or err_text ) .. "): "~~

if err then

prefix_text = '' .. err .. ''

~~IPAs = export.format_IPA_multiple(lang, items, separator, no_count)~~

if ~~lang and (namespace == "" or namespace == "Reconstruction")~~ then

~~sortKey~~ = ~~sortKey or lang:makeSortKey(mw.title.getCurrentTitle().text)~~

~~sortKey~~ = ~~sortKey and (~~"|" .. ~~sortKey) or ""~~

~~category = "[[Category:"~~ .. ~~lang:getCanonicalName() .. " terms with IPA pronunciation" .. sortKey .. "]]"~~

else

~~category~~ = ""

prefix_text = key or "IPA for " .. langname

prefix_text = "[[" .. prefix_text .. "|key]]"

end

~~return~~ prefix .. IPAs .. ~~category~~

local prefix = "[[wikt:Wiktionary:International Phonetic Alphabet|IPA]](" .. prefix_text .. "): "

local IPAs, categories = export.format_IPA_multiple(lang, items, separator, no_count, "raw")

local prontext = prefix .. IPAs

if q and q[1] or qq and qq[1] or a and a[1] or aa and aa[1] then

prontext = require(pron_qualifier_module).format_qualifiers {

lang = lang,

text = prontext,

q = q,

qq = qq,

a = a,

aa = aa,

}

end

if include_langname then

prontext = langname .. ": " .. prontext

end

return process_maybe_split_categories(split_output, categories, prontext, lang)

end

local function split_phonemic_phonetic(pron)

local reconstructed, phonemic, phonetic = match(pron, "^(%*?)(/.-/)%s+(%[.-%])$")

if reconstructed then

return reconstructed .. phonemic, reconstructed .. phonetic

else

return pron, nil

end

local function determine_repr(pron)

local ~~repr_mark = {}~~

local reconstructed

~~local repr,~~ reconstructed

-- Temporarily remove any initial asterisk before representation marks,

-- remove initial asterisk before representation marks, ~~used on some Reconstruction pages~~

-- which avoids having to account for it in the data, but set the

if ~~find~~(pron, "^%*") then

-- `reconstructed` flag.

if sub(pron, 1, 1) == "*" then

reconstructed = true

pron = sub(pron, 2)

end

local ~~representation_types = {~~

-- Some representation types have aliases for convenience (e.g. "// //" is

~~['/'] = { right = '/'~~, ~~type~~ = ~~'phonemic'~~, },

-- an alias for "⫽ ⫽"), and these need to be substituted in before checking

[~~'['~~] = ~~{ right = '~~]', ~~type~~ = ~~'phonetic'~~, },

-- for other data.

~~['⟨'] = { right = '⟩', type = 'orthographic', }~~,

local opening, n = match(pron, "^.[\128-\191]*")

['-'] = ~~{ type~~ = ~~'rhyme' },~~

local subs_data = m_data.representation_subs[opening]

}

if subs_data then

pron, n = ugsub(pron, subs_data[1], subs_data[2])

~~repr_mark.i, repr_mark.f, repr_mark.left, repr_mark.right = find(pron, '^(.).~~-(~~.)$'~~)

-- If the substitution was made, `opening` needs to be changed to the

-- new opening character.

local ~~representation_type~~ = representation_types[~~repr_mark.left~~]

if n ~= 0 then

opening = subs_data[3]

if ~~representation_type~~ then

end

~~if representation_type.right then~~

end

if ~~repr_mark~~.~~right == representation_type~~.~~right~~ then

~~repr = representation_type.type~~

-- Get the type data based on the opening character (if any), and set the

~~end~~

-- representation type if the closing character matches.

~~else~~

local type_data, repr, closing = m_data.representation_types[opening]

repr = ~~representation_type.type~~

if type_data then

closing = type_data[2]

if type_data and match(pron, pattern_escape(closing) .. "$", #opening + 1) then

repr = type_data[1]

end

~~else~~

~~repr = nil~~

end

return repr, reconstructed

-- Default to the empty string.

if not repr then

opening, closing = "", ""

end

-- Reattach the asterisk if reconstructed.

if reconstructed then

pron = "*" .. pron

end

return pron, repr, opening, closing, reconstructed

end

local function hasInvalidSeparators(transcription)

~~if find~~(transcription, "%.[ˈˌ]") ~~then~~

-- Escape certain characters as well as pauses, which have the format "(...)" (with any number of dots), to avoid false-positives.

~~return~~ true

transcription = transcription:gsub(".[\128-\191]*", m_symbols.separator_escapes)

~~else~~

:gsub("%(%.+%)", "\3")

~~return false~~

:gsub("[()]+", "")

~~end~~

return (

transcription:find("..", nil, true) or

transcription:match("%.%f[%z \1\2\3,:;]") or

transcription:match("\1%f[%z \2\3,:;]") or

transcription:match("\2%f[%z \1\3,:;]") or

transcription:match("\3[:;]") or

transcription:match("%f[^%z \1\2\3,]%.")

) and true or false

end

function export.format_IPA_multiple(lang, items, separator, no_count)

--[==[

~~local notes = {}~~

Format a line of one or more bare IPA pronunciations (i.e. without any preceding {"IPA:"} and without adding to a

category ` ``lang`` terms with IPA pronunciation`). Individual pronunciations are formatted using

{format_IPA()} and are combined with separators, qualifiers, pre-text, post-text, etc. to form a line of pronunciations.

Parameters accepted are:

* `lang` is an object representing the language of the pronunciations, which is used when adding cleanup categories for

pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to

add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); and for computing the

proper sort keys for categories. `lang` may be {nil}.

* `items` is a list of pronunciations, each of which is an object with the following properties:

** `pron`: the pronunciation, in the same format as is accepted by {format_IPA()}, i.e. it should be either phonemic

(surrounded by {/.../}), phonetic (surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}) or a rhyme

(beginning with a hyphen);

** `pretext`: text to display directly before the formatted pronunciation, inside of any qualifiers or accent

qualifiers;

** `posttext`: text to display directly after the formatted pronunciation, inside of any qualifiers or accent

qualifiers;

** `q` or `qualifiers`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted

pronunciation; note that `qualifiers` is deprecated;

** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;

** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;

** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation;

** `refs`: {nil} or a list of references or reference specs to add after the pronunciation and any posttext and

qualifiers; the value of a list item is either a string containing the reference text (typically a call to a

citation template such as {{tl|cite-book}}, or a template wrapping such a call), or an object with fields `text`

(the reference text), `name` (the name of the reference, as in {{cd|<nowiki><ref name="foo">...</ref></nowiki>}}

or {{cd|<nowiki><ref name="foo" /></nowiki>}}) and/or `group` (the group of the reference, as in

{{cd|<nowiki><ref name="foo" group="bar">...</ref></nowiki>}} or

{{cd|<nowiki><ref name="foo" group="bar"/></nowiki>}}); this uses a parser function to format the reference

appropriately and insert a footnote number that hyperlinks to the actual reference, located in the

{{cd|<nowiki><references /></nowiki>}} section;

** `gloss`: {nil} or a gloss (definition) for this item, if different definitions have different pronunciations;

** `pos`: {nil} or a part of speech for this item, if different parts of speech have different pronunciations;

** `separator`: the separator text to insert directly before the formatted pronunciation and all qualifiers, accent

qualifiers and pre-text; defaults to the outer `separator` parameter.

* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the

first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in

`items`.

* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that

only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a

given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup

categories. If you need them suppressed, use `split_output` to return the categories separately and ignore them.

* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted

categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is

the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category

strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If

`split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.

]==]

function export.format_IPA_multiple(lang, items, separator, no_count, split_output)

local categories = {}

separator = separator or ', '

separator = separator or ", "

-- Format

if not items[1] then

if ~~mw.title.getCurrentTitle().nsText~~ == "Template~~" then~~

if namespace == 10 then -- Template

~~table.~~insert(items, {pron = "/aɪ piː ˈeɪ/"})

insert(items, {pron = "/aɪ piː ˈeɪ/"})

~~else~~

~~table.insert(categories, "[[Category:Pronunciation templates without a pronunciation]]"~~)

end

local bits = {}

for _, item in ipairs(items) do

for i, item in ipairs(items) do

local bit = export.format_IPA(lang, item.pron)

local bit

if item.qualifiers and item.qualifiers[1] then

-- If the pronunciation is entirely empty, allow this and don't do anything, so that e.g. the pretext and/or

bit = require("Module:qualifier").~~format_qualifier(~~item.qualifiers) .. ~~" "~~ .~~. bit~~

-- posttext can be specified to force something like ''unknown'' to appear in place of the pronunciation

-- (as happens e.g. when ? is used as a respelling in [[Module:ca-IPA]]; see [[guèiser]] for an example).

if item.pron == "" then

bit = ""

else

local item_categories, errtext

bit, item_categories, errtext = export.format_IPA(lang, item.pron, "raw")

bit = bit .. errtext

for _, cat in ipairs(item_categories) do

insert(categories, cat)

end

if item.pretext then

bit = item.pretext .. bit

end

if item.posttext then

bit = bit .. item.posttext

end

local has_qualifiers = item.q and item.q[1] or item.qq and item.qq[1] or item.qualifiers and item.qualifiers[1]

or item.a and item.a[1] or item.aa and item.aa[1]

local has_gloss_or_pos = item.gloss or item.pos

if has_qualifiers or has_gloss_or_pos then

-- FIXME: Currently we tack the gloss and POS (in that order) onto the end of the regular left qualifiers.

-- Should we do something different?

local q = item.q

if has_gloss_or_pos then

q = mw.clone(item.q) or {}

if item.gloss then

local m_qualifier = require(qualifier_module)

insert(q, m_qualifier.wrap_qualifier_css("“", "quote") .. item.gloss ..

m_qualifier.wrap_qualifier_css("”", "quote"))

end

if item.pos then

-- FIXME: Consider expanding aliases as found in [[Module:headword/data]] or similar.

insert(q, item.pos)

end

bit = require("Module:pron qualifier").format_qualifiers {

lang = lang,

text = bit,

q = q,

qq = item.qq,

qualifiers = item.qualifiers,

a = item.a,

aa = item.aa,

}

end

if item.note then

~~bit = bit~~ .. mw.~~getCurrentFrame~~()~~:extensionTag(~~"~~ref", item.note~~)

-- Support removed on 2024-06-15.

error("Support for `.note` has been removed; switch to `.refs` (which must be a list)")

end

if item.refs then

~~table~~.~~insert(bits, bit)~~

local refspecs = item.refs

if #refspecs > 0 then

~~--[=[ [[Special:WhatLinksHere/Template:tracking/IPA/syntax-error]]~~

bit = bit .. require(references_module).format_references(refspecs)

~~The length or gemination symbol should not appear after a syllable break or stress symbol. ]~~=]

end

~~if find(item~~.~~pron, "[ˈˌ%~~.~~][ːˑ]") then~~

require(~~"Module:debug"~~).~~track~~(~~"IPA/syntax-error"~~)

end

bit = (item.separator or (i == 1 and "" or separator)) .. bit

insert(bits, bit)

if lang then

-- Add syllable count if the language's diphthongs are listed in [[Module:syllables]].

if not no_count and ~~mw.title.getCurrentTitle().~~namespace == 0 then

-- Don't do this if the term has spaces, a liaison mark (‿) or isn't in mainspace.

m_syllables = m_syllables or require(~~'Module~~:~~syllables'~~)

if not no_count and namespace == 0 then

if ~~m_syllables~~.~~hasDiphthongs(lang)~~ then

m_syllables = m_syllables or require(syllables_module)

~~if determine_repr~~(item.pron) == "phonemic" or ~~lang~~:~~getCode~~() == "ru" then

local langcode = lang:getCode()

local syllable_count = m_syllables.getVowels(~~item.pron~~, lang)

if m_data.langs_to_generate_syllable_count_categories[langcode] then

local raw_phonemic, phonetic, use_it = split_phonemic_phonetic(item.pron)

local phonemic, repr = determine_repr(raw_phonemic)

if not phonetic then -- not a '/.../ [...]' combined pronunciation

if m_data.langs_to_use_phonetic_notation[langcode] then

use_it = repr == "phonetic" and phonemic or nil

else

use_it = repr == "phonemic" and phonemic or nil

end

elseif repr == "phonetic" then

use_it = phonetic

elseif repr == "phonemic" then

use_it = phonemic

end

-- Note: two uses of find with plain patterns are much faster than umatch with [ ‿].

if use_it and not (find(use_it, " ") or find(use_it, "‿")) then

local syllable_count = m_syllables.getVowels(use_it, lang)

if syllable_count then

~~table.~~insert(categories, ~~"[[Category:" ..~~ lang:getCanonicalName() .. " " .. syllable_count .. "-syllable words]]")

insert(categories, lang:getCanonicalName() .. " " .. syllable_count ..

end

"-syllable words")

end

~~end~~

~~if lang:getCode() == "en" then~~

~~if hasInvalidSeparators(item.pron) then~~

~~table.insert(categories, "[[Category:IPA for English using .ˈ or .ˌ]]")~~

end

Line 148:

Line 379:

end

return ~~table.~~concat(bits, ~~separator) .. table.concat(categories~~)

return process_maybe_split_categories(split_output, categories, concat(bits), lang)

end

-- ~~Takes an~~ IPA pronunciation ~~and formats it and adds cleanup categories~~.

--[=[

~~function export~~.format_IPA(~~lang, pron, split_output~~)

Format a single IPA pronunciation, which cannot be a combined spec (such as {/.../ [...]}). This has been extracted from

~~local~~ err = {}

{format_IPA()} to allow the latter to handle such combined specs. This works like {format_IPA()} but requires that

~~local~~ categories ~~= {}~~

pre-created `err` (for error messages) and `categories` lists be passed in, and adds any generated error messages and

categories to those lists. A single value is returned, the pronunciation, which is usually the same as passed in, but

~~-- Remove wikilinks~~, ~~so that wikilink brackets are not misinterpreted~~ as

may have HTML added surrounding invalid characters so they appear in red.

~~-- indicating phonemic transcription~~

]=]

~~local str_gsub~~ = ~~string.gsub~~

local function format_one_IPA(lang, raw_pron, err, categories)

local ~~without_links = str_gsub~~(~~pron~~, ~~'%[%[[^|%]]+|([^%]]+)%]%]'~~, ~~'%1'~~)

-- Disallow wikilinks.

~~without_links = str_gsub~~(~~without_links~~, '%[%[[^%]]+%]~~%]', '%1'~~)

if match(raw_pron, "%[%[.-%]%]") then

error("IPA input must not contain wikilinks.")

~~-- Detect whether this is a phonemic or phonetic transcription~~

~~local repr, reconstructed = determine_repr(without_links)~~

~~if reconstructed~~ then

~~pron = sub~~(~~pron, 2~~)

end

~~-- If valid, strip the representation marks~~

raw_pron = decode_entities(raw_pron)

~~if repr == "phonemic" then~~

~~pron~~ = ~~sub~~(~~pron, 2, -2~~)

-- Detect the type of transcription.

~~without_links = sub(without_links, 2, -2)~~

local pron, repr, opening, closing, reconstructed = determine_repr(raw_pron)

~~elseif repr == "phonetic" then~~

~~pron = sub(pron, 2,~~ -2)

~~without_links = sub(without_links, 2,~~ -2)

~~elseif repr == "orthographic" then~~

~~pron = sub(~~pron, 2, ~~-2)~~

~~without_links = sub(without_links~~, ~~2, -2)~~

~~elseif repr == "rhyme" then~~

~~pron = sub(pron~~, 2)

~~without_links~~ = ~~sub(without_links, 2)~~

~~else~~

~~table.insert(categories, "[[Category:IPA pronunciations with invalid representation marks]]")~~

~~-- table.insert~~(~~err, "invalid representation marks"~~)

~~-- Removed because it's annoying when previewing pronunciation pages.~~

~~end~~

if ~~pron~~ == "" then

-- Strip any reconstruction asterisk and representation marks.

~~table.~~insert(categories, "~~[[Category:~~IPA pronunciations with ~~no pronunciation present]]~~")

pron = sub(pron, #opening + 1 + (reconstructed and 1 or 0), -#closing - 1)

if repr ~= "orthographic" and lang and lang:getCode() == "en" and hasInvalidSeparators(pron) then

insert(categories, "English IPA pronunciations with invalid separators")

end

-- Check for obsolete and nonstandard symbols

for i, symbol in ipairs(m_data.nonstandard) do

for _, symbol in ipairs(m_data.nonstandard) do

local result

for nonstandard in gmatch(pron, symbol) do

Line 199:

Line 414:

result = {}

end

~~table.~~insert(result, nonstandard)

insert(result, nonstandard)

~~table.insert(categories, "[[Category:IPA pronunciations with obsolete or nonstandard characters|" .. nonstandard .. "]]"~~)

end

if result then

~~table.~~insert(err, "obsolete or nonstandard characters (" .~~. table~~.concat(result) .. ")")

insert(err, "obsolete or nonstandard characters (" .. concat(result) .. ")")

break

end

--[[ Check for invalid symbols after removing the following:

1. wikilinks (handled above)

Line 214:

Line 428:

3. bolding

4. italics

5~~. HTML entity for space~~

5. asterisk at beginning of transcription

6. asterisk at beginning of transcription

6. comma followed by spacing characters

7. comma followed by spacing characters

7. superscripts enclosed in superscript parentheses ]]

8. superscripts enclosed in superscript parentheses ]]

local found_HTML

local result = ~~str_gsub~~(~~without_links~~, "<(%a+)[^>]*>([^<]+)</%1>",

local result = gsub(pron, "<(%a+)[^>]*>([^<]+)</%1>",

function(tagName, content)

found_HTML = true

return content

end)

result = ~~str_gsub~~(result, "'''([^']*)'''", "%1")

result = gsub(result, "'''([^']*)'''", "%1")

result = ~~str_gsub~~(result, "''([^']*)''", "%1")

result = gsub(result, "''([^']*)''", "%1")

result = ~~str_gsub(result, "&[^;]+;", "") -- This may catch things that are not valid character entities.~~

result = gsub(result, "^%*", "")

~~result = str_gsub~~(result, "^%*", "")

result = ugsub(result, ",%s+", "")

result = ~~gsub~~(result, ",%s+", "")

~~result~~ = ~~gsub(result,~~ "⁽[".. m_symbols.~~superscripts~~ .. "]~~+⁾", "~~")

-- VS15

~~result = gsub~~(~~result~~, ~~'[' .. m_symbols.valid .. ']', ''~~)

local vs15_class = "[" .. m_symbols.add_vs15 .. "]"

~~if result ~= ''~~ then

if umatch(pron, vs15_class) then

local ~~suggestions~~ = {}

local vs15 = u(0xFE0E)

~~mw.log~~(~~pron, result~~)

if find(result, vs15) then

~~local namespace = mw.title.getCurrentTitle~~()~~.namespace~~

result = gsub(result, vs15, "")

~~local category~~

pron = gsub(pron, vs15, "")

~~if namespace == 0~~ then

~~-- main namespace~~

~~category~~ = "~~IPA pronunciations with invalid IPA characters~~"

~~elseif namespace == 118 then~~

~~-- reconstruction namespace~~

~~category~~ = "~~IPA pronunciations with invalid IPA characters/reconstruction"~~

~~else~~

~~category = "IPA pronunciations with invalid IPA characters/non_mainspace~~"

end

~~for character in gmatch~~(~~result~~, "."~~) do~~

pron = ugsub(pron, vs15_class, "%0" .. vs15)

local ~~suggestion~~ = m_symbols.~~suggestions~~[~~character~~]

end

if ~~suggestion~~ then

table.~~insert~~(~~suggestions~~, ~~character ..~~ " ~~with~~ " ~~.. suggestion~~)

if result ~= "" then

if lang then

-- Get the per_lang_valid data, and convert any per-language valid sequences to spaces.

local per_lang_valid = m_symbols.per_lang_valid[lang:getCode()]

if per_lang_valid then

if type(per_lang_valid) == "table" then

for _, pattern in pairs(per_lang_valid) do

result = ugsub(result, pattern, " ")

end

else -- Should be a string.

result = ugsub(result, per_lang_valid, " ")

end

~~table.insert(categories, "[[Category:" .. category .. "|" .. character .. "]]")~~

end

table.~~insert~~(~~err, "~~invalid ~~IPA characters~~ (~~" ..~~ result ~~.. "~~)")

local suggestions

~~if suggestions[1] then~~

-- Check for any invalid sequences, excluding anything in the per-language lookup table.

~~table.insert~~(~~err~~, ~~"replace " .. table.concat(suggestions~~, ", "))

for k, v in pairs(m_symbols.invalid) do

if find(result, k, nil, true) then

result = ugsub(result, k, v)

end

-- Convert any valid character sequences to spaces

for _, pattern in pairs(m_symbols.valid) do

result = ugsub(result, pattern, " ")

end

~~if found_HTML then~~

if (repr == "phonemic" or repr == "rhyme") and lang and m_data.phonemes[lang:getCode()] then

~~table.insert(categories, "[[Category:IPA pronunciations with paired HTML tags]]")~~

local valid_phonemes = m_data.phonemes[lang:getCode()]

~~end~~

local rest = pron

local phonemes = {}

~~-- Reference inside IPA template usage~~

~~-- FIXME: Doesn't work; you can't put HTML in module output.~~

while #rest > 0 do

--if ~~mw.ustring.find(pron, '</ref>') then~~

local longestmatch, longestmatch_len = "", 0

~~-- table.insert~~(~~categories, "[[Category:IPA pronunciations with reference]]")~~

~~--end~~

local rest_init = sub(rest, 1, 1)

if rest_init == "(" or rest_init == ")" then

if repr == "phonemic" or repr == "rhyme" ~~then~~

longestmatch = rest_init

if lang and m_data.phonemes[lang:getCode()] then

longestmatch_len = 1

local valid_phonemes = m_data.phonemes[lang:getCode()]

else

local rest = pron

for _, phoneme in ipairs(valid_phonemes) do

local phonemes = {}

local phoneme_len = len(phoneme)

if phoneme_len > longestmatch_len and usub(rest, 1, phoneme_len) == phoneme then

while ~~mw.ustring.len(~~rest) > 0 do

longestmatch = phoneme

local longestmatch = ""

longestmatch_len = len(longestmatch)

if sub(rest, 1, 1) == "(" or ~~sub(rest, 1, 1)~~ == ")" then

longestmatch = ~~sub(rest,~~ 1~~, 1)~~

else

for _, phoneme in ipairs(valid_phonemes) do

~~if mw.ustring.~~len(phoneme) > ~~mw.ustring.len(longestmatch)~~ and ~~sub~~(rest, 1, ~~mw.ustring.len(phoneme)~~) == phoneme then

longestmatch = phoneme

~~end~~

end

~~if mw.ustring.len(longestmatch) > 0 then~~

~~table.insert(phonemes, longestmatch)~~

~~rest = sub(rest, mw.ustring.len(longestmatch) + 1)~~

~~else~~

~~local phoneme = sub(rest, 1, 1)~~

~~table.insert(phonemes, "" .. phoneme .. "")~~

~~rest = sub(rest, 2)~~

~~table.insert(categories, "[[Category:IPA pronunciations with invalid phonemes/" .. lang:getCode() .. "]]")~~

~~require("Module:debug").track("IPA/invalid phonemes/" .. phoneme)~~

~~end~~

end

~~pron~~ = ~~table~~.~~concat~~(~~phonemes~~)

if longestmatch_len > 0 then

insert(phonemes, longestmatch)

rest = usub(rest, longestmatch_len + 1)

else

local phoneme = usub(rest, 1, 1)

insert(phonemes, "" .. phoneme .. "")

rest = usub(rest, 2)

end

~~if repr =~~= "~~phonemic~~" ~~then~~

pron = concat(phonemes)

~~pron =~~ "/" .. pron .. ~~"/"~~

end

~~else~~

~~pron~~ = ~~"-"~~ .. pron

return (reconstructed and "*" or "") .. opening .. pron .. closing

~~end~~

end

~~elseif repr == "~~phonetic" then

~~pron = "["~~ .. ~~pron~~ .. "]"

--[==[

~~elseif repr~~ == "~~orthographic~~" then

Format an IPA pronunciation. This wraps the pronunciation in appropriate CSS classes and adds cleanup categories and

~~pron =~~ "~~⟨" .. pron .~~. "⟩"

error messages as needed. The pronunciation `pron` should be either phonemic (surrounded by {/.../}), phonetic

(surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}), a rhyme (beginning with a hyphen) or a combined

phonemic/phonetic spec (of the form {/.../ [...]}). `lang` indicates the language of the pronunciation and can be {nil}.

If not {nil}, and the specified language has data in [[Module:IPA/data]] indicating the allowed phonemes, then the page

will be added to a cleanup category and an error message displayed next to the outputted pronunciation. Note that `lang`

also determines sort key processing in the added cleanup categories. If `split_output` is not given, the return value is

a concatenation of the formatted pronunciation, error messages and formatted cleanup categories. Otherwise, three values

are returned: the formatted pronunciation, the cleanup categories and the concatenated error messages. If `split_output`

is the value {"raw"}, the cleanup categories are returned in list form, where the list elements are a combination of

category strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]].

If `split_output` is any other value besides {nil}, the cleanup categories are returned as a pre-formatted concatenated

string.

]==]

function export.format_IPA(lang, pron, split_output)

local err = {}

local categories = {}

-- `pron` shouldn't contain ref tags.

if match(pron, "\127'\"`UNIQ%-%-ref%-[%dA-F]+%-QINU`\"'\127") then

error("<ref> tags found inside pronunciation parameter.")

end

if ~~reconstructed~~ then

local phonemic, phonetic = split_phonemic_phonetic(pron)

pron = "*" .. ~~pron~~

pron = format_one_IPA(lang, phonemic, err, categories)

if phonetic then

phonetic = format_one_IPA(lang, phonetic, err, categories)

pron = pron .. " " .. phonetic

end

if err[1] then

if err[1] and is_preview() then

err = ' ' .~~. table~~.concat(err, ', ') .. ''

err = ' ' .. concat(err, ", ") .. ""

else

err = ""

end

if split_output ~~then -- for use of IPA in links~~

return process_maybe_split_categories(split_output, categories, '' .. pron .. "", lang,

~~return~~ '' .. pron .. '', ~~table.concat(categories)~~, ~~err~~

err)

~~else~~

~~return '' .. pron .. '' ..~~ err ~~.. table.concat(categories~~)

~~end~~

end

~~function export~~.~~example~~(~~frame~~)

--[==[

~~local output =~~ {}

Format a line of one or more enPR pronunciations as {{tl|enPR}} would do it, i.e. with a preceding {"enPR:"} (linked to

[[Appendix:English pronunciation]]) followed by one or more formatted, comma-separated enPR pronunciations. The

~~local m_links = require~~(~~'Module~~:~~links')~~

pronunciations are formatted by wrapping them in the `AHD` and `enPR` CSS classes and adding any left and

~~local m_languages = require~~(~~'Module:languages'~~)

right regular and accent qualifiers. In addition, the overall result is wrapped in any overall left and right regular

and accent qualifiers. There is a single parameter `data`, an object with the following fields:

~~table~~.~~insert~~(

* `items` is a list of enPR pronunciations, each of which is an object with the following properties:

~~output~~,

** `pron`: the enPR pronunciation;

[[

** `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted pronunciation;

{~~| class=~~"~~wikitable~~"

** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;

~~! Term !! IPA !! Generated X-SAMPA !! Regenerated IPA !! Matched?~~

** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;

]]

** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation.

)

* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted

local ~~row~~ =

pronunciations and preceding {"enPR:"}.

[[

* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.

|-

* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted

~~| link || IPA || XSAMPA || regenerated_IPA || matched~~

pronunciations and preceding {"enPR:"}.

]]

* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.

]==]

local ~~examples~~ = mw.~~text.split~~(~~frame.args[1],~~ ",%s*")

function export.format_enPR_full(data)

local prefix = "[[wikt:Appendix:English pronunciation|enPR]]: "

local ~~m_XSAMPA~~ = ~~require("Module:IPA/X-SAMPA")~~

local lang = require("Module:languages").getByCode("en")

local parts = {}

for _, ~~example~~ in ~~pairs~~(~~examples~~) do

local ~~lang, word~~ = ~~match(example,~~ "~~(%l%l%l?):(~~.~~+) [~~/~~%[]~~")

for _, item in ipairs(data.items) do

local part = '' .. item.pron .. ""

if ~~lang~~ then

~~lang~~ = ~~m_languages.getByCode~~(~~lang~~) ~~or error('"'~~ .. ~~lang~~ .. ~~'" is not~~ a ~~valid language code~~.')

if item.q and item.q[1] or item.qq and item.qq[1] or item.a and item.a[1] or item.aa and item.aa[1] then

part = require("Module:pron qualifier").format_qualifiers {

lang = lang,

text = part,

q = item.q,

qq = item.qq,

a = item.a,

aa = item.aa,

}

end

insert(parts, part)

local ~~IPA~~ = ~~match~~(~~example~~, "~~/[^/]+/~~")

end

or ~~match(example, "%[~~[~~^%]]+%~~]")

or ~~error('No IPA transcription found in "'~~ .. ~~example~~ .. ~~'".')~~

local prontext = prefix .. concat(parts, ", ")

~~local XSAMPA~~ = ~~m_XSAMPA.IPA_to_XSAMPA~~(~~IPA~~)

if data.q and data.q[1] or data.qq and data.qq[1] or data.a and data.a[1] or data.aa and data.aa[1] then

~~local regenerated_IPA = m_XSAMPA~~.~~XSAMPA_to_IPA(XSAMPA)~~

prontext = require(pron_qualifier_module).format_qualifiers {

lang = lang,

~~content =~~ {

text = prontext,

~~link = lang and word and m_links.full_link{ term = word,~~ lang = lang },

q = data.q,

~~matched~~ = ~~IPA == regenerated_IPA~~

qq = data.qq,

~~and 'yes'~~

a = data.a,

~~or 'no'~~,

aa = data.aa,

~~IPA~~ = ~~''~~ .~~. IPA .. ''~~,

~~XSAMPA~~ = ~~'<code>' .. XSAMPA .~~. ~~''~~,

~~regenerated_IPA~~ = ~~''~~ .~~. regenerated_IPA .. ''~~

}

~~local function add_content(item)~~

~~return content[item] or ""~~

~~end~~

~~local row = gsub(row, "[%a_]+", add_content)~~

~~table.insert(output, row)~~

end

~~table.insert(output, "|}")~~

return prontext

return ~~table.concat(output)~~

end

return export

@@ Line 1: / Line 1: @@
 local export = {}
--- [[Module:IPA/data]]
-local m_data = mw.loadData('Module:IPA/data') -- [[Module:IPA/data]]
+local force_cat = false -- for testing
-local m_symbols = mw.loadData('Module:IPA/data/symbols') -- [[Module:IPA/data/symbols]]
+local pages_module = "Module:pages"
+local pron_qualifier_module = "Module:pron qualifier"
+local qualifier_module = "Module:qualifier"
+local references_module = "Module:references"
+local string_utilities_module = "Module:string utilities"
+local syllables_module = "Module:syllables"
+local utilities_module = "Module:utilities"
+local m_data = mw.loadData("Module:IPA/data")
+local m_str_utils = require(string_utilities_module)
 local m_syllables -- [[Module:syllables]]; loaded below if needed
+local m_symbols = mw.loadData("Module:IPA/data/symbols")
+local concat = table.concat
+local decode_entities = m_str_utils.decode_entities
+local find = string.find
+local gmatch = m_str_utils.gmatch
+local gsub = string.gsub
+local insert = table.insert
+local is_preview = require(pages_module).is_preview
+local len = m_str_utils.len
+local listToText = mw.text.listToText
+local match = string.match
+local pattern_escape = m_str_utils.pattern_escape
+local sub = string.sub
+local u = m_str_utils.char
+local ugsub = m_str_utils.gsub
+local umatch = m_str_utils.match
+local usub = m_str_utils.sub
-local sub = mw.ustring.sub
+local namespace = mw.title.getCurrentTitle().namespace
-local find = mw.ustring.find
+local is_content_page = namespace == 0 or namespace == 120
-local gsub = mw.ustring.gsub
-local match = mw.ustring.match
-local gmatch = mw.ustring.gmatch
-local U = mw.ustring.char
-function export.format_IPA_full(lang, items, err, separator, sortKey, no_count)
+local function process_maybe_split_categories(split_output, categories, prontext, lang, errtext)
-	local IPA_key, key_link, err_text, prefix, IPAs, category
+	if split_output ~= "raw" then
-	local hasKey = m_data.langs_with_infopages
+		if categories[1] then
-	local namespace = mw.title.getCurrentTitle().nsText
+			categories = require(utilities_module).format_categories(categories, lang, nil, nil, force_cat)
+		else
-	if err then
+			categories = ""
-		err_text = '<span class="error">' .. err .. '</span>'
+		end
-	else
+	end
-		if hasKey[lang:getCode()] then
+	if split_output then -- for use of IPA in links, etc.
-			IPA_key = "wikt:Appendix:" .. lang:getCanonicalName() .. " pronunciation"
+		if errtext then
+			return prontext, categories, errtext
 		else
-			IPA_key = "wikipedia:" .. lang:getCanonicalName() .. " phonology"
+			return prontext, categories
 		end
+	else
-		key_link = "[[" .. IPA_key .. "|key]]"
+		return prontext .. (errtext or "") .. categories
+	end
+end
+--[==[
+Format a line of one or more IPA pronunciations as {{tl|IPA}} would do it, i.e. with a preceding {"IPA:"} followed by
+the word {"key"} linking to an Appendix page describing the language's phonology, and with an added category
+` ``lang`` terms with IPA pronunciation`. Other than the extra preceding text and category, this is identical
+to {format_IPA_multiple()}, and the considerations described there in the documentation apply here as well. There is a
+single parameter `data`, an object with the following fields:
+* `lang`: Object representing the language of the pronunciations, which is used when adding cleanup categories for
+   pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
+   add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); for adding a category
+   ` ``lang`` terms with IPA pronunciation`; and for determining the proper sort keys for categories. Unlike
+   for {format_IPA_multiple()}, `lang` may not be {nil}.
+* `items`: List of pronunciations, in exactly the same format as for {format_IPA_multiple()}.
+* `err`: If not {nil}, a string containing an error message to use in place of the link to the language's phonology.
+* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
+  first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
+  `items`.
+* `sort_key`: Explicit sort key used for categories.
+* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
+  only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
+  given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
+  or other categories. If you need them suppressed, use `split_output` to return the categories separately and ignore
+  them.
+* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
+  categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
+  the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
+  strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
+  `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
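+* `include_langname`: If specified, prefix the result with the language name, followed by a colon.
+* `key`: If specified, the page that the word {"key"} links to, in place of the default {"IPA for "} followed by the
+  language's canonical name. Ignored when `err` is given.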
+* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
+  pronunciations and preceding {"IPA:"}.
+* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
+* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
+  pronunciations and preceding {"IPA:"}.
+* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
+]==]
+function export.format_IPA_full(data)
+	if type(data) ~= "table" or data.getCode then
+		error("Must now supply a table of arguments to format_IPA_full(); first argument should be that table, not a language object")
+	end
+	local lang = data.lang
+	local items = data.items
+	local err = data.err
+	local separator = data.separator
+	local sort_key = data.sort_key
+	local no_count = data.no_count
+	local split_output = data.split_output
+	local q = data.q
+	local qq = data.qq
+	local a = data.a
+	local aa = data.aa
+	local include_langname = data.include_langname
+	local key = data.key
+	if not lang or not lang.getCode then
+		error("Must specify language to format_IPA_full()")
 	end
+	local langname = lang:getCanonicalName()
+	local prefix_text
-	local prefix = "[[wikt:Wiktionary:International Phonetic Alphabet|IPA]]<sup>(" .. ( key_link or err_text ) .. ")</sup>:&#32;"
+	if err then
+		prefix_text = '<span class="error">' .. err .. '</span>'
-	IPAs = export.format_IPA_multiple(lang, items, separator, no_count)
-	if lang and (namespace == "" or namespace == "Reconstruction") then
-		sortKey = sortKey or lang:makeSortKey(mw.title.getCurrentTitle().text)
-		sortKey = sortKey and ("|" .. sortKey) or ""
-		category = "[[Category:" .. lang:getCanonicalName() .. " terms with IPA pronunciation" .. sortKey .. "]]"
 	else
-		category = ""
+		prefix_text = key or "IPA for " .. langname
+		prefix_text = "[[" .. prefix_text .. "|key]]"
 	end
-	return prefix .. IPAs .. category
+	local prefix = "[[wikt:Wiktionary:International Phonetic Alphabet|IPA]]<sup>(" .. prefix_text .. ")</sup>:&#32;"
+	local IPAs, categories = export.format_IPA_multiple(lang, items, separator, no_count, "raw")
+	local prontext = prefix .. IPAs
+	if q and q[1] or qq and qq[1] or a and a[1] or aa and aa[1] then
+		prontext = require(pron_qualifier_module).format_qualifiers {
+			lang = lang,
+			text = prontext,
+			q = q,
+			qq = qq,
+			a = a,
+			aa = aa,
+		}
+	end
+	if include_langname then
+		prontext = langname .. ": " .. prontext
+	end
+	return process_maybe_split_categories(split_output, categories, prontext, lang)
+end
+local function split_phonemic_phonetic(pron)
+	local reconstructed, phonemic, phonetic = match(pron, "^(%*?)(/.-/)%s+(%[.-%])$")
+	if reconstructed then
+		return reconstructed .. phonemic, reconstructed .. phonetic
+	else
+		return pron, nil
+	end
 end
 local function determine_repr(pron)
-	local repr_mark = {}
+	local reconstructed
-	local repr, reconstructed
+	-- Temporarily remove any initial asterisk before representation marks,
-	-- remove initial asterisk before representation marks, used on some Reconstruction pages
+	-- which avoids having to account for it in the data, but set the
-	if find(pron, "^%*") then
+	-- `reconstructed` flag.
+	if sub(pron, 1, 1) == "*" then
 		reconstructed = true
 		pron = sub(pron, 2)
 	end
-	local representation_types = {
+	-- Some representation types have aliases for convenience (e.g. "// //" is
-		['/'] = { right = '/', type = 'phonemic', },
+	-- an alias for "⫽ ⫽"), and these need to be substituted in before checking
-		['['] = { right = ']', type = 'phonetic', },
+	-- for other data.
-		['⟨'] = { right = '⟩', type = 'orthographic', },
+	local opening, n = match(pron, "^.[\128-\191]*")
-		['-'] = { type = 'rhyme' },
+	local subs_data = m_data.representation_subs[opening]
-	}
+	if subs_data then
+		pron, n = ugsub(pron, subs_data[1], subs_data[2])
-	repr_mark.i, repr_mark.f, repr_mark.left, repr_mark.right = find(pron, '^(.).-(.)$')
+		-- If the substitution was made, `opening` needs to be changed to the
+		-- new opening character.
-	local representation_type = representation_types[repr_mark.left]
+		if n ~= 0 then
+			opening = subs_data[3]
-	if representation_type then
+		end
-		if representation_type.right then
+	end
-			if repr_mark.right == representation_type.right then
-				repr = representation_type.type
+	-- Get the type data based on the opening character (if any), and set the
-			end
+	-- representation type if the closing character matches.
-		else
+	local type_data, repr, closing = m_data.representation_types[opening]
-			repr = representation_type.type
+	if type_data then
+		closing = type_data[2]
+		if type_data and match(pron, pattern_escape(closing) .. "$", #opening + 1) then
+			repr = type_data[1]
 		end
-	else
-		repr = nil
 	end
-	return repr, reconstructed
+	-- Default to the empty string.
+	if not repr then
+		opening, closing = "", ""
+	end
+	-- Reattach the asterisk if reconstructed.
+	if reconstructed then
+		pron = "*" .. pron
+	end
+	return pron, repr, opening, closing, reconstructed
 end
 local function hasInvalidSeparators(transcription)
-	if find(transcription, "%.[ˈˌ]") then
+	-- Escape certain characters as well as pauses, which have the format "(...)" (with any number of dots), to avoid false-positives.
-		return true
+	transcription = transcription:gsub(".[\128-\191]*", m_symbols.separator_escapes)
-	else
+		:gsub("%(%.+%)", "\3")
-		return false
+		:gsub("[()]+", "")
-	end
+	return (
+		transcription:find("..", nil, true) or
+		transcription:match("%.%f[%z \1\2\3,:;]") or
+		transcription:match("\1%f[%z \2\3,:;]") or
+		transcription:match("\2%f[%z \1\3,:;]") or
+		transcription:match("\3[:;]") or
+		transcription:match("%f[^%z \1\2\3,]%.")
+	) and true or false
 end
-function export.format_IPA_multiple(lang, items, separator, no_count)
+--[==[
-	local notes = {}
+Format a line of one or more bare IPA pronunciations (i.e. without any preceding {"IPA:"} and without adding to a
+category ` ``lang`` terms with IPA pronunciation`). Individual pronunciations are formatted using
+{format_IPA()} and are combined with separators, qualifiers, pre-text, post-text, etc. to form a line of pronunciations.
+Parameters accepted are:
+* `lang` is an object representing the language of the pronunciations, which is used when adding cleanup categories for
+   pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
+   add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); and for computing the
+   proper sort keys for categories. `lang` may be {nil}.
+* `items` is a list of pronunciations, each of which is an object with the following properties:
+** `pron`: the pronunciation, in the same format as is accepted by {format_IPA()}, i.e. it should be either phonemic
+     (surrounded by {/.../}), phonetic (surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}) or a rhyme
+	 (beginning with a hyphen);
+** `pretext`: text to display directly before the formatted pronunciation, inside of any qualifiers or accent
+     qualifiers;
+** `posttext`: text to display directly after the formatted pronunciation, inside of any qualifiers or accent
+     qualifiers;
+** `q` or `qualifiers`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted
+     pronunciation; note that `qualifiers` is deprecated;
+** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
+** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
+** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation;
+** `refs`: {nil} or a list of references or reference specs to add after the pronunciation and any posttext and
+     qualifiers; the value of a list item is either a string containing the reference text (typically a call to a
+	 citation template such as {{tl|cite-book}}, or a template wrapping such a call), or an object with fields `text`
+	 (the reference text), `name` (the name of the reference, as in {{cd|<nowiki><ref name="foo">...</ref></nowiki>}}
+	 or {{cd|<nowiki><ref name="foo" /></nowiki>}}) and/or `group` (the group of the reference, as in
+	 {{cd|<nowiki><ref name="foo" group="bar">...</ref></nowiki>}} or
+	 {{cd|<nowiki><ref name="foo" group="bar"/></nowiki>}}); this uses a parser function to format the reference
+	 appropriately and insert a footnote number that hyperlinks to the actual reference, located in the
+	 {{cd|<nowiki><references /></nowiki>}} section;
+** `gloss`: {nil} or a gloss (definition) for this item, if different definitions have different pronunciations;
+** `pos`: {nil} or a part of speech for this item, if different parts of speech have different pronunciations;
+** `separator`: the separator text to insert directly before the formatted pronunciation and all qualifiers, accent
+   qualifiers and pre-text; defaults to the outer `separator` parameter.
+* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
+  first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
+  `items`.
+* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
+  only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
+  given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
+  categories. If you need them suppressed, use `split_output` to return the categories separately and ignore them.
+* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
+  categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
+  the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
+  strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
+  `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
+]==]
+function export.format_IPA_multiple(lang, items, separator, no_count, split_output)
 	local categories = {}
-	separator = separator or ', '
+	separator = separator or ", "
 	-- Format
 	if not items[1] then
-		if mw.title.getCurrentTitle().nsText == "Template" then
+		if namespace == 10 then -- Template
-			table.insert(items, {pron = "/aɪ piː ˈeɪ/"})
+			insert(items, {pron = "/aɪ piː ˈeɪ/"})
-		else
-			table.insert(categories, "[[Category:Pronunciation templates without a pronunciation]]")
 		end
 	end
 	local bits = {}
-	for _, item in ipairs(items) do
+	for i, item in ipairs(items) do
-		local bit = export.format_IPA(lang, item.pron)
+		local bit
-		if item.qualifiers and item.qualifiers[1] then
+		-- If the pronunciation is entirely empty, allow this and don't do anything, so that e.g. the pretext and/or
-			bit = require("Module:qualifier").format_qualifier(item.qualifiers) .. " " .. bit
+		-- posttext can be specified to force something like ''unknown'' to appear in place of the pronunciation
+		-- (as happens e.g. when ? is used as a respelling in [[Module:ca-IPA]]; see [[guèiser]] for an example).
+		if item.pron == "" then
+			bit = ""
+		else
+			local item_categories, errtext
+			bit, item_categories, errtext = export.format_IPA(lang, item.pron, "raw")
+			bit = bit .. errtext
+			for _, cat in ipairs(item_categories) do
+				insert(categories, cat)
+			end
+		end
+		if item.pretext then
+			bit = item.pretext .. bit
+		end
+		if item.posttext then
+			bit = bit .. item.posttext
+		end
+		local has_qualifiers = item.q and item.q[1] or item.qq and item.qq[1] or item.qualifiers and item.qualifiers[1]
+			or item.a and item.a[1] or item.aa and item.aa[1]
+		local has_gloss_or_pos = item.gloss or item.pos
+		if has_qualifiers or has_gloss_or_pos then
+			-- FIXME: Currently we tack the gloss and POS (in that order) onto the end of the regular left qualifiers.
+			-- Should we do something different?
+			local q = item.q
+			if has_gloss_or_pos then
+				q = mw.clone(item.q) or {}
+				if item.gloss then
+					local m_qualifier = require(qualifier_module)
+					insert(q, m_qualifier.wrap_qualifier_css("“", "quote") .. item.gloss ..
+						m_qualifier.wrap_qualifier_css("”", "quote"))
+				end
+				if item.pos then
+					-- FIXME: Consider expanding aliases as found in [[Module:headword/data]] or similar.
+					insert(q, item.pos)
+				end
+			end
+			bit = require("Module:pron qualifier").format_qualifiers {
+				lang = lang,
+				text = bit,
+				q = q,
+				qq = item.qq,
+				qualifiers = item.qualifiers,
+				a = item.a,
+				aa = item.aa,
+			}
 		end
 		if item.note then
-			bit = bit .. mw.getCurrentFrame():extensionTag("ref", item.note)
+			-- Support removed on 2024-06-15.
+			error("Support for `.note` has been removed; switch to `.refs` (which must be a list)")
 		end
+		if item.refs then
-		table.insert(bits, bit)
+			local refspecs = item.refs
+			if #refspecs > 0 then
-		--[=[	[[Special:WhatLinksHere/Template:tracking/IPA/syntax-error]]
+				bit = bit .. require(references_module).format_references(refspecs)
-				The length or gemination symbol should not appear after a syllable break or stress symbol.	]=]
+			end
-		if find(item.pron, "[ˈˌ%.][ːˑ]") then
-			require("Module:debug").track("IPA/syntax-error")
 		end
+		bit = (item.separator or (i == 1 and "" or separator)) .. bit
+		insert(bits, bit)
 		if lang then
 			-- Add syllable count if the language's diphthongs are listed in [[Module:syllables]].
-			if not no_count and mw.title.getCurrentTitle().namespace == 0 then
+			-- Don't do this if the term has spaces, a liaison mark (‿) or isn't in mainspace.
-				m_syllables = m_syllables or require('Module:syllables')
+			if not no_count and namespace == 0 then
-				if m_syllables.hasDiphthongs(lang) then
+				m_syllables = m_syllables or require(syllables_module)
-					if determine_repr(item.pron) == "phonemic" or lang:getCode() == "ru" then
+				local langcode = lang:getCode()
-						local syllable_count = m_syllables.getVowels(item.pron, lang)
+				if m_data.langs_to_generate_syllable_count_categories[langcode] then
+					local raw_phonemic, phonetic, use_it = split_phonemic_phonetic(item.pron)
+					local phonemic, repr = determine_repr(raw_phonemic)
+					if not phonetic then -- not a '/.../ [...]' combined pronunciation
+						if m_data.langs_to_use_phonetic_notation[langcode] then
+							use_it = repr == "phonetic" and phonemic or nil
+						else
+							use_it = repr == "phonemic" and phonemic or nil
+						end
+					elseif repr == "phonetic" then
+						use_it = phonetic
+					elseif repr == "phonemic" then
+						use_it = phonemic
+					end
+					-- Note: two uses of find with plain patterns is much faster than umatch with [ ‿].
+					if use_it and not (find(use_it, " ") or find(use_it, "‿")) then
+						local syllable_count = m_syllables.getVowels(use_it, lang)
 						if syllable_count then
-							table.insert(categories, "[[Category:" .. lang:getCanonicalName() .. " " .. syllable_count .. "-syllable words]]")
+							insert(categories, lang:getCanonicalName() .. " " .. syllable_count ..
-						end
+								"-syllable words")
+						end
 					end
-				end
-			end
-			if lang:getCode() == "en" then
-				if hasInvalidSeparators(item.pron) then
-					table.insert(categories, "[[Category:IPA for English using .ˈ or .ˌ]]")
 				end
 			end
@@ Line 148: / Line 379: @@
 	end
-	return table.concat(bits, separator) .. table.concat(categories)
+	return process_maybe_split_categories(split_output, categories, concat(bits), lang)
 end
--- Takes an IPA pronunciation and formats it and adds cleanup categories.
+--[=[
-function export.format_IPA(lang, pron, split_output)
+Format a single IPA pronunciation, which cannot be a combined spec (such as {/.../ [...]}). This has been extracted from
-	local err = {}
+{format_IPA()} to allow the latter to handle such combined specs. This works like {format_IPA()} but requires that
-	local categories = {}
+pre-created {err} (for error messages) and {categories} lists be passed in, and adds any generated error messages and
+categories to those lists. A single value is returned, the pronunciation, which is usually the same as passed in, but
-	-- Remove wikilinks, so that wikilink brackets are not misinterpreted as
+may have HTML added surrounding invalid characters so they appear in red.
-	-- indicating phonemic transcription
+]=]
-	local str_gsub = string.gsub
+local function format_one_IPA(lang, raw_pron, err, categories)
-	local without_links = str_gsub(pron, '%[%[[^|%]]+|([^%]]+)%]%]', '%1')
+	-- Disallow wikilinks.
-	without_links = str_gsub(without_links, '%[%[[^%]]+%]%]', '%1')
+	if match(raw_pron, "%[%[.-%]%]") then
+		error("IPA input must not contain wikilinks.")
-	-- Detect whether this is a phonemic or phonetic transcription
-	local repr, reconstructed = determine_repr(without_links)
-	if reconstructed then
-		pron = sub(pron, 2)
 	end
-	-- If valid, strip the representation marks
+	raw_pron = decode_entities(raw_pron)
-	if repr == "phonemic" then
-		pron = sub(pron, 2, -2)
+	-- Detect the type of transcription.
-		without_links = sub(without_links, 2, -2)
+	local pron, repr, opening, closing, reconstructed = determine_repr(raw_pron)
-	elseif repr == "phonetic" then
-		pron = sub(pron, 2, -2)
-		without_links = sub(without_links, 2, -2)
-	elseif repr == "orthographic" then
-		pron = sub(pron, 2, -2)
-		without_links = sub(without_links, 2, -2)
-	elseif repr == "rhyme" then
-		pron = sub(pron, 2)
-		without_links = sub(without_links, 2)
-	else
-		table.insert(categories, "[[Category:IPA pronunciations with invalid representation marks]]")
-		-- table.insert(err, "invalid representation marks")
-		-- Removed because it's annoying when previewing pronunciation pages.
-	end
-	if pron == "" then
+	-- Strip any reconstruction asterisk and representation marks.
-		table.insert(categories, "[[Category:IPA pronunciations with no pronunciation present]]")
+	pron = sub(pron, #opening + 1 + (reconstructed and 1 or 0), -#closing - 1)
+	if repr ~= "orthographic" and lang and lang:getCode() == "en" and hasInvalidSeparators(pron) then
+		insert(categories, "English IPA pronunciations with invalid separators")
 	end
 	-- Check for obsolete and nonstandard symbols
-	for i, symbol in ipairs(m_data.nonstandard) do
+	for _, symbol in ipairs(m_data.nonstandard) do
 		local result
 		for nonstandard in gmatch(pron, symbol) do
@@ Line 199: / Line 414: @@
 				result = {}
 			end
-			table.insert(result, nonstandard)
+			insert(result, nonstandard)
-			table.insert(categories, "[[Category:IPA pronunciations with obsolete or nonstandard characters|" .. nonstandard .. "]]")
 		end
 		if result then
-			table.insert(err, "obsolete or nonstandard characters (" .. table.concat(result) .. ")")
+			insert(err, "obsolete or nonstandard characters (" .. concat(result) .. ")")
 			break
 		end
 	end
 	--[[ Check for invalid symbols after removing the following:
 . wikilinks (handled above)
@@ Line 214: / Line 428: @@
 . bolding
 . italics
-. HTML entity for space
+. asterisk at beginning of transcription
-. asterisk at beginning of transcription
+. comma followed by spacing characters
-. comma followed by spacing characters
+. superscripts enclosed in superscript parentheses		]]
-. superscripts enclosed in superscript parentheses		]]
 	local found_HTML
-	local result = str_gsub(without_links, "<(%a+)[^>]*>([^<]+)</%1>",
+	local result = gsub(pron, "<(%a+)[^>]*>([^<]+)</%1>",
 		function(tagName, content)
 			found_HTML = true
 			return content
 		end)
-	result = str_gsub(result, "'''([^']*)'''", "%1")
+	result = gsub(result, "'''([^']*)'''", "%1")
-	result = str_gsub(result, "''([^']*)''", "%1")
+	result = gsub(result, "''([^']*)''", "%1")
-	result = str_gsub(result, "&[^;]+;", "") -- This may catch things that are not valid character entities.
+	result = gsub(result, "^%*", "")
-	result = str_gsub(result, "^%*", "")
+	result = ugsub(result, ",%s+", "")
-	result = gsub(result, ",%s+", "")
-	result = gsub(result, "⁽[".. m_symbols.superscripts .. "]+⁾", "")
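+	-- VS15 (U+FE0E): if `pron` contains a character listed in `add_vs15`, strip any existing VS15 from `result` and `pron`, then re-append one after each such character in `pron`.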
-	result = gsub(result, '[' .. m_symbols.valid .. ']', '')
+	local vs15_class = "[" .. m_symbols.add_vs15 .. "]"
-	if result ~= '' then
+	if umatch(pron, vs15_class) then
-		local suggestions = {}
+		local vs15 = u(0xFE0E)
-		mw.log(pron, result)
+		if find(result, vs15) then
-		local namespace = mw.title.getCurrentTitle().namespace
+			result = gsub(result, vs15, "")
-		local category
+			pron = gsub(pron, vs15, "")
-		if namespace == 0 then
-			-- main namespace
-			category = "IPA pronunciations with invalid IPA characters"
-		elseif namespace == 118 then
-			-- reconstruction namespace
-			category = "IPA pronunciations with invalid IPA characters/reconstruction"
-		else
-			category = "IPA pronunciations with invalid IPA characters/non_mainspace"
 		end
-		for character in gmatch(result, ".") do
+		pron = ugsub(pron, vs15_class, "%0" .. vs15)
-			local suggestion = m_symbols.suggestions[character]
+	end
-			if suggestion then
-				table.insert(suggestions, character .. " with " .. suggestion)
+	if result ~= "" then
+		if lang then
+			-- Get the per_lang_valid data, and convert any per-language valid sequences to spaces.
+			local per_lang_valid = m_symbols.per_lang_valid[lang:getCode()]
+			if per_lang_valid then
+				if type(per_lang_valid) == "table" then
+					for _, pattern in pairs(per_lang_valid) do
+						result = ugsub(result, pattern, " ")
+					end
+				else -- Should be a string.
+					result = ugsub(result, per_lang_valid, " ")
+				end
 			end
-			table.insert(categories, "[[Category:" .. category .. "|" .. character .. "]]")
 		end
-		table.insert(err, "invalid IPA characters (" .. result .. ")")
+		local suggestions
-		if suggestions[1] then
+		-- Check for any invalid sequences, excluding anything in the per-language lookup table.
-			table.insert(err, "replace " .. table.concat(suggestions, ", "))
+		for k, v in pairs(m_symbols.invalid) do
+			if find(result, k, nil, true) then
+				result = ugsub(result, k, v)
+			end
+		end
+		-- Convert any valid character sequences to spaces
+		for _, pattern in pairs(m_symbols.valid) do
+			result = ugsub(result, pattern, " ")
 		end
 	end
-	if found_HTML then
+	if (repr == "phonemic" or repr == "rhyme") and lang and m_data.phonemes[lang:getCode()] then
-		table.insert(categories, "[[Category:IPA pronunciations with paired HTML tags]]")
+		local valid_phonemes = m_data.phonemes[lang:getCode()]
-	end
+		local rest = pron
+		local phonemes = {}
-	-- Reference inside IPA template usage
-	-- FIXME: Doesn't work; you can't put HTML in module output.
+		while #rest > 0 do
-	--if mw.ustring.find(pron, '</ref>') then
+			local longestmatch, longestmatch_len = "", 0
-	--	table.insert(categories, "[[Category:IPA pronunciations with reference]]")
-	--end
+			local rest_init = sub(rest, 1, 1)
+			if rest_init == "(" or rest_init == ")" then
-	if repr == "phonemic" or repr == "rhyme" then
+				longestmatch = rest_init
-		if lang and m_data.phonemes[lang:getCode()] then
+				longestmatch_len = 1
-			local valid_phonemes = m_data.phonemes[lang:getCode()]
+			else
-			local rest = pron
+				for _, phoneme in ipairs(valid_phonemes) do
-			local phonemes = {}
+					local phoneme_len = len(phoneme)
+					if phoneme_len > longestmatch_len and usub(rest, 1, phoneme_len) == phoneme then
-			while mw.ustring.len(rest) > 0 do
+						longestmatch = phoneme
-				local longestmatch = ""
+						longestmatch_len = len(longestmatch)
-				if sub(rest, 1, 1) == "(" or sub(rest, 1, 1) == ")" then
-					longestmatch = sub(rest, 1, 1)
-				else
-					for _, phoneme in ipairs(valid_phonemes) do
-						if mw.ustring.len(phoneme) > mw.ustring.len(longestmatch) and sub(rest, 1, mw.ustring.len(phoneme)) == phoneme then
-							longestmatch = phoneme
-						end
 					end
 				end
-				if mw.ustring.len(longestmatch) > 0 then
-					table.insert(phonemes, longestmatch)
-					rest = sub(rest, mw.ustring.len(longestmatch) + 1)
-				else
-					local phoneme = sub(rest, 1, 1)
-					table.insert(phonemes, "<span style=\"color: red\">" .. phoneme .. "</span>")
-					rest = sub(rest, 2)
-					table.insert(categories, "[[Category:IPA pronunciations with invalid phonemes/" .. lang:getCode() .. "]]")
-					require("Module:debug").track("IPA/invalid phonemes/" .. phoneme)
-				end
 			end
-			pron = table.concat(phonemes)
+			if longestmatch_len > 0 then
+				insert(phonemes, longestmatch)
+				rest = usub(rest, longestmatch_len + 1)
+			else
+				local phoneme = usub(rest, 1, 1)
+				insert(phonemes, "<span style=\"color: var(--wikt-palette-red,red)\">" .. phoneme .. "</span>")
+				rest = usub(rest, 2)
+			end
 		end
-		if repr == "phonemic" then
+		pron = concat(phonemes)
-			pron = "/" .. pron .. "/"
+	end
-		else
-			pron = "-" .. pron
+	return (reconstructed and "*" or "") .. opening .. pron .. closing
-		end
+end
-	elseif repr == "phonetic" then
-		pron = "[" .. pron .. "]"
+--[==[
-	elseif repr == "orthographic" then
+Format an IPA pronunciation. This wraps the pronunciation in appropriate CSS classes and adds cleanup categories and
-		pron = "⟨" .. pron .. "⟩"
+error messages as needed. The pronunciation `pron` should be either phonemic (surrounded by {/.../}), phonetic
+(surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}), a rhyme (beginning with a hyphen) or a combined
+phonemic/phonetic spec (of the form {/.../ [...]}). `lang` indicates the language of the pronunciation and can be {nil}.
+If not {nil}, and the specified language has data in [[Module:IPA/data]] indicating the allowed phonemes, then the page
+will be added to a cleanup category and an error message displayed next to the outputted pronunciation. Note that `lang`
+also determines sort key processing in the added cleanup categories. If `split_output` is not given, the return value is
+a concatenation of the formatted pronunciation, error messages and formatted cleanup categories. Otherwise, three values
+are returned: the formatted pronunciation, the cleanup categories and the concatenated error messages. If `split_output`
+is the value {"raw"}, the cleanup categories are returned in list form, where the list elements are a combination of
+category strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]].
+If `split_output` is any other value besides {nil}, the cleanup categories are returned as a pre-formatted concatenated
+string.
+]==]
+function export.format_IPA(lang, pron, split_output)
+	local err = {}
+	local categories = {}
+	-- `pron` shouldn't contain ref tags.
+	if match(pron, "\127'\"`UNIQ%-%-ref%-[%dA-F]+%-QINU`\"'\127") then
+		error("<ref> tags found inside pronunciation parameter.")
 	end
-	if reconstructed then
+	local phonemic, phonetic = split_phonemic_phonetic(pron)
-		pron = "*" .. pron
+	pron = format_one_IPA(lang, phonemic, err, categories)
+	if phonetic then
+		phonetic = format_one_IPA(lang, phonetic, err, categories)
+		pron = pron .. " " .. phonetic
 	end
-	if err[1] then
+	if err[1] and is_preview() then
-		err = '<span class="previewonly error" style="font-size: small;>&#32;' .. table.concat(err, ', ') .. '</span>'
+		err = '<span class="error" style="font-size: small;>&#32;' .. concat(err, ", ") .. "</span>"
 	else
 		err = ""
 	end
-	if split_output then -- for use of IPA in links
+	return process_maybe_split_categories(split_output, categories, '<span class="IPA nowrap">' .. pron .. "</span>", lang,
-		return '<span class="IPA">' .. pron .. '</span>', table.concat(categories), err
+		err)
-	else
-		return '<span class="IPA">' .. pron .. '</span>' .. err .. table.concat(categories)
-	end
 end
-function export.example(frame)
+--[==[
-	local output = {}
+Format a line of one or more enPR pronunciations as {{tl|enPR}} would do it, i.e. with a preceding {"enPR:"} (linked to
+[[Appendix:English pronunciation]]) followed by one or more formatted, comma-separated enPR pronunciations. The
-	local m_links = require('Module:links')
+pronunciations are formatted by wrapping them in the `AHD` and `enPR` CSS classes and adding any left and
-	local m_languages = require('Module:languages')
+right regular and accent qualifiers. In addition, the overall result is wrapped in any overall left and right regular
+and accent qualifiers. There is a single parameter `data`, an object with the following fields:
-	table.insert(
+* `items` is a list of enPR pronunciations, each of which is an object with the following properties:
-		output,
+** `pron`: the enPR pronunciation;
-[[
+** `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted pronunciation;
-{| class="wikitable"
+** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
-! Term !! IPA !! Generated X-SAMPA !! Regenerated IPA !! Matched?
+** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
-]]
+** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation.
-	)
+* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
-	local row =
+  pronunciations and preceding {"enPR:"}.
-[[
+* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
-|-
+* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
-| link || IPA || XSAMPA || regenerated_IPA || matched
+  pronunciations and preceding {"enPR:"}.
-]]
+* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
+]==]
-	local examples = mw.text.split(frame.args[1], ",%s*")
+function export.format_enPR_full(data)
+	local prefix = "[[wikt:Appendix:English pronunciation|enPR]]: "
-	local m_XSAMPA = require("Module:IPA/X-SAMPA")
+	local lang = require("Module:languages").getByCode("en")
+	local parts = {}
-	for _, example in pairs(examples) do
-		local lang, word = match(example, "(%l%l%l?):(.+) [/%[]")
+	for _, item in ipairs(data.items) do
+		local part = '<span class="AHD enPR">' .. item.pron .. "</span>"
-		if lang then
-			lang = m_languages.getByCode(lang) or error('"' .. lang .. '" is not a valid language code.')
+		if item.q and item.q[1] or item.qq and item.qq[1] or item.a and item.a[1] or item.aa and item.aa[1] then
+			part = require("Module:pron qualifier").format_qualifiers {
+				lang = lang,
+				text = part,
+				q = item.q,
+				qq = item.qq,
+				a = item.a,
+				aa = item.aa,
+			}
 		end
+		insert(parts, part)
-		local IPA = match(example, "/[^/]+/")
+	end
-			or match(example, "%[[^%]]+%]")
-			or error('No IPA transcription found in "' .. example .. '".')
+	local prontext = prefix .. concat(parts, ", ")
-		local XSAMPA = m_XSAMPA.IPA_to_XSAMPA(IPA)
+	if data.q and data.q[1] or data.qq and data.qq[1] or data.a and data.a[1] or data.aa and data.aa[1] then
-		local regenerated_IPA = m_XSAMPA.XSAMPA_to_IPA(XSAMPA)
+		prontext = require(pron_qualifier_module).format_qualifiers {
+			lang = lang,
-		content = {
+			text = prontext,
-			link = lang and word and m_links.full_link{ term = word, lang = lang },
+			q = data.q,
-			matched = IPA == regenerated_IPA
+			qq = data.qq,
-				and '<span style="color: green;">yes</span>'
+			a = data.a,
-				or '<span style="color: red;">no</span>',
+			aa = data.aa,
-			IPA = '<span class="IPA">' .. IPA .. '</span>',
-			XSAMPA = '<code>' .. XSAMPA .. '</span>',
-			regenerated_IPA = '<span class="IPA">' .. regenerated_IPA .. '</span>'
 		}
-		local function add_content(item)
-			return content[item] or ""
-		end
-		local row = gsub(row, "[%a_]+", add_content)
-		table.insert(output, row)
 	end
-	table.insert(output, "|}")
+	return prontext
-	return table.concat(output)
 end
 return export