|
|
| Line 1: |
Line 1: |
| local export = {} | | local export = {} |
|
| |
|
| local force_cat = false -- for testing | | local m_IPA = require("Module:IPA") |
| | local parameter_utilities_module = "Module:parameter utilities" |
|
| |
|
| local m_data = mw.loadData("Module:IPA/data")
| | -- Used for [[Template:IPA]]. |
| local m_str_utils = require("Module:string utilities")
| | function export.IPA(frame) |
| local m_symbols = mw.loadData("Module:IPA/data/symbols") | | local parent_args = frame:getParent().args |
| local pron_qualifier_module = "Module:pron qualifier" | | local include_langname = frame.args.include_langname |
| local qualifier_module = "Module:qualifier" | | local compat = parent_args.lang |
| local references_module = "Module:references"
| | local offset = compat and 0 or 1 |
| local syllables_module = "Module:syllables" | | local lang_arg = compat and "lang" or 1 |
| local utilities_module = "Module:utilities" | |
| local m_syllables -- [[Module:syllables]]; loaded below if needed
| |
|
| |
|
| local concat = table.concat | | local params = { |
| local find = string.find
| | [lang_arg] = {required = true, type = "language", default = "en"}, |
| local gmatch = m_str_utils.gmatch
| | [1 + offset] = {list = true, disallow_holes = true}, |
| local gsub = string.gsub
| | -- Deprecated; don't use in new code. |
| local insert = table.insert
| | ["qual"] = {list = true, allow_holes = true, separate_no_index = true, alias_of = "q"}, |
| local len = m_str_utils.len
| | ["nocount"] = {type = "boolean"}, |
| local listToText = mw.text.listToText
| | ["nocat"] = {type = "boolean"}, |
| local match = string.match
| | ["sort"] = {}, |
| local sub = string.sub
| | } |
| local u = m_str_utils.char
| |
| local ufind = m_str_utils.find
| |
| local ugsub = m_str_utils.gsub
| |
| local umatch = m_str_utils.match
| |
| local usub = m_str_utils.sub
| |
|
| |
|
| local namespace = mw.title.getCurrentTitle().namespace | | local m_param_utils = require(parameter_utilities_module) |
| local is_content_page = namespace == 0 or namespace == 118
| |
|
| |
|
| local function process_maybe_split_categories(split_output, categories, prontext, lang, errtext) | | local param_mods = m_param_utils.construct_param_mods { |
| if split_output ~= "raw" then
| | {group = {"ref", "a", "q"}}, |
| if categories[1] then
| | {group = "link", include = {"t", "gloss", "pos"}}, |
| categories = require(utilities_module).format_categories(categories, lang, nil, nil, force_cat)
| | } |
| else | |
| categories = ""
| |
| end
| |
| end
| |
| if split_output then -- for use of IPA in links, etc.
| |
| if errtext then
| |
| return prontext, categories, errtext
| |
| else
| |
| return prontext, categories
| |
| end
| |
| else
| |
| return prontext .. (errtext or "") .. categories
| |
| end | |
| end
| |
|
| |
|
| --[==[
| | local items, args = m_param_utils.process_list_arguments { |
| Format a line of one or more IPA pronunciations as {{tl|IPA}} would do it, i.e. with a preceding {"IPA:"} followed by
| | params = params, |
| the word {"key"} linking to an Appendix page describing the language's phonology, and with an added category
| | param_mods = param_mods, |
| {{cd|<var>lang</var> terms with IPA pronunciation}}. Other than the extra preceding text and category, this is identical
| | raw_args = parent_args, |
| to {format_IPA_multiple()}, and the considerations described there in the documentation apply here as well. There is a
| | termarg = 1 + offset, |
| single parameter `data`, an object with the following fields:
| | term_dest = "pron", |
| * `lang`: Object representing the language of the pronunciations, which is used when adding cleanup categories for
| | track_module = "IPA", |
| pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
| | } |
| add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); for adding a category
| |
| {{cd|<var>lang</var> terms with IPA pronunciation}}; and for determining the proper sort keys for categories. Unlike
| |
| for {format_IPA_multiple()}, `lang` may not be {nil}.
| |
| * `items`: List of pronunciations, in exactly the same format as for {format_IPA_multiple()}.
| |
| * `err`: If not {nil}, a string containing an error message to use in place of the link to the language's phonology.
| |
| * `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
| |
| first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
| |
| `items`.
| |
| * `sort_key`: Explicit sort key used for categories.
| |
| * `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
| |
| only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
| |
| given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
| |
| or other categories. If you need them suppressed, use `split_output` to return the categories separately and ignore
| |
| them.
| |
| * `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
| |
| categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
| |
| the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
| |
| strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
| |
| `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
| |
| * `include_langname`: If specified, prefix the result with the language name, followed by a colon.
| |
| * `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
| |
| pronunciations and preceding {"IPA:"}.
| |
| * `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
| |
| * `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
| |
| pronunciations and preceding {"IPA:"}.
| |
| * `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
| |
| ]==]
| |
| function export.format_IPA_full(data)
| |
| if type(data) ~= "table" or data.getCode then
| |
| error("Must now supply a table of arguments to format_IPA_full(); first argument should be that table, not a language object")
| |
| end
| |
| local lang = data.lang
| |
| local items = data.items
| |
| local err = data.err
| |
| local separator = data.separator
| |
| local sort_key = data.sort_key
| |
| local no_count = data.no_count
| |
| local split_output = data.split_output
| |
| local q = data.q
| |
| local qq = data.qq
| |
| local a = data.a
| |
| local aa = data.aa
| |
| local include_langname = data.include_langname
| |
|
| |
|
| local hasKey = m_data.langs_with_infopages | | local lang = args[lang_arg] |
|
| |
|
| if not lang or not lang.getCode then | | local data = { |
| error("Must specify language to format_IPA_full()") | | lang = lang, |
| end
| | items = items, |
| local langname = lang:getCanonicalName()
| | no_count = args.nocount, |
| | | nocat = args.nocat, |
| local prefix_text
| | sort_key = args.sort, |
| if err then
| | include_langname = include_langname, |
| prefix_text = '<span class="error">' .. err .. '</span>' | | q = args.q.default, |
| else
| | qq = args.qq.default, |
| prefix_text = langname .. " pronunciation" | | a = args.a.default, |
| prefix_text = "[[" .. prefix_text .. "|key]]" | | aa = args.aa.default, |
| end
| | } |
| | |
| local prefix = "[[Wiktionary:International Phonetic Alphabet|IPA]]<sup>(" .. prefix_text .. ")</sup>: "
| |
| | |
| local IPAs, categories = export.format_IPA_multiple(lang, items, separator, no_count, "raw") | |
|
| |
|
| if is_content_page then | | return m_IPA.format_IPA_full(data) |
| insert(categories, {
| |
| cat = langname .. " terms with IPA pronunciation",
| |
| sort_key = sort_key
| |
| })
| |
| end
| |
| | |
| local prontext = prefix .. IPAs
| |
| if q and q[1] or qq and qq[1] or a and a[1] or aa and aa[1] then
| |
| prontext = require(pron_qualifier_module).format_qualifiers {
| |
| lang = lang,
| |
| text = prontext,
| |
| q = q,
| |
| qq = qq,
| |
| a = a,
| |
| aa = aa,
| |
| }
| |
| end
| |
| if include_langname then
| |
| prontext = langname .. ": " .. prontext
| |
| end
| |
| return process_maybe_split_categories(split_output, categories, prontext, lang)
| |
| end | | end |
|
| |
|
| local function split_phonemic_phonetic(pron)
| | -- Used for [[Template:IPAchar]]. |
| local reconstructed, phonemic, phonetic = match(pron, "^(%*?)(/.-/)%s+(%[.-%])$") | | function export.IPAchar(frame) |
| if reconstructed then
| | local parent_args = frame.getParent and frame:getParent().args or frame |
| return reconstructed .. phonemic, reconstructed .. phonetic
| |
| else
| |
| return pron, nil
| |
| end
| |
| end
| |
|
| |
|
| local function determine_repr(pron) | | local params = { |
| local repr_mark = {}
| | [1] = {list = true, disallow_holes = true}, |
| local repr, reconstructed | | -- FIXME, remove this. |
| | ["lang"] = {}, -- This parameter is not used and does nothing, but is allowed for futureproofing. |
| | } |
|
| |
|
| -- remove initial asterisk before representation marks, used on some Reconstruction pages | | local m_param_utils = require(parameter_utilities_module) |
| if sub(pron, 1, 1) == "*" then
| |
| reconstructed = true
| |
| pron = sub(pron, 2)
| |
| end
| |
|
| |
|
| local representation_types = { | | local param_mods = m_param_utils.construct_param_mods { |
| ['/'] = { right = '/', type = 'phonemic', }, | | -- It doesn't really make sense to have separate overall a=/aa=/q=/qq= for {{IPAchar}}, which doesn't format a |
| ['['] = { right = ']', type = 'phonetic', }, | | -- whole line but just individual pronunciations. Instead they are associated with the first item. |
| ['⟨'] = { right = '⟩', type = 'orthographic', },
| | {group = {"ref", "a", "q"}, separate_no_index = false}, |
| ['-'] = { type = 'rhyme' }, | | -- Deprecated; don't use in new code. |
| | {param = "qual", alias_of = "q"}, |
| } | | } |
|
| |
|
| repr_mark.i, repr_mark.f, repr_mark.left, repr_mark.right = ufind(pron, '^(.).-(.)$') | | local items, args = m_param_utils.process_list_arguments { |
| | params = params, |
| | param_mods = param_mods, |
| | raw_args = parent_args, |
| | termarg = 1, |
| | term_dest = "pron", |
| | track_module = "IPAchar", |
| | } |
|
| |
|
| local representation_type = representation_types[repr_mark.left] | | -- Format |
| | return m_IPA.format_IPA_multiple(nil, items) |
| | end |
|
| |
|
| if representation_type then
| | function export.XSAMPA(frame) |
| if representation_type.right then
| | local params = { |
| if repr_mark.right == representation_type.right then
| | [1] = { required = true }, |
| repr = representation_type.type
| | } |
| end
| | local args = require("Module:parameters").process(frame:getParent().args, params) |
| else
| | |
| repr = representation_type.type
| | return m_IPA.XSAMPA_to_IPA(args[1] or "[Eg'zA:mp5=]") |
| end
| |
| else | |
| repr = nil
| |
| end | |
| | |
| return repr, reconstructed | |
| end | | end |
|
| |
|
| local function hasInvalidSeparators(transcription)
| | -- Used by [[Template:X2IPA]] |
| if match(transcription, "%.\203[\136\140]") then -- [ˈˌ] | | function export.X2IPAtemplate(frame) |
| return true
| | local parent_args = frame.getParent and frame:getParent().args or frame |
| else | | local compat = parent_args["lang"] |
| return false
| | local offset = compat and 0 or 1 |
| end
| |
| end
| |
|
| |
|
| --[==[
| | local params = { |
| Format a line of one or more bare IPA pronunciations (i.e. without any preceding {"IPA:"} and without adding to a
| | [compat and "lang" or 1] = {required = true, default = "und"}, |
| category {{cd|<var>lang</var> terms with IPA pronunciation}}). Individual pronunciations are formatted using
| | [1 + offset] = {list = true, allow_holes = true}, |
| {format_IPA()} and are combined with separators, qualifiers, pre-text, post-text, etc. to form a line of pronunciations. | | ["ref"] = {list = true, allow_holes = true}, |
| Parameters accepted are:
| | ["a"] = {list = true, allow_holes = true, separate_no_index = true}, |
| * `lang` is an object representing the language of the pronunciations, which is used when adding cleanup categories for
| | ["aa"] = {list = true, allow_holes = true, separate_no_index = true}, |
| pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
| | ["q"] = {list = true, allow_holes = true, separate_no_index = true}, |
| add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); and for computing the
| | ["qq"] = {list = true, allow_holes = true, separate_no_index = true}, |
| proper sort keys for categories. `lang` may be {nil}.
| | ["qual"] = {list = true, allow_holes = true}, |
| * `items` is a list of pronunciations, each of which is an object with the following properties:
| | ["nocount"] = {type = "boolean"}, |
| ** `pron`: the pronunciation, in the same format as is accepted by {format_IPA()}, i.e. it should be either phonemic
| | ["sort"] = {}, |
| (surrounded by {/.../}), phonetic (surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}) or a rhyme
| | } |
| (beginning with a hyphen);
| | |
| ** `pretext`: text to display directly before the formatted pronunciation, inside of any qualifiers or accent
| | local args = require("Module:parameters").process(parent_args, params) |
| qualifiers;
| | |
| ** `posttext`: text to display directly after the formatted pronunciation, inside of any qualifiers or accent
| | local m_XSAMPA = require("Module:IPA/X-SAMPA") |
| qualifiers;
| | |
| ** `q` or `qualifiers`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted
| | local pronunciations, refs, a, aa, q, qq, qual, lang = |
| pronunciation; note that `qualifiers` is deprecated;
| | args[1 + offset], args.ref, args.a, args.aa, args.q, args.qq, args.qual, args[compat and "lang" or 1] |
| ** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
| | |
| ** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
| | local output = {} |
| ** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation;
| | table.insert(output, "{{IPA") |
| ** `refs`: {nil} or a list of references or reference specs to add after the pronunciation and any posttext and
| | |
| qualifiers; the value of a list item is either a string containing the reference text (typically a call to a
| | table.insert(output, "|" .. lang) |
| citation template such as {{tl|cite-book}}, or a template wrapping such a call), or an object with fields `text`
| |
| (the reference text), `name` (the name of the reference, as in {{cd|<nowiki><ref name="foo">...</ref></nowiki>}}
| |
| or {{cd|<nowiki><ref name="foo" /></nowiki>}}) and/or `group` (the group of the reference, as in
| |
| {{cd|<nowiki><ref name="foo" group="bar">...</ref></nowiki>}} or
| |
| {{cd|<nowiki><ref name="foo" group="bar"/></nowiki>}}); this uses a parser function to format the reference
| |
| appropriately and insert a footnote number that hyperlinks to the actual reference, located in the
| |
| {{cd|<nowiki><references /></nowiki>}} section;
| |
| ** `gloss`: {nil} or a gloss (definition) for this item, if different definitions have different pronunciations;
| |
| ** `pos`: {nil} or a part of speech for this item, if different parts of speech have different pronunciations;
| |
| ** `separator`: the separator text to insert directly before the formatted pronunciation and all qualifiers, accent
| |
| qualifiers and pre-text; defaults to the outer `separator` parameter.
| |
| * `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
| |
| first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
| |
| `items`.
| |
| * `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
| |
| only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
| |
| given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
| |
| categories. If you need them suppressed, use `split_output` to return the categories separately and ignore them.
| |
| * `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
| |
| categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
| |
| the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
| |
| strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
| |
| `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
| |
| ]==]
| |
| function export.format_IPA_multiple(lang, items, separator, no_count, split_output)
| |
| local categories = {} | |
| separator = separator or ", " | |
|
| |
|
| -- Format
| | if a.default then |
| if not items[1] then | | table.insert(output, "|a=" .. a.default) |
| if namespace == 10 then -- Template | | end |
| insert(items, {pron = "/aɪ piː ˈeɪ/"})
| | if q.default then |
| else | | table.insert(output, "|q=" .. q.default) |
| insert(categories, "Pronunciation templates without a pronunciation")
| |
| end
| |
| end | | end |
| | | for i = 1, math.max(pronunciations.maxindex, refs.maxindex, a.maxindex, aa.maxindex, q.maxindex, qq.maxindex, |
| local bits = {}
| | qual.maxindex) do |
| | | if pronunciations[i] then |
| for i, item in ipairs(items) do | | table.insert(output, "|" .. m_XSAMPA.XSAMPA_to_IPA(pronunciations[i])) |
| local bit
| |
| | |
| -- If the pronunciation is entirely empty, allow this and don't do anything, so that e.g. the pretext and/or
| |
| -- posttext can be specified to force something like ''unknown'' to appear in place of the pronunciation
| |
| -- (as happens e.g. when ? is used as a respelling in [[Module:ca-IPA]]; see [[guèiser]] for an example). | |
| if item.pron == "" then | |
| bit = "" | |
| else
| |
| local item_categories, errtext
| |
| bit, item_categories, errtext = export.format_IPA(lang, item.pron, "raw")
| |
| bit = bit .. errtext
| |
| for _, cat in ipairs(item_categories) do
| |
| insert(categories, cat)
| |
| end
| |
| end | | end |
| | | if a[i] then |
| if item.pretext then | | table.insert(output, "|a" .. i .. "=" .. a[i]) |
| bit = item.pretext .. bit | |
| end | | end |
| | | if aa[i] then |
| if item.posttext then | | table.insert(output, "|aa" .. i .. "=" .. aa[i]) |
| bit = bit .. item.posttext | |
| end | | end |
| | | if q[i] then |
| local has_qualifiers = item.q and item.q[1] or item.qq and item.qq[1] or item.qualifiers and item.qualifiers[1] | | table.insert(output, "|q" .. i .. "=" .. q[i]) |
| or item.a and item.a[1] or item.aa and item.aa[1]
| |
| local has_gloss_or_pos = item.gloss or item.pos
| |
| if has_qualifiers or has_gloss_or_pos then
| |
| -- FIXME: Currently we tack the gloss and POS (in that order) onto the end of the regular left qualifiers. | |
| -- Should we do something different?
| |
| local q = item.q
| |
| if has_gloss_or_pos then
| |
| q = mw.clone(item.q) or {}
| |
| if item.gloss then
| |
| local m_qualifier = require(qualifier_module)
| |
| insert(q, m_qualifier.wrap_qualifier_css("“", "quote") .. item.gloss ..
| |
| m_qualifier.wrap_qualifier_css("”", "quote"))
| |
| end
| |
| if item.pos then
| |
| -- FIXME: Consider expanding aliases as found in [[Module:headword/data]] or similar.
| |
| insert(q, item.pos)
| |
| end
| |
| end
| |
| | |
| bit = require("Module:pron qualifier").format_qualifiers {
| |
| lang = lang,
| |
| text = bit,
| |
| q = q,
| |
| qq = item.qq,
| |
| qualifiers = item.qualifiers,
| |
| a = item.a,
| |
| aa = item.aa,
| |
| }
| |
| end | | end |
| | | if qq[i] then |
| if item.note then | | table.insert(output, "|qq" .. i .. "=" .. qq[i]) |
| -- Support removed on 2024-06-15. | |
| error("Support for `.note` has been removed; switch to `.refs` (which must be a list)")
| |
| end | | end |
| if item.refs then | | if refs[i] then |
| local refspecs = item.refs | | table.insert(output, "|ref" .. i .. "=" .. refs[i]) |
| if #refspecs > 0 then
| |
| bit = bit .. require(references_module).format_references(refspecs)
| |
| end
| |
| end | | end |
| | | if qual[i] then |
| bit = (item.separator or (i == 1 and "" or separator)) .. bit
| | table.insert(output, "|qual" .. i .. "=" .. qual[i]) |
| | |
| insert(bits, bit)
| |
| | |
| if lang then | |
| -- Add syllable count if the language's diphthongs are listed in [[Module:syllables]].
| |
| -- Don't do this if the term has spaces, a liaison mark (‿) or isn't in mainspace.
| |
| if not no_count and namespace == 0 then | |
| m_syllables = m_syllables or require(syllables_module)
| |
| local langcode = lang:getCode()
| |
| if m_data.langs_to_generate_syllable_count_categories[langcode] then
| |
| local phonemic, phonetic = split_phonemic_phonetic(item.pron)
| |
| local use_it
| |
| if not phonetic then -- not a '/.../ [...]' combined pronunciation
| |
| local repr = determine_repr(phonemic)
| |
| if m_data.langs_to_use_phonetic_notation[langcode] then
| |
| use_it = repr == "phonetic" and phonemic or nil
| |
| else
| |
| use_it = repr == "phonemic" and phonemic or nil
| |
| end
| |
| elseif repr == "phonetic" then
| |
| use_it = phonetic
| |
| elseif repr == "phonemic" then
| |
| use_it = phonemic
| |
| end
| |
| -- Note: two uses of find with plain patterns are much faster than umatch with [ ‿].
| |
| if use_it and not (find(use_it, " ") or find(use_it, "‿")) then
| |
| local syllable_count = m_syllables.getVowels(use_it, lang)
| |
| if syllable_count then
| |
| insert(categories, lang:getCanonicalName() .. " " .. syllable_count ..
| |
| "-syllable words")
| |
| end
| |
| end
| |
| end
| |
| end
| |
| | |
| -- The nature of hasInvalidSeparators() is such that we don't have to split a combined '/.../ [...]' spec
| |
| -- into its parts in order to process it.
| |
| if lang:getCode() == "en" and hasInvalidSeparators(item.pron) then
| |
| insert(categories, "IPA for English using .ˈ or .ˌ")
| |
| end
| |
| end | | end |
| end | | end |
| | | if aa.default then |
| return process_maybe_split_categories(split_output, categories, concat(bits), lang) | | table.insert(output, "|aa=" .. aa.default) |
| end
| | end |
| | | if qq.default then |
| --[=[
| | table.insert(output, "|qq=" .. qq.default) |
| Format a single IPA pronunciation, which cannot be a combined spec (such as {/.../ [...]}). This has been extracted from
| |
| {format_IPA()} to allow the latter to handle such combined specs. This works like {format_IPA()} but requires that
| |
| pre-created {err} (for error messages) and {categories} lists be passed in, and adds any generated error messages and
| |
| categories to those lists. A single value is returned, the pronunciation, which is usually the same as passed in, but
| |
| may have HTML added surrounding invalid characters so they appear in red.
| |
| ]=]
| |
| local function format_one_IPA(lang, pron, err, categories)
| |
| -- Remove wikilinks, so that wikilink brackets are not misinterpreted as indicating phonetic transcription
| |
| local without_links = gsub(pron, "%[%[[^|%]]+|([^%]]+)%]%]", "%1")
| |
| without_links = gsub(without_links, "%[%[[^%]]+%]%]", "%1")
| |
| | |
| -- Detect whether this is a phonemic or phonetic transcription | |
| local repr, reconstructed = determine_repr(without_links)
| |
| | |
| if reconstructed then | |
| pron = sub(pron, 2) | |
| without_links = sub(without_links, 2)
| |
| end | | end |
| | | if args.nocount then |
| -- If valid, strip the representation marks
| | table.insert(output, "|nocount=1") |
| if repr == "phonemic" then | |
| pron = usub(pron, 2, -2) | |
| without_links = usub(without_links, 2, -2)
| |
| elseif repr == "phonetic" then
| |
| pron = usub(pron, 2, -2)
| |
| without_links = usub(without_links, 2, -2)
| |
| elseif repr == "orthographic" then
| |
| pron = usub(pron, 2, -2)
| |
| without_links = usub(without_links, 2, -2)
| |
| elseif repr == "rhyme" then
| |
| pron = usub(pron, 2)
| |
| without_links = usub(without_links, 2)
| |
| else
| |
| insert(categories, "IPA pronunciations with invalid representation marks")
| |
| -- insert(err, "invalid representation marks")
| |
| -- Removed because it's annoying when previewing pronunciation pages.
| |
| end | | end |
| | | if args.sort then |
| if pron == "" then | | table.insert(output, "|sort=" .. args.sort) |
| insert(categories, "IPA pronunciations with no pronunciation present") | |
| end | | end |
| | |
| | table.insert(output, "}}") |
|
| |
|
| -- Check for obsolete and nonstandard symbols | | return table.concat(output) |
| for i, symbol in ipairs(m_data.nonstandard) do
| | end |
| local result
| |
| for nonstandard in gmatch(pron, symbol) do
| |
| if not result then
| |
| result = {}
| |
| end
| |
| insert(result, nonstandard)
| |
| insert(categories,
| |
| {cat = "IPA pronunciations with obsolete or nonstandard characters", sort_key = nonstandard}
| |
| )
| |
| end
| |
|
| |
|
| if result then | | -- Used by [[Template:X2IPAchar]] |
| insert(err, "obsolete or nonstandard characters (" .. concat(result) .. ")")
| | function export.X2IPAchar(frame) |
| break
| | local params = { |
| end
| | [1] = { list = true, allow_holes = true }, |
| end | | ["ref"] = {list = true, allow_holes = true}, |
| | ["q"] = {list = true, allow_holes = true, require_index = true}, |
| | ["qq"] = {list = true, allow_holes = true, require_index = true}, |
| | ["qual"] = { list = true, allow_holes = true }, |
| | -- FIXME, remove this. |
| | ["lang"] = {}, |
| | } |
| | |
| | local args = require("Module:parameters").process(frame:getParent().args, params) |
|
| |
|
| --[[ Check for invalid symbols after removing the following: | | local m_XSAMPA = require("Module:IPA/X-SAMPA") |
| 1. wikilinks (handled above)
| | |
| 2. paired HTML tags
| | local pronunciations, refs, q, qq, qual, lang = args[1], args.ref, args.q, args.qq, args.qual, args.lang |
| 3. bolding
| | |
| 4. italics
| | local output = {} |
| 5. HTML entity for space
| | table.insert(output, "{{IPAchar") |
| 6. asterisk at beginning of transcription
| | |
| 7. comma followed by spacing characters
| | for i = 1, math.max(pronunciations.maxindex, refs.maxindex, q.maxindex, qq.maxindex, qual.maxindex) do |
| 8. superscripts enclosed in superscript parentheses ]]
| | if pronunciations[i] then |
| local found_HTML | | table.insert(output, "|" .. m_XSAMPA.XSAMPA_to_IPA(pronunciations[i])) |
| local result = gsub(without_links, "<(%a+)[^>]*>([^<]+)</%1>",
| |
| function(tagName, content)
| |
| found_HTML = true
| |
| return content
| |
| end)
| |
| result = gsub(result, "'''([^']*)'''", "%1")
| |
| result = gsub(result, "''([^']*)''", "%1")
| |
| result = gsub(result, "&[^;]+;", "") -- This may catch things that are not valid character entities.
| |
| result = gsub(result, "^%*", "") | |
| result = ugsub(result, ",%s+", "") | |
| | |
| -- VS15 | |
| local vs15_class = "[" .. m_symbols.add_vs15 .. "]" | |
| if umatch(pron, vs15_class) then
| |
| local vs15 = u(0xFE0E)
| |
| if find(result, vs15) then | |
| result = gsub(result, vs15, "") | |
| pron = gsub(pron, vs15, "")
| |
| end | | end |
| pron = ugsub(pron, "(" .. vs15_class .. ")", "%1" .. vs15) | | if q[i] then |
| end
| | table.insert(output, "|q" .. i .. "=" .. q[i]) |
| | |
| if result ~= "" then
| |
| local suggestions = {}
| |
| for k, v in pairs(m_symbols.invalid) do
| |
| if find(result, k, 1, true) then
| |
| insert(suggestions, k .. " with " .. v)
| |
| end
| |
| end | | end |
| if suggestions[1] then | | if qq[i] then |
| suggestions = listToText(suggestions) | | table.insert(output, "|qq" .. i .. "=" .. qq[i]) |
| if is_content_page then
| |
| error("Invalid IPA: replace " .. suggestions)
| |
| else
| |
| insert(err, "replace " .. suggestions)
| |
| end
| |
| end | | end |
| result = ugsub(result, "⁽[".. m_symbols.superscripts .. "]+⁾", "") | | if qual[i] then |
| local per_lang_valid
| | table.insert(output, "|qual" .. i .. "=" .. qual[i]) |
| if lang then
| |
| per_lang_valid = m_symbols.per_lang_valid[lang:getCode()]
| |
| end | | end |
| per_lang_valid = per_lang_valid or "" | | if refs[i] then |
| result = ugsub(result, "[" .. m_symbols.valid .. per_lang_valid .. "]", "")
| | table.insert(output, "|ref" .. i .. "=" .. refs[i]) |
| if result ~= "" then
| |
| local category = "IPA pronunciations with invalid IPA characters" | |
| if not is_content_page then
| |
| category = category .. "/non_mainspace"
| |
| end
| |
| insert(categories, category)
| |
| insert(err, "invalid IPA characters (" .. result .. ")")
| |
| end | | end |
| end | | end |
|
| |
|
| if found_HTML then | | if lang then |
| insert(categories, "IPA pronunciations with paired HTML tags") | | table.insert(output, "|lang=" .. lang) |
| end | | end |
| | |
| | table.insert(output, "}}") |
| | |
| | return table.concat(output) |
| | end |
|
| |
|
| if repr == "phonemic" or repr == "rhyme" then
| | -- Used by [[Template:x2rhymes]] |
| if lang and m_data.phonemes[lang:getCode()] then
| | function export.X2rhymes(frame) |
| local valid_phonemes = m_data.phonemes[lang:getCode()]
| | local parent_args = frame.getParent and frame:getParent().args or frame |
| local rest = pron
| | local compat = parent_args["lang"] |
| local phonemes = {}
| | local offset = compat and 0 or 1 |
|
| |
|
| while #rest > 0 do
| | local params = { |
| local longestmatch, longestmatch_len = "", 0
| | [compat and "lang" or 1] = {required = true, default = "und"}, |
| | [1 + offset] = {required = true, list = true, allow_holes = true}, |
| | } |
| | |
| | local args = require("Module:parameters").process(parent_args, params) |
| | |
| | local m_XSAMPA = require("Module:IPA/X-SAMPA") |
| | |
| | pronunciations, lang = args[1 + offset], args[compat and "lang" or 1] |
| | |
| | local output = {} |
| | table.insert(output, "{{rhymes") |
| | |
| | table.insert(output, "|" .. lang) |
|
| |
|
| local rest_init = sub(rest, 1, 1)
| | for i = 1, pronunciations.maxindex do |
| if rest_init == "(" or rest_init == ")" then
| | if pronunciations[i] then |
| longestmatch = rest_init
| | table.insert(output, "|" .. m_XSAMPA.XSAMPA_to_IPA(pronunciations[i])) |
| longestmatch_len = 1
| |
| else
| |
| for _, phoneme in ipairs(valid_phonemes) do
| |
| local phoneme_len = len(phoneme)
| |
| if phoneme_len > longestmatch_len and usub(rest, 1, phoneme_len) == phoneme then
| |
| longestmatch = phoneme
| |
| longestmatch_len = len(longestmatch)
| |
| end
| |
| end
| |
| end
| |
| | |
| if longestmatch_len > 0 then
| |
| insert(phonemes, longestmatch)
| |
| rest = usub(rest, longestmatch_len + 1)
| |
| else
| |
| local phoneme = usub(rest, 1, 1)
| |
| insert(phonemes, "<span style=\"color: red\">" .. phoneme .. "</span>")
| |
| rest = usub(rest, 2)
| |
| insert(categories, "IPA pronunciations with invalid phonemes/" .. lang:getCode())
| |
| end
| |
| end
| |
| | |
| pron = concat(phonemes)
| |
| end
| |
| | |
| if repr == "phonemic" then
| |
| pron = "/" .. pron .. "/"
| |
| else
| |
| pron = "-" .. pron
| |
| end | | end |
| elseif repr == "phonetic" then
| |
| pron = "[" .. pron .. "]"
| |
| elseif repr == "orthographic" then
| |
| pron = "⟨" .. pron .. "⟩"
| |
| end | | end |
| | | |
| if reconstructed then | | table.insert(output, "}}") |
| pron = "*" .. pron
| | |
| end | | return table.concat(output) |
| | |
| return pron | |
| end | | end |
|
| |
|
| --[==[ | | -- Used for [[Template:enPR]]. |
| Format an IPA pronunciation. This wraps the pronunciation in appropriate CSS classes and adds cleanup categories and
| | function export.enPR(frame) |
| error messages as needed. The pronunciation `pron` should be either phonemic (surrounded by {/.../}), phonetic
| | local parent_args = frame:getParent().args |
| (surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}), a rhyme (beginning with a hyphen) or a combined
| |
| phonemic/phonetic spec (of the form {/.../ [...]}). `lang` indicates the language of the pronunciation and can be {nil}.
| |
| If not {nil}, and the specified language has data in [[Module:IPA/data]] indicating the allowed phonemes, then the page
| |
| will be added to a cleanup category and an error message displayed next to the outputted pronunciation. Note that `lang`
| |
| also determines sort key processing in the added cleanup categories. If `split_output` is not given, the return value is
| |
| a concatenation of the formatted pronunciation, error messages and formatted cleanup categories. Otherwise, three values
| |
| are returned: the formatted pronunciation, the cleanup categories and the concatenated error messages. If `split_output`
| |
| is the value {"raw"}, the cleanup categories are returned in list form, where the list elements are a combination of
| |
| category strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]].
| |
| If `split_output` is any other value besides {nil}, the cleanup categories are returned as a pre-formatted concatenated
| |
| string.
| |
| ]==]
| |
| function export.format_IPA(lang, pron, split_output) | |
| local err = {} | |
| local categories = {}
| |
|
| |
|
| -- `pron` shouldn't contain ref tags. | | local params = { |
| if match(pron, "\127'\"`UNIQ%-%-ref%-[%dA-F]+%-QINU`\"'\127") then
| | [1] = {list = true, disallow_holes = true}, |
| error("<ref> tags found inside pronunciation parameter.")
| | } |
| end | |
|
| |
|
| local phonemic, phonetic = split_phonemic_phonetic(pron) | | local m_param_utils = require(parameter_utilities_module) |
| pron = format_one_IPA(lang, phonemic, err, categories)
| |
| if phonetic then
| |
| phonetic = format_one_IPA(lang, phonetic, err, categories)
| |
| pron = pron .. " " .. phonetic
| |
| end
| |
|
| |
|
| if err[1] then | | local param_mods = m_param_utils.construct_param_mods { |
| err = '<span class="previewonly error" style="font-size: small;> ' .. concat(err, ", ") .. "</span>" | | {group = {"q", "a", "ref"}}, |
| else
| | } |
| err = ""
| |
| end | |
|
| |
|
| return process_maybe_split_categories(split_output, categories, '<span class="IPA">' .. pron .. "</span>", lang, | | local items, args = m_param_utils.process_list_arguments { |
| err) | | params = params, |
| end
| | param_mods = param_mods, |
| | raw_args = parent_args, |
| | termarg = 1, |
| | term_dest = "pron", |
| | track_module = "enPR", |
| | } |
|
| |
|
| --[==[
| | local data = { |
| Format a line of one or more enPR pronunciations as {{tl|enPR}} would do it, i.e. with a preceding {"enPR:"} (linked to
| | items = items, |
| [[Appendix:English pronunciation]]) followed by one or more formatted, comma-separated enPR pronunciations. The
| | q = args.q.default, |
| pronunciations are formatted by wrapping them in the {{cd|AHD}} and {{cd|enPR}} CSS classes and adding any left and
| | qq = args.qq.default, |
| right regular and accent qualifiers. In addition, the overall result is wrapped in any overall left and right regular
| | a = args.a.default, |
| and accent qualifiers. There is a single parameter `data`, an object with the following fields:
| | aa = args.aa.default, |
| * `items` is a list of enPR pronunciations, each of which is an object with the following properties:
| | } |
| ** `pron`: the enPR pronunciation;
| |
| ** `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted pronunciation;
| |
| ** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
| |
| ** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
| |
| ** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation.
| |
| * `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
| |
| pronunciations and preceding {"enPR:"}.
| |
| * `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
| |
| * `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
| |
| pronunciations and preceding {"enPR:"}.
| |
| * `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
| |
| ]==]
| |
| function export.format_enPR_full(data)
| |
| local prefix = "[[Appendix:English pronunciation|enPR]]: " | |
| local lang = require("Module:languages").getByCode("en")
| |
| local parts = {}
| |
| | |
| for _, item in ipairs(data.items) do
| |
| local part = '<span class="AHD enPR">' .. item.pron .. "</span>"
| |
| | |
| if item.q and item.q[1] or item.qq and item.qq[1] or item.a and item.a[1] or item.aa and item.aa[1] then | |
| part = require("Module:pron qualifier").format_qualifiers {
| |
| lang = lang,
| |
| text = part,
| |
| q = item.q,
| |
| qq = item.qq,
| |
| a = item.a,
| |
| aa = item.aa,
| |
| }
| |
| end
| |
| insert(parts, part) | |
| end
| |
| | |
| local prontext = prefix .. concat(parts, ", ")
| |
| if data.q and data.q[1] or data.qq and data.qq[1] or data.a and data.a[1] or data.aa and data.aa[1] then
| |
| prontext = require(pron_qualifier_module).format_qualifiers {
| |
| lang = lang,
| |
| text = prontext,
| |
| q = data.q,
| |
| qq = data.qq,
| |
| a = data.a,
| |
| aa = data.aa,
| |
| }
| |
| end
| |
|
| |
|
| return prontext | | return m_IPA.format_enPR_full(data) |
| end | | end |
|
| |
|
| return export | | return export |