48,355
edits
(Created page with "local export = {} local add_suffix -- Defined below. local find = string.find local match = string.match local reverse = string.reverse local sub = string.sub local toNFD = mw.ustring.toNFD local ugsub = mw.ustring.gsub local ulower = mw.ustring.lower local umatch = mw.ustring.match local usub = mw.ustring.sub local vowels = "aæᴀᴁɐɑɒ@eᴇǝⱻəɛɘɜɞɤiıɪɨᵻoøœᴏɶɔᴐɵuᴜʉᵾɯꟺʊʋʌyʏ" local hyphens = "%-‐‑‒–—" --[==[ Loaders for...") |
No edit summary |
||
| Line 3: | Line 3: | ||
local add_suffix -- Defined below. | local add_suffix -- Defined below. | ||
local find = string.find | local find = string.find | ||
local is_regular_plural -- Defined below. | |||
local match = string.match | local match = string.match | ||
local remove_possessive -- Defined below. | |||
local reverse = string.reverse | local reverse = string.reverse | ||
local sub = string.sub | local sub = string.sub | ||
| Line 11: | Line 13: | ||
local umatch = mw.ustring.match | local umatch = mw.ustring.match | ||
local usub = mw.ustring.sub | local usub = mw.ustring.sub | ||
local uupper = mw.ustring.upper | |||
local vowels = "aæᴀᴁɐɑɒ@eᴇǝⱻəɛɘɜɞɤiıɪɨᵻoøœᴏɶɔᴐɵuᴜʉᵾɯꟺʊʋʌyʏ" | local vowels = "aæᴀᴁɐɑɒ@eᴇǝⱻəɛɘɜɞɤiıɪɨᵻoøœᴏɶɔᴐɵuᴜʉᵾɯꟺʊʋʌyʏ" | ||
| Line 62: | Line 65: | ||
final == "j" and umatch(stem, "[^" .. vowels .. "]j$") or | final == "j" and umatch(stem, "[^" .. vowels .. "]j$") or | ||
final == "s" or | final == "s" or | ||
final == "u" and umatch(stem, "%f[%w']u$") or | |||
final == "x" or | final == "x" or | ||
final == "z" or | final == "z" or | ||
| Line 67: | Line 71: | ||
) | ) | ||
end | end | ||
--[==[
Strips a trailing English possessive marker from `stem`. A final {"'s"} is removed entirely; failing that, a final
apostrophe is removed when it directly follows {"s"} (e.g. {"dogs'"} → {"dogs"}). If neither ending is present,
`stem` is returned unchanged.
]==]
function export.remove_possessive(stem)
	-- Try the full "'s" ending first.
	local base = match(stem, "^(.*)'s$")
	if base then
		return base
	end
	-- Otherwise drop a bare apostrophe after "s", or give the input back as-is.
	return match(stem, "^(.*s)'$") or stem
end
remove_possessive = export.remove_possessive
-- Per-suffix behaviour tables, looked up by add_suffix() using the suffix identifier it is given.
local suffixes = {}

-- Chooses the possessive ending for `stem`: a stem that already ends in "s" takes a bare apostrophe, any other
-- stem takes "'s".
local function possessive_ending(stem)
	if sub(stem, -1) == "s" then
		return "'"
	end
	return "'s"
end

-- `truncated` is called by add_suffix() with the stem, and its return value is used as the suffix.
suffixes["'s"] = {
	truncated = possessive_ending,
}

suffixes["s.plural"] = {
	-- NOTE(review): the code consuming these two fields is not visible here; presumably they control the
	-- "-y" → "-ie-" change and the "-es" variant respectively. Confirm against add_suffix().
	final_y_is_i = true,
	epenthetic_e = epenthetic_e_for_s,
	-- add_suffix() strips any possessive ending before applying this suffix and re-adds it afterwards.
	modifies_possessive = true,
}
| Line 210: | Line 226: | ||
function export.add_suffix(term, suffix, pos) | function export.add_suffix(term, suffix, pos) | ||
local data = suffixes[suffix] | local data, possessive = suffixes[suffix] | ||
-- If modifies_possessive is set, check for and remove any possessive | |||
-- suffix, which will be re-added again at the end. | |||
if data.modifies_possessive then | |||
local new = remove_possessive(term) | |||
if new ~= term then | |||
term, possessive = new, true | |||
end | |||
end | |||
suffix = match(suffix, "^([^.]*)") | suffix = match(suffix, "^([^.]*)") | ||
local final, stem = sub(term, -1) | local final, stem = sub(term, -1) | ||
| Line 233: | Line 257: | ||
stem = double_final_consonant(term, final) | stem = double_final_consonant(term, final) | ||
end | end | ||
local truncated = data.truncated | |||
if truncated then | |||
suffix = truncated(stem) | |||
end | |||
local output = stem .. suffix | |||
-- Re-add the possessive suffix, if applicable. | |||
if possessive then | |||
output = add_suffix(output, "'s", pos) | |||
end | |||
return output | |||
end | end | ||
add_suffix = export.add_suffix | add_suffix = export.add_suffix | ||
| Line 280: | Line 313: | ||
end | end | ||
-- Returns true if `plural` is an expected, regular plural of `term`. | --[==[ | ||
Returns true if `plural` is an expected, regular plural of `term`. | |||
The optional parameter `pos` can be used to specify the part of speech, | |||
which is necessary because proper nouns do not change a {"-y"} suffix to {"-ies"} | |||
(e.g. {"Abby"} → {"Abbys"}). By default, `pos` is set to {"noun"}. In addition to | |||
{"proper noun"}, it can also take the special value {"noun+"}, which means that | |||
the function will first attempt the check with the {"noun"} setting, and will | |||
then attempt it with the {"proper noun"} setting iff the term begins with a | |||
capital letter. | |||
]==] | |||
function export.is_regular_plural(plural, term, pos) | function export.is_regular_plural(plural, term, pos) | ||
local init_plural, init_term, try_as_proper_noun = plural, term | |||
if pos == "noun+" then | |||
pos, try_as_proper_noun = "noun", true | |||
end | |||
-- Ignore any final punctuation that occurs in both forms, which is common | -- Ignore any final punctuation that occurs in both forms, which is common | ||
-- in abbreviations (e.g. "abbr." → "abbrs."). | -- in abbreviations (e.g. "abbr." → "abbrs."). | ||
| Line 290: | Line 336: | ||
plural = sub(plural, 1, -final_punc_len - 1) | plural = sub(plural, 1, -final_punc_len - 1) | ||
end | end | ||
if | if plural == add_suffix(term, "s.plural", pos) then | ||
return true | return true | ||
end | end | ||
local final = sub(term, -1) | local final = sub(term, -1) | ||
if ( | |||
-- Doubled final consonants in "s" and "z". | -- Doubled final consonants in "s" and "z". | ||
final == "s" and plural == term .. "ses" or -- e.g. "busses" | final == "s" and plural == term .. "ses" or -- e.g. "busses" | ||
| Line 303: | Line 349: | ||
-- Capitalized terms like "$DEITY" → "$DEITIES (should we treat this as regular?) | -- Capitalized terms like "$DEITY" → "$DEITIES (should we treat this as regular?) | ||
final == "Y" and ulower(plural) == convert_final_y_to_i(ulower(term)) .. "es" | final == "Y" and ulower(plural) == convert_final_y_to_i(ulower(term)) .. "es" | ||
) | ) then | ||
return true | |||
elseif try_as_proper_noun then | |||
local init = umatch(init_term, "^[^%w%s]*(%w)") | |||
return init and uupper(init) == init and ulower(init) ~= init and | |||
is_regular_plural(init_plural, init_term, "proper noun") or | |||
false | |||
end | |||
return false | |||
end | |||
is_regular_plural = export.is_regular_plural | |||
do
	-- Removes a regular plural ending from plain text (no link parsing). Returns `str` unchanged when no rule
	-- applies.
	local function do_singularize(str)
		-- "-ies" → "-y".
		local stem = match(str, "^(.-)ies$")
		if stem then
			return stem .. "y"
		end
		-- "-ches" / "-shes" (but not "-zhes"). The `%]*` allows closing brackets between the stem and the
		-- ending, so that "[[parish]]es" is handled.
		stem = match(str, "^(.-[cs]h%]*)es$")
		if stem then
			return stem
		end
		-- "-xes" (but not "-ses" or "-zes"), including cases like "[[box]]es".
		stem = match(str, "^(.-x%]*)es$")
		if stem then
			return stem
		end
		-- Plain "-s"; if even that is absent, hand the input back.
		return match(str, "^(.-)s$") or str
	end

	-- Builds a wikilink from a target and its display text, using the short unpiped form when the two are equal.
	local function collapse_link(link, linktext)
		local result = "[[" .. link
		if link ~= linktext then
			result = result .. "|" .. linktext
		end
		return result .. "]]"
	end

	--[==[
	Singularize a word according to the normal rules of English. Works analogously to {pluralize()}.

	'''NOTE''': This is less reliable than {pluralize()}; use with care. It mishandles cases such as
	"passes" -> "passe" and "eyries" -> "eyry".
	# A word ending in -ies has -ies replaced with -y.
	# A word ending in -xes, -shes or -ches has -es removed. [Words in -ses are not affected, cf. "houses",
	  "impasses".]
	# Any other word has a final -s removed.

	Links are taken into account:
	# In a piped link, the display text is singularized, and the link is collapsed to a simple link if the two
	  parts end up identical.
	# In a non-piped link, the link itself is singularized.
	# A link followed by an ending, like "[[parish]]es", works because the -shes etc. checks allow ] characters
	  between the 'sh' etc. and the final -es.

	`str` may also be a frame-like table, in which case `str.args[1]` is used (to allow calling from a template).
	]==]
	function export.singularize(str)
		if type(str) == "table" then
			-- Called from a template: take the first argument.
			str = str.args[1]
		end
		-- Look for a link at the very end of the text, piped or not. For an unpiped link the third capture
		-- (the display text) is the empty string.
		local beginning, link, linktext = match(str, "^(.*)%[%[([^|%]]+)%|?(.-)%]%]$")
		if not link then
			return do_singularize(str)
		end
		if linktext == "" then
			return beginning .. "[[" .. do_singularize(link) .. "]]"
		end
		return beginning .. collapse_link(link, do_singularize(linktext))
	end
end
--[==[
Return the indefinite article ({"a"} or {"an"}) appropriate for prefixing to `str`. The choice is made purely on
spelling: {"an"} if the first character is one of the ASCII vowels `a`, `e`, `i`, `o`, `u` (in either case),
otherwise {"a"}. Consequently words like [[union]], [[uniform]] and [[university]], which take "a" despite
beginning with a 'u', are not handled correctly.

If `str` begins with a wikilink, the first character of the display text (or of the target, if the link is not
piped) is examined. A `nil` value for `str` is treated as the empty string. The returned article has its first
letter capitalized if `ucfirst` is specified, otherwise it is lowercase.
]==]
function export.get_indefinite_article(str, ucfirst)
	str = str or ""
	-- If there's a link at the beginning, examine the first letter of the
	-- link text. This pattern matches both piped and unpiped links.
	-- If the link is not piped, the second capture (linktext) will be empty.
	local link, linktext = match(str, "^%[%[([^|%]]+)%|?(.-)%]%]")
	local visible = str
	if link then
		visible = linktext ~= "" and linktext or link
	end
	-- The empty capture makes match() return a position instead of a substring; only truthiness matters here.
	if match(visible, "^()[AEIOUaeiou]") then
		return ucfirst and "An" or "an"
	end
	return ucfirst and "A" or "a"
end
-- Declared `local` so that this alias does not leak into the global environment; it stays visible to the
-- functions defined after this point in the file.
local get_indefinite_article = export.get_indefinite_article
--[==[
Return `text` preceded by its indefinite article and a space. The article is chosen by
{get_indefinite_article()}, so links and capitalized text are handled, but words like [[union]], [[uniform]] and
[[university]], which take "a" despite beginning with a 'u', are not. The article has its first letter capitalized
if `ucfirst` is specified, otherwise it is lowercase.
]==]
function export.add_indefinite_article(text, ucfirst)
	local article = get_indefinite_article(text, ucfirst)
	return article .. " " .. text
end
-- Expose the vowel inventory both as the raw character list (`vowels`) and wrapped in square brackets as a
-- pattern character class (`vowel`).
export.vowels, export.vowel = vowels, "[" .. vowels .. "]"

return export