Module:en-utilities: Difference between revisions

Jump to navigation Jump to search
no edit summary
(Created page with "local export = {} local add_suffix -- Defined below. local find = string.find local match = string.match local reverse = string.reverse local sub = string.sub local toNFD = mw.ustring.toNFD local ugsub = mw.ustring.gsub local ulower = mw.ustring.lower local umatch = mw.ustring.match local usub = mw.ustring.sub local vowels = "aæᴀᴁɐɑɒ@eᴇǝⱻəɛɘɜɞɤiıɪɨᵻoøœᴏɶɔᴐɵuᴜʉᵾɯꟺʊʋʌyʏ" local hyphens = "%-‐‑‒–—" --[==[ Loaders for...")
 
No edit summary
 
Line 3: Line 3:
local add_suffix -- Defined below.
local add_suffix -- Defined below.
local find = string.find
local find = string.find
local is_regular_plural -- Defined below.
local match = string.match
local match = string.match
local remove_possessive -- Defined below.
local reverse = string.reverse
local reverse = string.reverse
local sub = string.sub
local sub = string.sub
Line 11: Line 13:
local umatch = mw.ustring.match
local umatch = mw.ustring.match
local usub = mw.ustring.sub
local usub = mw.ustring.sub
local uupper = mw.ustring.upper


local vowels = "aæᴀᴁɐɑɒ@eᴇǝⱻəɛɘɜɞɤiıɪɨᵻoøœᴏɶɔᴐɵuᴜʉᵾɯꟺʊʋʌyʏ"
local vowels = "aæᴀᴁɐɑɒ@eᴇǝⱻəɛɘɜɞɤiıɪɨᵻoøœᴏɶɔᴐɵuᴜʉᵾɯꟺʊʋʌyʏ"
Line 62: Line 65:
final == "j" and umatch(stem, "[^" .. vowels .. "]j$") or
final == "j" and umatch(stem, "[^" .. vowels .. "]j$") or
final == "s" or
final == "s" or
final == "u" and umatch(stem, "%f[%w']u$") or
final == "x" or
final == "x" or
final == "z" or
final == "z" or
Line 67: Line 71:
)
)
end
end
--[==[
Strips a trailing possessive marker from `stem`: a final {"'s"} is removed
outright, while a final {"'"} that directly follows {"s"} is removed with the
{"s"} kept. If neither ending is present, `stem` is returned unchanged.
]==]
function export.remove_possessive(stem)
	-- "dog's" → "dog"
	local base = match(stem, "^(.*)'s$")
	if base then
		return base
	end
	-- "dogs'" → "dogs"
	base = match(stem, "^(.*s)'$")
	if base then
		return base
	end
	return stem
end
remove_possessive = export.remove_possessive


local suffixes = {}
local suffixes = {}
suffixes["'s"] = {
	-- Picks the written form of the possessive suffix for `stem`: a bare
	-- apostrophe when the stem already ends in "s", otherwise "'s".
	truncated = function(stem)
		if sub(stem, -1) == "s" then
			return "'"
		end
		return "'s"
	end,
}


suffixes["s.plural"] = {
suffixes["s.plural"] = {
final_y_is_i = true,
final_y_is_i = true,
epenthetic_e = epenthetic_e_for_s
epenthetic_e = epenthetic_e_for_s,
modifies_possessive = true,
}
}


Line 210: Line 226:


function export.add_suffix(term, suffix, pos)
function export.add_suffix(term, suffix, pos)
local data = suffixes[suffix]
local data, possessive = suffixes[suffix]
-- If modifies_possessive is set, check for and remove any possessive
-- suffix, which will be re-added again at the end.
if data.modifies_possessive then
local new = remove_possessive(term)
if new ~= term then
term, possessive = new, true
end
end
suffix = match(suffix, "^([^.]*)")
suffix = match(suffix, "^([^.]*)")
local final, stem = sub(term, -1)
local final, stem = sub(term, -1)
Line 233: Line 257:
stem = double_final_consonant(term, final)
stem = double_final_consonant(term, final)
end
end
return stem .. suffix
local truncated = data.truncated
if truncated then
suffix = truncated(stem)
end
local output = stem .. suffix
-- Re-add the possessive suffix, if applicable.
if possessive then
output = add_suffix(output, "'s", pos)
end
return output
end
end
add_suffix = export.add_suffix
add_suffix = export.add_suffix
Line 280: Line 313:
end
end


-- Returns true if `plural` is an expected, regular plural of `term`.
--[==[
Returns true if `plural` is an expected, regular plural of `term`.
The optional parameter `pos` can be used to specify the part of speech,
which is necessary because proper nouns do not change a {"-y"} suffix to {"-ies"}
(e.g. {"Abby"} → {"Abbys"}). By default, `pos` is set to {"noun"}. In addition to
{"proper noun"}, it can also take the special value {"noun+"}, which means that
the function will first attempt the check with the {"noun"} setting, and will
then attempt it with the {"proper noun"} setting iff the term begins with a
capital letter.
]==]
function export.is_regular_plural(plural, term, pos)
function export.is_regular_plural(plural, term, pos)
local init_plural, init_term, try_as_proper_noun = plural, term
if pos == "noun+" then
pos, try_as_proper_noun = "noun", true
end
-- Ignore any final punctuation that occurs in both forms, which is common
-- Ignore any final punctuation that occurs in both forms, which is common
-- in abbreviations (e.g. "abbr." → "abbrs.").
-- in abbreviations (e.g. "abbr." → "abbrs.").
Line 290: Line 336:
plural = sub(plural, 1, -final_punc_len - 1)
plural = sub(plural, 1, -final_punc_len - 1)
end
end
if plural == term .. "s" or plural == add_suffix(term, "s.plural", pos) then
if plural == add_suffix(term, "s.plural", pos) then
return true
return true
end
end
local final = sub(term, -1)
local final = sub(term, -1)
return (
if (
-- Doubled final consonants in "s" and "z".
-- Doubled final consonants in "s" and "z".
final == "s" and plural == term .. "ses" or -- e.g. "busses"
final == "s" and plural == term .. "ses" or -- e.g. "busses"
Line 303: Line 349:
-- Capitalized terms like "$DEITY" → "$DEITIES (should we treat this as regular?)
-- Capitalized terms like "$DEITY" → "$DEITIES (should we treat this as regular?)
final == "Y" and ulower(plural) == convert_final_y_to_i(ulower(term)) .. "es"
final == "Y" and ulower(plural) == convert_final_y_to_i(ulower(term)) .. "es"
)
) then
return true
elseif try_as_proper_noun then
local init = umatch(init_term, "^[^%w%s]*(%w)")
return init and uupper(init) == init and ulower(init) ~= init and
is_regular_plural(init_plural, init_term, "proper noun") or
false
end
return false
end
is_regular_plural = export.is_regular_plural
 
do
	-- Suffix-stripping patterns tried in order once the "-ies" rule has
	-- failed; the first one that matches supplies the singular form. Each
	-- "-es" pattern tolerates "]" characters between the stem and the ending
	-- so that text such as "[[parish]]es" is handled.
	local strip_patterns = {
		"^(.-[cs]h%]*)es$", -- "[[parish]]es"; not -zhes
		"^(.-x%]*)es$", -- "[[box]]es"; not -ses or -zes
		"^(.-)s$", -- regular plurals
	}

	-- Singularizes a piece of plain text; no link parsing happens here.
	local function do_singularize(str)
		local base = match(str, "^(.-)ies$")
		if base then
			return base .. "y"
		end
		for i = 1, #strip_patterns do
			local stripped = match(str, strip_patterns[i])
			if stripped then
				return stripped
			end
		end
		-- Nothing matched, so hand the input back untouched.
		return str
	end

	-- Builds wikilink markup for `link` displayed as `linktext`, omitting the
	-- pipe when the target and the display text are identical.
	local function collapse_link(link, linktext)
		local inner = link
		if link ~= linktext then
			inner = link .. "|" .. linktext
		end
		return "[[" .. inner .. "]]"
	end

	--[==[
	Singularize a word according to the usual English spelling rules. This is the counterpart of {pluralize()}.

	'''NOTE''': This is less reliable than {pluralize()}, so use it with care. Known failures include
	"passes" -> "passe" and "eyries" -> "eyry".
	# A word ending in -ies has -ies replaced with -y.
	# A word ending in -xes, -shes or -ches loses its -es. [Words in -ses are not covered by this rule, cf. "houses", "impasses".]
	# Any other word loses its final -s, if it has one.

	Links are taken into account:
	# In a piped link, only the display text is singularized. If the result equals the target, the link is collapsed
	  to a simple link.
	# In a non-piped link, the link itself is singularized.
	# Text such as "[[parish]]es" works because the patterns that look for -shes etc. permit ] characters between
	  the 'sh' etc. and the final -es.

	`str` may also be a table with an `args` field (as when called from a template), in which case `args[1]` is used.
	]==]
	function export.singularize(str)
		if type(str) == "table" then
			-- Called from a template: the text is the first argument.
			str = str.args[1]
		end
		-- Look for a link at the very end of the text. The pattern accepts
		-- piped and unpiped links alike; for an unpiped link the final
		-- capture (the display text) comes back as the empty string.
		local prefix, target, display = match(str, "^(.*)%[%[([^|%]]+)%|?(.-)%]%]$")
		if not target then
			return do_singularize(str)
		end
		if display == "" then
			return prefix .. "[[" .. do_singularize(target) .. "]]"
		end
		return prefix .. collapse_link(target, do_singularize(display))
	end
end
 
--[==[
Return the appropriate indefinite article to prefix to `str`. Correctly handles links and capitalized text.
Does not correctly handle words like [[union]], [[uniform]] and [[university]] that take "a" despite beginning with
a 'u'. The returned article will have its first letter capitalized if `ucfirst` is specified, otherwise lowercase.
A missing (nil) `str` is treated as the empty string, for which "a" (or "A") is returned.
]==]
function export.get_indefinite_article(str, ucfirst)
	str = str or ""
	-- If there's a link at the beginning, examine the first letter of the
	-- text the reader sees. This pattern matches both piped and unpiped
	-- links; if the link is not piped, the second capture (linktext) is empty.
	local link, linktext = match(str, "^%[%[([^|%]]+)%|?(.-)%]%]")
	local visible = str
	if link then
		visible = linktext ~= "" and linktext or link
	end
	if match(visible, "^[AEIOUaeiou]") then
		return ucfirst and "An" or "an"
	end
	return ucfirst and "A" or "a"
end
-- File-local alias for use by later functions in this module. Declared with
-- `local` so that loading the module does not create a global variable.
local get_indefinite_article = export.get_indefinite_article
 
--[==[
Return `text` preceded by its indefinite article and a space. The article is chosen by {get_indefinite_article()},
so links and capitalized text are handled correctly, but words like [[union]], [[uniform]] and [[university]] that
take "a" despite beginning with a 'u' are not. The article will have its first letter capitalized if `ucfirst` is
specified, otherwise lowercase.
]==]
function export.add_indefinite_article(text, ucfirst)
	local article = get_indefinite_article(text, ucfirst)
	return article .. " " .. text
end
end
export.vowels = vowels
export.vowel = "[" .. vowels .. "]"


return export
return export

Navigation menu