-- Module setup: loads Module:Yesno (p.char passes its `errors` parameter to it)
-- and declares the `Text` library table that all Text.* functions are attached to.
-- NOTE(review): the value assigned to `Text` is not visible in this view;
-- p.failsafe reads Text.serial, so the table presumably carries a `serial`
-- field — confirm against the full file.
local yesNo = require("Module:Yesno")local Text = --[=[ Text utilities ]=]
-- local globals
-- Module-level caches; `false` means "not built yet". Each one is filled
-- lazily by the function that needs it (see the `if not ... then` guards).
local PatternCJK = false
local PatternCombined = false
local PatternLatin = false
local PatternTerminated = false
local QuoteLang = false
local QuoteType = false
local RangesLatin = false
local SeekQuote = false
-- Lazily prepare the latin character data:
--     RangesLatin  -- sequence of { first, last } codepoint pairs
--     PatternLatin -- anchored character class "^[...]*$" assembled from
--                     those pairs (45 is the codepoint of "-")
-- Both are only built while they are still unset.
-- NOTE(review): the value assigned to RangesLatin is not visible in this
-- view — take the range data from the full file.
local function initLatinData if not RangesLatin then RangesLatin = end if not PatternLatin then local range PatternLatin = "^[" for i = 1, #RangesLatin do range = RangesLatin[ i ] PatternLatin = PatternLatin .. mw.ustring.char(range[1 ], 45, range[2 ]) end -- for i PatternLatin = PatternLatin .. "]*$" endend
-- Lazily create the quote tables used by fiatQuote:
--     QuoteLang -- indexed there by language code
--     QuoteType -- indexed there by the value found in QuoteLang
-- Both are only assigned while they are still unset.
-- NOTE(review): the table contents are not visible in this view.
local function initQuoteData -- Create quote definitions if not QuoteLang then QuoteLang = end if not QuoteType then QuoteType = endend -- initQuoteData
local function fiatQuote(apply, alien, advance)
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code (default: "en")
    --     advance  -- number, with level 1 or 2
    -- Returns: string; the text enclosed in the quotation marks found for
    --          language and level, or the plain text if none are defined
    local r = apply and tostring(apply) or ""
    alien = alien or "en"
    advance = tonumber(advance) or 0
    initQuoteData()
    -- Lookup order: full code, then the part before the first "-", then "en"
    local slang = alien:match("^(%l+)-")
    local suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"]
    if suite then
        local quotes = QuoteType[suite]
        if quotes then
            -- A third entry requests a separator between marks and text.
            -- NOTE(review): separator literal copied as shown in this view;
            -- confirm whether a non-breaking space is intended.
            local space = quotes[3] and " " or ""
            quotes = quotes[advance]
            if quotes then
                -- Format the coerced text `r`, not the raw argument: `apply`
                -- may be nil or a non-string, which "%s" does not accept.
                r = mw.ustring.format("%s%s%s%s%s",
                                      mw.ustring.char(quotes[1]),
                                      space,
                                      r,
                                      space,
                                      mw.ustring.char(quotes[2]))
            end
        else
            mw.log("fiatQuote " .. suite)
        end
    end
    return r
end -- fiatQuote
-- Text.char: create a string from a sequence of codepoints (parameters and
-- return value are listed in the inline comment below).
-- NOTE(review): the function body breaks off after `apply = type(apply)` in
-- this view; the remainder has to be taken from the full file.
Text.char = function (apply, again, accept) -- Create string from codepoints -- Parameter: -- apply -- table (sequence) with numerical codepoints, or nil -- again -- number of repetitions, or nil -- accept -- true, if no error messages to be appended -- Returns: string local r = "" apply = type(apply)
-- Normalise a list of items: every entry reached by ipairs(args) is converted
-- with tostring and trimmed; entries that are empty afterwards are dropped;
-- if fmt is given, each remaining entry is passed through
-- mw.ustring.format(fmt, entry).
-- Parameter:
--     args -- table (sequence); any other type is replaced before iterating
--     fmt  -- string (optional); format applied to each entry
-- Returns: table (sequence) with the resulting strings
-- NOTE(review): the initial value of `result` and the replacement used for a
-- non-table `args` are not visible in this view — confirm against the full file.
local function trimAndFormat(args, fmt) local result = if type(args) ~= "table" then args = end for _, v in ipairs(args) do v = mw.text.trim(tostring(v)) if v ~= "" then table.insert(result,fmt and mw.ustring.format(fmt, v) or v) end end return resultend
Text.concatParams = function (args, apply, adapt)
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    -- The items are prepared by trimAndFormat (trimmed, empty ones dropped,
    -- optionally formatted) before they are joined.
    return table.concat(trimAndFormat(args, adapt), apply or "|")
end -- Text.concatParams
Text.containsCJK = function (s)
    -- Is any CJK code within?
    -- Parameter:
    --     s  -- string
    -- Returns: true, if CJK detected
    s = s and tostring(s) or ""
    if not PatternCJK then
        -- Cache in the module-level PatternCJK (not in an accidental global).
        -- Character class built from codepoints:
        -- 91 = "[", 45 = "-" between range bounds, 93 = "]"
        PatternCJK = mw.ustring.char(91,
                                     4352, 45, 4607,
                                     11904, 45, 42191,
                                     43072, 45, 43135,
                                     44032, 45, 55215,
                                     63744, 45, 64255,
                                     65072, 45, 65103,
                                     65381, 45, 65500,
                                     131072, 45, 196607,
                                     93)
    end
    return mw.ustring.find(s, PatternCJK) ~= nil
end -- Text.containsCJK
Text.removeDelimited = function (s, prefix, suffix)
    -- Remove all text in s delimited by prefix and suffix (inclusive)
    -- Arguments:
    --    s = string to process
    --    prefix = initial delimiter
    --    suffix = ending delimiter
    -- Returns: stripped string
    -- Both delimiters are matched literally. A prefix without a following
    -- suffix removes everything up to the end of the string. If either
    -- delimiter is empty, s is returned unchanged.
    s = s and tostring(s) or ""
    prefix = prefix and tostring(prefix) or ""
    suffix = suffix and tostring(suffix) or ""
    -- Byte lengths: string.find / string.sub below work on byte offsets,
    -- so codepoint counts would be wrong for non-ASCII delimiters.
    local prefixLen = #prefix
    local suffixLen = #suffix
    if prefixLen == 0 or suffixLen == 0 then
        return s
    end
    local r = s
    local i = r:find(prefix, 1, true)
    while i do
        -- Plain search: the suffix must not be read as a Lua pattern
        -- (e.g. "-->" as a pattern would already match a single ">").
        local j = r:find(suffix, i + prefixLen, true)
        if j then
            r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
        else
            r = r:sub(1, i - 1)
        end
        i = r:find(prefix, 1, true)
    end
    return r
end -- Text.removeDelimited
-- Text.getPlain: strips markup from a string in two stages — first
-- Text.removeDelimited removes delimited sections, then a chain of gsub
-- calls removes or replaces further fragments.
-- NOTE(review): several string literals in this line look damaged in this
-- view (Text.removeDelimited is called with one empty delimiter and no
-- suffix, two gsub patterns are empty, one pattern starts with "(?") —
-- the original literals have to be restored from the full file.
Text.getPlain = function (adjust) -- Remove wikisyntax from string, except templates -- Parameter: -- adjust -- string -- Returns: string local r = Text.removeDelimited(adjust,"") r = r:gsub("(?%l[^>]*>)", "") :gsub("", "") :gsub("", "") :gsub(" ", " ") return rend -- Text.getPlain
Text.isLatinRange = function (s)
    -- Check whether a text consists only of characters that are expected
    -- in latin texts (letters or symbols)
    -- Arguments:
    --    s = text to analyze; converted by tostring, nil/false counts as ""
    -- Returns: true, if the entire text matches PatternLatin
    local text = ""
    if s then
        text = tostring(s)
    end
    initLatinData()
    local hit = mw.ustring.match(text, PatternLatin)
    return hit ~= nil
end -- Text.isLatinRange
-- Text.isQuote: tells whether a single character is a quotation mark
-- (see the inline parameter description below).
-- NOTE(review): the function body breaks off after `if s` in this view;
-- the remainder has to be taken from the full file.
Text.isQuote = function (s) -- Is this character any quotation mark? -- Parameter: -- s = single character to analyze -- Returns: true, if s is quotation mark s = s and tostring(s) or "" if s
Text.listToText = function (args, adapt)
    -- Join list items the way mw.text.listToText does
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    -- The items are first prepared by trimAndFormat.
    local items = trimAndFormat(args, adapt)
    return mw.text.listToText(items)
end -- Text.listToText
-- Text.quote: public entry for quoting; coerces the text to a string,
-- derives `slang` and a quote level `mode` (1 or 2) and delegates to
-- fiatQuote with the trimmed text.
-- NOTE(review): the part between `if type(alien)` and `2 then` is missing in
-- this view, so how `slang` is derived from `alien` and which condition
-- selects mode 2 cannot be seen here.
Text.quote = function (apply, alien, advance) -- Quote text -- Parameter: -- apply -- string, with text -- alien -- string, with language code, or nil -- advance -- number, with level 1 or 2, or nil -- Returns: quoted string apply = apply and tostring(apply) or "" local mode, slang if type(alien)
2 then mode = 2 else mode = 1 end return fiatQuote(mw.text.trim(apply), slang, mode)end -- Text.quote
Text.quoteUnquoted = function (apply, alien, advance)
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    -- The trimmed text is left alone if it is empty or if its first or its
    -- last character is a quotation mark.
    local r = mw.text.trim(apply and tostring(apply) or "")
    local first = mw.ustring.sub(r, 1, 1)
    if first ~= "" and not Text.isQuote(first) then
        -- Last character: sub(r, -1). The range (-1, 1) would be empty for
        -- every text longer than one character.
        local last = mw.ustring.sub(r, -1)
        if not Text.isQuote(last) then
            r = Text.quote(r, alien, advance)
        end
    end
    return r
end -- Text.quoteUnquoted
Text.removeDiacritics = function (adjust)
    -- Strip all diacritical marks from a text
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --          or basic greek or cyrillic or symbols etc.
    -- Method: decompose (NFD), delete every character of the cached class
    -- PatternCombined, recompose (NFC).
    if not PatternCombined then
        -- 91 = "[", 45 = "-" between range bounds, 93 = "]"
        PatternCombined = mw.ustring.char(91,
                                          0x0300, 45, 0x036F,
                                          0x1AB0, 45, 0x1AFF,
                                          0x1DC0, 45, 0x1DFF,
                                          0xFE20, 45, 0xFE2F,
                                          93)
    end
    local source = adjust and tostring(adjust) or ""
    -- keep only the first result of gsub (the string, not the count)
    local stripped = mw.ustring.gsub(mw.ustring.toNFD(source), PatternCombined, "")
    return mw.ustring.toNFC(stripped)
end -- Text.removeDiacritics
Text.sentenceTerminated = function (analyse)
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string; nil or false are treated as empty string
    -- Returns: true, if sentence terminated
    if not PatternTerminated then
        -- One terminating character (the codepoints given here or one of
        -- "!", ".", "?", "…"), optionally followed by closing quotes or "]"
        -- up to the end of the text
        PatternTerminated = mw.ustring.char(91, 12290, 65281, 65294, 65311)
                            .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    -- Coerce like the sibling functions do, so nil does not raise an error
    analyse = analyse and tostring(analyse) or ""
    return mw.ustring.find(analyse, PatternTerminated) ~= nil
end -- Text.sentenceTerminated
Text.ucfirstAll = function (adjust)
    -- Capitalize all words
    -- Arguments:
    --    adjust = string to adjust
    -- Returns: string in which every lower case letter that follows a
    --          non-alphanumeric character, or starts the text, is upper case
    local source = adjust and tostring(adjust) or ""
    local decoded = mw.text.decode(source, true)
    -- remember whether decoding changed anything, to encode again at the end
    local recode = (decoded ~= source)
    -- the leading blank lets the pattern hit the very first letter as well
    local text = " " .. decoded
    local at = mw.ustring.find(text, "%W%l", 1)
    while at do
        local upper = mw.ustring.upper(mw.ustring.sub(text, at + 1, at + 1))
        text = string.format("%s%s%s",
                             mw.ustring.sub(text, 1, at),
                             upper,
                             mw.ustring.sub(text, at + 2))
        at = mw.ustring.find(text, "%W%l", at + 1)
    end
    -- drop the helper blank again
    text = text:sub(2)
    if recode then
        text = mw.text.encode(text)
    end
    return text
end -- Text.ucfirstAll
-- Text.uprightNonlatin: returns the text unchanged if it matches
-- PatternLatin entirely; otherwise it walks through the text codepoint by
-- codepoint using three local helpers:
--     flat(a)  -- true, if codepoint a lies within one of the RangesLatin pairs
--     focus(a) -- decides whether codepoint a is considered at all
--                 ("char is not ambivalent")
--     form(a)  -- appends two slices of the text to the result via the
--                 format string `span`
-- NOTE(review): this block is truncated and damaged in this view (the end of
-- the header comment, one comparison inside focus and the main loop are
-- missing); restore it from the full file before changing any logic.
Text.uprightNonlatin = function (adjust) -- Ensure non-italics for non-latin text parts -- One single greek letter might be granted -- Precondition: -- adjust -- string -- Returns: string with non-latin parts enclosed in
local r initLatinData if mw.ustring.match(adjust, PatternLatin) then -- latin only, horizontal dashes, quotes r = adjust else local c local j = false local k = 1 local m = false local n = mw.ustring.len(adjust) local span = "%s%s%s" local flat = function (a) -- isLatin local range for i = 1, #RangesLatin do range = RangesLatin[i ] if a >= range[1 ] and a <= range[2 ] then return true end end -- for i end -- flat local focus = function (a) -- char is not ambivalent local r = (a > 64) if r then r = (a < 8192 or a > 8212) else r = (a60) -- '&' '<' end return r end -- focus local form = function (a) return string.format(span, r, mw.ustring.sub(adjust, k, j - 1), mw.ustring.sub(adjust, j, a)) end -- form r = "" for i = 1, n do c = mw.ustring.codepoint(adjust, i, i) if focus(c) then if flat(c) then if j then if m then if i
" " or c
-- Text.test: takes one argument `about` and branches on it.
-- NOTE(review): the body breaks off after `if about` in this view; purpose
-- and return value cannot be determined from here.
Text.test = function (about) local r if about
-- Export
-- Table of the functions offered to #invoke; it is filled below and
-- returned at the end of the module.
local p = {}
-- Generate export wrappers for yes/no functions: p[func] calls Text[func]
-- with the first frame argument (or "") and returns "1" for a truthy
-- result, "" otherwise.
-- NOTE(review): the list of function names handed to ipairs is not visible
-- in this view.
for _, func in ipairs do p[func] = function (frame) return Text[func](frame.args[1 ] or "") and "1" or "" endend
-- Generate export wrappers that pass the result through: p[func] calls
-- Text[func] with the first frame argument (or "") and returns its result.
-- NOTE(review): the list of function names handed to ipairs is not visible
-- in this view.
for _, func in ipairs do p[func] = function (frame) return Text[func](frame.args[1 ] or "") endend
-- p.char: export wrapper for Text.char. Reads the first unnamed parameter
-- of the parent frame, falling back to the frame's own arguments, trims it,
-- splits it at whitespace into items and hands the collected codes to
-- Text.char together with `multiple` and `lenient`.
-- NOTE(review): the part between `yesNo(params.errors)` and `"x" and "0"`
-- is missing in this view, so the derivation of `lenient`, `multiple` and
-- the per-item conversion cannot be seen here.
function p.char(frame) local params = frame:getParent.args local story = params[1 ] local codes, lenient, multiple if not story then params = frame.args story = params[1 ] end if story then local items = mw.text.split(mw.text.trim(story), "%s+") if #items > 0 then local j lenient = (yesNo(params.errors)
"x" and "0" or "") .. v) table.insert(codes, j or v) end end end return Text.char(codes, multiple, lenient)end
-- p.concatParams: export wrapper for Text.concatParams. Depending on the
-- `template` argument the items are taken from the parent frame's
-- arguments or from the frame's own arguments; `separator` and `format`
-- are read from the frame's own arguments.
-- NOTE(review): the evaluation of `template` between `if type(template)`
-- and `"1")` is missing in this view.
function p.concatParams(frame) local args local template = frame.args.template if type(template)
"1") end if template then args = frame:getParent.args else args = frame.args end return Text.concatParams(args, frame.args.separator, frame.args.format)end
function p.listToFormat(frame)
    -- Apply a format string to the items of several parallel lists
    -- Parameter (frame.args):
    --     1, 2, ...  -- lists; each a string of items separated by sep
    --     format     -- string; for every item position, each "%s" is
    --                   replaced in turn by that item of list 1, 2, ...
    --     sep        -- separator used for splitting the lists (default: ";")
    -- Returns: string; the formatted results of all item positions,
    --          concatenated without separator
    local lists = {}
    -- a missing format yields an empty result instead of an error
    local pformat = frame.args["format"] or ""
    local sep = frame.args["sep"] or ";"

    -- Parse parameters: collect the numbered lists
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then
            lists[knum] = v
        end
    end

    -- Split the lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then
            maxListLen = #lists[i]
        end
    end

    -- Build the result string
    local lines = {}
    for i = 1, maxListLen do
        local line = pformat
        for j = 1, #lists do
            -- shorter lists contribute an empty item instead of nil
            local item = lists[j][i] or ""
            -- function replacement: the item is inserted literally, so a
            -- "%" inside an item is not read as a capture reference
            line = mw.ustring.gsub(line, "%%s", function () return item end, 1)
        end
        lines[i] = line
    end
    return table.concat(lines)
end -- p.listToFormat
-- p.listToText: export wrapper for Text.listToText. Depending on the
-- `template` argument the items are taken from the parent frame's
-- arguments or from the frame's own arguments; `format` is read from the
-- frame's own arguments.
-- NOTE(review): the evaluation of `template` between `if type(template)`
-- and `"1")` is missing in this view.
function p.listToText(frame) local args local template = frame.args.template if type(template)
"1") end if template then args = frame:getParent.args else args = frame.args end return Text.listToText(args, frame.args.format)end
-- p.quote: export wrapper for Text.quote. Argument 1 is the text (default
-- ""), argument 2 the language code (set to false in the branch that ends
-- with `"" then slang = false`), argument 3 is converted with tonumber and
-- passed as level.
-- NOTE(review): the handling of `slang` between `if type(slang)` and
-- `"" then` is missing in this view.
function p.quote(frame) local slang = frame.args[2] if type(slang)
"" then slang = false end end return Text.quote(frame.args[1 ] or "", slang, tonumber(frame.args[3]))end
-- p.quoteUnquoted: export wrapper for Text.quoteUnquoted. Argument 1 is the
-- text (default ""), argument 2 the language code (set to false in the
-- branch that ends with `"" then slang = false`), argument 3 is converted
-- with tonumber and passed as level.
-- NOTE(review): the handling of `slang` between `if type(slang)` and
-- `"" then` is missing in this view.
function p.quoteUnquoted(frame) local slang = frame.args[2] if type(slang)
"" then slang = false end end return Text.quoteUnquoted(frame.args[1 ] or "", slang, tonumber(frame.args[3]))end
-- p.zip: interleaves several lists. The numbered arguments are the lists;
-- list i is split with seps[i]; the result joins, for every item position,
-- the items of all lists with `isep` between them and puts `osep` between
-- item positions. A list that is too short contributes "".
-- Arguments read here: sep (default ""), isep (default ""), osep (default "").
-- NOTE(review): the handling of non-numeric keys after
-- `if string.sub(k, 1, 3)` — and with it the way `seps` is filled — is
-- missing in this view, as are the initial values of `lists` and `seps`.
function p.zip(frame) local lists = local seps = local defaultsep = frame.args["sep"] or "" local innersep = frame.args["isep"] or "" local outersep = frame.args["osep"] or ""
-- Parse parameters for k, v in pairs(frame.args) do local knum = tonumber(k) if knum then lists[knum] = v else if string.sub(k, 1, 3)
-- Split lists local maxListLen = 0 for i = 1, #lists do lists[i] = mw.text.split(lists[i], seps[i]) if #lists[i] > maxListLen then maxListLen = #lists[i] end end
local result = "" for i = 1, maxListLen do if i ~= 1 then result = result .. outersep end for j = 1, #lists do if j ~= 1 then result = result .. innersep end result = result .. (lists[j][i] or "") end end return resultend
-- Version query: returns the `serial` field of the Text table
p.failsafe = function ()
    local serial = Text.serial
    return serial
end -- p.failsafe
-- Library access: returns the Text table itself, so that callers of this
-- export can use the Text.* functions directly
function p.Text()
    return Text
end -- p.Text
return p