Module:Text Explained

-- Loads the Yesno helper and declares the module-internal `Text` table that
-- all Text.* utilities below are attached to.
-- NOTE(review): this line looks truncated. The line break between the two
-- statements and the value assigned to `Text` (presumably a table
-- constructor) are missing in this copy. p.failsafe reads `Text.serial`, so
-- the original table presumably carried a `serial` field; restore it from the
-- authoritative source, do not guess it.
local yesNo = require("Module:Yesno")local Text = --[=[ Text utilities ]=]

-- Local globals: module-level caches. `false` means "not initialised yet";
-- the functions below fill them on first use.
local PatternCJK = false
local PatternCombined = false
local PatternLatin = false
local PatternTerminated = false
local QuoteLang = false
local QuoteType = false
local RangesLatin = false
local SeekQuote = false

-- Lazily builds PatternLatin: an anchored character-class pattern of the form
-- "^[...]*$" assembled from the entries of RangesLatin, where each entry
-- contributes range[1], codepoint 45 ("-") and range[2].
-- NOTE(review): the table literal assigned to RangesLatin, the parameter
-- list "()" and the original line breaks are missing in this copy (the
-- inline "-- for i" comment currently swallows the rest of the line).
-- Restore them from the authoritative source; the range data cannot be
-- derived from this file.
local function initLatinData if not RangesLatin then RangesLatin = end if not PatternLatin then local range PatternLatin = "^[" for i = 1, #RangesLatin do range = RangesLatin[ i ] PatternLatin = PatternLatin .. mw.ustring.char(range[1 ], 45, range[2 ]) end -- for i PatternLatin = PatternLatin .. "]*$" endend

-- Lazily fills the quote definition tables QuoteLang and QuoteType when they
-- are still false. fiatQuote looks up QuoteLang by language code and uses the
-- result as a key into QuoteType.
-- NOTE(review): both table literals, the parameter list "()" and the line
-- breaks are missing in this copy; the definitions must be restored from the
-- authoritative source.
local function initQuoteData -- Create quote definitions if not QuoteLang then QuoteLang = end if not QuoteType then QuoteType = endend -- initQuoteData

local function fiatQuote(apply, alien, advance)
    -- Wrap text in the quotation marks defined for a language
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code (default: "en")
    --     advance  -- number, with level 1 or 2
    -- Returns: quoted string; the unquoted text if no definition exists
    --          for the language or the requested level
    local r = apply and tostring(apply) or ""
    alien = alien or "en"
    advance = tonumber(advance) or 0
    initQuoteData()
    -- Resolve "xx-yy" via its base language "xx", then fall back to English.
    local slang = alien:match("^(%l+)-")
    local suite = QuoteLang[alien]
                  or slang and QuoteLang[slang]
                  or QuoteLang["en"]
    if suite then
        local quotes = QuoteType[suite]
        if quotes then
            -- A third entry in the type definition requests padding spaces.
            local space = quotes[3] and " " or ""
            quotes = quotes[advance]
            if quotes then
                -- Use the stringified text (r), not the raw argument, so a
                -- nil or non-string `apply` cannot break the concatenation.
                r = mw.ustring.char(quotes[1]) .. space .. r .. space ..
                    mw.ustring.char(quotes[2])
            end
        else
            mw.log("fiatQuote " .. suite)
        end
    end
    return r
end -- fiatQuote

Text.char = function (apply, again, accept)
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string
    local r = ""
    apply = type(apply) == "table" and apply or {}
    again = math.floor(tonumber(again) or 1)
    if again < 1 then
        return ""
    end
    local bad = {}
    local codes = {}
    for _, v in ipairs(apply) do
        local n = tonumber(v)
        -- Reject non-numbers, control characters other than TAB and LF,
        -- and values beyond the Unicode range (which mw.ustring.char
        -- would refuse with a script error).
        if not n or (n < 32 and n ~= 9 and n ~= 10) or n > 0x10FFFF then
            table.insert(bad, tostring(v))
        else
            table.insert(codes, math.floor(n))
        end
    end
    if #bad > 0 then
        -- Any invalid entry suppresses the regular output entirely.
        if not accept then
            r = tostring(mw.html.create("span")
                             :addClass("error")
                             :wikitext("bad codepoints: " ..
                                       table.concat(bad, " ")))
        end
        return r
    end
    if #codes > 0 then
        r = mw.ustring.char(unpack(codes))
        if again > 1 then
            r = r:rep(again)
        end
    end
    return r
end -- Text.char

local function trimAndFormat(args, fmt)
    -- Trim list items, drop empty ones, optionally format the rest
    -- Parameter:
    --     args  -- table (sequence) of values; anything else is treated
    --              as an empty list
    --     fmt   -- string (optional); format including "%s"
    -- Returns: new table (sequence) of strings
    local result = {}
    if type(args) ~= 'table' then
        args = {}
    end
    for _, v in ipairs(args) do
        v = mw.text.trim(tostring(v))
        if v ~= "" then
            result[#result + 1] = fmt and mw.ustring.format(fmt, v) or v
        end
    end
    return result
end

Text.concatParams = function (args, apply, adapt)
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    -- Items are trimmed and empty items are skipped by trimAndFormat.
    return table.concat(trimAndFormat(args, adapt), apply or "|")
end -- Text.concatParams

Text.containsCJK = function (s)
    -- Is any CJK code within?
    -- Parameter:
    --     s  -- string
    -- Returns: true, if CJK detected
    s = s and tostring(s) or ""
    -- Cache the character class in the module-level local PatternCJK
    -- (previously an accidental global `patternCJK` was written).
    if not PatternCJK then
        -- 91 = "[", 45 = "-", 93 = "]": a set of codepoint ranges.
        PatternCJK = mw.ustring.char(91,
                                     4352, 45, 4607,
                                     11904, 45, 42191,
                                     43072, 45, 43135,
                                     44032, 45, 55215,
                                     63744, 45, 64255,
                                     65072, 45, 65103,
                                     65381, 45, 65500,
                                     131072, 45, 196607,
                                     93)
    end
    return mw.ustring.find(s, PatternCJK) ~= nil
end -- Text.containsCJK

Text.removeDelimited = function (s, prefix, suffix)
    -- Remove all text in s delimited by prefix and suffix (inclusive)
    -- Arguments:
    --     s       = string to process
    --     prefix  = initial delimiter (literal text)
    --     suffix  = ending delimiter (literal text)
    -- Returns: stripped string; s unchanged if either delimiter is empty.
    --          An opening delimiter without a closing one removes
    --          everything up to the end of the string.
    s = s and tostring(s) or ""
    prefix = prefix and tostring(prefix) or ""
    suffix = suffix and tostring(suffix) or ""
    -- Byte lengths: find/sub below are byte-indexed, so codepoint counts
    -- would be wrong for non-ASCII delimiters.
    local prefixLen = #prefix
    local suffixLen = #suffix
    if prefixLen == 0 or suffixLen == 0 then
        return s
    end
    local r = s
    local i = r:find(prefix, 1, true)
    while i do
        -- Plain search: delimiters such as "-->" contain pattern magic.
        local j = r:find(suffix, i + prefixLen, true)
        if j then
            r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
        else
            r = r:sub(1, i - 1)
        end
        i = r:find(prefix, 1, true)
    end
    return r
end

-- Strips wiki syntax (but not templates) from a string: first via
-- Text.removeDelimited, then through a chain of four gsub replacements.
-- NOTE(review): the string literals look stripped in this copy.
-- removeDelimited receives an empty prefix and no suffix, and three of the
-- gsub patterns are "" or "()", so as written nothing would be removed.
-- Line breaks are also missing. Restore the delimiters and patterns from the
-- authoritative source before relying on this function.
Text.getPlain = function (adjust) -- Remove wikisyntax from string, except templates -- Parameter: -- adjust -- string -- Returns: string local r = Text.removeDelimited(adjust,"") r = r:gsub("()", "") :gsub("", "") :gsub("", "") :gsub(" ", " ") return rend -- Text.getPlain

Text.isLatinRange = function (s)
    -- Are characters expected to be latin or symbols within latin texts?
    -- Arguments:
    --     s = string to analyze
    -- Returns: true, if valid for latin only
    s = s and tostring(s) or ""    -- ensure input is always string
    initLatinData()                -- builds PatternLatin on first use
    return mw.ustring.match(s, PatternLatin) ~= nil
end -- Text.isLatinRange

Text.isQuote = function (s)
    -- Is this character any quotation mark?
    -- Parameter:
    --     s = single character to analyze
    -- Returns: true, if s is quotation mark
    s = s and tostring(s) or ""
    if s == "" then
        return false
    end
    if not SeekQuote then
        -- String of all recognised quotation marks, built once.
        SeekQuote = mw.ustring.char(34,       -- "
                                    39,       -- '
                                    171,      -- laquo
                                    187,      -- raquo
                                    8216,     -- lsquo
                                    8217,     -- rsquo
                                    8218,     -- sbquo
                                    8220,     -- ldquo
                                    8221,     -- rdquo
                                    8222,     -- bdquo
                                    8249,     -- lsaquo
                                    8250,     -- rsaquo
                                    0x300C,   -- CJK
                                    0x300D,   -- CJK
                                    0x300E,   -- CJK
                                    0x300F)   -- CJK
    end
    -- Plain substring search of s within the list of marks.
    return mw.ustring.find(SeekQuote, s, 1, true) ~= nil
end -- Text.isQuote

Text.listToText = function (args, adapt)
    -- Join list items into prose, in the manner of mw.text.listToText
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local items = trimAndFormat(args, adapt)
    return mw.text.listToText(items)
end -- Text.listToText

Text.quote = function (apply, alien, advance)
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    apply = apply and tostring(apply) or ""
    local slang
    if type(alien) == "string" then
        slang = mw.text.trim(alien):lower()
    else
        slang = mw.title.getCurrentTitle().pageLanguage
        if not slang then
            -- TODO FIXME: Introduction expected 2017-04
            slang = mw.language.getContentLanguage():getCode()
        end
    end
    -- Only level 2 selects the inner quotes; anything else means level 1.
    -- tonumber() also accepts the level given as a numeric string.
    local mode = (tonumber(advance) == 2) and 2 or 1
    return fiatQuote(mw.text.trim(apply), slang, mode)
end -- Text.quote

Text.quoteUnquoted = function (apply, alien, advance)
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    local r = mw.text.trim(apply and tostring(apply) or "")
    local first = mw.ustring.sub(r, 1, 1)
    if first ~= "" and not Text.isQuote(first) then
        -- Last character: sub(r, -1). The former sub(r, -1, 1) returned ""
        -- for any text longer than one character, so a trailing quotation
        -- mark was never detected.
        local last = mw.ustring.sub(r, -1)
        if not Text.isQuote(last) then
            r = Text.quote(r, alien, advance)
        end
    end
    return r
end -- Text.quoteUnquoted

Text.removeDiacritics = function (adjust)
    -- Strip every combining diacritical mark
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --          or basic greek or cyrillic or symbols etc.
    if not PatternCombined then
        -- 91 = "[", 45 = "-", 93 = "]": set of combining-mark ranges.
        PatternCombined = mw.ustring.char(91,
                                          0x0300, 45, 0x036F,
                                          0x1AB0, 45, 0x1AFF,
                                          0x1DC0, 45, 0x1DFF,
                                          0xFE20, 45, 0xFE2F,
                                          93)
    end
    -- Decompose, drop the marks, then recompose what is left.
    local source = adjust and tostring(adjust) or ""
    local stripped = mw.ustring.gsub(mw.ustring.toNFD(source),
                                     PatternCombined,
                                     "")
    return mw.ustring.toNFC(stripped)
end -- Text.removeDiacritics

Text.sentenceTerminated = function (analyse)
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated
    -- Coerce like the sibling functions do, so nil or a number does not
    -- raise inside mw.ustring.find.
    analyse = analyse and tostring(analyse) or ""
    if not PatternTerminated then
        -- "[" plus fullwidth/ideographic terminators, then the ASCII ones,
        -- followed by any run of closing quotes or brackets up to the end.
        PatternTerminated = mw.ustring.char(91, 12290, 65281, 65294, 65311) ..
                            "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    return mw.ustring.find(analyse, PatternTerminated) ~= nil
end -- Text.sentenceTerminated

Text.ucfirstAll = function (adjust)
    -- Capitalize all words
    -- Arguments:
    --     adjust = string to adjust
    -- Returns: string with all first letters in upper case
    adjust = adjust and tostring(adjust) or ""
    local r = mw.text.decode(adjust, true)
    -- Remember whether entities were decoded, to re-encode at the end.
    local m = (r ~= adjust)
    -- A leading space lets the first word match "non-word char + lowercase".
    -- One gsub pass replaces the former find/rebuild loop, which copied the
    -- whole string for every word.
    r = mw.ustring.gsub(" " .. r,
                        "(%W)(%l)",
                        function (before, letter)
                            return before .. mw.ustring.upper(letter)
                        end)
    r = r:sub(2)    -- drop the helper space again
    if m then
        r = mw.text.encode(r)
    end
    return r
end -- Text.ucfirstAll

-- Scans `adjust` codepoint by codepoint and, unless the whole string matches
-- PatternLatin, rebuilds it through the local `form` helper, which formats
-- (result so far, text before the non-latin run, the run itself) with `span`.
-- Helpers: `flat` tests a codepoint against RangesLatin; `focus` decides
-- whether a codepoint counts as unambiguous; `m` tracks a possible single
-- greek letter (codepoints 880..1023).
-- NOTE(review): this copy is damaged. The header comment stops at
-- "enclosed in", `span` is only "%s%s%s" (any wrapper markup is gone), the
-- call parentheses after initLatinData are missing, and several comparisons
-- have lost their operator (e.g. "r = (a" / "38 or a" / "60)", "if i" /
-- "m then", "if c" / "\" \" or c" / "\"(\" then"). Restore from the
-- authoritative source rather than guessing.
Text.uprightNonlatin = function (adjust) -- Ensure non-italics for non-latin text parts -- One single greek letter might be granted -- Precondition: -- adjust -- string -- Returns: string with non-latin parts enclosed in

local r initLatinData if mw.ustring.match(adjust, PatternLatin) then -- latin only, horizontal dashes, quotes r = adjust else local c local j = false local k = 1 local m = false local n = mw.ustring.len(adjust) local span = "%s%s%s" local flat = function (a) -- isLatin local range for i = 1, #RangesLatin do range = RangesLatin[i ] if a >= range[1 ] and a <= range[2 ] then return true end end -- for i end -- flat local focus = function (a) -- char is not ambivalent local r = (a > 64) if r then r = (a < 8192 or a > 8212) else r = (a

38 or a

60) -- '&' '<' end return r end -- focus local form = function (a) return string.format(span, r, mw.ustring.sub(adjust, k, j - 1), mw.ustring.sub(adjust, j, a)) end -- form r = "" for i = 1, n do c = mw.ustring.codepoint(adjust, i, i) if focus(c) then if flat(c) then if j then if m then if i

m then -- single greek letter. j = false end m = false end if j then local nx = i - 1 local s = "" for ix = nx, 1, -1 do c = mw.ustring.sub(adjust, ix, ix) if c

" " or c

"(" then nx = nx - 1 s = c .. s else break -- for ix end end -- for ix r = form(nx) .. s j = false k = i end end elseif not j then j = i if c >= 880 and c <= 1023 then -- single greek letter? m = i + 1 else m = false end end elseif m then m = m + 1 end end -- for i if j and (not m or m < n) then r = form(n) else r = r .. mw.ustring.sub(adjust, k) end end return rend -- Text.uprightNonlatin

Text.test = function (about)
    -- Expose internal data for testing
    -- Parameter:
    --     about  -- string; "quote" selects the quote definitions
    -- Returns: table with QuoteLang and QuoteType for "quote",
    --          otherwise nil
    local r
    if about == "quote" then
        initQuoteData()
        r = {}
        r.QuoteLang = QuoteLang
        r.QuoteType = QuoteType
    end
    return r
end -- Text.test

-- Export table `p` and two loops that generate #invoke wrappers around
-- single-argument Text functions (the argument is frame.args[1] or "").
-- The first loop maps a truthy result to "1" and anything else to "";
-- the second loop returns the Text function's result unchanged.
-- NOTE(review): the value assigned to `p` and the name lists passed to both
-- ipairs calls are missing in this copy, and "local p =" is fused into the
-- comment line. Which Text functions each loop exports cannot be determined
-- from this file; restore the lists from the authoritative source.
-- Exportlocal p =

for _, func in ipairs do p[func] = function (frame) return Text[func](frame.args[1 ] or "") and "1" or "" endend

for _, func in ipairs do p[func] = function (frame) return Text[func](frame.args[1 ] or "") endend

function p.char(frame)
    -- #invoke entry point for Text.char
    -- Reads a whitespace-separated list of codepoints from parameter 1 of
    -- the calling template, or of the #invoke itself if the template has
    -- none. Items starting with "x" are read as hexadecimal.
    -- Further parameters (taken from the same parameter set):
    --     errors  -- a "no"-like value suppresses the error message
    --     *       -- number of repetitions
    local params = frame:getParent().args
    local story = params[1]
    local codes, lenient, multiple
    if not story then
        params = frame.args
        story = params[1]
    end
    if story then
        local items = mw.text.split(mw.text.trim(story), "%s+")
        if #items > 0 then
            -- Lenient only when errors is explicitly negated.
            lenient = (yesNo(params.errors) == false)
            codes = {}
            multiple = tonumber(params["*"])
            for _, v in ipairs(items) do
                -- "x41" becomes "0x41", which tonumber reads as hex;
                -- unparsable items are passed on for Text.char to report.
                local j = tonumber((v:sub(1, 1) == "x" and "0" or "") .. v)
                table.insert(codes, j or v)
            end
        end
    end
    return Text.char(codes, multiple, lenient)
end

function p.concatParams(frame)
    -- #invoke entry point for Text.concatParams
    -- Parameters of the #invoke:
    --     template   -- "1": take the list from the calling template's
    --                   parameters instead of the #invoke parameters
    --     separator  -- separator between items
    --     format     -- format including "%s", applied to each item
    local args
    local template = frame.args.template
    if type(template) == "string" then
        template = mw.text.trim(template)
        template = (template == "1")
    end
    if template then
        args = frame:getParent().args
    else
        args = frame.args
    end
    return Text.concatParams(args,
                             frame.args.separator,
                             frame.args.format)
end

function p.listToFormat(frame)
    -- Apply a format string to the parallel items of several lists
    -- Parameters of the #invoke:
    --     1, 2, ...  -- lists; each is split at `sep`
    --     sep        -- separator handed to mw.text.split (default: ";")
    --     format     -- template for one output row; the n-th "%s" is
    --                   replaced by the current item of the n-th list
    -- Returns: string; concatenation of one formatted row per item index
    local lists = {}
    -- A missing format yields empty output instead of a script error.
    local pformat = frame.args["format"] or ""
    local sep = frame.args["sep"] or ";"

    -- Parse parameters: collect the lists
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then
            lists[knum] = v
        end
    end

    -- Split the lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then
            maxListLen = #lists[i]
        end
    end

    -- Generate the result string
    local rows = {}
    for i = 1, maxListLen do
        local j = 0
        -- A function replacement inserts items literally: a "%" inside an
        -- item is not treated as a replacement escape, and an inserted
        -- item is never scanned again for "%s".
        local row = mw.ustring.gsub(pformat, "%%s", function ()
            j = j + 1
            local list = lists[j]
            if list then
                -- Lists shorter than the longest one contribute "".
                return list[i] or ""
            end
            -- More placeholders than lists: keep the placeholder.
            return nil
        end)
        rows[i] = row
    end

    return table.concat(rows)
end

function p.listToText(frame)
    -- #invoke entry point for Text.listToText
    -- Parameters of the #invoke:
    --     template  -- "1": take the list from the calling template's
    --                  parameters instead of the #invoke parameters
    --     format    -- format including "%s", applied to each item
    local args
    local template = frame.args.template
    if type(template) == "string" then
        template = mw.text.trim(template)
        template = (template == "1")
    end
    if template then
        args = frame:getParent().args
    else
        args = frame.args
    end
    return Text.listToText(args, frame.args.format)
end

function p.quote(frame)
    -- #invoke entry point for Text.quote
    -- Parameters of the #invoke:
    --     1  -- text
    --     2  -- language code; empty means "not given"
    --     3  -- quote level, 1 or 2
    local slang = frame.args[2]
    if type(slang) == "string" then
        slang = mw.text.trim(slang)
        if slang == "" then
            -- Non-string makes Text.quote fall back to the page language.
            slang = false
        end
    end
    return Text.quote(frame.args[1] or "",
                      slang,
                      tonumber(frame.args[3]))
end

function p.quoteUnquoted(frame)
    -- #invoke entry point for Text.quoteUnquoted
    -- Parameters of the #invoke:
    --     1  -- text
    --     2  -- language code; empty means "not given"
    --     3  -- quote level, 1 or 2
    local slang = frame.args[2]
    if type(slang) == "string" then
        slang = mw.text.trim(slang)
        if slang == "" then
            -- Non-string makes Text.quote fall back to the page language.
            slang = false
        end
    end
    return Text.quoteUnquoted(frame.args[1] or "",
                              slang,
                              tonumber(frame.args[3]))
end

function p.zip(frame)
    -- Interleave the items of several lists
    -- Parameters of the #invoke:
    --     1, 2, ...       -- lists
    --     sep             -- default separator for splitting (default: "")
    --     sep1, sep2, ... -- separator for splitting list 1, 2, ...
    --     isep            -- inserted between items of the same index
    --     osep            -- inserted between groups of successive indexes
    -- Returns: string
    local lists = {}
    local seps = {}
    local defaultsep = frame.args["sep"] or ""
    local innersep = frame.args["isep"] or ""
    local outersep = frame.args["osep"] or ""

    -- Parse parameters
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then
            lists[knum] = v
        elseif string.sub(k, 1, 3) == "sep" then
            local sepnum = tonumber(string.sub(k, 4))
            if sepnum then
                seps[sepnum] = v
            end
        end
    end
    -- Where no explicit separator was given, use the default separator
    for i = 1, math.max(#seps, #lists) do
        if not seps[i] then
            seps[i] = defaultsep
        end
    end

    -- Split the lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], seps[i])
        if #lists[i] > maxListLen then
            maxListLen = #lists[i]
        end
    end

    -- Collect the pieces in a buffer instead of growing a string.
    local buf = {}
    for i = 1, maxListLen do
        if i ~= 1 then
            buf[#buf + 1] = outersep
        end
        for j = 1, #lists do
            if j ~= 1 then
                buf[#buf + 1] = innersep
            end
            -- Shorter lists contribute an empty item.
            buf[#buf + 1] = lists[j][i] or ""
        end
    end
    return table.concat(buf)
end

function p.failsafe()
    -- Returns: the value of Text.serial
    return Text.serial
end

p.Text = function ()
    -- Give other Lua modules access to the Text function table
    -- Returns: table Text
    return Text
end -- p.Text

return p