-- Module:URLutil explained

-- Library table; every public utility below is attached to it.
-- NOTE(review): the original table constructor was lost when this source
-- was extracted. Failsafe.failsafe() reads the fields `serial`, `item` and
-- (optionally) `serialProperty` from this table -- restore them from the
-- upstream module before relying on failsafe().
local URLutil = { }
--[=[
Utilities for URL etc. on www.
* decode()
* encode()
* getAuthority()
* getFragment()
* getHost()
* getLocation()
* getNormalized()
* getPath()
* getPort()
* getQuery()
* getQueryTable()
* getRelativePath()
* getScheme()
* getSortkey()
* getTLD()
* getTop2domain()
* getTop3domain()
* isAuthority()
* isDomain()
* isDomainExample()
* isDomainInt()
* isHost()
* isHostPathResource()
* isIP()
* isIPlocal()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog()
* isProtocolWiki()
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
* wikiEscapeURL()
* failsafe()
Only [[dotted decimal]] notation for IPv4 expected.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway.
]=]
-- Alias used by the versioning code (Failsafe.failsafe).
local Failsafe = URLutil

-- Characters that must stay percent-escaped, per URL component.
-- Keys are the component codes passed by the callers in this file:
--     F -- fragment, P -- path, Q -- query   (getNormalized)
--     X -- any component                     (decodeComponentML)
-- NOTE(review): the table constructor was lost when this source was
-- extracted; the character sets below are a reconstruction and must be
-- verified against the upstream module.
local decodeComponentProtect = { F = "\"#%<>[]^`{|}",
                                 P = "\"#%<>[]^`{|}/?",
                                 Q = "\"#%<>[]^`{|}&=+;,",
                                 X = "\"#%<>[]^`{|}&=+;,/?" }

local decodeComponentEscape = function ( averse, adapt )
    -- Decide whether a character code has to remain escaped
    -- Precondition:
    --     averse  -- string, key into decodeComponentProtect (F, P, Q, X)
    --     adapt   -- number, character code (below 128)
    -- Postcondition:
    --     Returns truthy value (true or a position) if the code is 20,
    --             127, or the character is listed in the protected set
    --             for that component; falsy otherwise
    -- NOTE(review): 20 is decimal here, next to decimal 127 (DEL);
    --               possibly 0x20 (space) was intended -- confirm upstream.
    return  adapt == 20    or
            adapt == 127   or
            decodeComponentProtect[ averse ]:find( string.char( adapt ),
                                                   1,
                                                   true )
end -- decodeComponentEscape()

local decodeComponentML = function ( ask )
    -- Resolve numeric character references (&#x..;) within a URL
    -- Precondition:
    --     ask  -- string, URL or component
    -- Postcondition:
    --     Returns string; each matched reference is replaced by
    --             * a decimal reference &#N;   if code >= 128
    --             * a percent escape %XX       if the character is protected
    --             * the plain character        otherwise
    -- NOTE(review): the search pattern requires the x/X marker, so the
    --               decimal branch below is never reached; upstream may
    --               have used "[xX]?" -- confirm before changing.
    local i = 1
    local j, n, s
    while ( i ) do
        i = ask:find( "&#[xX]%x%x+;", i )
        if i then
            j = ask:find( ";",  i + 3,  true )
            s = ask:sub( i + 2,  j - 1 ):upper()
            n = s:byte( 1, 1 )
            if n == 88 then    -- 'X': hexadecimal
                n = tonumber( s:sub( 2 ),  16 )
            elseif s:match( "^%d+$" ) then
                n = tonumber( s )
            else
                n = false
            end
            if n then
                if n >= 128 then
                    s = string.format( "&#%d;", n )
                elseif decodeComponentEscape( "X", n ) then
                    s = string.format( "%%%02X", n )
                else
                    s = string.format( "%c", n )
                end
                j = j + 1
                if i == 1 then
                    ask = s .. ask:sub( j )
                else
                    ask = string.format( "%s%s%s",
                                         ask:sub( 1,  i - 1 ),
                                         s,
                                         ask:sub( j ) )
                end
            end
            -- continue behind the start of the replacement
            i = i + 1
        end
    end -- while i
    return ask
end -- decodeComponentML()

local decodeComponentPercent = function ( ask, averse )
    -- Decode percent escapes %20...%7F unless the character is protected
    -- Precondition:
    --     ask     -- string, URL component
    --     averse  -- string, component code for decodeComponentEscape()
    -- Postcondition:
    --     Returns string, with unprotected escapes replaced by the
    --             character; protected escapes are kept, with the second
    --             hex digit turned into upper case
    local i = 1
    local j, k, m, n
    while ( i ) do
        i = ask:find( "%%[2-7]%x", i )
        if i then
            j = i + 1                  -- first hex digit
            k = j + 1                  -- second hex digit
            n = ask:byte( k, k )
            k = k + 1                  -- first position behind the escape
            m = ( n > 96 )             -- lower case letter a-f
            if m then
                n = n - 32
                m = n                  -- keep upper case letter code
            end
            if n > 57 then
                n = n - 55             -- A-F -> 10...15
            else
                n = n - 48             -- 0-9
            end
            n = ( ask:byte( j, j ) - 48 ) * 16   +   n
            if n == 39  and  ask:sub( i + 3,  i + 5 ) == "%27" then
                -- two or more consecutive escaped apostrophes:
                -- leave the whole run encoded and skip it
                j = i + 6
                while ( ask:sub( j,  j + 2 ) == "%27" ) do
                    j = j + 3
                end -- while "%27"
            elseif decodeComponentEscape( averse, n ) then
                if m then
                    -- stay escaped, but with upper case hex digit
                    ask = string.format( "%s%c%s",
                                         ask:sub( 1, j ),
                                         m,
                                         ask:sub( k ) )
                end
            elseif i == 1 then
                ask = string.format( "%c%s", n, ask:sub( k ) )
            else
                ask = string.format( "%s%c%s",
                                     ask:sub( 1,  i - 1 ),
                                     n,
                                     ask:sub( k ) )
            end
            i = j
        end
    end -- while i
    return ask
end -- decodeComponentPercent()

local getTopDomain = function ( url, mode )
    -- Retrieve the last domain labels of the host of a URL
    -- Precondition:
    --     url   -- string, with URL
    --     mode  -- number; 3 for three labels, otherwise two labels
    -- Postcondition:
    --     Returns string with the top domains, or false
    local r = URLutil.getHost( url )
    if r then
        local pattern = "[%w%%%-]+%.%a[%w%-]*%a)$"
        if mode == 3 then
            pattern = "[%w%%%-]+%." .. pattern
        end
        -- leading dot makes sure that only entire labels are matched
        r = mw.ustring.match( "." .. r,  "%.(" .. pattern )
        if not r then
            r = false
        end
    else
        r = false
    end
    return r
end -- getTopDomain()

local getHash = function ( url )
    -- Find the '#' that starts the fragment, ignoring any '#' that
    -- belongs to a numeric character reference (&#38; or &#x26;)
    -- Precondition:
    --     url  -- string
    -- Postcondition:
    --     Returns number, position of the first '#' which is not part
    --             of a character reference; nil if there is none
    -- Every '#' is inspected on its own, so that a fragment separator
    -- in front of a later character reference is not lost.
    local start = 1
    local r
    repeat
        r = url:find( "#", start, true )
        if r  and  r > 1
              and  url:byte( r - 1 ) == 38    -- '&'
              and  ( url:match( "^%d+;",  r + 1 )  or
                     url:match( "^[xX]%x+;",  r + 1 ) ) then
            -- part of a character reference; look further
            start = r + 1
        else
            start = false
        end
    until not start
    return r
end -- getHash()

URLutil.decode = function ( url, enctype )
    -- Decode a percent-encoded string and make the result safe for wikitext
    -- Precondition:
    --     url      -- string, encoded
    --     enctype  -- string, with encoding type for mw.uri.decode(),
    --                 or anything else for the default
    -- Postcondition:
    --     Returns string; decoded, HTML-encoded by mw.text.encode(),
    --             and with [ | ] turned into character references
    local r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.text.encode( mw.uri.decode( url, s ) )
    if r:find( "[%[|%]]" ) then
        local k
        r, k = r:gsub( "%[", "&#91;" )
                :gsub( "|", "&#124;" )
                :gsub( "%]", "&#93;" )
    end
    return r
end -- URLutil.decode()

URLutil.encode = function ( url, enctype )
    -- Percent-encode a string
    -- Precondition:
    --     url      -- string, to be encoded
    --     enctype  -- string, with encoding type for mw.uri.encode(),
    --                 or anything else for the default
    -- Postcondition:
    --     Returns string; encoded, with a leading * : ; escaped as well
    --             (presumably since they are wiki markup at line start),
    --             and with [ | ] escaped
    local k, r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.uri.encode( url, s )
    k = r:byte( 1, 1 )
    -- the test for a leading '#' (35) was disabled in the original code
    if k == 42  or      -- *
       k == 58  or      -- :
       k == 59 then     -- ;
        r = string.format( "%%%X%s", k, r:sub( 2 ) )
    end
    if r:find( "[%[|%]]" ) then
        -- '%' needs to be doubled in a gsub replacement string;
        -- a single "%5" would be read as reference to capture 5
        r, k = r:gsub( "%[", "%%5B" )
                :gsub( "|", "%%7C" )
                :gsub( "%]", "%%5D" )
    end
    return r
end -- URLutil.encode()

URLutil.getAuthority = function ( url )
    -- Retrieve host, with port if present, from a URL
    -- Precondition:
    --     url  -- string, with URL (scheme is optional in front of //)
    -- Postcondition:
    --     Returns string, lower case host or host:port
    --             nil    if string without valid authority
    --             false  if url is not a string
    local r
    if type( url ) == "string" then
        local colon, host, port
        local pattern = "^%s*%w*:?//([%w%.%%_-]+)(:?)([%d]*)/"
        local s = mw.text.decode( url )
        local i = s:find( "#", 6, true )
        -- cut off fragment; terminating slash is required by the pattern
        if i then
            s = s:sub( 1,  i - 1 )  ..  "/"
        else
            s = s .. "/"
        end
        host, colon, port = mw.ustring.match( s, pattern )
        if URLutil.isHost( host ) then
            host = mw.ustring.lower( host )
            if colon == ":" then
                if port:find( "^[1-9]" ) then
                    r = ( host .. ":" .. port )
                end
            elseif #port == 0 then
                r = host
            end
        end
    else
        r = false
    end
    return r
end -- URLutil.getAuthority()

URLutil.getFragment = function ( url, decode )
    -- Retrieve the fragment (behind '#') of a URL
    -- Precondition:
    --     url     -- string, with URL
    --     decode  -- string, "%" for percent decoding,
    --                "WIKI" for anchor encoding (.XX and _), or else none
    -- Postcondition:
    --     Returns string without leading '#'
    --             false  if there is no fragment
    --             nil    if url is not a string
    local r
    if type( url ) == "string" then
        local i = getHash( url )
        if i then
            r = mw.text.trim( url:sub( i ) ):sub( 2 )
            if type( decode ) == "string" then
                local encoding = mw.text.trim( decode )
                local launch
                if encoding == "%" then
                    launch = true
                elseif encoding == "WIKI" then
                    -- turn .XX into %XX and underscore into space
                    r = r:gsub( "%.(%x%x)", "%%%1" )
                         :gsub( "_", " " )
                    launch = true
                end
                if launch then
                    r = mw.uri.decode( r, "PATH" )
                end
            end
        else
            r = false
        end
    else
        r = nil
    end
    return r
end -- URLutil.getFragment()

URLutil.getHost = function ( url )
    -- Retrieve the host of a URL, without port
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string with host, or the falsy result
    --             of URLutil.getAuthority()
    local r = URLutil.getAuthority( url )
    if r then
        r = mw.ustring.match( r, "^([%w%.%%_%-]+):?[%d]*$" )
    end
    return r
end -- URLutil.getHost()

URLutil.getLocation = function ( url )
    -- Retrieve a URL without its fragment
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, trimmed and without '#' and anything behind
    --             false  if empty, or if starting with the fragment
    --             nil    if url is not a string
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            local i = getHash( r )
            if i then
                if i == 1 then
                    r = false
                else
                    r = r:sub( 1,  i - 1 )
                end
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.getLocation()

URLutil.getNormalized = function ( url )
    -- Normalize a URL: resolve character references, decode needless
    -- percent escapes per component, lower case scheme and authority,
    -- escape characters which interfere with wikitext
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, or false if url is not a string or empty
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            r = decodeComponentML( r )
        end
    else
        r = false
    end
    if r then
        local k = r:find( "//", 1, true )
        if k then
            -- j: first slash behind the authority
            local j = r:find( "/",  k + 2,  true )
            local sF, sP, sQ    -- fragment, path, query
            if r:find( "%%[2-7]%x" ) then
                -- split from the end: fragment, then query, then path
                local i = getHash( r )
                if i then
                    sF = r:sub( i + 1 )
                    r  = r:sub( 1,  i - 1 )
                    if sF == "" then
                        sF = false
                    else
                        sF = decodeComponentPercent( sF, "F" )
                    end
                end
                i = r:find( "?", 1, true )
                if i then
                    sQ = r:sub( i )
                    r  = r:sub( 1,  i - 1 )
                    sQ = decodeComponentPercent( sQ, "Q" )
                end
                if j then
                    if #r > j then
                        sP = r:sub( j + 1 )
                        sP = decodeComponentPercent( sP, "P" )
                    end
                    r = r:sub( 1,  j - 1 )
                end
            elseif j then
                -- nothing to decode; path keeps query and fragment
                local n = #r
                if r:byte( n, n ) == 35 then    -- '#'
                    -- drop empty fragment
                    n = n - 1
                    r = r:sub( 1, n )
                end
                if n > j then
                    sP = r:sub( j + 1 )
                end
                r = r:sub( 1,  j - 1 )
            end
            r = mw.ustring.lower( r ) .. "/"
            if sP then
                r = r .. sP
            end
            if sQ then
                r = r .. sQ
            end
            if sF then
                r = string.format( "%s#%s", r, sF )
            end
        end
        -- assignment drops the replacement counter of gsub()
        r = r:gsub( " ", "%%20" )
             :gsub( "%[", "%%5B" )
             :gsub( "|", "%%7C" )
             :gsub( "%]", "%%5D" )
             :gsub( "%<", "%%3C" )
             :gsub( "%>", "%%3E" )
    end
    return r
end -- URLutil.getNormalized()

URLutil.getPath = function ( url )
    -- Retrieve the path of a URL, without query and fragment
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string with path, or the falsy result
    --             of URLutil.getRelativePath()
    local r = URLutil.getRelativePath( url )
    if r then
        local s = r:match( "^([^%?]*)%?" )
        if s then
            r = s
        end
        s = r:match( "^([^#]*)#" )
        if s then
            r = s
        end
    end
    return r
end -- URLutil.getPath()

URLutil.getPort = function ( url )
    -- Retrieve the port number of a URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns number with port
    --             false  if the authority has no port
    --             the falsy result of URLutil.getAuthority() otherwise
    local r = URLutil.getAuthority( url )
    if r then
        r = r:match( ":([1-9][0-9]*)$" )
        if r then
            r = tonumber( r )
        else
            r = false
        end
    end
    return r
end -- URLutil.getPort()

URLutil.getQuery = function ( url, key, separator )
    -- Retrieve the query of a URL, or the value of one query parameter
    -- Precondition:
    --     url        -- string, with URL
    --     key        -- string, with parameter name, or else entire query
    --     separator  -- string, one of & ; , /   -- default: &
    -- Postcondition:
    --     Returns string with query (no leading '?') or parameter value
    --             false  if not present
    --             the falsy result of URLutil.getLocation() otherwise
    local r = URLutil.getLocation( url )
    if r then
        r = r:match( "^[^%?]*%?(.+)$" )
        if r then
            if type( key ) == "string" then
                local single = mw.text.trim( key )
                local sep = "&"
                local s, scan
                if type( separator ) == "string" then
                    s = mw.text.trim( separator )
                    if s:match( "^[&;,/]$" ) then
                        sep = s
                    end
                end
                -- the key is searched literally: escape pattern magic
                -- (local assignment drops the gsub counter)
                single = single:gsub( "%p", "%%%0" )
                -- wrap by separators to match first and last item, too
                s = string.format( "%s%s%s", sep, r, sep )
                scan = string.format( "%s%s=([^%s]*)%s",
                                      sep, single, sep, sep )
                r = s:match( scan )
            end
        end
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getQuery()

URLutil.getQueryTable = function ( url, separator )
    -- Retrieve all query parameters of a URL as a table
    -- Precondition:
    --     url        -- string, with URL
    --     separator  -- string, one of & ; , /   -- default: &
    -- Postcondition:
    --     Returns table, mapping name to value string,
    --             or to false for items without '=' behind a name;
    --             the falsy result of URLutil.getQuery() if no query
    local r = URLutil.getQuery( url )
    if r then
        local sep = "&"
        local parts, s, set
        if type( separator ) == "string" then
            s = mw.text.trim( separator )
            if s:match( "^[&;,/]$" ) then
                sep = s
            end
        end
        parts = mw.text.split( r, sep, true )
        r = { }
        for i = 1, #parts do
            s = parts[ i ]
            if s:find( "=", 2, true ) then
                s, set = s:match( "^([^=]+)=(.*)$" )
                if s then
                    r[ s ] = set
                end
            else
                r[ s ] = false
            end
        end -- for i
    end
    return r
end -- URLutil.getQueryTable()

URLutil.getRelativePath = function ( url )
    -- Retrieve everything behind the authority: path, query, fragment
    -- Precondition:
    --     url  -- string, with URL; absolute, protocol relative (//)
    --             or starting with a slash
    -- Postcondition:
    --     Returns string, starting with a slash;
    --             "/" for a resource URL without path
    --             false  if not found
    --             nil    if url is not a string
    local r
    if type( url ) == "string" then
        local s = url:match( "^%s*[a-zA-Z]*://(.*)$" )
        if s then
            s = s:match( "[^/]+(/.*)$" )
        else
            local x
            x, s = url:match( "^%s*(/?)(/.*)$" )
            if x == "/" then
                -- protocol relative: skip the authority
                s = s:match( "/[^/]+(/.*)$" )
            end
        end
        if s then
            r = mw.text.trim( s )
        elseif URLutil.isResourceURL( url ) then
            r = "/"
        else
            r = false
        end
    else
        r = nil
    end
    return r
end -- URLutil.getRelativePath()

URLutil.getScheme = function ( url )
    -- Retrieve the scheme of a URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, lower case scheme followed by "://"
    --                     (scheme of more than two letters),
    --                     or "//" for a protocol relative URL
    --             false  if no scheme detected
    --             nil    if url is not a string
    local r
    if type( url ) == "string" then
        local pattern = "^%s*([a-zA-Z]*)(:?)(//)"
        local prot, colon, slashes = url:match( pattern )
        r = false
        if slashes == "//" then
            if colon == ":" then
                if #prot > 2 then
                    r = prot:lower() .. "://"
                end
            elseif #prot == 0 then
                r = "//"
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.getScheme()

URLutil.getSortkey = function ( url )
    -- Build a sortkey from a URL: domain labels in reverse order,
    -- followed by port, scheme and the remainder
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string with sortkey, starting with "///";
    --             url unchanged if no "//" with valid scheme was found
    --             or if url is not a string
    local r = url
    if type( url ) == "string" then
        local i = url:find( "//" )
        if i then
            -- string.find() never yields 0; a protocol relative URL
            -- begins with "//" and gets an empty scheme
            local scheme = url:match( "^%s*([a-zA-Z]*)://" )
            if not scheme  and  url:match( "^%s*//" ) then
                scheme = ""
            end
            if scheme then
                local s = url:sub( i + 2 )
                local comps, site, m, suffix
                scheme = scheme:lower()
                i = s:find( "/" )
                if i  and  i > 1 then
                    suffix = s:sub( i + 1 )    -- mw.uri.encode
                    s      = s:sub( 1,  i - 1 )
                    suffix = suffix:gsub( "#", " " )
                else
                    suffix = ""
                end
                -- port is captured without colon and converted,
                -- since %d below requires a number
                site, m = s:match( "^(.+):(%d+)$" )
                if m then
                    m = tonumber( m )
                else
                    site = s
                    m    = 0
                end
                comps = mw.text.split( site:lower(), ".", true )
                r = "///"
                for k = #comps, 2, -1 do
                    r = string.format( "%s%s.", r, comps[ k ] )
                end -- for --k
                r = string.format( "%s%s %d %s: %s",
                                   r, comps[ 1 ], m, scheme, suffix )
            end
        end
    end
    return r
end -- URLutil.getSortkey()

URLutil.getTLD = function ( url )
    -- Retrieve the top level domain of the host of a URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string with last domain label
    --             false  if host does not end with such label
    --             the falsy result of URLutil.getHost() otherwise
    local r = URLutil.getHost( url )
    if r then
        r = mw.ustring.match( r, "%w+%.(%a[%w%-]*%a)$" )
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getTLD()

URLutil.getTop2domain = function ( url )
    -- Retrieve the last two domain labels of the host of a URL
    -- Postcondition:
    --     Returns string, or false
    return getTopDomain( url, 2 )
end -- URLutil.getTop2domain()

URLutil.getTop3domain = function ( url )
    -- Retrieve the last three domain labels of the host of a URL
    -- Postcondition:
    --     Returns string, or false
    return getTopDomain( url, 3 )
end -- URLutil.getTop3domain()

URLutil.isAuthority = function ( s )
    -- Is this a valid authority: host, optionally followed by :port ?
    -- Precondition:
    --     s  -- string, with host or host:port
    -- Postcondition:
    --     Returns the result of URLutil.isHost() for the host
    --             false  if malformed, or if the port is invalid
    --             nil    if s is not a string
    local r
    if type( s ) == "string" then
        local pattern = "^%s*([%w%.%%_-]+)(:?)(%d*)%s*$"
        local host, colon, port = mw.ustring.match( s, pattern )
        if colon == ":" then
            -- port needs to be a number without leading zero
            if not port:match( "^[1-9][0-9]*$" ) then
                r = false
            end
        elseif port ~= "" then
            -- digits without colon, or no match at all
            r = false
        end
        -- keep a rejection from above rather than overwriting it
        if r == nil then
            r = URLutil.isHost( host )
        end
    else
        r = nil
    end
    return r
end -- URLutil.isAuthority()

URLutil.isDomain = function ( s )
    -- Is this a syntactically valid domain name ?
    -- Precondition:
    --     s  -- string, with domain
    -- Postcondition:
    --     Returns true   if labels and top level domain look valid
    --             false  if two consecutive dots are contained
    --             nil    if pattern not matched, or s is not a string
    local r
    if type( s ) == "string" then
        local scan = "^%s*([%w%.%%_-]*%w)%.(%a[%w-]*%a)%s*$"
        local scope
        s, scope = mw.ustring.match( s, scan )
        if type( s ) == "string" then
            if mw.ustring.find( s, "^%w" ) then
                if mw.ustring.find( s, "..", 1, true ) then
                    r = false
                else
                    r = true
                end
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.isDomain()

URLutil.isDomainExample = function ( url )
    -- RFC 2606: example.com example.net example.org example.edu
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true   if the top two domains are an example domain
    --             false  otherwise
    local r = getTopDomain( url, 2 )
    if r then
        local s = r:lower():match( "^example%.([a-z][a-z][a-z])$" )
        if s then
            r = ( s == "com"  or
                  s == "edu"  or
                  s == "net"  or
                  s == "org" )
        else
            r = false
        end
    end
    return r
end -- URLutil.isDomainExample()

URLutil.isDomainInt = function ( url )
    -- Internationalized Domain Name (Punycode)
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true   if the host contains a character outside
    --                    printable ASCII, or a label starting with xn--
    --             false  if plain ASCII host
    --             the falsy result of URLutil.getHost() otherwise
    local r = URLutil.getHost( url )
    if r then
        if r:match( "^[!-~]+$" ) then
            -- leading dot lets the first label be detected as well
            local s = "." .. r
            if s:find( ".xn--", 1, true ) then
                r = true
            else
                r = false
            end
        else
            r = true
        end
    end
    return r
end -- URLutil.isDomainInt()

URLutil.isHost = function ( s )
    -- Is this a domain name or an IP address ?
    -- Postcondition:
    --     Returns true for a domain, 4 or 6 for an IP address,
    --             or a falsy value
    return URLutil.isDomain( s ) or URLutil.isIP( s )
end -- URLutil.isHost()

URLutil.isHostPathResource = function ( s )
    -- Is this a resource URL, even if scheme and "//" are omitted ?
    -- Precondition:
    --     s  -- string, with URL, or host followed by path
    -- Postcondition:
    --     Returns true or false; nil for nil
    local r = URLutil.isResourceURL( s )
    if not r  and  s then
        -- retry as protocol relative URL
        r = URLutil.isResourceURL( "//" .. mw.text.trim( s ) )
    end
    return r
end -- URLutil.isHostPathResource()

URLutil.isIP = function ( s )
    -- Is this an IP address ?
    -- Postcondition:
    --     Returns 4 for IPv4, 6 for IPv6, false otherwise
    return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6
end -- URLutil.isIP()

URLutil.isIPlocal = function ( s )
    -- IPv4 according to RFC 1918, RFC 1122; even any 0.0.0.0 (RFC 5735)
    -- Precondition:
    --     s  -- string, with IPv4 address in dotted decimal notation
    -- Postcondition:
    --     Returns true   if 0.*.*.*, 10.*.*.*, 127.*.*.*,
    --                    172.16-31.*.* or 192.168.*.*
    --             false  otherwise, also if s is not a string
    local r = false
    if type( s ) ~= "string" then
        return r
    end
    -- all relevant first octets begin with 0 or 1
    local num = s:match( "^ *([01][0-9]*)%." )
    if num then
        num = tonumber( num )
        if num == 0 then
            r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
        elseif num == 10  or  num == 127 then
            -- loopback; private/local host: 127.0.0.1
            r = URLutil.isIPv4( s )
        elseif num == 169 then
            -- 169.254.*.*
            -- TODO(review): branch was empty in the original code;
            --               link-local addresses are not reported
        elseif num == 172 then
            -- 172.(16...31).*.*
            num = s:match( "^ *0*172%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num >= 16  and  num <= 31 then
                    r = URLutil.isIPv4( s )
                end
            end
        elseif num == 192 then
            -- 192.168.*.*
            -- (original tested an undefined variable here,
            --  so this branch was never entered)
            num = s:match( "^ *0*192%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num == 168 then
                    r = URLutil.isIPv4( s )
                end
            end
        end
    end
    if r then
        r = true
    end
    return r
end -- URLutil.isIPlocal()

URLutil.isIPv4 = function ( s )
    -- Is this an IPv4 address in dotted decimal notation ?
    -- Precondition:
    --     s  -- string, with address; surrounding whitespace permitted
    -- Postcondition:
    --     Returns true   if four octets, each below 256,
    --                    the first one without leading zero
    --             false  otherwise
    local function legal( n )
        return ( tonumber( n ) < 256 )
    end
    local r = false
    if type( s ) == "string" then
        local p1, p2, p3, p4 = s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" )
        if p1 and p2 and p3 and p4 then
            r = legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
        end
    end
    return r
end -- URLutil.isIPv4()

URLutil.isIPv6 = function ( s )
    -- Is this an IPv6 address (colon separated hex groups) ?
    -- Precondition:
    --     s  -- string, with address; surrounding whitespace permitted
    -- Postcondition:
    --     Returns true or false
    local dcolon, groups
    if type( s ) ~= "string" then
        return false
    end
    -- trim first; otherwise whitespace is rejected as illegal character
    -- and trimming would have no effect
    s = mw.text.trim( s )
    if s:len() == 0
       or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
       or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
       or s:find( "[^:]:$" )
       or s:find( ":::" )
    then
        return false
    end
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false    -- at most one ::
    end
    s = s:gsub( "^:?", ":" ) -- prepend : if needed, upper
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
           and ( s:len() == 0 or
                 ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()

URLutil.isMailAddress = function ( s )
    -- Is this an e-mail address: local part, @, valid domain ?
    -- Precondition:
    --     s  -- string, with address
    -- Postcondition:
    --     Returns the result of URLutil.isDomain() for the part behind @
    --             false  if s is not a string
    if type( s ) == "string" then
        s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
        return URLutil.isDomain( s )
    end
    return false
end -- URLutil.isMailAddress()

URLutil.isMailLink = function ( s )
    -- Is this a mailto: link with a valid e-mail address ?
    -- Precondition:
    --     s  -- string, with link
    -- Postcondition:
    --     Returns the result of URLutil.isMailAddress() for the address
    --             false  if not a mailto: link
    if type( s ) == "string" then
        local addr
        s, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( s ) == "string" then
            if s:lower() == "mailto" then
                return URLutil.isMailAddress( addr )
            end
        end
    end
    return false
end -- URLutil.isMailLink()

local function isProtocolAccepted( prot, supplied )
    -- Is this scheme contained in a list of accepted schemes ?
    -- Precondition:
    --     prot      -- string, with scheme; may be followed by : or ://
    --                  or "//" only (protocol relative)
    --     supplied  -- string, space separated list of lower case
    --                  schemes, with leading and trailing space
    -- Postcondition:
    --     Returns true or false
    if type( prot ) == "string" then
        local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
        if slashes ~= "/" then
            if scheme == "" then
                -- protocol relative "//" is accepted, ":" or "://" not
                if colon ~= ":"  and  slashes == "//" then
                    return true
                end
            elseif colon == ":"  or  slashes == "" then
                local s = supplied:match( " " .. scheme:lower() .. " " )
                if type( s ) == "string" then
                    return true
                end
            end
        end
    end
    return false
end -- isProtocolAccepted()

URLutil.isProtocolDialog = function ( prot )
    -- Is this one of the schemes mailto irc ircs ssh telnet ?
    -- Postcondition:
    --     Returns true or false
    return isProtocolAccepted( prot, " mailto irc ircs ssh telnet " )
end -- URLutil.isProtocolDialog()

URLutil.isProtocolWiki = function ( prot )
    -- Is this one of the schemes ftp ftps git http https nntp sftp svn
    -- worldwind, or protocol relative ?
    -- Postcondition:
    --     Returns true or false
    return isProtocolAccepted( prot,
                               " ftp ftps git http https nntp sftp svn worldwind " )
end -- URLutil.isProtocolWiki()

URLutil.isResourceURL = function ( url )
    -- Is this a URL of a resource: // http:// https:// ftp:// sftp://
    -- with valid authority and without inner whitespace ?
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true or false
    local scheme = URLutil.getScheme( url )
    if scheme then
        local s = " // http:// https:// ftp:// sftp:// "
        s = s:find( string.format( " %s ", scheme ) )
        if s then
            if URLutil.getAuthority( url ) then
                if not url:match( "%S%s+%S" ) then
                    local s1, s2 = url:match( "^([^#]+)(#.*)$" )
                    if s2 then
                        -- fragment requires a slash behind the authority
                        if url:match( "^%s*[a-zA-Z]*:?//(.+)/" ) then
                            return true
                        end
                    else
                        return true
                    end
                end
            end
        end
    end
    return false
end -- URLutil.isResourceURL()

URLutil.isSuspiciousURL = function ( url )
    -- Might this URL cause trouble, or is it not a resource URL at all ?
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true   if not a resource URL, or if containing
    --                    @ in authority, '' (wiki markup), [ | ] ",
    --                    one of the code points U+2009...U+200F,
    --                    U+202A...U+202F, U+2060,
    --                    or a trailing dot or comma
    --             false  otherwise
    if URLutil.isResourceURL( url ) then
        local s = URLutil.getAuthority( url )
        -- 45 is '-': ranges of code points within the character class
        local pat = "[%[|%]" ..
                    mw.ustring.char( 34,
                                     8201, 45, 8207,
                                     8234, 45, 8239,
                                     8288 )
                    .. "]"
        -- the class holds code points above 127, so that the
        -- Unicode-aware find is required; byte-wise string.find()
        -- would treat single UTF-8 bytes as range limits
        if s:find( "@" )
           or url:find( "''", 1, true )
           or mw.ustring.find( url, pat )
           or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO  zero width character ??
        return false
    end
    return true
end -- URLutil.isSuspiciousURL()

URLutil.isUnescapedURL = function ( url, trailing )
    -- Is this a web URL containing an unescaped [ | ] ?
    -- Precondition:
    --     url       -- string, with URL
    --     trailing  -- if a string, the check is skipped
    -- Postcondition:
    --     Returns true or false
    if type( trailing ) ~= "string" then
        if URLutil.isWebURL( url ) then
            if url:match( "[%[|%]]" ) then
                return true
            end
        end
    end
    return false
end -- URLutil.isUnescapedURL()

URLutil.isWebURL = function ( url )
    -- Is this a URL with scheme and authority, without inner
    -- whitespace and without '' (wiki markup) ?
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true or false
    if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then
        -- searching for an empty string would match every URL;
        -- two apostrophes are meant
        if not url:find( "%S%s+%S" )  and
           not url:find( "''", 1, true ) then
            return true
        end
    end
    return false
end -- URLutil.isWebURL()

URLutil.wikiEscapeURL = function ( url )
    -- Replace [ | ] by character references, for use in wikitext
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string
    if url:find( "[%[|%]]" ) then
        local n
        url, n = url:gsub( "%[", "&#91;" )
                    :gsub( "|", "&#124;" )
                    :gsub( "%]", "&#93;" )
    end
    return url
end -- URLutil.wikiEscapeURL()

Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                         or wikidata|item|~|@ or false
    -- Postcondition:
    --     Returns  string  -- with queried version/item, also if problem
    --              false   -- if appropriate
    -- 2020-08-17
    local since  = atleast
    local last   = ( since == "~" )
    local linked = ( since == "@" )
    local link   = ( since == "item" )
    local r
    if last  or  link  or  linked  or  since == "wikidata" then
        local item = Failsafe.item
        since = false
        if type( item ) == "number"  and  item > 0 then
            local suited = string.format( "Q%d", item )
            if link then
                r = suited
            else
                local entity = mw.wikibase.getEntity( suited )
                if type( entity ) == "table" then
                    local seek = Failsafe.serialProperty or "P348"
                    local vsn  = entity:formatPropertyValues( seek )
                    if type( vsn ) == "table"  and
                       type( vsn.value ) == "string"  and
                       vsn.value ~= "" then
                        if last  and  vsn.value == Failsafe.serial then
                            r = false
                        elseif linked then
                            if mw.title.getCurrentTitle().prefixedText
                               ==  mw.wikibase.getSitelink( suited ) then
                                r = false
                            else
                                r = suited
                            end
                        else
                            r = vsn.value
                        end
                    end
                end
            end
        end
    end
    if type( r ) == "nil" then
        local serial = Failsafe.serial
        -- comparing against a missing serial would raise an error
        if not since  or
           ( type( serial ) == "string"  and  since <= serial ) then
            r = serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()

local function Template( frame, action, amount )
    -- Run actual code from template transclusion
    -- Precondition:
    --     frame   -- object
    --     action  -- string, with function name
    --     amount  -- number, of args if > 1
    -- Postcondition:
    --     Return string or not
    local n = amount or 1
    local v = { }
    local r, s
    for i = 1, n do
        s = frame.args[ i ]
        if s then
            s = mw.text.trim( s )
            if s ~= "" then
                v[ i ] = s
            end
        end
    end -- for i
    -- the library function is called only if the first arg is not empty
    if v[ 1 ] then
        r = URLutil[ action ]( v[ 1 ], v[ 2 ], v[ 3 ] )
    end
    return r
end -- Template()

-- Export table for #invoke. Every entry forwards the trimmed template
-- arguments to the URLutil function of the same name via Template().
-- String results are returned as they are, with "" instead of a falsy
-- result; boolean tests yield "1" or "".
local p = { }

function p.decode( frame )
    return Template( frame, "decode", 2 ) or ""
end
function p.encode( frame )
    return Template( frame, "encode", 2 ) or ""
end
function p.getAuthority( frame )
    return Template( frame, "getAuthority" ) or ""
end
function p.getFragment( frame )
    -- fragment is returned with leading '#'
    local r = Template( frame, "getFragment", 2 )
    if r then
        r = "#" .. r
    else
        r = ""
    end
    return r
end
function p.getHost( frame )
    return Template( frame, "getHost" ) or ""
end
function p.getLocation( frame )
    return Template( frame, "getLocation" ) or ""
end
function p.getNormalized( frame )
    return Template( frame, "getNormalized" ) or ""
end
function p.getPath( frame )
    return Template( frame, "getPath" ) or ""
end
function p.getPort( frame )
    return Template( frame, "getPort" ) or ""
end
function p.getQuery( frame )
    -- entire query is returned with leading '?';
    -- the value of a single parameter (2nd arg) without
    local r = Template( frame, "getQuery", 3 )
    if r then
        local key = frame.args[ 2 ]
        if key then
            key = mw.text.trim( key )
            if key == "" then
                key = nil
            end
        end
        if not key then
            r = "?" .. r
        end
    else
        r = ""
    end
    return r
end
function p.getRelativePath( frame )
    return Template( frame, "getRelativePath" ) or ""
end
function p.getScheme( frame )
    return Template( frame, "getScheme" ) or ""
end
function p.getSortkey( frame )
    return Template( frame, "getSortkey" ) or ""
end
function p.getTLD( frame )
    return Template( frame, "getTLD" ) or ""
end
function p.getTop2domain( frame )
    return Template( frame, "getTop2domain" ) or ""
end
function p.getTop3domain( frame )
    return Template( frame, "getTop3domain" ) or ""
end
function p.isAuthority( frame )
    return Template( frame, "isAuthority" ) and "1" or ""
end
function p.isDomain( frame )
    return Template( frame, "isDomain" ) and "1" or ""
end
function p.isDomainExample( frame )
    return Template( frame, "isDomainExample" ) and "1" or ""
end
function p.isDomainInt( frame )
    return Template( frame, "isDomainInt" ) and "1" or ""
end
function p.isHost( frame )
    return Template( frame, "isHost" ) and "1" or ""
end
function p.isHostPathResource( frame )
    return Template( frame, "isHostPathResource" ) and "1" or ""
end
function p.isIP( frame )
    -- yields 4 or 6 rather than "1"
    return Template( frame, "isIP" ) or ""
end
function p.isIPlocal( frame )
    return Template( frame, "isIPlocal" ) and "1" or ""
end
function p.isIPv4( frame )
    return Template( frame, "isIPv4" ) and "1" or ""
end
function p.isIPv6( frame )
    return Template( frame, "isIPv6" ) and "1" or ""
end
function p.isMailAddress( frame )
    return Template( frame, "isMailAddress" ) and "1" or ""
end
function p.isMailLink( frame )
    return Template( frame, "isMailLink" ) and "1" or ""
end
function p.isProtocolDialog( frame )
    return Template( frame, "isProtocolDialog" ) and "1" or ""
end
function p.isProtocolWiki( frame )
    return Template( frame, "isProtocolWiki" ) and "1" or ""
end
function p.isResourceURL( frame )
    return Template( frame, "isResourceURL" ) and "1" or ""
end
function p.isSuspiciousURL( frame )
    return Template( frame, "isSuspiciousURL" ) and "1" or ""
end
function p.isUnescapedURL( frame )
    return Template( frame, "isUnescapedURL", 2 ) and "1" or ""
end
function p.isWebURL( frame )
    return Template( frame, "isWebURL" ) and "1" or ""
end
function p.wikiEscapeURL( frame )
    return Template( frame, "wikiEscapeURL" )
end

p.failsafe = function ( frame )
    -- Versioning interface
    -- Precondition:
    --     frame  -- object with args[1], or string with required version
    -- Postcondition:
    --     Returns the result of Failsafe.failsafe(), or "" if falsy
    local s = type( frame )
    local since
    if s == "table" then
        since = frame.args[ 1 ]
    elseif s == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        if since == "" then
            since = false
        end
    end
    return Failsafe.failsafe( since )  or  ""
end -- p.failsafe()

function p.URLutil()
    -- Give access to the library table for other Lua modules
    return URLutil
end

return p