--
----------------------------< D E P E N D E N C I E S >------------------------------------------------------
require('strict');local getArgs = require ('Module:Arguments').getArgs;
----------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
-- File-level placeholders. webarchive() replaces most of these with the matching tables from the
-- loaded data module before any other function in this file reads them.
local categories = {};															-- category names
local config = {};																-- global configuration settings
local digits = {};																-- for i18n; table that translates local-wiki digits to western digits
local err_warn_msgs = {};														-- error and warning messages
local excepted_pages = {};														-- pages that are categorized even when their namespace / subpage type is not
local month_num = {};															-- for i18n; table that translates local-wiki month names to western digits
local prefixes = {};															-- service provider tail string prefixes
local services = {};															-- archive service provider data
local s_text = {};																-- table of static text strings used to build final rendering
local uncategorized_namespaces = {};											-- list of namespaces that we should not categorize
local uncategorized_subpages = {};												-- list of subpages that should not be categorized
----------------------------< P A G E S C O P E I D E N T I F I E R S >----------------------------------
local non_western_digits;														-- boolean flag set true when data.digits.enable is true
local this_page = mw.title.getCurrentTitle();									-- title object; its .fullText, .nsText and .text fields are read by createTracking()

local track = {};																-- associative array to hold tracking categories (category name -> 1)
local ulx = {};																	-- associative array to hold template data (ulx.url1, ulx.url2, ...)
--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------

Populates numbered arguments ($1, $2, ...) in a message string using an argument table.

	msg  - message string, optionally containing numbered placeholders
	args - sequence of replacement values; when nil or false, msg is returned unchanged

Returns the message as plain text.

]]

local function substitute (msg, args)
	if not args then
		return msg;																-- nothing to substitute
	end
	return mw.message.newRawMessage (msg, args):plain() or msg;					-- plain() must be called; fall back to msg should it yield nothing
end
--[[--------------------------< tableLength >-----------------------

Given a 1-D table, return the number of elements.

Counts every key/value pair, so it also works for tables that are not sequences (where # is unreliable).

]]

local function tableLength(T)
	local count = 0
	for _ in pairs(T) do
		count = count + 1
	end
	return count
end
--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Makes a wikilink.

When no_link is nil:
	link and display both non-empty	-> [[link|display]]
	only link non-empty				-> [[link]]
	link missing or empty			-> the display text when there is any, else an empty string

When no_link is anything other than nil, no link markup is produced:
	display non-empty				-> display
	otherwise						-> link, or an empty string when link is nil

]=]

local function make_wikilink (link, display, no_link)
	local has_display = display and ('' ~= display);

	if nil == no_link then														-- caller wants a real wikilink
		if link and ('' ~= link) then
			if has_display then
				return table.concat ({'[[', link, '|', display, ']]'});
			end
			return table.concat ({'[[', link, ']]'});
		end
		return display or '';													-- link not set so return the display text or empty string
	end

	if has_display then															-- no_link: plain text only
		return display;															-- return the display text
	end
	return link or '';															-- return the target article name or empty string
end
--[[--------------------------< createTracking >-----------------------

Return the data in track[], i.e. the tracking categories, as one string of category wikilinks.

Returns an empty string when the current page must not be categorized: its namespace is listed in
uncategorized_namespaces or its name matches one of the patterns in uncategorized_subpages.  Pages listed in
excepted_pages (typically this module's / template's testcases pages) skip both checks.

]]

local function createTracking ()
	if not excepted_pages[this_page.fullText] then								-- page is not explicitly allowed to be categorized
		if uncategorized_namespaces[this_page.nsText] then
			return '';															-- this page not to be categorized so return empty string
		end
		for _, pattern in ipairs (uncategorized_subpages) do					-- cycle through page name patterns
			if this_page.text:match (pattern) then								-- test page name against each pattern
				return '';														-- this subpage type not to be categorized so return empty string
			end
		end
	end

	local out = {};
	for category in pairs (track) do											-- an empty track simply yields no iterations
		table.insert (out, make_wikilink (category));							-- convert category names to links
	end
	return table.concat (out);													-- concat into one big string; empty string if table is empty
end
----------------------------< inlineError >-----------------------
-- Critical error. Render the output completely in red and add the page to the error tracking category.
-- This function is called as the last thing before abandoning this module.
-- inlineError (msg, args): marks the page for the error tracking category by setting
-- track[categories.error] to 1, then returns the value of the table.concat expression below.
-- NOTE(review): the argument list of table.concat is missing from this copy of the file, so as written
-- the function returns the table.concat function value itself and msg / args are never used.
-- Presumably the original assembled the error markup from msg and args -- restore from upstream.
local function inlineError (msg, args) track[categories.error] = 1 return table.concat end
----------------------------< inlineRed >-----------------------
-- Render a text fragment in red, such as a warning, as part of the final output. Adds a tracking category.
-- inlineRed (msg, trackmsg): wraps msg between an opening and a closing string and returns the result.
-- When trackmsg selects "error" the page is flagged via track[categories.error] = 1.
-- Callers in this file pass 'warning' or 'error' as trackmsg.
-- NOTE(review): the comparison(s) on trackmsg and the contents of the opening / closing string literals
-- are missing from this copy of the file; restore from upstream before relying on this function.
local function inlineRed(msg, trackmsg) if trackmsg
"error" then track[categories.error] = 1; end
return '
' .. msg .. ''end--
--[[--------------------------< B A S E 6 2 >------------------------------------------------------------------

Converts a base-62 string to its numeric value.

Digit values: '0'-'9' -> 0..9, 'A'-'Z' -> 10..35, 'a'-'z' -> 36..61; the right-most character is the least
significant digit.

	value - string to decode

Returns the decoded number (0 for an empty string), or nil when value contains a character outside [0-9A-Za-z].

]]

local function base62 (value)
	if value:match ('%W') then													-- value must only be in the set [0-9a-zA-Z]
		return;																	-- nil return when value contains extraneous characters
	end

	local result = 0;
	local place = 1;															-- weight of the current digit: 62^0, 62^1, ...
	for i = #value, 1, -1 do													-- loop through all characters in value from ls digit to ms digit
		local c = value:byte (i);
		if c >= 48 and c <= 57 then												-- character is digit 0-9
			c = c - 48;
		elseif c >= 65 and c <= 90 then											-- character is ascii A-Z
			c = c - 55;
		else																	-- must be ascii a-z
			c = c - 61;
		end
		result = result + c * place;											-- accumulate this base62 character's value
		place = place * 62;														-- bump for next
	end

	return result;
end
----------------------------< D E C O D E _ D A T E >--------------------------------------------------------
-- Given a date string, return it in ISO format along with an indicator of the date's original format.
-- No error checking is done here except that month names must be recognizable as legitimate month names
-- with proper capitalization and the date string must match one of the recognized date formats;
-- returns nil otherwise.
-- decode_date (date_str): tries to turn date_str into a YYYY-MM-DD string.
-- Returns two values: the ISO date, or nil when no pattern matched or the captured month is not a key
-- of month_num; and a format key -- 'iso' for an input that is already ISO and for every failure,
-- otherwise the key of the entry in the local patterns table whose pattern matched.
-- When non_western_digits is set, digits in date_str are first mapped through the digits table.
-- NOTE(review): the initialisers of patterns and t, and the assignment to t inside the loop, are missing
-- from this copy of the file, so the recognised formats cannot be read from here -- restore from upstream.
local function decode_date (date_str) local patterns = ; local t = ;
if non_western_digits then -- this wiki uses non-western digits? date_str = mw.ustring.gsub (date_str, '%d', digits); -- convert this wiki's non-western digits to western digits end
if date_str:match ('^%d%d%d%d%-%d%d%-%d%d$') then -- already an iso format date, return western digits form return date_str, 'iso'; end for k, v in pairs (patterns) do local c1, c2, c3 = mw.ustring.match (date_str, patterns[k][1]); -- c1 .. c3 are captured but we don't know what they hold if c1 then -- set on match t = ; if month_num[t.m] then -- when month not already a number t.m = month_num[t.m]; -- replace valid month name with a number else return nil, 'iso'; -- not a valid date form because month not valid end
return mw.ustring.format ('%.4d-%.2d-%.2d', t.y, t.m, t.d), k; -- return date in iso format end end return nil, 'iso'; -- date could not be decoded; return nil and default iso dateend
--
-- makeDate (year, month, day, df): builds a '-'-joined numeric date string and returns it formatted by
-- the content language's formatDate, using the format string stored under key df in the local format table.
-- When non_western_digits is set, digits in the assembled date are mapped through the digits table first.
-- NOTE(review): the contents of the format table and the list passed to table.concat are missing from
-- this copy of the file (presumably {year, month, day} -- confirm against upstream), as are the call
-- parentheses after mw.getContentLanguage.
local function makeDate (year, month, day, df) local format = ;
local date = table.concat ('-'); -- assemble year-initial numeric-format date (zero padding not required here)
if non_western_digits then -- this wiki uses non-western digits? date = mw.ustring.gsub (date, '%d', digits); -- convert this wiki's non-western digits to western digits end
return mw.getContentLanguage:formatDate (format[df], date);end
----------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------
-- Returns true if the date is after 31 December 1899 (why is 1900 the min year? shouldn't the internet's
-- date of birth be the min year?), is not after today's date, and represents a valid date (29 February 2017
-- is not a valid date). Applies Gregorian leap-year rules.
-- All arguments are required.
-- is_valid_date (year, month, day): date validity check.
-- Visible checks: y must lie in 1900 .. the current year and m in 1 .. 12, otherwise false is returned;
-- d must lie in 1 .. month_length, otherwise false is returned; the final result is an os.time
-- comparison (date at midnight <= current date/time).  A leap-year test on y (y%4, y%100) is partly visible.
-- NOTE(review): the days_in_month table, the assignment of y / m / d, most of the argument and leap-year
-- conditions, and the arguments of both os.time calls are missing from this copy of the file -- restore
-- from upstream.
local function is_valid_date (year, month, day) local days_in_month = ; local month_length; local y, m, d; local today = os.date ('*t'); -- fetch a table of current date parts
if not year or
month or not day or
if 1900 > y or today.year < y or 1 > m or 12 < m then -- year and month are within bounds TODO: 1900? return false; end
if (2
(y%4) and (0~=(y%100) or 0
if 1 > d or month_length < d then -- day is within bounds return false; end -- here when date parts represent a valid date return os.time <= os.time; -- date at midnight must be less than or equal to current date/timeend
--[[--------------------------< decodeWebciteDate >-----------------------

Given a URI-path to Webcite (e.g. /67xHmVFWP), decode the date encoded in it.

	path - URI path; the text between the first and second '/' is the snapshot identifier
	df   - accepted for symmetry with the other decoders; not used here

Returns:
	'query'	when the identifier is not base62 (query, cache, getfile, or all-numeric forms)
	nil		when the path has no identifier or the identifier contains characters outside [0-9A-Za-z]
	otherwise the date string produced by makeDate() for the 'iso' format; a webcite identifier is a
	unix timestamp encoded as base62

]]

local function decodeWebciteDate(path, df)
	local id = mw.text.split (path, '/')[2];									-- path starts with '/', so [2] is the first segment

	if not id then
		return nil;																-- no identifier to decode; treated like an invalid base62 string
	end

	-- valid URL formats that are not base62
	--	http://www.webcitation.org/query?id=1138911916587475
	--	http://www.webcitation.org/query?url=http..&date=2012-06-01+21:40:03
	--	http://www.webcitation.org/1138911916587475
	--	http://www.webcitation.org/cache/73e53dd1f16cf8c5da298418d2a6e452870cf50e
	--	http://www.webcitation.org/getfile.php?fileid=1c46e791d68e89e12d0c2532cc3cf629b8bc8c8e
	if id:find ('query', 1, true) or id:find ('cache', 1, true) or id:find ('getfile', 1, true) or tonumber (id) then
		return 'query';
	end

	local decode = base62 (id);													-- base62 string -> exponential number
	if not decode then
		return nil;																-- nil return when id contains characters not in %w
	end

	local timestamp = string.format ("%d", decode):sub (1, 10);					-- exponential number -> text -> first 10 characters (a unix timestamp)
	local dt = os.date ('*t', tonumber (timestamp));							-- -> a table of date parts

	decode = makeDate (dt.year, dt.month, dt.day, 'iso');						-- date comparisons are all done in iso format with western digits
	if non_western_digits then													-- this wiki uses non-western digits?
		decode = mw.ustring.gsub (decode, '%d', digits);						-- map each digit through the digits table; keep only the string result
	end

	return decode;
end
--
-- decodeWaybackDate (path, df): extracts the snapshot timestamp from a wayback-style URI path.
-- Visible steps: strip a leading /web/, /all/, /###/, /org-###/, /ukgwa/, /tna/ or /; take the text up to
-- the next '/' as the timestamp; drop a trailing two-letter flag ("re_" style) and any dashes; a trailing
-- '*' is removed and produces the ts_cal warning; a non-numeric timestamp returns nil, 'ts_nan' (the only
-- fatal case); fewer than 8 digits or a date rejected by is_valid_date returns an inlineRed error message
-- (ts_short / ts_date) as second value; otherwise the year, month and day are joined with '-'.
-- NOTE(review): the replacement strings of the gsub calls, several conditions, the first return values,
-- the list passed to table.concat and everything after "if 14" (including the closing end) are missing
-- from this copy of the file -- restore from upstream.
local function decodeWaybackDate(path, df)
local msg, snapdate;
snapdate = path:gsub ('^/web/', ):gsub ('^/all/', ):gsub ('^/%d%d%d%d?%d?/', ):gsub ('^/org%-%d%d%d%d?/', ):gsub ('^/ukgwa/', ):gsub ('^/tna/', ):gsub ('^/', ); -- remove leading /web/, /all/, /###/, /org-###/, /ukgwa/, /tna/, or / snapdate = snapdate:match ('^[^/]+'); -- get timestamp if snapdate
snapdate = snapdate:gsub ('%a%a_%d?$', ):gsub ('%-', ); -- from date, remove any trailing "re_", dashes
msg = ; if snapdate:match ('%*$') then -- a trailing '*' causes calendar display at archive .org snapdate = snapdate:gsub ('%*$', ); -- remove so not part of length calc later msg = inlineRed (err_warn_msgs.ts_cal, 'warning'); -- make a message end
if not tonumber(snapdate) then return nil, 'ts_nan'; -- return nil (fatal error flag) and message selector end
local dlen = snapdate:len; if dlen < 8 then -- we need 8 digits TODO: but shouldn't this be testing for 14 digits? return , inlineRed (err_warn_msgs.ts_short, 'error'); -- return empty string and error message end
local year, month, day = snapdate:match ('(%d%d%d%d)(%d%d)(%d%d)'); -- no need for snapdatelong here
if not is_valid_date (year, month, day) then return , inlineRed (err_warn_msgs.ts_date, 'error'); -- return empty string and error message end
snapdate = table.concat ('-'); -- date comparisons are all done in iso format if 14
--
-- decodeArchiveisDate (path, df): extracts the snapshot date from an archive.is-style URI path.
-- Visible steps: a path that is just '/' plus alphanumerics returns "short link"; otherwise the first path
-- segment (e.g. 2016.08.28-144552) has its periods and hyphens removed; fewer than 8 characters or a date
-- rejected by is_valid_date returns an inlineRed error message (ts_short / ts_date) as second value;
-- otherwise the year, month and day are joined with '-'.
-- NOTE(review): the gsub replacement string, the first return values, the list passed to table.concat
-- and everything after "if 14" (including the closing end) are missing from this copy of the file --
-- restore from upstream.
local function decodeArchiveisDate(path, df) local snapdate
if path:match ('^/%w+$') then -- short form url path is '/' followed by some number of base 62 digits and nothing else return "short link" -- e.g. http://archive.is/hD1qz end
snapdate = mw.text.split (path, '/')[2]:gsub('[%.%-]', ); -- get snapshot date, e.g. 2016.08.28-144552; remove periods and hyphens
local dlen = string.len(snapdate) if dlen < 8 then -- we need 8 digits TODO: but shouldn't this be testing for 14 digits? return , inlineRed (err_warn_msgs.ts_short, 'error'); -- return empty string and error message end
local year, month, day = snapdate:match ('(%d%d%d%d)(%d%d)(%d%d)'); -- no need for snapdatelong here
if not is_valid_date (year, month, day) then return , inlineRed (err_warn_msgs.ts_date, 'error'); -- return empty string and error message end
snapdate = table.concat ('-'); -- date comparisons are all done in iso format if 14
----------------------------< serviceName >-----------------------
-- Given a domain extracted by mw.uri.new (e.g. web.archive.org), set the tail string and service ID.
-- serviceName (host, no_link): looks host up in the services table and records the result in ulx.url1.
-- host is lowercased and a leading "web." or "www." is removed.  An exact key match in services wins;
-- otherwise the first key found inside host at a letter boundary (frontier pattern %f[%a]) is used.
-- For a known service: ulx.url1.service is services[index][4] or 'other', the tracking category is
-- services[index][5] or categories.other, and ulx.url1.tail is built from a prefix (prefixes.atthe or
-- services[index][1]), make_wikilink of fields [2] / [3], and the optional postfix in field [6].
-- For an unknown host: service 'other', tracking category categories.unknown.
-- In both cases the chosen tracking category is set in track.
-- NOTE(review): the initial contents of out, the comparison on services[index][1], the list passed to
-- table.concat in the unknown-archive branch and the call parentheses after host:lower are missing from
-- this copy of the file -- restore from upstream.
local function serviceName(host, no_link) local tracking; local index; host = host:lower:gsub ('^web%.(.+)', '%1'):gsub ('^www%.(.+)', '%1'); -- lowercase, remove web. and www. subdomains
if services[host] then index = host; else for k, _ in pairs (services) do if host:find ('%f[%a]'..k:gsub ('([%.%-])', '%%%1')) then index = k; break; end end end if index then local out = ; -- empty string in [1] so that concatenated result has leading single space ulx.url1.service = services[index][4] or 'other'; tracking = services[index][5] or categories.other; -- build tail string if false
services[index][1] then table.insert (out, prefixes.atthe); else table.insert (out, services[index][1]); end table.insert (out, make_wikilink (services[index][2], services[index][3], no_link)); -- add article wikilink if services[index][6] then -- add tail postfix if it exists table.insert (out, services[index][6]); end ulx.url1.tail = table.concat (out, ' '); -- put it all together; result has leading space character
else -- here when unknown archive ulx.url1.service = 'other'; tracking = categories.unknown; ulx.url1.tail = table.concat (' '); end track[tracking] = 1end
-- Example: a call with |url=.. |url4=.. |url7=.. has three url arguments that are not in numeric sequence (1..4..7).
-- This function only processes arguments numbered 2 or greater (in this case 4 and 7).
-- It creates numerically sequenced table entries, e.g. ulx.url2.url = value of |url4= and ulx.url3.url = value of |url7=.
-- parseExtraArgs (args): copies the numbered url / date / title arguments (2 .. config.maxurls) into ulx.
-- Arguments are renumbered sequentially: the first |urlN= found (N >= 2) is stored in ulx.url2, the next
-- in ulx.url3, and so on (j is the next free slot).  A missing |dateN= is replaced by the date_miss
-- warning from inlineRed; a missing |titleN= leaves title nil.
-- NOTE(review): the initialisation of ulx[argurl2] and everything after "if j" (the return value and
-- the closing end) are missing from this copy of the file -- restore from upstream.
local function parseExtraArgs(args)
local i, j, argurl, argurl2, argdate, argtitle
j = 2 for i = 2, config.maxurls do argurl = "url" .. i if args[argurl] then argurl2 = "url" .. j ulx[argurl2] = ulx[argurl2]["url"] = args[argurl] argdate = "date" .. i if args[argdate] then ulx[argurl2]["date"] = args[argdate] else ulx[argurl2]["date"] = inlineRed (err_warn_msgs.date_miss, 'warning'); end argtitle = "title" .. i if args[argtitle] then ulx[argurl2]["title"] = args[argtitle] else ulx[argurl2]["title"] = nil end j = j + 1 end end
if j
--[[--------------------------< comma >-----------------------

Given a date string, return "," if it's MDY (month-name day, year -- e.g. "May 5, 2016").

Returns an empty string for nil and for every other date form, so the result can always be inserted into
the rendering.  The comma is taken from a pattern capture so that only the comma, never the matched date
text, is returned.  %a matches ASCII letters only.

]]

local function comma(date)
	return (date and date:match ('%a+ +%d%d?(,) +%d%d%d%d')) or '';
end
----------------------------< createRendering >-----------------------
-- Returns a rendering of the data in ulx[][].
-- createRendering: assembles the final output string from the data stored in ulx.
-- Visible behaviour: a date of the form 'index...' is split into the literal 'index' and a trailing
-- message, and ulx.url1.date is reduced to 'index'.  Pieces are collected in the out table and joined
-- with table.concat at the end.  For the first url the output includes a link label, the date (unless it
-- is 'index'), comma(date) and ulx.url1.tail.  When ulx.url1.extraurls > 0, each additional url is
-- emitted as an external link [url label], labelled with its title when present, else its date.
-- A separate branch (marked 'addlarchives') loops over all urls starting at 1.
-- NOTE(review): the initialiser of out, every table passed to table.concat, the format comparisons
-- ('none', 'addlarchives') and several statements between the visible fragments are missing from this
-- copy of the file -- restore from upstream.
local function createRendering
local displayfield local out = ; local index_date, msg = ulx.url1.date:match ('(index)(.*)'); -- when ulx.url1.date extract 'index' text and message text (if there is a message) ulx.url1.date = ulx.url1.date:gsub ('index.*', 'index'); -- remove message
if 'none'
ulx.url1.date and s_text.archive or s_text.archived); -- add text table.insert (out, ' '); -- insert a space table.insert (out, ulx.url1.date); -- add date table.insert (out, ')'); -- close date text end else -- no title if index_date then -- when url date is 'index' table.insert (out, table.concat); -- add the index link label table.insert (out, msg or ); -- add date mismatch message when url date is /*/ and |date= has valid date else table.insert (out, table.concat); -- add link label for url has timestamp date (will include mismatch message if there is one) end if ulx.url1.date then if 'index' ~= ulx.url1.date then table.insert (out, ulx.url1.date); -- add date when data is not 'index' end table.insert (out, comma(ulx.url1.date)); -- add ',' if date format is mdy table.insert (out, ulx.url1.tail); -- add tail text else -- no date table.insert (out, ulx.url1.tail); -- add tail text end end
if 0 < ulx.url1.extraurls then -- For multiple archive URLs local tot = ulx.url1.extraurls + 1 table.insert (out, '.') -- terminate first url table.insert (out, table.concat); -- add header text
for i=2, tot do -- loop through the additionals local index = table.concat ; -- make an index displayfield = ulx[index]['title'] and 'title' or 'date'; -- choose display text table.insert (out, '['); -- open extlink markup table.insert (out, ulx[index]['url']); -- add the url table.insert (out, ' '); -- the required space table.insert (out, ulx[index][displayfield]); -- add the label table.insert (out, ']'); -- close extlink markup table.insert (out, i
else -- For if 'addlarchives'
local tot = ulx.url1.extraurls + 1; for i=1, tot do -- loop through the additionals local index = table.concat ; -- make an index table.insert (out, '['); -- open extlink markup table.insert (out, ulx[index]['url']); -- add url table.insert (out, ' '); -- add required space
displayfield = ulx[index]['title']; if 'addlarchives'
tot and '.' or ', ')); -- add terminator end return table.concat (out); -- make a big string and done endend
--
-- parameter_name_xlate (args, params, enum_params): translates local-language parameter names in args to
-- their canonical names.
-- Visible behaviour: every k/v pair of args is examined.  A name is searched for among the alias lists;
-- on a match the value is stored in new_args under the canonical name (pname) and origin[pname] records
-- the original name k.  Enumerated names have their trailing digit replaced by '#' before being searched
-- for in enum_params.  The found flag is used to break out of the nested loops.
-- The caller in this file receives three values (args, origin, unnamed_params).
-- NOTE(review): the initialisers of new_args and origin, the type test on the key, the alias comparisons
-- and everything after the last visible "if name" (including the return statement) are missing from this
-- copy of the file -- restore from upstream.
local function parameter_name_xlate (args, params, enum_params) local name; -- holds modifiable name of the parameter name during evaluation local enum; -- for enumerated parameters, holds the enumerator during evaluation local found = false; -- flag used to break out of nested for loops local new_args = ; -- a table that holds canonical and translated parameter k/v pairs local origin = ; -- a table that maps original (local language) parameter names to their canonical name for local language error messaging local unnamed_params; -- set true when unsupported positional parameters are detected for k, v in pairs (args) do -- loop through all of the arguments in the args table name = k; -- copy of original parameter name
if 'string'
alias then new_args[pname] = v; -- create a new entry in the new_args table origin [pname] = k; -- create an entry to make canonical parameter name to original local language parameter name found = true; -- flag so that we can break out of these nested for loops break; -- no need to search the rest of the aliases table for name so go on to the next k, v pair end end if found then -- true when we found an alias that matched name found = false; -- reset the flag break; -- go do next args k/v pair end end else -- enumerated parameters name = name:gsub ('%d$', '#'); -- replace enumeration digits with place holder for table search -- TODO: insert shortcut here? if num_params[name] then name holds the canonical parameter name; no need to search further for pname, aliases in pairs (enum_params) do -- loop through each parameter the num_params table for _, alias in ipairs (aliases) do -- loop through each alias in the parameter's aliases table if name
----------------------------< W E B A R C H I V E >----------------------------------------------------------
-- Template entry point.
-- webarchive (frame): template entry point; returns the rendered wikitext string.
-- Visible flow:
--   1. read the template arguments with getArgs and load the data module with mw.loadData, copying its
--      tables into the file-level forward declarations (digits only when data.digits.enable is set);
--   2. translate parameter names with parameter_name_xlate;
--   3. return inlineError when |url= and |url1= are both given, when neither is given, or when the url
--      starts with neither "http" (case-insensitive) nor "//";
--   4. store the url in ulx.url1 and the result of parseExtraArgs in ulx.url1.extraurls, then parse the
--      url with mw.uri.new under pcall so a malformed url does not raise a script error;
--   5. reject conflicting |date= / |date1=, collapse runs of spaces in the date and, when
--      config.verifydates is set, decode it with decode_date;
--   6. per service (wayback / archiveit, webcite, archiveis branches are visible) decode the date from
--      the url path; for the wayback branch a url date that differs from |date= replaces it;
--   7. normalise |format= against data.format_vals, store it and the title in ulx.url1;
--   8. return createRendering's output followed by the unnamed-parameter warning (if any) and the
--      tracking categories from createTracking.
-- NOTE(review): a large number of expressions in this copy of the file are cut off (arguments of
-- mw.loadData / table.concat / inlineError, the initialisation of ulx.url1, most == comparisons, the
-- serviceName call, call parentheses on createRendering / createTracking, and whole statements between
-- the visible fragments) -- restore from upstream before editing the logic.
local function webarchive(frame) local args = getArgs (frame);
local data = mw.loadData (table.concat); categories = data.categories; -- fill in the forward declarations config = data.config; if data.digits.enable then digits = data.digits; -- for i18n; table of digits in the local wiki's language non_western_digits = true; -- use_non_western_digits end err_warn_msgs = data.err_warn_msgs; excepted_pages = data.excepted_pages; month_num = data.month_num; -- for i18n; table of month names in the local wiki's language prefixes = data.prefixes; services = data.services; s_text = data.s_text; uncategorized_namespaces = data.uncategorized_namespaces; uncategorized_subpages = data.uncategorized_subpages;
local origin = ; -- holds a map of English to local language parameter names used in the current template; not currently used local unnamed_params; -- boolean set to true when template call has unnamed parameters args, origin, unnamed_params = parameter_name_xlate (args, data.params, data.enum_params); -- translate parameter names in args to English
local date, format, msg, udate, uri, url; local ldf = 'iso'; -- when there is no |date= parameter, render url dates in iso format if args.url and args.url1 then -- URL argument (first) return inlineError (data.crit_err_msgs.conflicting,); end url = args.url or args.url1; if not url then return inlineError (data.crit_err_msgs.empty); end -- these iabot bugs perportedly fixed; removing these causes lua script error--
if not (url:lower:find ('^http') or url:find ('^//')) then return inlineError (data.crit_err_msgs.invalid_url); end
ulx.url1 = ulx.url1.url = url
ulx.url1.extraurls = parseExtraArgs(args)
local good = false; good, uri = pcall (mw.uri.new, ulx.url1.url); -- get a table of uri parts from this url; protected mode to prevent lua error when ulx.url1.url is malformed if not good or nil
if args.date and args.date1 then -- Date argument return inlineError (data.crit_err_msgs.conflicting,); end date = args.date or args.date1; date = date and date:gsub (' +', ' '); -- replace multiple spaces with a single space
if date and config.verifydates then if '*'
date then date = nil; -- if date extracted from URL, ldf = 'mdy'; -- then |date=mdy overrides iso elseif 'dmy'
date then date = nil; -- if date extracted from URL, ldf = 'ymd'; -- then |date=ymd overrides iso else date, ldf = decode_date (date); -- get an iso format date from date and get date's original format end end
if 'wayback'
ulx.url1.service or 'archiveit'
ulx.url1.service then if date then if config.verifydates then if ldf then udate, msg = decodeWaybackDate (uri.path); -- get the url date in iso format and format of date in |date=; 'index' when wayback url date is * if not udate then -- this is the only 'fatal' error return return inlineError (data.crit_err_msgs[msg]); end
if udate ~= date then -- date comparison using iso format dates date = udate; msg = table.concat ; end end end else -- no |date= udate, msg = decodeWaybackDate (uri.path);
if not udate then -- this is the only 'fatal' error return return inlineError (data.crit_err_msgs[msg]); end
if
elseif 'webcite'
"query" then date = nil; -- unset msg = inlineRed (err_warn_msgs.date_miss, 'warning'); elseif not date then -- invalid base62 string date = inlineRed (err_warn_msgs.date1, 'error'); end end
elseif 'archiveis'
"short link" then date = nil; -- unset msg = inlineRed (err_warn_msgs.date_miss, 'warning'); elseif
if 'index'
if not format then format = "none" else for k, v in pairs (data.format_vals) do -- |format= accepts two specific values loop through a table of those values local found; -- declare a nil flag for _, p in ipairs (v) do -- loop through local language variants if format
if format
"addlarchives" then format = "addlarchives" else format = "none" end end ulx.url1.format = format
if args.title and args.title1 then -- Title argument return inlineError (data.crit_err_msgs.conflicting,); end
ulx.url1.title = args.title or args.title1;
local rend = createRendering if not rend then return inlineError (data.crit_err_msgs.unknown); end
return rend .. ((unnamed_params and inlineRed (err_warn_msgs.unnamed_params, 'warning')) or ) .. createTracking;
end
----------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------
-- A Scribunto module must return a table of the functions that #invoke may call; webarchive is the
-- template entry point defined above.
return {webarchive = webarchive};