Module:Footnotes/anchor id list explained

require('strict');
local data = mw.loadData ('Module:Footnotes/anchor id list/data');
local whitelist = mw.loadData ('Module:Footnotes/whitelist');
local Lang_obj = mw.language.getContentLanguage();								-- used by template_list_add() to uppercase first letter of template name TODO: better way to do that?

-- NOTE(review): the initializers below were missing from the source text; each local is presumed to mirror the
-- same-named field of the data module loaded above -- confirm against Module:Footnotes/anchor id list/data
local redirects_date = data.redirects_date;
local redirects_patent = data.redirects_patent;
local redirects_sfnref = data.redirects_sfnref;
local aliases_author = data.aliases_author;
local aliases_contributor = data.aliases_contributor;
local aliases_editor = data.aliases_editor;
local aliases_harvc_author = data.aliases_harvc_author;
local aliases_inventor = data.aliases_inventor;
local alias_patterns_date = data.alias_patterns_date;
local alias_patterns_harvc_date = data.alias_patterns_harvc_date;
local alias_patterns_patent_date = data.alias_patterns_patent_date;
local patterns_date = data.patterns_date;
local patterns_tags = data.patterns_tags;
local template_skip = data.template_skip;

local Article_content;															-- cache for the article wikitext; filled by article_content_get()

local anchor_id_list = {};														-- exported tables
local template_list = {};
local article_whitelist = {};

--[[--------------------------< A R T I C L E _ C O N T E N T _ G E T >----------------------------------------

get article content, remove certain html-like tags and their content so that this code doesn't include any citation
templates inside the tags as valid targets; they are not.

the scrubbed text is stored in the file-level Article_content; when Article_content is already set this function
does nothing.  no return value.

]]

local function article_content_get ()
	if Article_content then
		return;																	-- already fetched and scrubbed
	end

	Article_content = mw.title.getCurrentTitle():getContent() or '';			-- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
	for _, tag in ipairs (patterns_tags) do
		Article_content = Article_content:gsub (tag, '');						-- remove certain html-like tags and their content
	end
end

--[[--------------------------< S F N R E F _ G E T >----------------------------------------------------------

make an anchor id from the contents of {{sfnref}} or {{harvid}}.  this function assumes that {{sfnref}} and
{{harvid}} are correctly formed.

<template> is the template wikitext including its bounding braces.

returns 'CITEREF' followed by up to five positional parameters (four names and a date) concatenated without
separators; nil when the template name is not listed in redirects_sfnref

]]

local function sfnref_get (template)
	template = template:gsub ('{{%s*(.-)%s*}}', '%1');							-- strip bounding template markup and trim
	local parts = mw.text.split (template, '%s*|%s*');							-- split at the pipe and remove extraneous space characters

	if not redirects_sfnref[parts[1]:lower()] then
		return nil;																-- not an sfnref or harvid template
	end

	local anchor_id = {'CITEREF'};

	local i = 2;																-- indexer into parts{} table
	local j = 2;																-- indexer into anchor_id{} table which already has 'CITEREF' at [1]
	while parts[i] and 7 > j do													-- loop through what should be just positional parameters for names and year (2-6 four names and a date)
		if not parts[i]:find ('=') then											-- look for equal sign (named parameter in a template that doesn't support named parameters)
			anchor_id[j] = parts[i];											-- positional parameters are saved
			j = j + 1;															-- bump the anchor_id{} indexer
		end
		i = i + 1;																-- bump the parts{} indexer
	end

	return table.concat (anchor_id, '');
end

--[[--------------------------< D A T E _ G E T >--------------------------------------------------------------

extract the year from one of |year=, |date=, |publicationdate=, or |publication-date= in that order.  Does not
error check (that is left to the cs1|2 templates to do)

also gets date from |<date param>={{date|<date>}}

<template> is the template wikitext; <aliases> is a sequence of lua patterns, each matching one date parameter
name up to and including its assignment operator.

returns the year (or year range) as a string; empty string when no date parameter is found or when the first
date parameter that has a value does not hold a recognized date (empty string so that callers can concatenate)

]]

local function date_get (template, aliases)
	local text = tostring (template);											-- tostring() because something makes match() think template is a table

	for _, alias_pattern in ipairs (aliases) do									-- spin through the date alias patterns
		if text:match (alias_pattern) then										-- is this |<date param>= used?
			local rvalue = text:match (alias_pattern .. '(%b{})');				-- is the assigned value a template?

			if rvalue then
				rvalue = rvalue:gsub ('{{%s*(.-)%s*}}', '%1');					-- strip bounding template markup and trim
				local parts = mw.text.split (rvalue, '%s*|%s*');				-- split at the pipe and remove extraneous space characters

				if not redirects_date[parts[1]:lower()] then
					return '';													-- |date= holds some other template than {{date}} or redirect
				end
				rvalue = parts[2];												-- assume that date template is properly formed, first positional parameter is the date
			else
				rvalue = text:match (alias_pattern .. '([^|}]+)');
				if rvalue then													-- if rvalue is something
					rvalue = mw.text.trim (rvalue);								-- trim it
				end
				if '' == rvalue then											-- if rvalue was trimmed to nothing
					rvalue = nil;												-- ensure that it is unset so we can try the next parameter in the list
				end
			end

			if rvalue then
				for _, date_pattern in ipairs (patterns_date) do				-- spin through the recognized date formats
					local date, date2 = rvalue:match (date_pattern);			-- attempt to extract year portion according to the pattern; gets second year in any range
					if date then
						if date2 then											-- when a second year
							-- NOTE(review): the joiner was missing from the source text; an en dash is presumed -- confirm
							date = table.concat ({date, '–', date2});			-- build a date range
						end
						return date;											-- matched so return
					end
				end
				break;															-- found a date but it was malformed so abandon
			end
		end
	end

	return '';																	-- no date param or date param doesn't hold a recognized date; empty string for concatenation
end

--[[--------------------------< V N A M E S _ G E T >----------------------------------------------------------

extract names from |vauthors= or |veditors=; there is no |vcontributors= parameter.

splits the v parameter value at the comma; correctly handles accept-as-written markup when used to wrap a
comma-separated name (corporate): ((Black, Brown, White, and Co.))

<params> is the table of template parameters; <vparam> is the parameter name to read ('vauthors' or 'veditors').

returns the concatenation of the first four names (surnames only for names without accept-as-written markup;
whole name with the markup removed otherwise); nil when <vparam> is missing

the four-name limit counts names, not comma-separated segments, so a corporate name that itself contains commas
uses only one of the four slots.

]]

local function vnames_get (params, vparam)
	local vnames = {};															-- first four author or editor names go here

	if params[vparam] then														-- test for |vauthors= or |veditors=
		local split = mw.text.split (params[vparam], '%s*,%s*');				-- this will separate portions of ((Black, Brown, White, and Co.))
		local i = 1;															-- indexer into split{}

		while split[i] and 4 > #vnames do										-- limit to four names
			local name = split[i];

			if name:match ('^%(%(.*[^%)][^%)]$') then							-- first segment of comma-separated accept-as-written; this segment has the opening doubled parens
				i = i + 1;														-- bump indexer to next segment
				while split[i] do
					name = name .. ', ' .. split[i];							-- concatenate with previous segments
					if split[i]:match ('^.*%)%)$') then							-- if this segment has the closing doubled parens
						break;													-- done reassembling
					end
					i = i + 1;													-- bump indexer
				end
			end

			table.insert (vnames, name);										-- add the (possibly reassembled) name to the vnames table
			i = i + 1;															-- bump indexer
		end

		for k, vname in ipairs (vnames) do
			if vname:match ('%(%(.-%)%)') then									-- with accept-this-value-as-written markup
				vnames[k] = vname:gsub ('%(%((.-)%)%)', '%1');					-- remove markup and save the whole name
			else
				vnames[k] = vname:gsub ('(.-)%s+%u+$', '%1');					-- extract and save surname(s); drops trailing uppercase initials
			end
		end
	end

	if 0 == #vnames then
		return nil;																-- no names
	end
	return table.concat (vnames);												-- return a concatenation of the vnames
end

--[[--------------------------< N A M E S _ G E T >------------------------------------------------------------

cs1|2 makes anchor id from contributor, author, or editor name-lists in that order

get the names from the cs1|2 template; the caller chooses which name-list to read by passing the matching
<aliases_list>: a sequence of parameter-name aliases in which '#' stands for the enumerator ('last#' -> 'last2').
for the first name the enumerated form (|last1=) is tried before the unenumerated form (|last=).

returns concatenated names in enumeration order when successful; nil else

empty name (nameholding parameter n is present without value) and missing name (nameholding parameter n is not
present) are included as empty string with all other names

]]

local function names_get (params, aliases_list)
	local names = {};															-- first four author or editor names go here

	for enum = 1, 4 do															-- four names only
		for _, alias in ipairs (aliases_list) do
			local enum_alias = alias:gsub ('#', tostring (enum));				-- replace '#' to make 'lastn'
			local value = params[enum_alias];

			if not value and 1 == enum then										-- because |last= and |last1= are exact aliases
				local bare_alias = alias:gsub ('#', '');						-- replace '#' to make 'last'
				value = params[bare_alias];
			end

			if value then
				names[enum] = value;											-- found so save the value
				break;															-- next enum
			end
		end
	end

	for enum = 1, 4 do
		local name = names[enum] or '';											-- missing name becomes empty string for concatenation
		name = name:gsub ('%(%((.-)%)%)', '%1');								-- remove accept-as-written markup if present
		names[enum] = name;
	end

	local name_str = table.concat (names);										-- concatenate the names
	if '' == name_str then
		return nil;																-- no names found
	end
	return name_str;
end

--[[--------------------------< T E M P L A T E _ S T R I P >--------------------------------------------------

cs1|2 does not see the template markup but instead sees the result of the template as html.  cs1|2 strips the html
which leaves the displayed value for the anchor id.  We can't do that here so, because templates aren't allowed in
parameters, we simply discard any templates found in the cs1|2 template.

this may leave a |lastn= parameter empty which will be treated as if it were really empty as cs1|2 do (three
authors, |last2= empty -> CITEREFLast1Last3YYYY (the harv and sfn render: 'Last1, & Last3 YYYY' with
CITEREFLast1Last3YYYY).

returns the template text without its outer braces and without any nested templates

]]

local function template_strip (template)
	-- NOTE(review): the two outer-delimiter patterns were partly missing from the source text; reconstructed
	-- from the surviving '^', '$', and 'with trailing/leading whitespace' comment -- confirm
	template = template:gsub ('^{{%s*', '');									-- remove outer {{ (cs1|2 template delimiter with trailing whitespace)
	template = template:gsub ('%s*}}$', '', 1);									-- remove outer }} (cs1|2 template delimiter with leading whitespace)
	template = template:gsub ('%b{}', '');										-- remove any templates from the cs1|2 template
	return template;
end

--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

Returns a string where all of lua's magic characters have been escaped.  This is important because functions like
string.gsub() treat their pattern and replace strings as patterns, not literal strings.

]]

local function escape_lua_magic_chars (argument)
	argument = argument:gsub ("%%", "%%%%");									-- replace % with %%; must be first so the escapes added below are not doubled
	argument = argument:gsub ("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1");			-- replace all other lua magic pattern characters
	return argument;
end

--[=[-------------------------< W I K I L I N K _ S T R I P >--------------------------------------------------

Wikilink markup does not belong in an anchor id and can / does confuse the code that parses apart citation and
harvc templates so here we remove any wiki markup:
	[[link|label]] -> label
	[[link]] -> link

The replacement is supplied by a function so the label / link text is inserted literally: no marker text is
needed and no escaping of lua magic characters in the replacement is required.

]=]

local function wikilink_strip (template)
	local stripped = template:gsub ('%[%b[]%]', function (wikilink)
		return wikilink:match ('%[%[.-|(.-)%]%]')								-- extract label from complex [[link|label]] wikilink
			or wikilink:match ('%[%[(.-)%]%]');									-- extract link from simple [[link]] wikilink
	end);

	return stripped;
end

--...}} returns cite book

local function template_name_get (template) local template_name = template:match ('^} (no spaces between assignment operator and pipe or closing brace) ref = mw.text.trim (ref); -- something, could be just whitespace, so trim leading / trailing whitespace if

ref then -- trimming a string of whitespace makes an empty string ref = nil; -- make empty ref same as missing ref end end end end

template_params_get (template, params); -- build a table of template parameters and their values

if whitelist.wrapper_templates[template_name][1] then -- is this wrapper a simple-default wrapper? name_default = whitelist.wrapper_templates[template_name][1]; -- get the default names date_default = whitelist.wrapper_templates[template_name][2]; -- get the default date else vol = params['volume'] or 'default'; if not whitelist.wrapper_templates[template_name][vol] then -- make sure this volume exists vol = 'default'; -- doesn't exist, use default volume end name_default = whitelist.wrapper_templates[template_name][vol][1]; -- get the default names date_default = whitelist.wrapper_templates[template_name][vol][2]; -- get the default date end

if 'harv'

ref or not ref then -- |ref=harv specified or |ref= missing or empty anchor_id = names_get (params, aliases_contributor) or -- get contributor, author, or editor names names_get (params, aliases_author) or vnames_get (params, 'vauthors') or -- |vauthors= names_get (params, aliases_editor) or vnames_get (params, 'veditors') or -- |veditors= name_default; -- default names from whitelist-- whitelist.wrapper_templates[template_name][1]; -- default names from whitelist

if

date then -- if date not provided in the template date = date_default; -- use the default date from whitelist end

if anchor_id then -- if names were gotten anchor_id = 'CITEREF' .. anchor_id .. date; end

elseif ref:match ('%b') then -- ref holds a template anchor_id = sfnref_get (ref); -- returns content of or ; nil else

elseif 'none'

ref then -- |ref=none return nil; -- anchor id expicitly suppressed else anchor_id = ref; -- |ref= may match an anchor id override value in template |ref= parameter end return anchor_id; -- anchor_id text; nil elseend

--[[--------------------------< A N C H O R _ I D _ M A K E _ C S 1 2 >----------------------------------------

make an anchor id from a cs1|2 template and cs1-like templates

inspect |ref= to decide what to do:
	|ref=					- empty or missing: get names and date from template parameters; all cs1|2 create CITEREF anchor IDs
	|ref=harv				- get names and date from template parameters
	|ref={{sfnref|...}}		- assemble an anchor id from {{sfnref}} positional parameters
	|ref={{harvid|...}}		- assemble an anchor id from {{harvid}} positional parameters
	|ref=none				- skip; do nothing because an anchor id is intentionally suppressed; TODO: keep with a type code of '0'?
	|ref=<text>				- save param value because it may match an anchor id override value in a short-cite template's |ref= parameter or an anchor template's |id= parameter

]]

local function anchor_id_make_cs12 (template) local ref; -- content of |ref= local template_name; -- name of the template local anchor_id; -- the assembled anchor id from this template local date; local params = ; -- table of template parameters template_name = template_name_get (template); -- get first char uppercase trimmed template name; ignore subpages ~/new, ~/sandbox if not template_name or template_skip[template_name] then return nil; -- could not extract template name from (possibly corrupted) template (extraneous opening } (no spaces between assignment operator and pipe or closing brace) ref = mw.text.trim (ref); -- something, could be just whitespace, so trim leading / trailing whitespace if

ref then -- trimming a string of whitespace makes an empty string ref = nil; -- make empty ref same as missing ref end end end end

template_params_get (template, params); -- build a table of template parameters and their values

if 'harv'

ref or not ref then -- |ref=harv specified or |ref= missing or empty if redirects_patent[template_name] then -- if this is a cite patent template anchor_id = names_get (params, aliases_inventor); -- inventor names only else -- cs1|2 template anchor_id = names_get (params, aliases_contributor) or -- get contributor, author, or editor names names_get (params, aliases_author) or vnames_get (params, 'vauthors') or -- |vauthors= names_get (params, aliases_editor) or vnames_get (params, 'veditors'); -- |veditors= end

if anchor_id then -- if names were gotten anchor_id = 'CITEREF' .. anchor_id .. date; end

elseif ref:match ('%b') then -- ref holds a template anchor_id = sfnref_get (ref); -- returns content of or ; nil else

elseif 'none'

ref and not redirects_patent[template_name] then -- |ref=none; not supported by cite patent return nil; -- anchor id expicitly suppressed else anchor_id = ref; -- |ref= may match an anchor id override value in template |ref= parameter end return anchor_id; -- anchor_id text; nil elseend

--[[--------------------------< L I S T _ A D D >--------------------------------------------------------------

adds an <item> to <list> table; for anchor IDs, the boolean <encode> argument must be set true; no return value

<list> maps each item to the number of times that item has been added; a nil or false <item> is ignored.

]]

local function list_add (item, list, encode)
	if not item then
		return;																	-- nothing to add
	end

	if encode then																-- for anchor IDs ...
		item = mw.uri.anchorEncode (item);										-- encode to remove wikimarkup, convert spaces to underscores etc
	end

	list[item] = (list[item] or 0) + 1;											-- 1 when first saved; more than 1 indicates that there are multiple items
end

--[[--------------------------< A N C H O R _ I D _ M A K E _ A N C H O R >------------------------------------

make anchor IDs from an {{anchor}} template; there may be more than one because {{anchor}} is not limited to the
number of anchors it may hold.

<template> is the template wikitext including its bounding braces; <anchor_id_list> is the table that receives
the anchor IDs (via list_add()).  no return value.

nested templates are tried as {{sfnref}} / {{harvid}}; every remaining non-empty positional value is added as an
anchor ID.

]]

local function anchor_id_make_anchor (template, anchor_id_list)
	-- NOTE(review): the opening pattern was partly missing from the source text; reconstructed from the
	-- surviving '^', '$', and 'remove outer ... and template name' comment -- confirm
	template = template:gsub ('^{{[^|]+|', '');									-- remove outer {{ and template name
	template = template:gsub ('}}$', '', 1);									-- remove outer }}

	template = wikilink_strip (template);										-- strip any wikilink markup (there shouldn't be any but just in case)

	local params = {};
	local anchor_id;

	for param in template:gmatch ('%b{}') do									-- loop through the template; remove and save templates (presumed to be sfnref or harvid)
		table.insert (params, param);											-- save it
		template = template:gsub ('%b{}', '', 1);								-- remove it from source template
	end

	for _, t in ipairs (params) do												-- spin through the templates in params
		anchor_id = sfnref_get (t);												-- attempt to decode {{sfnref}} and {{harvid}}
		if anchor_id then														-- nil when not {{sfnref}} or {{harvid}}
			list_add (anchor_id, anchor_id_list, true);							-- add anchor ID to the list
		end
	end

	template = template:gsub ('|%s*|', '|');									-- when pipe follows pipe with or without white space, remove extraneous pipe
	template = template:gsub ('^|', ''):gsub ('|$', '');						-- remove extraneous leading and trailing pipes

	params = mw.text.split (template, '%s*|%s*');								-- split at the pipe and remove extraneous space characters
	for _, t in ipairs (params) do												-- spin through the anchor IDs
		anchor_id = mw.text.trim (t);											-- trim white space
		if '' ~= anchor_id then													-- should always have something
			list_add (anchor_id, anchor_id_list, true);							-- add anchor ID to the list
		end
	end
end

----------------------------< T E M P L A T E _ L I S T _ A D D >--------------------------------------------

makes a list of templates use in the article.

local function template_list_add (template) local template = template:match ('