Module:Sandbox/Innesw/PopulationFromWikidata-upgrade explained

---------------- Defining variables --------------------
-- Wikidata property IDs read by this module. Each declaration sits on its own
-- line so that the trailing comment of one cannot swallow the next declaration.
local Pop_P = "P1082" -- population property
local Applies_P = "P518" -- applies to part property
local Pointin_P = "P585" -- point in time property
local DetMeth_P = "P459" -- determination method property
local RefURL_P = "P854" -- reference URL
local RefTitle_P = "P1476" -- reference title
local RefPubIn_P = "P1433" -- reference published in property
local DatePub_P = "P577" -- date of publication property
local Publisher_P = "P123" -- publisher property
local Retrieved_P = "P813" -- retrieved property
local Instof_P = "P31" -- instance of property
local ShortN_P = "P1813" -- short name property

-- Wikidata item IDs read by this module. The commented-out entries are
-- geography types that are declared but not currently enabled.
local CensusAU_I = "Q5058971" -- Australian census item
local SAL_I = "Q33112019" -- state suburb item (includes SSC and SAL)
--local GCCSA_I = "Q112762887" -- Greater Capital City Statistical Area item
local LGA_I = "Q33127844" -- Local Government Area item
local UCL_I = "Q33127891" -- Urban Centre or Locality
--local SA2_I = "Q33128776" -- SA2
--local SA3_I = "Q118313924" -- SA3
local ILOC_I = "Q112729549" -- Indigenous Location

-- The Wikidata entity currently being processed. It is (re)assigned at the
-- start of each exported function, from either the `wikidata` argument or the
-- item connected to the current page.
local item = nil

-- The table of exported functions; it is returned at the end of the module.
local p = {}

--------------- Function LastURLSection returns last section of a url, ie: the text after the last '/' ----------------------

--- Returns the last section of a URL, i.e. the text after the last '/'.
-- If `url` contains no '/', the whole string is returned; if it ends with a
-- '/', the empty string is returned.
-- @param url string to take the last section from
-- @return the substring of `url` following its last '/'
local function LastURLSection(url)
	local pos = 1
	while true do
		-- plain (non-pattern) search for the next '/' at or after pos
		local f = string.find(url, '/', pos, true)
		if f == nil then
			break
		end
		pos = f + 1
	end
	return string.sub(url, pos)
end

--------------- Function SplitDoubleClaims returns the claims table with any claim with multiple points-in-time split into multiple claims. ----------------------

--- Splits every claim that has more than one point-in-time qualifier into one
-- claim per point in time. `claims` is modified in place; nothing is returned.
--
-- Each split copy keeps only:
--   * the one point-in-time qualifier it was made for,
--   * the first determination-method qualifier whose item's English label
--     contains that point in time's year, and
--   * every reference whose reference URL contains that year.
-- If no point-in-time or determination method matches, an empty table is
-- stored in that qualifier's list instead.
--
-- @param claims sequence of claims; each must have point-in-time and
--   determination-method qualifiers and a references table
local function SplitDoubleClaims(claims)
	local oldclaims, newclaims = {}, {}
	for j, s in pairs(claims) do
		local npits = table.maxn(s.qualifiers[Pointin_P])
		if npits > 1 then -- we need to split this claim
			-- first, make copies of the claim, and the points-in-time
			local newc, pits = {}, {}
			for _, pit in pairs(s.qualifiers[Pointin_P]) do
				table.insert(newc, mw.clone(s))
				table.insert(pits, mw.clone(pit))
			end

			-- for each point-in-time, only keep matching point-in-time,
			-- determination method and reference qualifiers from each copy
			for a, pit in pairs(pits) do
				-- points in time
				local keeps = {}
				for _, q in pairs(newc[a].qualifiers[Pointin_P]) do
					if q.datavalue.value.time == pit.datavalue.value.time then
						keeps = mw.clone(q)
						break -- only the matching point-in-time is kept
					end
				end
				newc[a].qualifiers[Pointin_P] = {}
				table.insert(newc[a].qualifiers[Pointin_P], keeps)

				local year = string.sub(pit.datavalue.value.time, 2, 5)

				-- determination methods
				keeps = {}
				for _, q in pairs(newc[a].qualifiers[DetMeth_P]) do
					local detmet = mw.wikibase.getEntity(q.datavalue.value.id)
					if string.find(detmet.labels.en.value, year, 1, true) ~= nil then
						keeps = mw.clone(q)
						break -- only the matching determination method is kept
					end
				end
				newc[a].qualifiers[DetMeth_P] = {}
				table.insert(newc[a].qualifiers[DetMeth_P], keeps)

				-- references
				keeps = {}
				for _, ref in pairs(newc[a].references) do
					if ref.snaks[RefURL_P] ~= nil
						and string.find(ref.snaks[RefURL_P][1].datavalue.value, year, 1, true) ~= nil then
						table.insert(keeps, mw.clone(ref)) -- multiple matching references may be kept
					end
				end
				newc[a].references = {}
				for _, ref in pairs(keeps) do
					table.insert(newc[a].references, ref)
				end
			end

			for _, c in pairs(newc) do
				table.insert(newclaims, c)
			end
			table.insert(oldclaims, j)
		end
	end

	-- remove the original splittable claims, highest index first: table.remove
	-- shifts later elements down, so removing in ascending order would make the
	-- remaining recorded indices point at the wrong claims
	table.sort(oldclaims, function(x, y) return x > y end)
	for _, index in ipairs(oldclaims) do
		table.remove(claims, index)
	end

	-- and add the separate ones they were split into
	for _, c in ipairs(newclaims) do
		table.insert(claims, c)
	end
end

--------------- Function IdForGeog returns the place ID for the specified geography abbreviation. Returns nil if abbreviation is blank. ----------------------

--- Returns the Wikidata item ID for a geography abbreviation.
-- @param geog lower-case geography abbreviation: "ucl", "sal", "lga" or "iloc"
-- @return the matching item ID, or nil if `geog` is nil, blank or not one of
--   the abbreviations above
local function IdForGeog(geog)
	if geog == "ucl" then
		return UCL_I
	elseif geog == "sal" then
		return SAL_I -- includes SSC and SAL
	elseif geog == "lga" then
		return LGA_I -- was UCL_I, which made "lga" requests return Urban Centre figures
	elseif geog == "iloc" then
		return ILOC_I
	end
	return nil
end

--------------- Function GeogIdsForType returns a table of geography IDs that could be shown for the specified type. ----------------------

-- GeogIdsForType(type): lower-cases `type` and branches on the place types
-- "town", "suburb", "city", "settlement", "locality", "townandlocality", "lga"
-- and "region". No other value is handled, so any other type falls through
-- without an explicit return value.
-- NOTE(review): this block appears to have lost characters in extraction. The
-- comparison operator after each `type` (presumably `==`) and the value after
-- each `return` (presumably a table of geography item IDs) are missing, and the
-- statements are run together on comment lines. Restore them from the original
-- module before use; the lost return values cannot be inferred from this text.
local function GeogIdsForType(type) type = string.lower(type) if type

"town" then return elseif type

"suburb" then return elseif type

"city" then return elseif type

"settlement" then return elseif type

"locality" then return elseif type

"townandlocality" then return elseif type

"lga" then return elseif type

"region" then -- for now saying region uses LGA_I, but unclear what is most apprpriate ABS geography type. Can revise. return endend--------------- Function GetRefsForClaim to check, collate and format all the reference components ----------------------

-- GetRefsForClaim(claim, defaulttitle): builds one citation per entry in
-- claim.references and returns them concatenated as a single string.
--
-- For each reference it gathers:
--   * the reference URL and the reference title (falling back to `defaulttitle`),
--   * the English label of the "published in" item, used as the default work,
--   * the publication date from the reference, overridden by the determination
--     method item's own publication date when that item's first "instance of"
--     value is the Australian census item,
--   * the publisher label from the determination method item,
--   * the retrieved (access) date,
--   * the English label of the claim's "applies to part" item, and
--   * the year taken from characters 2-5 of the claim's point-in-time value.
-- The citation name is refwork_year_appliespart_reftitle, with the reference
-- number appended for the 2nd and later references of the same claim.
--
-- The determination-method and applies-to-part entities depend only on the
-- claim, yet are loaded inside the per-reference loop; the census `refwork`
-- assignment also appears twice.
-- NOTE(review): this block appears to have lost characters in extraction. The
-- `==` comparisons against CensusAU_I, the `()` after mw.getCurrentFrame and
-- mw.language.getContentLanguage, the arguments to expandTemplate and
-- extensionTag, and the contents of `citewebargs` are all missing. The template
-- names and arguments cannot be inferred from this text; restore them from the
-- original module.
local function GetRefsForClaim(claim, defaulttitle) local refs = "" local r = 0 for b, x in pairs(claim.references) do -- loop through all references in a claim -- each reference in the wikidata will produce a citation reference for the claim r = r + 1

-- gather various values for potential later use local refurl = "" if claim.references[b].snaks[RefURL_P] ~= nil then -- if reference has a reference url, use it refurl = claim.references[b].snaks[RefURL_P][1].datavalue.value end

local reftitle = defaulttitle -- default title is the Wikidata item title if claim.references[b].snaks[RefTitle_P] ~= nil then -- if reference has a title, use it reftitle = claim.references[b].snaks[RefTitle_P][1].datavalue.value.text end

local detmet = mw.wikibase.getEntity(claim.qualifiers[DetMeth_P][1].datavalue.value.id) -- load the claim determination method item

local pubinlabel = "" if claim.references[b].snaks[RefPubIn_P] ~= nil then -- if reference has a published in (it should for all references), use its item's label local pubin = mw.wikibase.getEntity(claim.references[b].snaks[RefPubIn_P][1].datavalue.value.id) pubinlabel = pubin.labels.en.value end

local refwork = pubinlabel -- the default reference work for for non-census references, or fall-back for census references with missing parts

local pubdate = "" if claim.references[b].snaks[DatePub_P] ~= nil then -- if reference has a date published, use it. This is the second-best option for the published date. pubdate = mw.language.getContentLanguage:formatDate('j F Y', claim.references[b].snaks[DatePub_P][1].datavalue.value.time) end if detmet.claims[Instof_P] ~=nil and detmet.claims[Instof_P][1].mainsnak.datavalue.value.id

CensusAU_I then -- if determination method is an instance of an australian census refwork = detmet.labels.en.value .. " " .. pubinlabel -- reference work is determination method label + published in if detmet.claims[DatePub_P] ~=nil then -- if determination method has a date published, use that as the date pubdate = mw.language.getContentLanguage:formatDate('j F Y', detmet.claims[DatePub_P][1].mainsnak.datavalue.value.time) end end

local refpublisher = "" if detmet.claims[Publisher_P] ~= nil then -- if determination method has a publisher, use its item's label local publisheritem = mw.wikibase.getEntity(detmet.claims[Publisher_P][1].mainsnak.datavalue.value.id) refpublisher = publisheritem.labels.en.value end

local refaccessdate = "" if claim.references[b].snaks[Retrieved_P] ~= nil then -- if reference has an access date, use it. refaccessdate = mw.language.getContentLanguage:formatDate('j F Y', claim.references[b].snaks[Retrieved_P][1].datavalue.value.time) end

local appliespart = mw.wikibase.getEntity(claim.qualifiers[Applies_P][1].datavalue.value.id).labels.en.value -- the label of the item of the applies to part of the claim

local year = string.sub(claim.qualifiers[Pointin_P][1].datavalue.value.time, 2, 5) -- the population point in time as a year string

local reference

if detmet.claims[Instof_P] ~=nil and detmet.claims[Instof_P][1].mainsnak.datavalue.value.id

CensusAU_I then -- if determination method is an instance of an australian census refwork = detmet.labels.en.value .. " " .. pubinlabel -- reference work is determination method label + published in -- the reference is built using the specific template for the census year, which ensures the link format is correct local geogid = LastURLSection(refurl) -- the id for the specific ABS reference is easiest to get from the ref URL. It may be the only place it is available. reference = mw.getCurrentFrame:expandTemplate else -- use the provided reference url, and whatever other citation data is available

local citewebargs =

reference = mw.getCurrentFrame:expandTemplate end

local wdeditpencil = mw.getCurrentFrame:expandTemplate -- the Edit At Wikidata icon & link reference = reference .. wdeditpencil

-- The name of the citation reference will be the same for each wikidata claim reference. This will allow references to the same data to be combined into a single citation reference. local refname = refwork .. "_" .. year .. "_" .. appliespart .. "_" .. reftitle if r > 1 then -- 2nd and later references in the same wikidata claim have their number appended, to keep them unique refname = refname .. "_" .. r end

refs = refs .. mw.getCurrentFrame:extensionTag -- accumulate the citation references end return refsend

--------------- Function GetAbbrLabel gets the population geography abbreviation ---------------

-- GetAbbrLabel(returnclaim): loads the item named by the claim's first
-- "applies to part" qualifier and returns a label for it. The label defaults to
-- the item's English label; when the item has a short-name claim the label is
-- replaced by the result of a template expansion.
-- NOTE(review): this block appears to have lost characters in extraction. The
-- `()` after mw.getCurrentFrame, the arguments to expandTemplate, and possibly
-- the text inside the two '' literals wrapped around the result are missing.
-- Restore them from the original module.
local function GetAbbrLabel(returnclaim) local appliespartitem = mw.wikibase.getEntity(returnclaim.qualifiers[Applies_P][1].datavalue.value.id) -- load the applies to part item local abbrelabel = appliespartitem.labels.en.value -- the fall back value for the geography label if no abbreviation (short name) value exists in Wikidata item if appliespartitem.claims[ShortN_P] ~= nil then -- if a short name value exists, use it, with the full label as a tooltip abbrelabel = mw.getCurrentFrame:expandTemplate end return '' .. abbrelabel .. ''end

--------------- Function GetYearLink gets the Wikipedia article link for the population year ---------------

-- GetYearLink(returnclaim): returns the year of the claim's first point-in-time
-- qualifier (characters 2-5 of its time value). Two branches replace the bare
-- year: one when the determination method item has an enwiki sitelink, and
-- otherwise one when that item's first "instance of" value is the Australian
-- census item.
-- NOTE(review): this block appears to have lost characters in extraction. Both
-- branches now read `"" .. year .. ""`, which yields the bare year; the original
-- link wikitext around the year is missing, as is the `==` comparison against
-- CensusAU_I. Restore them from the original module.
local function GetYearLink(returnclaim) local year = string.sub(returnclaim.qualifiers[Pointin_P][1].datavalue.value.time, 2, 5) -- the population point in time as a year string local yearreturn = year -- if no links to Wikipedia articles describing population determination method exist, default is year

local detmetitem = mw.wikibase.getEntity(returnclaim.qualifiers[DetMeth_P][1].datavalue.value.id) -- load the claim determination method item

if detmetitem.sitelinks ~=nil and detmetitem.sitelinks.enwiki ~=nil then -- if determination method item has an enwiki URL yearreturn = "" .. year .. "" -- use that URL as the link for the year value elseif detmetitem.claims[Instof_P] ~=nil and detmetitem.claims[Instof_P][1].mainsnak.datavalue.value.id

CensusAU_I then -- if determination method is an instance of an australian census yearreturn = "" .. year .. "" -- use the section of the Census in Australia article as the link for the year value end return yearreturnend

---------------- Function HistoricPopulations returns a wikitable of all census population values for all geography types, or a specified one ---------------
-- parameters:
-- required: type= the type value as for the Infobox
-- optional: wikidata= the wikidata item to be used instead of the one in the current page
-- optional: geog= a single geography type to return pop values for. Valid are 'ucl', 'sal', 'lga', 'iloc'. If left blank, all geographies will be returned.

-- p.HistoricPopulations(frame): builds wikitable text with one row per census
-- year and one column per geography that has at least one value.
--
-- Steps, as far as this text shows:
--   1. Load the entity named by frame.args.wikidata, or else the one connected
--      to the current page; return "" when it has no population claims.
--   2. Keep claims that have point-in-time, applies-to-part and
--      determination-method qualifiers, have references, and whose
--      determination method item is an instance of the Australian census.
--   3. Split claims with several points in time (SplitDoubleClaims).
--   4. Choose the geography IDs to show: the single ID for frame.args.geog when
--      it is given, otherwise GeogIdsForType(frame.args.type).
--   5. Record the formatted population plus references per year and geography,
--      sort the years, and emit header and data cells.
--
-- NOTE(review): `s.qualifiers[DetMeth_P]` is indexed before the later
-- `s.qualifiers ~= nil` test, so a claim without qualifiers would raise an
-- error instead of being skipped. frame.args.type is also passed to
-- GeogIdsForType, which calls string.lower on it, without a nil check.
-- NOTE(review): this block appears to have lost characters in extraction. The
-- `==` comparisons, the `()` after getEntity and getContentLanguage, the `{}`
-- initialisers (validpopclaims, showGeogIds, history, years, glist, wt, the
-- per-year history entry), the contents of `geogNames`, and the table start/end
-- markup strings are missing. geogNames and the markup cannot be inferred from
-- this text; restore them from the original module.
function p.HistoricPopulations(frame) if frame.args.wikidata ~= nil and frame.args.wikidata ~= "" then -- if there's a Wikidata item specified, use it item = mw.wikibase.getEntity(frame.args.wikidata) else item = mw.wikibase.getEntity -- if there's a Wikidata item connected to the article it will find it here. end

-- if there are no population claims in the item, return an empty string if not (item and item.claims and item.claims[Pop_P]) then return "" end

-- Find claims with: -- (1) point in time is not nil -- (2) applies to part is not nil -- (3) determination method is not nil -- (4) References table is not empty -- (5) The determination method for the claim is an australian census

local validpopclaims = local z = 0 for j, s in pairs(item.claims[Pop_P]) do local isCensus = false if s.qualifiers[DetMeth_P] ~= nil then local detmetitem = mw.wikibase.getEntity(s.qualifiers[DetMeth_P][1].datavalue.value.id) -- load the claim determination method item isCensus = (detmetitem.claims[Instof_P] ~=nil and detmetitem.claims[Instof_P][1].mainsnak.datavalue.value.id

CensusAU_I) -- is determination method an instance of an australian census? end if s.qualifiers ~= nil and s.qualifiers[Pointin_P] ~= nil and s.qualifiers[Applies_P] ~= nil and s.qualifiers[DetMeth_P] ~= nil and s.references ~= nil and isCensus then z = z + 1 validpopclaims[z] = s -- add to valid claims table end end

-- if there are no valid claims, return an empty string if #validpopclaims < 1 then return "" end

SplitDoubleClaims(validpopclaims) -- any claims with multiple points-in-time are split into separate claims

-- add to history table for all (or requested-geography-only) claims

local showGeogIds =

if frame.args.geog ~= nil and frame.args.geog ~= "" then -- if geog is specified, only claims for its id are returned showGeogIds[1] = IdForGeog(string.lower(frame.args.geog)) else showGeogIds = GeogIdsForType(frame.args.type) end local showGeogIdsString = ',' .. table.concat(showGeogIds, ',') .. ','

local geog = nil if frame.args.geog ~= nil and frame.args.geog ~= "" then geog = string.lower(frame.args.geog) end

local oneplaceid = IdForGeog(geog)

local history = local years = local glist = for i, q in pairs(validpopclaims) do local claimgeogid = q.qualifiers[Applies_P][1].datavalue.value.id -- the ID of the applies_to_part item in the claim if string.find(showGeogIdsString, ',' .. claimgeogid .. ',', 1, true) then -- the geography ID of the claim is in the list of IDs that could be shown for the type if (not oneplaceid) or (claimgeogid

oneplaceid) then -- if geog is not specified, or it is and the claim applies_to_part matches it local claimyear = string.sub(q.qualifiers[Pointin_P][1].datavalue.value.time, 2, 5) -- the population point in time as a year string if not history[claimyear] then history[claimyear] = table.insert(years, claimyear) end local refs = GetRefsForClaim(q, item.labels.en.value) history[claimyear].claim[claimgeogid] = mw.language.getContentLanguage:formatNum(tonumber(q.mainsnak.datavalue.value.amount)) .. refs glist[claimgeogid] = 1 end end end -- sort the years table table.sort(years) local geogNames =

-- build the wikidata table contents from the history table local wt = for g, l in pairs(showGeogIds) do if glist[l] then table.insert(wt, ' !! ' .. geogNames[l]) end end -- data rows for k, v in ipairs(years) do table.insert(wt, '\n|-\n! | ' .. v) -- first column, contains years for g, l in pairs(showGeogIds) do if glist[l] then if not history[v].claim[l] then table.insert(wt, '\n| ') -- empty table cell else table.insert(wt, '\n| ' .. history[v].claim[l]) end end end end

local wts = table.concat(wt)

-- if there are table contents, add the start and end of the table if #wts > 0 then wts = '

-\n!' .. wts -- start of table and empty top-left cell wts = wts .. '\n
' -- end of table end return wtsend

---------------- Function LatestPopulation returns the most recent population value for a specified geography ---------------
-- parameters:
-- required: geog= a single geography type to return pop value for. Valid are 'ucl', 'sal', 'lga', 'iloc'.
-- optional: wikidata= the wikidata item to be used instead of the one in the current page
-- optional: year= any value (except 'no') requests the year to be shown after the population figure
-- optional: punc= any value will be inserted into the output before the reference number

--- Returns the most recent population value for one geography, followed by
-- its references.
--
-- frame.args:
--   geog     (required) 'ucl', 'sal', 'lga' or 'iloc' (case-insensitive)
--   wikidata (optional) item ID to use instead of the item connected to the page
--   year     (optional) any non-blank value except 'no' appends the linked year
--   punc     (optional) text inserted between the figure/year and the references
--
-- Returns "" when geog is missing or unrecognised, when the item has no
-- population claims, or when no claim has all of: point in time, applies to
-- part, determination method and references.
function p.LatestPopulation(frame)
	if frame.args.geog == nil then
		return ""
	end

	local geogID = IdForGeog(string.lower(frame.args.geog))
	if geogID == nil then
		return ""
	end

	if frame.args.wikidata ~= nil and frame.args.wikidata ~= "" then
		-- if there's a Wikidata item specified, use it
		item = mw.wikibase.getEntity(frame.args.wikidata)
	else
		-- if there's a Wikidata item connected to the article it will find it here
		item = mw.wikibase.getEntity()
	end

	-- if there are no population claims in the item, return an empty string
	if not (item and item.claims and item.claims[Pop_P]) then
		return ""
	end

	------------ PART 1: Find claims that meet minimum criteria
	-- (1) point in time is not nil
	-- (2) applies to part is not nil
	-- (3) determination method is not nil
	-- (4) References table is not empty
	local validpopclaims = {}
	for _, s in pairs(item.claims[Pop_P]) do
		if s.qualifiers ~= nil
			and s.qualifiers[Pointin_P] ~= nil
			and s.qualifiers[Applies_P] ~= nil
			and s.qualifiers[DetMeth_P] ~= nil
			and s.references ~= nil then
			validpopclaims[#validpopclaims + 1] = s
		end
	end

	-- if there are no valid claims, return an empty string
	if #validpopclaims < 1 then
		return ""
	end

	-- any claims with multiple points-in-time are split into separate claims
	SplitDoubleClaims(validpopclaims)

	--------------- PART 2: Find the latest claim for each geography found
	local latestclaim = {}
	for _, q in pairs(validpopclaims) do
		local claimdate = q.qualifiers[Pointin_P][1].datavalue.value.time
		local claimgeog = q.qualifiers[Applies_P][1].datavalue.value.id
		local current = latestclaim[claimgeog]
		-- time values are compared as strings; a later or equal date replaces the stored claim
		if current == nil or claimdate >= current.qualifiers[Pointin_P][1].datavalue.value.time then
			latestclaim[claimgeog] = q
		end
	end

	--------------- PART 3: Compile the module output, using only latest claim for the specified geography
	local wikitext = ""
	local latest = latestclaim[geogID]
	if latest ~= nil then
		local yearlink = ""
		if (frame.args.year or '') ~= '' and frame.args.year:lower() ~= 'no' then
			-- year is only shown on request
			yearlink = " (" .. GetYearLink(latest) .. ")"
		end
		local refs = GetRefsForClaim(latest, item.labels.en.value) -- the references for the claim
		wikitext = mw.language.getContentLanguage():formatNum(tonumber(latest.mainsnak.datavalue.value.amount))
			.. yearlink .. (frame.args.punc or '') .. refs
	end

	local cat = ''
	if mw.title.getCurrentTitle().namespace == 0 then -- category not added except in article namespace
		-- NOTE(review): the value assigned here is an empty string in this copy of
		-- the module; it was presumably category wikitext that has been stripped.
		-- Restore it from the original module.
		cat = ''
	end
	return wikitext .. cat
end

---------------- Function ListForInfobox returns the most recent population values ---------------
-- parameters:
-- required: type= the type value as for the Infobox
-- optional: wikidata= the wikidata item to be used instead of the one in the current page
-- optional: geog= a single geography type to return pop values for. Valid are 'ucl', 'sal', 'lga', 'iloc'. If left blank, all geographies will be returned.

--- Returns the most recent population value for each geography that applies
-- to the article's place type, formatted for an infobox.
--
-- frame.args:
--   type     (required) the place type as given to the Infobox (case-insensitive)
--   wikidata (optional) item ID to use instead of the item connected to the page
--   geog     (optional) 'ucl', 'sal', 'lga' or 'iloc'; when given, only that
--            geography is returned
--
-- Returns "" when type is missing, is a type without ABS populations
-- ("cadastral", "protected") or is unrecognised, and when the item has no
-- usable population claims. A single entry is returned as-is; several entries
-- are returned as a bulleted list; no entries yields no list at all.
function p.ListForInfobox(frame)
	if frame.args.type == nil then
		return ""
	end

	-- for the place type supplied, change to a lower case string
	local articleplacetype = string.lower(frame.args.type)

	-- This function used to accept "townlocality", a spelling GeogIdsForType does
	-- not handle (it tests for "townandlocality"), so that type passed validation
	-- and then failed when its geography list was looked up. Accept both
	-- spellings and pass on the one GeogIdsForType understands.
	if articleplacetype == "townlocality" then
		articleplacetype = "townandlocality"
	end

	-- place types that can have ABS populations
	local populatedtypes = {
		town = true,
		suburb = true,
		city = true,
		settlement = true,
		locality = true,
		townandlocality = true,
		lga = true,
		region = true,
	}
	if not populatedtypes[articleplacetype] then
		-- "cadastral" and "protected" don't have ABS populations; anything else
		-- is an unrecognised type
		return ""
	end

	if frame.args.wikidata ~= nil and frame.args.wikidata ~= "" then
		-- if there's a Wikidata item specified, use it
		item = mw.wikibase.getEntity(frame.args.wikidata)
	else
		-- if there's a Wikidata item connected to the article it will find it here
		item = mw.wikibase.getEntity()
	end

	-- if there are no population claims in the item, return an empty string
	if not (item and item.claims and item.claims[Pop_P]) then
		return ""
	end

	------------ PART 1: Find claims that meet minimum criteria
	-- (1) point in time is not nil
	-- (2) applies to part is not nil
	-- (3) determination method is not nil
	-- (4) References table is not empty
	local validpopclaims = {}
	for _, s in pairs(item.claims[Pop_P]) do
		if s.qualifiers ~= nil
			and s.qualifiers[Pointin_P] ~= nil
			and s.qualifiers[Applies_P] ~= nil
			and s.qualifiers[DetMeth_P] ~= nil
			and s.references ~= nil then
			validpopclaims[#validpopclaims + 1] = s
		end
	end

	-- if there are no valid claims, return an empty string
	if #validpopclaims < 1 then
		return ""
	end

	-- any claims with multiple points-in-time are split into separate claims
	SplitDoubleClaims(validpopclaims)

	--------------- PART 2: Find the latest claim for each geography found
	local latestclaim = {}
	for _, q in pairs(validpopclaims) do
		local claimdate = q.qualifiers[Pointin_P][1].datavalue.value.time
		local claimgeog = q.qualifiers[Applies_P][1].datavalue.value.id
		local current = latestclaim[claimgeog]
		-- time values are compared as strings; a later or equal date replaces the stored claim
		if current == nil or claimdate >= current.qualifiers[Pointin_P][1].datavalue.value.time then
			latestclaim[claimgeog] = q
		end
	end

	--------------- PART 3: specify the geography types that can be returned for each place type
	local showGeogIds = {}
	if frame.args.geog ~= nil and frame.args.geog ~= "" then
		-- if geog is specified, only claims for its id are returned
		showGeogIds[1] = IdForGeog(string.lower(frame.args.geog))
	else
		showGeogIds = GeogIdsForType(articleplacetype)
	end

	--------------- PART 4: Compile the module output, using only latest claims in specified geographies
	local returnlist = {}
	for _, geogid in pairs(showGeogIds) do
		local latest = latestclaim[geogid]
		if latest ~= nil then
			local refs = GetRefsForClaim(latest, item.labels.en.value) -- the references for the max date claim
			table.insert(returnlist,
				mw.language.getContentLanguage():formatNum(tonumber(latest.mainsnak.datavalue.value.amount))
				.. " (" .. GetAbbrLabel(latest) .. " " .. GetYearLink(latest) .. ")" .. refs)
		end
	end

	local wikitext = ""
	if #returnlist == 1 then
		-- if there is only one entry in returnlist, return it without a bullet point
		wikitext = returnlist[1]
	elseif #returnlist > 1 then
		-- if there are multiple entries, return all the rows with new line and bullet points between them
		wikitext = "\n*" .. table.concat(returnlist, "\n*")
	end
	-- with no entries wikitext stays "", rather than becoming a lone empty bullet

	local cat = ''
	if mw.title.getCurrentTitle().namespace == 0 then -- category not added except in article namespace
		-- NOTE(review): the value assigned here is an empty string in this copy of
		-- the module; it was presumably category wikitext that has been stripped.
		-- Restore it from the original module.
		cat = ''
	end
	return wikitext .. cat
end

-- ###### this function is just for testing of the upgrade during development

-- p.main: development-only entry point. It sets `wdata` to a fixed item ID and
-- returns the concatenated results of p.HistoricPopulations (twice),
-- p.LatestPopulation and p.ListForInfobox.
-- NOTE(review): this block appears to have lost characters in extraction. The
-- parameter list `()`, the argument tables of all four calls (presumably frame
-- tables built from `wdata`) and the contents of the string literal that spans
-- the line break are missing. Restore them from the original module.
function p.main local wdata = 'Q649969' -- Ulladulla return p.HistoricPopulations .. '\n\n' .. p.HistoricPopulations .. p.LatestPopulation .. '
' .. p.ListForInfoboxend

return p