-- This module generates the wikitext required at Module:Convert/data-- by reading and processing the wikitext of the master list of units-- (see conversion_data for the page title).---- Script method:-- * Read lines, ignoring everything before "
".-- * Process the following lines:-- * Find next level-3 heading like "
".-- * Parse each following line starting with "|"-- (but ignore lines starting with "|-" or "|}".-- * Split such lines into fields (delimiter "||") and trim-- leading/trailing whitespace from each field.-- Remove any "colspan" at front of second field (symbol).-- * Remove thousand separators (commas) from the scale field.-- If the scale is a number, do not change it.-- Otherwise, it should be an expression like "5/9", in-- which case it is replaced by the value of the expression.-- * Remove wiki formatting '...' from the link field.-- * Remove redundant fields from the unit to reduce size of data table.-- * Create alternative forms of a unit such as an alias or a combination.-- * Stop processing when encounter end of text or a line starting-- with a level-2 heading ("
-- Unicode-aware string helpers provided by Scribunto's mw.ustring library.
local ulower = mw.ustring.lower
local usub = mw.ustring.sub
-- Forward declaration; nothing is assigned here (lookup reads text_code.SIprefixes,
-- so it has to be set before any unit lookup runs).
local text_code
local specials =
-- Module text for the local language (localization).-- A default table of text for enwiki is provided here.-- If needed for another wiki, wanted sections from the table can be-- copied into translation_table in Module:Convert/text.-- For example, copying and modifying only the titles section may give:---- local translation_table = local mtext =
local function message(key, ...)
    -- Return a message from the message table, which can be localized.
    -- '$1', '$2', ... are replaced with the first, second, ... parameters,
    -- each of which must be a string or a number.
    -- If key is nil, '???' is used; if there is no message for key, the key
    -- itself is used as the message text.
    -- The global variable is_test_run can be set by a testing program to
    -- check the messages generated by this program (the key is then put in
    -- front of every message except 'm_line_num').
    local rep = {}
    for i, v in ipairs({...}) do
        rep['$' .. i] = v
    end
    key = key or '???'
    local extra
    if is_test_run and key ~= 'm_line_num' then
        extra = key .. ': '
    else
        extra = ''
    end
    -- gsub returns (text, count); as an operand of '..' only the text is used.
    return extra .. string.gsub(mtext.messages[key] or key, '$%d+', rep)
end
local function quit(key, ...)
    -- Abort by raising the (localizable) message for key, with any extra
    -- arguments substituted into it. The surrounding pcall receives the text.
    -- Level 0 stops Lua adding position information to the message.
    local text = message(key, ...)
    error(text, 0)
end
local function quit_no_message()
    -- Throw an error.
    -- This is used in some functions which can throw an error with a message,
    -- but where the message is in fact never displayed because the calling
    -- function uses pcall to catch errors, and any message is ignored.
    -- Using this function documents that the message (which may be useful in
    -- some other application) does not need translation as it never appears.
    -- Callers pass a descriptive string; it is accepted and discarded because
    -- Lua ignores surplus arguments.
    error('this message is not displayed', 0)
end
local function collection -- Return a table to hold items. return end
-- Warnings accumulated while processing the input.
local warnings = collection()

local function add_warning(key, ...)
    -- Add a warning that will be inserted before the final result.
    -- key and any extra arguments are turned into text by message().
    warnings:add(message(key, ...))
end
---Begin code to evaluate expressions------------------------------------
-- This is needed because Lua's loadstring is not available in Scribunto,
-- and each scale value can be specified as an expression such as "5/9".
-- More complex expressions are supported, including use of parentheses
-- and the binary operators:
--     + - * / ^
local operators =
local function tokenizer(text) -- Function 'next' returns the next token which is one of: -- number -- table (operator) -- string ('(' or ')') -- nil (end of text) -- If invalid, an error is thrown. -- The number is unsigned (unary operators are not supported). return end
local function evaluate_tokens(tokens, inparens) -- Return the value from evaluating tokenized expression, or throw an error. local numstack, opstack = collection, collection local function perform_ops(precedence, associativity) while opstack.n > 0 and (opstack[opstack.n].precedence > precedence or (opstack[opstack.n].precedence
1)) do local rhs = numstack:pop local lhs = numstack:pop if not (rhs and lhs) then quit_no_message('missing number') end local op = opstack:pop numstack:add(op.func(lhs, rhs)) end end local token_last local function set_state(token_type) if token_last
'number') and 'operator' or 'number' quit_no_message('missing ' .. missing) end token_last = token_type end while true do local token = tokens:next if type(token)
'table' then set_state('operator') perform_ops(token.precedence, token.associativity) opstack:add(token) elseif token
')' then if inparens then break end quit_no_message('unbalanced parentheses') else break end end perform_ops(0) if numstack.n > 1 then quit_no_message('missing operator') end if numstack.n < 1 then quit_no_message('missing number') end return numstack:popend
local function evaluate(expression)
    -- Tokenize expression (a string) and evaluate the tokens, returning the
    -- resulting number. An error is thrown if the expression is invalid.
    -- Not bullet proof, but adequate for the expressions that are used.
    local result = evaluate_tokens(tokenizer(expression))
    return result
end
---End code to evaluate expressions---------------------------------------
---Begin code adapted from Module:Convert-------------------------------
local plural_suffix = 's' -- may be changed from translation.plural_suffix below
local function shallow_copy(t)
    -- Return a shallow copy of t: a new table with the same keys and values.
    -- Nested tables are shared with the original, not duplicated, and the
    -- metatable is not copied.
    -- Do not need the features and overhead of mw.clone provided by Scribunto.
    local result = {}
    for k, v in pairs(t) do
        result[k] = v
    end
    return result
end
local function split(text, delimiter)
    -- Return a numbered table with fields from splitting text.
    -- The delimiter is used in a Lua pattern without escaping, so a delimiter
    -- containing magic characters (for example, '.') would fail.
    -- Each field has any leading/trailing whitespace removed.
    -- Text with no delimiter gives a table holding one field.
    local t = {}
    text = text .. delimiter  -- to get last item
    for item in text:gmatch('%s*(.-)%s*' .. delimiter) do
        table.insert(t, item)
    end
    return t
end
local unit_mt =
local function prefixed_name(unit, name, index) -- Return unit name with SI prefix inserted at correct position. -- index = 1 (name1), 2 (name2), 3 (name1_us), 4 (name2_us). -- The position is a byte (not character) index, so use Lua's sub. local pos = rawget(unit, 'prefix_position') if type(pos)
local unit_prefixed_mt =
local function lookup(units, unitcode, sp, what) -- Return a copy of the unit if found, or return nil. -- In this cut-down code, sp is always nil, and what is ignored. local t = units[unitcode] if t then if t.shouldbe then return nil end local result = shallow_copy(t) if result.prefixes then result.si_name = result.si_prefix = return setmetatable(result, unit_prefixed_mt) end return setmetatable(result, unit_mt) end local SIprefixes = text_code.SIprefixes for plen = SIprefixes[1] or 2, 1, -1 do -- Look for an SI prefix; should never occur with an alias. -- Check for longer prefix first ('dam' is decametre). -- SIprefixes[1] = prefix maximum #characters (as seen by mw.ustring.sub). local prefix = usub(unitcode, 1, plen) local si = SIprefixes[prefix] if si then local t = units[usub(unitcode, plen+1)] if t and t.prefixes then local result = shallow_copy(t) if (sp
local function evaluate_condition(value, condition) -- Return true or false from applying a conditional expression to value, -- or throw an error if invalid. -- A very limited set of expressions is supported: -- v < 9 -- v * 9 < 9 -- where -- 'v' is replaced with value -- 9 is any number (as defined by Lua tonumber) -- '<' can also be '<=' or '>' or '>=' -- In addition, the following form is supported: -- LHS and RHS -- where -- LHS, RHS = any of above expressions. local function compare(value, text) local arithop, factor, compop, limit = text:match('^%s*v%s*([*]?)(.-)([<>]=?)(.*)$') if arithop
'*' then factor = tonumber(factor) if factor
nil then quit_no_message('Invalid default expression.') end if compop
'<=' then return value <= limit elseif compop
'>=' then return value >= limit end quit_no_message('Invalid default expression.') -- should not occur end local lhs, rhs = condition:match('^(.-%W)and(%W.*)') if lhs
---End adapted code-----------------------------------------------------
local function strip(text)
    -- Remove whitespace from both ends of text and return what is left.
    -- The pattern always matches, so the result is a string (possibly empty).
    local trimmed = text:match("^%s*(.-)%s*$")
    return trimmed
end
local function empty(text)
    -- Return true if text is nil or empty (assuming a string).
    -- A string consisting only of whitespace is not regarded as empty.
    return text == nil or text == ''
end
-- Tables of units: k = unit code, v = unit table.
local units_index = {}  -- all units: normal, alias, per, combination, or multiple
local alias_index = {}  -- all aliases (to detect attempts to define more than once)
local per_index = {}    -- all "per" units (to detect attempts to define more than once)
local function get_unit(ucode, utype) -- Look up unit code in our cache of units. -- If utype
-- Read from input for unit codes that should not be checked for a duplicate
-- (k = unit code; insert_unique_unit skips its duplicate check for these).
local overrides = {}
local function insert_unique_unit(data, unit, index)
    -- Append unit to the data table, first copying in any built-in fields
    -- that specials.ucode defines for its unit code. If index is given, the
    -- unit is also recorded there under its unit code.
    -- Throws 'm_dup_code' if the unit code is already known, unless the code
    -- is listed in overrides.
    local code = unit.unitcode
    if get_unit(code) and not overrides[code] then
        quit('m_dup_code', code)
    end
    for field, by_code in pairs(specials.ucode) do
        -- Assigns nil (no change for an absent field) when nothing is
        -- defined for this code.
        unit[field] = by_code[code]
    end
    if index then
        index[code] = unit
    end
    table.insert(data, unit)
end
local function check_condition(condition) -- Return true if condition appears to be valid; otherwise return false. for _, value in ipairs do local success, result = pcall(evaluate_condition, value, condition) if not success then return false end end return trueend
local function check_default_expression(default, ucode) -- Return a numbered table of names present in param default -- (two names if an expression, or one name (param default) otherwise). -- Throw an error if a problem occurs. -- An expression uses pipe-delimited fields with 'v' representing -- the input value for the conversion. -- Example (suffix is optional): 'v < 120 ! small ! big ! suffix' -- returns . if not default:find('!', 1, true) then return end local t = for item in (default .. '!'):gmatch('%s*(.-)%s*!') do t[#t+1] = item -- split on '!', removing leading/trailing whitespace end if not (#t
4) then quit('m_def_fmt', default, ucode) end local condition, default1, default2 = t[1], t[2], t[3] if #t
local function check_default(default, ucode, utype, unit_table) -- Check the given name (or expression) of a default output. -- Normally a unit must not define itself as its default. However, -- some units are defined merely for use in per units, and they have -- the same ucode, utype and default. -- Example: unit cent which cannot be converted to anything other than -- a cent, but which can work, for example, in cent/km and cent/mi. -- Throw an error if a problem occurs. local done = for _, default in ipairs(check_default_expression(default, ucode)) do if done[default] then quit('m_def_rpt', default, ucode) end if default
unit_table.utype and utype
local function check_all_defaults(cfg, units)
    -- Check each default in units and warn if needed.
    -- This is done after all input data has been processed.
    -- cfg.maxerrors limits both the number of errors collected before
    -- giving up, and the number of unit codes listed in the warning.
    -- Throw an error (all collected messages joined) if a problem occurs.
    local errors = collection()
    local missing = collection()  -- unitcodes with missing defaults
    for _, unit in ipairs(units) do
        if not unit.shouldbe and not unit.combination then
            -- This is a standard unit or an alias/per (not shouldbe, combo).
            -- An alias may have a default defined, but it is optional.
            local default = unit.default
            local ucode = unit.unitcode
            if empty(default) then
                if not unit.target then
                    -- unit should have a default
                    missing:add(ucode)
                end
            else
                -- check_default throws on a problem; collect the message
                -- so several problems can be reported at once.
                local ok, msg = pcall(check_default, default, ucode, unit.utype, unit)
                if not ok then
                    errors:add(msg)
                    if errors.n >= cfg.maxerrors then
                        break
                    end
                end
            end
        end
    end
    if errors.n > 0 then
        error(errors:join(), 0)
    end
    if missing.n > 0 then
        add_warning('m_wrn_nodef')
        local limit = cfg.maxerrors
        for _, v in ipairs(missing) do
            limit = limit - 1
            if limit < 0 then
                add_warning('m_wrn_more')
                break
            end
            add_warning('m_wrn_ucode', v)
        end
    end
end
local function check_all_pers(cfg, units) -- Check each component of each "per" unit and warn if needed. -- In addition, add any required extra fields for some types of units. -- This is done after all input data has been processed. -- Throw an error if a problem occurs. local errors = collection local function errmsg(key, ...) errors:add(message(key, ...)) end for _, unit in ipairs(units) do local per = unit.per if per then local ucode = unit.unitcode if #per ~= 2 then errmsg('m_per_two', ucode) else local types = for i, v in ipairs(per) do if empty(v) then errmsg('m_per_empty', ucode) end if not text_code.currency[v] then local t = get_unit(v) if t then types[i] = t.utype else errmsg('m_per_undef', ucode, v) end end end if specials.utype[unit.utype]
1 then unit.invert = 1 else unit.invert = -1 end else errmsg('m_per_fuel', ucode) end end end end if errors.n >= cfg.maxerrors then break end end if errors.n > 0 then error(errors:join, 0) endend
local function update_units(units, composites, varnames)
    -- Add extra data, defined in other sections of the input, to the unit
    -- definitions. Runs after all input data has been processed.
    -- Units are matched by unit code against the keys of composites and
    -- varnames; each unit table is modified in place.
    for _, unit in ipairs(units) do
        local code = unit.unitcode
        if composites[code] then
            -- NOTE(review): the composite entry itself is not used and the
            -- value is an empty string; this looks like text lost from the
            -- source -- confirm against the original module.
            unit.subdivs = ''
        end
        local varname = varnames[code]
        if varname then
            unit.varname = varname
        end
    end
end
local function make_override(cfg, data)
    -- Build the handler for override lines. Each call of the returned
    -- function records, in data, a unit code that is not to be checked for
    -- a duplicate (data[ucode] = true).
    -- The handler throws 'm_ovr_miss' if the first field is missing or
    -- empty, and 'm_ovr_dup' if the code was already recorded.
    return function (utype, fields)
        local code = fields[1]
        if empty(code) then
            quit('m_ovr_miss')
        elseif data[code] then
            quit('m_ovr_dup', code)
        end
        data[code] = true
    end
end
local function make_default(cfg, data) -- Return a function which, when called, stores a table that defines a -- default output unit. The table is stored in data (also a table). local defaults_index = -- to detect attempts to define a default twice return function (utype, fields) -- Store a table defining a unit. -- This is for a unit such as 'kg' that has a default output unit -- different from what is defined for the base unit ('g'). -- Throw an error if a problem occurs. local ucode = fields[1] local default = fields[2] if empty(ucode) then quit('m_dfs_code') end if empty(default) then quit('m_dfs_none', ucode) end if #fields ~= 2 then quit('m_dfs_two', ucode) end local unit_table = get_unit(ucode) if not unit_table then quit('m_dfs_undef', ucode) end local symbol = unit_table.defkey or unit_table.symbol if empty(symbol) then quit('m_dfs_sym', ucode) end check_default(default, ucode, utype, unit_table) if defaults_index[ucode] then quit('m_dfs_dup', ucode) end defaults_index[ucode] = default table.insert(data,) endend
local function clean_link(link, name) -- Return link, customary where: -- link = given link after removing any '...' wiki formatting -- and removing any leading '+' or '*' or '@'; -- customary = 1 if leading '+', or 2 if '*' or 3 if '@', or nil -- (for extra "US" or "U.S." or "Imperial" customary units link). -- Result has leading/trailing whitespace removed, and is nil if empty -- or if link matches the name, if a name is specified. -- Exception: If the link is empty and the name starts with '2|ft|6|in'. -- The target units must be defined first. -- Throw an error if a problem occurs. local unitcode -- dummy code required for simplicity, but which is not used in output local alternate_code -- an alternative unit code can be specified to replace convert input local fixed_name -- a fixed name can be specified to replace the unit's normal symbol/name local default_code local ucodes, scales =, for i, v in ipairs(fields) do -- 1=composite, 2=ucode1, 3=ucode2, 4=default, 5=alternate, 6=name if i
then quit('m_cmp_miss') end unitcode = v elseif 2 <= i and i <= 5 then if not (i
) then local target = get_unit(v, (i
4 then default_code = v else if scales[#scales] ~= target.scale then quit('m_cmp_scale', v, unitcode) end alternate_code = v end end elseif i
nil or scale <= 0 then quit('m_cmp_inval', unitcode, scales[i]) end scales[i] = scale end for i = 1, count - 1 do local ratio = scales[i] / scales[i + 1] local rounded = math.floor(ratio + 0.5) if rounded < 2 then quit('m_cmp_order', unitcode) end if math.abs(ratio - rounded)/ratio > 1e-6 then quit('m_cmp_int', unitcode) end ratios[i] = rounded end local text = local function add_text(key, value) table.insert(text, string.format('%s = %q', key, value)) end if default_code then add_text('default', default_code) end if alternate_code then add_text('unit', alternate_code) end if fixed_name then add_text('name', fixed_name) end local subdiv = string.format('["%s"] = ', ucodes[2], table.concat(text, ', ')) local main_code = ucodes[1] local item = data[main_code] if item then table.insert(item.subdivs, subdiv) else data[main_code] = end endend
local function make_outputmultiple(cfg, data) -- Return a function which, when called, stores a table that defines a -- single multiple output unit. The table is stored in data (also a table). return function (utype, fields) -- Store a table defining a unit. -- This is for a multiple unit like 'ydftin' (result in yards, feet, inches). -- The target units must be defined first. -- Throw an error if a problem occurs. local unit = local ucodes, scales =, for i, v in ipairs(fields) do if i
then quit('m_mul_miss') end unit.unitcode = v elseif v
0 and 'm_mul_none' or 'm_mul_one', unit.unitcode) end -- Component units must be specified from most-significant to least-significant -- (so scale values will be in descending order), -- and each ratio of a pair of scales must be very close to an integer. -- The componenets and ratios are stored in reverse order (least significant first). -- This script stores a unit scale as a string (might be an expression like "5/9"), -- but scales in a multiple are handled as numbers (should never be expressions). local ratios, count =, #scales for i = 1, count do local scale = tonumber(scales[i]) if scale
-- To make updating the data module easier, this script inserts a preamble-- and a postamble so the result can be used to replace the whole page.local data_preamble = [=[ -- Conversion data used by [[Module:Convert]] which uses mw.loadData for-- read-only access to this module so that it is loaded only once per page.-- See if copying to another wiki.---- These data tables follow:-- all_units all properties for a unit, including default output-- default_exceptions exceptions for default output ('kg' and 'g' have different defaults)-- link_exceptions exceptions for links ('kg' and 'g' have different links)---- These tables are generated by a script which reads the wikitext of a page that-- documents the required properties of each unit; see .]=]
local data_postamble = [=[ return { all_units = all_units, default_exceptions = default_exceptions, link_exceptions = link_exceptions, per_unit_fixups = per_unit_fixups, }]=]