-- 2023-04-17 STABLE wrt basics, quotes "" * with base sep; working on result string & report
-- todo: report options, more options
-- todo: options count, pattern, out-table, out-htmllist, keepinputordersource

-- NOTE(review): in the received text this whole prelude sat on a single `--`
-- comment line and the two table initialisers below had lost their `{}`;
-- both are restored here. Confirm against the published module.
require('strict')

-- Export table; the module's public functions are attached to it and it is
-- returned at the end of the file.
local p = {}

local mArgs = require('Module:Arguments')
local str = require('Module:String')
local yesno = require('Module:Yesno')
local tTools = require('Module:TableTools')
local strDeEnCode = require('Module:DecodeEncode')

local iMaxWords = 12 -- alpha-status, Apr2023. when stable, can be higher

-- Module-level argument table; starts empty and is reassigned from
-- parseArgs(origArgs) later in the file.
local tArgs = {}

local report = nil -- initiated when explain=T
local function parseReportType(tArgs)local xpReportTF = falselocal xpReportType = yesno(tArgs.explain, tArgs.explain) or false -- to be parsed beyond T/F-- in: nil, false: FALSE type=nil-- in: true, preview: type=true TRUE (dflt: if prev)-- in: doc, testcases: by page TRUE (persistent on those pages)-- in: foo, other: FALSE
xpReportTF = false if yesno(xpReportType, false)
'testcases' then xpReportType = 'testcases' xpReportTF = true elseif xpReportType
true then xpReportType = 'preview' xpReportTF = true else xpReportTF = false -- unk word end tArgs.explain = xpReportTF return xpReportType
end
--- Load the report submodule into the module-level `report` variable.
-- @param tArgs  accepted for call compatibility; not read by this function
local function initReport(tArgs)
	report = require('Module:Str find word/report')
	-- NOTE(review): the source text had a bare `report.xpCheckExplain`, which
	-- is not a valid Lua statement; the call parentheses appear to have been
	-- lost. Restored as a zero-argument call -- confirm against the report
	-- submodule's signature.
	report.xpCheckExplain() -- dummy
end
local function isPreview -- here or in report?local ifPreview = require('Module:If preview') -- return not (ifPreview._warning
-- Turn "A" into "A" etc. asap-- and reduce multi-spaces (including nbsp etc.) into single spacelocal function sDecodeTrim(str) if str
--- %-escape a word (character string) before it is fed into a string-pattern
-- function, so that it is matched literally.
-- The work is delegated to `_escapePattern` of Module:String (loaded as `str`).
-- Per the original note, the characters escaped are matched by
-- "([%(%)%.%%%+%-%*%?%[%^%$%]])", i.e. the 12 characters ( ) . % + - * ? [ ^ $ ]
-- @param word  the string to escape
-- @return the result of `str._escapePattern(word)`
local function escape_word(word)
	local fnEscape = str._escapePattern
	return fnEscape(word)
end
-- remove \' \" outer pair (& rm outer spaces);-- any result (=the inner string) is trimmed by T/F option (case " abc ").local function removeOuterQuotes(s, bTrimAfter) if s
if mw.ustring.match(s, "^%s*\'") ~= nil then s = mw.ustring.gsub(s, "^%s*%\'(.*)%\'%s*$", "%1") elseif mw.ustring.match(s, '^%s*\"') ~= then s = mw.ustring.gsub(mw.text.trim(s), '^%\"(.*)%\"$', '%1') end if bTrimAfter
-- separator-in-- todo: check characters '" _ ; & accept?'local function setSepIn(sSep, sDefaultSep) if sSep
then return sDecodeTrim(sDefaultSep) else return sSep endend
-- separatorlocal function setSepOut(sSep, sDefaultSep) sSep = sDecodeTrim(sSep) or nil if sSep
then return sDefaultSep else return sSep endend
-- Check whether a single word is in a table (simple array of words)-- returns hitword or nil; iPosition is helper to keep outlist orderedlocal function findWordInTable(tSource, word)---local bHit = false---local iPosition = -1 for i, v in ipairs(tSource) do if v
return nilend
-- Reads and parses a word list and returns a table with words (simple array)-- words list can be: source, andwords-to-check, orwords-to-check-- step 1: basic preparation of the csv wordstring-- step 2: when case-insensitive, turn string into lowercase-- step 3: read (parse) quoted '..'-- step 4: read (parse) quoted ".."-- step 5: read (parse) comma-separated words-- step 6: merge quoted wordlists; keep in order-- step 7: when booleans=T, change boolean words into true/false (module:yesno rules)-- step 8: replace synonyms (by inout "|_nov=November, 11" input)-- step 9: remove duplicates from wordtable (rm latest)-- all words returned are trimmed-- return the table (a straight array)local function buildWordTable(sWordlist)local wordTable = local hitWord = local hitCount = -1local _local sPatternlocal cQ1 = '_Q0027_' -- U+0027 = \'local cQ2 = '_Q0022_' -- U+0022 = \"local tQ1hits = -- Q1-hits, reused to restore orderlocal tQ2hits = -- Q2-hits, reused to restore orderlocal sMsg = -- xpmessage onlylocal xpHasQuotes = false
-- Step 1: prepare sWordList sDecodeTrim(sWordlist) if sWordlist
nil then return wordTable end sWordlist = tArgs.sep .. sWordlist .. tArgs.sep -- test. dev only: xpHasQuotes = mw.ustring.match(sWordlist, '[\"\']') ~= -- unused if xpHasQuotes then --- report.xpMessage('xpHasQuotes [unused]: ' .. tostring(xpHasQuotes)) end
-- Step 2: case sensitive if yesno(tArgs.case, true)
-- Step 3: Q1 read quotes (single quotes '..') sPattern = '%f[^' .. tArgs.sep_pattern .. ']%s*%b\'\'%s*%f[' .. tArgs.sep_pattern .. ']' -- initial: hitWord = sDecodeTrim(mw.ustring.match(sWordlist, sPattern)) or while hitWord ~= do --- now into function/ to check if both \' and \" are not mixed --- hitWord = sDecodeTrim(mw.ustring.gsub(hitWord, "^%\'(.+)%\'$", "%1")) -- remove outer Qs \" hitWord = removeOuterQuotes(hitWord, true) table.insert(tQ1hits, hitWord) sWordlist = mw.ustring.gsub(sWordlist, sPattern, cQ1, 1) -- removes current 1st hit; replace with code
-- next hitWord = sDecodeTrim(mw.ustring.match(sWordlist, sPattern)) or end --- report.xpMessage('sWL1: ' .. sWordlist) --- report.xpMessage('Qhits: ' .. table.concat(tQ1hits, '; ')) -- Step 4: Q2 read quotes (double quotes "..") sPattern = '%f[^' .. tArgs.sep_pattern .. ']%s*%b\"\"%s*%f[' .. tArgs.sep_pattern .. ']' -- initial search hitWord = sDecodeTrim(mw.ustring.match(sWordlist, sPattern)) or while hitWord ~= do --- hitWord = sDecodeTrim(mw.ustring.gsub(hitWord, '^%\"(.+)%\"$', '%1')) -- remove outer Qs \" hitWord = removeOuterQuotes(hitWord, true) table.insert(tQ2hits, hitWord) sWordlist = mw.ustring.gsub(sWordlist, sPattern, cQ2, 1) -- removes current '1st' hit; replace with code -- next hitWord = sDecodeTrim(mw.ustring.match(sWordlist, sPattern)) or end ---report.xpMessage('sWL2:' .. sWordlist) ---report.xpMessage('Qhits: ' .. table.concat(tQ2hits, '; '))
-- Step 5: parse plain sep-delimited words sPattern = '%f[^' .. tArgs.sep_pattern .. '][^' .. tArgs.sep_pattern .. ']+%f[' .. tArgs.sep_pattern .. ']' hitCount = 0 while hitCount < iMaxWords do hitWord = sDecodeTrim(str._match(sWordlist, sPattern, 1, hitCount + 1, false, tArgs.sep)) or if hitWord
-- Step 6: merge quoted words & wordtable, keep order for iQ, sQW in ipairs(tQ1hits) do for iW, sW in ipairs(wordTable) do if sW
cQ2 then wordTable[iW] = sQW break end end end
-- Step 7: when read as booleans, converse words to true/false if tArgs.booleans then local sBool for i, v in ipairs(wordTable) do sBool = yesno(v) if sBool ~= nil then wordTable[i] = tostring(sBool) end end end
-- Step 8: replace synonyms if #tArgs['synonymsTables'] >= 1 then for aka1, tAkas in pairs (tArgs['synonymsTables']) do for iW, w in ipairs(wordTable) do if findWordInTable(tAkas, w) then -- todo must be ... ~= nil ??? 26-3 wordTable[iW] = aka1 end end end end
if true then wordTable = tTools.removeDuplicates(wordTable)else -- lol works but not needed, use ttools -- Step 9: remove duplicates from list local iR, iK -- iR = reader, iK = killer local hit = false iR = 1 while iR < #wordTable do iK = #wordTable -- will be counting downwards while iK > iR do if wordTable[iK]
return wordTableend
-- AND-logic with ANDwords words: ALL words must be found-- returns -- T when *all* AND words are found-- hittable with all hit words-- note 1: when F, the hittable still contains the words that were found-- note 2: empty AND-wordlist => True by logic (because: not falsified)local function checkANDwords(tWorkf)local bANDchk = true -- main conclusionlocal result1 = nil -- per word hitlocal tHits = -- hit table---local iPos = -1 -- helper info just to keep in order
if #tWorkf.ANDwords > 0 then bANDchk = true for i, word in ipairs(tWorkf.ANDwords) do result1 = findWordInTable(tWorkf.SOURCEwords, word) or nil if result1
-- OR-logic with tORwords words: at least one word must be found-- returns -- True when at least one OR word is found-- hittable has all hit words-- note 1: empty OR-wordlist => True by logic (because: not falsified)-- note 2: while just one hitword is a True result, the hittable contains all words foundlocal function checkORwords(tWork)local result1local bORchklocal tHits
bORchk = false tHits = result1 = nil if #tWork.ORwords > 0 then for i, word in ipairs(tWork.ORwords) do result1 = findWordInTable(tWork.SOURCEwords, word) or nil if result1
-- Determine the requested return value (a string)-- sRESULTstring is the _main return value (logically defined value)-- this function applies tArgs.out_true / tArgs.out_false return value-- note: out_true= implies: blank return value-- note: no parameter out_true= (that is, out_true=nil) implies: by default, return the sRESULTstring--- todo add pref, sufflocal function yesnoReturnstring(tResults) if tResults.resultALL
nil then return table.concat(tResults.tTRUE, tArgs.out_sep) else -- some |out-true= value is entered, could be return '_out-true' .. tArgs.out_true end endend
local function tCombinedSourceorderedTRUEtables(tResult)local tOut = if tResult.tANDhits
nil then tOut = tResult.tANDhits else tOut = tResult.tANDhits for i, v in ipairs(tResult.tORhits) do table.insert(tOut, i, v) end end if tOut
--- Join up to two word-list strings into one list string.
-- A nil list is skipped; when both are nil the result is the empty string.
-- @param s1    first word list (string or nil)
-- @param s2    second word list (string or nil)
-- @param sSep  optional separator placed between s1 and s2; when omitted the
--              module-level `tArgs.sep` is used, as before
-- @return the joined list string
local function concatAndLists(s1, s2, sSep)
	-- NOTE(review): this function appears to be called while parseArgs is still
	-- building its result, i.e. before the module-level `tArgs` has been
	-- assigned. If so, `tArgs.sep` is nil at that point and table.concat falls
	-- back to an empty separator, gluing the two lists together. Callers can
	-- now pass the separator explicitly via `sSep` -- verify the call site.
	local sJoin = sSep
	if sJoin == nil then
		sJoin = tArgs.sep
	end

	local tLists = {}
	if s1 ~= nil then
		tLists[#tLists + 1] = s1
	end
	if s2 ~= nil then
		tLists[#tLists + 1] = s2
	end

	return table.concat(tLists, sJoin)
end
--
tNewArgs.sep = setSepIn(origArgs['sep'], tDefault['sep']) tNewArgs.sep_pattern = escape_word(tNewArgs.sep) tNewArgs.out_sep = setSepOut(origArgs['out-sep'] or origArgs['sep'], tDefault['out_sep']) tNewArgs.case = yesno(origArgs['case'] or origArgs['casesensitive']) or tDefault['case'] tNewArgs.booleans = yesno(origArgs['bool'] or origArgs['booleans']) or tDefault['booleans'] tNewArgs.out_true = sDecodeTrim(origArgs.out_true) or nil -- nil =default so return sRESULTstring; keep as legal input & return value tNewArgs.out_false = sDecodeTrim(origArgs.out_false) or tNewArgs.prefix = sDecodeTrim(origArgs.prefix or origArgs.p) or tNewArgs.suffix = sDecodeTrim(origArgs.suffix or origArgs.s) or tNewArgs.out_format = 'default' -- todo: table, default, htmllisttype, flatlidt, first, tNewArgs.explain = false -- TEST17Apr origArgs.explain tNewArgs.explain_type = parseReportType(tNewArgs) or nil tNewArgs.test = origArgs.test
-- the wordlists: tNewArgs['source'] = origArgs['source'] or origArgs['s'] or tNewArgs['sANDlist'] = concatAndLists(origArgs['word'] or origArgs['w'] or nil, origArgs['andwords'] or origArgs['andw'] or nil) tNewArgs['sORlist'] = origArgs['orwords'] or origArgs['orw'] or
tNewArgs['synonyms'] = tNewArgs['synonymsTables'] = -- to be populated later for k, v in pairs(origArgs) do if str._match(k, '^_%S', 1, 1, false, false) then local syn1 syn1 = mw.ustring.gsub(k, '^_', , 1) table.insert(tNewArgs['synonyms'], syn1) tNewArgs['synonyms'][syn1] = v end end
if tNewArgs.explain
if false then for aka1, sAkalist in pairs (tNewArgs['synonyms']) do report.xpMessage('SYNONYMS: ' .. aka1 .. '=' .. sAkalist) endend
return tNewArgsend
--
tArgs = parseArgs(origArgs) -- make synonyms into tables -- 'aka1' = target synonym (= the synonym that remains) for aka1, sAkalist in pairs(tArgs['synonyms']) do tArgs['synonymsTables'][aka1] = buildWordTable(tArgs['synonyms'][aka1]) end
-- build the worktables tWork['SOURCEwords'] = buildWordTable(tArgs.source) tWork['ANDwords'] = buildWordTable(tArgs.sANDlist) tWork['ORwords'] = buildWordTable(tArgs.sORlist)
-- apply logic & conclude tResults.resultALL = nil -- best be set explicitly if (#tWork.SOURCEwords
0) then -- No words to check tResults.resultALL = false if yesno(tArgs.explain, true) then report.xpMessage('ERR201 No words to check') end else tResults['bAND'], tResults['tANDhits'] = checkANDwords(tWork) tResults['bOR'], tResults['tORhits'] = checkORwords(tWork) tResults.resultALL = (tResults.bAND) and (tResults.bOR) end
tResults.sRESULTstring = 'notinit' if tResults.resultALL
local sReport = if tArgs.explain then sReport = 'xp endfinal Report here L485' --sReport = report.xpPresent(tArgs, tWork, tResults) end local test = 'Tunk'test = tArgs.test or '_unk'
if tArgs.explain then test = tostring(tArgs.explain)else test = 'not'end return string.upper(tostring(tResults.resultALL)) .. tResults.sRESULTstringend
--- Public entry point that takes a frame object.
-- Collects the arguments with Module:Arguments' `getArgs` and hands the
-- resulting table to `p._main`, returning whatever `p._main` returns.
-- @param frame  the frame object passed to `mArgs.getArgs`
-- @return the result of `p._main`
function p.main(frame)
	-- Extra parentheses keep exactly one value, matching the original's
	-- intermediate local.
	return p._main((mArgs.getArgs(frame)))
end
return p