-- Module:WikitextParser explained

-- Module:WikitextParser is a general-purpose wikitext parser
-- Documentation and master version: https://en.wikipedia.org/wiki/Module:WikitextParser
-- Authors: User:Sophivorus, User:Certes, User:Aidan9382, et al.
-- License: CC-BY-SA-4.0

-- Module table; every public function below is attached to it.
local WikitextParser = {}

-- Helper function to escape a string for use in Lua patterns
-- Every pattern magic character (^ $ ( ) . [ ] * + - ? %) is prefixed with '%'
-- so the result matches the input literally when embedded in a larger pattern.
-- @param str Required. String to escape.
-- @return The escaped string (a single value).
local function escapeString(str)
	-- The outer parentheses drop gsub's second result (the substitution
	-- count) so it cannot leak into a caller's argument list.
	return (str:gsub('[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0'))
end

-- Get the lead section from the given wikitext
-- The lead section is any content before the first section title.
-- @param wikitext Required. Wikitext to parse.
-- @return Wikitext of the lead section. May be empty if the lead section is empty.
function WikitextParser.getLead(wikitext)
	-- Prepend a newline so a section title on the very first line also matches '\n=='
	wikitext = '\n' .. wikitext
	-- Drop everything from the first line that starts with '==' onwards
	wikitext = wikitext:gsub('\n==.*', '')
	wikitext = mw.text.trim(wikitext)
	return wikitext
end

-- Get the sections from the given wikitext
-- This method doesn't get the lead section, use getLead for that
-- Each section's content ends at the next line starting with '==', so the
-- content of a section does not include its subsections.
-- NOTE: the title key is the text captured by '[^=]+', which is greedy and
-- therefore keeps any whitespace that precedes the closing '=='.
-- @param wikitext Required. Wikitext to parse.
-- @return Map from section title to section content
function WikitextParser.getSections(wikitext)
	local sections = {}
	-- Leading newline lets a title on the first line match; the trailing
	-- '\n==' is a sentinel so the last section has something to end on.
	wikitext = '\n' .. wikitext .. '\n=='
	for title in wikitext:gmatch('\n==+ *([^=]+) *==+') do
		-- Content runs lazily from the end of the title line to the next '\n=='
		local section = wikitext:match('\n==+ *' .. escapeString(title) .. ' *==+(.-)\n==')
		section = mw.text.trim(section)
		sections[title] = section
	end
	return sections
end

-- Get a section from the given wikitext (including any subsections)
-- If the given section title appears more than once, only the section of the first instance will be returned
-- @param wikitext Required. Wikitext to parse.
-- @param title Required. Title of the section
-- @return Wikitext of the section, or nil if it isn't found. May be empty if the section is empty or contains only subsections.
function WikitextParser.getSection(wikitext, title)
	title = mw.text.trim(title)
	title = escapeString(title)
	wikitext = '\n' .. wikitext .. '\n'
	-- 'level' is the run of '=' that opens the title line; 'content' is
	-- everything after that title line up to the end of the text.
	local level, content = wikitext:match('\n(==+) *' .. title .. ' *==.-\n(.*)')
	if content then
		-- Matches a line starting with between 2 and #level '=' characters
		-- followed by a non-'=' character, plus everything after it.
		local nextSection = '\n==' .. string.rep('=?', #level - 2) .. '[^=].*'
		content = content:gsub(nextSection, '') -- remove later sections at this level or higher
		content = mw.text.trim(content)
		return content
	end
end

-- Get the content of a <section> tag from the given wikitext.
-- We can't use getTags because both opening and closing <section> tags are self-closing tags.
-- The content is whatever lies between <section begin="name" /> and the first
-- following <section end="name" />; quotes around the name are optional.
-- NOTE(review): the opening tag pattern tolerates spaces before '=' ('begin *=')
-- while the closing one does not ('end='); confirm whether that is intended.
-- @param wikitext Required. Wikitext to parse.
-- @param name Required. Name of the <section> tag
-- @return Content of the <section> tag, or nil if it isn't found. May be empty if the section tag is empty.
function WikitextParser.getSectionTag(wikitext, name)
	name = mw.text.trim(name)
	name = escapeString(name)
	wikitext = wikitext:match('< *section +begin *= *["\']? *' .. name .. ' *["\']? */>(.-)< *section +end= *["\']? *'.. name ..' *["\']? */>')
	if wikitext then
		return mw.text.trim(wikitext)
	end
end

-- Get the lists from the given wikitext.
-- A list is a run of text that starts on a line beginning with '*' or '#'
-- and ends before the first following line that begins with neither.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of lists.
function WikitextParser.getLists(wikitext)
	local lists = {}
	-- Leading newline lets a list on the first line match; the two trailing
	-- newlines give a list on the last line a terminating non-list character.
	wikitext = '\n' .. wikitext .. '\n\n'
	for list in wikitext:gmatch('\n([*#].-)\n[^*#]') do
		table.insert(lists, list)
	end
	return lists
end

-- Get the paragraphs from the given wikitext.
-- Lists, standalone [[...]] lines, brace-delimited blocks and section titles
-- are stripped first; what remains is split on runs of blank lines.
-- @param wikitext Required. Wikitext to parse.
-- @return Sequence of paragraphs.
function WikitextParser.getParagraphs(wikitext)
	local paragraphs = {}

	-- Remove non-paragraphs
	wikitext = '\n' .. wikitext .. '\n'
	wikitext = wikitext:gsub('\n[*#][^\n]*', '') -- remove lists
	wikitext = wikitext:gsub('\n%[%b[]%]\n', '') -- remove files and categories
	-- Adjacent blocks share a single newline, and gsub matches cannot
	-- overlap, so pad each block with newlines before the removal pass.
	wikitext = wikitext:gsub('\n%b{} *\n', '\n%0\n') -- add spacing between tables and block templates
	wikitext = wikitext:gsub('\n%b{} *\n', '\n') -- remove tables and block templates
	wikitext = wikitext:gsub('\n==+[^=]+==+ *\n', '\n') -- remove section titles
	wikitext = mw.text.trim(wikitext)

	-- Split on one or more blank lines, skipping whitespace-only chunks
	for paragraph in mw.text.gsplit(wikitext, '\n\n+') do
		if mw.text.trim(paragraph) ~= '' then
			table.insert(paragraphs, paragraph)
		end
	end
	return paragraphs
end

-- Get the templates from the given wikitext.-- @param wikitext Required. Wikitext to parse.-- @return Sequence of templates.function WikitextParser.getTemplates(wikitext) local templates = for template in wikitext:gmatch('') do if wikitext:sub(1, 3) ~= '