Module:Diff/sandbox explained

-----------------------------------------------------------------------------
-- Provides functions for diffing text.
--
-- (c) 2007, 2008 Yuri Takhteyev (yuri@freewisdom.org)
-- (c) 2007 Hisham Muhammad
-- Adapted to MediaWiki LUA:
--
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
-----------------------------------------------------------------------------

-- Not referenced anywhere in this file as shown; presumably intended as the
-- value for split()'s skip_separator argument -- TODO confirm before removing.
local SKIP_SEPARATOR = true -- a constant

-- Token statuses: the second element of every {token, status} pair in a diff.
local IN = "in"     -- token is rendered inside <ins> (added side)
local OUT = "out"   -- token is rendered inside <del> (deleted side)
local SAME = "same" -- token is rendered unmarked on both sides

-----------------------------------------------------------------------------
-- Split a string into tokens.  (Adapted from Gavin Kistner's split on
-- http://lua-users.org/wiki/SplitJoin.)
--
-- The text between two separator matches is always emitted, even when it is
-- empty (e.g. when the text starts with a separator).  A trailing remainder
-- is emitted only when it is non-empty.
--
-- @param text           A string to be split.
-- @param separator      [optional] the separator pattern (defaults to any
--                       whitespace - %s+).  It must not be able to match the
--                       empty string; such a pattern raises an error instead
--                       of looping forever.
-- @param skip_separator [optional] don't include the separator in the
--                       results.
-- @return               A list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
   separator = separator or "%s+"
   local find, sub = mw.ustring.find, mw.ustring.sub
   local parts = {}
   local start = 1
   local split_start, split_end = find(text, separator, start)
   while split_start do
      if split_end < split_start then
         -- An empty match never advances `start`, so the loop could not
         -- terminate; fail loudly rather than hang.
         error("split: separator pattern '" .. separator
               .. "' matches the empty string", 2)
      end
      parts[#parts + 1] = sub(text, start, split_start - 1)
      if not skip_separator then
         parts[#parts + 1] = sub(text, split_start, split_end)
      end
      start = split_end + 1
      split_start, split_end = find(text, separator, start)
   end
   local rest = sub(text, start)
   if rest ~= "" then
      parts[#parts + 1] = rest
   end
   return parts
end

-----------------------------------------------------------------------------
-- Derives the longest common subsequence of two token lists.  This is a
-- faster implementation than one provided by stdlib.  Submitted by Hisham
-- Muhammad.  The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence
--
-- The matrix is sparse: rows are created the first time they are indexed and
-- any cell that was never written reads as 0, so out-of-range lookups
-- (including index 0 and negative indices) are safe.
--
-- Note on indexing: cell C[i+1][j+1] is filled from the comparison of
-- t1[i-1] with t2[j-1], for i in 1..#t1+1 and j in 1..#t2+1.  t1[0] and
-- t2[0] are both nil and therefore compare equal, so C[2][2] is always 1.
--
-- @param t1 the first list of tokens (a table indexed from 1).
-- @param t2 the second list of tokens (a table indexed from 1).
-- @return the longest common subsequence lengths as a matrix.
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
   local m = #t1
   local n = #t2

   -- Build matrix on demand
   local C = {}
   local setmetatable = setmetatable
   -- Row metatable: an unset cell is initialised to 0 on first read.
   local mt_tbl = {
      __index = function(t, k)
         t[k] = 0
         return 0
      end
   }
   -- Matrix metatable: an unset row is created on first read.
   local mt_C = {
      __index = function(t, k)
         local tbl = {}
         setmetatable(tbl, mt_tbl)
         t[k] = tbl
         return tbl
      end
   }
   setmetatable(C, mt_C)
   local max = math.max
   for i = 1, m+1 do
      local ci1 = C[i+1]
      local ci = C[i]
      for j = 1, n+1 do
         if t1[i-1] == t2[j-1] then
            ci1[j+1] = ci[j] + 1
         else
            ci1[j+1] = max(ci1[j], ci[j+1])
         end
      end
   end
   return C
end

-----------------------------------------------------------------------------
-- Formats an inline diff as HTML, with <ins> and <del> tags.
--
-- Every token is passed through mw.text.nowiki before being emitted.
-- Tokens whose status is neither "in" nor "out" are emitted unmarked.
--
-- @param tokens a table of {token, status} pairs.
-- @return       an HTML string.
-----------------------------------------------------------------------------
local function format_as_html(tokens)
   -- Collect the pieces and join once instead of growing a string in a loop.
   local buffer = {}
   for i, token_record in ipairs(tokens) do
      local token = mw.text.nowiki(token_record[1])
      local status = token_record[2]
      if status == "in" then
         buffer[i] = '<ins>' .. token .. '</ins>'
      elseif status == "out" then
         buffer[i] = '<del>' .. token .. '</del>'
      else
         buffer[i] = token
      end
   end
   return table.concat(buffer)
end

-----------------------------------------------------------------------------
-- Returns a diff of two strings as a list of pairs, where the first value
-- represents a token and the second the token's status ("same", "in", "out").
--
-- The returned table also carries a `to_html` field set to format_as_html.
--
-- @param old       The "old" text string
-- @param new       The "new" text string
-- @param separator [optional] the separator pattern (defaults to any
--                  whitespace).
-- @return          A list of annotated tokens.
-----------------------------------------------------------------------------
local function diff(old, new, separator)
   assert(old); assert(new)
   new = split(new, separator); old = split(old, separator)

   -- First, compare the beginnings and ends of strings to remove the common
   -- prefix and suffix.  Chances are, there is only a small number of tokens
   -- in the middle that differ, in which case we can save ourselves a lot
   -- in terms of LCS computation.
   local prefix = "" -- common text in the beginning
   local suffix = "" -- common text in the end
   while old[1] and old[1] == new[1] do
      local token = table.remove(old, 1)
      table.remove(new, 1)
      prefix = prefix..token
   end
   while old[#old] and old[#old] == new[#new] do
      local token = table.remove(old)
      table.remove(new)
      suffix = token..suffix
   end

   -- Setup a table that will store the diff (an upvalue for get_diff).  We'll
   -- store it in the reverse order to allow for tail calls.  We'll also keep
   -- in this table functions to handle different events.
   -- NOTE(review): the method bodies were missing from this copy of the file
   -- and are reconstructed from how they are called below -- confirm.
   local rev_diff = {
      put = function(self, token, status)
         table.insert(self, {token, status})
      end,
      ins = function(self, token) self:put(token, IN) end,
      del = function(self, token) self:put(token, OUT) end,
      -- get_diff's first step compares old[#old+1] with new[#new+1], which
      -- are both nil, so `same` must tolerate a nil token.
      same = function(self, token)
         if token then self:put(token, SAME) end
      end,
   }

   -- Put the suffix as the first token (we are storing the diff in the
   -- reverse order)
   rev_diff:same(suffix)

   -- Define a function that will scan the LCS matrix backwards and build the
   -- diff output recursively.
   local function get_diff(C, old, new, i, j)
      local old_i = old[i]
      local new_j = new[j]
      if i >= 1 and j >= 1 and old_i == new_j then
         rev_diff:same(old_i)
         return get_diff(C, old, new, i-1, j-1)
      else
         local Cij1 = C[i][j-1]
         local Ci1j = C[i-1][j]
         if j >= 1 and (i == 0 or Cij1 >= Ci1j) then
            rev_diff:ins(new_j)
            return get_diff(C, old, new, i, j-1)
         elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then
            rev_diff:del(old_i)
            return get_diff(C, old, new, i-1, j)
         end
      end
   end
   -- Then call it.
   get_diff(quick_LCS(old, new), old, new, #old + 1, #new + 1)

   -- Put the prefix in at the end
   rev_diff:same(prefix)

   -- Reverse the diff.  Only the array part of rev_diff is copied, so its
   -- helper methods are left behind.
   local result = {}
   for i = #rev_diff, 1, -1 do
      table.insert(result, rev_diff[i])
   end
   result.to_html = format_as_html
   return result
end

-----------------------------------------------------------------------------
-- Wiki diff style, currently just for a line.
--
-- Builds a one-row table with four cells: a minus marker, the old text with
-- removed tokens wrapped in <del>, a plus marker, and the new text with added
-- tokens wrapped in <ins>.  Every token is passed through mw.text.nowiki.
--
-- @param old       The "old" text string
-- @param new       The "new" text string
-- @param separator [optional] the separator pattern, passed on to diff().
-- @return          The table serialised with tostring().
-----------------------------------------------------------------------------
local function wikiDiff(old, new, separator)
   local tokens = diff(old, new, separator)
   local root = mw.html.create()

   local plusMinusStyle = 'width: 2%; padding: 0.25em; font-weight: bold;' ..
      'font-size: 1.25em; text-align: end;'
   local tdDivStyle = 'word-wrap: break-word; direction: ltr;'

   local tdSharedStyle = 'width: 48%; border-style: solid; border-radius: 0.33em; ' ..
      'padding: 0.33em 0.5em; font-size: 1em; font-family: monospace; white-space: pre-wrap; border-width: 1px 1px 1px 4px; ' ..
      '-webkit-border-end-width: 1px; -webkit-border-start-width: 4px; ' ..
      '-moz-border-end-width: 1px; -moz-border-start-width: 4px;' -- these override default border-width for browsers that support them, needed for RTL UI on commons
   local insDelSharedStyle = 'padding: 0.25em 0; font-weight: bold; text-decoration: initial;'

   -- Writes one side of the diff into `cell`: tokens whose status equals
   -- `changed_status` are wrapped in `tag_name` and highlighted, SAME tokens
   -- are written as-is, and tokens of the opposite side are skipped.
   local function fill(cell, changed_status, tag_name, background)
      for _, token_record in ipairs(tokens) do
         local token = mw.text.nowiki(token_record[1])
         local status = token_record[2]
         if status == changed_status then
            cell
               :tag(tag_name)
               :cssText(background .. insDelSharedStyle)
               :addClass('diffchange')
               :addClass('diffchange-inline')
               :wikitext(token)
         elseif status == SAME then
            cell:wikitext(token)
         end
      end
   end

   local tr = root:tag('table'):addClass('diff'):css('width', '100%'):tag('tr')

   tr:tag('td')
      :addClass('diff-marker')
      :cssText(plusMinusStyle)
      :wikitext('−')

   local deleted = tr
      :tag('td')
      :cssText('border-color: #ffe49c; ' .. tdSharedStyle)
      :addClass('diff-deletedline')
      :tag('div')
      :cssText(tdDivStyle)

   fill(deleted, OUT, 'del', 'background: #feeec8; ')

   -- NOTE(review): unlike the minus cell above, this marker cell does not
   -- get the 'diff-marker' class; kept as found -- confirm whether intended.
   tr:tag('td')
      :cssText(plusMinusStyle)
      :wikitext('+')

   local inserted = tr
      :tag('td')
      :cssText('border-color: #a3d3ff; ' .. tdSharedStyle)
      :addClass('diff-addedline')
      :tag('div')
      :cssText(tdDivStyle)

   fill(inserted, IN, 'ins', 'background: #d8ecff; ')

   return tostring(root)
end

-----------------------------------------------------------------------------
-- Entry point taking a frame: diffs frame.args[1] (old text) against
-- frame.args[2] (new text) and returns the result of wikiDiff().
--
-- Both texts are passed through mw.text.unstrip and then mw.text.decode
-- before diffing.  A missing text argument is treated as the empty string.
--
-- @param frame a frame object; frame.args[3] is an optional separator
--              pattern.  When it is missing or empty, '[%s%.:-]+' is used.
-- @return      the value returned by wikiDiff().
-----------------------------------------------------------------------------
local function main(frame)
   local args = frame.args

   local function clean(value)
      return mw.text.decode(mw.text.unstrip(value or ''))
   end

   -- An explicitly empty third argument (e.g. "|main|a|b|") arrives as ''
   -- rather than nil.  '' is truthy and, used as a pattern, matches the
   -- empty string, so it must fall back to the default as well.
   local separator = args[3]
   if separator == nil or separator == '' then
      separator = '[%s%.:-]+'
   end

   return wikiDiff(clean(args[1]), clean(args[2]), separator)
end

-- Module exports.
-- NOTE(review): the export table was missing from this copy of the file (a
-- bare `return` exports nothing); the keys below are reconstructed from the
-- functions defined above -- confirm against the module's callers.
return {
   diff = diff,
   wikiDiff = wikiDiff,
   main = main,
}