-----------------------------------------------------------------------------
-- Provides functions for diffing text.
--
-- (c) 2007, 2008 Yuri Takhteyev (yuri@freewisdom.org)
-- (c) 2007 Hisham Muhammad
-- Adapted to MediaWiki LUA:
-----------------------------------------------------------------------------
-- Presumably meant to be passed as split()'s `skip_separator` argument; it is
-- not referenced anywhere in the visible part of this file.
local SKIP_SEPARATOR = true -- a constant

-- token statuses (second element of every {token, status} pair)
local IN = "in"
local OUT = "out"
local SAME = "same"
-----------------------------------------------------------------------------
-- Split a string into tokens. (Adapted from Gavin Kistner's split on
-- http://lua-users.org/wiki/SplitJoin.)
--
-- The text preceding each separator match is emitted even when it is the
-- empty string; the text after the last match is emitted only when it is
-- non-empty.
--
-- @param text A string to be split.
-- @param separator [optional] the separator pattern (defaults to any
--        whitespace - %s+).
-- @param skip_separator [optional] don't include the separator in the
--        results.
-- @return A list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
   separator = separator or "%s+"
   local find, sub = mw.ustring.find, mw.ustring.sub
   -- mw.ustring.len returns nil for invalid UTF-8; the byte length is then a
   -- safe upper bound for the termination check below.
   local text_len = mw.ustring.len(text) or #text
   local parts = {}
   local start = 1        -- first character of the token being collected
   local search_from = 1  -- where the next separator search begins

   local split_start, split_end = find(text, separator, search_from)
   while split_start do
      if split_end < split_start then
         -- Zero-length match (e.g. separator "" or "%s*"): it separates
         -- nothing, so step over one character instead of looping forever.
         search_from = split_start + 1
         if search_from > text_len then
            break
         end
      else
         parts[#parts + 1] = sub(text, start, split_start - 1)
         if not skip_separator then
            parts[#parts + 1] = sub(text, split_start, split_end)
         end
         start = split_end + 1
         search_from = start
      end
      split_start, split_end = find(text, separator, search_from)
   end

   local tail = sub(text, start)
   if tail ~= "" then
      parts[#parts + 1] = tail
   end
   return parts
end
-----------------------------------------------------------------------------
-- Builds the longest-common-subsequence length matrix of two token lists.
-- This is a faster implementation than one provided by stdlib. Submitted by
-- Hisham Muhammad. The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence
--
-- Rows and cells are created on demand: reading a missing row yields a new
-- row table, and reading a missing cell yields (and stores) 0.
--
-- NOTE(review): the metatable bodies and everything after `if t1[i-1]` were
-- missing from the copy this was restored from; they follow the standard LCS
-- recurrence implied by the surviving loop header and locals.
--
-- @param t1 the first list of tokens.
-- @param t2 the second list of tokens.
-- @return the LCS length matrix (a table of row tables).
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
   local m = #t1
   local n = #t2

   -- Build matrix on demand
   local C = {}
   local setmetatable = setmetatable
   local mt_tbl = {
      -- a cell that was never written reads as 0
      __index = function(t, k)
         t[k] = 0
         return 0
      end
   }
   local mt_C = {
      -- a row that was never touched is created on first access
      __index = function(t, k)
         local tbl = {}
         setmetatable(tbl, mt_tbl)
         t[k] = tbl
         return tbl
      end
   }
   setmetatable(C, mt_C)

   local max = math.max
   for i = 1, m+1 do
      local ci1 = C[i+1]
      local ci = C[i]
      for j = 1, n+1 do
         -- For i == 1 and j == 1 both operands are nil (index 0), which
         -- compares equal, so C[2][2] is always 1.
         if t1[i-1] == t2[j-1] then
            ci1[j+1] = ci[j] + 1
         else
            ci1[j+1] = max(ci1[j], ci[j+1])
         end
      end
   end
   return C
end
-----------------------------------------------------------------------------
-- Formats an inline diff as HTML, with <ins> and <del> tags.
--
-- Every token is passed through mw.text.nowiki before being emitted. Tokens
-- whose status is neither "in" nor "out" are emitted without a wrapper.
--
-- @param tokens a table of {token, status} pairs.
-- @return an HTML string.
-----------------------------------------------------------------------------
local function format_as_html(tokens)
   local nowiki = mw.text.nowiki
   -- collect the pieces and join once instead of growing a string per token
   local buffer = {}
   for i, token_record in ipairs(tokens) do
      local token = nowiki(token_record[1])
      local status = token_record[2]
      if status == "in" then
         buffer[i] = '<ins>' .. token .. '</ins>'
      elseif status == "out" then
         buffer[i] = '<del>' .. token .. '</del>'
      else
         buffer[i] = token
      end
   end
   return table.concat(buffer)
end
-----------------------------------------------------------------------------
-- Returns a diff of two strings as a list of pairs, where the first value
-- represents a token and the second the token's status ("same", "in", "out").
--
-- The returned list also carries a `to_html` field holding format_as_html.
--
-- NOTE(review): the prefix loop, the rev_diff method table, the middle of
-- get_diff and its initial call were missing from the copy this was restored
-- from; they are rebuilt around the fragments that survived.
--
-- @param old The "old" text string
-- @param new The "new" text string
-- @param separator [optional] the separator pattern (defaults to any
--        whitespace).
-- @return A list of annotated tokens.
-----------------------------------------------------------------------------
local function diff(old, new, separator)
   assert(old); assert(new)
   new = split(new, separator); old = split(old, separator)

   -- First, compare the beginnings and ends of strings to remove the common
   -- prefix and suffix. Chances are, there is only a small number of tokens
   -- in the middle that differ, in which case we can save ourselves a lot
   -- in terms of LCS computation.
   local prefix = "" -- common text in the beginning
   local suffix = "" -- common text in the end
   while old[1] and old[1] == new[1] do
      local token = table.remove(old, 1)
      table.remove(new, 1)
      prefix = prefix..token
   end
   while old[#old] and old[#old] == new[#new] do
      local token = table.remove(old)
      table.remove(new)
      suffix = token..suffix
   end

   -- Setup a table that will store the diff (an upvalue for get_diff). We'll
   -- store it in the reverse order to allow for tail calls. We'll also keep
   -- in this table functions to handle different events.
   local rev_diff = {
      put  = function(self, token, type) table.insert(self, {token, type}) end,
      ins  = function(self, token) self:put(token, IN) end,
      del  = function(self, token) self:put(token, OUT) end,
      -- a nil token (index outside the list) is silently dropped
      same = function(self, token) if token then self:put(token, SAME) end end,
   }

   -- Put the suffix as the first token (we are storing the diff in the
   -- reverse order)
   rev_diff:same(suffix)

   -- Define a function that will scan the LCS matrix backwards and build the
   -- diff output recursively.
   local function get_diff(C, old, new, i, j)
      local old_i = old[i]
      local new_j = new[j]
      if i >= 1 and j >= 1 and old_i == new_j then
         rev_diff:same(old_i)
         return get_diff(C, old, new, i-1, j-1)
      else
         local Cij1 = C[i][j-1]
         local Ci1j = C[i-1][j]
         if j >= 1 and (i == 0 or Cij1 >= Ci1j) then
            rev_diff:ins(new_j)
            return get_diff(C, old, new, i, j-1)
         elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then
            rev_diff:del(old_i)
            return get_diff(C, old, new, i-1, j)
         end
      end
   end
   -- Then call it, starting one past the end of both lists.
   get_diff(quick_LCS(old, new), old, new, #old + 1, #new + 1)

   -- Put the prefix in at the end
   rev_diff:same(prefix)

   -- Reverse the diff.
   local result = {}
   for i = #rev_diff, 1, -1 do
      table.insert(result, rev_diff[i])
   end
   result.to_html = format_as_html
   return result
end
-----------------------------------------------------------------------------
-- Wiki diff style, currently just for a line.
--
-- Renders the diff of `old` and `new` as a one-row HTML table: a minus
-- marker, a cell holding the unchanged and removed tokens, a plus marker and
-- a cell holding the unchanged and added tokens.
--
-- NOTE(review): the branches that wrap removed/added tokens were missing from
-- the copy this was restored from. The <del>/<ins> wrappers reuse
-- insDelSharedStyle (otherwise unused here); the two background colours and
-- the `diffchange` classes are a reconstruction - confirm against the
-- deployed module.
--
-- @param old The "old" text string
-- @param new The "new" text string
-- @param separator [optional] the separator pattern, passed on to diff()
-- @return the rendered table as a string.
-----------------------------------------------------------------------------
local function wikiDiff(old, new, separator)
   local tokens = diff(old, new, separator)
   local root = mw.html.create()

   local plusMinusStyle = 'width: 2%; padding: 0.25em; font-weight: bold;' ..
      'font-size: 1.25em; text-align: end;'
   local tdDivStyle = 'word-wrap: break-word; direction: ltr;'

   local tdSharedStyle = 'width: 48%; border-style: solid; border-radius: 0.33em; ' ..
      'padding: 0.33em 0.5em; font-size: 1em; font-family: monospace; white-space: pre-wrap; border-width: 1px 1px 1px 4px; ' ..
      '-webkit-border-end-width: 1px; -webkit-border-start-width: 4px; ' ..
      '-moz-border-end-width: 1px; -moz-border-start-width: 4px;' -- these override default border-width for browsers that support them, needed for RTL UI on commons
   local insDelSharedStyle = 'padding: 0.25em 0; font-weight: bold; text-decoration: initial;'

   -- Writes the tokens into one side of the diff: tokens whose status equals
   -- `changed_status` are wrapped in a highlighted `tag_name` element,
   -- unchanged tokens are written as-is, and all other tokens are left out.
   local function fill(cell, changed_status, tag_name, background)
      for _, token_record in ipairs(tokens) do
         local token = mw.text.nowiki(token_record[1])
         local status = token_record[2]
         if status == changed_status then
            cell
               :tag(tag_name)
                  :cssText(background .. insDelSharedStyle)
                  :addClass('diffchange')
                  :addClass('diffchange-inline')
                  :wikitext(token)
         elseif status == SAME then
            cell:wikitext(token)
         end
      end
   end

   local tr = root:tag('table'):addClass('diff'):css('width', '100%'):tag('tr')

   tr:tag('td')
      :addClass('diff-marker')
      :cssText(plusMinusStyle)
      :wikitext('−')

   local deleted = tr
      :tag('td')
         :cssText('border-color: #ffe49c; ' .. tdSharedStyle)
         :addClass('diff-deletedline')
         :tag('div')
            :cssText(tdDivStyle)

   fill(deleted, OUT, 'del', 'background: #feeec8; ')

   tr:tag('td')
      :cssText(plusMinusStyle)
      :wikitext('+')

   local inserted = tr
      :tag('td')
         :cssText('border-color: #a3d3ff; ' .. tdSharedStyle)
         :addClass('diff-addedline')
         :tag('div')
            :cssText(tdDivStyle)

   fill(inserted, IN, 'ins', 'background: #d8ecff; ')

   return tostring(root)
end
-----------------------------------------------------------------------------
-- Entry point taking a frame object: renders the diff of the first two
-- positional arguments with wikiDiff.
--
-- Both texts are passed through mw.text.unstrip and then mw.text.decode
-- before being compared. A missing text argument is treated as the empty
-- string.
--
-- @param frame the frame object; reads frame.args[1] (old text),
--        frame.args[2] (new text) and frame.args[3] (optional separator
--        pattern, default '[%s%.:-]+').
-- @return the rendered diff as a string.
-----------------------------------------------------------------------------
local function main(frame)
   local args = frame.args

   local function plain(value)
      return mw.text.decode(mw.text.unstrip(value or ''))
   end

   -- An empty third argument (a trailing "|" in the invocation) would be used
   -- as a pattern that matches everywhere without consuming text, so treat it
   -- like an absent one.
   local separator = args[3]
   if separator == nil or separator == '' then
      separator = '[%s%.:-]+'
   end

   return wikiDiff(plain(args[1]), plain(args[2]), separator)
end
-- Module export table. `main` takes a frame object and must be exported to be
-- callable from wikitext.
-- NOTE(review): the original table was missing from the copy this was
-- restored from; exporting `diff` and `wikiDiff` as well is presumed - confirm.
return {
   diff = diff,
   wikiDiff = wikiDiff,
   main = main
}