diff --git a/.luacheckrc b/.luacheckrc index 7c9517a..58a7a14 100644 --- a/.luacheckrc +++ b/.luacheckrc @@ -1,4 +1,4 @@ include_files = {"**/*.lua", "*.rockspec", "*.luacheckrc"} -exclude_files = {"build.luarocks/", "lua_modules/", "tmp/", ".luarocks/", ".rocks/"} +exclude_files = {"build.luarocks/", "lua_modules/", "tmp/", ".luarocks/", ".rocks/", "luatest/vendor/"} max_line_length = 120 diff --git a/CHANGELOG.md b/CHANGELOG.md index a5ead76..fbc59b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- Added support for unified diff output in `t.assert_equals()` failure messages + when expected and actual values are YAML-serializable (gh-412). - Group and suite hooks must now be registered using the call-style API. Use: diff --git a/luatest/assertions.lua b/luatest/assertions.lua index ffb79db..3cd419d 100644 --- a/luatest/assertions.lua +++ b/luatest/assertions.lua @@ -6,6 +6,7 @@ local math = require('math') local comparator = require('luatest.comparator') +local diff = require('luatest.diff') local mismatch_formatter = require('luatest.mismatch_formatter') local pp = require('luatest.pp') local log = require('luatest.log') @@ -20,12 +21,17 @@ local prettystr_pairs = pp.tostring_pair local M = {} local xfail = false +local diff_enabled = true local box_error_type = ffi.typeof(box.error.new(box.error.UNKNOWN)) -- private exported functions (for testing) M.private = {} +function M.private.set_diff_enabled(value) + diff_enabled = value and true or false +end + function M.private.is_xfail() local xfail_status = xfail xfail = false @@ -83,6 +89,14 @@ local function error_msg_equality(actual, expected, deep_analysis) if success then result = table.concat({result, mismatchResult}, '\n') end + + if diff_enabled then + local diff_result = diff.build_unified_diff(expected, actual) + if diff_result then + result = table.concat({result, 'diff:', diff_result}, '\n') + end + end + return result end return string.format("expected: %s, actual: %s", diff --git a/luatest/diff.lua b/luatest/diff.lua new file mode 100644 index 0000000..82fc423 --- /dev/null +++ b/luatest/diff.lua @@ -0,0 +1,356 @@ +local yaml = require('yaml') +local msgpack = require('msgpack') +local uri = require('uri') + +-- Optional Tarantool-specific types. +local decimal = require('decimal') +local datetime = require('datetime') +local uuid = require('uuid') +local ffi = require('ffi') + +-- varbinary is available only since Tarantool 3.0.0. +-- For older versions we provide a fallback stub. +local has_varbinary, varbinary = pcall(require, 'varbinary') +if not has_varbinary then + varbinary = { + is = function() + return false + end + } +end + +-- diff_match_patch expects bit32 +if not rawget(_G, 'bit32') then + _G.bit32 = require('bit') +end + +local diff_match_patch = require('luatest.vendor.diff_match_patch') + +diff_match_patch.settings({ + Diff_Timeout = 0, + Patch_Margin = 1e9, +}) + +local M = {} + +-- Maximum number of distinct line IDs that can be encoded as single-byte chars. +local MAX_LINE_ID = 0x100 + +local function encode_line_id(id) + if id >= MAX_LINE_ID then + return nil + end + + return string.char(id) +end + +local function decode_line_id(encoded) + return encoded:byte(1) +end + +-- Classify known cdata/userdata types into semantic kinds. +local function classify_cdata(v) + local t = type(v) + + if t ~= 'cdata' and t ~= 'userdata' then + return nil + end + + if decimal.is_decimal(v) then + return 'decimal' + end + + if datetime.is_datetime(v) then + return 'datetime' + end + + if uuid.is_uuid(v) then + return 'uuid' + end + + if varbinary.is(v) then + return 'varbinary' + end + + return nil +end + +-- Recursively normalize a value into something that: +-- * is safe and stable for YAML encoding; +-- * produces meaningful diffs for "semantic" types (decimal/datetime/uuid/varbinary); +-- * does NOT produce noisy diffs for opaque userdata/cdata (newproxy, ffi types, etc). +local function normalize_for_yaml(value) + local t = type(value) + + if t == 'table' then + local res = {} + for k, v in pairs(value) do + local nk = normalize_for_yaml(k) + if nk == nil then + -- YAML keys must be representable; fallback to tostring. + nk = tostring(k) + end + res[nk] = normalize_for_yaml(v) + end + return res + end + + if t == 'cdata' or t == 'userdata' then + local kind = classify_cdata(value) + if kind ~= nil then + local ok, s = pcall(tostring, value) + if ok and type(s) == 'string' then + return s + end + return '<' .. kind .. '>' + end + + if t == 'userdata' then + return '' + end + + if ffi ~= nil then + local ok, ctype = pcall(ffi.typeof, value) + if ok then + -- Stable placeholder: includes type, but not the pointer or content. + return '' + end + end + + return '' + end + + if t == 'function' or t == 'thread' then + return '<' .. t .. '>' + end + + -- other primitive types. + return value +end + +-- Encode a Lua value as YAML after normalizing it to a diff-friendly form. +local function encode_yaml(value) + local ok, encoded = pcall(yaml.encode, normalize_for_yaml(value)) + if ok then + return encoded + end +end + +-- Try to decode a string as msgpack and then encode the result to YAML. +-- Returns nil if decoding or encoding fails. +local function msgpack_to_yaml(value) + if type(value) ~= 'string' then + return nil + end + + local ok, decoded = pcall(msgpack.decode, value) + if not ok or type(decoded) ~= 'table' then + return nil + end + + return encode_yaml(decoded) +end + +-- Convert a supported Lua value into a textual form suitable for diffing. +-- +-- * Tables are serialized to YAML with recursive normalization. +-- * Strings are used as-is, or interpreted as msgpack and then YAML when possible. +-- * Numbers / booleans are converted via tostring(). +-- * Top-level decimal/datetime/uuid/varbinary are converted via tostring(). +-- * Top-level opaque userdata/cdata disable diffing (return nil). +local function as_yaml(value) + local t = type(value) + + if t == 'cdata' or t == 'userdata' then + local kind = classify_cdata(value) + if kind ~= nil then + local ok, s = pcall(tostring, value) + if ok and type(s) == 'string' then + return s + end + return '<' .. kind .. '>' + end + + return nil + end + + if t == 'string' then + return msgpack_to_yaml(value) or encode_yaml(value) + end + + local encoded = encode_yaml(value) + if encoded ~= nil then + return encoded + end + + local ok, packed = pcall(msgpack.encode, value) + if ok then + local decoded_yaml = msgpack_to_yaml(packed) + if decoded_yaml ~= nil then + return decoded_yaml + end + + return encode_yaml(packed) + end +end + +-- Map two multiline texts to compact "char sequences" and shared line table. +-- Returns nil if the number of unique lines exceeds MAX_LINE_ID. +local function lines_to_chars(text1, text2) + local line_array = {} + local line_hash = {} + + local function add_line(line) + local id = line_hash[line] + if id == nil then + id = #line_array + 1 + local encoded = encode_line_id(id) + if encoded == nil then + return nil + end + line_array[id] = line + line_hash[line] = id + end + + return encode_line_id(id) + end + + local function munge(text) + local tokens = {} + local start = 1 + + while true do + local newline_pos = text:find('\n', start, true) + if newline_pos == nil then + local tail = text:sub(start) + if tail ~= '' then + local token = add_line(tail) + if token == nil then + return nil + end + table.insert(tokens, token) + end + break + end + + local token = add_line(text:sub(start, newline_pos)) + if token == nil then + return nil + end + table.insert(tokens, token) + start = newline_pos + 1 + end + + return table.concat(tokens) + end + + local chars1 = munge(text1) + if chars1 == nil then + return nil + end + + local chars2 = munge(text2) + if chars2 == nil then + return nil + end + + return chars1, chars2, line_array +end + +-- Expand a "char sequence" produced by lines_to_chars back into full text. +local function chars_to_lines(text, line_array) + local out = {} + + for i = 1, #text do + local id = decode_line_id(text:sub(i, i)) + local line = line_array[id] + if line == nil then + return nil + end + table.insert(out, line) + end + + return table.concat(out) +end + +-- Compute line-based diff using diff_match_patch, falling back to nil on failure. +local function diff_by_lines(text1, text2) + local chars1, chars2, line_array = lines_to_chars(text1, text2) + if chars1 == nil then + return nil + end + + local diffs = diff_match_patch.diff_main(chars1, chars2, false) + diff_match_patch.diff_cleanupSemantic(diffs) + + for i, diff in ipairs(diffs) do + local text = chars_to_lines(diff[2], line_array) + if text == nil then + return nil + end + diffs[i][2] = text + end + + return diffs +end + +-- Normalize patch text from diff_match_patch: unescape it, drop junk lines, +-- and ensure it is valid, readable unified diff. +local function prettify_patch(patch_text) + -- patch_toText() escapes non-ascii symbols using URL escaping. Convert it + -- back to preserve the original values in unified diff output. + patch_text = uri.unescape(patch_text) + + local out = {} + + for line in (patch_text .. '\n'):gmatch('(.-)\n') do + if line ~= '' and line ~= ' ' then + local first = line:sub(1, 1) + + if first ~= '@' and first ~= '+' + and first ~= '-' and first ~= ' ' then + line = ' ' .. line + end + + table.insert(out, line) + end + end + + return table.concat(out, '\n') +end + +--- Build unified diff for expected and actual values serialized to YAML. +-- Tries line-based diff first, falls back to char-based. +-- Returns nil when values can't be serialized or there is no diff. +function M.build_unified_diff(expected, actual) + local expected_text = as_yaml(expected) + local actual_text = as_yaml(actual) + + if expected_text == nil or actual_text == nil then + return nil + end + + local diffs = diff_by_lines(expected_text, actual_text) + local used_line_diff = true + + if diffs == nil then + diffs = diff_match_patch.diff_main(expected_text, actual_text) + used_line_diff = false + end + + if not used_line_diff then + diff_match_patch.diff_cleanupSemantic(diffs) + end + + local patches = diff_match_patch.patch_make(expected_text, + actual_text, diffs) + local patch_text = diff_match_patch.patch_toText(patches) + + if patch_text == '' then + return nil + end + + return prettify_patch(patch_text) +end + +return M diff --git a/luatest/vendor/diff_match_patch.lua b/luatest/vendor/diff_match_patch.lua new file mode 100644 index 0000000..a2ae216 --- /dev/null +++ b/luatest/vendor/diff_match_patch.lua @@ -0,0 +1,2184 @@ +--[[ +* Diff Match and Patch +* Copyright 2018 The diff-match-patch Authors. +* https://github.com/google/diff-match-patch +* +* Based on the JavaScript implementation by Neil Fraser. +* Ported to Lua by Duncan Cross. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +--]] + +--[[ +-- Lua 5.1 and earlier requires the external BitOp library. +-- This library is built-in from Lua 5.2 and later as 'bit32'. +require 'bit' -- +local band, bor, lshift + = bit.band, bit.bor, bit.lshift +--]] + +local band, bor, lshift + = bit32.band, bit32.bor, bit32.lshift +local type, setmetatable, ipairs, select + = type, setmetatable, ipairs, select +local unpack, tonumber, error + = unpack, tonumber, error +local strsub, strbyte, strchar, gmatch, gsub + = string.sub, string.byte, string.char, string.gmatch, string.gsub +local strmatch, strfind, strformat + = string.match, string.find, string.format +local tinsert, tremove, tconcat + = table.insert, table.remove, table.concat +local max, min, floor, ceil, abs + = math.max, math.min, math.floor, math.ceil, math.abs +local clock = os.clock + + +-- Utility functions. + +local percentEncode_pattern = '[^A-Za-z0-9%-=;\',./~!@#$%&*%(%)_%+ %?]' +local function percentEncode_replace(v) + return strformat('%%%02X', strbyte(v)) +end + +local function indexOf(a, b, start) + if (#b == 0) then + return nil + end + return strfind(a, b, start, true) +end + +local htmlEncode_pattern = '[&<>\n]' +local htmlEncode_replace = { + ['&'] = '&', ['<'] = '<', ['>'] = '>', ['\n'] = '¶
' +} + +-- Public API Functions +-- (Exported at the end of the script) + +local diff_main, + diff_cleanupSemantic, + diff_cleanupEfficiency, + diff_levenshtein, + diff_prettyHtml + +local match_main + +local patch_make, + patch_toText, + patch_fromText, + patch_apply + +--[[ +* The data structure representing a diff is an array of tuples: +* {{DIFF_DELETE, 'Hello'}, {DIFF_INSERT, 'Goodbye'}, {DIFF_EQUAL, ' world.'}} +* which means: delete 'Hello', add 'Goodbye' and keep ' world.' +--]] +local DIFF_DELETE = -1 +local DIFF_INSERT = 1 +local DIFF_EQUAL = 0 + +-- Number of seconds to map a diff before giving up (0 for infinity). +local Diff_Timeout = 1.0 +-- Cost of an empty edit operation in terms of edit characters. +local Diff_EditCost = 4 +-- At what point is no match declared (0.0 = perfection, 1.0 = very loose). +local Match_Threshold = 0.5 +-- How far to search for a match (0 = exact location, 1000+ = broad match). +-- A match this many characters away from the expected location will add +-- 1.0 to the score (0.0 is a perfect match). +local Match_Distance = 1000 +-- When deleting a large block of text (over ~64 characters), how close do +-- the contents have to be to match the expected contents. (0.0 = perfection, +-- 1.0 = very loose). Note that Match_Threshold controls how closely the +-- end points of a delete need to match. +local Patch_DeleteThreshold = 0.5 +-- Chunk size for context length. +local Patch_Margin = 4 +-- The number of bits in an int. +local Match_MaxBits = 32 + +function settings(new) + if new then + Diff_Timeout = new.Diff_Timeout or Diff_Timeout + Diff_EditCost = new.Diff_EditCost or Diff_EditCost + Match_Threshold = new.Match_Threshold or Match_Threshold + Match_Distance = new.Match_Distance or Match_Distance + Patch_DeleteThreshold = new.Patch_DeleteThreshold or Patch_DeleteThreshold + Patch_Margin = new.Patch_Margin or Patch_Margin + Match_MaxBits = new.Match_MaxBits or Match_MaxBits + else + return { + Diff_Timeout = Diff_Timeout; + Diff_EditCost = Diff_EditCost; + Match_Threshold = Match_Threshold; + Match_Distance = Match_Distance; + Patch_DeleteThreshold = Patch_DeleteThreshold; + Patch_Margin = Patch_Margin; + Match_MaxBits = Match_MaxBits; + } + end +end + +-- --------------------------------------------------------------------------- +-- DIFF API +-- --------------------------------------------------------------------------- + +-- The private diff functions +local _diff_compute, + _diff_bisect, + _diff_halfMatchI, + _diff_halfMatch, + _diff_cleanupSemanticScore, + _diff_cleanupSemanticLossless, + _diff_cleanupMerge, + _diff_commonPrefix, + _diff_commonSuffix, + _diff_commonOverlap, + _diff_xIndex, + _diff_text1, + _diff_text2, + _diff_toDelta, + _diff_fromDelta + +--[[ +* Find the differences between two texts. Simplifies the problem by stripping +* any common prefix or suffix off the texts before diffing. +* @param {string} text1 Old string to be diffed. +* @param {string} text2 New string to be diffed. +* @param {boolean} opt_checklines Has no effect in Lua. +* @param {number} opt_deadline Optional time when the diff should be complete +* by. Used internally for recursive calls. Users should set DiffTimeout +* instead. +* @return {Array.>} Array of diff tuples. +--]] +function diff_main(text1, text2, opt_checklines, opt_deadline) + -- Set a deadline by which time the diff must be complete. + if opt_deadline == nil then + if Diff_Timeout <= 0 then + opt_deadline = 2 ^ 31 + else + opt_deadline = clock() + Diff_Timeout + end + end + local deadline = opt_deadline + + -- Check for null inputs. + if text1 == nil or text1 == nil then + error('Null inputs. (diff_main)') + end + + -- Check for equality (speedup). + if text1 == text2 then + if #text1 > 0 then + return {{DIFF_EQUAL, text1}} + end + return {} + end + + -- LUANOTE: Due to the lack of Unicode support, Lua is incapable of + -- implementing the line-mode speedup. + local checklines = false + + -- Trim off common prefix (speedup). + local commonlength = _diff_commonPrefix(text1, text2) + local commonprefix + if commonlength > 0 then + commonprefix = strsub(text1, 1, commonlength) + text1 = strsub(text1, commonlength + 1) + text2 = strsub(text2, commonlength + 1) + end + + -- Trim off common suffix (speedup). + commonlength = _diff_commonSuffix(text1, text2) + local commonsuffix + if commonlength > 0 then + commonsuffix = strsub(text1, -commonlength) + text1 = strsub(text1, 1, -commonlength - 1) + text2 = strsub(text2, 1, -commonlength - 1) + end + + -- Compute the diff on the middle block. + local diffs = _diff_compute(text1, text2, checklines, deadline) + + -- Restore the prefix and suffix. + if commonprefix then + tinsert(diffs, 1, {DIFF_EQUAL, commonprefix}) + end + if commonsuffix then + diffs[#diffs + 1] = {DIFF_EQUAL, commonsuffix} + end + + _diff_cleanupMerge(diffs) + return diffs +end + +--[[ +* Reduce the number of edits by eliminating semantically trivial equalities. +* @param {Array.>} diffs Array of diff tuples. +--]] +function diff_cleanupSemantic(diffs) + local changes = false + local equalities = {} -- Stack of indices where equalities are found. + local equalitiesLength = 0 -- Keeping our own length var is faster. + local lastEquality = nil + -- Always equal to diffs[equalities[equalitiesLength]][2] + local pointer = 1 -- Index of current position. + -- Number of characters that changed prior to the equality. + local length_insertions1 = 0 + local length_deletions1 = 0 + -- Number of characters that changed after the equality. + local length_insertions2 = 0 + local length_deletions2 = 0 + + while diffs[pointer] do + if diffs[pointer][1] == DIFF_EQUAL then -- Equality found. + equalitiesLength = equalitiesLength + 1 + equalities[equalitiesLength] = pointer + length_insertions1 = length_insertions2 + length_deletions1 = length_deletions2 + length_insertions2 = 0 + length_deletions2 = 0 + lastEquality = diffs[pointer][2] + else -- An insertion or deletion. + if diffs[pointer][1] == DIFF_INSERT then + length_insertions2 = length_insertions2 + #(diffs[pointer][2]) + else + length_deletions2 = length_deletions2 + #(diffs[pointer][2]) + end + -- Eliminate an equality that is smaller or equal to the edits on both + -- sides of it. + if lastEquality + and (#lastEquality <= max(length_insertions1, length_deletions1)) + and (#lastEquality <= max(length_insertions2, length_deletions2)) then + -- Duplicate record. + tinsert(diffs, equalities[equalitiesLength], + {DIFF_DELETE, lastEquality}) + -- Change second copy to insert. + diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT + -- Throw away the equality we just deleted. + equalitiesLength = equalitiesLength - 1 + -- Throw away the previous equality (it needs to be reevaluated). + equalitiesLength = equalitiesLength - 1 + pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0 + length_insertions1, length_deletions1 = 0, 0 -- Reset the counters. + length_insertions2, length_deletions2 = 0, 0 + lastEquality = nil + changes = true + end + end + pointer = pointer + 1 + end + + -- Normalize the diff. + if changes then + _diff_cleanupMerge(diffs) + end + _diff_cleanupSemanticLossless(diffs) + + -- Find any overlaps between deletions and insertions. + -- e.g: abcxxxxxxdef + -- -> abcxxxdef + -- e.g: xxxabcdefxxx + -- -> defxxxabc + -- Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = 2 + while diffs[pointer] do + if (diffs[pointer - 1][1] == DIFF_DELETE and + diffs[pointer][1] == DIFF_INSERT) then + local deletion = diffs[pointer - 1][2] + local insertion = diffs[pointer][2] + local overlap_length1 = _diff_commonOverlap(deletion, insertion) + local overlap_length2 = _diff_commonOverlap(insertion, deletion) + if (overlap_length1 >= overlap_length2) then + if (overlap_length1 >= #deletion / 2 or + overlap_length1 >= #insertion / 2) then + -- Overlap found. Insert an equality and trim the surrounding edits. + tinsert(diffs, pointer, + {DIFF_EQUAL, strsub(insertion, 1, overlap_length1)}) + diffs[pointer - 1][2] = + strsub(deletion, 1, #deletion - overlap_length1) + diffs[pointer + 1][2] = strsub(insertion, overlap_length1 + 1) + pointer = pointer + 1 + end + else + if (overlap_length2 >= #deletion / 2 or + overlap_length2 >= #insertion / 2) then + -- Reverse overlap found. + -- Insert an equality and swap and trim the surrounding edits. + tinsert(diffs, pointer, + {DIFF_EQUAL, strsub(deletion, 1, overlap_length2)}) + diffs[pointer - 1] = {DIFF_INSERT, + strsub(insertion, 1, #insertion - overlap_length2)} + diffs[pointer + 1] = {DIFF_DELETE, + strsub(deletion, overlap_length2 + 1)} + pointer = pointer + 1 + end + end + pointer = pointer + 1 + end + pointer = pointer + 1 + end +end + +--[[ +* Reduce the number of edits by eliminating operationally trivial equalities. +* @param {Array.>} diffs Array of diff tuples. +--]] +function diff_cleanupEfficiency(diffs) + local changes = false + -- Stack of indices where equalities are found. + local equalities = {} + -- Keeping our own length var is faster. + local equalitiesLength = 0 + -- Always equal to diffs[equalities[equalitiesLength]][2] + local lastEquality = nil + -- Index of current position. + local pointer = 1 + + -- The following four are really booleans but are stored as numbers because + -- they are used at one point like this: + -- + -- (pre_ins + pre_del + post_ins + post_del) == 3 + -- + -- ...i.e. checking that 3 of them are true and 1 of them is false. + + -- Is there an insertion operation before the last equality. + local pre_ins = 0 + -- Is there a deletion operation before the last equality. + local pre_del = 0 + -- Is there an insertion operation after the last equality. + local post_ins = 0 + -- Is there a deletion operation after the last equality. + local post_del = 0 + + while diffs[pointer] do + if diffs[pointer][1] == DIFF_EQUAL then -- Equality found. + local diffText = diffs[pointer][2] + if (#diffText < Diff_EditCost) and (post_ins == 1 or post_del == 1) then + -- Candidate found. + equalitiesLength = equalitiesLength + 1 + equalities[equalitiesLength] = pointer + pre_ins, pre_del = post_ins, post_del + lastEquality = diffText + else + -- Not a candidate, and can never become one. + equalitiesLength = 0 + lastEquality = nil + end + post_ins, post_del = 0, 0 + else -- An insertion or deletion. + if diffs[pointer][1] == DIFF_DELETE then + post_del = 1 + else + post_ins = 1 + end + --[[ + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + --]] + if lastEquality and ( + (pre_ins+pre_del+post_ins+post_del == 4) + or + ( + (#lastEquality < Diff_EditCost / 2) + and + (pre_ins+pre_del+post_ins+post_del == 3) + )) then + -- Duplicate record. + tinsert(diffs, equalities[equalitiesLength], + {DIFF_DELETE, lastEquality}) + -- Change second copy to insert. + diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT + -- Throw away the equality we just deleted. + equalitiesLength = equalitiesLength - 1 + lastEquality = nil + if (pre_ins == 1) and (pre_del == 1) then + -- No changes made which could affect previous entry, keep going. + post_ins, post_del = 1, 1 + equalitiesLength = 0 + else + -- Throw away the previous equality. + equalitiesLength = equalitiesLength - 1 + pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0 + post_ins, post_del = 0, 0 + end + changes = true + end + end + pointer = pointer + 1 + end + + if changes then + _diff_cleanupMerge(diffs) + end +end + +--[[ +* Compute the Levenshtein distance; the number of inserted, deleted or +* substituted characters. +* @param {Array.>} diffs Array of diff tuples. +* @return {number} Number of changes. +--]] +function diff_levenshtein(diffs) + local levenshtein = 0 + local insertions, deletions = 0, 0 + for x, diff in ipairs(diffs) do + local op, data = diff[1], diff[2] + if (op == DIFF_INSERT) then + insertions = insertions + #data + elseif (op == DIFF_DELETE) then + deletions = deletions + #data + elseif (op == DIFF_EQUAL) then + -- A deletion and an insertion is one substitution. + levenshtein = levenshtein + max(insertions, deletions) + insertions = 0 + deletions = 0 + end + end + levenshtein = levenshtein + max(insertions, deletions) + return levenshtein +end + +--[[ +* Convert a diff array into a pretty HTML report. +* @param {Array.>} diffs Array of diff tuples. +* @return {string} HTML representation. +--]] +function diff_prettyHtml(diffs) + local html = {} + for x, diff in ipairs(diffs) do + local op = diff[1] -- Operation (insert, delete, equal) + local data = diff[2] -- Text of change. + local text = gsub(data, htmlEncode_pattern, htmlEncode_replace) + if op == DIFF_INSERT then + html[x] = '' .. text .. '' + elseif op == DIFF_DELETE then + html[x] = '' .. text .. '' + elseif op == DIFF_EQUAL then + html[x] = '' .. text .. '' + end + end + return tconcat(html) +end + +-- --------------------------------------------------------------------------- +-- UNOFFICIAL/PRIVATE DIFF FUNCTIONS +-- --------------------------------------------------------------------------- + +--[[ +* Find the differences between two texts. Assumes that the texts do not +* have any common prefix or suffix. +* @param {string} text1 Old string to be diffed. +* @param {string} text2 New string to be diffed. +* @param {boolean} checklines Has no effect in Lua. +* @param {number} deadline Time when the diff should be complete by. +* @return {Array.>} Array of diff tuples. +* @private +--]] +function _diff_compute(text1, text2, checklines, deadline) + if #text1 == 0 then + -- Just add some text (speedup). + return {{DIFF_INSERT, text2}} + end + + if #text2 == 0 then + -- Just delete some text (speedup). + return {{DIFF_DELETE, text1}} + end + + local diffs + + local longtext = (#text1 > #text2) and text1 or text2 + local shorttext = (#text1 > #text2) and text2 or text1 + local i = indexOf(longtext, shorttext) + + if i ~= nil then + -- Shorter text is inside the longer text (speedup). + diffs = { + {DIFF_INSERT, strsub(longtext, 1, i - 1)}, + {DIFF_EQUAL, shorttext}, + {DIFF_INSERT, strsub(longtext, i + #shorttext)} + } + -- Swap insertions for deletions if diff is reversed. + if #text1 > #text2 then + diffs[1][1], diffs[3][1] = DIFF_DELETE, DIFF_DELETE + end + return diffs + end + + if #shorttext == 1 then + -- Single character string. + -- After the previous speedup, the character can't be an equality. + return {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}} + end + + -- Check to see if the problem can be split in two. + do + local + text1_a, text1_b, + text2_a, text2_b, + mid_common = _diff_halfMatch(text1, text2) + + if text1_a then + -- A half-match was found, sort out the return data. + -- Send both pairs off for separate processing. + local diffs_a = diff_main(text1_a, text2_a, checklines, deadline) + local diffs_b = diff_main(text1_b, text2_b, checklines, deadline) + -- Merge the results. + local diffs_a_len = #diffs_a + diffs = diffs_a + diffs[diffs_a_len + 1] = {DIFF_EQUAL, mid_common} + for i, b_diff in ipairs(diffs_b) do + diffs[diffs_a_len + 1 + i] = b_diff + end + return diffs + end + end + + return _diff_bisect(text1, text2, deadline) +end + +--[[ +* Find the 'middle snake' of a diff, split the problem in two +* and return the recursively constructed diff. +* See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. +* @param {string} text1 Old string to be diffed. +* @param {string} text2 New string to be diffed. +* @param {number} deadline Time at which to bail if not yet complete. +* @return {Array.>} Array of diff tuples. +* @private +--]] +function _diff_bisect(text1, text2, deadline) + -- Cache the text lengths to prevent multiple calls. + local text1_length = #text1 + local text2_length = #text2 + local _sub, _element + local max_d = ceil((text1_length + text2_length) / 2) + local v_offset = max_d + local v_length = 2 * max_d + local v1 = {} + local v2 = {} + -- Setting all elements to -1 is faster in Lua than mixing integers and nil. + for x = 0, v_length - 1 do + v1[x] = -1 + v2[x] = -1 + end + v1[v_offset + 1] = 0 + v2[v_offset + 1] = 0 + local delta = text1_length - text2_length + -- If the total number of characters is odd, then + -- the front path will collide with the reverse path. + local front = (delta % 2 ~= 0) + -- Offsets for start and end of k loop. + -- Prevents mapping of space beyond the grid. + local k1start = 0 + local k1end = 0 + local k2start = 0 + local k2end = 0 + for d = 0, max_d - 1 do + -- Bail out if deadline is reached. + if clock() > deadline then + break + end + + -- Walk the front path one step. + for k1 = -d + k1start, d - k1end, 2 do + local k1_offset = v_offset + k1 + local x1 + if (k1 == -d) or ((k1 ~= d) and + (v1[k1_offset - 1] < v1[k1_offset + 1])) then + x1 = v1[k1_offset + 1] + else + x1 = v1[k1_offset - 1] + 1 + end + local y1 = x1 - k1 + while (x1 <= text1_length) and (y1 <= text2_length) + and (strsub(text1, x1, x1) == strsub(text2, y1, y1)) do + x1 = x1 + 1 + y1 = y1 + 1 + end + v1[k1_offset] = x1 + if x1 > text1_length + 1 then + -- Ran off the right of the graph. + k1end = k1end + 2 + elseif y1 > text2_length + 1 then + -- Ran off the bottom of the graph. + k1start = k1start + 2 + elseif front then + local k2_offset = v_offset + delta - k1 + if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] ~= -1 then + -- Mirror x2 onto top-left coordinate system. + local x2 = text1_length - v2[k2_offset] + 1 + if x1 > x2 then + -- Overlap detected. + return _diff_bisectSplit(text1, text2, x1, y1, deadline) + end + end + end + end + + -- Walk the reverse path one step. + for k2 = -d + k2start, d - k2end, 2 do + local k2_offset = v_offset + k2 + local x2 + if (k2 == -d) or ((k2 ~= d) and + (v2[k2_offset - 1] < v2[k2_offset + 1])) then + x2 = v2[k2_offset + 1] + else + x2 = v2[k2_offset - 1] + 1 + end + local y2 = x2 - k2 + while (x2 <= text1_length) and (y2 <= text2_length) + and (strsub(text1, -x2, -x2) == strsub(text2, -y2, -y2)) do + x2 = x2 + 1 + y2 = y2 + 1 + end + v2[k2_offset] = x2 + if x2 > text1_length + 1 then + -- Ran off the left of the graph. + k2end = k2end + 2 + elseif y2 > text2_length + 1 then + -- Ran off the top of the graph. + k2start = k2start + 2 + elseif not front then + local k1_offset = v_offset + delta - k2 + if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] ~= -1 then + local x1 = v1[k1_offset] + local y1 = v_offset + x1 - k1_offset + -- Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2 + 1 + if x1 > x2 then + -- Overlap detected. + return _diff_bisectSplit(text1, text2, x1, y1, deadline) + end + end + end + end + end + -- Diff took too long and hit the deadline or + -- number of diffs equals number of characters, no commonality at all. + return {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}} +end + +--[[ + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * @param {string} text1 Old string to be diffed. + * @param {string} text2 New string to be diffed. + * @param {number} x Index of split point in text1. + * @param {number} y Index of split point in text2. + * @param {number} deadline Time at which to bail if not yet complete. + * @return {Array.>} Array of diff tuples. + * @private +--]] +function _diff_bisectSplit(text1, text2, x, y, deadline) + local text1a = strsub(text1, 1, x - 1) + local text2a = strsub(text2, 1, y - 1) + local text1b = strsub(text1, x) + local text2b = strsub(text2, y) + + -- Compute both diffs serially. + local diffs = diff_main(text1a, text2a, false, deadline) + local diffsb = diff_main(text1b, text2b, false, deadline) + + local diffs_len = #diffs + for i, v in ipairs(diffsb) do + diffs[diffs_len + i] = v + end + return diffs +end + +--[[ +* Determine the common prefix of two strings. +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {number} The number of characters common to the start of each +* string. +--]] +function _diff_commonPrefix(text1, text2) + -- Quick check for common null cases. + if (#text1 == 0) or (#text2 == 0) or (strbyte(text1, 1) ~= strbyte(text2, 1)) + then + return 0 + end + -- Binary search. + -- Performance analysis: https://neil.fraser.name/news/2007/10/09/ + local pointermin = 1 + local pointermax = min(#text1, #text2) + local pointermid = pointermax + local pointerstart = 1 + while (pointermin < pointermid) do + if (strsub(text1, pointerstart, pointermid) + == strsub(text2, pointerstart, pointermid)) then + pointermin = pointermid + pointerstart = pointermin + else + pointermax = pointermid + end + pointermid = floor(pointermin + (pointermax - pointermin) / 2) + end + return pointermid +end + +--[[ +* Determine the common suffix of two strings. +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {number} The number of characters common to the end of each string. +--]] +function _diff_commonSuffix(text1, text2) + -- Quick check for common null cases. + if (#text1 == 0) or (#text2 == 0) + or (strbyte(text1, -1) ~= strbyte(text2, -1)) then + return 0 + end + -- Binary search. + -- Performance analysis: https://neil.fraser.name/news/2007/10/09/ + local pointermin = 1 + local pointermax = min(#text1, #text2) + local pointermid = pointermax + local pointerend = 1 + while (pointermin < pointermid) do + if (strsub(text1, -pointermid, -pointerend) + == strsub(text2, -pointermid, -pointerend)) then + pointermin = pointermid + pointerend = pointermin + else + pointermax = pointermid + end + pointermid = floor(pointermin + (pointermax - pointermin) / 2) + end + return pointermid +end + +--[[ +* Determine if the suffix of one string is the prefix of another. +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {number} The number of characters common to the end of the first +* string and the start of the second string. +* @private +--]] +function _diff_commonOverlap(text1, text2) + -- Cache the text lengths to prevent multiple calls. + local text1_length = #text1 + local text2_length = #text2 + -- Eliminate the null case. + if text1_length == 0 or text2_length == 0 then + return 0 + end + -- Truncate the longer string. + if text1_length > text2_length then + text1 = strsub(text1, text1_length - text2_length + 1) + elseif text1_length < text2_length then + text2 = strsub(text2, 1, text1_length) + end + local text_length = min(text1_length, text2_length) + -- Quick check for the worst case. + if text1 == text2 then + return text_length + end + + -- Start by looking for a single character match + -- and increase length until no match is found. + -- Performance analysis: https://neil.fraser.name/news/2010/11/04/ + local best = 0 + local length = 1 + while true do + local pattern = strsub(text1, text_length - length + 1) + local found = strfind(text2, pattern, 1, true) + if found == nil then + return best + end + length = length + found - 1 + if found == 1 or strsub(text1, text_length - length + 1) == + strsub(text2, 1, length) then + best = length + length = length + 1 + end + end +end + +--[[ +* Does a substring of shorttext exist within longtext such that the substring +* is at least half the length of longtext? +* This speedup can produce non-minimal diffs. +* Closure, but does not reference any external variables. +* @param {string} longtext Longer string. +* @param {string} shorttext Shorter string. +* @param {number} i Start index of quarter length substring within longtext. +* @return {?Array.} Five element Array, containing the prefix of +* longtext, the suffix of longtext, the prefix of shorttext, the suffix +* of shorttext and the common middle. Or nil if there was no match. +* @private +--]] +function _diff_halfMatchI(longtext, shorttext, i) + -- Start with a 1/4 length substring at position i as a seed. + local seed = strsub(longtext, i, i + floor(#longtext / 4)) + local j = 0 -- LUANOTE: do not change to 1, was originally -1 + local best_common = '' + local best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b + while true do + j = indexOf(shorttext, seed, j + 1) + if (j == nil) then + break + end + local prefixLength = _diff_commonPrefix(strsub(longtext, i), + strsub(shorttext, j)) + local suffixLength = _diff_commonSuffix(strsub(longtext, 1, i - 1), + strsub(shorttext, 1, j - 1)) + if #best_common < suffixLength + prefixLength then + best_common = strsub(shorttext, j - suffixLength, j - 1) + .. strsub(shorttext, j, j + prefixLength - 1) + best_longtext_a = strsub(longtext, 1, i - suffixLength - 1) + best_longtext_b = strsub(longtext, i + prefixLength) + best_shorttext_a = strsub(shorttext, 1, j - suffixLength - 1) + best_shorttext_b = strsub(shorttext, j + prefixLength) + end + end + if #best_common * 2 >= #longtext then + return {best_longtext_a, best_longtext_b, + best_shorttext_a, best_shorttext_b, best_common} + else + return nil + end +end + +--[[ +* Do the two texts share a substring which is at least half the length of the +* longer text? +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {?Array.} Five element Array, containing the prefix of +* text1, the suffix of text1, the prefix of text2, the suffix of +* text2 and the common middle. Or nil if there was no match. +* @private +--]] +function _diff_halfMatch(text1, text2) + if Diff_Timeout <= 0 then + -- Don't risk returning a non-optimal diff if we have unlimited time. + return nil + end + local longtext = (#text1 > #text2) and text1 or text2 + local shorttext = (#text1 > #text2) and text2 or text1 + if (#longtext < 4) or (#shorttext * 2 < #longtext) then + return nil -- Pointless. + end + + -- First check if the second quarter is the seed for a half-match. + local hm1 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 4)) + -- Check again based on the third quarter. + local hm2 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 2)) + local hm + if not hm1 and not hm2 then + return nil + elseif not hm2 then + hm = hm1 + elseif not hm1 then + hm = hm2 + else + -- Both matched. Select the longest. + hm = (#hm1[5] > #hm2[5]) and hm1 or hm2 + end + + -- A half-match was found, sort out the return data. + local text1_a, text1_b, text2_a, text2_b + if (#text1 > #text2) then + text1_a, text1_b = hm[1], hm[2] + text2_a, text2_b = hm[3], hm[4] + else + text2_a, text2_b = hm[1], hm[2] + text1_a, text1_b = hm[3], hm[4] + end + local mid_common = hm[5] + return text1_a, text1_b, text2_a, text2_b, mid_common +end + +--[[ +* Given two strings, compute a score representing whether the internal +* boundary falls on logical boundaries. +* Scores range from 6 (best) to 0 (worst). +* @param {string} one First string. +* @param {string} two Second string. +* @return {number} The score. +* @private +--]] +function _diff_cleanupSemanticScore(one, two) + if (#one == 0) or (#two == 0) then + -- Edges are the best. + return 6 + end + + -- Each port of this function behaves slightly differently due to + -- subtle differences in each language's definition of things like + -- 'whitespace'. Since this function's purpose is largely cosmetic, + -- the choice has been made to use each language's native features + -- rather than force total conformity. + local char1 = strsub(one, -1) + local char2 = strsub(two, 1, 1) + local nonAlphaNumeric1 = strmatch(char1, '%W') + local nonAlphaNumeric2 = strmatch(char2, '%W') + local whitespace1 = nonAlphaNumeric1 and strmatch(char1, '%s') + local whitespace2 = nonAlphaNumeric2 and strmatch(char2, '%s') + local lineBreak1 = whitespace1 and strmatch(char1, '%c') + local lineBreak2 = whitespace2 and strmatch(char2, '%c') + local blankLine1 = lineBreak1 and strmatch(one, '\n\r?\n$') + local blankLine2 = lineBreak2 and strmatch(two, '^\r?\n\r?\n') + + if blankLine1 or blankLine2 then + -- Five points for blank lines. + return 5 + elseif lineBreak1 or lineBreak2 then + -- Four points for line breaks. + return 4 + elseif nonAlphaNumeric1 and not whitespace1 and whitespace2 then + -- Three points for end of sentences. + return 3 + elseif whitespace1 or whitespace2 then + -- Two points for whitespace. + return 2 + elseif nonAlphaNumeric1 or nonAlphaNumeric2 then + -- One point for non-alphanumeric. + return 1 + end + return 0 +end + +--[[ +* Look for single edits surrounded on both sides by equalities +* which can be shifted sideways to align the edit to a word boundary. +* e.g: The cat came. -> The cat came. +* @param {Array.>} diffs Array of diff tuples. +--]] +function _diff_cleanupSemanticLossless(diffs) + local pointer = 2 + -- Intentionally ignore the first and last element (don't need checking). + while diffs[pointer + 1] do + local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1] + if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then + -- This is a single edit surrounded by equalities. + local diff = diffs[pointer] + + local equality1 = prevDiff[2] + local edit = diff[2] + local equality2 = nextDiff[2] + + -- First, shift the edit as far left as possible. + local commonOffset = _diff_commonSuffix(equality1, edit) + if commonOffset > 0 then + local commonString = strsub(edit, -commonOffset) + equality1 = strsub(equality1, 1, -commonOffset - 1) + edit = commonString .. strsub(edit, 1, -commonOffset - 1) + equality2 = commonString .. equality2 + end + + -- Second, step character by character right, looking for the best fit. + local bestEquality1 = equality1 + local bestEdit = edit + local bestEquality2 = equality2 + local bestScore = _diff_cleanupSemanticScore(equality1, edit) + + _diff_cleanupSemanticScore(edit, equality2) + + while strbyte(edit, 1) == strbyte(equality2, 1) do + equality1 = equality1 .. strsub(edit, 1, 1) + edit = strsub(edit, 2) .. strsub(equality2, 1, 1) + equality2 = strsub(equality2, 2) + local score = _diff_cleanupSemanticScore(equality1, edit) + + _diff_cleanupSemanticScore(edit, equality2) + -- The >= encourages trailing rather than leading whitespace on edits. + if score >= bestScore then + bestScore = score + bestEquality1 = equality1 + bestEdit = edit + bestEquality2 = equality2 + end + end + if prevDiff[2] ~= bestEquality1 then + -- We have an improvement, save it back to the diff. + if #bestEquality1 > 0 then + diffs[pointer - 1][2] = bestEquality1 + else + tremove(diffs, pointer - 1) + pointer = pointer - 1 + end + diffs[pointer][2] = bestEdit + if #bestEquality2 > 0 then + diffs[pointer + 1][2] = bestEquality2 + else + tremove(diffs, pointer + 1, 1) + pointer = pointer - 1 + end + end + end + pointer = pointer + 1 + end +end + +--[[ +* Reorder and merge like edit sections. Merge equalities. +* Any edit section can move as long as it doesn't cross an equality. +* @param {Array.>} diffs Array of diff tuples. +--]] +function _diff_cleanupMerge(diffs) + diffs[#diffs + 1] = {DIFF_EQUAL, ''} -- Add a dummy entry at the end. + local pointer = 1 + local count_delete, count_insert = 0, 0 + local text_delete, text_insert = '', '' + local commonlength + while diffs[pointer] do + local diff_type = diffs[pointer][1] + if diff_type == DIFF_INSERT then + count_insert = count_insert + 1 + text_insert = text_insert .. diffs[pointer][2] + pointer = pointer + 1 + elseif diff_type == DIFF_DELETE then + count_delete = count_delete + 1 + text_delete = text_delete .. diffs[pointer][2] + pointer = pointer + 1 + elseif diff_type == DIFF_EQUAL then + -- Upon reaching an equality, check for prior redundancies. + if count_delete + count_insert > 1 then + if (count_delete > 0) and (count_insert > 0) then + -- Factor out any common prefixies. + commonlength = _diff_commonPrefix(text_insert, text_delete) + if commonlength > 0 then + local back_pointer = pointer - count_delete - count_insert + if (back_pointer > 1) and (diffs[back_pointer - 1][1] == DIFF_EQUAL) + then + diffs[back_pointer - 1][2] = diffs[back_pointer - 1][2] + .. strsub(text_insert, 1, commonlength) + else + tinsert(diffs, 1, + {DIFF_EQUAL, strsub(text_insert, 1, commonlength)}) + pointer = pointer + 1 + end + text_insert = strsub(text_insert, commonlength + 1) + text_delete = strsub(text_delete, commonlength + 1) + end + -- Factor out any common suffixies. + commonlength = _diff_commonSuffix(text_insert, text_delete) + if commonlength ~= 0 then + diffs[pointer][2] = + strsub(text_insert, -commonlength) .. diffs[pointer][2] + text_insert = strsub(text_insert, 1, -commonlength - 1) + text_delete = strsub(text_delete, 1, -commonlength - 1) + end + end + -- Delete the offending records and add the merged ones. + pointer = pointer - count_delete - count_insert + for i = 1, count_delete + count_insert do + tremove(diffs, pointer) + end + if #text_delete > 0 then + tinsert(diffs, pointer, {DIFF_DELETE, text_delete}) + pointer = pointer + 1 + end + if #text_insert > 0 then + tinsert(diffs, pointer, {DIFF_INSERT, text_insert}) + pointer = pointer + 1 + end + pointer = pointer + 1 + elseif (pointer > 1) and (diffs[pointer - 1][1] == DIFF_EQUAL) then + -- Merge this equality with the previous one. + diffs[pointer - 1][2] = diffs[pointer - 1][2] .. diffs[pointer][2] + tremove(diffs, pointer) + else + pointer = pointer + 1 + end + count_insert, count_delete = 0, 0 + text_delete, text_insert = '', '' + end + end + if diffs[#diffs][2] == '' then + diffs[#diffs] = nil -- Remove the dummy entry at the end. + end + + -- Second pass: look for single edits surrounded on both sides by equalities + -- which can be shifted sideways to eliminate an equality. + -- e.g: ABAC -> ABAC + local changes = false + pointer = 2 + -- Intentionally ignore the first and last element (don't need checking). + while pointer < #diffs do + local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1] + if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then + -- This is a single edit surrounded by equalities. + local diff = diffs[pointer] + local currentText = diff[2] + local prevText = prevDiff[2] + local nextText = nextDiff[2] + if #prevText == 0 then + tremove(diffs, pointer - 1) + changes = true + elseif strsub(currentText, -#prevText) == prevText then + -- Shift the edit over the previous equality. + diff[2] = prevText .. strsub(currentText, 1, -#prevText - 1) + nextDiff[2] = prevText .. nextDiff[2] + tremove(diffs, pointer - 1) + changes = true + elseif strsub(currentText, 1, #nextText) == nextText then + -- Shift the edit over the next equality. + prevDiff[2] = prevText .. nextText + diff[2] = strsub(currentText, #nextText + 1) .. nextText + tremove(diffs, pointer + 1) + changes = true + end + end + pointer = pointer + 1 + end + -- If shifts were made, the diff needs reordering and another shift sweep. + if changes then + -- LUANOTE: no return value, but necessary to use 'return' to get + -- tail calls. + return _diff_cleanupMerge(diffs) + end +end + +--[[ +* loc is a location in text1, compute and return the equivalent location in +* text2. +* e.g. 'The cat' vs 'The big cat', 1->1, 5->8 +* @param {Array.>} diffs Array of diff tuples. +* @param {number} loc Location within text1. +* @return {number} Location within text2. +--]] +function _diff_xIndex(diffs, loc) + local chars1 = 1 + local chars2 = 1 + local last_chars1 = 1 + local last_chars2 = 1 + local x + for _x, diff in ipairs(diffs) do + x = _x + if diff[1] ~= DIFF_INSERT then -- Equality or deletion. + chars1 = chars1 + #diff[2] + end + if diff[1] ~= DIFF_DELETE then -- Equality or insertion. + chars2 = chars2 + #diff[2] + end + if chars1 > loc then -- Overshot the location. + break + end + last_chars1 = chars1 + last_chars2 = chars2 + end + -- Was the location deleted? + if diffs[x + 1] and (diffs[x][1] == DIFF_DELETE) then + return last_chars2 + end + -- Add the remaining character length. + return last_chars2 + (loc - last_chars1) +end + +--[[ +* Compute and return the source text (all equalities and deletions). +* @param {Array.>} diffs Array of diff tuples. +* @return {string} Source text. +--]] +function _diff_text1(diffs) + local text = {} + for x, diff in ipairs(diffs) do + if diff[1] ~= DIFF_INSERT then + text[#text + 1] = diff[2] + end + end + return tconcat(text) +end + +--[[ +* Compute and return the destination text (all equalities and insertions). +* @param {Array.>} diffs Array of diff tuples. +* @return {string} Destination text. +--]] +function _diff_text2(diffs) + local text = {} + for x, diff in ipairs(diffs) do + if diff[1] ~= DIFF_DELETE then + text[#text + 1] = diff[2] + end + end + return tconcat(text) +end + +--[[ +* Crush the diff into an encoded string which describes the operations +* required to transform text1 into text2. +* E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. +* Operations are tab-separated. Inserted text is escaped using %xx notation. +* @param {Array.>} diffs Array of diff tuples. +* @return {string} Delta text. +--]] +function _diff_toDelta(diffs) + local text = {} + for x, diff in ipairs(diffs) do + local op, data = diff[1], diff[2] + if op == DIFF_INSERT then + text[x] = '+' .. gsub(data, percentEncode_pattern, percentEncode_replace) + elseif op == DIFF_DELETE then + text[x] = '-' .. #data + elseif op == DIFF_EQUAL then + text[x] = '=' .. #data + end + end + return tconcat(text, '\t') +end + +--[[ +* Given the original text1, and an encoded string which describes the +* operations required to transform text1 into text2, compute the full diff. +* @param {string} text1 Source string for the diff. +* @param {string} delta Delta text. +* @return {Array.>} Array of diff tuples. +* @throws {Errorend If invalid input. +--]] +function _diff_fromDelta(text1, delta) + local diffs = {} + local diffsLength = 0 -- Keeping our own length var is faster + local pointer = 1 -- Cursor in text1 + for token in gmatch(delta, '[^\t]+') do + -- Each token begins with a one character parameter which specifies the + -- operation of this token (delete, insert, equality). + local tokenchar, param = strsub(token, 1, 1), strsub(token, 2) + if (tokenchar == '+') then + local invalidDecode = false + local decoded = gsub(param, '%%(.?.?)', + function(c) + local n = tonumber(c, 16) + if (#c ~= 2) or (n == nil) then + invalidDecode = true + return '' + end + return strchar(n) + end) + if invalidDecode then + -- Malformed URI sequence. + error('Illegal escape in _diff_fromDelta: ' .. param) + end + diffsLength = diffsLength + 1 + diffs[diffsLength] = {DIFF_INSERT, decoded} + elseif (tokenchar == '-') or (tokenchar == '=') then + local n = tonumber(param) + if (n == nil) or (n < 0) then + error('Invalid number in _diff_fromDelta: ' .. param) + end + local text = strsub(text1, pointer, pointer + n - 1) + pointer = pointer + n + if (tokenchar == '=') then + diffsLength = diffsLength + 1 + diffs[diffsLength] = {DIFF_EQUAL, text} + else + diffsLength = diffsLength + 1 + diffs[diffsLength] = {DIFF_DELETE, text} + end + else + error('Invalid diff operation in _diff_fromDelta: ' .. token) + end + end + if (pointer ~= #text1 + 1) then + error('Delta length (' .. (pointer - 1) + .. ') does not equal source text length (' .. #text1 .. ').') + end + return diffs +end + +-- --------------------------------------------------------------------------- +-- MATCH API +-- --------------------------------------------------------------------------- + +local _match_bitap, _match_alphabet + +--[[ +* Locate the best instance of 'pattern' in 'text' near 'loc'. +* @param {string} text The text to search. +* @param {string} pattern The pattern to search for. +* @param {number} loc The location to search around. +* @return {number} Best match index or -1. +--]] +function match_main(text, pattern, loc) + -- Check for null inputs. + if text == nil or pattern == nil or loc == nil then + error('Null inputs. (match_main)') + end + + if text == pattern then + -- Shortcut (potentially not guaranteed by the algorithm) + return 1 + elseif #text == 0 then + -- Nothing to match. + return -1 + end + loc = max(1, min(loc, #text)) + if strsub(text, loc, loc + #pattern - 1) == pattern then + -- Perfect match at the perfect spot! (Includes case of null pattern) + return loc + else + -- Do a fuzzy compare. + return _match_bitap(text, pattern, loc) + end +end + +-- --------------------------------------------------------------------------- +-- UNOFFICIAL/PRIVATE MATCH FUNCTIONS +-- --------------------------------------------------------------------------- + +--[[ +* Initialise the alphabet for the Bitap algorithm. +* @param {string} pattern The text to encode. +* @return {Object} Hash of character locations. +* @private +--]] +function _match_alphabet(pattern) + local s = {} + local i = 0 + for c in gmatch(pattern, '.') do + s[c] = bor(s[c] or 0, lshift(1, #pattern - i - 1)) + i = i + 1 + end + return s +end + +--[[ +* Locate the best instance of 'pattern' in 'text' near 'loc' using the +* Bitap algorithm. +* @param {string} text The text to search. +* @param {string} pattern The pattern to search for. +* @param {number} loc The location to search around. +* @return {number} Best match index or -1. +* @private +--]] +function _match_bitap(text, pattern, loc) + if #pattern > Match_MaxBits then + error('Pattern too long.') + end + + -- Initialise the alphabet. + local s = _match_alphabet(pattern) + + --[[ + * Compute and return the score for a match with e errors and x location. + * Accesses loc and pattern through being a closure. + * @param {number} e Number of errors in match. + * @param {number} x Location of match. + * @return {number} Overall score for match (0.0 = good, 1.0 = bad). + * @private + --]] + local function _match_bitapScore(e, x) + local accuracy = e / #pattern + local proximity = abs(loc - x) + if (Match_Distance == 0) then + -- Dodge divide by zero error. + return (proximity == 0) and 1 or accuracy + end + return accuracy + (proximity / Match_Distance) + end + + -- Highest score beyond which we give up. + local score_threshold = Match_Threshold + -- Is there a nearby exact match? (speedup) + local best_loc = indexOf(text, pattern, loc) + if best_loc then + score_threshold = min(_match_bitapScore(0, best_loc), score_threshold) + -- LUANOTE: Ideally we'd also check from the other direction, but Lua + -- doesn't have an efficent lastIndexOf function. + end + + -- Initialise the bit arrays. + local matchmask = lshift(1, #pattern - 1) + best_loc = -1 + + local bin_min, bin_mid + local bin_max = #pattern + #text + local last_rd + for d = 0, #pattern - 1, 1 do + -- Scan for the best match; each iteration allows for one more error. + -- Run a binary search to determine how far from 'loc' we can stray at this + -- error level. + bin_min = 0 + bin_mid = bin_max + while (bin_min < bin_mid) do + if (_match_bitapScore(d, loc + bin_mid) <= score_threshold) then + bin_min = bin_mid + else + bin_max = bin_mid + end + bin_mid = floor(bin_min + (bin_max - bin_min) / 2) + end + -- Use the result from this iteration as the maximum for the next. + bin_max = bin_mid + local start = max(1, loc - bin_mid + 1) + local finish = min(loc + bin_mid, #text) + #pattern + + local rd = {} + for j = start, finish do + rd[j] = 0 + end + rd[finish + 1] = lshift(1, d) - 1 + for j = finish, start, -1 do + local charMatch = s[strsub(text, j - 1, j - 1)] or 0 + if (d == 0) then -- First pass: exact match. + rd[j] = band(bor((rd[j + 1] * 2), 1), charMatch) + else + -- Subsequent passes: fuzzy match. + -- Functions instead of operators make this hella messy. + rd[j] = bor( + band( + bor( + lshift(rd[j + 1], 1), + 1 + ), + charMatch + ), + bor( + bor( + lshift(bor(last_rd[j + 1], last_rd[j]), 1), + 1 + ), + last_rd[j + 1] + ) + ) + end + if (band(rd[j], matchmask) ~= 0) then + local score = _match_bitapScore(d, j - 1) + -- This match will almost certainly be better than any existing match. + -- But check anyway. + if (score <= score_threshold) then + -- Told you so. + score_threshold = score + best_loc = j - 1 + if (best_loc > loc) then + -- When passing loc, don't exceed our current distance from loc. + start = max(1, loc * 2 - best_loc) + else + -- Already passed loc, downhill from here on in. + break + end + end + end + end + -- No hope for a (better) match at greater error levels. + if (_match_bitapScore(d + 1, loc) > score_threshold) then + break + end + last_rd = rd + end + return best_loc +end + +-- ----------------------------------------------------------------------------- +-- PATCH API +-- ----------------------------------------------------------------------------- + +local _patch_addContext, + _patch_deepCopy, + _patch_addPadding, + _patch_splitMax, + _patch_appendText, + _new_patch_obj + +--[[ +* Compute a list of patches to turn text1 into text2. +* Use diffs if provided, otherwise compute it ourselves. +* There are four ways to call this function, depending on what data is +* available to the caller: +* Method 1: +* a = text1, b = text2 +* Method 2: +* a = diffs +* Method 3 (optimal): +* a = text1, b = diffs +* Method 4 (deprecated, use method 3): +* a = text1, b = text2, c = diffs +* +* @param {string|Array.>} a text1 (methods 1,3,4) or +* Array of diff tuples for text1 to text2 (method 2). +* @param {string|Array.>} opt_b text2 (methods 1,4) or +* Array of diff tuples for text1 to text2 (method 3) or undefined (method 2). +* @param {string|Array.>} opt_c Array of diff tuples for +* text1 to text2 (method 4) or undefined (methods 1,2,3). +* @return {Array.<_new_patch_obj>} Array of patch objects. +--]] +function patch_make(a, opt_b, opt_c) + local text1, diffs + local type_a, type_b, type_c = type(a), type(opt_b), type(opt_c) + if (type_a == 'string') and (type_b == 'string') and (type_c == 'nil') then + -- Method 1: text1, text2 + -- Compute diffs from text1 and text2. + text1 = a + diffs = diff_main(text1, opt_b, true) + if (#diffs > 2) then + diff_cleanupSemantic(diffs) + diff_cleanupEfficiency(diffs) + end + elseif (type_a == 'table') and (type_b == 'nil') and (type_c == 'nil') then + -- Method 2: diffs + -- Compute text1 from diffs. + diffs = a + text1 = _diff_text1(diffs) + elseif (type_a == 'string') and (type_b == 'table') and (type_c == 'nil') then + -- Method 3: text1, diffs + text1 = a + diffs = opt_b + elseif (type_a == 'string') and (type_b == 'string') and (type_c == 'table') + then + -- Method 4: text1, text2, diffs + -- text2 is not used. + text1 = a + diffs = opt_c + else + error('Unknown call format to patch_make.') + end + + if (diffs[1] == nil) then + return {} -- Get rid of the null case. + end + + local patches = {} + local patch = _new_patch_obj() + local patchDiffLength = 0 -- Keeping our own length var is faster. + local char_count1 = 0 -- Number of characters into the text1 string. + local char_count2 = 0 -- Number of characters into the text2 string. + -- Start with text1 (prepatch_text) and apply the diffs until we arrive at + -- text2 (postpatch_text). We recreate the patches one by one to determine + -- context info. + local prepatch_text, postpatch_text = text1, text1 + for x, diff in ipairs(diffs) do + local diff_type, diff_text = diff[1], diff[2] + + if (patchDiffLength == 0) and (diff_type ~= DIFF_EQUAL) then + -- A new patch starts here. + patch.start1 = char_count1 + 1 + patch.start2 = char_count2 + 1 + end + + if (diff_type == DIFF_INSERT) then + patchDiffLength = patchDiffLength + 1 + patch.diffs[patchDiffLength] = diff + patch.length2 = patch.length2 + #diff_text + postpatch_text = strsub(postpatch_text, 1, char_count2) + .. diff_text .. strsub(postpatch_text, char_count2 + 1) + elseif (diff_type == DIFF_DELETE) then + patch.length1 = patch.length1 + #diff_text + patchDiffLength = patchDiffLength + 1 + patch.diffs[patchDiffLength] = diff + postpatch_text = strsub(postpatch_text, 1, char_count2) + .. strsub(postpatch_text, char_count2 + #diff_text + 1) + elseif (diff_type == DIFF_EQUAL) then + if (#diff_text <= Patch_Margin * 2) + and (patchDiffLength ~= 0) and (#diffs ~= x) then + -- Small equality inside a patch. + patchDiffLength = patchDiffLength + 1 + patch.diffs[patchDiffLength] = diff + patch.length1 = patch.length1 + #diff_text + patch.length2 = patch.length2 + #diff_text + elseif (#diff_text >= Patch_Margin * 2) then + -- Time for a new patch. + if (patchDiffLength ~= 0) then + _patch_addContext(patch, prepatch_text) + patches[#patches + 1] = patch + patch = _new_patch_obj() + patchDiffLength = 0 + -- Unlike Unidiff, our patch lists have a rolling context. + -- https://github.com/google/diff-match-patch/wiki/Unidiff + -- Update prepatch text & pos to reflect the application of the + -- just completed patch. + prepatch_text = postpatch_text + char_count1 = char_count2 + end + end + end + + -- Update the current character count. + if (diff_type ~= DIFF_INSERT) then + char_count1 = char_count1 + #diff_text + end + if (diff_type ~= DIFF_DELETE) then + char_count2 = char_count2 + #diff_text + end + end + + -- Pick up the leftover patch if not empty. + if (patchDiffLength > 0) then + _patch_addContext(patch, prepatch_text) + patches[#patches + 1] = patch + end + + return patches +end + +--[[ +* Merge a set of patches onto the text. Return a patched text, as well +* as a list of true/false values indicating which patches were applied. +* @param {Array.<_new_patch_obj>} patches Array of patch objects. +* @param {string} text Old text. +* @return {Array.>} Two return values, the +* new text and an array of boolean values. +--]] +function patch_apply(patches, text) + if patches[1] == nil then + return text, {} + end + + -- Deep copy the patches so that no changes are made to originals. + patches = _patch_deepCopy(patches) + + local nullPadding = _patch_addPadding(patches) + text = nullPadding .. text .. nullPadding + + _patch_splitMax(patches) + -- delta keeps track of the offset between the expected and actual location + -- of the previous patch. If there are patches expected at positions 10 and + -- 20, but the first patch was found at 12, delta is 2 and the second patch + -- has an effective expected position of 22. + local delta = 0 + local results = {} + for x, patch in ipairs(patches) do + local expected_loc = patch.start2 + delta + local text1 = _diff_text1(patch.diffs) + local start_loc + local end_loc = -1 + if #text1 > Match_MaxBits then + -- _patch_splitMax will only provide an oversized pattern in + -- the case of a monster delete. + start_loc = match_main(text, + strsub(text1, 1, Match_MaxBits), expected_loc) + if start_loc ~= -1 then + end_loc = match_main(text, strsub(text1, -Match_MaxBits), + expected_loc + #text1 - Match_MaxBits) + if end_loc == -1 or start_loc >= end_loc then + -- Can't find valid trailing context. Drop this patch. + start_loc = -1 + end + end + else + start_loc = match_main(text, text1, expected_loc) + end + if start_loc == -1 then + -- No match found. :( + results[x] = false + -- Subtract the delta for this failed patch from subsequent patches. + delta = delta - patch.length2 - patch.length1 + else + -- Found a match. :) + results[x] = true + delta = start_loc - expected_loc + local text2 + if end_loc == -1 then + text2 = strsub(text, start_loc, start_loc + #text1 - 1) + else + text2 = strsub(text, start_loc, end_loc + Match_MaxBits - 1) + end + if text1 == text2 then + -- Perfect match, just shove the replacement text in. + text = strsub(text, 1, start_loc - 1) .. _diff_text2(patch.diffs) + .. strsub(text, start_loc + #text1) + else + -- Imperfect match. Run a diff to get a framework of equivalent + -- indices. + local diffs = diff_main(text1, text2, false) + if (#text1 > Match_MaxBits) + and (diff_levenshtein(diffs) / #text1 > Patch_DeleteThreshold) then + -- The end points match, but the content is unacceptably bad. + results[x] = false + else + _diff_cleanupSemanticLossless(diffs) + local index1 = 1 + local index2 + for y, mod in ipairs(patch.diffs) do + if mod[1] ~= DIFF_EQUAL then + index2 = _diff_xIndex(diffs, index1) + end + if mod[1] == DIFF_INSERT then + text = strsub(text, 1, start_loc + index2 - 2) + .. mod[2] .. strsub(text, start_loc + index2 - 1) + elseif mod[1] == DIFF_DELETE then + text = strsub(text, 1, start_loc + index2 - 2) .. strsub(text, + start_loc + _diff_xIndex(diffs, index1 + #mod[2] - 1)) + end + if mod[1] ~= DIFF_DELETE then + index1 = index1 + #mod[2] + end + end + end + end + end + end + -- Strip the padding off. + text = strsub(text, #nullPadding + 1, -#nullPadding - 1) + return text, results +end + +--[[ +* Take a list of patches and return a textual representation. +* @param {Array.<_new_patch_obj>} patches Array of patch objects. +* @return {string} Text representation of patches. +--]] +function patch_toText(patches) + local text = {} + for x, patch in ipairs(patches) do + _patch_appendText(patch, text) + end + return tconcat(text) +end + +--[[ +* Parse a textual representation of patches and return a list of patch objects. +* @param {string} textline Text representation of patches. +* @return {Array.<_new_patch_obj>} Array of patch objects. +* @throws {Error} If invalid input. +--]] +function patch_fromText(textline) + local patches = {} + if (#textline == 0) then + return patches + end + local text = {} + for line in gmatch(textline, '([^\n]*)') do + text[#text + 1] = line + end + local textPointer = 1 + while (textPointer <= #text) do + local start1, length1, start2, length2 + = strmatch(text[textPointer], '^@@ %-(%d+),?(%d*) %+(%d+),?(%d*) @@$') + if (start1 == nil) then + error('Invalid patch string: "' .. text[textPointer] .. '"') + end + local patch = _new_patch_obj() + patches[#patches + 1] = patch + + start1 = tonumber(start1) + length1 = tonumber(length1) or 1 + if (length1 == 0) then + start1 = start1 + 1 + end + patch.start1 = start1 + patch.length1 = length1 + + start2 = tonumber(start2) + length2 = tonumber(length2) or 1 + if (length2 == 0) then + start2 = start2 + 1 + end + patch.start2 = start2 + patch.length2 = length2 + + textPointer = textPointer + 1 + + while true do + local line = text[textPointer] + if (line == nil) then + break + end + local sign; sign, line = strsub(line, 1, 1), strsub(line, 2) + + local invalidDecode = false + local decoded = gsub(line, '%%(.?.?)', + function(c) + local n = tonumber(c, 16) + if (#c ~= 2) or (n == nil) then + invalidDecode = true + return '' + end + return strchar(n) + end) + if invalidDecode then + -- Malformed URI sequence. + error('Illegal escape in patch_fromText: ' .. line) + end + + line = decoded + + if (sign == '-') then + -- Deletion. + patch.diffs[#patch.diffs + 1] = {DIFF_DELETE, line} + elseif (sign == '+') then + -- Insertion. + patch.diffs[#patch.diffs + 1] = {DIFF_INSERT, line} + elseif (sign == ' ') then + -- Minor equality. + patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, line} + elseif (sign == '@') then + -- Start of next patch. + break + elseif (sign == '') then + -- Blank line? Whatever. + else + -- WTF? + error('Invalid patch mode "' .. sign .. '" in: ' .. line) + end + textPointer = textPointer + 1 + end + end + return patches +end + +-- --------------------------------------------------------------------------- +-- UNOFFICIAL/PRIVATE PATCH FUNCTIONS +-- --------------------------------------------------------------------------- + +local patch_meta = { + __tostring = function(patch) + local buf = {} + _patch_appendText(patch, buf) + return tconcat(buf) + end +} + +--[[ +* Class representing one patch operation. +* @constructor +--]] +function _new_patch_obj() + return setmetatable({ + --[[ @type {Array.>} ]] + diffs = {}; + --[[ @type {?number} ]] + start1 = 1; -- nil; + --[[ @type {?number} ]] + start2 = 1; -- nil; + --[[ @type {number} ]] + length1 = 0; + --[[ @type {number} ]] + length2 = 0; + }, patch_meta) +end + +--[[ +* Increase the context until it is unique, +* but don't let the pattern expand beyond Match_MaxBits. +* @param {_new_patch_obj} patch The patch to grow. +* @param {string} text Source text. +* @private +--]] +function _patch_addContext(patch, text) + if (#text == 0) then + return + end + local pattern = strsub(text, patch.start2, patch.start2 + patch.length1 - 1) + local padding = 0 + + -- LUANOTE: Lua's lack of a lastIndexOf function results in slightly + -- different logic here than in other language ports. + -- Look for the first two matches of pattern in text. If two are found, + -- increase the pattern length. + local firstMatch = indexOf(text, pattern) + local secondMatch = nil + if (firstMatch ~= nil) then + secondMatch = indexOf(text, pattern, firstMatch + 1) + end + while (#pattern == 0 or secondMatch ~= nil) + and (#pattern < Match_MaxBits - Patch_Margin - Patch_Margin) do + padding = padding + Patch_Margin + pattern = strsub(text, max(1, patch.start2 - padding), + patch.start2 + patch.length1 - 1 + padding) + firstMatch = indexOf(text, pattern) + if (firstMatch ~= nil) then + secondMatch = indexOf(text, pattern, firstMatch + 1) + else + secondMatch = nil + end + end + -- Add one chunk for good luck. + padding = padding + Patch_Margin + + -- Add the prefix. + local prefix = strsub(text, max(1, patch.start2 - padding), patch.start2 - 1) + if (#prefix > 0) then + tinsert(patch.diffs, 1, {DIFF_EQUAL, prefix}) + end + -- Add the suffix. + local suffix = strsub(text, patch.start2 + patch.length1, + patch.start2 + patch.length1 - 1 + padding) + if (#suffix > 0) then + patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, suffix} + end + + -- Roll back the start points. + patch.start1 = patch.start1 - #prefix + patch.start2 = patch.start2 - #prefix + -- Extend the lengths. + patch.length1 = patch.length1 + #prefix + #suffix + patch.length2 = patch.length2 + #prefix + #suffix +end + +--[[ +* Given an array of patches, return another array that is identical. +* @param {Array.<_new_patch_obj>} patches Array of patch objects. +* @return {Array.<_new_patch_obj>} Array of patch objects. +--]] +function _patch_deepCopy(patches) + local patchesCopy = {} + for x, patch in ipairs(patches) do + local patchCopy = _new_patch_obj() + local diffsCopy = {} + for i, diff in ipairs(patch.diffs) do + diffsCopy[i] = {diff[1], diff[2]} + end + patchCopy.diffs = diffsCopy + patchCopy.start1 = patch.start1 + patchCopy.start2 = patch.start2 + patchCopy.length1 = patch.length1 + patchCopy.length2 = patch.length2 + patchesCopy[x] = patchCopy + end + return patchesCopy +end + +--[[ +* Add some padding on text start and end so that edges can match something. +* Intended to be called only from within patch_apply. +* @param {Array.<_new_patch_obj>} patches Array of patch objects. +* @return {string} The padding string added to each side. +--]] +function _patch_addPadding(patches) + local paddingLength = Patch_Margin + local nullPadding = '' + for x = 1, paddingLength do + nullPadding = nullPadding .. strchar(x) + end + + -- Bump all the patches forward. + for x, patch in ipairs(patches) do + patch.start1 = patch.start1 + paddingLength + patch.start2 = patch.start2 + paddingLength + end + + -- Add some padding on start of first diff. + local patch = patches[1] + local diffs = patch.diffs + local firstDiff = diffs[1] + if (firstDiff == nil) or (firstDiff[1] ~= DIFF_EQUAL) then + -- Add nullPadding equality. + tinsert(diffs, 1, {DIFF_EQUAL, nullPadding}) + patch.start1 = patch.start1 - paddingLength -- Should be 0. + patch.start2 = patch.start2 - paddingLength -- Should be 0. + patch.length1 = patch.length1 + paddingLength + patch.length2 = patch.length2 + paddingLength + elseif (paddingLength > #firstDiff[2]) then + -- Grow first equality. + local extraLength = paddingLength - #firstDiff[2] + firstDiff[2] = strsub(nullPadding, #firstDiff[2] + 1) .. firstDiff[2] + patch.start1 = patch.start1 - extraLength + patch.start2 = patch.start2 - extraLength + patch.length1 = patch.length1 + extraLength + patch.length2 = patch.length2 + extraLength + end + + -- Add some padding on end of last diff. + patch = patches[#patches] + diffs = patch.diffs + local lastDiff = diffs[#diffs] + if (lastDiff == nil) or (lastDiff[1] ~= DIFF_EQUAL) then + -- Add nullPadding equality. + diffs[#diffs + 1] = {DIFF_EQUAL, nullPadding} + patch.length1 = patch.length1 + paddingLength + patch.length2 = patch.length2 + paddingLength + elseif (paddingLength > #lastDiff[2]) then + -- Grow last equality. + local extraLength = paddingLength - #lastDiff[2] + lastDiff[2] = lastDiff[2] .. strsub(nullPadding, 1, extraLength) + patch.length1 = patch.length1 + extraLength + patch.length2 = patch.length2 + extraLength + end + + return nullPadding +end + +--[[ +* Look through the patches and break up any which are longer than the maximum +* limit of the match algorithm. +* Intended to be called only from within patch_apply. +* @param {Array.<_new_patch_obj>} patches Array of patch objects. +--]] +function _patch_splitMax(patches) + local patch_size = Match_MaxBits + local x = 1 + while true do + local patch = patches[x] + if patch == nil then + return + end + if patch.length1 > patch_size then + local bigpatch = patch + -- Remove the big old patch. + tremove(patches, x) + x = x - 1 + local start1 = bigpatch.start1 + local start2 = bigpatch.start2 + local precontext = '' + while bigpatch.diffs[1] do + -- Create one of several smaller patches. + local patch = _new_patch_obj() + local empty = true + patch.start1 = start1 - #precontext + patch.start2 = start2 - #precontext + if precontext ~= '' then + patch.length1, patch.length2 = #precontext, #precontext + patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, precontext} + end + while bigpatch.diffs[1] and (patch.length1 < patch_size-Patch_Margin) do + local diff_type = bigpatch.diffs[1][1] + local diff_text = bigpatch.diffs[1][2] + if (diff_type == DIFF_INSERT) then + -- Insertions are harmless. + patch.length2 = patch.length2 + #diff_text + start2 = start2 + #diff_text + patch.diffs[#(patch.diffs) + 1] = bigpatch.diffs[1] + tremove(bigpatch.diffs, 1) + empty = false + elseif (diff_type == DIFF_DELETE) and (#patch.diffs == 1) + and (patch.diffs[1][1] == DIFF_EQUAL) + and (#diff_text > 2 * patch_size) then + -- This is a large deletion. Let it pass in one chunk. + patch.length1 = patch.length1 + #diff_text + start1 = start1 + #diff_text + empty = false + patch.diffs[#patch.diffs + 1] = {diff_type, diff_text} + tremove(bigpatch.diffs, 1) + else + -- Deletion or equality. + -- Only take as much as we can stomach. + diff_text = strsub(diff_text, 1, + patch_size - patch.length1 - Patch_Margin) + patch.length1 = patch.length1 + #diff_text + start1 = start1 + #diff_text + if (diff_type == DIFF_EQUAL) then + patch.length2 = patch.length2 + #diff_text + start2 = start2 + #diff_text + else + empty = false + end + patch.diffs[#patch.diffs + 1] = {diff_type, diff_text} + if (diff_text == bigpatch.diffs[1][2]) then + tremove(bigpatch.diffs, 1) + else + bigpatch.diffs[1][2] + = strsub(bigpatch.diffs[1][2], #diff_text + 1) + end + end + end + -- Compute the head context for the next patch. + precontext = _diff_text2(patch.diffs) + precontext = strsub(precontext, -Patch_Margin) + -- Append the end context for this patch. + local postcontext = strsub(_diff_text1(bigpatch.diffs), 1, Patch_Margin) + if postcontext ~= '' then + patch.length1 = patch.length1 + #postcontext + patch.length2 = patch.length2 + #postcontext + if patch.diffs[1] + and (patch.diffs[#patch.diffs][1] == DIFF_EQUAL) then + patch.diffs[#patch.diffs][2] = patch.diffs[#patch.diffs][2] + .. postcontext + else + patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, postcontext} + end + end + if not empty then + x = x + 1 + tinsert(patches, x, patch) + end + end + end + x = x + 1 + end +end + +--[[ +* Emulate GNU diff's format. +* Header: @@ -382,8 +481,9 @@ +* @return {string} The GNU diff string. +--]] +function _patch_appendText(patch, text) + local coords1, coords2 + local length1, length2 = patch.length1, patch.length2 + local start1, start2 = patch.start1, patch.start2 + local diffs = patch.diffs + + if length1 == 1 then + coords1 = start1 + else + coords1 = ((length1 == 0) and (start1 - 1) or start1) .. ',' .. length1 + end + + if length2 == 1 then + coords2 = start2 + else + coords2 = ((length2 == 0) and (start2 - 1) or start2) .. ',' .. length2 + end + text[#text + 1] = '@@ -' .. coords1 .. ' +' .. coords2 .. ' @@\n' + + local op + -- Escape the body of the patch with %xx notation. + for x, diff in ipairs(patch.diffs) do + local diff_type = diff[1] + if diff_type == DIFF_INSERT then + op = '+' + elseif diff_type == DIFF_DELETE then + op = '-' + elseif diff_type == DIFF_EQUAL then + op = ' ' + end + text[#text + 1] = op + .. gsub(diffs[x][2], percentEncode_pattern, percentEncode_replace) + .. '\n' + end + + return text +end + +-- Expose the API +local _M = {} + +_M.DIFF_DELETE = DIFF_DELETE +_M.DIFF_INSERT = DIFF_INSERT +_M.DIFF_EQUAL = DIFF_EQUAL + +_M.diff_main = diff_main +_M.diff_cleanupSemantic = diff_cleanupSemantic +_M.diff_cleanupEfficiency = diff_cleanupEfficiency +_M.diff_levenshtein = diff_levenshtein +_M.diff_prettyHtml = diff_prettyHtml + +_M.match_main = match_main + +_M.patch_make = patch_make +_M.patch_toText = patch_toText +_M.patch_fromText = patch_fromText +_M.patch_apply = patch_apply + +-- Expose some non-API functions as well, for testing purposes etc. +_M.diff_commonPrefix = _diff_commonPrefix +_M.diff_commonSuffix = _diff_commonSuffix +_M.diff_commonOverlap = _diff_commonOverlap +_M.diff_halfMatch = _diff_halfMatch +_M.diff_bisect = _diff_bisect +_M.diff_cleanupMerge = _diff_cleanupMerge +_M.diff_cleanupSemanticLossless = _diff_cleanupSemanticLossless +_M.diff_text1 = _diff_text1 +_M.diff_text2 = _diff_text2 +_M.diff_toDelta = _diff_toDelta +_M.diff_fromDelta = _diff_fromDelta +_M.diff_xIndex = _diff_xIndex +_M.match_alphabet = _match_alphabet +_M.match_bitap = _match_bitap +_M.new_patch_obj = _new_patch_obj +_M.patch_addContext = _patch_addContext +_M.patch_splitMax = _patch_splitMax +_M.patch_addPadding = _patch_addPadding +_M.settings = settings + +return _M diff --git a/test/luaunit/error_msg_test.lua b/test/luaunit/error_msg_test.lua index 09c2604..1e4ec87 100644 --- a/test/luaunit/error_msg_test.lua +++ b/test/luaunit/error_msg_test.lua @@ -1,4 +1,5 @@ local t = require('luatest') +local utils = require('luatest.utils') local g = t.group() local helper = require('test.helpers.general') @@ -6,6 +7,14 @@ local assert_failure_matches = helper.assert_failure_matches local assert_failure_contains = helper.assert_failure_contains local assert_failure_equals = helper.assert_failure_equals +g.before_all(function() + t.private.set_diff_enabled(false) +end) + +g.after_all(function() + t.private.set_diff_enabled(true) +end) + function g.test_assert_equalsMsg() assert_failure_equals('expected: 2, actual: 1', t.assert_equals, 1, 2 ) assert_failure_equals('expected: "exp"\nactual: "act"', t.assert_equals, 'act', 'exp') @@ -349,3 +358,270 @@ actual: {3, 2, 1}]], t.assert_equals, {3,2,1}, {one=1,two=2}) assert_failure_contains([[and