|
| 1 | +/** |
| 2 | + * A very simple diff algorithm. Slightly adapted to support splitting at different stages (e.g. |
| 3 | + * first diff lines, then diff words) |
| 4 | + * |
| 5 | + * https://bramcohen.livejournal.com/73318.html |
| 6 | + * |
| 7 | + * @experimental This API will likely change. |
| 8 | + */ |
| 9 | + |
| 10 | +import * as map from '../map.js' |
| 11 | +import * as math from '../math.js' |
| 12 | +import * as array from '../array.js' |
| 13 | + |
/**
 * Patience diff on two sequences that were already split into comparable units (e.g. the lines
 * of two texts).
 *
 * Elements shared at the start and at the end of both inputs are stripped first. Only the
 * remaining middle sections are handed to `lcs`; the length of the stripped prefix is passed
 * along as index adjustment.
 *
 * @param {Array<string>} as
 * @param {Array<string>} bs
 * @return {Array<{ index: number, remove: Array<string>, insert: Array<string>}>} changeset @todo should use delta instead
 */
export const diff = (as, bs) => {
  const trimmed = removeCommonPrefixAndSuffix(as, bs)
  return lcs(trimmed.middleAs, trimmed.middleBs, trimmed.commonPrefix)
}
| 29 | + |
| 30 | + |
/**
 * Diff two strings after tokenizing them with a separator.
 *
 * Both strings are split at every match of the separator. The separators themselves are kept as
 * tokens, so the tokens of a string always add up to the complete string. Because of that, every
 * returned change describes an exact edit on `a`: `index` is the string offset in `a` at which
 * `remove` starts, and `insert` is the text that replaces it.
 *
 * @param {string} a
 * @param {string} b
 * @param {RegExp|string} _regexp Separator. A string is matched literally (regexp
 *   meta-characters such as `.` have no special meaning). A RegExp without the global flag is
 *   handled as if the flag were set, because `String.prototype.matchAll` rejects non-global
 *   expressions.
 * @return {Array<{ index: number, remove: string, insert: string }>} Changes in ascending order
 *   of `index`.
 */
export const diffSplitBy = (a, b, _regexp) => {
  /**
   * @type {RegExp}
   */
  let regexp
  if (typeof _regexp === 'string') {
    // escape meta-characters so that the separator is matched literally
    regexp = new RegExp(_regexp.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g')
  } else if (_regexp.global) {
    regexp = _regexp
  } else {
    regexp = new RegExp(_regexp.source, _regexp.flags + 'g')
  }
  // Separators are kept as tokens: dropping them would make it impossible to translate token
  // indexes back to string offsets. Empty tokens (e.g. produced by an empty separator) carry no
  // content and would only add noise to the diff.
  const as = splitByRegexp(a, regexp, true).filter(token => token.length > 0)
  const bs = splitByRegexp(b, regexp, true).filter(token => token.length > 0)
  let tokenIndex = 0
  let stringIndex = 0
  return diff(as, bs).map(change => {
    // `diff` reports changes in ascending token order, so a single running offset is enough
    for (; tokenIndex < change.index; tokenIndex++) {
      stringIndex += as[tokenIndex].length
    }
    return {
      index: stringIndex,
      remove: change.remove.join(''),
      insert: change.insert.join('')
    }
  })
}
| 56 | + |
/**
 * Sensible default for diffing two strings with patience diff.
 *
 * Works in two stages: the strings are first diffed using `'\n'` as separator. The removed and
 * inserted text of every resulting change is then diffed again on a finer level, using tokens
 * made of a period followed by a space, runs of alphanumeric characters, or a single
 * punctuation / bracket / space character. The index of each fine-grained change is offset by
 * the index of the coarse change it originates from.
 *
 * @param {string} a
 * @param {string} b
 */
export const diffAuto = (a, b) =>
  diffSplitBy(a, b, '\n').flatMap(lineChange => {
    const wordChanges = diffSplitBy(lineChange.remove, lineChange.insert, /\.\ |[a-zA-Z0-9]+|[\.\ \(\)\[\]\,\;\{\}]/g)
    return wordChanges.map(wordChange => ({
      insert: wordChange.insert,
      remove: wordChange.remove,
      index: wordChange.index + lineChange.index
    }))
  })
| 74 | + |
/**
 * Strip the elements that both sequences share at their start and at their end.
 *
 * The suffix scan is limited to the elements that were not already consumed by the prefix, so
 * an element is never counted as part of both.
 *
 * @param {Array<string>} as
 * @param {Array<string>} bs
 * @return {{ middleAs: Array<string>, middleBs: Array<string>, commonPrefix: number, commonSuffix: number }}
 *   The differing middle sections and the number of elements stripped on either side.
 */
const removeCommonPrefixAndSuffix = (as, bs) => {
  const shorterLen = math.min(as.length, bs.length)
  // match start
  let commonPrefix = 0
  while (commonPrefix < shorterLen && as[commonPrefix] === bs[commonPrefix]) {
    commonPrefix++
  }
  // match end
  const maxSuffix = shorterLen - commonPrefix
  let commonSuffix = 0
  while (commonSuffix < maxSuffix && as[as.length - 1 - commonSuffix] === bs[bs.length - 1 - commonSuffix]) {
    commonSuffix++
  }
  return {
    middleAs: as.slice(commonPrefix, as.length - commonSuffix),
    middleBs: bs.slice(commonPrefix, bs.length - commonSuffix),
    commonPrefix,
    commonSuffix
  }
}
| 93 | + |
/**
 * Tokenize `str` using the matches of `regexp`.
 *
 * The text between two consecutive matches (as well as before the first and after the last
 * match) is emitted as a token whenever it is non-empty. The matched text itself is emitted only
 * if `includeSeparator` is set; otherwise it is dropped.
 *
 * @param {string} str
 * @param {RegExp} regexp Is handed to `String.prototype.matchAll`, which requires the global flag.
 * @param {boolean} includeSeparator
 * @return {Array<string>}
 */
const splitByRegexp = (str, regexp, includeSeparator) => {
  /**
   * @type {Array<string>}
   */
  const tokens = []
  // end of the previous match, i.e. where the next in-between token starts
  let cursor = 0
  for (const match of str.matchAll(regexp)) {
    const start = /** @type {number} */ (match.index)
    const matched = match[0]
    if (cursor < start) {
      tokens.push(str.slice(cursor, start))
    }
    if (includeSeparator) {
      tokens.push(matched)
    }
    cursor = start + matched.length
  }
  const tail = str.slice(cursor)
  if (tail.length > 0) {
    tokens.push(tail)
  }
  return tokens
}
| 117 | + |
/**
 * Bookkeeping record for one distinct value of a sequence.
 *
 * A value may occur several times in a sequence, hence a list of indexes. Only values that occur
 * exactly once are used for matching. An item can additionally point to its counterpart in the
 * other sequence and to an item of the previous pile built during patience sort.
 */
class Item {
  constructor () {
    /**
     * All positions at which the value occurs.
     *
     * @type {Array<number>}
     */
    this.indexes = []
    /**
     * The item that holds the same value in the other sequence, if there is one.
     *
     * @type {Item?}
     */
    this.match = null
    /**
     * For patience sort. The item that was on top of the previous pile at the time this item
     * was placed on its pile.
     *
     * @type {Item?}
     */
    this.ref = null
  }
}
| 141 | + |
/**
 * Group the positions of a sequence by value.
 *
 * @param {Array<string>} xs
 * @return {Map<string,Item>} One item per distinct value; `indexes` lists every position of
 *   that value in `xs` in ascending order.
 */
const partition = xs => {
  /**
   * @type {Map<string,Item>}
   */
  const itemsByValue = map.create()
  xs.forEach((value, position) => {
    const item = map.setIfUndefined(itemsByValue, value, () => new Item())
    item.indexes.push(position)
  })
  return itemsByValue
}
| 155 | + |
/**
 * Compute the changes that turn `as` into `bs`, anchored on a common subsequence that is found
 * using patience sort.
 *
 * Only values that occur exactly once in `as` and exactly once in `bs` are considered as
 * anchors. The ranges between two consecutive anchors (and between the last anchor and the end)
 * are compared directly: their shared prefix and suffix are stripped and whatever remains is
 * reported as a single change.
 *
 * @param {Array<string>} as
 * @param {Array<string>} bs
 * @param {number} indexAdjust Offset that is added to the `index` of every reported change.
 * @return {Array<{ index: number, remove: Array<string>, insert: Array<string>}>}
 */
const lcs = (as, bs, indexAdjust) => {
  if (as.length === 0 && bs.length === 0) {
    return []
  }
  const aItems = partition(as)
  const bItems = partition(bs)
  /**
   * @type {Array<Array<Item>>} I.e. Array<Pile<Item>>
   */
  const piles = []
  aItems.forEach((candidate, key) => {
    // skip values that are not unique in as
    if (candidate.indexes.length > 1) return
    const counterpart = bItems.get(key) || null
    candidate.match = counterpart
    // skip values that have no match or are not unique in bs
    if (counterpart == null || counterpart.indexes.length > 1) return
    const bIndex = counterpart.indexes[0]
    // leftmost pile whose top item is matched to a later position in bs
    let target = piles.findIndex(pile => bIndex < /** @type {Item} */ (pile[pile.length - 1].match).indexes[0])
    if (target < 0) {
      // no such pile - open a new one at the end
      target = piles.length
    }
    if (target > 0) {
      candidate.ref = array.last(piles[target - 1])
    }
    if (target === piles.length) {
      piles.push([candidate])
    } else {
      piles[target].push(candidate)
    }
  })
  /**
   * References to all matched items
   *
   * @type {Array<Item>}
   */
  const matches = []
  // walk the back-references, starting at the first item of the last pile
  for (let item = piles[piles.length - 1]?.[0]; item != null; item = item.ref) {
    matches.push(item)
  }
  matches.reverse()
  // add pseudo match (assume that the end of both sequences always matches)
  const endOfA = new Item()
  const endOfB = new Item()
  endOfA.indexes.push(as.length)
  endOfB.indexes.push(bs.length)
  endOfA.match = endOfB
  matches.push(endOfA)
  /**
   * @type {Array<{ index: number, remove: Array<string>, insert: Array<string>}>}
   */
  const changeset = []
  // start of the not yet compared range in as / bs
  let aStart = 0
  let bStart = 0
  for (const anchor of matches) {
    const aEnd = anchor.indexes[0]
    const bEnd = /** @type {Item} */ (anchor.match).indexes[0]
    if (aEnd !== aStart || bEnd !== bStart) {
      const stripped = removeCommonPrefixAndSuffix(as.slice(aStart, aEnd), bs.slice(bStart, bEnd))
      if (stripped.middleAs.length !== 0 || stripped.middleBs.length !== 0) {
        changeset.push({
          index: aStart + indexAdjust + stripped.commonPrefix,
          remove: stripped.middleAs,
          insert: stripped.middleBs
        })
      }
    }
    // continue behind the anchor
    aStart = aEnd + 1
    bStart = bEnd + 1
  }
  return changeset
}
0 commit comments