dmonad
diff --git a/‎diff/patience.js‎
Lines changed: 227 additions & 0 deletions b/‎diff/patience.js‎
Lines changed: 227 additions & 0 deletions
diff --git a/‎diff/patience.test.js‎
Lines changed: 144 additions & 0 deletions b/‎diff/patience.test.js‎
Lines changed: 144 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 6 additions & 0 deletions b/‎package.json‎
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,227 @@
+/**
+ * A very simple diff algorithm. Slightly adapted to support splitting at different stages (e.g.
+ * first diff lines, then diff words)
+ *
+ * https://bramcohen.livejournal.com/73318.html
+ *
+ * @experimental This API will likely change.
+ */
+
+import * as map from '../map.js'
+import * as math from '../math.js'
+import * as array from '../array.js'
+
+/**
+ * Patience diff over two pre-split sequences (e.g. a text that was already split by newline).
+ *
+ * The entries shared at the beginning and at the end of both sequences are trimmed first; only
+ * the remaining middle parts are handed to the LCS step. The length of the trimmed prefix is
+ * passed along so that the reported indexes refer to positions in `as`.
+ *
+ * @param {Array<string>} as
+ * @param {Array<string>} bs
+ * @return {Array<{ index: number, remove: Array<string>, insert: Array<string>}>} changeset @todo should use delta instead
+ */
+export const diff = (as, bs) => {
+  const trimmed = removeCommonPrefixAndSuffix(as, bs)
+  return lcs(trimmed.middleAs, trimmed.middleBs, trimmed.commonPrefix)
+}
+
+
+/**
+ * Diff two strings after cutting them into tokens at every match of a separator.
+ *
+ * The matched separators are kept as tokens of their own, so the tokens of a string always
+ * concatenate back to that string. Because of that, the reported `index` is a real character
+ * offset into `a`, and `remove` / `insert` are the exact texts that have to be swapped at that
+ * offset (including any separators that are part of the change).
+ *
+ * @param {string} a
+ * @param {string} b
+ * @param {RegExp|string} _regexp A regular expression, or a separator string that is matched literally
+ * @return {Array<{ index: number, remove: string, insert: string }>}
+ */
+export const diffSplitBy = (a, b, _regexp) => {
+  /**
+   * @type {RegExp}
+   */
+  let regexp
+  if (typeof _regexp === 'string') {
+    // a string separator is matched literally, so regex meta characters must be escaped
+    regexp = new RegExp(_regexp.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g')
+  } else {
+    // String.prototype.matchAll throws a TypeError for expressions without the global flag
+    regexp = _regexp.global ? _regexp : new RegExp(_regexp.source, _regexp.flags + 'g')
+  }
+  const as = splitByRegexp(a, regexp, true)
+  const bs = splitByRegexp(b, regexp, true)
+  const changes = diff(as, bs)
+  let prevSplitIndex = 0
+  let prevStringIndex = 0
+  return changes.map(change => {
+    // translate the token index into a character offset; changes arrive ordered by index, so the
+    // running sum can be continued from the previous change
+    for (; prevSplitIndex < change.index; prevSplitIndex++) {
+      prevStringIndex += as[prevSplitIndex].length
+    }
+    return {
+      index: prevStringIndex,
+      remove: change.remove.join(''),
+      insert: change.insert.join('')
+    }
+  })
+}
+
+/**
+ * Convenience entry point for diffing two strings with patience diff.
+ *
+ * Works in two stages: the strings are first diffed after splitting them by newline. Every
+ * resulting change is then diffed again on a finer level (split into words, spaces, brackets and
+ * punctuation). The index of the surrounding first-stage change is added to each fine-grained
+ * index.
+ *
+ * @param {string} a
+ * @param {string} b
+ */
+export const diffAuto = (a, b) =>
+  diffSplitBy(a, b, '\n').flatMap(lineChange => {
+    const wordChanges = diffSplitBy(lineChange.remove, lineChange.insert, /\.\ |[a-zA-Z0-9]+|[\.\ \(\)\[\]\,\;\{\}]/g)
+    return wordChanges.map(wordChange => ({
+      insert: wordChange.insert,
+      remove: wordChange.remove,
+      index: wordChange.index + lineChange.index
+    }))
+  })
+
+/**
+ * Trims the entries that both arrays share at their start and at their end.
+ *
+ * The suffix scan is bounded so that it never overlaps the already matched prefix.
+ *
+ * @param {Array<string>} as
+ * @param {Array<string>} bs
+ * @return {{ middleAs: Array<string>, middleBs: Array<string>, commonPrefix: number, commonSuffix: number }}
+ */
+const removeCommonPrefixAndSuffix = (as, bs) => {
+  const shorter = math.min(as.length, bs.length)
+  // count equal entries from the front
+  let commonPrefix = 0
+  while (commonPrefix < shorter && as[commonPrefix] === bs[commonPrefix]) {
+    commonPrefix++
+  }
+  // count equal entries from the back, without re-using entries of the prefix
+  const maxSuffix = shorter - commonPrefix
+  let commonSuffix = 0
+  while (commonSuffix < maxSuffix && as[as.length - 1 - commonSuffix] === bs[bs.length - 1 - commonSuffix]) {
+    commonSuffix++
+  }
+  return {
+    middleAs: as.slice(commonPrefix, as.length - commonSuffix),
+    middleBs: bs.slice(commonPrefix, bs.length - commonSuffix),
+    commonPrefix,
+    commonSuffix
+  }
+}
+
+/**
+ * Cuts a string at every match of a regular expression and returns the pieces in order.
+ *
+ * Empty pieces between matches are dropped. The matched texts themselves are only part of the
+ * result when `includeSeparator` is set.
+ *
+ * @param {string} str
+ * @param {RegExp} regexp
+ * @param {boolean} includeSeparator
+ */
+const splitByRegexp = (str, regexp, includeSeparator) => {
+  /**
+   * @type {Array<string>}
+   */
+  const parts = []
+  let cursor = 0
+  for (const match of str.matchAll(regexp)) {
+    // text between the previous match and this one
+    if (cursor < (match.index || 0)) {
+      parts.push(str.slice(cursor, match.index))
+    }
+    if (includeSeparator) {
+      parts.push(match[0]) // is always non-empty
+    }
+    cursor = /** @type {number} */ (match.index) + match[0].length
+  }
+  // text behind the last match
+  const tail = str.slice(cursor)
+  if (tail.length > 0) {
+    parts.push(tail)
+  }
+  return parts
+}
+
+/**
+ * Bookkeeping for one distinct entry of a sequence. An item may have multiple occurrences (only
+ * items with a single occurrence on both sides are used for matching). It also may have a
+ * reference to the item with the same content on the other side (from as to bs).
+ */
+class Item {
+  constructor () {
+    /**
+     * Every position at which the entry occurs in its sequence.
+     *
+     * @type {Array<number>}
+     */
+    this.indexes = []
+    /**
+     * The matching item from the other side
+     * @type {Item?}
+     */
+    this.match = null
+    /**
+     * For patience sort. Back-reference to an item of the previous pile: `lcs` assigns the
+     * result of `array.last` on the previous pile (presumably its top item) at the moment this
+     * item is placed. Stays null while the item has no previous pile.
+     *
+     * @type {Item?}
+     */
+    this.ref = null
+  }
+}
+
+/**
+ * Groups the entries of a sequence by their content and records every index at which an entry
+ * occurs (assuming `map.setIfUndefined` returns the stored value for an existing key and
+ * otherwise stores and returns the newly created one).
+ *
+ * @param {Array<string>} xs
+ * @return {Map<string,Item>}
+ */
+const partition = xs => {
+  /**
+   * @type {Map<string,Item>}
+   */
+  const itemsByContent = map.create()
+  xs.forEach((content, position) => {
+    const item = map.setIfUndefined(itemsByContent, content, () => new Item())
+    item.indexes.push(position)
+  })
+  return itemsByContent
+}
+
+/**
+ * Find the longest common subsequence of items using patience sort.
+ *
+ * Only entries that occur exactly once in `as` and exactly once in `bs` are considered as
+ * matches. They are sorted into piles; following the `ref` back-references starting at the last
+ * pile yields the chain of matches that is used. Everything between two consecutive matches
+ * (and behind the last one) is reported as a single change, after trimming the entries that the
+ * two sides share at the start and end of that range.
+ *
+ * @param {Array<string>} as
+ * @param {Array<string>} bs
+ * @param {number} indexAdjust Offset that is added to the index of every reported change
+ * @return {Array<{ index: number, remove: Array<string>, insert: Array<string>}>} Changes in
+ * ascending index order; empty when both inputs are empty
+ */
+const lcs = (as, bs, indexAdjust) => {
+  if (as.length === 0 && bs.length === 0) return []
+  const aParts = partition(as)
+  const bParts = partition(bs)
+  /**
+   * An item is pushed onto the first pile whose last item matches a later position in `bs`;
+   * if there is no such pile, the item opens a new pile.
+   *
+   * @type {Array<Array<Item>>} I.e. Array<Pile<Item>>
+   */
+  const piles = []
+  aParts.forEach((aItem, aKey) => {
+    // skip if no match or if either item is not unique (the condition also stores the b-side item in aItem.match)
+    if (aItem.indexes.length > 1 || (aItem.match = bParts.get(aKey) || null) == null || aItem.match.indexes.length > 1) return
+    for (let i = 0; i < piles.length; i++) {
+      const pile = piles[i]
+      if (aItem.match.indexes[0] < /** @type {Item} */ (pile[pile.length - 1].match).indexes[0]) {
+        pile.push(aItem)
+        if (i > 0) aItem.ref = array.last(piles[i - 1])
+        return
+      }
+    }
+    piles.length > 0 && (aItem.ref = array.last(piles[piles.length - 1]))
+    piles.push([aItem])
+  })
+  /**
+   * References to all matched items. Collected by following `ref` from the first item of the
+   * last pile and then reversed, so that the chain starts with the item reached last.
+   *
+   * @type {Array<Item>}
+   */
+  const matches = []
+  /**
+   * @type {Item?}
+   */
+  let currPileItem = piles[piles.length - 1]?.[0]
+  while (currPileItem != null) {
+    matches.push(currPileItem)
+    currPileItem = currPileItem.ref
+  }
+  matches.reverse()
+  // add pseudo match (assume the string terminal always matches) so that the loop below also handles the range behind the last real match
+  const pseudoA = new Item()
+  const pseudoB = new Item()
+  pseudoA.match = pseudoB
+  pseudoA.indexes.push(as.length)
+  pseudoB.indexes.push(bs.length)
+  matches.push(pseudoA)
+  /**
+   * @type {Array<{ index: number, remove: Array<string>, insert: Array<string>}>}
+   */
+  const changeset = []
+  let diffAStart = 0
+  let diffBStart = 0
+  for (let i = 0; i < matches.length; i++) {
+    const m = matches[i]
+    const delLength = m.indexes[0] - diffAStart
+    const insLength = /** @type {Item} */ (m.match).indexes[0] - diffBStart
+    if (delLength !== 0 || insLength !== 0) {
+      const stripped = removeCommonPrefixAndSuffix(as.slice(diffAStart, diffAStart + delLength), bs.slice(diffBStart, diffBStart + insLength))
+      if (stripped.middleAs.length !== 0 || stripped.middleBs.length !== 0) {
+        changeset.push({ index: diffAStart + indexAdjust + stripped.commonPrefix, remove: stripped.middleAs, insert: stripped.middleBs })
+      }
+    }
+    diffAStart = m.indexes[0] + 1 // continue behind the matched entry
+    diffBStart = /** @type {Item} */ (m.match).indexes[0] + 1
+  }
+  return changeset
+}
@@ -0,0 +1,144 @@
+import * as prng from '../prng.js'
+import * as t from '../testing.js'
+import * as patience from './patience.js'
+
+/**
+ * Runs `patience.diffAuto` on two strings, logs the result and compares it with the expected
+ * list of changes.
+ *
+ * @param {string} a
+ * @param {string} b
+ * @param {Array<{ insert: string, remove: string, index: number }>} expect
+ */
+const testDiffAuto = (a, b, expect) => {
+  const actual = patience.diffAuto(a, b)
+  t.info(`Diffing "${a}" with "${b}"`)
+  console.log(actual)
+  t.compare(actual, expect)
+}
+
+/**
+ * Checks `diffAuto` against a fixed table of inputs and their expected changes.
+ *
+ * @param {t.TestCase} _tc
+ */
+export const testDiffing = _tc => {
+  /**
+   * @type {Array<{ a: string, b: string, expect: Array<{ insert: string, remove: string, index: number }> }>}
+   */
+  const cases = [
+    {
+      a: 'x  ',
+      b: ' ',
+      expect: [{ insert: '', remove: 'x ', index: 0 }]
+    },
+    // no change
+    {
+      a: 'testa',
+      b: 'testa',
+      expect: []
+    },
+    // single char change
+    {
+      a: 'testa',
+      b: 'testb',
+      expect: [{ insert: 'testb', remove: 'testa', index: 0 }]
+    },
+    // single word change
+    {
+      a: 'The rabbit jumped over the fence.\n',
+      b: 'The dog jumped over the fence.\n',
+      expect: [{ insert: 'dog', remove: 'rabbit', index: 4 }]
+    },
+    // similar sentences.
+    {
+      a: 'the dog. the cat.',
+      b: 'the cat. the rabbit.',
+      expect: [
+        { insert: 'cat', remove: 'dog', index: 4 },
+        { insert: 'rabbit', remove: 'cat', index: 13 }
+      ]
+    },
+    // word inserted at the start
+    {
+      a: 'cat food',
+      b: 'my cat food',
+      expect: [{ insert: 'my ', remove: '', index: 0 }]
+    },
+    // first and last word replaced
+    {
+      a: 'the cat stuff',
+      b: 'my cat food',
+      expect: [
+        { insert: 'my', remove: 'the', index: 0 },
+        { insert: 'food', remove: 'stuff', index: 8 }
+      ]
+    }
+  ]
+  cases.forEach(({ a, b, expect }) => {
+    testDiffAuto(a, b, expect)
+  })
+}
+
+/**
+ * Randomized round-trip test: builds a list of random words, applies random replacements,
+ * insertions and deletions to a copy, diffs the space-joined texts and verifies that applying
+ * the computed changes to the original text reproduces the edited text.
+ *
+ * @param {t.TestCase} tc
+ */
+export const testRepeatRandomWordReplace = tc => {
+  const NWords = 600
+  const NReplacements = Math.floor(NWords / 20)
+  const NInserts = Math.floor(NWords / 20) + 1
+  const NDeletes = Math.floor(NWords / 20) + 1
+  const MaxWordLen = 6
+
+  t.group(`Diff on changed list of words (#words=${NWords},#replacements=${NReplacements},#inserts=${NInserts},#deletes=${NDeletes})`, () => {
+    const words = []
+    for (let i = 0; i < NWords; i++) {
+      words.push(prng.word(tc.prng, 0, MaxWordLen))
+    }
+    const newWords = words.slice()
+    for (let i = 0; i < NReplacements; i++) {
+      const pos = prng.int32(tc.prng, 0, words.length - 1)
+      newWords[pos] = prng.word(tc.prng, 0, MaxWordLen)
+    }
+    for (let i = 0; i < NInserts; i++) {
+      const pos = prng.int32(tc.prng, 0, words.length - 1)
+      newWords.splice(pos, 0, prng.word(tc.prng, 0, MaxWordLen))
+    }
+    for (let i = 0; i < NDeletes; i++) {
+      const pos = prng.int32(tc.prng, 0, words.length - 1)
+      newWords.splice(pos, 1)
+    }
+    const before = words.join(' ')
+    const after = newWords.join(' ')
+    /**
+     * @type {Array<{ insert: string, remove: string, index: number }>}
+     */
+    let d = []
+    t.measureTime(`time to calculate diff (a.length=${before.length},b.length=${after.length})`, () => {
+      d = patience.diffAuto(before, after)
+    })
+    let updating = before
+    console.log({ words, newWords, diff: d })
+    // verify by applying the changes back to front, so that the indexes of earlier changes stay valid
+    for (let i = d.length - 1; i >= 0; i--) {
+      const change = d[i]
+      updating = updating.slice(0, change.index) + change.insert + updating.slice(change.index + change.remove.length)
+    }
+    t.compare(updating, after)
+    t.assert(d.length <= NReplacements + 1 + NInserts + NDeletes) // Sanity check: A maximum of one fault
+  })
+}
+
@@ -135,6 +135,12 @@
       "import": "./decoding.js",
       "require": "./dist/decoding.cjs"
     },
+    "./diff/patience": {
+      "types": "./diff/patience.d.ts",
+      "module": "./diff/patience.js",
+      "import": "./diff/patience.js",
+      "require": "./dist/patience.cjs"
+    },
     "./diff.js": "./diff.js",
     "./dist/diff.cjs": "./dist/diff.cjs",
     "./diff": {