Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
## 4.1.0

- Significant performance improvements:
- **4.7x speedup** on typical code diffs relative to 4.0.1.
- **2x speedup** on very large files (10k+ lines).
- Implemented **Anchor Splitting** strategies (Divide & Conquer) to decompose large diff problems.
- Optimized interning overhead by determining anchors directly from IDs.

## 4.0.1

- fix endless loop/wrong result in certain cases (#21, #18)
Expand All @@ -9,13 +17,11 @@

## 3.0.0


- added `DiffResult::getUpdatesWithData`. To make this work, the following changes have been made:
- The functions `calculateDiff()`, `calculateListDiff`, `calculateCustomListDiff` now have an additional
generic type parameter. This is a breaking change (if you used `calculateCustomListDiff`
with a single explicit type parameter, it now has two)
- `DiffResult` now has a generic type parameter for the type of the data of the underlying lists

- added `DiffResult::getUpdatesWithData`. To make this work, the following changes have been made:
- The functions `calculateDiff()`, `calculateListDiff`, `calculateCustomListDiff` now have an additional
generic type parameter. This is a breaking change (if you used `calculateCustomListDiff`
with a single explicit type parameter, it now has two)
- `DiffResult` now has a generic type parameter for the type of the data of the underlying lists

## 2.0.0

Expand Down Expand Up @@ -78,5 +84,4 @@ Update Package description

Documentation Fixes


## 0.0.1 - Initial Release
124 changes: 124 additions & 0 deletions lib/src/anchors.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import 'dart:typed_data';

import 'package:diffutil_dart/src/int_int_map.dart';

/// An element matched between two lists, recorded by its two positions.
///
/// [findAnchors] creates one for every element that occurs exactly once in
/// both the source and the target range; such matches can serve as fixed
/// points around which a diff problem is split.
class Anchor {
  /// Index of the element in the source list.
  final int sourceIndex;

  /// Index of the matching element in the target list.
  final int targetIndex;

  /// Creates an anchor tying [sourceIndex] to [targetIndex].
  ///
  /// The constructor is `const` because both fields are final, which lets
  /// anchors be created in constant contexts.
  const Anchor(this.sourceIndex, this.targetIndex);
}

/// Collects elements that can anchor a diff between two list ranges.
///
/// An anchor is an element whose hash occurs exactly once in
/// `source[sStart..sEnd)` and exactly once in `target[tStart..tEnd)`, and
/// whose two occurrences compare equal with `==`. From those matches the
/// longest run with increasing source and target positions is kept (see
/// [longestIncreasingSubsequence]). The indices stored in the returned
/// anchors are absolute positions in [source] and [target].
///
/// Returns an empty list when either range holds fewer than 64 elements,
/// when fewer than 4 matches are found, or when fewer than 2 anchors remain
/// after ordering.
List<Anchor> findAnchors<T>(List<T> source, int sStart, int sEnd,
    List<T> target, int tStart, int tEnd) {
  final sourceCount = sEnd - sStart;
  final targetCount = tEnd - tStart;

  // Anchors only pay off on larger inputs.
  if (sourceCount < 64 || targetCount < 64) return const [];

  // Caches the hash of every element of a range in a 32-bit typed list.
  Int32List hashesOf(List<T> items, int start, int count) {
    final hashes = Int32List(count);
    for (var offset = 0; offset < count; offset++) {
      hashes[offset] = items[start + offset].hashCode;
    }
    return hashes;
  }

  final sourceHashes = hashesOf(source, sStart, sourceCount);
  final targetHashes = hashesOf(target, tStart, targetCount);

  // Occurrence counts are kept per hash, not per element. Distinct elements
  // that share a hash push the count above one and are therefore never
  // considered. That only loses potential anchors; a wrong pair can never be
  // produced because equality is verified before a match is accepted.
  // The counting relies on IntIntMap.get yielding 0 for a key that was never
  // put.
  final capacity = sourceCount + targetCount;
  final sourceOccurrences = IntIntMap(capacity);
  final targetOccurrences = IntIntMap(capacity);
  for (final hash in sourceHashes) {
    sourceOccurrences.put(hash, sourceOccurrences.get(hash) + 1);
  }
  for (final hash in targetHashes) {
    targetOccurrences.put(hash, targetOccurrences.get(hash) + 1);
  }

  // Remember where each single-occurrence source hash lives. Positions are
  // stored shifted by one so that 0 keeps meaning "no entry".
  final sourcePositionPlusOne = IntIntMap(capacity);
  for (var offset = 0; offset < sourceCount; offset++) {
    final hash = sourceHashes[offset];
    if (sourceOccurrences.get(hash) != 1) continue;
    sourcePositionPlusOne.put(hash, sStart + offset + 1);
  }

  // Walk the target range and pair up elements that are unique on both sides.
  final matches = <Anchor>[];
  for (var offset = 0; offset < targetCount; offset++) {
    final hash = targetHashes[offset];
    if (sourceOccurrences.get(hash) != 1) continue;
    if (targetOccurrences.get(hash) != 1) continue;

    final shifted = sourcePositionPlusOne.get(hash);
    if (shifted <= 0) continue;

    final sourceIndex = shifted - 1;
    final targetIndex = tStart + offset;
    // Equal hashes are not proof of equal elements, so compare for real.
    if (source[sourceIndex] == target[targetIndex]) {
      matches.add(Anchor(sourceIndex, targetIndex));
    }
  }

  if (matches.length < 4) return const [];

  // Matches are in target order; keep the longest part that is also in
  // source order.
  final ordered = longestIncreasingSubsequence(matches);
  if (ordered.length < 2) return const [];
  return ordered;
}

/// Returns the longest subsequence of [candidates] whose `sourceIndex` values
/// are strictly increasing.
///
/// The relative order of the candidates is preserved. An empty input yields
/// an empty list.
List<Anchor> longestIncreasingSubsequence(List<Anchor> candidates) {
  final count = candidates.length;
  if (count == 0) return const [];

  // previous[i] is the candidate that comes right before candidate i in the
  // best subsequence ending at i, or -1 when i starts a subsequence.
  final previous = Int32List(count)..fillRange(0, count, -1);

  // tailAt[k] is the candidate that currently ends an increasing
  // subsequence of length k + 1; only the first `best` slots are in use.
  final tailAt = Int32List(count);
  var best = 0;

  for (var current = 0; current < count; current++) {
    final key = candidates[current].sourceIndex;

    // Binary search for the first tail whose sourceIndex is not below key.
    var lo = 0;
    var hi = best;
    while (lo < hi) {
      final probe = (lo + hi) ~/ 2;
      if (candidates[tailAt[probe]].sourceIndex < key) {
        lo = probe + 1;
      } else {
        hi = probe;
      }
    }

    if (lo > 0) {
      previous[current] = tailAt[lo - 1];
    }
    tailAt[lo] = current;
    if (lo == best) {
      best++;
    }
  }

  if (best == 0) return const [];

  // Follow the predecessor links from the end of the longest subsequence,
  // filling the result back to front so it comes out in forward order.
  var cursor = tailAt[best - 1];
  final chain =
      List<Anchor>.filled(best, candidates[cursor], growable: true);
  for (var slot = best - 1; slot >= 0; slot--) {
    chain[slot] = candidates[cursor];
    cursor = previous[cursor];
  }
  return chain;
}
159 changes: 155 additions & 4 deletions lib/src/diffutil_impl.dart
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import 'dart:typed_data';
import 'package:diffutil_dart/src/diff_delegate.dart';
import 'package:diffutil_dart/src/model/diffupdate.dart';
import 'package:diffutil_dart/src/model/diffupdate_with_data.dart';
import 'package:diffutil_dart/src/interner.dart';
import 'package:diffutil_dart/src/anchors.dart';

///Snakes represent a match between two lists. It is optionally prefixed or postfixed with an
///add or remove operation. See the Myers' paper for details.
Expand All @@ -32,14 +34,17 @@ final class _Snake {
required this.endY,
required this.reverse});

/// Whether the horizontal extent (`endX - startX`) and the vertical extent
/// (`endY - startY`) of this snake differ.
@pragma("vm:prefer-inline")
bool hasAdditionOrRemoval() => (endY - startY) != (endX - startX);

/// Whether the vertical extent (`endY - startY`) of this snake is larger
/// than its horizontal extent (`endX - startX`).
@pragma("vm:prefer-inline")
bool isAddition() => (endY - startY) > (endX - startX);

/// The smaller of the horizontal and vertical extents of this snake.
@pragma("vm:prefer-inline")
int diagonalSize() {
  final width = endX - startX;
  final height = endY - startY;
  return min(width, height);
}
Expand Down Expand Up @@ -109,10 +114,12 @@ final class _Range {
newListStart = 0,
newListEnd = 0;

/// Number of old-list positions between `oldListStart` and `oldListEnd`.
@pragma("vm:prefer-inline")
int oldSize() => oldListEnd - oldListStart;

/// Number of new-list positions between `newListStart` and `newListEnd`.
@pragma("vm:prefer-inline")
int newSize() => newListEnd - newListStart;
Expand Down Expand Up @@ -666,10 +673,123 @@ DiffResult<T> calculateListDiff<T>(
bool detectMoves = true,
bool Function(T, T)? equalityChecker,
}) {
return calculateDiff<T>(
ListDiffDelegate<T>(oldList, newList, equalityChecker),
detectMoves: detectMoves,
);
if (equalityChecker != null) {
return calculateDiff<T>(
ListDiffDelegate<T>(oldList, newList, equalityChecker),
detectMoves: detectMoves,
);
} else {
// Try anchor-based splitting for large lists
// Threshold: e.g. 1000 items?
if (oldList.length > 1000 && newList.length > 1000) {
return _calculateListDiffWithAnchors(oldList, newList, detectMoves);
}

return calculateDiff<T>(
tryIntern(oldList, newList),
detectMoves: detectMoves,
);
}
}

/// Diffs [oldList] against [newList] by splitting the problem at anchors.
///
/// Anchors are position pairs that are accepted as matches up front. The
/// ranges between consecutive anchors, and the tail after the last one, are
/// diffed independently through [_SubDelegate] views. Their diagonals are
/// shifted back into whole-list coordinates and joined with one size-1
/// diagonal per anchor.
///
/// When no anchors are available the lists are diffed in a single pass.
/// Sub-diffs never detect moves; [detectMoves] is only handed to the final
/// result, which is built over the whole lists.
DiffResult<T> _calculateListDiffWithAnchors<T>(
    List<T> oldList, List<T> newList, bool detectMoves) {
  final delegate = tryIntern(oldList, newList);

  final List<Anchor> anchors;
  if (delegate is InterningDelegate) {
    // Rebuild anchors from the negative IDs of the interning delegate.
    // NOTE(review): presumably the interner gives a negative ID to items it
    // considers anchor candidates - confirm in interner.dart.
    final oldIds = delegate.oldIds;
    final newIds = delegate.newIds;

    final oldPositionById = <int, int>{};
    for (var i = 0; i < oldIds.length; i++) {
      if (oldIds[i] < 0) {
        oldPositionById[oldIds[i]] = i;
      }
    }

    final reconstructed = <Anchor>[];
    if (oldPositionById.isNotEmpty) {
      for (var i = 0; i < newIds.length; i++) {
        final id = newIds[i];
        if (id >= 0) continue;
        final oldPos = oldPositionById[id];
        if (oldPos != null) {
          reconstructed.add(Anchor(oldPos, i));
        }
      }
    }

    // The pairs above are in new-list order, but nothing guarantees that
    // their old-list positions increase as well (an anchored item may have
    // moved). The splitting below needs both to increase, otherwise a range
    // would get a negative size. Keeping the longest increasing run enforces
    // that and leaves an already ordered list unchanged.
    anchors = longestIncreasingSubsequence(reconstructed);
  } else {
    // findAnchors already returns anchors ordered in both lists.
    anchors =
        findAnchors(oldList, 0, oldList.length, newList, 0, newList.length);
  }

  if (anchors.isEmpty) {
    return calculateDiff<T>(delegate, detectMoves: detectMoves);
  }

  final diagonals = <_Diagonal>[];

  // Appends a diagonal, extending the previous one when the new diagonal
  // starts exactly where the previous one ends.
  void addDiagonal(int x, int y, int size) {
    if (diagonals.isNotEmpty) {
      final last = diagonals.last;
      if (last.x + last.size == x && last.y + last.size == y) {
        diagonals[diagonals.length - 1] =
            _Diagonal(last.x, last.y, last.size + size);
        return;
      }
    }
    diagonals.add(_Diagonal(x, y, size));
  }

  // Start of the range that has not been diffed yet, in each list.
  var oldStart = 0;
  var newStart = 0;

  // Diffs the pending range up to (oldEnd, newEnd), both exclusive, and
  // merges its diagonals after shifting them to whole-list coordinates.
  void diffPendingRange(int oldEnd, int newEnd) {
    if (oldEnd > oldStart || newEnd > newStart) {
      final subDelegate =
          _SubDelegate(delegate, oldStart, oldEnd, newStart, newEnd);
      final subResult = calculateDiff<T>(subDelegate, detectMoves: false);
      for (final d in subResult._mDiagonals) {
        addDiagonal(d.x + oldStart, d.y + newStart, d.size);
      }
    }
  }

  for (final anchor in anchors) {
    final oldAnchor = anchor.sourceIndex;
    final newAnchor = anchor.targetIndex;

    // Diff whatever lies between the previous anchor and this one.
    diffPendingRange(oldAnchor, newAnchor);

    // The anchor itself is a match of size 1.
    addDiagonal(oldAnchor, newAnchor, 1);

    oldStart = oldAnchor + 1;
    newStart = newAnchor + 1;
  }

  // Diff the tail after the last anchor.
  diffPendingRange(oldList.length, newList.length);

  // Build the final result over the whole lists with fresh status arrays.
  return DiffResult._(delegate, diagonals, Int32List(oldList.length),
      Int32List(newList.length), detectMoves);
}

/// you can use this function if you want to use custom list-types, such as BuiltList
Expand Down Expand Up @@ -854,3 +974,34 @@ _Snake? backwardSnake(_Range range, DiffDelegate cb, _CenteredArray forward,
}
return null;
}

/// A [DiffDelegate] view onto a sub-range of another delegate.
///
/// Positions passed to this delegate are relative to the sub-range; they are
/// offset by [oldStart] / [newStart] before being forwarded to [parent].
class _SubDelegate implements DiffDelegate {
  /// The delegate that every call is forwarded to.
  final DiffDelegate parent;

  /// First old-list position of the sub-range.
  final int oldStart;

  /// Old-list position just past the sub-range.
  final int oldEnd;

  /// First new-list position of the sub-range.
  final int newStart;

  /// New-list position just past the sub-range.
  final int newEnd;

  _SubDelegate(
      this.parent, this.oldStart, this.oldEnd, this.newStart, this.newEnd);

  @override
  int getOldListSize() {
    return oldEnd - oldStart;
  }

  @override
  int getNewListSize() {
    return newEnd - newStart;
  }

  @override
  bool areItemsTheSame(int oldPos, int newPos) =>
      parent.areItemsTheSame(oldStart + oldPos, newStart + newPos);

  @override
  bool areContentsTheSame(int oldPos, int newPos) =>
      parent.areContentsTheSame(oldStart + oldPos, newStart + newPos);

  @override
  Object? getChangePayload(int oldPos, int newPos) =>
      parent.getChangePayload(oldStart + oldPos, newStart + newPos);
}
Loading