Skip to content

Commit 3cdbfbf

Browse files
committed
Basic node comparison
1 parent 926730c commit 3cdbfbf

17 files changed

+611
-207
lines changed

AngleSharp.Diffing.sln

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@ Microsoft Visual Studio Solution File, Format Version 12.00
33
# Visual Studio Version 16
44
VisualStudioVersion = 16.0.29230.61
55
MinimumVisualStudioVersion = 10.0.40219.1
6-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Egil.AngleSharp.Diffing", "src\Egil.AngleSharp.Diffing.csproj", "{2BFFA992-22C2-4A65-94D8-CA06E81D2364}"
6+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Egil.AngleSharp.Diffing", "src\Egil.AngleSharp.Diffing.csproj", "{2BFFA992-22C2-4A65-94D8-CA06E81D2364}"
77
EndProject
88
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{F81A673B-5C04-4C04-BFD8-7E017CF2E1AE}"
99
ProjectSection(SolutionItems) = preProject
1010
Directory.Build.props = Directory.Build.props
1111
LICENSE = LICENSE
12+
README.md = README.md
1213
EndProjectSection
1314
EndProject
14-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Egil.AngleSharp.DiffingTests", "tests\Egil.AngleSharp.DiffingTests.csproj", "{18203D98-66B4-4736-B79A-3B7D02EFA9E8}"
15+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Egil.AngleSharp.DiffingTests", "tests\Egil.AngleSharp.DiffingTests.csproj", "{18203D98-66B4-4736-B79A-3B7D02EFA9E8}"
1516
EndProject
1617
Global
1718
GlobalSection(SolutionConfigurationPlatforms) = preSolution

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
Steps in finding differences:
22

33
- NodeSelector/NodeFilter - filters out nodes that should not be part of the comparison
4-
- Inline selector(ignorer)
4+
- Inline filter(ignorer)
55
- NodeMatcher - matches a control-node with a test-node for comparison
6-
- Inline matcher(css selector)
76
- Compare nodes by:
87
- AttributeFilter - filters out attributes that should not be compared
98
- Inline filter(ignorer)

src/Diff.cs

Lines changed: 0 additions & 25 deletions
This file was deleted.

src/DifferenceEngine.cs

Lines changed: 0 additions & 40 deletions
This file was deleted.

src/Egil.AngleSharp.Diffing.csproj

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
<PropertyGroup>
44
<TargetFramework>netstandard2.0</TargetFramework>
5-
65
</PropertyGroup>
76

87
<ItemGroup>

src/HtmlDifferenceEngine.cs

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Diagnostics;
4+
using System.Linq;
5+
using AngleSharp.Dom;
6+
7+
namespace Egil.AngleSharp.Diffing
8+
{
9+
/// <summary>
/// Outcome of comparing a control node with its matched test node.
/// </summary>
public enum CompareResult
{
    /// <summary>The compared nodes are considered the same.</summary>
    Same = 0,

    /// <summary>The compared nodes are considered different.</summary>
    Different = 1
}
14+
15+
/// <summary>
/// Strategy supplying the filtering and comparison decisions used when diffing nodes.
/// </summary>
public interface IHtmlCompareStrategy
{
    /// <summary>
    /// Compares the control and test sources held by <paramref name="comparison"/>.
    /// </summary>
    /// <param name="comparison">The matched control/test pair to compare.</param>
    /// <returns>
    /// <see cref="CompareResult.Same"/> or <see cref="CompareResult.Different"/>.
    /// </returns>
    CompareResult Compare(in Comparison comparison);

    /// <summary>
    /// Decides whether <paramref name="node"/> takes part in the comparison.
    /// </summary>
    /// <remarks>
    /// NOTE(review): presumably <c>true</c> means "keep the node" and <c>false</c> means "skip it" - confirm against callers.
    /// </remarks>
    bool NodeFilter(INode node);

    /// <summary>
    /// Decides whether <paramref name="attribute"/> of <paramref name="owningElement"/> takes part in the comparison.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the meaning of the return value is not established by any visible caller - confirm.
    /// </remarks>
    bool AttributeFilter(IAttr attribute, IElement owningElement);
}
21+
22+
/// <summary>
/// Matches the nodes of a control node list with the nodes of a test node list and
/// reports missing, unexpected and different nodes as <see cref="Diff"/> values.
/// </summary>
public class HtmlDifferenceEngine
{
    private readonly IHtmlCompareStrategy _strategy;

    /// <summary>
    /// Creates an engine that uses <paramref name="strategy"/> to compare matched nodes.
    /// </summary>
    /// <param name="strategy">The strategy used to compare matched control/test nodes.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="strategy"/> is null.</exception>
    public HtmlDifferenceEngine(IHtmlCompareStrategy strategy)
    {
        // Fail fast here instead of with a NullReferenceException on the first comparison.
        _strategy = strategy ?? throw new ArgumentNullException(nameof(strategy));
    }

    /// <summary>
    /// Compares <paramref name="controlNodes"/> with <paramref name="testNodes"/>.
    /// </summary>
    /// <param name="controlNodes">The nodes that describe the expected content.</param>
    /// <param name="testNodes">The nodes that are checked against the control nodes.</param>
    /// <returns>The differences found; empty when none are found.</returns>
    /// <exception cref="ArgumentNullException">Thrown when either node list is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// Thrown when a node that must be reported is not a comment, element or text node.
    /// </exception>
    public IReadOnlyCollection<Diff> Compare(INodeList controlNodes, INodeList testNodes) => DoCompare(controlNodes, testNodes);

    private IReadOnlyCollection<Diff> DoCompare(INodeList controlNodes, INodeList testNodes)
    {
        if (controlNodes is null) throw new ArgumentNullException(nameof(controlNodes));
        if (testNodes is null) throw new ArgumentNullException(nameof(testNodes));

        var result = new List<Diff>();

        var comparisons = GetComparisons(controlNodes, testNodes);

        foreach (var comparison in comparisons)
        {
            result.AddRange(GetDifferences(in comparison));
            // TODO: child nodes of matched nodes are not compared yet.
        }

        // Every test node that was not paired with a control node is reported as unexpected.
        var matchedTestNodes = comparisons.Where(x => x.Test.HasValue).Select(x => x.Test.Value);
        var unmatchedTestNodes = testNodes.Select((n, i) => new ComparisonSource(n, i)).Except(matchedTestNodes);

        foreach (var node in unmatchedTestNodes)
        {
            result.Add(node.Node.NodeType switch
            {
                NodeType.Comment => new Diff(DiffType.UnexpectedComment, test: node),
                NodeType.Element => new Diff(DiffType.UnexpectedElement, test: node),
                NodeType.Text => new Diff(DiffType.UnexpectedTextNode, test: node),
                _ => throw new InvalidOperationException($"Unexpected nodetype, {node.Node.NodeType}, in test nodes list.")
            });
        }

        return result;
    }

    /// <summary>
    /// Pairs every control node with a test node, or with no test node when none is found.
    /// The returned collection has one entry per control node, in control node order.
    /// </summary>
    private ICollection<Comparison> GetComparisons(INodeList controlNodes, INodeList testNodes)
    {
        // When both lists have the same length, nodes are paired by position only.
        var evenTreeBranch = controlNodes.Length == testNodes.Length;
        var result = new Comparison[controlNodes.Length];
        var lastFoundIndex = -1;

        for (int index = 0; index < controlNodes.Length; index++)
        {
            var controlSource = new ComparisonSource(controlNodes[index], index);

            // TODO: IHtmlCompareStrategy.NodeFilter is not applied yet; decide how skipped
            // nodes should affect the matching below.
            var testSource = evenTreeBranch
                ? EqualTreeSizeNodeMatcher(in controlSource)
                : ForwardSearchingNodeMatcher(in controlSource);

            result[index] = new Comparison(controlSource, testSource);
        }

        return result;

        // Pairs the control node with the test node at the same index.
        ComparisonSource? EqualTreeSizeNodeMatcher(in ComparisonSource comparisonSource)
        {
            return new ComparisonSource(testNodes[comparisonSource.Index], comparisonSource.Index);
        }

        // Searches forward through the test nodes for the first node with the same node name
        // as the control node. Returns null when no such node is found.
        ComparisonSource? ForwardSearchingNodeMatcher(in ComparisonSource comparisonSource)
        {
            // Start at the control node's own index when the test list is long enough, but
            // never at or before the test node matched last, so a test node is matched at most once.
            var start = testNodes.Length > comparisonSource.Index
                ? Math.Max(comparisonSource.Index, lastFoundIndex + 1)
                : lastFoundIndex + 1;

            for (var index = start; index < testNodes.Length; index++)
            {
                // TODO: should this be a stronger comparison than just the node name?
                if (comparisonSource.Node.NodeName == testNodes[index].NodeName)
                {
                    lastFoundIndex = index;
                    return new ComparisonSource(testNodes[index], index);
                }
            }

            return null;
        }
    }

    /// <summary>
    /// Produces the differences for a single comparison: a "missing" diff when no test node
    /// was found, or a "different" diff when the strategy reports the pair as different.
    /// </summary>
    private ICollection<Diff> GetDifferences(in Comparison comparison)
    {
        var result = new List<Diff>(1);

        if (comparison.Status == MatchStatus.TestNodeNotFound)
        {
            result.Add(comparison.Control.Node.NodeType switch
            {
                NodeType.Comment => new Diff(DiffType.MissingComment, comparison.Control),
                NodeType.Element => new Diff(DiffType.MissingElement, comparison.Control),
                NodeType.Text => new Diff(DiffType.MissingTextNode, comparison.Control),
                // The node type is read from the control node, so the message names the control list.
                _ => throw new InvalidOperationException($"Unexpected nodetype, {comparison.Control.Node.NodeType}, in control nodes list.")
            });
        }
        else if (comparison.Status == MatchStatus.TestNodeFound && _strategy.Compare(in comparison) == CompareResult.Different)
        {
            result.Add(comparison.Control.Node.NodeType switch
            {
                NodeType.Comment => new Diff(DiffType.DifferentComment, comparison.Control, comparison.Test),
                NodeType.Element => new Diff(DiffType.DifferentElementTagName, comparison.Control, comparison.Test),
                NodeType.Text => new Diff(DiffType.DifferentTextNode, comparison.Control, comparison.Test),
                _ => throw new InvalidOperationException($"Unexpected nodetype, {comparison.Control.Node.NodeType}, in control nodes list.")
            });
        }

        return result;
    }
}
152+
153+
/// <summary>
/// The kinds of differences that can be reported in a <see cref="Diff"/>.
/// </summary>
public enum DiffType
{
    /// <summary>A comment was compared with its matched test node and found different.</summary>
    DifferentComment = 0,

    /// <summary>An element was compared with its matched test node and found different.</summary>
    DifferentElementTagName = 1,

    /// <summary>A text node was compared with its matched test node and found different.</summary>
    DifferentTextNode = 2,

    /// <summary>No test node was found for a control comment.</summary>
    MissingComment = 3,

    /// <summary>No test node was found for a control element.</summary>
    MissingElement = 4,

    /// <summary>No test node was found for a control text node.</summary>
    MissingTextNode = 5,

    /// <summary>A test comment was not matched with any control node.</summary>
    UnexpectedComment = 6,

    /// <summary>A test element was not matched with any control node.</summary>
    UnexpectedElement = 7,

    /// <summary>A test text node was not matched with any control node.</summary>
    UnexpectedTextNode = 8
}
165+
166+
/// <summary>
/// A single difference found between control and test nodes.
/// </summary>
[DebuggerDisplay("Diff={Type} Control={Control?.Node.NodeName}[{Control?.Index}] Test={Test?.Node.NodeName}[{Test?.Index}]")]
public readonly struct Diff : IEquatable<Diff>
{
    /// <summary>The kind of difference.</summary>
    public DiffType Type { get; }

    /// <summary>The control source involved in the difference, or null when there is none.</summary>
    public ComparisonSource? Control { get; }

    /// <summary>The test source involved in the difference, or null when there is none.</summary>
    public ComparisonSource? Test { get; }

    /// <summary>
    /// Creates a difference of the given <paramref name="type"/>.
    /// </summary>
    /// <param name="type">The kind of difference.</param>
    /// <param name="control">The control source, if any.</param>
    /// <param name="test">The test source, if any.</param>
    public Diff(DiffType type, in ComparisonSource? control = null, in ComparisonSource? test = null)
    {
        Type = type;
        Control = control;
        Test = test;
    }

    #region Equals and Hashcode
    /// <summary>
    /// Two diffs are equal when their type, control source and test source are all equal.
    /// </summary>
    // Control and Test take part in equality so that diffs of the same type that refer to
    // different nodes are not treated as duplicates.
    public bool Equals(Diff other) => Type == other.Type && Control == other.Control && Test == other.Test;

    /// <inheritdoc/>
    public override int GetHashCode() => (Type, Control, Test).GetHashCode();

    /// <inheritdoc/>
    public override bool Equals(object obj) => obj is Diff other && Equals(other);

    /// <summary>Returns true when <paramref name="left"/> equals <paramref name="right"/>.</summary>
    public static bool operator ==(Diff left, Diff right) => left.Equals(right);

    /// <summary>Returns true when <paramref name="left"/> does not equal <paramref name="right"/>.</summary>
    public static bool operator !=(Diff left, Diff right) => !(left == right);
    #endregion
}
190+
191+
/// <summary>
/// A control source paired with the test source it was matched with, if any.
/// </summary>
public readonly struct Comparison : IEquatable<Comparison>
{
    /// <summary>
    /// Creates a comparison of <paramref name="control"/> and <paramref name="test"/>.
    /// </summary>
    /// <param name="control">The control source.</param>
    /// <param name="test">The matched test source, or null when none was found.</param>
    public Comparison(in ComparisonSource control, in ComparisonSource? test)
    {
        Control = control;
        Test = test;
    }

    /// <summary>The control source.</summary>
    public ComparisonSource Control { get; }

    /// <summary>The matched test source, or null when none was found.</summary>
    public ComparisonSource? Test { get; }

    /// <summary>
    /// <see cref="MatchStatus.TestNodeFound"/> when <see cref="Test"/> has a value,
    /// otherwise <see cref="MatchStatus.TestNodeNotFound"/>.
    /// </summary>
    public MatchStatus Status
    {
        get
        {
            if (Test.HasValue)
            {
                return MatchStatus.TestNodeFound;
            }

            return MatchStatus.TestNodeNotFound;
        }
    }

    /// <summary>Two comparisons are equal when both their control and test sources are equal.</summary>
    public bool Equals(Comparison other)
    {
        if (Control != other.Control)
        {
            return false;
        }

        return Test == other.Test;
    }

    /// <inheritdoc/>
    public override bool Equals(object obj)
    {
        return obj is Comparison candidate && Equals(candidate);
    }

    /// <inheritdoc/>
    public override int GetHashCode()
    {
        var fields = (Control, Test);
        return fields.GetHashCode();
    }

    /// <summary>Returns true when <paramref name="left"/> equals <paramref name="right"/>.</summary>
    public static bool operator ==(Comparison left, Comparison right)
    {
        return left.Equals(right);
    }

    /// <summary>Returns true when <paramref name="left"/> does not equal <paramref name="right"/>.</summary>
    public static bool operator !=(Comparison left, Comparison right)
    {
        return !left.Equals(right);
    }
}
213+
214+
/// <summary>
/// A node together with an index.
/// </summary>
public readonly struct ComparisonSource : IEquatable<ComparisonSource>
{
    /// <summary>
    /// Creates a source for <paramref name="node"/> with the given <paramref name="index"/>.
    /// </summary>
    /// <param name="node">The node.</param>
    /// <param name="index">The index stored with the node.</param>
    public ComparisonSource(INode node, int index)
    {
        Index = index;
        Node = node;
    }

    /// <summary>The node.</summary>
    public INode Node { get; }

    /// <summary>The index stored with the node.</summary>
    public int Index { get; }

    /// <summary>
    /// Two sources are equal when they refer to the same node instance and have the same index.
    /// </summary>
    public bool Equals(ComparisonSource other)
    {
        return Index == other.Index && ReferenceEquals(Node, other.Node);
    }

    /// <inheritdoc/>
    public override bool Equals(object obj)
    {
        return obj is ComparisonSource candidate && Equals(candidate);
    }

    /// <inheritdoc/>
    public override int GetHashCode()
    {
        var fields = (Node, Index);
        return fields.GetHashCode();
    }

    /// <summary>Returns true when <paramref name="left"/> equals <paramref name="right"/>.</summary>
    public static bool operator ==(ComparisonSource left, ComparisonSource right)
    {
        return left.Equals(right);
    }

    /// <summary>Returns true when <paramref name="left"/> does not equal <paramref name="right"/>.</summary>
    public static bool operator !=(ComparisonSource left, ComparisonSource right)
    {
        return !left.Equals(right);
    }
}
233+
}

src/IComparison.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
using AngleSharp.Dom;
2+
3+
namespace Egil.AngleSharp.Diffing
4+
{
5+
/// <summary>
/// Describes whether a test node was found for a control node.
/// </summary>
public enum MatchStatus
{
    /// <summary>The default value.</summary>
    None = 0,

    /// <summary>No test node was found for the control node.</summary>
    TestNodeNotFound = 1,

    /// <summary>A test node was found for the control node.</summary>
    TestNodeFound = 2
}
11+
12+
/// <summary>
/// Read-only view of a control node, its index, an optional test node and the match status.
/// </summary>
public interface IComparison
{
    /// <summary>The match status of this comparison.</summary>
    MatchStatus Status { get; }

    /// <summary>The control node.</summary>
    INode Control { get; }

    /// <summary>
    /// The index associated with the control node.
    /// NOTE(review): presumably its position in the control node list - confirm.
    /// </summary>
    int ControlIndex { get; }

    /// <summary>The test node; may be null.</summary>
    INode? Test { get; }
}
19+
}

src/IComparisonSource.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
using AngleSharp.Dom;
2+
3+
namespace Egil.AngleSharp.Diffing
4+
{
5+
/// <summary>
/// Read-only view of a node together with an index.
/// </summary>
public interface IComparisonSource
{
    /// <summary>The node.</summary>
    INode Node { get; }

    /// <summary>
    /// The index associated with the node.
    /// NOTE(review): presumably its position in the node list it came from - confirm.
    /// </summary>
    int Index { get; }
}
10+
}

src/IDifference.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
namespace Egil.AngleSharp.Diffing
2+
{
3+
/// <summary>
/// Marker interface; it declares no members.
/// </summary>
public interface IDifference
{
}
4+
}

0 commit comments

Comments
 (0)