Skip to content

Commit 19dee4f

Browse files
committed
TextNode comparer and filter handling inline options.
1 parent 26cc100 commit 19dee4f

File tree

7 files changed

+294
-22
lines changed

7 files changed

+294
-22
lines changed

src/Extensions/ElementExtensions.cs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
using System;
2+
using AngleSharp.Dom;
3+
4+
namespace Egil.AngleSharp.Diffing.Extensions
5+
{
6+
/// <summary>
/// Extension methods for reading option values from attributes on <see cref="IElement"/> instances.
/// </summary>
public static class ElementExtensions
{
    /// <summary>
    /// Tries to read the attribute named <paramref name="attributeName"/> from
    /// <paramref name="element"/> and parse its value, ignoring case, via <see cref="Enum.TryParse{TEnum}(string, bool, out TEnum)"/>.
    /// </summary>
    /// <typeparam name="T">The value type to parse the attribute value into.</typeparam>
    /// <param name="element">The element whose attributes are inspected.</param>
    /// <param name="attributeName">The name of the attribute to look up.</param>
    /// <param name="result">The parsed value on success; otherwise the default of <typeparamref name="T"/>.</param>
    /// <returns><c>true</c> if the attribute exists and its value was parsed; otherwise <c>false</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="element"/> is <c>null</c>.</exception>
    public static bool TryGetAttrValue<T>(this IElement element, string attributeName, out T result) where T : struct
    {
        if (element is null)
        {
            throw new ArgumentNullException(nameof(element));
        }

        var attribute = element.Attributes[attributeName];
        if (attribute is null)
        {
            result = default;
            return false;
        }

        // Second argument enables case-insensitive parsing.
        return Enum.TryParse(attribute.Value, true, out result);
    }

    /// <summary>
    /// Looks for the attribute <paramref name="optionName"/> on <paramref name="startElement"/> and then on
    /// each of its parent elements, and parses the first value found as <typeparamref name="TEnum"/>.
    /// </summary>
    /// <returns>The parsed value, or <paramref name="defaultValue"/> when no element in the chain has the attribute.</returns>
    public static TEnum GetInlineOptionOrDefault<TEnum>(this IElement startElement, string optionName, TEnum defaultValue)
        where TEnum : System.Enum
    {
        return GetInlineOptionOrDefault(startElement, optionName, rawValue => rawValue.Parse<TEnum>(), defaultValue);
    }

    /// <summary>
    /// Looks for the attribute <paramref name="optionName"/> on <paramref name="startElement"/> and then on
    /// each of its parent elements, and parses the first value found with <see cref="bool.Parse(string)"/>.
    /// </summary>
    /// <returns>The parsed value, or <paramref name="defaultValue"/> when no element in the chain has the attribute.</returns>
    public static bool GetInlineOptionOrDefault(this IElement startElement, string optionName, bool defaultValue)
    {
        return GetInlineOptionOrDefault(startElement, optionName, rawValue => bool.Parse(rawValue), defaultValue);
    }

    /// <summary>
    /// Walks from <paramref name="startElement"/> up through its parent elements and converts the value of the
    /// first <paramref name="optionName"/> attribute found using <paramref name="resultFunc"/>.
    /// </summary>
    /// <typeparam name="T">The type of the option value.</typeparam>
    /// <param name="startElement">The element to start the search at; a <c>null</c> value yields <paramref name="defaultValue"/>.</param>
    /// <param name="optionName">The name of the attribute holding the option.</param>
    /// <param name="resultFunc">Converts the attribute's string value into <typeparamref name="T"/>.</param>
    /// <param name="defaultValue">The value returned when no element in the chain has the attribute.</param>
    /// <returns>The converted attribute value, or <paramref name="defaultValue"/>.</returns>
    public static T GetInlineOptionOrDefault<T>(this IElement startElement, string optionName, Func<string, T> resultFunc, T defaultValue)
    {
        for (var current = startElement; !(current is null); current = current.ParentElement)
        {
            var optionAttr = current.Attributes[optionName];
            if (!(optionAttr is null))
            {
                // The closest element carrying the attribute wins.
                return resultFunc(optionAttr.Value);
            }
        }

        return defaultValue;
    }
}
38+
}

src/Extensions/EnumExtensions.cs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
namespace Egil.AngleSharp.Diffing.Extensions
8+
{
9+
/// <summary>
/// String extensions for converting text into enum values.
/// </summary>
public static class EnumExtensions
{
    /// <summary>
    /// Converts <paramref name="enumString"/> into a <typeparamref name="TEnum"/> value, ignoring case.
    /// </summary>
    /// <typeparam name="TEnum">The enum type to convert to.</typeparam>
    /// <param name="enumString">The text to convert.</param>
    /// <returns>The <typeparamref name="TEnum"/> value produced by <see cref="Enum.Parse(Type, string, bool)"/>.</returns>
    /// <remarks>Any exception thrown by <see cref="Enum.Parse(Type, string, bool)"/> propagates to the caller.</remarks>
    public static TEnum Parse<TEnum>(this string enumString) where TEnum : System.Enum
        => (TEnum)Enum.Parse(typeof(TEnum), enumString, ignoreCase: true);
}
17+
}

src/Strategies/TextNodeStrategies/TextNodeComparer.cs

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,34 +2,75 @@
22
using System.Text.RegularExpressions;
33
using AngleSharp.Dom;
44
using Egil.AngleSharp.Diffing.Core;
5+
using Egil.AngleSharp.Diffing.Extensions;
56

67
namespace Egil.AngleSharp.Diffing.Strategies.TextNodeStrategies
78
{
89
/// <summary>
/// Compares two text nodes. The whitespace handling and case sensitivity are taken from the
/// constructor arguments, unless overridden inline by a <c>diff:whitespace</c> or
/// <c>diff:ignorecase</c> attribute on the control text node's parent element chain.
/// </summary>
public class TextNodeComparer
{
    private const string PRE_ELEMENTNAME = "PRE";
    private const string WHITESPACE_ATTR_NAME = "diff:whitespace";
    private const string IGNORECASE_ATTR_NAME = "diff:ignorecase";
    private static readonly Regex WhitespaceReplace = new Regex(@"\s+", RegexOptions.Compiled | RegexOptions.CultureInvariant, TimeSpan.FromSeconds(5));

    /// <summary>
    /// Gets the whitespace option used when no inline option applies to the control text node.
    /// </summary>
    public WhitespaceOption Whitespace { get; }

    /// <summary>
    /// Gets whether text is compared case-insensitively when no inline option applies to the control text node.
    /// </summary>
    public bool IgnoreCase { get; }

    /// <summary>
    /// Creates a comparer with the given global options.
    /// </summary>
    /// <param name="option">The global whitespace option.</param>
    /// <param name="ignoreCase">Whether to ignore case by default.</param>
    public TextNodeComparer(WhitespaceOption option = WhitespaceOption.Preserve, bool ignoreCase = false)
    {
        Whitespace = option;
        IgnoreCase = ignoreCase;
    }

    /// <summary>
    /// Compares the control and test nodes of <paramref name="comparison"/> when both are text nodes.
    /// </summary>
    /// <param name="comparison">The control/test pair to compare.</param>
    /// <param name="currentDecision">The decision reached so far.</param>
    /// <returns>
    /// <paramref name="currentDecision"/> when it is already a "same" decision, when either node is not a text node,
    /// or when the texts differ; otherwise <see cref="CompareResult.Same"/>.
    /// </returns>
    public CompareResult Compare(in Comparison comparison, CompareResult currentDecision)
    {
        if (currentDecision.IsSame() || currentDecision.IsSameAndBreak())
        {
            return currentDecision;
        }

        if (comparison.Control.Node is IText controlTextNode && comparison.Test.Node is IText testTextNode)
        {
            return Compare(controlTextNode, testTextNode, currentDecision);
        }

        return currentDecision;
    }

    /// <summary>
    /// Compares the text of the two nodes using the options that apply to the control node.
    /// </summary>
    private CompareResult Compare(IText controlTextNode, IText testTextNode, CompareResult currentDecision)
    {
        var option = GetWhitespaceOption(controlTextNode);
        var compareMethod = GetCompareMethod(controlTextNode);
        var controlText = controlTextNode.Data;
        var testText = testTextNode.Data;

        if (option == WhitespaceOption.Normalize)
        {
            // Trim both ends and collapse every whitespace run to a single space.
            // Each text must be normalized from its own data; normalizing the control text
            // into both variables would make every comparison succeed.
            controlText = WhitespaceReplace.Replace(controlText.Trim(), " ");
            testText = WhitespaceReplace.Replace(testText.Trim(), " ");
        }

        return controlText.Equals(testText, compareMethod)
            ? CompareResult.Same
            : currentDecision;
    }

    /// <summary>
    /// Resolves the whitespace option for <paramref name="textNode"/>: a <c>PRE</c> parent uses its own
    /// <c>diff:whitespace</c> attribute or <see cref="WhitespaceOption.Preserve"/>; any other parent uses the
    /// closest inline <c>diff:whitespace</c> attribute in its parent chain, falling back to <see cref="Whitespace"/>.
    /// </summary>
    private WhitespaceOption GetWhitespaceOption(IText textNode)
    {
        var parent = textNode.ParentElement;

        // A text node without a parent element has nowhere to carry inline options,
        // so the global option applies instead of dereferencing null.
        if (parent is null)
        {
            return Whitespace;
        }

        if (parent.NodeName.Equals(PRE_ELEMENTNAME, StringComparison.Ordinal))
        {
            return parent.TryGetAttrValue(WHITESPACE_ATTR_NAME, out WhitespaceOption option)
                ? option
                : WhitespaceOption.Preserve;
        }

        return parent.GetInlineOptionOrDefault<WhitespaceOption>(WHITESPACE_ATTR_NAME, Whitespace);
    }

    /// <summary>
    /// Resolves the string comparison mode from the closest inline <c>diff:ignorecase</c> attribute in the
    /// control node's parent chain, falling back to <see cref="IgnoreCase"/>.
    /// </summary>
    private StringComparison GetCompareMethod(IText controlTextNode)
    {
        return controlTextNode.ParentElement.GetInlineOptionOrDefault(IGNORECASE_ATTR_NAME, IgnoreCase)
            ? StringComparison.OrdinalIgnoreCase
            : StringComparison.Ordinal;
    }
}
3576
}

src/Strategies/TextNodeStrategies/TextNodeFilter.cs

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,48 @@
55
using System.Threading.Tasks;
66
using AngleSharp.Dom;
77
using Egil.AngleSharp.Diffing.Core;
8+
using Egil.AngleSharp.Diffing.Extensions;
89

910
namespace Egil.AngleSharp.Diffing.Strategies.TextNodeStrategies
1011
{
1112
/// <summary>
/// Filters out whitespace-only text nodes unless the whitespace option that applies to the node is
/// <see cref="WhitespaceOption.Preserve"/>. The option comes from the constructor, unless overridden inline
/// by a <c>diff:whitespace</c> attribute on the text node's parent element chain.
/// </summary>
public class TextNodeFilter
{
    private const string PRE_ELEMENTNAME = "PRE";
    private const string WHITESPACE_ATTR_NAME = "diff:whitespace";

    /// <summary>
    /// Gets the whitespace option used when no inline option applies to a text node.
    /// </summary>
    public WhitespaceOption Whitespace { get; }

    /// <summary>
    /// Creates a filter with the given global whitespace option.
    /// </summary>
    /// <param name="option">The global whitespace option.</param>
    public TextNodeFilter(WhitespaceOption option)
    {
        Whitespace = option;
    }

    /// <summary>
    /// Decides whether the node in <paramref name="source"/> should take part in the comparison.
    /// </summary>
    /// <param name="source">The comparison source whose node is inspected.</param>
    /// <param name="currentDecision">The decision reached so far.</param>
    /// <returns>
    /// <paramref name="currentDecision"/> when it is already an exclude decision or the node is not a text node;
    /// otherwise <see cref="FilterDecision.Exclude"/> for a whitespace-only text node whose option is not
    /// <see cref="WhitespaceOption.Preserve"/>, and <see cref="FilterDecision.Keep"/> for any other text node.
    /// </returns>
    public FilterDecision Filter(in ComparisonSource source, FilterDecision currentDecision)
    {
        if (currentDecision.IsExclude())
        {
            return currentDecision;
        }

        return source.Node is IText textNode ? FilterTextNode(textNode) : currentDecision;
    }

    /// <summary>
    /// Excludes <paramref name="textNode"/> when it is null, empty or whitespace-only and whitespace is not preserved.
    /// </summary>
    private FilterDecision FilterTextNode(IText textNode)
    {
        var option = GetWhitespaceOption(textNode);
        return option != WhitespaceOption.Preserve && string.IsNullOrWhiteSpace(textNode.Data)
            ? FilterDecision.Exclude
            : FilterDecision.Keep;
    }

    /// <summary>
    /// Resolves the whitespace option for <paramref name="textNode"/>: a <c>PRE</c> parent uses its own
    /// <c>diff:whitespace</c> attribute or <see cref="WhitespaceOption.Preserve"/>; any other parent uses the
    /// closest inline <c>diff:whitespace</c> attribute in its parent chain, falling back to <see cref="Whitespace"/>.
    /// </summary>
    private WhitespaceOption GetWhitespaceOption(IText textNode)
    {
        var parent = textNode.ParentElement;

        // A text node without a parent element has nowhere to carry inline options,
        // so the global option applies instead of dereferencing null.
        if (parent is null)
        {
            return Whitespace;
        }

        if (parent.NodeName.Equals(PRE_ELEMENTNAME, StringComparison.Ordinal))
        {
            return parent.TryGetAttrValue(WHITESPACE_ATTR_NAME, out WhitespaceOption option)
                ? option
                : WhitespaceOption.Preserve;
        }

        return parent.GetInlineOptionOrDefault(WHITESPACE_ATTR_NAME, Whitespace);
    }
}
2952
}

tests/Core/ComparisonSourceTest.cs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
using Shouldly;
7+
using Xunit;
8+
9+
namespace Egil.AngleSharp.Diffing.Core
10+
{
11+
/// <summary>
/// Verifies the equality members (<c>Equals</c>, <c>==</c>, <c>!=</c>) of <see cref="ComparisonSource"/>.
/// </summary>
public class ComparisonSourceTest : DiffingTestBase
{
    [Fact(DisplayName = "Two sources are equal if all their properties are equal")]
    public void Test1()
    {
        // Both sources wrap the very same node instance and share index, path and type.
        var sharedNode = ToNode("<br>");
        var left = new ComparisonSource(sharedNode, 1, "path", ComparisonSourceType.Control);
        var right = new ComparisonSource(sharedNode, 1, "path", ComparisonSourceType.Control);

        left.Equals(right).ShouldBeTrue();
        left.Equals((object)right).ShouldBeTrue();
        (left == right).ShouldBeTrue();
        (left != right).ShouldBeFalse();
    }

    [Theory(DisplayName = "Two sources are not equal if one of their properties are not equal")]
    [InlineData(2, "path", ComparisonSourceType.Control)]
    [InlineData(1, "otherPath", ComparisonSourceType.Control)]
    [InlineData(1, "path", ComparisonSourceType.Test)]
    public void Test11(int otherIndex, string otherPath, ComparisonSourceType otherSourceType)
    {
        // Each data row changes exactly one of index, path or source type; the node is shared.
        var sharedNode = ToNode("<br>");
        var left = new ComparisonSource(sharedNode, 1, "path", ComparisonSourceType.Control);
        var right = new ComparisonSource(sharedNode, otherIndex, otherPath, otherSourceType);

        left.Equals(right).ShouldBeFalse();
        (left == right).ShouldBeFalse();
        (left != right).ShouldBeTrue();
    }

    [Fact(DisplayName = "Two sources are not equal if their nodes are not equal")]
    public void Test3()
    {
        // Only the wrapped node differs between the two sources.
        var brNode = ToNode("<br>");
        var left = new ComparisonSource(brNode, 1, "path", ComparisonSourceType.Control);
        var pNode = ToNode("<p>");
        var right = new ComparisonSource(pNode, 1, "path", ComparisonSourceType.Control);

        left.Equals(right).ShouldBeFalse();
        (left == right).ShouldBeFalse();
        (left != right).ShouldBeTrue();
    }
}
52+
}

tests/Strategies/TextNodeStrategies/TextNodeComparerTest.cs

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,80 @@ public void Test9(string whitespace)
6565
sut.Compare(c4, CompareResult.Different).ShouldBe(CompareResult.Same);
6666
}
6767

68-
// When a parent node has overridden the global whitespace option, that overridden option is used
69-
// When whitespace option is Preserve or RemoveWhitespaceNodes, a string ordinal comparison is performed
70-
// When whitespace option is Preserve or RemoveWhitespaceNodes and IgnoreCase is true, a string ordinal ignore case comparison is performed
71-
// When IgnoreCase is true, a case insensitve comparison is performed
72-
// When the parent element is <pre>, the is implicitly set to Preserve, unless explicitly overridden on the element
68+
// The inline diff:whitespace attribute is placed on the text node's parent, grandparent and
// great-grandparent in turn; in every case it must win over the global Preserve option.
[Theory(DisplayName = "When a parent node has a inline whitespace option, that overrides the global whitespace option")]
[InlineData(@"<header><h1><em diff:whitespace=""normalize""> foo bar </em></h1></header>", @"<header><h1><em>foo bar</em></h1></header>")]
[InlineData(@"<header><h1 diff:whitespace=""normalize""><em> foo bar </em></h1></header>", @"<header><h1><em>foo bar</em></h1></header>")]
[InlineData(@"<header diff:whitespace=""normalize""><h1><em> foo bar </em></h1></header>", @"<header><h1><em>foo bar</em></h1></header>")]
public void Test001(string controlHtml, string testHtml)
{
    var sut = new TextNodeComparer(WhitespaceOption.Preserve);

    // header -> h1 -> em -> text node
    var controlText = ToNode(controlHtml).FirstChild.FirstChild.FirstChild;
    var control = new ComparisonSource(controlText, 0, "dummypath", ComparisonSourceType.Control);
    var testText = ToNode(testHtml).FirstChild.FirstChild.FirstChild;
    var test = new ComparisonSource(testText, 0, "dummypath", ComparisonSourceType.Test);
    var pair = new Comparison(control, test);

    var result = sut.Compare(pair, CompareResult.Different);

    result.ShouldBe(CompareResult.Same);
}
81+
82+
// Control and test carry the exact same text, including its surrounding and inner whitespace.
[Theory(DisplayName = "When whitespace option is Preserve or RemoveWhitespaceNodes, a string ordinal comparison is performed")]
[InlineData(WhitespaceOption.Preserve)]
[InlineData(WhitespaceOption.RemoveWhitespaceNodes)]
public void Test003(WhitespaceOption whitespaceOption)
{
    var sut = new TextNodeComparer(whitespaceOption);
    var control = ToComparisonSource(" hello\n\nworld ", ComparisonSourceType.Control);
    var test = ToComparisonSource(" hello\n\nworld ", ComparisonSourceType.Test);
    var pair = new Comparison(control, test);

    var result = sut.Compare(pair, CompareResult.Different);

    result.ShouldBe(CompareResult.Same);
}
93+
94+
// The two texts differ only in letter casing.
[Fact(DisplayName = "When IgnoreCase is true, a string ordinal ignore case comparison is performed")]
public void Test004()
{
    var sut = new TextNodeComparer(ignoreCase: true);
    var control = ToComparisonSource("HELLO WoRlD", ComparisonSourceType.Control);
    var test = ToComparisonSource("hello world", ComparisonSourceType.Test);
    var pair = new Comparison(control, test);

    var result = sut.Compare(pair, CompareResult.Different);

    result.ShouldBe(CompareResult.Same);
}
103+
104+
// The global option is Normalize, but a <pre> parent without an inline option implies Preserve.
[Fact(DisplayName = "When the parent element is <pre>, the whitespace option is implicitly set to Preserve")]
public void Test005()
{
    var sut = new TextNodeComparer(WhitespaceOption.Normalize);

    // The control text holds a run of spaces that only a normalizing comparison would collapse,
    // so a preserving comparison must see the two texts as different.
    var pre = ToComparisonSource("<pre>foo   bar</pre>");
    var controlSource = new ComparisonSource(pre.Node.FirstChild, 0, pre.Path, ComparisonSourceType.Control);
    var testSource = ToComparisonSource("foo bar", ComparisonSourceType.Test);
    var comparison = new Comparison(controlSource, testSource);

    sut.Compare(comparison, CompareResult.Different).ShouldBe(CompareResult.Different);
}
115+
116+
// An inline diff:whitespace attribute on the <pre> element replaces the Preserve option that <pre> implies.
[Fact(DisplayName = "When the parent element is <pre> and the whitespace option is set inline, the inline option is used instead of Preserve")]
public void Test006()
{
    var sut = new TextNodeComparer(WhitespaceOption.Normalize);

    // The control text holds a run of spaces, so the texts only match when the inline
    // normalize option is honored; a preserving comparison would report a difference.
    var pre = ToComparisonSource("<pre diff:whitespace=\"normalize\">foo   bar</pre>");
    var controlSource = new ComparisonSource(pre.Node.FirstChild, 0, pre.Path, ComparisonSourceType.Control);
    var testSource = ToComparisonSource("foo bar", ComparisonSourceType.Test);
    var comparison = new Comparison(controlSource, testSource);

    sut.Compare(comparison, CompareResult.Different).ShouldBe(CompareResult.Same);
}
127+
128+
// The global option is case-sensitive; the inline attribute on the parent <h1> switches it off.
[Fact(DisplayName = "When IgnoreCase='true' inline attribute is present in a parent element, a string ordinal ignore case comparison is performed")]
public void Test007()
{
    var sut = new TextNodeComparer(ignoreCase: false);

    // The closing tag matches the opening <h1>, keeping the control markup well-formed.
    var heading = ToComparisonSource("<h1 diff:ignoreCase=\"True\">HELLO WoRlD</h1>");
    var controlSource = new ComparisonSource(heading.Node.FirstChild, 0, heading.Path, ComparisonSourceType.Control);
    var testSource = ToComparisonSource("hello world", ComparisonSourceType.Test);
    var comparison = new Comparison(controlSource, testSource);

    sut.Compare(comparison, CompareResult.Different).ShouldBe(CompareResult.Same);
}
140+
141+
73142
// When a diff:regex attribute is found on the containing element, the control text is expected to be a regex, which is then used when comparing against the test text node.
74143
}
75144
}

tests/Strategies/TextNodeStrategies/TextNodeFilterTest.cs

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using Shouldly;
1+
using Egil.AngleSharp.Diffing.Core;
2+
using Shouldly;
23
using Xunit;
34

45
namespace Egil.AngleSharp.Diffing.Strategies.TextNodeStrategies
@@ -48,7 +49,38 @@ public void Test4x(string html)
4849
sut.Filter(source, FilterDecision.Exclude).ShouldBe(FilterDecision.Exclude);
4950
}
5051

51-
// When a parent node has overridden the global whitespace option, that overridden option is used
52+
// The global option is Normalize, yet a whitespace-only text node inside <pre> must be kept.
[Fact(DisplayName = "If parent node is <pre> element, the implicit option is Preserved")]
public void Test5()
{
    var filter = new TextNodeFilter(WhitespaceOption.Normalize);
    var preSource = ToComparisonSource("<pre> \n\t </pre>");
    var whitespaceNode = preSource.Node.FirstChild;
    var textSource = new ComparisonSource(whitespaceNode, 0, preSource.Path, ComparisonSourceType.Control);

    var decision = filter.Filter(textSource, FilterDecision.Keep);

    decision.ShouldBe(FilterDecision.Keep);
}
61+
62+
// The inline attribute on <pre> selects RemoveWhitespaceNodes, so the whitespace-only text node is excluded.
[Fact(DisplayName = "If parent node is <pre> element with a diff:whitespace, the option is take from the attribute")]
public void Test5_1()
{
    var filter = new TextNodeFilter(WhitespaceOption.Normalize);
    var preSource = ToComparisonSource("<pre diff:whitespace=\"RemoveWhitespaceNodes\"> \n\t </pre>");
    var whitespaceNode = preSource.Node.FirstChild;
    var textSource = new ComparisonSource(whitespaceNode, 0, preSource.Path, ComparisonSourceType.Control);

    var decision = filter.Filter(textSource, FilterDecision.Keep);

    decision.ShouldBe(FilterDecision.Exclude);
}
71+
72+
// The inline preserve option is placed on the text node's parent, grandparent and
// great-grandparent in turn; in every case it must win over the global Normalize option.
[Theory(DisplayName = "When a parent node has overridden the global whitespace option, that overridden option is used")]
[InlineData(@"<header><h1><em diff:whitespace=""preserve""> </em></h1></header>")]
[InlineData(@"<header><h1 diff:whitespace=""preserve""><em> </em></h1></header>")]
[InlineData(@"<header diff:whitespace=""preserve""><h1><em> </em></h1></header>")]
public void Tes76(string html)
{
    var filter = new TextNodeFilter(WhitespaceOption.Normalize);

    // header -> h1 -> em -> text node
    var whitespaceNode = ToNode(html).FirstChild.FirstChild.FirstChild;
    var textSource = new ComparisonSource(whitespaceNode, 0, "dummypath", ComparisonSourceType.Control);

    var decision = filter.Filter(textSource, FilterDecision.Keep);

    decision.ShouldBe(FilterDecision.Keep);
}
5284
}
5385
}
5486

0 commit comments

Comments
 (0)