Skip to content

Commit 9f3e63c

Browse files
Add HTML comment-based link ignoring (#70)
* feat: Add HTML comment-based link ignoring - Support <!-- link-validator-ignore-next --> for ignoring next link - Support <!-- link-validator-ignore --> blocks for ignoring multiple links - Fix UriHelpers to preserve scheme and port for external URLs - Add comprehensive tests for comment-based ignore functionality * refactor: Improve comment syntax for link ignoring - Single <!-- link-validator-ignore --> now ignores just the next link - <!-- begin link-validator-ignore --> / <!-- end link-validator-ignore --> for blocks - More intuitive and consistent syntax * docs: Add documentation for HTML comment-based link ignoring
1 parent 4b6d71d commit 9f3e63c

File tree

5 files changed

+278
-6
lines changed

5 files changed

+278
-6
lines changed

README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,37 @@ link-validator --url <URL> [OPTIONS]
151151
| `--help` | Show help information | - |
152152
| `--version` | Show version information | - |
153153

154+
### Ignoring Links in HTML
155+
156+
LinkValidator supports HTML comments to exclude specific links from validation. This is useful for development URLs, local services, or intentionally broken example links.
157+
158+
#### Ignore Single Link
159+
160+
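Use `<!-- link-validator-ignore -->` to ignore just the next link. The comment must directly precede the `<a>` element at the same nesting level; only text (such as whitespace) or other comments may appear between the comment and the link: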
161+
162+
```html
163+
<!-- link-validator-ignore -->
164+
<a href="http://localhost:3000">This link will be ignored</a>
165+
<a href="http://localhost:9090">This link will be validated</a>
166+
```
167+
168+
#### Ignore Block of Links
169+
170+
Use `<!-- begin link-validator-ignore -->` and `<!-- end link-validator-ignore -->` to ignore all links within a section, including links nested inside other elements between the two comments:
171+
172+
```html
173+
<!-- begin link-validator-ignore -->
174+
<div>
175+
<p>These local development links won't be validated:</p>
176+
<a href="http://localhost:3000">Grafana Dashboard</a>
177+
<a href="http://localhost:16686">Jaeger UI</a>
178+
<a href="http://localhost:9090">Prometheus</a>
179+
</div>
180+
<!-- end link-validator-ignore -->
181+
```
182+
183+
**Note:** Comments are case-insensitive, so `<!-- LINK-VALIDATOR-IGNORE -->`, `<!-- Link-Validator-Ignore -->`, etc. will all work.
184+
154185
### Environment Variables
155186

156187
Override default values using environment variables:
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
// -----------------------------------------------------------------------
2+
// <copyright file="CommentBasedIgnoreSpecs.cs">
3+
// Copyright (C) 2025 - 2025 Aaron Stannard <https://aaronstannard.com/>
4+
// </copyright>
5+
// -----------------------------------------------------------------------
6+
7+
using FluentAssertions;
8+
using LinkValidator.Actors;
9+
using LinkValidator.Util;
10+
using Xunit;
11+
12+
namespace LinkValidator.Tests;
13+
14+
/// <summary>
/// Specs covering how <c>ParseHelpers.ParseLinks</c> treats links marked with
/// <c>link-validator-ignore</c> HTML comments (single-link and begin/end block forms).
/// </summary>
public class CommentBasedIgnoreSpecs
15+
{
16+
// Base URL handed to ParseLinks in every spec below.
private readonly AbsoluteUri _baseUrl = new(new Uri("https://example.com/"));
17+
18+
// A standalone ignore comment placed directly before a link removes that link
// from the parsed results; the links before and after it are still returned.
[Fact]
19+
public void Should_ignore_next_link_with_comment()
20+
{
21+
var html = @"
22+
<html>
23+
<body>
24+
<a href=""https://www.google.com"">Normal Link</a>
25+
<!-- link-validator-ignore -->
26+
<a href=""http://localhost:3000"">Ignored Link</a>
27+
<a href=""https://www.github.com"">Another Normal Link</a>
28+
</body>
29+
</html>";
30+
31+
var links = ParseHelpers.ParseLinks(html, _baseUrl);
32+
33+
links.Should().HaveCount(2);
34+
links.Should().Contain(x => x.uri.Value.Host == "www.google.com");
35+
links.Should().Contain(x => x.uri.Value.Host == "www.github.com");
36+
links.Should().NotContain(x => x.uri.Value.Host == "localhost");
37+
}
38+
39+
// Every link between the begin/end comments is dropped, including links nested
// inside <div>, <p> and <ul>/<li> elements within the block.
[Fact]
40+
public void Should_ignore_all_links_within_comment_block()
41+
{
42+
var html = @"
43+
<html>
44+
<body>
45+
<a href=""https://www.google.com"">Normal Link</a>
46+
<!-- begin link-validator-ignore -->
47+
<div>
48+
<a href=""http://localhost:3000"">Ignored Link 1</a>
49+
<p>Some text with <a href=""http://localhost:9090"">Ignored Link 2</a></p>
50+
<ul>
51+
<li><a href=""http://localhost:16686"">Ignored Link 3</a></li>
52+
</ul>
53+
</div>
54+
<!-- end link-validator-ignore -->
55+
<a href=""https://www.github.com"">Another Normal Link</a>
56+
</body>
57+
</html>";
58+
59+
var links = ParseHelpers.ParseLinks(html, _baseUrl);
60+
61+
links.Should().HaveCount(2);
62+
links.Should().Contain(x => x.uri.Value.Host == "www.google.com");
63+
links.Should().Contain(x => x.uri.Value.Host == "www.github.com");
64+
links.Should().NotContain(x => x.uri.Value.Host == "localhost");
65+
}
66+
67+
// NOTE: despite the name, the fixture contains a single ignore block; what is
// "nested" is the markup inside it (a link directly in the block and one inside
// a child <div>). A link inside a <div> after the end comment must still be returned.
[Fact]
68+
public void Should_handle_nested_ignore_blocks()
69+
{
70+
var html = @"
71+
<html>
72+
<body>
73+
<!-- begin link-validator-ignore -->
74+
<a href=""http://localhost:3000"">Ignored Link 1</a>
75+
<div>
76+
<a href=""http://localhost:9090"">Ignored Link 2</a>
77+
</div>
78+
<!-- end link-validator-ignore -->
79+
<div>
80+
<a href=""https://www.google.com"">Normal Link</a>
81+
</div>
82+
</body>
83+
</html>";
84+
85+
var links = ParseHelpers.ParseLinks(html, _baseUrl);
86+
87+
links.Should().HaveCount(1);
88+
links.Should().Contain(x => x.uri.Value.Host == "www.google.com");
89+
links.Should().NotContain(x => x.uri.Value.Host == "localhost");
90+
}
91+
92+
// The standalone comment applies to exactly one link: the second localhost link
// (port 9090) follows another element and must therefore be returned.
[Fact]
93+
public void Should_only_ignore_next_immediate_link()
94+
{
95+
var html = @"
96+
<html>
97+
<body>
98+
<!-- link-validator-ignore -->
99+
<a href=""http://localhost:3000"">Ignored Link</a>
100+
<a href=""http://localhost:9090"">Not Ignored</a>
101+
<a href=""https://www.google.com"">Normal Link</a>
102+
</body>
103+
</html>";
104+
105+
var links = ParseHelpers.ParseLinks(html, _baseUrl);
106+
107+
links.Should().HaveCount(2);
108+
links.Should().Contain(x => x.uri.Value.ToString().Contains("localhost:9090"));
109+
links.Should().Contain(x => x.uri.Value.Host == "www.google.com");
110+
links.Should().NotContain(x => x.uri.Value.ToString().Contains("localhost:3000"));
111+
}
112+
113+
// Two separate begin/end blocks in one document: links inside either block are
// dropped, while the links before, between and after the blocks are returned.
[Fact]
114+
public void Should_handle_multiple_ignore_blocks()
115+
{
116+
var html = @"
117+
<html>
118+
<body>
119+
<a href=""https://www.google.com"">Normal Link 1</a>
120+
<!-- begin link-validator-ignore -->
121+
<a href=""http://localhost:3000"">Ignored Link 1</a>
122+
<!-- end link-validator-ignore -->
123+
<a href=""https://www.github.com"">Normal Link 2</a>
124+
<!-- begin link-validator-ignore -->
125+
<a href=""http://localhost:9090"">Ignored Link 2</a>
126+
<!-- end link-validator-ignore -->
127+
<a href=""https://www.stackoverflow.com"">Normal Link 3</a>
128+
</body>
129+
</html>";
130+
131+
var links = ParseHelpers.ParseLinks(html, _baseUrl);
132+
133+
links.Should().HaveCount(3);
134+
links.Should().Contain(x => x.uri.Value.Host == "www.google.com");
135+
links.Should().Contain(x => x.uri.Value.Host == "www.github.com");
136+
links.Should().Contain(x => x.uri.Value.Host == "www.stackoverflow.com");
137+
links.Should().NotContain(x => x.uri.Value.Host == "localhost");
138+
}
139+
140+
// Comment keywords are matched regardless of case: an upper-case standalone
// comment, a mixed-case begin comment and an upper-case end comment all take effect.
[Fact]
141+
public void Should_be_case_insensitive_for_comments()
142+
{
143+
var html = @"
144+
<html>
145+
<body>
146+
<!-- LINK-VALIDATOR-IGNORE -->
147+
<a href=""http://localhost:3000"">Ignored Link 1</a>
148+
<!-- Begin Link-Validator-Ignore -->
149+
<a href=""http://localhost:9090"">Ignored Link 2</a>
150+
<!-- END LINK-VALIDATOR-IGNORE -->
151+
<a href=""https://www.google.com"">Normal Link</a>
152+
</body>
153+
</html>";
154+
155+
var links = ParseHelpers.ParseLinks(html, _baseUrl);
156+
157+
links.Should().HaveCount(1);
158+
links.Should().Contain(x => x.uri.Value.Host == "www.google.com");
159+
links.Should().NotContain(x => x.uri.Value.Host == "localhost");
160+
}
161+
}

src/LinkValidator.Tests/End2EndSpecs.ShouldCrawlWebsiteCorrectly.verified.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
### `/` has broken links:
1717

1818
- `/page2.html` (NotFound)
19-
- `http://getakka.net/broken-link` (NotFound)
19+
- `https://getakka.net/broken-link` (NotFound)
2020

2121
### `/about/index.html` has broken links:
2222

23-
- `http://getakka.net/broken-link` (NotFound)
23+
- `https://getakka.net/broken-link` (NotFound)
2424

2525
### `/index.html` has broken links:
2626

2727
- `/page2.html` (NotFound)
28-
- `http://getakka.net/broken-link` (NotFound)
28+
- `https://getakka.net/broken-link` (NotFound)

src/LinkValidator/Util/ParseHelpers.cs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ public static class ParseHelpers
1919

2020
IReadOnlyList<(AbsoluteUri uri, LinkType type)> links = doc.DocumentNode
2121
.SelectNodes("//a[@href]")?
22+
.Where(node => !IsLinkIgnored(node))
2223
.Select(node => node.GetAttributeValue("href", ""))
2324
.Where(href => !string.IsNullOrEmpty(href) && CanMakeAbsoluteHttpUri(baseUrl, href))
2425
.Select(x => ToAbsoluteUri(baseUrl, x))
@@ -27,4 +28,82 @@ public static class ParseHelpers
2728
.ToArray() ?? [];
2829
return links;
2930
}
31+
32+
/// <summary>
/// Decides whether a link must be excluded from validation because of an ignore comment.
/// </summary>
/// <param name="linkNode">The anchor node being considered.</param>
/// <returns>
/// <c>true</c> when the link is inside a begin/end ignore block, or when a standalone
/// <c>link-validator-ignore</c> comment precedes it with no element in between; otherwise <c>false</c>.
/// </returns>
private static bool IsLinkIgnored(HtmlNode linkNode)
{
    // Block-level ignores take precedence over the single-link form.
    if (IsWithinIgnoreBlock(linkNode))
    {
        return true;
    }

    // Walk backwards over the preceding siblings looking for a standalone ignore comment.
    for (var candidate = linkNode.PreviousSibling; candidate != null; candidate = candidate.PreviousSibling)
    {
        // Another element sits between any earlier comment and this link, so the
        // comment (if there is one) belongs to that element instead.
        if (candidate.NodeType == HtmlNodeType.Element)
        {
            return false;
        }

        // Text and other non-comment nodes are skipped.
        if (candidate.NodeType != HtmlNodeType.Comment)
        {
            continue;
        }

        // Strip the comment delimiters so only the directive text is compared.
        var directive = ((HtmlCommentNode)candidate).Comment
            .Trim()
            .Replace("<!--", "")
            .Replace("-->", "")
            .Trim();

        // Only the standalone form counts here; "begin ..." / "end ..." are handled by the block check.
        if (directive.Equals("link-validator-ignore", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
62+
63+
/// <summary>
/// Determines whether <paramref name="node"/> lies between a
/// <c>begin link-validator-ignore</c> comment and the following
/// <c>end link-validator-ignore</c> comment.
/// </summary>
/// <remarks>
/// The check is repeated for the node and each of its ancestors. At every level the parent's
/// children are scanned in document order, toggling a flag on begin/end comments; the answer for
/// that level is the flag's state when the scan reaches the node (or its ancestor). A begin comment
/// with no later end comment therefore covers all following siblings at that level.
/// </remarks>
/// <param name="node">The node to test.</param>
/// <returns><c>true</c> if the node is inside an ignore block at any level; otherwise <c>false</c>.</returns>
private static bool IsWithinIgnoreBlock(HtmlNode node)
{
    // Start from the node and walk up the tree.
    for (var current = node; current != null; current = current.ParentNode)
    {
        var parent = current.ParentNode;
        if (parent == null)
        {
            // Reached the root: there are no siblings left to inspect.
            break;
        }

        var inIgnoreBlock = false;

        foreach (var sibling in parent.ChildNodes)
        {
            if (sibling.NodeType == HtmlNodeType.Comment)
            {
                // Remove comment delimiters and surrounding whitespace before comparing.
                var commentText = ((HtmlCommentNode)sibling).Comment
                    .Trim()
                    .Replace("<!--", "")
                    .Replace("-->", "")
                    .Trim();

                if (commentText.Equals("begin link-validator-ignore", StringComparison.OrdinalIgnoreCase))
                {
                    inIgnoreBlock = true;
                }
                else if (commentText.Equals("end link-validator-ignore", StringComparison.OrdinalIgnoreCase))
                {
                    inIgnoreBlock = false;
                }
            }

            // `current` is the only child of `parent` that is an ancestor-or-self of `node`,
            // so a reference comparison is sufficient; enumerating each sibling's descendants
            // to look for `node` would only add cost without changing the outcome.
            if (sibling == current)
            {
                if (inIgnoreBlock)
                {
                    return true;
                }

                // Siblings after `current` cannot affect this level's result.
                break;
            }
        }
    }

    return false;
}
30109
}

src/LinkValidator/Util/UriHelpers.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,13 +91,14 @@ public static AbsoluteUri ToAbsoluteUri(AbsoluteUri baseUri, string rawUri)
9191
resolvedUri = new Uri(rawUri);
9292
}
9393

94-
// Ensure the scheme matches the base URI
95-
if (resolvedUri.Scheme != baseUri.Value.Scheme)
94+
// Only force scheme matching for internal URLs (same domain)
95+
// External URLs should preserve their original scheme and port
96+
if (resolvedUri.Host == baseUri.Value.Host && resolvedUri.Scheme != baseUri.Value.Scheme)
9697
{
9798
var builder = new UriBuilder(resolvedUri)
9899
{
99100
Scheme = baseUri.Value.Scheme,
100-
Port = -1, // Prevents adding the default port
101+
Port = -1, // Prevents adding the default port for internal URLs
101102
};
102103
resolvedUri = builder.Uri;
103104
}

0 commit comments

Comments
 (0)