Skip to content

Commit 34c362b

Browse files
Track link graphs between pages (#32)
* stashing 404 tracking
* Added link tracking
* adding some test pages for integration testing
* making the crawl / parse functions more testable
* added end2end test with verify
* fix `Directory.Packages.props`
* Added real HTTP server for End2End tests
* add better server-side logging
* fixed HttpStatusCode processing
* fix markdown escaping in reports
  - Add "Linked From" column showing which pages link to each URL
  - Add broken links summary section for actionable insights
  - Fix forward slash escaping (\/→/) in markdown output
  - Add manual test for raw markdown verification
* made links to / broken links report use code markdown blocks
* reorganize broken link report by page
1 parent ef99632 commit 34c362b

20 files changed

+579
-49
lines changed

.gitattributes

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,8 @@
2929
*.txt text eol=crlf
3030

3131
build.sh eol=lf
32+
33+
# ApprovalTests
34+
*.verified.txt text eol=lf working-tree-encoding=UTF-8
35+
*.verified.xml text eol=lf working-tree-encoding=UTF-8
36+
*.verified.json text eol=lf working-tree-encoding=UTF-8

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ api/
1515
# Approval test outputs
1616
*.received.txt
1717
*.txt.bak
18+
*.received.*
1819

1920
## Ignore Visual Studio temporary files, build results, and
2021
## files generated by popular Visual Studio add-ons.
@@ -308,3 +309,4 @@ __pycache__/
308309
*.btm.cs
309310
*.odx.cs
310311
*.xsd.cs
312+
/.claude

src/Directory.Packages.props

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,13 @@
1111
</ItemGroup>
1212
<ItemGroup Label="Test">
1313
<PackageVersion Include="Akka.TestKit" Version="1.5.39"/>
14+
<PackageVersion Include="Akka.TestKit.Xunit2" Version="1.5.39"/>
1415
<PackageVersion Include="coverlet.collector" Version="6.0.4"/>
16+
<PackageVersion Include="Microsoft.AspNetCore.StaticFiles" Version="2.3.0"/>
17+
<PackageVersion Include="Microsoft.AspNetCore.TestHost" Version="9.0.1"/>
1518
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.13.0"/>
19+
<PackageVersion Include="Verify.DiffPlex" Version="3.1.2"/>
20+
<PackageVersion Include="Verify.Xunit" Version="28.9.0" />
1621
<PackageVersion Include="xunit" Version="2.9.3"/>
1722
<PackageVersion Include="xunit.runner.visualstudio" Version="3.0.1"/>
1823
</ItemGroup>
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Sitemap for `http://localhost:8080/`
2+
3+
| URL | StatusCode | Linked From |
4+
| --------------------- | ---------- | ------------------------------------------------------- |
5+
| `/` | OK | \- |
6+
| `/about/contact.html` | OK | `/`, `/about/contact.html`, `/about/index.html` +2 more |
7+
| `/about/index.html` | OK | `/`, `/about/contact.html`, `/about/index.html` +2 more |
8+
| `/index.html` | OK | `/`, `/about/contact.html`, `/about/index.html` +2 more |
9+
| `/page1.html` | OK | `/`, `/index.html` |
10+
| `/page2.html` | NotFound | `/`, `/index.html` |
11+
12+
## 🔴 Pages with Broken Links
13+
14+
### `/` has broken links:
15+
16+
- `/page2.html` (NotFound)
17+
18+
### `/index.html` has broken links:
19+
20+
- `/page2.html` (NotFound)
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// -----------------------------------------------------------------------
2+
// <copyright file="End2EndSpecs.cs">
3+
// Copyright (C) 2025 - 2025 Aaron Stannard <https://aaronstannard.com/>
4+
// </copyright>
5+
// -----------------------------------------------------------------------
6+
7+
using Akka.TestKit.Xunit2;
8+
using LinkValidator.Actors;
9+
using Xunit.Abstractions;
10+
using static LinkValidator.Util.CrawlerHelper;
11+
using static LinkValidator.Util.MarkdownHelper;
12+
13+
namespace LinkValidator.Tests;
14+
15+
/// <summary>
/// End-to-end spec: serves the static site under <see cref="RootPagePath"/> from the
/// shared <see cref="TestWebServerFixture"/>, crawls it, and snapshot-verifies the
/// generated markdown report.
/// </summary>
public class End2EndSpecs : TestKit, IClassFixture<TestWebServerFixture>
{
    /// <summary>
    /// Directory containing the static test pages.
    /// </summary>
    /// <remarks>
    /// Resolved against <see cref="AppContext.BaseDirectory"/> rather than the process
    /// working directory: the pages are copied next to the test assembly at build time,
    /// whereas the working directory depends on how the test runner was launched.
    /// </remarks>
    public static readonly string RootPagePath = Path.Join(AppContext.BaseDirectory, "pages");

    private readonly TestWebServerFixture _webServerFixture;
    private readonly ITestOutputHelper _output;

    /// <summary>
    /// Routes server-side request logging to the xUnit output and makes sure the test
    /// web server is running before any test executes.
    /// </summary>
    /// <param name="output">xUnit output sink for this test class instance.</param>
    /// <param name="webServerFixture">Web server fixture shared across the class.</param>
    public End2EndSpecs(ITestOutputHelper output, TestWebServerFixture webServerFixture) : base(output: output)
    {
        _webServerFixture = webServerFixture;
        _output = output;

        _webServerFixture.Logger = _output.WriteLine;
        _webServerFixture.StartServer(RootPagePath);
    }

    /// <summary>
    /// Crawls the test site from its base URL and verifies the rendered markdown
    /// against the approved snapshot.
    /// </summary>
    [Fact]
    public async Task ShouldCrawlWebsiteCorrectly()
    {
        // sanity check / pre-condition
        _output.WriteLine($"Current directory: {Directory.GetCurrentDirectory()}");
        _output.WriteLine($"RootPagePath: {RootPagePath}");
        _output.WriteLine($"Full RootPagePath: {Path.GetFullPath(RootPagePath)}");

        Assert.True(Directory.Exists(RootPagePath));
        Assert.True(File.Exists(Path.Join(RootPagePath, "index.html")));

        // arrange - the server was started in the constructor, so BaseUrl is populated
        var baseUrl = new AbsoluteUri(new Uri(_webServerFixture.BaseUrl!));

        // act
        var crawlResult = await CrawlWebsite(Sys, baseUrl);
        var markdown = GenerateMarkdown(baseUrl, crawlResult);

        _output.WriteLine("=== RAW MARKDOWN OUTPUT ===");
        _output.WriteLine(markdown);
        _output.WriteLine("=== END RAW MARKDOWN ===");

        // assert
        await Verify(markdown);
    }
}

src/LinkValidator.Tests/LinkValidator.Tests.csproj

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,16 @@
99
</PropertyGroup>
1010

1111
<ItemGroup>
12+
<PackageReference Include="Akka.TestKit.Xunit2" />
1213
<PackageReference Include="coverlet.collector"/>
1314
<PackageReference Include="FluentAssertions"/>
15+
<PackageReference Include="Microsoft.AspNetCore.StaticFiles" />
16+
<PackageReference Include="Microsoft.AspNetCore.TestHost" />
1417
<PackageReference Include="Microsoft.NET.Test.Sdk"/>
18+
<PackageReference Include="Verify.DiffPlex" />
19+
<PackageReference Include="Verify.Xunit" />
1520
<PackageReference Include="xunit"/>
1621
<PackageReference Include="xunit.runner.visualstudio"/>
17-
<PackageReference Include="Akka.TestKit"/>
1822
</ItemGroup>
1923

2024
<ItemGroup>
@@ -25,4 +29,23 @@
2529
<ProjectReference Include="..\LinkValidator\LinkValidator.csproj"/>
2630
</ItemGroup>
2731

32+
<ItemGroup>
33+
<None Remove="pages\index.html" />
34+
<Content Include="pages\index.html">
35+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
36+
</Content>
37+
<None Remove="pages\about\index.html" />
38+
<Content Include="pages\about\index.html">
39+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
40+
</Content>
41+
<None Remove="pages\about\contact.html" />
42+
<Content Include="pages\about\contact.html">
43+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
44+
</Content>
45+
<None Remove="pages\page1.html" />
46+
<Content Include="pages\page1.html">
47+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
48+
</Content>
49+
</ItemGroup>
50+
2851
</Project>
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
using System.Collections.Immutable;
2+
using System.Net;
3+
using LinkValidator.Actors;
4+
using LinkValidator.Util;
5+
6+
namespace LinkValidator.Tests;
7+
8+
/// <summary>
/// Renders a small hand-built crawl result to markdown and checks the raw text,
/// in particular that forward slashes are not backslash-escaped.
/// </summary>
public class ManualMarkdownTest
{
    [Fact]
    public void TestRawMarkdownOutput()
    {
        // Pages taking part in the sample crawl
        var siteRoot = new AbsoluteUri(new Uri("http://localhost:8080"));
        var missingPage = new AbsoluteUri(new Uri("http://localhost:8080/page2.html"));
        var indexPage = new AbsoluteUri(new Uri("http://localhost:8080/index.html"));

        // The root has no recorded inbound links; the missing page is linked
        // from both the root and the index page.
        var rootRecord = new CrawlRecord(siteRoot, HttpStatusCode.OK, ImmutableList<AbsoluteUri>.Empty);
        var missingRecord = new CrawlRecord(
            missingPage,
            HttpStatusCode.NotFound,
            ImmutableList.Create(siteRoot, indexPage));

        var records = ImmutableSortedDictionary.CreateBuilder<string, CrawlRecord>();
        records.Add("/", rootRecord);
        records.Add("/page2.html", missingRecord);

        var markdown = MarkdownHelper.GenerateMarkdown(siteRoot, records.ToImmutable());

        Console.WriteLine("RAW MARKDOWN:");
        Console.WriteLine(markdown);
        Console.WriteLine("END RAW MARKDOWN");

        // The report must contain plain paths and status names, never "\/"
        Assert.DoesNotContain("\\/", markdown);
        Assert.Contains("/page2.html", markdown);
        Assert.Contains("NotFound", markdown);
    }
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
using System.Runtime.CompilerServices;
2+
3+
namespace LinkValidator.Tests.Properties;
4+
5+
/// <summary>
/// Module-level setup for the test assembly.
/// </summary>
public static class Modules
{
    /// <summary>
    /// Module initializer that calls <c>VerifyDiffPlex.Initialize()</c>.
    /// </summary>
    [ModuleInitializer]
    public static void Initialize()
    {
        VerifyDiffPlex.Initialize();
    }
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// -----------------------------------------------------------------------
2+
// <copyright file="TestWebServerCollection.cs">
3+
// Copyright (C) 2025 - 2025 Aaron Stannard <https://aaronstannard.com/>
4+
// </copyright>
5+
// -----------------------------------------------------------------------
6+
7+
namespace LinkValidator.Tests;
8+
9+
/// <summary>
/// Declares the <c>"WebServer"</c> xUnit test collection and attaches
/// <see cref="TestWebServerFixture"/> to it as a collection fixture.
/// </summary>
/// <remarks>
/// Marker type only: it carries the <c>[CollectionDefinition]</c> attribute and the
/// <c>ICollectionFixture&lt;&gt;</c> interface and intentionally has no members.
/// </remarks>
[CollectionDefinition("WebServer")]
public class TestWebServerCollection : ICollectionFixture<TestWebServerFixture>
{
    // Deliberately empty - never instantiated by test code.
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// -----------------------------------------------------------------------
2+
// <copyright file="TestWebServerFixture.cs">
3+
// Copyright (C) 2025 - 2025 Aaron Stannard <https://aaronstannard.com/>
4+
// </copyright>
5+
// -----------------------------------------------------------------------
6+
7+
using Microsoft.AspNetCore.Builder;
8+
using Microsoft.AspNetCore.Hosting;
9+
using Microsoft.Extensions.DependencyInjection;
10+
using Microsoft.Extensions.FileProviders;
11+
using System.Net;
12+
13+
namespace LinkValidator.Tests;
14+
15+
/// <summary>
/// Test fixture that serves a directory of static files over HTTP from a Kestrel
/// host bound to the loopback interface on a fixed port, so crawler tests can run
/// against a real web server.
/// </summary>
public class TestWebServerFixture : IAsyncDisposable
{
    private const int TestPort = 8080;

    private readonly object _lock = new();
    private IWebHost? _webHost;
    private PhysicalFileProvider? _fileProvider;

    /// <summary>
    /// Base URL of the running server; <c>null</c> until <see cref="StartServer"/>
    /// succeeds and again after disposal.
    /// </summary>
    public string? BaseUrl { get; private set; }

    /// <summary>
    /// Optional sink that receives one line per incoming request and one per response.
    /// Read on every request, so it can be swapped while the server is running.
    /// </summary>
    public Action<string>? Logger { get; set; }

    /// <summary>
    /// Starts the static-file server for <paramref name="contentDirectory"/>. Calling
    /// this again while a server is running is a no-op that returns the same fixture.
    /// </summary>
    /// <param name="contentDirectory">Directory whose files are served from the site root.</param>
    /// <returns>This fixture, to allow chaining.</returns>
    /// <exception cref="DirectoryNotFoundException">
    /// <paramref name="contentDirectory"/> does not exist.
    /// </exception>
    public TestWebServerFixture StartServer(string contentDirectory)
    {
        lock (_lock)
        {
            if (_webHost is not null)
            {
                return this; // Allow multiple calls, return existing server
            }

            if (!Directory.Exists(contentDirectory))
            {
                throw new DirectoryNotFoundException($"Content directory not found: {contentDirectory}");
            }

            // One provider shared by both middlewares, tracked so it can be disposed later.
            var fileProvider = new PhysicalFileProvider(Path.GetFullPath(contentDirectory));
            IWebHost? host = null;

            try
            {
                host = new WebHostBuilder()
                    .UseKestrel(options =>
                    {
                        options.Listen(IPAddress.Loopback, TestPort);
                        options.Limits.MaxConcurrentConnections = 100;
                        options.Limits.MaxConcurrentUpgradedConnections = 100;
                    })
                    .ConfigureServices(services =>
                    {
                        services.AddRouting();
                    })
                    .Configure(app =>
                    {
                        // Log each request before and after the rest of the pipeline runs.
                        app.Use(async (context, next) =>
                        {
                            Logger?.Invoke($"Request: {context.Request.Method} {context.Request.Path}");
                            await next();
                            Logger?.Invoke($"Response: {context.Response.StatusCode} for {context.Request.Path}");
                        });

                        app.UseDefaultFiles(new DefaultFilesOptions
                        {
                            FileProvider = fileProvider
                        });
                        app.UseStaticFiles(new StaticFileOptions
                        {
                            FileProvider = fileProvider,
                            RequestPath = ""
                        });

                        app.UseRouting();
                    })
                    .Build();

                host.Start();
            }
            catch
            {
                // Do not publish a host that never started (e.g. port already in use):
                // otherwise later calls would take the early return above and observe
                // a null BaseUrl instead of the real startup failure.
                host?.Dispose();
                fileProvider.Dispose();
                throw;
            }

            _fileProvider = fileProvider;
            _webHost = host;
            BaseUrl = $"http://localhost:{TestPort}";

            return this;
        }
    }

    /// <summary>
    /// Stops and disposes the server if one is running and clears <see cref="BaseUrl"/>.
    /// Safe to call when the server was never started.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        IWebHost? host;
        PhysicalFileProvider? fileProvider;

        // Detach state under the same lock StartServer uses; the await happens
        // outside the lock because a lock cannot span an await.
        lock (_lock)
        {
            host = _webHost;
            fileProvider = _fileProvider;
            _webHost = null;
            _fileProvider = null;
            BaseUrl = null;
        }

        try
        {
            if (host is not null)
            {
                await host.StopAsync();
            }
        }
        finally
        {
            // Release resources even if the graceful stop failed.
            host?.Dispose();
            fileProvider?.Dispose();
        }
    }
}

0 commit comments

Comments
 (0)