diff --git a/src/tooling/docs-assembler/Cli/DeployCommands.cs b/src/tooling/docs-assembler/Cli/DeployCommands.cs
index 0f67a3baf..6a6df9c28 100644
--- a/src/tooling/docs-assembler/Cli/DeployCommands.cs
+++ b/src/tooling/docs-assembler/Cli/DeployCommands.cs
@@ -38,9 +38,15 @@ private void AssignOutputLogger()
/// The environment to build
/// The S3 bucket name to deploy to
/// The file to write the plan to
+ /// The maximum ratio of deletions allowed in the plan, as a fraction (0.0 to 1.0) of total sync requests
///
public async Task Plan(
- string environment, string s3BucketName, string @out = "", Cancel ctx = default)
+ string environment,
+ string s3BucketName,
+ string @out = "",
+ float deleteThreshold = 0.2f,
+ Cancel ctx = default
+ )
{
AssignOutputLogger();
await using var collector = new ConsoleDiagnosticsCollector(logFactory, githubActionsService)
@@ -52,11 +58,25 @@ public async Task Plan(
var s3Client = new AmazonS3Client();
IDocsSyncPlanStrategy planner = new AwsS3SyncPlanStrategy(logFactory, s3Client, s3BucketName, assembleContext);
var plan = await planner.Plan(ctx);
- ConsoleApp.Log("Total files to sync: " + plan.Count);
+ ConsoleApp.Log("Total files to sync: " + plan.TotalSyncRequests);
ConsoleApp.Log("Total files to delete: " + plan.DeleteRequests.Count);
ConsoleApp.Log("Total files to add: " + plan.AddRequests.Count);
ConsoleApp.Log("Total files to update: " + plan.UpdateRequests.Count);
ConsoleApp.Log("Total files to skip: " + plan.SkipRequests.Count);
+ if (plan.TotalSyncRequests == 0)
+ {
+ collector.EmitError(@out, $"Plan has no files to sync so no plan will be written.");
+ await collector.StopAsync(ctx);
+ return collector.Errors;
+ }
+ var validationResult = planner.Validate(plan, deleteThreshold);
+ if (!validationResult.Valid) // too many deletions for this plan size: report and return before any plan is written
+ {
+ collector.EmitError(@out, $"Plan is invalid, delete ratio: {validationResult.DeleteRatio}, threshold: {validationResult.DeleteThreshold} over {plan.TotalSyncRequests:N0} files while plan has {plan.DeleteRequests.Count:N0} deletions");
+ await collector.StopAsync(ctx);
+ return collector.Errors;
+ }
+
if (!string.IsNullOrEmpty(@out))
{
var output = SyncPlan.Serialize(plan);
@@ -91,7 +111,7 @@ public async Task Apply(
var transferUtility = new TransferUtility(s3Client, new TransferUtilityConfig
{
ConcurrentServiceRequests = Environment.ProcessorCount * 2,
- MinSizeBeforePartUpload = AwsS3SyncPlanStrategy.PartSize
+ MinSizeBeforePartUpload = S3EtagCalculator.PartSize
});
IDocsSyncApplyStrategy applier = new AwsS3SyncApplyStrategy(logFactory, s3Client, transferUtility, s3BucketName, assembleContext, collector);
if (!File.Exists(planFile))
diff --git a/src/tooling/docs-assembler/Deploying/AwsS3SyncPlanStrategy.cs b/src/tooling/docs-assembler/Deploying/AwsS3SyncPlanStrategy.cs
index b8ea406ea..18c38f65e 100644
--- a/src/tooling/docs-assembler/Deploying/AwsS3SyncPlanStrategy.cs
+++ b/src/tooling/docs-assembler/Deploying/AwsS3SyncPlanStrategy.cs
@@ -4,6 +4,7 @@
using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
+using System.IO.Abstractions;
using System.Security.Cryptography;
using Amazon.S3;
using Amazon.S3.Model;
@@ -11,12 +12,80 @@
namespace Documentation.Assembler.Deploying;
-public class AwsS3SyncPlanStrategy(ILoggerFactory logFactory, IAmazonS3 s3Client, string bucketName, AssembleContext context) : IDocsSyncPlanStrategy
+public interface IS3EtagCalculator
+{
+ Task CalculateS3ETag(string filePath, Cancel ctx = default);
+}
+
+public class S3EtagCalculator(ILoggerFactory logFactory, IFileSystem readFileSystem) : IS3EtagCalculator
{
- internal const long PartSize = 5 * 1024 * 1024; // 5MB
private readonly ILogger _logger = logFactory.CreateLogger();
+
private static readonly ConcurrentDictionary EtagCache = new();
+ internal const long PartSize = 5 * 1024 * 1024; // 5MB
+
+ /// <summary>Computes the S3-style ETag of a local file (plain MD5 up to PartSize, otherwise MD5 of the per-part MD5s plus "-{parts}"); results are cached per path.</summary>
+ [SuppressMessage("Security", "CA5351:Do Not Use Broken Cryptographic Algorithms")]
+ public async Task CalculateS3ETag(string filePath, Cancel ctx = default)
+ {
+ if (EtagCache.TryGetValue(filePath, out var cachedEtag))
+ {
+ _logger.LogDebug("Using cached ETag for {Path}", filePath);
+ return cachedEtag;
+ }
+
+ var fileSize = readFileSystem.FileInfo.New(filePath).Length;
+
+ // For files up to 5MB, use simple MD5 (matching TransferUtility behavior)
+ if (fileSize <= PartSize)
+ {
+ await using var stream = readFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
+ var smallBuffer = new byte[fileSize];
+ var bytesRead = await stream.ReadAtLeastAsync(smallBuffer, smallBuffer.Length, throwOnEndOfStream: false, cancellationToken: ctx); // one ReadAsync may return fewer bytes than requested
+ var hash = MD5.HashData(smallBuffer.AsSpan(0, bytesRead));
+ var etag = Convert.ToHexStringLower(hash);
+ EtagCache[filePath] = etag;
+ return etag;
+ }
+
+ // For files over 5MB, use multipart format with 5MB parts (matching TransferUtility)
+ var parts = (int)Math.Ceiling((double)fileSize / PartSize);
+ // Each part must be read in full (or up to end of stream) so part boundaries stay at PartSize
+ await using var fileStream = readFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
+ var partBuffer = new byte[PartSize];
+ var partHashes = new List();
+
+ for (var i = 0; i < parts; i++)
+ {
+ var bytesRead = await fileStream.ReadAtLeastAsync(partBuffer, partBuffer.Length, throwOnEndOfStream: false, cancellationToken: ctx);
+ var partHash = MD5.HashData(partBuffer.AsSpan(0, bytesRead));
+ partHashes.Add(partHash);
+ }
+
+ // Concatenate all part hashes
+ var concatenatedHashes = partHashes.SelectMany(h => h).ToArray();
+ var finalHash = MD5.HashData(concatenatedHashes);
+
+ var multipartEtag = $"{Convert.ToHexStringLower(finalHash)}-{parts}";
+ EtagCache[filePath] = multipartEtag;
+ return multipartEtag;
+ }
+}
+
+public class AwsS3SyncPlanStrategy(
+ ILoggerFactory logFactory,
+ IAmazonS3 s3Client,
+ string bucketName,
+ AssembleContext context,
+ IS3EtagCalculator? calculator = null
+)
+ : IDocsSyncPlanStrategy
+{
+ private readonly ILogger _logger = logFactory.CreateLogger();
+
+ private readonly IS3EtagCalculator _s3EtagCalculator = calculator ?? new S3EtagCalculator(logFactory, context.ReadFileSystem);
+
private bool IsSymlink(string path)
{
var fileInfo = context.ReadFileSystem.FileInfo.New(path);
@@ -42,7 +111,7 @@ await Parallel.ForEachAsync(localObjects, ctx, async (localFile, token) =>
if (remoteObjects.TryGetValue(destinationPath, out var remoteObject))
{
// Check if the ETag differs for updates
- var localETag = await CalculateS3ETag(localFile.FullName, token);
+ var localETag = await _s3EtagCalculator.CalculateS3ETag(localFile.FullName, token);
var remoteETag = remoteObject.ETag.Trim('"'); // Remove quotes from remote ETag
if (localETag == remoteETag)
{
@@ -89,14 +158,44 @@ await Parallel.ForEachAsync(localObjects, ctx, async (localFile, token) =>
return new SyncPlan
{
+ TotalSourceFiles = localObjects.Length,
DeleteRequests = deleteRequests.ToList(),
AddRequests = addRequests.ToList(),
UpdateRequests = updateRequests.ToList(),
SkipRequests = skipRequests.ToList(),
- Count = deleteRequests.Count + addRequests.Count + updateRequests.Count + skipRequests.Count
+ TotalSyncRequests = deleteRequests.Count + addRequests.Count + updateRequests.Count + skipRequests.Count
};
}
+ /// <inheritdoc />
+ public PlanValidationResult Validate(SyncPlan plan, float deleteThreshold)
+ {
+ // No local files, or an empty plan: reject up front so the ratio below is never 0/0 (NaN compares as valid)
+ if (plan.TotalSourceFiles == 0 || plan.TotalSyncRequests == 0)
+ {
+ _logger.LogError("No files to sync");
+ return new(false, 1.0f, deleteThreshold);
+ }
+
+ var deleteRatio = (float)plan.DeleteRequests.Count / plan.TotalSyncRequests;
+ // Smaller plans tolerate more churn, since newer assembled documentation is in a higher state of flux:
+ // with at most 100 sync requests the threshold is raised to at least 0.8
+ if (plan.TotalSyncRequests <= 100)
+ deleteThreshold = Math.Max(deleteThreshold, 0.8f);
+ // with at most 1000 sync requests the threshold is raised to at least 0.5
+ else if (plan.TotalSyncRequests <= 1000)
+ deleteThreshold = Math.Max(deleteThreshold, 0.5f);
+
+ // Strictly greater than: a ratio exactly at the (possibly raised) threshold is still valid
+ if (deleteRatio > deleteThreshold)
+ {
+ _logger.LogError("Delete ratio is {Ratio} which is greater than the threshold of {Threshold}", deleteRatio, deleteThreshold);
+ return new(false, deleteRatio, deleteThreshold);
+ }
+
+ return new(true, deleteRatio, deleteThreshold);
+ }
+
private async Task> ListObjects(Cancel ctx = default)
{
var listBucketRequest = new ListObjectsV2Request
@@ -115,51 +214,4 @@ private async Task> ListObjects(Cancel ctx = defaul
return objects.ToDictionary(o => o.Key);
}
-
- [SuppressMessage("Security", "CA5351:Do Not Use Broken Cryptographic Algorithms")]
- private async Task CalculateS3ETag(string filePath, Cancel ctx = default)
- {
- if (EtagCache.TryGetValue(filePath, out var cachedEtag))
- {
- _logger.LogDebug("Using cached ETag for {Path}", filePath);
- return cachedEtag;
- }
-
- var fileInfo = context.ReadFileSystem.FileInfo.New(filePath);
- var fileSize = fileInfo.Length;
-
- // For files under 5MB, use simple MD5 (matching TransferUtility behavior)
- if (fileSize <= PartSize)
- {
- await using var stream = context.ReadFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
- var smallBuffer = new byte[fileSize];
- var bytesRead = await stream.ReadAsync(smallBuffer.AsMemory(0, (int)fileSize), ctx);
- var hash = MD5.HashData(smallBuffer.AsSpan(0, bytesRead));
- var etag = Convert.ToHexStringLower(hash);
- EtagCache[filePath] = etag;
- return etag;
- }
-
- // For files over 5MB, use multipart format with 5MB parts (matching TransferUtility)
- var parts = (int)Math.Ceiling((double)fileSize / PartSize);
-
- await using var fileStream = context.ReadFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
- var partBuffer = new byte[PartSize];
- var partHashes = new List();
-
- for (var i = 0; i < parts; i++)
- {
- var bytesRead = await fileStream.ReadAsync(partBuffer.AsMemory(0, partBuffer.Length), ctx);
- var partHash = MD5.HashData(partBuffer.AsSpan(0, bytesRead));
- partHashes.Add(partHash);
- }
-
- // Concatenate all part hashes
- var concatenatedHashes = partHashes.SelectMany(h => h).ToArray();
- var finalHash = MD5.HashData(concatenatedHashes);
-
- var multipartEtag = $"{Convert.ToHexStringLower(finalHash)}-{parts}";
- EtagCache[filePath] = multipartEtag;
- return multipartEtag;
- }
}
diff --git a/src/tooling/docs-assembler/Deploying/DocsSync.cs b/src/tooling/docs-assembler/Deploying/DocsSync.cs
index 1c44cb940..2ee8634bd 100644
--- a/src/tooling/docs-assembler/Deploying/DocsSync.cs
+++ b/src/tooling/docs-assembler/Deploying/DocsSync.cs
@@ -10,7 +10,10 @@ namespace Documentation.Assembler.Deploying;
public interface IDocsSyncPlanStrategy
{
Task Plan(Cancel ctx = default);
+
+ PlanValidationResult Validate(SyncPlan plan, float deleteThreshold);
}
+public record PlanValidationResult(bool Valid, float DeleteRatio, float DeleteThreshold);
public interface IDocsSyncApplyStrategy
{
@@ -49,8 +52,11 @@ public record SkipRequest : SyncRequest
public record SyncPlan
{
- [JsonPropertyName("count")]
- public required int Count { get; init; }
+ [JsonPropertyName("total_source_files")]
+ public required int TotalSourceFiles { get; init; }
+
+ [JsonPropertyName("total_sync_requests")]
+ public required int TotalSyncRequests { get; init; }
[JsonPropertyName("delete")]
public required IReadOnlyList DeleteRequests { get; init; }
diff --git a/tests/docs-assembler.Tests/src/docs-assembler.Tests/DocsSyncTests.cs b/tests/docs-assembler.Tests/src/docs-assembler.Tests/DocsSyncTests.cs
index 11c58b5c8..01f4c72fe 100644
--- a/tests/docs-assembler.Tests/src/docs-assembler.Tests/DocsSyncTests.cs
+++ b/tests/docs-assembler.Tests/src/docs-assembler.Tests/DocsSyncTests.cs
@@ -4,6 +4,7 @@
using System.IO.Abstractions.TestingHelpers;
using Amazon.S3;
+using Amazon.S3.Model;
using Amazon.S3.Transfer;
using Documentation.Assembler.Deploying;
using Elastic.Documentation.Configuration;
@@ -12,6 +13,7 @@
using FakeItEasy;
using FluentAssertions;
using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Logging.Abstractions;
namespace Documentation.Assembler.Tests;
@@ -39,21 +41,18 @@ public async Task TestPlan()
var configurationContext = TestHelpers.CreateConfigurationContext(fileSystem);
var config = AssemblyConfiguration.Create(configurationContext.ConfigurationFileProvider);
var context = new AssembleContext(config, configurationContext, "dev", collector, fileSystem, fileSystem, null, Path.Combine(Paths.WorkingDirectoryRoot.FullName, ".artifacts", "assembly"));
- A.CallTo(() => mockS3Client.ListObjectsV2Async(A._, A._))
- .Returns(new Amazon.S3.Model.ListObjectsV2Response
+ A.CallTo(() => mockS3Client.ListObjectsV2Async(A._, A._))
+ .Returns(new ListObjectsV2Response
{
S3Objects =
[
- new Amazon.S3.Model.S3Object
- {
- Key = "docs/delete.md",
- },
- new Amazon.S3.Model.S3Object
+ new S3Object { Key = "docs/delete.md" },
+ new S3Object
{
Key = "docs/skip.md",
ETag = "\"69048c0964c9577a399b138b706a467a\""
}, // This is the result of CalculateS3ETag
- new Amazon.S3.Model.S3Object
+ new S3Object
{
Key = "docs/update.md",
ETag = "\"existing-etag\""
@@ -63,9 +62,13 @@ public async Task TestPlan()
var planStrategy = new AwsS3SyncPlanStrategy(new LoggerFactory(), mockS3Client, "fake", context);
// Act
- var plan = await planStrategy.Plan(Cancel.None);
+ var plan = await planStrategy.Plan(ctx: Cancel.None);
// Assert
+
+ plan.TotalSourceFiles.Should().Be(5);
+ plan.TotalSyncRequests.Should().Be(6); // 5 local files plus the delete of the remote-only docs/delete.md
+
plan.AddRequests.Count.Should().Be(3);
plan.AddRequests.Should().Contain(i => i.DestinationPath == "docs/add1.md");
plan.AddRequests.Should().Contain(i => i.DestinationPath == "docs/add2.md");
@@ -81,6 +84,128 @@ public async Task TestPlan()
plan.DeleteRequests.Should().Contain(i => i.DestinationPath == "docs/delete.md");
}
+ [Theory]
+ [InlineData(0, 10_000, 10_000, 0, 10_000, 0.2, false)]
+ [InlineData(8_000, 10_000, 10_000, 0, 2000, 0.2, true)]
+ [InlineData(7900, 10_000, 10_000, 0, 2100, 0.2, false)]
+ [InlineData(10_000, 0, 10_000, 10_000, 0, 0.2, true)]
+ [InlineData(2000, 0, 2000, 2000, 0, 0.2, true)]
+ // When total files to sync is at most 100 the effective threshold is raised to at least 0.8
+ [InlineData(20, 40, 40, 0, 20, 0.2, true)]
+ [InlineData(19, 100, 100, 0, 81, 0.2, false)]
+ // When total files to sync is at most 1000 the effective threshold is raised to at least 0.5
+ [InlineData(200, 400, 400, 0, 200, 0.2, true)]
+ [InlineData(199, 1000, 1000, 0, 801, 0.2, false)]
+ public async Task ValidateAdditionsPlan(
+ int localFiles,
+ int remoteFiles,
+ int totalFilesToSync,
+ int totalFilesToAdd,
+ int totalFilesToRemove,
+ float deleteThreshold,
+ bool valid
+ )
+ {
+ var (planStrategy, plan) = await SetupS3SyncContextSetup(localFiles, remoteFiles);
+
+ // Assert the plan shape: remote ETags use the helper's default "etag", which equals the stubbed
+ // local ETag, so the rows expect only additions (local-only) and deletions (remote-only)
+ plan.TotalSourceFiles.Should().Be(localFiles);
+ plan.TotalSyncRequests.Should().Be(totalFilesToSync);
+
+ plan.AddRequests.Count.Should().Be(totalFilesToAdd);
+ plan.DeleteRequests.Count.Should().Be(totalFilesToRemove);
+ // Validate the plan and check the effective threshold reported for small plans
+ var validationResult = planStrategy.Validate(plan, deleteThreshold);
+ if (plan.TotalSyncRequests <= 100)
+ validationResult.DeleteThreshold.Should().Be(Math.Max(deleteThreshold, 0.8f));
+ else if (plan.TotalSyncRequests <= 1000)
+ validationResult.DeleteThreshold.Should().Be(Math.Max(deleteThreshold, 0.5f));
+
+ validationResult.Valid.Should().Be(valid, $"Delete ratio is {validationResult.DeleteRatio} when maximum is {validationResult.DeleteThreshold}");
+ }
+
+ [Theory]
+ [InlineData(10_000, 0, 10_000, 0, 0, 0.2, true)]
+ [InlineData(2000, 0, 2000, 0, 0, 0.2, true)]
+ [InlineData(0, 10_000, 10_000, 0, 10_000, 0.2, false)]
+ [InlineData(0, 10_000, 10_000, 0, 10_000, 1.0, false)]
+ [InlineData(20, 10_000, 10_000, 20, 9980, 0.2, false)]
+ [InlineData(20, 10_000, 10_000, 20, 9980, 1.0, true)]
+ [InlineData(8_000, 10_000, 10_000, 8000, 2000, 0.2, true)]
+ [InlineData(7900, 10_000, 10_000, 7900, 2100, 0.2, false)]
+ public async Task ValidateUpdatesPlan(
+ int localFiles,
+ int remoteFiles,
+ int totalFilesToSync,
+ int totalFilesToUpdate,
+ int totalFilesToRemove,
+ float deleteThreshold,
+ bool valid
+ )
+ {
+ var (planStrategy, plan) = await SetupS3SyncContextSetup(localFiles, remoteFiles, "different-etag");
+
+ // Assert the plan shape: remote objects carry "different-etag" while the stubbed local ETag is
+ // "etag", so files present on both sides are expected as updates rather than skips
+ plan.TotalSourceFiles.Should().Be(localFiles);
+ plan.TotalSyncRequests.Should().Be(totalFilesToSync);
+
+ plan.UpdateRequests.Count.Should().Be(totalFilesToUpdate);
+ plan.DeleteRequests.Count.Should().Be(totalFilesToRemove);
+ // Validate the plan and check the effective threshold reported for small plans
+ var validationResult = planStrategy.Validate(plan, deleteThreshold);
+ if (plan.TotalSyncRequests <= 100)
+ validationResult.DeleteThreshold.Should().Be(Math.Max(deleteThreshold, 0.8f));
+ else if (plan.TotalSyncRequests <= 1000)
+ validationResult.DeleteThreshold.Should().Be(Math.Max(deleteThreshold, 0.5f));
+
+ validationResult.Valid.Should().Be(valid, $"Delete ratio is {validationResult.DeleteRatio} when maximum is {validationResult.DeleteThreshold}");
+ }
+
+ private static async Task<(AwsS3SyncPlanStrategy planStrategy, SyncPlan plan)> SetupS3SyncContextSetup(
+ int localFiles, int remoteFiles, string etag = "etag")
+ {
+ // Arrange: mock file system holding localFiles documents named docs/file-{i}.md
+ IReadOnlyCollection diagnosticsOutputs = [];
+ var collector = new DiagnosticsCollector(diagnosticsOutputs);
+ var mockS3Client = A.Fake();
+ var fileSystem = new MockFileSystem(new MockFileSystemOptions
+ {
+ CurrentDirectory = Path.Combine(Paths.WorkingDirectoryRoot.FullName, ".artifacts", "assembly")
+ });
+ foreach (var i in Enumerable.Range(0, localFiles))
+ fileSystem.AddFile($"docs/file-{i}.md", new MockFileData($"# Local Document {i}"));
+
+ var configurationContext = TestHelpers.CreateConfigurationContext(fileSystem);
+ var config = AssemblyConfiguration.Create(configurationContext.ConfigurationFileProvider);
+ var context = new AssembleContext(config, configurationContext, "dev", collector, fileSystem, fileSystem, null, Path.Combine(Paths.WorkingDirectoryRoot.FullName, ".artifacts", "assembly"));
+ // Fake bucket listing: remoteFiles objects using the same docs/file-{i}.md keys and the given ETag
+ var s3Objects = new List();
+ foreach (var i in Enumerable.Range(0, remoteFiles))
+ {
+ s3Objects.Add(new S3Object
+ {
+ Key = $"docs/file-{i}.md",
+ ETag = etag
+ });
+ }
+
+ A.CallTo(() => mockS3Client.ListObjectsV2Async(A._, A._))
+ .Returns(new ListObjectsV2Response
+ {
+ S3Objects = s3Objects
+ });
+ // Local ETags are stubbed to "etag", so the default etag argument makes remote and local ETags equal
+ var mockEtagCalculator = A.Fake();
+ A.CallTo(() => mockEtagCalculator.CalculateS3ETag(A._, A._)).Returns("etag");
+ var planStrategy = new AwsS3SyncPlanStrategy(new LoggerFactory(), mockS3Client, "fake", context, mockEtagCalculator);
+
+ // Act: build the plan; the strategy is returned too so callers can run Validate on it
+ var plan = await planStrategy.Plan(ctx: Cancel.None);
+ return (planStrategy, plan);
+ }
+
[Fact]
public async Task TestApply()
{
@@ -102,10 +227,12 @@ public async Task TestApply()
});
var configurationContext = TestHelpers.CreateConfigurationContext(fileSystem);
var config = AssemblyConfiguration.Create(configurationContext.ConfigurationFileProvider);
- var context = new AssembleContext(config, configurationContext, "dev", collector, fileSystem, fileSystem, null, Path.Combine(Paths.WorkingDirectoryRoot.FullName, ".artifacts", "assembly"));
+ var checkoutDirectory = Path.Combine(Paths.WorkingDirectoryRoot.FullName, ".artifacts", "assembly");
+ var context = new AssembleContext(config, configurationContext, "dev", collector, fileSystem, fileSystem, null, checkoutDirectory);
var plan = new SyncPlan
{
- Count = 6,
+ TotalSourceFiles = 5,
+ TotalSyncRequests = 6,
AddRequests = [
new AddRequest { LocalPath = "docs/add1.md", DestinationPath = "docs/add1.md" },
new AddRequest { LocalPath = "docs/add2.md", DestinationPath = "docs/add2.md" },
@@ -124,8 +251,8 @@ public async Task TestApply()
{ DestinationPath = "docs/delete.md" }
]
};
- A.CallTo(() => moxS3Client.DeleteObjectsAsync(A._, A._))
- .Returns(new Amazon.S3.Model.DeleteObjectsResponse
+ A.CallTo(() => moxS3Client.DeleteObjectsAsync(A._, A._))
+ .Returns(new DeleteObjectsResponse
{
HttpStatusCode = System.Net.HttpStatusCode.OK
});
@@ -144,7 +271,7 @@ public async Task TestApply()
transferredFiles.Length.Should().Be(4); // 3 add requests + 1 update request
transferredFiles.Should().NotContain("docs/skip.md");
- A.CallTo(() => moxS3Client.DeleteObjectsAsync(A._, A._))
+ A.CallTo(() => moxS3Client.DeleteObjectsAsync(A._, A._))
.MustHaveHappenedOnceExactly();
A.CallTo(() => moxTransferUtility.UploadDirectoryAsync(A._, A._))