Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions src/tooling/docs-assembler/Cli/DeployCommands.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,15 @@ private void AssignOutputLogger()
/// <param name="environment"> The environment to build</param>
/// <param name="s3BucketName">The S3 bucket name to deploy to</param>
/// <param name="out"> The file to write the plan to</param>
/// <param name="deleteThreshold"> The percentage of deletions allowed in the plan as percentage of total files to sync</param>
/// <param name="ctx"></param>
public async Task<int> Plan(
string environment, string s3BucketName, string @out = "", Cancel ctx = default)
string environment,
string s3BucketName,
string @out = "",
float deleteThreshold = 0.2f,
Cancel ctx = default
)
{
AssignOutputLogger();
await using var collector = new ConsoleDiagnosticsCollector(logFactory, githubActionsService)
Expand All @@ -52,11 +58,25 @@ public async Task<int> Plan(
var s3Client = new AmazonS3Client();
IDocsSyncPlanStrategy planner = new AwsS3SyncPlanStrategy(logFactory, s3Client, s3BucketName, assembleContext);
var plan = await planner.Plan(ctx);
ConsoleApp.Log("Total files to sync: " + plan.Count);
ConsoleApp.Log("Total files to sync: " + plan.TotalSyncRequests);
ConsoleApp.Log("Total files to delete: " + plan.DeleteRequests.Count);
ConsoleApp.Log("Total files to add: " + plan.AddRequests.Count);
ConsoleApp.Log("Total files to update: " + plan.UpdateRequests.Count);
ConsoleApp.Log("Total files to skip: " + plan.SkipRequests.Count);
if (plan.TotalSyncRequests == 0)
{
collector.EmitError(@out, $"Plan has no files to sync so no plan will be written.");
await collector.StopAsync(ctx);
return collector.Errors;
}
var validationResult = planner.Validate(plan, deleteThreshold);
if (!validationResult.Valid)
{
collector.EmitError(@out, $"Plan is invalid, delete ratio: {validationResult.DeleteRatio}, threshold: {validationResult.DeleteThreshold} over {plan.TotalSyncRequests:N0} files while plan has {plan.DeleteRequests:N0} deletions");
await collector.StopAsync(ctx);
return collector.Errors;
}

if (!string.IsNullOrEmpty(@out))
{
var output = SyncPlan.Serialize(plan);
Expand Down Expand Up @@ -91,7 +111,7 @@ public async Task<int> Apply(
var transferUtility = new TransferUtility(s3Client, new TransferUtilityConfig
{
ConcurrentServiceRequests = Environment.ProcessorCount * 2,
MinSizeBeforePartUpload = AwsS3SyncPlanStrategy.PartSize
MinSizeBeforePartUpload = S3EtagCalculator.PartSize
});
IDocsSyncApplyStrategy applier = new AwsS3SyncApplyStrategy(logFactory, s3Client, transferUtility, s3BucketName, assembleContext, collector);
if (!File.Exists(planFile))
Expand Down
154 changes: 103 additions & 51 deletions src/tooling/docs-assembler/Deploying/AwsS3SyncPlanStrategy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,88 @@

using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
using System.IO.Abstractions;
using System.Security.Cryptography;
using Amazon.S3;
using Amazon.S3.Model;
using Microsoft.Extensions.Logging;

namespace Documentation.Assembler.Deploying;

public class AwsS3SyncPlanStrategy(ILoggerFactory logFactory, IAmazonS3 s3Client, string bucketName, AssembleContext context) : IDocsSyncPlanStrategy
/// <summary>
/// Computes S3-compatible ETags for local files so they can be compared against
/// the ETags reported for remote S3 objects when planning a sync.
/// </summary>
public interface IS3EtagCalculator
{
	/// <summary>
	/// Calculates the S3 ETag for the file at <paramref name="filePath"/>.
	/// Implementations are expected to mirror the S3 upload format: a plain MD5 for
	/// small files and the multipart <c>{md5}-{partCount}</c> form for large ones.
	/// </summary>
	/// <param name="filePath">Absolute path of the local file to hash.</param>
	/// <param name="ctx">Cancellation token for the read operations.</param>
	/// <returns>The lowercase hex ETag string (without surrounding quotes).</returns>
	Task<string> CalculateS3ETag(string filePath, Cancel ctx = default);
}

/// <summary>
/// Computes S3-compatible ETags for local files, mirroring how <c>TransferUtility</c>
/// uploads them: a plain MD5 for files at or below <see cref="PartSize"/>, and the
/// multipart "MD5-of-concatenated-part-MD5s" format suffixed with <c>-{partCount}</c>
/// for larger files. Results are cached per file path for the lifetime of the process.
/// </summary>
public class S3EtagCalculator(ILoggerFactory logFactory, IFileSystem readFileSystem) : IS3EtagCalculator
{
	// BUGFIX: logger category was ILogger<AwsS3SyncPlanStrategy>; this type should log
	// under its own category so log filtering/attribution works correctly.
	private readonly ILogger<S3EtagCalculator> _logger = logFactory.CreateLogger<S3EtagCalculator>();

	// Static cache keyed by path only: assumes file contents do not change within a
	// single process run — TODO confirm that assumption holds for all callers.
	private static readonly ConcurrentDictionary<string, string> EtagCache = new();

	// S3 TransferUtility's multipart part size (5MB); also the small/large cutoff below.
	internal const long PartSize = 5 * 1024 * 1024; // 5MB

	[SuppressMessage("Security", "CA5351:Do Not Use Broken Cryptographic Algorithms")]
	public async Task<string> CalculateS3ETag(string filePath, Cancel ctx = default)
	{
		if (EtagCache.TryGetValue(filePath, out var cachedEtag))
		{
			_logger.LogDebug("Using cached ETag for {Path}", filePath);
			return cachedEtag;
		}

		var fileInfo = readFileSystem.FileInfo.New(filePath);
		var fileSize = fileInfo.Length;

		// For files under 5MB, use simple MD5 (matching TransferUtility behavior)
		if (fileSize <= PartSize)
		{
			await using var stream = readFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
			var smallBuffer = new byte[fileSize];
			// BUGFIX: a single ReadAsync call may return fewer bytes than requested,
			// which would have hashed a truncated prefix and produced a wrong ETag.
			// ReadExactlyAsync reads the whole file or throws EndOfStreamException.
			await stream.ReadExactlyAsync(smallBuffer.AsMemory(0, (int)fileSize), ctx);
			var etag = Convert.ToHexStringLower(MD5.HashData(smallBuffer));
			EtagCache[filePath] = etag;
			return etag;
		}

		// For files over 5MB, use multipart format with 5MB parts (matching TransferUtility)
		var parts = (int)Math.Ceiling((double)fileSize / PartSize);

		await using var fileStream = readFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
		var partBuffer = new byte[PartSize];
		var partHashes = new List<byte[]>(parts);

		for (var i = 0; i < parts; i++)
		{
			// The final part may be smaller than PartSize; read exactly the expected
			// number of bytes so short reads cannot corrupt the per-part hashes.
			var expected = (int)Math.Min(PartSize, fileSize - i * PartSize);
			await fileStream.ReadExactlyAsync(partBuffer.AsMemory(0, expected), ctx);
			partHashes.Add(MD5.HashData(partBuffer.AsSpan(0, expected)));
		}

		// Multipart ETag = MD5 over the concatenation of all part MD5s, plus "-{parts}".
		var concatenatedHashes = partHashes.SelectMany(h => h).ToArray();
		var finalHash = MD5.HashData(concatenatedHashes);

		var multipartEtag = $"{Convert.ToHexStringLower(finalHash)}-{parts}";
		EtagCache[filePath] = multipartEtag;
		return multipartEtag;
	}
}

public class AwsS3SyncPlanStrategy(
ILoggerFactory logFactory,
IAmazonS3 s3Client,
string bucketName,
AssembleContext context,
IS3EtagCalculator? calculator = null
)
: IDocsSyncPlanStrategy
{
private readonly ILogger<AwsS3SyncPlanStrategy> _logger = logFactory.CreateLogger<AwsS3SyncPlanStrategy>();

private readonly IS3EtagCalculator _s3EtagCalculator = calculator ?? new S3EtagCalculator(logFactory, context.ReadFileSystem);

private bool IsSymlink(string path)
{
var fileInfo = context.ReadFileSystem.FileInfo.New(path);
Expand All @@ -42,7 +111,7 @@ await Parallel.ForEachAsync(localObjects, ctx, async (localFile, token) =>
if (remoteObjects.TryGetValue(destinationPath, out var remoteObject))
{
// Check if the ETag differs for updates
var localETag = await CalculateS3ETag(localFile.FullName, token);
var localETag = await _s3EtagCalculator.CalculateS3ETag(localFile.FullName, token);
var remoteETag = remoteObject.ETag.Trim('"'); // Remove quotes from remote ETag
if (localETag == remoteETag)
{
Expand Down Expand Up @@ -89,14 +158,44 @@ await Parallel.ForEachAsync(localObjects, ctx, async (localFile, token) =>

return new SyncPlan
{
TotalSourceFiles = localObjects.Length,
DeleteRequests = deleteRequests.ToList(),
AddRequests = addRequests.ToList(),
UpdateRequests = updateRequests.ToList(),
SkipRequests = skipRequests.ToList(),
Count = deleteRequests.Count + addRequests.Count + updateRequests.Count + skipRequests.Count
TotalSyncRequests = deleteRequests.Count + addRequests.Count + updateRequests.Count + skipRequests.Count
};
}

/// <inheritdoc />
public PlanValidationResult Validate(SyncPlan plan, float deleteThreshold)
{
	// An empty source tree can never yield a valid plan.
	if (plan.TotalSourceFiles == 0)
	{
		_logger.LogError("No files to sync");
		return new(false, 1.0f, deleteThreshold);
	}

	// Smaller plans get a more generous deletion budget: newly assembled
	// documentation tends to be in a higher state of flux.
	var effectiveThreshold = plan.TotalSyncRequests switch
	{
		<= 100 => Math.Max(deleteThreshold, 0.8f),
		<= 1000 => Math.Max(deleteThreshold, 0.5f),
		_ => deleteThreshold
	};

	var ratio = plan.DeleteRequests.Count / (float)plan.TotalSyncRequests;
	if (ratio <= effectiveThreshold)
		return new(true, ratio, effectiveThreshold);

	_logger.LogError("Delete ratio is {Ratio} which is greater than the threshold of {Threshold}", ratio, effectiveThreshold);
	return new(false, ratio, effectiveThreshold);
}

private async Task<Dictionary<string, S3Object>> ListObjects(Cancel ctx = default)
{
var listBucketRequest = new ListObjectsV2Request
Expand All @@ -115,51 +214,4 @@ private async Task<Dictionary<string, S3Object>> ListObjects(Cancel ctx = defaul

return objects.ToDictionary(o => o.Key);
}

[SuppressMessage("Security", "CA5351:Do Not Use Broken Cryptographic Algorithms")]
private async Task<string> CalculateS3ETag(string filePath, Cancel ctx = default)
{
if (EtagCache.TryGetValue(filePath, out var cachedEtag))
{
_logger.LogDebug("Using cached ETag for {Path}", filePath);
return cachedEtag;
}

var fileInfo = context.ReadFileSystem.FileInfo.New(filePath);
var fileSize = fileInfo.Length;

// For files under 5MB, use simple MD5 (matching TransferUtility behavior)
if (fileSize <= PartSize)
{
await using var stream = context.ReadFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
var smallBuffer = new byte[fileSize];
var bytesRead = await stream.ReadAsync(smallBuffer.AsMemory(0, (int)fileSize), ctx);
var hash = MD5.HashData(smallBuffer.AsSpan(0, bytesRead));
var etag = Convert.ToHexStringLower(hash);
EtagCache[filePath] = etag;
return etag;
}

// For files over 5MB, use multipart format with 5MB parts (matching TransferUtility)
var parts = (int)Math.Ceiling((double)fileSize / PartSize);

await using var fileStream = context.ReadFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
var partBuffer = new byte[PartSize];
var partHashes = new List<byte[]>();

for (var i = 0; i < parts; i++)
{
var bytesRead = await fileStream.ReadAsync(partBuffer.AsMemory(0, partBuffer.Length), ctx);
var partHash = MD5.HashData(partBuffer.AsSpan(0, bytesRead));
partHashes.Add(partHash);
}

// Concatenate all part hashes
var concatenatedHashes = partHashes.SelectMany(h => h).ToArray();
var finalHash = MD5.HashData(concatenatedHashes);

var multipartEtag = $"{Convert.ToHexStringLower(finalHash)}-{parts}";
EtagCache[filePath] = multipartEtag;
return multipartEtag;
}
}
10 changes: 8 additions & 2 deletions src/tooling/docs-assembler/Deploying/DocsSync.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ namespace Documentation.Assembler.Deploying;
/// <summary>
/// Produces and validates a <see cref="SyncPlan"/> describing the add/update/delete/skip
/// operations needed to bring a remote target in line with the local docs output.
/// </summary>
public interface IDocsSyncPlanStrategy
{
	/// <summary>Builds the sync plan by comparing local files against the remote state.</summary>
	/// <param name="ctx">Cancellation token.</param>
	Task<SyncPlan> Plan(Cancel ctx = default);

	/// <summary>
	/// Validates <paramref name="plan"/> against <paramref name="deleteThreshold"/>, the
	/// maximum allowed fraction of deletions relative to total sync requests.
	/// </summary>
	PlanValidationResult Validate(SyncPlan plan, float deleteThreshold);
}
public record PlanValidationResult(bool Valid, float DeleteRatio, float DeleteThreshold);

public interface IDocsSyncApplyStrategy
{
Expand Down Expand Up @@ -49,8 +52,11 @@ public record SkipRequest : SyncRequest

public record SyncPlan
{
[JsonPropertyName("count")]
public required int Count { get; init; }
[JsonPropertyName("total_source_files")]
public required int TotalSourceFiles { get; init; }

[JsonPropertyName("total_sync_requests")]
public required int TotalSyncRequests { get; init; }

[JsonPropertyName("delete")]
public required IReadOnlyList<DeleteRequest> DeleteRequests { get; init; }
Expand Down
Loading
Loading