Skip to content

Add caching for Kroki diagrams #1601

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/_docset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ toc:
- file: code.md
- file: comments.md
- file: conditionals.md
- hidden: diagrams.md
- file: diagrams.md
- file: dropdowns.md
- file: definition-lists.md
- file: example_blocks.md
Expand Down
6 changes: 5 additions & 1 deletion docs/syntax/diagrams.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

The `diagram` directive allows you to render various types of diagrams using the [Kroki](https://kroki.io/) service. Kroki supports many diagram types including Mermaid, D2, Graphviz, PlantUML, and more.

::::{warning}
This is an experimental feature. It may change in the future.
::::

## Basic usage

The basic syntax for the diagram directive is:
Expand Down Expand Up @@ -84,7 +88,7 @@ sequenceDiagram
:::::{tab-item} Rendered
::::{diagram} mermaid
sequenceDiagram
participant A as Alice
participant A as Ada
participant B as Bob
A->>B: Hello Bob, how are you?
B-->>A: Great!
Expand Down
4 changes: 4 additions & 0 deletions src/Elastic.Documentation.Configuration/BuildContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Reflection;
using Elastic.Documentation.Configuration.Assembler;
using Elastic.Documentation.Configuration.Builder;
using Elastic.Documentation.Configuration.Diagram;
using Elastic.Documentation.Configuration.Versions;
using Elastic.Documentation.Diagnostics;

Expand Down Expand Up @@ -64,6 +65,8 @@ public string? UrlPathPrefix
init => _urlPathPrefix = value;
}

public DiagramRegistry DiagramRegistry { get; }

public BuildContext(IDiagnosticsCollector collector, IFileSystem fileSystem, VersionsConfiguration versionsConfig)
: this(collector, fileSystem, fileSystem, versionsConfig, null, null)
{
Expand Down Expand Up @@ -105,5 +108,6 @@ public BuildContext(
{
Enabled = false
};
DiagramRegistry = new DiagramRegistry(writeFileSystem);
}
}
209 changes: 209 additions & 0 deletions src/Elastic.Documentation.Configuration/Diagram/DiagramRegistry.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.Collections.Concurrent;
using System.IO.Abstractions;
using Microsoft.Extensions.Logging;

namespace Elastic.Documentation.Configuration.Diagram;

/// <summary>
/// Describes one diagram whose rendered SVG should be stored on disk.
/// The registry combines <paramref name="OutputDirectory"/> and <paramref name="LocalSvgPath"/>
/// to obtain the full file path and fetches the content from <paramref name="EncodedUrl"/>.
/// </summary>
/// <param name="LocalSvgPath">SVG path relative to the output directory; also used as the registry key for this diagram</param>
/// <param name="EncodedUrl">Encoded Kroki URL the SVG content is downloaded from</param>
/// <param name="OutputDirectory">Full path to the output directory that <paramref name="LocalSvgPath"/> is relative to</param>
public record DiagramCacheInfo(string LocalSvgPath, string EncodedUrl, string OutputDirectory);

/// <summary>
/// Registry that tracks the diagrams referenced during a build, downloads their SVG renderings
/// into the output directory, and removes cached SVG files that are no longer referenced.
/// </summary>
/// <param name="writeFileSystem">File system used to create directories, write downloaded SVG files, and delete stale files</param>
public class DiagramRegistry(IFileSystem writeFileSystem) : IDisposable
{
	// Keys are local SVG paths using '/' separators; the value is unused (the dictionary acts as a concurrent set).
	private readonly ConcurrentDictionary<string, bool> _activeDiagrams = new();
	private readonly ConcurrentDictionary<string, DiagramCacheInfo> _diagramsToCache = new();
	private readonly IFileSystem _writeFileSystem = writeFileSystem;
	private readonly HttpClient _httpClient = new() { Timeout = TimeSpan.FromSeconds(30) };

	/// <summary>
	/// Register a diagram for caching (collects info for later batch processing).
	/// Calls with an empty path or URL are ignored; the first registration for a given path wins.
	/// </summary>
	/// <param name="localSvgPath">The local SVG path relative to output directory</param>
	/// <param name="encodedUrl">The encoded Kroki URL for downloading</param>
	/// <param name="outputDirectory">The full path to output directory</param>
	public void RegisterDiagramForCaching(string localSvgPath, string encodedUrl, string outputDirectory)
	{
		if (string.IsNullOrEmpty(localSvgPath) || string.IsNullOrEmpty(encodedUrl))
			return;

		_ = _activeDiagrams.TryAdd(localSvgPath, true);
		_ = _diagramsToCache.TryAdd(localSvgPath, new DiagramCacheInfo(localSvgPath, encodedUrl, outputDirectory));
	}

	/// <summary>
	/// Clear all registered diagrams (called at start of build)
	/// </summary>
	public void Clear()
	{
		_activeDiagrams.Clear();
		_diagramsToCache.Clear();
	}

	/// <summary>
	/// Create cached diagram files by downloading from Kroki in parallel.
	/// Diagrams whose target file already exists are skipped. A failure for one diagram is
	/// logged as a warning and does not stop the remaining downloads.
	/// </summary>
	/// <param name="logger">Logger for reporting download activity</param>
	/// <param name="readFileSystem">File system for checking existing files</param>
	/// <param name="ctx">Token that cancels outstanding downloads; defaults to no cancellation</param>
	/// <returns>Number of diagrams downloaded</returns>
	/// <exception cref="OperationCanceledException">Thrown when <paramref name="ctx"/> is cancelled</exception>
	public async Task<int> CreateDiagramCachedFiles(ILogger logger, IFileSystem readFileSystem, CancellationToken ctx = default)
	{
		if (_diagramsToCache.IsEmpty)
			return 0;

		var downloadCount = 0;

		await Parallel.ForEachAsync(_diagramsToCache.Values, new ParallelOptions
		{
			MaxDegreeOfParallelism = Environment.ProcessorCount,
			CancellationToken = ctx
		}, async (diagramInfo, ct) =>
		{
			if (await TryDownloadDiagram(diagramInfo, logger, readFileSystem, ct))
				_ = Interlocked.Increment(ref downloadCount);
		});

		// No summary log here: the caller receives the count and reports it, so logging it
		// in this method as well would emit the same message twice.
		return downloadCount;
	}

	/// <summary>
	/// Download a single diagram and write it to its target path.
	/// </summary>
	/// <returns><c>true</c> when a new file was written; <c>false</c> when skipped or failed</returns>
	private async Task<bool> TryDownloadDiagram(DiagramCacheInfo diagramInfo, ILogger logger, IFileSystem readFileSystem, CancellationToken ct)
	{
		// Non-null only while a temp file may exist on disk that still has to be removed.
		string? tempPath = null;
		try
		{
			var fullPath = _writeFileSystem.Path.Combine(diagramInfo.OutputDirectory, diagramInfo.LocalSvgPath);

			// Skip if file already exists
			if (readFileSystem.File.Exists(fullPath))
				return false;

			// CreateDirectory is a no-op for existing directories, so no existence check is needed.
			var directory = _writeFileSystem.Path.GetDirectoryName(fullPath);
			if (!string.IsNullOrEmpty(directory))
				_ = _writeFileSystem.Directory.CreateDirectory(directory);

			// Download SVG content
			var svgContent = await _httpClient.GetStringAsync(diagramInfo.EncodedUrl, ct);

			// Validate SVG content
			if (string.IsNullOrWhiteSpace(svgContent) || !svgContent.Contains("<svg", StringComparison.OrdinalIgnoreCase))
			{
				logger.LogWarning("Invalid SVG content received for diagram {LocalPath}", diagramInfo.LocalSvgPath);
				return false;
			}

			// Write to a temp file first, then move it into place so a partially written
			// file never appears under the final name.
			tempPath = fullPath + ".tmp";
			await _writeFileSystem.File.WriteAllTextAsync(tempPath, svgContent, ct);
			_writeFileSystem.File.Move(tempPath, fullPath);
			tempPath = null;

			logger.LogDebug("Downloaded diagram: {LocalPath}", diagramInfo.LocalSvgPath);
			return true;
		}
		catch (OperationCanceledException) when (ct.IsCancellationRequested)
		{
			// Requested cancellation is not a download failure; let it propagate.
			throw;
		}
		catch (HttpRequestException ex)
		{
			logger.LogWarning(ex, "Failed to download diagram {LocalPath}: {Error}", diagramInfo.LocalSvgPath, ex.Message);
			return false;
		}
		catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
		{
			logger.LogWarning(ex, "Timeout downloading diagram {LocalPath}", diagramInfo.LocalSvgPath);
			return false;
		}
		catch (Exception ex)
		{
			logger.LogWarning(ex, "Unexpected error downloading diagram {LocalPath}: {Error}", diagramInfo.LocalSvgPath, ex.Message);
			return false;
		}
		finally
		{
			// A failed write or move would otherwise leave a ".tmp" file behind that
			// CleanupUnusedDiagrams never removes, because it only looks at "*.svg".
			if (tempPath is not null)
				_ = TryDeleteFile(tempPath);
		}
	}

	/// <summary>
	/// Clean up unused diagram files from the cache directory.
	/// Cleanup is opportunistic: failures to enumerate or delete are ignored.
	/// </summary>
	/// <param name="outputDirectory">The output directory containing cached diagrams</param>
	/// <returns>Number of files cleaned up</returns>
	public int CleanupUnusedDiagrams(IDirectoryInfo outputDirectory)
	{
		var graphsDir = _writeFileSystem.Path.Combine(outputDirectory.FullName, "images", "generated-graphs");
		if (!_writeFileSystem.Directory.Exists(graphsDir))
			return 0;

		var cleanedCount = 0;

		try
		{
			// Enumeration is inside the try so that a failure here is treated like
			// every other cleanup failure instead of aborting the caller.
			var existingFiles = _writeFileSystem.Directory.GetFiles(graphsDir, "*.svg", SearchOption.AllDirectories);

			foreach (var file in existingFiles)
			{
				// Registered keys use '/' separators, so normalize before the lookup.
				var relativePath = _writeFileSystem.Path.GetRelativePath(outputDirectory.FullName, file);
				var normalizedPath = relativePath.Replace(_writeFileSystem.Path.DirectorySeparatorChar, '/');

				if (_activeDiagrams.ContainsKey(normalizedPath))
					continue;

				if (TryDeleteFile(file))
					cleanedCount++;
			}

			// Clean up empty directories
			CleanupEmptyDirectories(graphsDir);
		}
		catch
		{
			// Silent failure - cleanup is opportunistic
		}

		return cleanedCount;
	}

	/// <summary>
	/// Delete a file, ignoring any error.
	/// </summary>
	/// <returns><c>true</c> when the delete call completed without throwing</returns>
	private bool TryDeleteFile(string path)
	{
		try
		{
			_writeFileSystem.File.Delete(path);
			return true;
		}
		catch
		{
			// Silent failure - cleanup is opportunistic
			return false;
		}
	}

	/// <summary>
	/// Recursively delete empty sub-directories of <paramref name="directory"/>.
	/// Children are processed first so that a directory emptied by the recursion is removed as well.
	/// The directory passed in is itself never deleted.
	/// </summary>
	private void CleanupEmptyDirectories(string directory)
	{
		try
		{
			foreach (var subDir in _writeFileSystem.Directory.GetDirectories(directory))
			{
				CleanupEmptyDirectories(subDir);

				if (!_writeFileSystem.Directory.EnumerateFileSystemEntries(subDir).Any())
				{
					try
					{
						_writeFileSystem.Directory.Delete(subDir);
					}
					catch
					{
						// Silent failure - cleanup is opportunistic
					}
				}
			}
		}
		catch
		{
			// Silent failure - cleanup is opportunistic
		}
	}

	/// <summary>
	/// Dispose of resources, including the HttpClient
	/// </summary>
	public void Dispose()
	{
		_httpClient.Dispose();
		GC.SuppressFinalize(this);
	}
}
19 changes: 19 additions & 0 deletions src/Elastic.Markdown/DocumentationGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Text.Json;
using Elastic.Documentation;
using Elastic.Documentation.Configuration;
using Elastic.Documentation.Configuration.Diagram;
using Elastic.Documentation.Legacy;
using Elastic.Documentation.Links;
using Elastic.Documentation.Serialization;
Expand All @@ -16,6 +17,7 @@
using Elastic.Markdown.Helpers;
using Elastic.Markdown.IO;
using Elastic.Markdown.Links.CrossLinks;
using Elastic.Markdown.Myst.Directives.Diagram;
using Elastic.Markdown.Myst.Renderers;
using Elastic.Markdown.Myst.Renderers.LlmMarkdown;
using Markdig.Syntax;
Expand Down Expand Up @@ -106,6 +108,9 @@ public async Task ResolveDirectoryTree(Cancel ctx)

public async Task<GenerationResult> GenerateAll(Cancel ctx)
{
// Clear diagram registry for fresh tracking
DocumentationSet.Context.DiagramRegistry.Clear();

var result = new GenerationResult();

var generationState = Context.SkipDocumentationState ? null : GetPreviousGenerationState();
Expand Down Expand Up @@ -142,6 +147,20 @@ public async Task<GenerationResult> GenerateAll(Cancel ctx)
_logger.LogInformation($"Generating links.json");
var linkReference = await GenerateLinkReference(ctx);

// Download diagram files in parallel
var downloadedCount = await DocumentationSet.Context.DiagramRegistry.CreateDiagramCachedFiles(_logger, DocumentationSet.Context.ReadFileSystem);
if (downloadedCount > 0)
{
_logger.LogInformation("Downloaded {DownloadedCount} diagram files from Kroki", downloadedCount);
}

// Clean up unused diagram files
var cleanedCount = DocumentationSet.Context.DiagramRegistry.CleanupUnusedDiagrams(DocumentationSet.OutputDirectory);
if (cleanedCount > 0)
{
_logger.LogInformation("Cleaned up {CleanedCount} unused diagram files", cleanedCount);
}

// ReSharper disable once WithExpressionModifiesAllMembers
return result with
{
Expand Down
49 changes: 49 additions & 0 deletions src/Elastic.Markdown/Myst/Directives/Diagram/DiagramBlock.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.Security.Cryptography;
using System.Text;
using Elastic.Documentation.Configuration.Diagram;
using Elastic.Markdown.Diagnostics;

namespace Elastic.Markdown.Myst.Directives.Diagram;
Expand All @@ -25,6 +28,16 @@ public class DiagramBlock(DirectiveBlockParser parser, ParserContext context) :
/// </summary>
public string? EncodedUrl { get; private set; }

/// <summary>
/// The local SVG path relative to the output directory
/// </summary>
public string? LocalSvgPath { get; private set; }

/// <summary>
/// Content hash for unique identification and caching
/// </summary>
public string? ContentHash { get; private set; }

public override void FinalizeAndValidate(ParserContext context)
{
// Extract diagram type from arguments or default to "mermaid"
Expand All @@ -39,6 +52,12 @@ public override void FinalizeAndValidate(ParserContext context)
return;
}

// Generate content hash for caching
ContentHash = GenerateContentHash(DiagramType, Content);

// Generate local path for cached SVG
LocalSvgPath = GenerateLocalPath(context);

// Generate the encoded URL for Kroki
try
{
Expand All @@ -47,7 +66,12 @@ public override void FinalizeAndValidate(ParserContext context)
catch (Exception ex)
{
this.EmitError($"Failed to encode diagram: {ex.Message}", ex);
return;
}

// Register diagram for tracking, cleanup, and batch caching
var outputDirectory = context.Build.DocumentationOutputDirectory.FullName;
context.DiagramRegistry.RegisterDiagramForCaching(LocalSvgPath, EncodedUrl, outputDirectory);
}

private string? ExtractContent()
Expand All @@ -68,4 +92,29 @@ public override void FinalizeAndValidate(ParserContext context)

return lines.Count > 0 ? string.Join("\n", lines) : null;
}

/// <summary>
/// Derives a short identifier for a diagram from its type and content: the first 12
/// lowercase hexadecimal characters of the SHA-256 digest of "<c>type:content</c>" (UTF-8).
/// </summary>
private string GenerateContentHash(string diagramType, string content)
{
	var digest = SHA256.HashData(Encoding.UTF8.GetBytes(diagramType + ":" + content));

	// 6 bytes render as 12 hex characters.
	return Convert.ToHexString(digest, 0, 6).ToLowerInvariant();
}

/// <summary>
/// Builds the path of the cached SVG for this diagram, under
/// <c>images/generated-graphs</c>. The file name combines the markdown file's name without
/// extension ("unknown" when the source path has no name), the diagram type, and the content hash.
/// </summary>
private string GenerateLocalPath(ParserContext context)
{
	var sourceName = context.MarkdownSourcePath?.Name;
	var stem = sourceName is null
		? "unknown"
		: Path.GetFileNameWithoutExtension(sourceName);

	var svgName = $"{stem}-diagram-{DiagramType}-{ContentHash}.svg";

	// Forward slashes keep the path usable in URLs regardless of the host OS separator.
	return Path
		.Combine("images", "generated-graphs", svgName)
		.Replace(Path.DirectorySeparatorChar, '/');
}


}
Loading
Loading