Skip to content

Commit 8869863

Browse files
committed
Add new LLM markdown renderer
1 parent 6684cf2 commit 8869863

File tree

19 files changed

+1798
-74
lines changed

19 files changed

+1798
-74
lines changed
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System.Collections.Concurrent;
6+
using System.IO;
7+
using System.IO.Abstractions;
8+
using System.Text;
9+
using Elastic.Documentation.Configuration;
10+
using Elastic.Documentation.Configuration.Builder;
11+
using Elastic.Markdown.Myst.Renderers;
12+
using Markdig.Syntax;
13+
14+
namespace Elastic.Markdown.Exporters;
15+
16+
/// <summary>
17+
/// Exports markdown files as LLM-optimized CommonMark using custom renderers
18+
/// </summary>
19+
public class LlmMarkdownExporter : IMarkdownExporter
20+
{
21+
22+
/// <summary>
/// Nothing needs to be initialized before exporting; returns an already-completed task.
/// </summary>
/// <param name="ctx">Cancellation token (unused).</param>
public ValueTask StartAsync(Cancel ctx = default)
{
    return ValueTask.CompletedTask;
}
23+
24+
/// <summary>
/// Nothing needs to be torn down after exporting; returns an already-completed task.
/// </summary>
/// <param name="ctx">Cancellation token (unused).</param>
public ValueTask StopAsync(Cancel ctx = default)
{
    return ValueTask.CompletedTask;
}
25+
26+
/// <summary>
/// Performs no work once all files have been exported and always reports success.
/// </summary>
/// <param name="outputFolder">The output folder of the build (unused).</param>
/// <param name="ctx">Cancellation token (unused).</param>
/// <returns>A completed task whose result is <c>true</c>.</returns>
public ValueTask<bool> FinishExportAsync(IDirectoryInfo outputFolder, Cancel ctx)
{
    return ValueTask.FromResult(true);
}
27+
28+
/// <summary>
/// Renders a single parsed markdown file as LLM markdown and writes it, prefixed with a
/// metadata header, to the path computed by <see cref="GetLlmOutputFile"/>.
/// </summary>
/// <param name="fileContext">The document, source file and build context of the page being exported.</param>
/// <param name="ctx">Cancellation token passed on to the file write.</param>
/// <returns>Always <c>true</c> once the file has been written.</returns>
public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext, Cancel ctx)
{
    // Render the parsed document with the LLM renderers first, then work out where it goes
    var renderedBody = ConvertToLlmMarkdown(fileContext.Document, fileContext);
    var target = GetLlmOutputFile(fileContext);

    // The target folder may not exist yet
    var targetDirectory = target.Directory;
    if (targetDirectory is not null && !targetDirectory.Exists)
        targetDirectory.Create();

    // Prepend the metadata header and persist through the source file's file system abstraction
    var payload = CreateLlmContentWithMetadata(fileContext, renderedBody);
    var fileSystem = fileContext.SourceFile.SourceFile.FileSystem;
    await fileSystem.File.WriteAllTextAsync(target.FullName, payload, Encoding.UTF8, ctx);

    return true;
}
51+
52+
/// <summary>
/// Renders <paramref name="document"/> with a new <c>LlmMarkdownRenderer</c> and then applies
/// substitutions to the rendered text.
/// </summary>
/// <param name="document">The parsed markdown document to render.</param>
/// <param name="context">Export context; supplies the build context handed to the renderer and the substitutions.</param>
/// <returns>The rendered markdown with substitutions applied.</returns>
private string ConvertToLlmMarkdown(MarkdownDocument document, MarkdownExportFileContext context)
{
    using var buffer = new StringWriter();

    // A renderer is created per document; it receives the build context through its BuildContext property
    var llmRenderer = new LlmMarkdownRenderer(buffer) { BuildContext = context.BuildContext };
    _ = llmRenderer.Render(document);

    // Substitutions are applied to the final rendered text
    return ApplySubstitutions(buffer.ToString(), context);
}
70+
71+
/// <summary>
/// Resolves where the LLM markdown for a page is written, derived from the page's default output file.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>An index file located directly in the documentation output directory becomes <c>llm-docs.md</c> in that directory.</item>
/// <item>Any other index file, e.g. <c>section/index.html</c>, becomes <c>section.md</c> next to the <c>section</c> folder.</item>
/// <item>A regular file, e.g. <c>section/page.html</c>, becomes <c>section/page.md</c>.</item>
/// </list>
/// </remarks>
/// <param name="fileContext">Export context providing the default output file, the file system and the output root.</param>
/// <returns>The file the LLM markdown should be written to.</returns>
/// <exception cref="InvalidOperationException">
/// The default output file has no directory, or a non-root index file's directory has no parent.
/// </exception>
private IFileInfo GetLlmOutputFile(MarkdownExportFileContext fileContext)
{
    var fs = fileContext.SourceFile.SourceFile.FileSystem;
    var defaultOutputFile = fileContext.DefaultOutputFile;
    var directory = defaultOutputFile.Directory
        ?? throw new InvalidOperationException($"Output file '{defaultOutputFile.FullName}' has no directory.");

    // Works for both index.md and index.html style names since only the extension is stripped
    var baseName = Path.GetFileNameWithoutExtension(defaultOutputFile.Name);

    // Regular pages stay in their directory and only get the .md extension
    if (baseName != "index")
        return fs.FileInfo.New(Path.Combine(directory.FullName, $"{baseName}.md"));

    // The root index gets a fixed, well-known name
    var root = fileContext.BuildContext.DocumentationOutputDirectory;
    if (directory.FullName == root.FullName)
        return fs.FileInfo.New(Path.Combine(root.FullName, "llm-docs.md"));

    // Nested index pages are lifted one level up and named after their folder
    var parent = directory.Parent
        ?? throw new InvalidOperationException($"Directory '{directory.FullName}' has no parent directory.");
    return fs.FileInfo.New(Path.Combine(parent.FullName, $"{directory.Name}.md"));
}
107+
108+
/// <summary>
/// Replaces every <c>{{key}}</c> placeholder in <paramref name="content"/> with the value that
/// <see cref="GetCombinedSubstitutions"/> returns for that key.
/// </summary>
/// <param name="content">The rendered markdown text.</param>
/// <param name="context">Export context used to look up the substitutions.</param>
/// <returns>The text after all replacements have been applied one after another.</returns>
private string ApplySubstitutions(string content, MarkdownExportFileContext context) =>
    GetCombinedSubstitutions(context).Aggregate(
        content,
        (text, substitution) => text.Replace("{{" + substitution.Key + "}}", substitution.Value)
    );
122+
123+
/// <summary>
/// Merges the substitutions declared in the file's YAML front matter with the global
/// substitutions from the build configuration.
/// </summary>
/// <remarks>
/// On a key conflict the global value wins. The previous implementation used <c>TryAdd</c> for
/// both sources, which never overwrites an existing key and therefore let the file-specific
/// value win, contrary to the documented intent.
/// </remarks>
/// <param name="context">Export context providing the source file and build configuration.</param>
/// <returns>A new dictionary holding the merged substitutions.</returns>
private ConcurrentDictionary<string, string> GetCombinedSubstitutions(MarkdownExportFileContext context)
{
    var combined = new ConcurrentDictionary<string, string>();

    // File-specific substitutions are optional: a file may have no front matter or no substitutions
    if (context.SourceFile.YamlFrontMatter?.Properties is { } fileSubstitutions)
    {
        foreach (var (key, value) in fileSubstitutions)
            combined[key] = value;
    }

    // Global substitutions are written last with the indexer so they override file-specific ones
    foreach (var (key, value) in context.BuildContext.Configuration.Substitutions)
        combined[key] = value;

    return combined;
}
151+
152+
/// <summary>
/// Prepends a YAML front matter header (title, url, description, products) and an H1 title
/// to the rendered LLM markdown.
/// </summary>
/// <param name="context">Export context providing the source file, parsed document and build configuration.</param>
/// <param name="llmMarkdown">The already rendered markdown body.</param>
/// <returns>The complete file content: front matter, an empty line, <c># title</c>, then the body.</returns>
/// <exception cref="ArgumentException">A product id from the configuration is not present in <c>Products.AllById</c>.</exception>
private string CreateLlmContentWithMetadata(MarkdownExportFileContext context, string llmMarkdown)
{
    var sourceFile = context.SourceFile;
    var metadata = new StringBuilder();

    _ = metadata.AppendLine("---");
    _ = metadata.AppendLine($"title: {ToYamlScalar(sourceFile.Title)}");

    if (!string.IsNullOrEmpty(sourceFile.Url))
    {
        // Only prefix the canonical origin when one is configured; without it the old code
        // emitted a malformed "://path" value, so fall back to the site-relative URL instead.
        var canonicalBaseUrl = context.BuildContext.CanonicalBaseUrl;
        var url = canonicalBaseUrl is null
            ? sourceFile.Url
            : $"{canonicalBaseUrl.Scheme}://{canonicalBaseUrl.Host}{sourceFile.Url}";
        _ = metadata.AppendLine($"url: {url}");
    }

    // Prefer the description from the front matter; otherwise generate one from the document
    var frontMatterDescription = sourceFile.YamlFrontMatter?.Description;
    if (!string.IsNullOrEmpty(frontMatterDescription))
    {
        _ = metadata.AppendLine($"description: {ToYamlScalar(frontMatterDescription)}");
    }
    else
    {
        var generatedDescription = new DescriptionGenerator().GenerateDescription(context.Document);
        _ = metadata.AppendLine($"description: {ToYamlScalar(generatedDescription)}");
    }

    // Lazily evaluated: an unknown product id throws when the sequence is materialized below
    var configProducts = context.BuildContext.Configuration.Products.Select(p =>
    {
        if (Products.AllById.TryGetValue(p, out var product))
            return product;
        throw new ArgumentException($"Invalid product id: {p}");
    });

    var frontMatterProducts = sourceFile.YamlFrontMatter?.Products ?? [];

    // Union already yields each element once, so no additional Distinct() pass is needed
    var allProducts = frontMatterProducts
        .Union(configProducts)
        .ToList();

    if (allProducts.Count > 0)
    {
        _ = metadata.AppendLine("products:");
        foreach (var product in allProducts.Select(p => p.DisplayName).Order())
            _ = metadata.AppendLine($" - {ToYamlScalar(product)}");
    }

    _ = metadata.AppendLine("---");

    // Empty line between the front matter and the content
    _ = metadata.AppendLine();

    // The title is repeated as the H1 heading, followed by the converted markdown
    _ = metadata.AppendLine($"# {sourceFile.Title}");
    _ = metadata.Append(llmMarkdown);

    return metadata.ToString();
}

/// <summary>
/// Returns <paramref name="value"/> as a single-line YAML scalar. Values that are safe as a plain
/// scalar are returned unchanged; anything that could be misread (leading indicator character,
/// <c>": "</c>, <c>" #"</c>, trailing colon, surrounding whitespace, line breaks or tabs) is
/// double-quoted with backslash escapes so it cannot break the front matter block.
/// </summary>
private static string ToYamlScalar(string? value)
{
    if (string.IsNullOrEmpty(value))
        return string.Empty;

    var plainIsSafe =
        !char.IsWhiteSpace(value[0])
        && !char.IsWhiteSpace(value[^1])
        && !"-?:,[]{}#&*!|>'\"%@`".Contains(value[0])
        && !value.EndsWith(':')
        && !value.Contains(": ", StringComparison.Ordinal)
        && !value.Contains(" #", StringComparison.Ordinal)
        && value.IndexOfAny(['\r', '\n', '\t']) < 0;
    if (plainIsSafe)
        return value;

    // Backslashes must be escaped first so the escapes added afterwards are not doubled
    var escaped = value
        .Replace("\\", "\\\\", StringComparison.Ordinal)
        .Replace("\"", "\\\"", StringComparison.Ordinal)
        .Replace("\r\n", "\\n", StringComparison.Ordinal)
        .Replace("\n", "\\n", StringComparison.Ordinal)
        .Replace("\r", "\\n", StringComparison.Ordinal)
        .Replace("\t", "\\t", StringComparison.Ordinal);
    return $"\"{escaped}\"";
}
215+
}
216+
217+
/// <summary>
/// Extension methods for registering the LLM markdown exporter.
/// </summary>
public static class LlmMarkdownExporterExtensions
{
    /// <summary>
    /// Appends a new <see cref="LlmMarkdownExporter"/> to <paramref name="exporters"/>.
    /// </summary>
    /// <param name="exporters">The exporter list to add to.</param>
    public static void AddLlmMarkdownExport(this List<IMarkdownExporter> exporters)
    {
        exporters.Add(new LlmMarkdownExporter());
    }
}

src/Elastic.Markdown/Myst/CodeBlocks/EnhancedCodeBlock.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ public class EnhancedCodeBlock(BlockParser parser, ParserContext context)
2727

2828
public bool InlineAnnotations { get; set; }
2929

30-
public string Language { get; set; } = "unknown";
30+
public string Language { get; set; } = "plaintext";
3131

3232
public string? Caption { get; set; }
3333

src/Elastic.Markdown/Myst/MarkdownParser.cs

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -133,35 +133,46 @@ private static MarkdownPipeline MinimalPipeline
133133
// ReSharper disable once InconsistentNaming
134134
private static MarkdownPipeline? PipelineCached;
135135

136+
/// <summary>
/// Builds a <see cref="MarkdownPipelineBuilder"/> with the standard MyST extension set registered.
/// This is the single place where the MyST pipeline configuration is defined; a new builder is
/// returned on every call so callers can add further extensions before building a pipeline.
/// </summary>
/// <returns>The configured builder, with the indented code block parser removed.</returns>
public static MarkdownPipelineBuilder CreateBaseBuilder()
{
    // Extensions are registered in exactly this order
    var pipelineBuilder = new MarkdownPipelineBuilder()
        .UseInlineAnchors()
        .UsePreciseSourceLocation()
        .UseDiagnosticLinks()
        .UseHeadingsWithSlugs()
        .UseEmphasisExtras(EmphasisExtraOptions.Default)
        .UseInlineAppliesTo()
        .UseInlineIcons()
        .UseInlineKbd()
        .UseSubstitution()
        .UseComments()
        .UseYamlFrontMatter()
        .UseGridTables()
        .UsePipeTables()
        .UseDirectives()
        .UseDefinitionLists()
        .UseEnhancedCodeBlocks()
        .UseHtmxLinkInlineRenderer()
        .DisableHtml()
        .UseWhiteSpaceNormalizer()
        .UseHardBreaks();

    // Remove the parser for indented code blocks from the builder
    _ = pipelineBuilder.BlockParsers.TryRemove<IndentedCodeBlockParser>();

    return pipelineBuilder;
}
166+
136167
public static MarkdownPipeline Pipeline
137168
{
138169
get
139170
{
140171
if (PipelineCached is not null)
141172
return PipelineCached;
142173

143-
var builder = new MarkdownPipelineBuilder()
144-
.UseInlineAnchors()
145-
.UsePreciseSourceLocation()
146-
.UseDiagnosticLinks()
147-
.UseHeadingsWithSlugs()
148-
.UseEmphasisExtras(EmphasisExtraOptions.Default)
149-
.UseInlineAppliesTo()
150-
.UseInlineIcons()
151-
.UseInlineKbd()
152-
.UseSubstitution()
153-
.UseComments()
154-
.UseYamlFrontMatter()
155-
.UseGridTables()
156-
.UsePipeTables()
157-
.UseDirectives()
158-
.UseDefinitionLists()
159-
.UseEnhancedCodeBlocks()
160-
.UseHtmxLinkInlineRenderer()
161-
.DisableHtml()
162-
.UseWhiteSpaceNormalizer()
163-
.UseHardBreaks();
164-
_ = builder.BlockParsers.TryRemove<IndentedCodeBlockParser>();
174+
// Use the shared base builder to ensure consistency
175+
var builder = CreateBaseBuilder();
165176
PipelineCached = builder.Build();
166177
return PipelineCached;
167178
}

0 commit comments

Comments
 (0)