Skip to content

Commit d1b66f5

Browse files
authored
Enhance LLM readable markdown rendering (#1569)
* Add new LLM markdown renderer * Create LlmSubstitutionLeafRenderer instead of post-processing and replacing subs in the LLM markdown output * Also create zip file with top-level llms.txt file * Cleanup LlmInlineRenderers.cs * Move to dedicated LlmMarkdown folder * Cleanup LlmMarkdownRenderer.cs * Revert changes to MarkdownParser.cs * Cleanup * Add ability to serve LLM markdown during local development * Cleanup legacy LLM text output code * Move description below the title in the frontmatter output * Fix index page serving; otherwise it would serve the markdown file * Fix zip creation * Use DocumentationObjectPoolProvider for StringWriters * Remove unused imports * Optimize imports * Fix DocumentationObjectPoolProvider usage * Cleanup comment * Use action pattern * Remove unnecessary "!" * Reuse LlmMarkdownExporter.ConvertToLlmMarkdown * Make UseLlmMarkdownRenderer use a static action * Change position of arguments
1 parent a8bc16b commit d1b66f5

File tree

24 files changed

+1594
-208
lines changed

24 files changed

+1594
-208
lines changed

src/Elastic.Documentation/Extensions/ReusableStringWriter.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ public sealed class ReusableStringWriter : TextWriter
1818

1919
/// <summary>
/// Detaches the current buffer by setting the backing field to <c>null</c>.
/// </summary>
public void Reset()
{
    _sb = null;
}
2020

21+
/// <summary>
/// Returns the text held by the current buffer, or an empty string when no buffer is attached.
/// </summary>
public override string ToString()
{
    var text = _sb?.ToString();
    return text ?? string.Empty;
}
22+
2123
/// <summary>
/// Appends a single character to the current buffer; does nothing when no buffer is attached.
/// </summary>
/// <param name="value">The character to append.</param>
public override void Write(char value)
{
    _ = _sb?.Append(value);
}
2224

2325
public override void Write(char[] buffer, int index, int count)

src/Elastic.Markdown/DocumentationGenerator.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@
1313
using Elastic.Documentation.Site.Navigation;
1414
using Elastic.Documentation.State;
1515
using Elastic.Markdown.Exporters;
16+
using Elastic.Markdown.Helpers;
1617
using Elastic.Markdown.IO;
1718
using Elastic.Markdown.Links.CrossLinks;
19+
using Elastic.Markdown.Myst.Renderers;
20+
using Elastic.Markdown.Myst.Renderers.LlmMarkdown;
1821
using Markdig.Syntax;
1922
using Microsoft.Extensions.Logging;
2023

@@ -340,6 +343,13 @@ private async Task GenerateDocumentationState(Cancel ctx)
340343
await DocumentationSet.OutputDirectory.FileSystem.File.WriteAllBytesAsync(stateFile.FullName, bytes, ctx);
341344
}
342345

346+
/// <summary>
/// Parses a single markdown file and converts it to LLM markdown.
/// </summary>
/// <param name="markdown">The markdown file to render.</param>
/// <param name="ctx">Cancellation token passed to tree resolution and parsing.</param>
/// <returns>The string produced by <c>LlmMarkdownExporter.ConvertToLlmMarkdown</c>.</returns>
public async Task<string> RenderLlmMarkdown(MarkdownFile markdown, Cancel ctx)
{
    // Resolve the documentation tree first, then parse the requested file.
    await DocumentationSet.Tree.Resolve(ctx);
    var parsed = await markdown.ParseFullAsync(ctx);

    var buildContext = DocumentationSet.Context;
    return LlmMarkdownExporter.ConvertToLlmMarkdown(parsed, buildContext);
}
352+
343353
public async Task<RenderResult> RenderLayout(MarkdownFile markdown, Cancel ctx)
344354
{
345355
await DocumentationSet.Tree.Resolve(ctx);

src/Elastic.Markdown/Exporters/LLMTextExporter.cs

Lines changed: 0 additions & 127 deletions
This file was deleted.
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System.IO.Abstractions;
6+
using System.IO.Compression;
7+
using System.Text;
8+
using Elastic.Documentation.Configuration;
9+
using Elastic.Documentation.Configuration.Builder;
10+
using Elastic.Markdown.Helpers;
11+
using Elastic.Markdown.Myst.Renderers.LlmMarkdown;
12+
using Markdig.Syntax;
13+
14+
namespace Elastic.Markdown.Exporters;
15+
16+
/// <summary>
/// Exports markdown files as LLM-optimized CommonMark using custom renderers.
/// </summary>
/// <remarks>
/// Every exported page is written as a <c>.md</c> file (the root index becomes <c>llms.txt</c>).
/// <see cref="FinishExportAsync"/> then bundles <c>llms.txt</c> and all <c>*.md</c> files found
/// under the <c>docs</c> output folder into <c>llm.zip</c>.
/// </remarks>
public class LlmMarkdownExporter : IMarkdownExporter
{
    /// <summary>No start-up work is performed; completes synchronously.</summary>
    public ValueTask StartAsync(Cancel ctx = default) => ValueTask.CompletedTask;

    /// <summary>No shutdown work is performed; completes synchronously.</summary>
    public ValueTask StopAsync(Cancel ctx = default) => ValueTask.CompletedTask;

    /// <summary>
    /// Creates <c>llm.zip</c> inside the <c>docs</c> sub folder of <paramref name="outputFolder"/>,
    /// containing <c>llms.txt</c> (when present) and every <c>*.md</c> file below that folder.
    /// </summary>
    /// <param name="outputFolder">The root output folder of the build.</param>
    /// <param name="ctx">Cancellation token (not observed; the work is synchronous).</param>
    /// <returns>Always <c>true</c>.</returns>
    public ValueTask<bool> FinishExportAsync(IDirectoryInfo outputFolder, Cancel ctx)
    {
        var outputDirectory = Path.Combine(outputFolder.FullName, "docs");
        var zipPath = Path.Combine(outputDirectory, "llm.zip");

        // ZipArchiveMode.Create refuses to overwrite an existing archive, so a zip left behind
        // by a previous build has to be removed first.
        if (File.Exists(zipPath))
            File.Delete(zipPath);

        using (var zip = ZipFile.Open(zipPath, ZipArchiveMode.Create))
        {
            // llms.txt is only written when the root index page was exported.
            var llmsTxt = Path.Combine(outputDirectory, "llms.txt");
            if (File.Exists(llmsTxt))
                _ = zip.CreateEntryFromFile(llmsTxt, ToZipEntryName(outputDirectory, llmsTxt));

            var markdownFiles = Directory.GetFiles(outputDirectory, "*.md", SearchOption.AllDirectories);
            foreach (var file in markdownFiles)
                _ = zip.CreateEntryFromFile(file, ToZipEntryName(outputDirectory, file));
        }

        return ValueTask.FromResult(true);
    }

    /// <summary>
    /// Renders one document to LLM markdown, prepends the metadata header and writes the result
    /// to the location computed by <see cref="GetLlmOutputFile"/>.
    /// </summary>
    /// <param name="fileContext">The document, its source file and the build context.</param>
    /// <param name="ctx">Cancellation token passed to the file write.</param>
    /// <returns>Always <c>true</c>.</returns>
    public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext, Cancel ctx)
    {
        var llmMarkdown = ConvertToLlmMarkdown(fileContext.Document, fileContext.BuildContext);
        var outputFile = GetLlmOutputFile(fileContext);
        if (outputFile.Directory is { Exists: false })
            outputFile.Directory.Create();

        var contentWithMetadata = CreateLlmContentWithMetadata(fileContext, llmMarkdown);
        await fileContext.SourceFile.SourceFile.FileSystem.File.WriteAllTextAsync(
            outputFile.FullName,
            contentWithMetadata,
            Encoding.UTF8,
            ctx
        );
        return true;
    }

    /// <summary>
    /// Renders <paramref name="document"/> with the pooled LLM markdown renderer.
    /// </summary>
    /// <param name="document">The parsed markdown document.</param>
    /// <param name="context">The build context handed to the renderer.</param>
    /// <returns>The string returned by <c>DocumentationObjectPoolProvider.UseLlmMarkdownRenderer</c>.</returns>
    public static string ConvertToLlmMarkdown(MarkdownDocument document, BuildContext context) =>
        DocumentationObjectPoolProvider.UseLlmMarkdownRenderer(context, document, static (renderer, obj) =>
        {
            _ = renderer.Render(obj);
        });

    /// <summary>
    /// Builds a zip entry name for <paramref name="file"/> relative to <paramref name="rootDirectory"/>.
    /// Zip entry names use '/' as separator regardless of the host platform.
    /// </summary>
    private static string ToZipEntryName(string rootDirectory, string file) =>
        Path.GetRelativePath(rootDirectory, file).Replace(Path.DirectorySeparatorChar, '/');

    /// <summary>
    /// Maps the default (HTML) output file of a page to the file its LLM markdown is written to.
    /// </summary>
    private static IFileInfo GetLlmOutputFile(MarkdownExportFileContext fileContext)
    {
        var fs = fileContext.SourceFile.SourceFile.FileSystem;
        var defaultOutputFile = fileContext.DefaultOutputFile;
        var directory = defaultOutputFile.Directory!;
        var fileName = Path.GetFileNameWithoutExtension(defaultOutputFile.Name);

        // Regular files: /docs/section/page.html -> /docs/section/page.md
        if (fileName != "index")
            return fs.FileInfo.New(Path.Combine(directory.FullName, $"{fileName}.md"));

        // The index of the documentation root becomes the top level llms.txt
        var root = fileContext.BuildContext.DocumentationOutputDirectory;
        if (directory.FullName == root.FullName)
            return fs.FileInfo.New(Path.Combine(root.FullName, "llms.txt"));

        // For index files: /docs/section/index.html -> /docs/section.md
        // This allows users to append .md to any URL path
        return fs.FileInfo.New(Path.Combine(directory.Parent!.FullName, $"{directory.Name}.md"));
    }

    /// <summary>
    /// Prefixes the rendered markdown with a front matter block (title, description, url and
    /// products) followed by a level one heading holding the page title.
    /// </summary>
    /// <param name="context">The export context of the page being written.</param>
    /// <param name="llmMarkdown">The rendered LLM markdown body.</param>
    /// <returns>The complete file content.</returns>
    /// <exception cref="ArgumentException">A configured product id is not a known product.</exception>
    private static string CreateLlmContentWithMetadata(MarkdownExportFileContext context, string llmMarkdown)
    {
        var sourceFile = context.SourceFile;
        var metadata = DocumentationObjectPoolProvider.StringBuilderPool.Get();
        try
        {
            _ = metadata.AppendLine("---");
            _ = metadata.AppendLine($"title: {sourceFile.Title}");

            if (!string.IsNullOrEmpty(sourceFile.YamlFrontMatter?.Description))
                _ = metadata.AppendLine($"description: {sourceFile.YamlFrontMatter.Description}");
            else
            {
                // No explicit description in the front matter: derive one from the document.
                var descriptionGenerator = new DescriptionGenerator();
                var generateDescription = descriptionGenerator.GenerateDescription(context.Document);
                _ = metadata.AppendLine($"description: {generateDescription}");
            }

            if (!string.IsNullOrEmpty(sourceFile.Url))
            {
                // Without a canonical base URL fall back to the site relative URL instead of
                // emitting a malformed "://" prefix.
                var baseUrl = context.BuildContext.CanonicalBaseUrl;
                _ = baseUrl is null
                    ? metadata.AppendLine($"url: {sourceFile.Url}")
                    : metadata.AppendLine($"url: {baseUrl.Scheme}://{baseUrl.Host}{sourceFile.Url}");
            }

            var configProducts = context.BuildContext.Configuration.Products.Select(p =>
            {
                if (Products.AllById.TryGetValue(p, out var product))
                    return product;
                throw new ArgumentException($"Invalid product id: {p}");
            });
            var frontMatterProducts = sourceFile.YamlFrontMatter?.Products ?? [];
            // Union already removes duplicates.
            var allProducts = frontMatterProducts
                .Union(configProducts)
                .ToList();
            if (allProducts.Count > 0)
            {
                _ = metadata.AppendLine("products:");
                foreach (var product in allProducts.Select(p => p.DisplayName).Order())
                    _ = metadata.AppendLine($" - {product}");
            }

            _ = metadata.AppendLine("---");
            _ = metadata.AppendLine();
            _ = metadata.AppendLine($"# {sourceFile.Title}");
            _ = metadata.Append(llmMarkdown);

            return metadata.ToString();
        }
        finally
        {
            // Hand the builder back so the pool can actually reuse it.
            DocumentationObjectPoolProvider.StringBuilderPool.Return(metadata);
        }
    }
}
144+
145+
/// <summary>
/// Registration helper for <see cref="LlmMarkdownExporter"/>.
/// </summary>
public static class LlmMarkdownExporterExtensions
{
    /// <summary>
    /// Appends a new <see cref="LlmMarkdownExporter"/> to <paramref name="exporters"/>.
    /// </summary>
    /// <param name="exporters">The exporter list to extend.</param>
    public static void AddLlmMarkdownExport(this List<IMarkdownExporter> exporters)
    {
        var llmExporter = new LlmMarkdownExporter();
        exporters.Add(llmExporter);
    }
}

0 commit comments

Comments
 (0)