Skip to content

Commit 4d347fb

Browse files
committed
Create LlmSubstituionLeafRenderer instead of post-processing and replacing substitutions in the LLM markdown output
1 parent 8869863 commit 4d347fb

File tree

6 files changed

+97
-152
lines changed

6 files changed

+97
-152
lines changed

src/Elastic.Markdown/Exporters/LlmMarkdownExporter.cs

Lines changed: 12 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
using System.Text;
99
using Elastic.Documentation.Configuration;
1010
using Elastic.Documentation.Configuration.Builder;
11+
using Elastic.Markdown.Helpers;
12+
using Elastic.Markdown.Myst;
1113
using Elastic.Markdown.Myst.Renderers;
1214
using Markdig.Syntax;
1315

@@ -49,26 +51,27 @@ await fileContext.SourceFile.SourceFile.FileSystem.File.WriteAllTextAsync(
4951
return true;
5052
}
5153

52-
private string ConvertToLlmMarkdown(MarkdownDocument document, MarkdownExportFileContext context)
54+
public static string ConvertToLlmMarkdown(MarkdownDocument document, MarkdownExportFileContext context)
5355
{
5456
using var writer = new StringWriter();
55-
56-
// Create a new renderer for consistent LLM output with BuildContext for URL transformation
57+
var state = new ParserState(context.BuildContext)
58+
{
59+
YamlFrontMatter = context.SourceFile.YamlFrontMatter,
60+
MarkdownSourcePath = context.SourceFile.SourceFile,
61+
CrossLinkResolver = context.Resolvers.CrossLinkResolver,
62+
DocumentationFileLookup = context.Resolvers.DocumentationFileLookup
63+
};
64+
var parserContext = new ParserContext(state);
5765
var renderer = new LlmMarkdownRenderer(writer)
5866
{
5967
BuildContext = context.BuildContext
6068
};
61-
6269
_ = renderer.Render(document);
6370
var content = writer.ToString();
64-
65-
// Apply substitutions to the final content
66-
content = ApplySubstitutions(content, context);
67-
6871
return content;
6972
}
7073

71-
private IFileInfo GetLlmOutputFile(MarkdownExportFileContext fileContext)
74+
private static IFileInfo GetLlmOutputFile(MarkdownExportFileContext fileContext)
7275
{
7376
var source = fileContext.SourceFile.SourceFile;
7477
var fs = source.FileSystem;
@@ -105,93 +108,37 @@ private IFileInfo GetLlmOutputFile(MarkdownExportFileContext fileContext)
105108
}
106109
}
107110

108-
private string ApplySubstitutions(string content, MarkdownExportFileContext context)
109-
{
110-
// Get combined substitutions (global + file-specific)
111-
var substitutions = GetCombinedSubstitutions(context);
112-
113-
// Process substitutions in the content
114-
foreach (var (key, value) in substitutions)
115-
{
116-
// Replace {{key}} with value
117-
content = content.Replace($"{{{{{key}}}}}", value);
118-
}
119-
120-
return content;
121-
}
122-
123-
private ConcurrentDictionary<string, string> GetCombinedSubstitutions(MarkdownExportFileContext context)
124-
{
125-
// Get global substitutions from BuildContext
126-
var globalSubstitutions = context.BuildContext.Configuration.Substitutions;
127-
128-
// Get file-specific substitutions from YamlFrontMatter
129-
var fileSubstitutions = context.SourceFile.YamlFrontMatter?.Properties;
130-
131-
// Create a new dictionary with all substitutions
132-
var allSubstitutions = new ConcurrentDictionary<string, string>();
133-
134-
// Add file-specific substitutions first
135-
if (fileSubstitutions != null)
136-
{
137-
foreach (var (key, value) in fileSubstitutions)
138-
{
139-
_ = allSubstitutions.TryAdd(key, value);
140-
}
141-
}
142-
143-
// Add global substitutions (will override file-specific ones if there are conflicts)
144-
foreach (var (key, value) in globalSubstitutions)
145-
{
146-
_ = allSubstitutions.TryAdd(key, value);
147-
}
148-
149-
return allSubstitutions;
150-
}
151111

152112
private string CreateLlmContentWithMetadata(MarkdownExportFileContext context, string llmMarkdown)
153113
{
154114
var sourceFile = context.SourceFile;
155115
var metadata = new StringBuilder();
156116

157-
// Add metadata header
158-
// _ = metadata.AppendLine("<!-- LLM-Optimized Markdown Document -->");
159117
_ = metadata.AppendLine("---");
160-
// _ = metadata.AppendLine($"<!-- Source: {Path.GetRelativePath(context.BuildContext.DocumentationOutputDirectory.FullName, sourceFile.SourceFile.FullName)} -->");
161-
// _ = metadata.AppendLine($"<!-- Generated: {DateTime.UtcNow:yyyy-MM-dd HH:mm:ss} UTC -->");
162118
_ = metadata.AppendLine($"title: {sourceFile.Title}");
163119

164120
if (!string.IsNullOrEmpty(sourceFile.Url))
165-
{
166121
_ = metadata.AppendLine($"url: {context.BuildContext.CanonicalBaseUrl?.Scheme}://{context.BuildContext.CanonicalBaseUrl?.Host}{sourceFile.Url}");
167-
}
168122

169123
if (!string.IsNullOrEmpty(sourceFile.YamlFrontMatter?.Description))
170-
{
171124
_ = metadata.AppendLine($"description: {sourceFile.YamlFrontMatter.Description}");
172-
}
173125
else
174126
{
175127
var descriptionGenerator = new DescriptionGenerator();
176128
var generateDescription = descriptionGenerator.GenerateDescription(context.Document);
177129
_ = metadata.AppendLine($"description: {generateDescription}");
178130
}
179-
180-
181131
var configProducts = context.BuildContext.Configuration.Products.Select(p =>
182132
{
183133
if (Products.AllById.TryGetValue(p, out var product))
184134
return product;
185135
throw new ArgumentException($"Invalid product id: {p}");
186136
});
187-
188137
var frontMatterProducts = sourceFile.YamlFrontMatter?.Products ?? [];
189-
190138
var allProducts = frontMatterProducts
191139
.Union(configProducts)
192140
.Distinct()
193141
.ToList();
194-
195142
if (allProducts.Count > 0)
196143
{
197144
_ = metadata.AppendLine("products:");
@@ -200,14 +147,8 @@ private string CreateLlmContentWithMetadata(MarkdownExportFileContext context, s
200147
}
201148

202149
_ = metadata.AppendLine("---");
203-
204-
// Add an empty line after metadata
205150
_ = metadata.AppendLine();
206-
207-
// Add the title as H1 heading
208151
_ = metadata.AppendLine($"# {sourceFile.Title}");
209-
210-
// Add the converted markdown content
211152
_ = metadata.Append(llmMarkdown);
212153

213154
return metadata.ToString();

src/Elastic.Markdown/Myst/Renderers/LlmInlineRenderers.cs

Lines changed: 8 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@ protected override void Write(LlmMarkdownRenderer renderer, EmphasisInline obj)
7373
}
7474
}
7575

76+
public class LlmSubstituionLeafRenderer : MarkdownObjectRenderer<LlmMarkdownRenderer, SubstitutionLeaf>
77+
{
78+
protected override void Write(LlmMarkdownRenderer renderer, SubstitutionLeaf obj)
79+
=> renderer.Writer.Write(obj.Found ? obj.Replacement : obj.Content);
80+
}
81+
7682
/// <summary>
7783
/// Renders inline code as standard CommonMark code spans
7884
/// </summary>
@@ -103,13 +109,10 @@ protected override void Write(LlmMarkdownRenderer renderer, LineBreakInline obj)
103109
{
104110
if (obj.IsHard)
105111
{
106-
renderer.Writer.Write(" "); // Two spaces for hard break
112+
// renderer.Writer.Write(" "); // Two spaces for hard break
107113
renderer.WriteLine();
108114
}
109-
else
110-
{
111-
renderer.Writer.Write(" "); // Soft break becomes space
112-
}
115+
renderer.WriteLine();
113116
}
114117
}
115118

@@ -120,14 +123,6 @@ public class LlmRoleRenderer : MarkdownObjectRenderer<LlmMarkdownRenderer, RoleL
120123
{
121124
protected override void Write(LlmMarkdownRenderer renderer, RoleLeaf obj)
122125
{
123-
// Convert role to a format LLMs can understand
124-
// For example: :doc:`page` becomes [page](page) or just "page" depending on role type
125-
126-
// RoleLeaf has a Role property and inherits Content from CodeInline
127-
var roleName = obj.Role ?? "unknown";
128-
var content = obj.Content;
129-
130-
131126
switch (obj)
132127
{
133128
case KbdRole kbd:
@@ -144,41 +139,4 @@ protected override void Write(LlmMarkdownRenderer renderer, RoleLeaf obj)
144139
}
145140
}
146141
}
147-
148-
private static string ExtractRoleContent(Role role) =>
149-
// Extract text content from role's children
150-
role.Descendants()
151-
.OfType<LiteralInline>()
152-
.Select(l => l.Content.ToString())
153-
.Aggregate(string.Empty, (current, text) => current + text);
154142
}
155-
156-
/// <summary>
157-
/// Renders MyST substitutions by expanding them to their replacement text
158-
/// </summary>
159-
public class LlmSubstitutionRenderer : MarkdownObjectRenderer<LlmMarkdownRenderer, SubstitutionLeaf>
160-
{
161-
protected override void Write(LlmMarkdownRenderer renderer, SubstitutionLeaf obj)
162-
{
163-
// Include substitution info as comment for LLM understanding
164-
renderer.Writer.Write("<!-- SUBSTITUTION: ");
165-
renderer.Writer.Write(obj.Content);
166-
renderer.Writer.Write(" = ");
167-
renderer.Writer.Write(obj.Replacement);
168-
renderer.Writer.Write(" -->");
169-
170-
// Output the replacement text for LLM consumption
171-
renderer.Writer.Write(obj.Found ? obj.Replacement : obj.Content);
172-
}
173-
}
174-
175-
/// <summary>
176-
/// Renders container inlines by processing their children
177-
/// </summary>
178-
public class LlmContainerInlineRenderer : MarkdownObjectRenderer<LlmMarkdownRenderer, ContainerInline>
179-
{
180-
protected override void Write(LlmMarkdownRenderer renderer, ContainerInline obj) => renderer.WriteChildren(obj);
181-
}
182-
183-
// Note: LlmHtmlInlineRenderer was removed since HTML is disabled in the base pipeline (.DisableHtml())
184-
// HTML elements are not parsed into the AST, making HTML renderers unnecessary dead code

src/Elastic.Markdown/Myst/Renderers/LlmMarkdownRenderer.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System.IO;
66
using Elastic.Documentation.Configuration;
7+
using Elastic.Markdown.Exporters;
78
using Markdig.Renderers;
89
using Markdig.Syntax;
910

@@ -71,19 +72,18 @@ public void WriteLeafInline(LeafBlock leafBlock)
7172

7273
public LlmMarkdownRenderer(TextWriter writer) : base(writer)
7374
{
74-
7575
// Add renderer to skip YAML frontmatter blocks (prevents them from appearing as visible content)
7676
ObjectRenderers.Add(new LlmYamlFrontMatterRenderer());
7777

7878
// Add inline renderers
79+
ObjectRenderers.Add(new LlmSubstituionLeafRenderer());
7980
ObjectRenderers.Add(new LlmRoleRenderer());
8081
ObjectRenderers.Add(new LlmLinkInlineRenderer());
8182
ObjectRenderers.Add(new LlmEmphasisInlineRenderer());
8283
ObjectRenderers.Add(new LlmCodeInlineRenderer());
8384
ObjectRenderers.Add(new LlmLiteralInlineRenderer());
8485
ObjectRenderers.Add(new LlmLineBreakInlineRenderer());
8586

86-
8787
// Add custom renderers for your MyST extensions
8888
ObjectRenderers.Add(new LlmDirectiveRenderer());
8989
ObjectRenderers.Add(new LlmEnhancedCodeBlockRenderer());

tests/authoring/Framework/LlmTestAssertions.fs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,16 @@ open Xunit.Sdk
1616
module LlmTestAssertions =
1717

1818
let toNewLLM (actual: MarkdownResult) =
19-
use writer = new StringWriter()
20-
// Here we explicitly use the new LlmMarkdownRenderer with BuildContext
21-
let renderer = LlmMarkdownRenderer(writer, BuildContext = actual.Context.Generator.Context)
22-
renderer.Render(actual.Document) |> ignore
23-
writer.ToString().Trim()
19+
use writer = new StringWriter()
20+
let markdownExportFileContext = MarkdownExportFileContext(
21+
BuildContext = actual.Context.Generator.Context,
22+
Resolvers = actual.Context.Set.MarkdownParser.Resolvers,
23+
Document = actual.Document,
24+
SourceFile = actual.File,
25+
DefaultOutputFile = actual.File.SourceFile
26+
)
27+
LlmMarkdownExporter.ConvertToLlmMarkdown(actual.Document, markdownExportFileContext).Trim()
28+
2429

2530
[<DebuggerStepThrough>]
2631
let convertsToNewLLM ([<LanguageInjection("markdown")>]expected: string) (actual: Lazy<GeneratorResults>) =

tests/authoring/Framework/Setup.fs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,9 @@ type Setup =
220220
let logger = new TestLoggerFactory()
221221
let conversionCollector = TestConversionCollector()
222222
let linkResolver = TestCrossLinkResolver(context.Configuration)
223-
let set = DocumentationSet(context, logger, linkResolver);
223+
let set = DocumentationSet(context, logger, linkResolver)
224+
225+
224226
let generator = DocumentationGenerator(set, logger, null, null, null, null, conversionCollector)
225227

226228
let context = {

0 commit comments

Comments
 (0)