-
Notifications
You must be signed in to change notification settings - Fork 32
Enhance LLM readable markdown rendering #1569
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 12 commits
Commits
Show all changes
26 commits
Select commit
Hold shift + click to select a range
8869863
Add new LLM markdown renderer
reakaleek 4d347fb
Create LlmStubstitutionLeafRenderer instead of post processing and re…
reakaleek 89e9732
Also create zip file with top level llms.txt file
reakaleek 74e7ebb
Cleanup LlmInlineRenderers.cs
reakaleek 41660dd
Move to dedicated LlmMarkdown folder
reakaleek ec1f08e
Cleanup LLmMarkdownRenderer.cs
reakaleek 0409a33
Revert changes to MarkdownParser.cs
reakaleek a95d0b2
Cleanup
reakaleek 6e5c3f7
Add ability to serve LLM markdown during local development
reakaleek 1df0dee
Cleanup legacy LLM text output code
reakaleek c240fa7
Merge branch 'main' into feature/llm-markdown-renderer
reakaleek 95ff79f
Move description below the title in the frontmatter output
reakaleek 94da16f
Fix index page serving
reakaleek 5505544
Merge branch 'main' into feature/llm-markdown-renderer
reakaleek 4f1eb44
Fix zip creation
reakaleek b6d8419
Use DocumentationObjectPoolProvider for StringWriters
reakaleek 66c725d
Remove unused imports
reakaleek e3d6c13
Optimize imports
reakaleek f975abd
Fix DocumentationObjectPoolProvider usage
reakaleek 62963b1
Cleanup comment
reakaleek 9e5d96d
Use action pattern
reakaleek 202d428
Remove unnecessary "!"
reakaleek 398b366
Reuse LlmMarkdownExporter.ConvertToLlmMarkdown
reakaleek 9015e13
Make UseLlmMarkdownRenderer use of static action
reakaleek 97a67cb
Change position of arguments
reakaleek 73b7f60
Merge branch 'main' into feature/llm-markdown-renderer
reakaleek File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,156 @@ | ||
| // Licensed to Elasticsearch B.V under one or more agreements. | ||
| // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. | ||
| // See the LICENSE file in the project root for more information | ||
|
|
||
| using System.Collections.Concurrent; | ||
| using System.IO; | ||
| using System.IO.Abstractions; | ||
| using System.IO.Compression; | ||
| using System.Text; | ||
| using Elastic.Documentation.Configuration; | ||
| using Elastic.Documentation.Configuration.Builder; | ||
| using Elastic.Markdown.Helpers; | ||
| using Elastic.Markdown.Myst; | ||
| using Elastic.Markdown.Myst.Renderers; | ||
| using Elastic.Markdown.Myst.Renderers.LlmMarkdown; | ||
| using Markdig.Syntax; | ||
|
|
||
| namespace Elastic.Markdown.Exporters; | ||
|
|
||
/// <summary>
/// Exports markdown files as LLM-optimized CommonMark using custom renderers
/// </summary>
public class LlmMarkdownExporter : IMarkdownExporter
{
    /// <summary>No start-up work is performed; returns an already completed task.</summary>
    public ValueTask StartAsync(Cancel ctx = default) => ValueTask.CompletedTask;

    /// <summary>No shutdown work is performed; returns an already completed task.</summary>
    public ValueTask StopAsync(Cancel ctx = default) => ValueTask.CompletedTask;

    /// <summary>
    /// Bundles the exported output into <c>docs/llm.zip</c> below <paramref name="outputFolder"/>.
    /// The archive contains the top level <c>llms.txt</c> (when present) and every <c>*.md</c> file
    /// found recursively under the <c>docs</c> directory, stored relative to that directory.
    /// </summary>
    /// <param name="outputFolder">Root folder of the generated output.</param>
    /// <param name="ctx">Cancellation token (not observed by this synchronous implementation).</param>
    /// <returns>Always <c>true</c> once the archive has been written.</returns>
    public ValueTask<bool> FinishExportAsync(IDirectoryInfo outputFolder, Cancel ctx)
    {
        var outputDirectory = Path.Combine(outputFolder.FullName, "docs");
        var zipPath = Path.Combine(outputDirectory, "llm.zip");

        // ZipArchiveMode.Create refuses to overwrite an existing archive, so a stale
        // llm.zip from a previous build has to be removed before opening.
        if (File.Exists(zipPath))
            File.Delete(zipPath);

        using (var zip = ZipFile.Open(zipPath, ZipArchiveMode.Create))
        {
            var llmsTxt = Path.Combine(outputFolder.FullName, "llms.txt");
            // Skip the entry instead of failing the whole finish step when no llms.txt was produced.
            if (File.Exists(llmsTxt))
                _ = zip.CreateEntryFromFile(llmsTxt, "llms.txt");

            var markdownFiles = Directory.GetFiles(outputDirectory, "*.md", SearchOption.AllDirectories);

            foreach (var file in markdownFiles)
            {
                // ZIP entry names must use '/' regardless of the platform's directory separator.
                var entryName = Path.GetRelativePath(outputDirectory, file)
                    .Replace(Path.DirectorySeparatorChar, '/');
                _ = zip.CreateEntryFromFile(file, entryName);
            }
        }
        return ValueTask.FromResult(true);
    }

    /// <summary>
    /// Renders a single document to LLM markdown, prepends the metadata header and writes the
    /// result as UTF-8 to the file chosen by <see cref="GetLlmOutputFile"/>.
    /// </summary>
    /// <param name="fileContext">The document being exported together with its build context.</param>
    /// <param name="ctx">Cancellation token forwarded to the file write.</param>
    /// <returns>Always <c>true</c> once the file has been written.</returns>
    public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext, Cancel ctx)
    {
        var llmMarkdown = ConvertToLlmMarkdown(fileContext.Document, fileContext);
        var outputFile = GetLlmOutputFile(fileContext);
        if (outputFile.Directory is { Exists: false })
            outputFile.Directory.Create();
        var contentWithMetadata = CreateLlmContentWithMetadata(fileContext, llmMarkdown);
        await fileContext.SourceFile.SourceFile.FileSystem.File.WriteAllTextAsync(
            outputFile.FullName,
            contentWithMetadata,
            Encoding.UTF8,
            ctx
        );
        return true;
    }

    /// <summary>
    /// Renders <paramref name="document"/> with an <see cref="LlmMarkdownRenderer"/> whose
    /// <c>BuildContext</c> is taken from <paramref name="context"/> and returns the rendered text.
    /// </summary>
    /// <param name="document">The parsed markdown document to render.</param>
    /// <param name="context">Export context supplying the build context for the renderer.</param>
    /// <returns>The text the renderer wrote.</returns>
    public static string ConvertToLlmMarkdown(MarkdownDocument document, MarkdownExportFileContext context)
    {
        using var writer = new StringWriter();
        var renderer = new LlmMarkdownRenderer(writer)
        {
            BuildContext = context.BuildContext
        };
        _ = renderer.Render(document);
        return writer.ToString();
    }

    /// <summary>
    /// Maps the default (HTML) output file of a document to the location of its LLM markdown file.
    /// </summary>
    /// <param name="fileContext">The document being exported.</param>
    /// <returns>
    /// <c>llms.txt</c> for the index of the documentation output root, <c>{folder}.md</c> next to the
    /// folder for any other index file, and <c>{name}.md</c> in the same directory otherwise.
    /// </returns>
    private static IFileInfo GetLlmOutputFile(MarkdownExportFileContext fileContext)
    {
        var fs = fileContext.SourceFile.SourceFile.FileSystem;
        var defaultOutputFile = fileContext.DefaultOutputFile;
        var directory = defaultOutputFile.Directory!;
        var baseName = Path.GetFileNameWithoutExtension(defaultOutputFile.Name);

        // Regular files: /docs/section/page.html -> /docs/section/page.md
        if (baseName != "index")
            return fs.FileInfo.New(Path.Combine(directory.FullName, $"{baseName}.md"));

        // The index of the output root becomes the top level llms.txt
        var root = fileContext.BuildContext.DocumentationOutputDirectory;
        if (directory.FullName == root.FullName)
            return fs.FileInfo.New(Path.Combine(root.FullName, "llms.txt"));

        // For index files: /docs/section/index.html -> /docs/section.md
        // This allows users to append .md to any URL path
        return fs.FileInfo.New(Path.Combine(directory.Parent!.FullName, $"{directory.Name}.md"));
    }

    /// <summary>
    /// Builds the final file content: a <c>---</c> delimited metadata header (title, description,
    /// url, products), a blank line, an H1 with the title, and then the rendered markdown.
    /// </summary>
    /// <param name="context">The document being exported together with its build context.</param>
    /// <param name="llmMarkdown">The already rendered LLM markdown body.</param>
    /// <returns>The metadata header followed by <paramref name="llmMarkdown"/>.</returns>
    /// <exception cref="ArgumentException">A configured product id is not present in <c>Products.AllById</c>.</exception>
    private static string CreateLlmContentWithMetadata(MarkdownExportFileContext context, string llmMarkdown)
    {
        var sourceFile = context.SourceFile;
        var metadata = new StringBuilder();

        _ = metadata.AppendLine("---");
        _ = metadata.AppendLine($"title: {sourceFile.Title}");

        // Prefer the description authored in the front matter; otherwise generate one from the document.
        if (!string.IsNullOrEmpty(sourceFile.YamlFrontMatter?.Description))
            _ = metadata.AppendLine($"description: {sourceFile.YamlFrontMatter.Description}");
        else
        {
            var descriptionGenerator = new DescriptionGenerator();
            var generateDescription = descriptionGenerator.GenerateDescription(context.Document);
            _ = metadata.AppendLine($"description: {generateDescription}");
        }

        // Only emit an absolute url when a canonical base is available; without one the
        // interpolation would produce a malformed "://<path>" value.
        if (!string.IsNullOrEmpty(sourceFile.Url) && context.BuildContext.CanonicalBaseUrl is { } canonicalBase)
            _ = metadata.AppendLine($"url: {canonicalBase.Scheme}://{canonicalBase.Host}{sourceFile.Url}");

        var configProducts = context.BuildContext.Configuration.Products.Select(p =>
        {
            if (Products.AllById.TryGetValue(p, out var product))
                return product;
            throw new ArgumentException($"Invalid product id: {p}");
        });
        var frontMatterProducts = sourceFile.YamlFrontMatter?.Products ?? [];
        // Union already yields each distinct element once, so no extra Distinct() is needed.
        var allProducts = frontMatterProducts
            .Union(configProducts)
            .ToList();
        if (allProducts.Count > 0)
        {
            _ = metadata.AppendLine("products:");
            foreach (var product in allProducts.Select(p => p.DisplayName).Order())
                _ = metadata.AppendLine($"  - {product}");
        }

        _ = metadata.AppendLine("---");
        _ = metadata.AppendLine();
        _ = metadata.AppendLine($"# {sourceFile.Title}");
        _ = metadata.Append(llmMarkdown);

        return metadata.ToString();
    }
}
|
|
||
/// <summary>
/// Registration helpers for <see cref="LlmMarkdownExporter"/>.
/// </summary>
public static class LlmMarkdownExporterExtensions
{
    /// <summary>
    /// Appends a new <see cref="LlmMarkdownExporter"/> instance to <paramref name="exporters"/>.
    /// </summary>
    /// <param name="exporters">The exporter list to extend.</param>
    public static void AddLlmMarkdownExport(this List<IMarkdownExporter> exporters)
    {
        var llmExporter = new LlmMarkdownExporter();
        exporters.Add(llmExporter);
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have an object pool for StringWriters, StringBuilders, and HtmlRenderers, which can drastically help reduce allocations, e.g.:
See:
docs-builder/src/Elastic.Markdown/Helpers/DocumentationObjectPoolProvider.cs
Lines 43 to 45 in bdfb9ca
For a usage example see:
docs-builder/src/Elastic.Markdown/Myst/Directives/DirectiveViewModel.cs
Line 16 in cb72d5e
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried to create an LlmMarkdownRenderer object pool based on the existing HtmlRenderSubscription.
Unfortunately, I don't think it will work because I have the BuildContext as a property on the LlmMarkdownRenderer.
But I guess I can still use it for StringWriter or StringBuilder.
WIP.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
b6d8419
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. I just noticed, what I did is not enough. I need to return the object.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK. I think I got it right now.
f975abd
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK OK. Now I think it's nice. 9e5d96d