Skip to content

Commit 23ee51b

Browse files
theletterfreakaleekMpdreamzCopilot
authored
Add llms.txt template and generator (#1928)
* Add boilerplate * Add H2 generator * Add absolute URLs generation * Add line * Remove line - sigh * No autogenerated summaries for now * Update src/Elastic.Markdown/Exporters/LlmMarkdownExporter.cs Co-authored-by: Jan Calanog <[email protected]> * Update src/tooling/docs-assembler/Cli/RepositoryCommands.cs Co-authored-by: Martijn Laarman <[email protected]> * Update src/tooling/docs-assembler/Navigation/LlmsNavigationEnhancer.cs Co-authored-by: Martijn Laarman <[email protected]> * Fix errors * List implementations of INavigationItem and throw exception * Refactor line 79 * Reuse and extend existing MakeAbsoluteUrl method * Remove redundant mapping * Add method to extract best title * Make links absolute in the boilerplate * Fix file generation * Refactor method * Restore file * Restore file from source * Add newline * Make all absolute LLM links use md terminations * Handle crosslinks * Simplify logic * Make method more general for all file extensions * Update src/Elastic.Markdown/Myst/Renderers/LlmMarkdown/LlmBlockRenderers.cs Co-authored-by: Martijn Laarman <[email protected]> * Remove md append logic * Remove md extension from boilerplate * Apply suggestion * Update src/Elastic.Markdown/Exporters/LlmMarkdownExporter.cs Co-authored-by: Martijn Laarman <[email protected]> * Update src/Elastic.Markdown/Myst/Renderers/LlmMarkdown/LlmInlineRenderers.cs Co-authored-by: Copilot <[email protected]> * Apply fix for boilerplate --------- Co-authored-by: Jan Calanog <[email protected]> Co-authored-by: Martijn Laarman <[email protected]> Co-authored-by: Copilot <[email protected]>
1 parent 910d13f commit 23ee51b

File tree

5 files changed

+219
-18
lines changed

5 files changed

+219
-18
lines changed

src/Elastic.Markdown/Exporters/LlmMarkdownExporter.cs

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,30 +17,56 @@ namespace Elastic.Markdown.Exporters;
1717
/// </summary>
1818
public class LlmMarkdownExporter : IMarkdownExporter
1919
{
20+
private const string LlmsTxtTemplate = """
21+
# Elastic Documentation
22+
23+
> Elastic provides an open source search, analytics, and AI platform, and out-of-the-box solutions for observability and security. The Search AI platform combines the power of search and generative AI to provide near real-time search and analysis with relevance to reduce your time to value.
24+
>
25+
>Elastic offers the following solutions or types of projects:
26+
>
27+
>* [Elasticsearch](https://www.elastic.co/docs/solutions/search): Build powerful search and RAG applications using Elasticsearch's vector database, AI toolkit, and advanced retrieval capabilities.
28+
>* [Elastic Observability](https://www.elastic.co/docs/solutions/observability): Gain comprehensive visibility into applications, infrastructure, and user experience through logs, metrics, traces, and other telemetry data, all in a single interface.
29+
>* [Elastic Security](https://www.elastic.co/docs/solutions/security): Combine SIEM, endpoint security, and cloud security to provide comprehensive tools for threat detection and prevention, investigation, and response.
30+
31+
The documentation is organized to guide you through your journey with Elastic, from learning the basics to deploying and managing complex solutions. Here is a detailed breakdown of the documentation structure:
32+
33+
* [**Elastic fundamentals**](https://www.elastic.co/docs/get-started): Understand the basics about the deployment options, platform, and solutions, and features of the documentation.
34+
* [**Solutions and use cases**](https://www.elastic.co/docs/solutions): Learn use cases, evaluate, and implement Elastic's solutions: Observability, Search, and Security.
35+
* [**Manage data**](https://www.elastic.co/docs/manage-data): Learn about data store primitives, ingestion and enrichment, managing the data lifecycle, and migrating data.
36+
* [**Explore and analyze**](https://www.elastic.co/docs/explore-analyze): Get value from data through querying, visualization, machine learning, and alerting.
37+
* [**Deploy and manage**](https://www.elastic.co/docs/deploy-manage): Deploy and manage production-ready clusters. Covers deployment options and maintenance tasks.
38+
* [**Manage your Cloud account**](https://www.elastic.co/docs/cloud-account): A dedicated section for user-facing cloud account tasks like resetting passwords.
39+
* [**Troubleshoot**](https://www.elastic.co/docs/troubleshoot): Identify and resolve problems.
40+
* [**Extend and contribute**](https://www.elastic.co/docs/extend): How to contribute to or integrate with Elastic, from open source to plugins to integrations.
41+
* [**Release notes**](https://www.elastic.co/docs/release-notes): Contains release notes and changelogs for each new release.
42+
* [**Reference**](https://www.elastic.co/docs/reference): Reference material for core tasks and manuals for optional products.
43+
""";
2044

2145
public ValueTask StartAsync(Cancel ctx = default) => ValueTask.CompletedTask;
2246

2347
public ValueTask StopAsync(Cancel ctx = default) => ValueTask.CompletedTask;
2448

25-
public ValueTask<bool> FinishExportAsync(IDirectoryInfo outputFolder, Cancel ctx)
49+
public async ValueTask<bool> FinishExportAsync(IDirectoryInfo outputFolder, Cancel ctx)
2650
{
2751
var outputDirectory = Path.Combine(outputFolder.FullName, "docs");
2852
var zipPath = Path.Combine(outputDirectory, "llm.zip");
29-
using (var zip = ZipFile.Open(zipPath, ZipArchiveMode.Create))
30-
{
31-
var llmsTxt = Path.Combine(outputDirectory, "llms.txt");
32-
var llmsTxtRelativePath = Path.GetRelativePath(outputDirectory, llmsTxt);
33-
_ = zip.CreateEntryFromFile(llmsTxt, llmsTxtRelativePath);
3453

35-
var markdownFiles = Directory.GetFiles(outputDirectory, "*.md", SearchOption.AllDirectories);
54+
// Create the llms.txt file with boilerplate content
55+
var llmsTxt = Path.Combine(outputDirectory, "llms.txt");
56+
await outputFolder.FileSystem.File.WriteAllTextAsync(llmsTxt, LlmsTxtTemplate, ctx);
57+
58+
using var zip = ZipFile.Open(zipPath, ZipArchiveMode.Create);
59+
var llmsTxtRelativePath = Path.GetRelativePath(outputDirectory, llmsTxt);
60+
_ = zip.CreateEntryFromFile(llmsTxt, llmsTxtRelativePath);
61+
62+
var markdownFiles = Directory.GetFiles(outputDirectory, "*.md", SearchOption.AllDirectories);
3663

37-
foreach (var file in markdownFiles)
38-
{
39-
var relativePath = Path.GetRelativePath(outputDirectory, file);
40-
_ = zip.CreateEntryFromFile(file, relativePath);
41-
}
64+
foreach (var file in markdownFiles)
65+
{
66+
var relativePath = Path.GetRelativePath(outputDirectory, file);
67+
_ = zip.CreateEntryFromFile(file, relativePath);
4268
}
43-
return ValueTask.FromResult(true);
69+
return true;
4470
}
4571

4672
public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext, Cancel ctx)
@@ -49,10 +75,12 @@ public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext,
4975
var outputFile = GetLlmOutputFile(fileContext);
5076
if (outputFile.Directory is { Exists: false })
5177
outputFile.Directory.Create();
52-
var contentWithMetadata = CreateLlmContentWithMetadata(fileContext, llmMarkdown);
78+
79+
var content = IsRootIndexFile(fileContext) ? LlmsTxtTemplate : CreateLlmContentWithMetadata(fileContext, llmMarkdown);
80+
5381
await fileContext.SourceFile.SourceFile.FileSystem.File.WriteAllTextAsync(
5482
outputFile.FullName,
55-
contentWithMetadata,
83+
content,
5684
Encoding.UTF8,
5785
ctx
5886
);
@@ -65,6 +93,12 @@ public static string ConvertToLlmMarkdown(MarkdownDocument document, BuildContex
6593
_ = renderer.Render(obj);
6694
});
6795

96+
/// <summary>
/// Reports whether the file being exported is the root index page, i.e. whether its default
/// output file is <c>index.html</c> directly inside the build's output directory.
/// </summary>
/// <param name="fileContext">Export context of the file currently being written.</param>
/// <returns><c>true</c> when the default output file path equals the root <c>index.html</c> path.</returns>
private static bool IsRootIndexFile(MarkdownExportFileContext fileContext)
{
    var build = fileContext.BuildContext;
    var rootIndexPath = Path.Combine(build.OutputDirectory.FullName, "index.html");

    // Resolve the candidate through the read file system so both sides are compared as FullName values.
    var rootIndex = build.ReadFileSystem.FileInfo.New(rootIndexPath);
    return fileContext.DefaultOutputFile.FullName == rootIndex.FullName;
}
68102
private static IFileInfo GetLlmOutputFile(MarkdownExportFileContext fileContext)
69103
{
70104
var source = fileContext.SourceFile.SourceFile;

src/Elastic.Markdown/Myst/Renderers/LlmMarkdown/LlmBlockRenderers.cs

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,42 @@ public static void RenderBlockWithIndentation(LlmMarkdownRenderer renderer, Mark
3939
}
4040

4141
/// <summary>
42-
/// Converts relative URLs to absolute URLs using BuildContext.CanonicalBaseUrl for better LLM consumption
42+
/// Converts relative URLs to absolute URLs using BuildContext.CanonicalBaseUrl for better LLM consumption.
43+
/// Also converts localhost URLs to canonical URLs.
4344
/// </summary>
4445
public static string? MakeAbsoluteUrl(LlmMarkdownRenderer renderer, string? url)
{
    var canonicalBaseUrl = renderer.BuildContext.CanonicalBaseUrl;

    // Without a canonical base there is nothing to resolve against; hand the URL back untouched.
    if (canonicalBaseUrl is null)
        return url;

    // Rewrite localhost links that live under /docs/ onto the canonical host.
    // The host is compared after parsing (not by string prefix) so look-alike hosts such as
    // "localhost.example.com" are left alone, and both http and https are accepted.
    if (!string.IsNullOrEmpty(url)
        && Uri.TryCreate(url, UriKind.Absolute, out var localhostUri)
        && (localhostUri.Scheme == Uri.UriSchemeHttp || localhostUri.Scheme == Uri.UriSchemeHttps)
        && string.Equals(localhostUri.Host, "localhost", StringComparison.OrdinalIgnoreCase)
        && localhostUri.AbsolutePath.StartsWith("/docs/", StringComparison.Ordinal))
    {
        // PathAndQuery + Fragment keeps any ?query and #anchor; AbsolutePath alone would drop them.
        var canonicalUrl = new Uri(canonicalBaseUrl, localhostUri.PathAndQuery + localhostUri.Fragment);
        return canonicalUrl.ToString();
    }

    // Everything else (relative URLs, other absolute URLs, null/empty) goes through the shared overload.
    return MakeAbsoluteUrl(canonicalBaseUrl, url);
}
64+
65+
/// <summary>
66+
/// Converts relative URLs to absolute URLs for LLM consumption
67+
/// </summary>
68+
public static string? MakeAbsoluteUrl(Uri? baseUri, string? url)
4569
{
4670
if (
4771
string.IsNullOrEmpty(url)
48-
|| renderer.BuildContext.CanonicalBaseUrl == null
72+
|| baseUri == null
4973
|| Uri.IsWellFormedUriString(url, UriKind.Absolute)
5074
|| !Uri.IsWellFormedUriString(url, UriKind.Relative))
5175
return url;
5276
try
5377
{
54-
var baseUri = renderer.BuildContext.CanonicalBaseUrl;
5578
var absoluteUri = new Uri(baseUri, url);
5679
return absoluteUri.ToString();
5780
}
@@ -60,6 +83,9 @@ public static void RenderBlockWithIndentation(LlmMarkdownRenderer renderer, Mark
6083
return url;
6184
}
6285
}
86+
87+
88+
6389
}
6490

6591
/// <summary>

src/Elastic.Markdown/Myst/Renderers/LlmMarkdown/LlmInlineRenderers.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ protected override void Write(LlmMarkdownRenderer renderer, LinkInline obj)
4343
}
4444
renderer.Writer.Write(")");
4545
}
46+
4647
}
4748

4849
public class LlmEmphasisInlineRenderer : MarkdownObjectRenderer<LlmMarkdownRenderer, EmphasisInline>

src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System.Collections.Frozen;
66
using System.IO.Abstractions;
7+
using System.Text;
78
using Actions.Core.Services;
89
using Elastic.Documentation.Assembler.Navigation;
910
using Elastic.Documentation.Assembler.Sourcing;
@@ -101,9 +102,37 @@ Cancel ctx
101102
sitemapBuilder.Generate();
102103
}
103104

105+
if (exporters.Contains(Exporter.LLMText))
106+
{
107+
_logger.LogInformation("Enhancing llms.txt with navigation structure");
108+
var llmsEnhancer = new LlmsNavigationEnhancer();
109+
await EnhanceLlmsTxtFile(assembleContext, navigation, llmsEnhancer, ctx);
110+
}
111+
112+
await collector.StopAsync(ctx);
113+
104114
_logger.LogInformation("Finished building and exporting exporters {Exporters}", exporters);
105115

106116
return strict.Value ? collector.Errors + collector.Warnings == 0 : collector.Errors == 0;
107117
}
108118

119+
/// <summary>
/// Appends the generated navigation sections to the <c>docs/llms.txt</c> file under the output directory.
/// Does nothing when that file does not exist or when no navigation sections were generated.
/// </summary>
/// <param name="context">Supplies the output directory, the read/write file systems and the environment URI.</param>
/// <param name="navigation">Global navigation passed to <paramref name="enhancer"/>.</param>
/// <param name="enhancer">Generator that renders the navigation sections.</param>
/// <param name="ctx">Cancellation token flowed into the file read and write.</param>
private static async Task EnhanceLlmsTxtFile(AssembleContext context, GlobalNavigation navigation, LlmsNavigationEnhancer enhancer, Cancel ctx)
{
    var llmsTxtPath = Path.Combine(context.OutputDirectory.FullName, "docs", "llms.txt");

    var readFs = context.ReadFileSystem;
    if (!readFs.File.Exists(llmsTxtPath))
        return; // No llms.txt file to enhance

    // Assembler always uses the production URL as canonical base URL
    var canonicalBaseUrl = new Uri(context.Environment.Uri);
    var navigationSections = enhancer.GenerateNavigationSections(navigation, canonicalBaseUrl);

    // Nothing to append: leave the existing file exactly as it is.
    if (string.IsNullOrWhiteSpace(navigationSections))
        return;

    // Append the navigation sections to the existing boilerplate
    var existingContent = await readFs.File.ReadAllTextAsync(llmsTxtPath, ctx);
    var enhancedContent = existingContent + Environment.NewLine + navigationSections;

    // Encoding.UTF8 would prepend a byte-order mark; write BOM-less UTF-8 so the file still
    // starts with its "# ..." heading.
    // NOTE(review): if llm.zip is produced before this runs, the archived llms.txt will not
    // contain these sections — confirm the intended ordering.
    var utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
    await context.WriteFileSystem.File.WriteAllTextAsync(llmsTxtPath, enhancedContent, utf8NoBom, ctx);
}
109138
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System;
6+
using System.Globalization;
7+
using System.Linq;
8+
using System.Text;
9+
using Elastic.Documentation.Assembler;
10+
using Elastic.Documentation.Assembler.Navigation;
11+
using Elastic.Documentation.Site.Navigation;
12+
using Elastic.Markdown.IO;
13+
using Elastic.Markdown.IO.Navigation;
14+
using Elastic.Markdown.Myst.Renderers.LlmMarkdown;
15+
16+
namespace Elastic.Documentation.Assembler.Navigation;
17+
18+
/// <summary>
/// Generates enhanced navigation sections for the llms.txt file: one H2 heading per visible
/// top-level documentation group, followed by a bullet list linking that group's visible children.
/// </summary>
public class LlmsNavigationEnhancer
{
    /// <summary>
    /// Renders the navigation sections as markdown.
    /// </summary>
    /// <param name="navigation">Global navigation whose top-level items are rendered.</param>
    /// <param name="canonicalBaseUrl">Base URL used to turn each child's URL into an absolute link.</param>
    /// <returns>The markdown text; empty when there is no visible top-level documentation group.</returns>
    /// <exception cref="InvalidOperationException">A child is of a navigation item type this class does not know about.</exception>
    public string GenerateNavigationSections(GlobalNavigation navigation, Uri canonicalBaseUrl)
    {
        var content = new StringBuilder();

        // Get top-level navigation items (excluding hidden ones)
        foreach (var topLevelItem in navigation.TopLevelItems.Where(item => !item.Hidden))
        {
            if (topLevelItem is not DocumentationGroup group)
                continue;

            // Create H2 section for the category - use H1 title if available, fallback to navigation title
            var categoryTitle = GetBestTitle(group);
            _ = content.AppendLine(CultureInfo.InvariantCulture, $"## {categoryTitle}");
            _ = content.AppendLine();

            // Materialized once: the children are tested for emptiness and then iterated.
            var firstLevelChildren = GetFirstLevelChildren(group);
            if (firstLevelChildren.Length == 0)
                continue;

            foreach (var child in firstLevelChildren)
            {
                var title = GetBestTitle(child);
                var url = LlmRenderingHelpers.MakeAbsoluteUrl(canonicalBaseUrl, child.Url);
                var description = GetDescription(child);

                _ = !string.IsNullOrEmpty(description)
                    ? content.AppendLine(CultureInfo.InvariantCulture, $"* [{title}]({url}): {description}")
                    : content.AppendLine(CultureInfo.InvariantCulture, $"* [{title}]({url})");
            }
            _ = content.AppendLine();
        }

        return content.ToString();
    }

    /// <summary>
    /// Returns the group's direct, non-hidden navigation items as an array.
    /// </summary>
    private static INavigationItem[] GetFirstLevelChildren(DocumentationGroup group) =>
        group.NavigationItems.Where(i => !i.Hidden).ToArray();

    /// <summary>
    /// Gets the best title for a navigation item, preferring H1 content over navigation title
    /// </summary>
    private static string GetBestTitle(INavigationItem navigationItem) => navigationItem switch
    {
        // For file navigation items, prefer the H1 title from the markdown content
        FileNavigationItem fileItem when !string.IsNullOrEmpty(fileItem.Model.Title)
            => fileItem.Model.Title,
        FileNavigationItem fileItem
            => fileItem.NavigationTitle,

        // For documentation groups, prefer the H1 title from the index file
        DocumentationGroup group when !string.IsNullOrEmpty(group.Index?.Title)
            => group.Index.Title,
        DocumentationGroup group
            => group.NavigationTitle,

        // For other navigation item types, use the navigation title
        _ => navigationItem.NavigationTitle
    };

    /// <summary>
    /// Gets the frontmatter description for a navigation item, or <c>null</c> when the item has none.
    /// </summary>
    /// <exception cref="InvalidOperationException">The item's type is not one of the handled navigation item types.</exception>
    private static string? GetDescription(INavigationItem navigationItem) => navigationItem switch
    {
        // For file navigation items, extract from frontmatter
        FileNavigationItem { Model: MarkdownFile markdownFile }
            => markdownFile.YamlFrontMatter?.Description,
        // A handled type without a markdown model simply has no description
        FileNavigationItem
            => null,

        // Table of contents trees are listed before DocumentationGroup: the more specific type
        // has to come first, otherwise the DocumentationGroup arm would always win.
        TableOfContentsTree { Index: MarkdownFile indexFile }
            => indexFile.YamlFrontMatter?.Description,

        // For documentation groups, try to get from index file
        DocumentationGroup { Index: MarkdownFile indexFile }
            => indexFile.YamlFrontMatter?.Description,
        // A group without a markdown index has no description; this is not an unhandled type
        DocumentationGroup
            => null,

        // Cross-repository links don't have descriptions in frontmatter
        CrossLinkNavigationItem => null,

        // API-related navigation items (these don't have markdown frontmatter)
        // Check by namespace to avoid direct assembly references
        INavigationItem item when item.GetType().FullName?.StartsWith("Elastic.ApiExplorer.", StringComparison.Ordinal) == true => null,

        // Throw exception for any unhandled navigation item types
        _ => throw new InvalidOperationException($"Unhandled navigation item type: {navigationItem.GetType().FullName}")
    };
}

0 commit comments

Comments
 (0)