diff --git a/src/Elastic.Markdown/Exporters/LlmMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/LlmMarkdownExporter.cs
index aae0f3ac7..365e1286f 100644
--- a/src/Elastic.Markdown/Exporters/LlmMarkdownExporter.cs
+++ b/src/Elastic.Markdown/Exporters/LlmMarkdownExporter.cs
@@ -17,30 +17,60 @@ namespace Elastic.Markdown.Exporters;
 /// </summary>
 public class LlmMarkdownExporter : IMarkdownExporter
 {
+	/// <summary>
+	/// Boilerplate introduction written to <c>docs/llms.txt</c> by FinishExportAsync and used by ExportAsync as the content of the root index page.
+	/// </summary>
+	private const string LlmsTxtTemplate = """
+		# Elastic Documentation
+
+		> Elastic provides an open source search, analytics, and AI platform, and out-of-the-box solutions for observability and security. The Search AI platform combines the power of search and generative AI to provide near real-time search and analysis with relevance to reduce your time to value.
+		>
+		>Elastic offers the following solutions or types of projects:
+		>
+		>* [Elasticsearch](https://www.elastic.co/docs/solutions/search): Build powerful search and RAG applications using Elasticsearch's vector database, AI toolkit, and advanced retrieval capabilities.
+		>* [Elastic Observability](https://www.elastic.co/docs/solutions/observability): Gain comprehensive visibility into applications, infrastructure, and user experience through logs, metrics, traces, and other telemetry data, all in a single interface.
+		>* [Elastic Security](https://www.elastic.co/docs/solutions/security): Combine SIEM, endpoint security, and cloud security to provide comprehensive tools for threat detection and prevention, investigation, and response.
+
+		The documentation is organized to guide you through your journey with Elastic, from learning the basics to deploying and managing complex solutions. Here is a detailed breakdown of the documentation structure:
+
+		* [**Elastic fundamentals**](https://www.elastic.co/docs/get-started): Understand the basics about the deployment options, platform, and solutions, and features of the documentation.
+		* [**Solutions and use cases**](https://www.elastic.co/docs/solutions): Learn use cases, evaluate, and implement Elastic's solutions: Observability, Search, and Security.
+		* [**Manage data**](https://www.elastic.co/docs/manage-data): Learn about data store primitives, ingestion and enrichment, managing the data lifecycle, and migrating data.
+		* [**Explore and analyze**](https://www.elastic.co/docs/explore-analyze): Get value from data through querying, visualization, machine learning, and alerting.
+		* [**Deploy and manage**](https://www.elastic.co/docs/deploy-manage): Deploy and manage production-ready clusters. Covers deployment options and maintenance tasks.
+		* [**Manage your Cloud account**](https://www.elastic.co/docs/cloud-account): A dedicated section for user-facing cloud account tasks like resetting passwords.
+		* [**Troubleshoot**](https://www.elastic.co/docs/troubleshoot): Identify and resolve problems.
+		* [**Extend and contribute**](https://www.elastic.co/docs/extend): How to contribute to or integrate with Elastic, from open source to plugins to integrations.
+		* [**Release notes**](https://www.elastic.co/docs/release-notes): Contains release notes and changelogs for each new release.
+		* [**Reference**](https://www.elastic.co/docs/reference): Reference material for core tasks and manuals for optional products.
+		""";
+
 	public ValueTask StartAsync(Cancel ctx = default) => ValueTask.CompletedTask;
 
 	public ValueTask StopAsync(Cancel ctx = default) => ValueTask.CompletedTask;
 
-	public ValueTask<bool> FinishExportAsync(IDirectoryInfo outputFolder, Cancel ctx)
+	public async ValueTask<bool> FinishExportAsync(IDirectoryInfo outputFolder, Cancel ctx)
 	{
 		var outputDirectory = Path.Combine(outputFolder.FullName, "docs");
 		var zipPath = Path.Combine(outputDirectory, "llm.zip");
 
-		using (var zip = ZipFile.Open(zipPath, ZipArchiveMode.Create))
-		{
-			var llmsTxt = Path.Combine(outputDirectory, "llms.txt");
-			var llmsTxtRelativePath = Path.GetRelativePath(outputDirectory, llmsTxt);
-			_ = zip.CreateEntryFromFile(llmsTxt, llmsTxtRelativePath);
-			var markdownFiles = Directory.GetFiles(outputDirectory, "*.md", SearchOption.AllDirectories);
+		// Create the llms.txt file with boilerplate content
+		var llmsTxt = Path.Combine(outputDirectory, "llms.txt");
+		await outputFolder.FileSystem.File.WriteAllTextAsync(llmsTxt, LlmsTxtTemplate, ctx);
+
+		using var zip = ZipFile.Open(zipPath, ZipArchiveMode.Create);
+		var llmsTxtRelativePath = Path.GetRelativePath(outputDirectory, llmsTxt);
+		_ = zip.CreateEntryFromFile(llmsTxt, llmsTxtRelativePath);
+
+		var markdownFiles = Directory.GetFiles(outputDirectory, "*.md", SearchOption.AllDirectories);
 
-			foreach (var file in markdownFiles)
-			{
-				var relativePath = Path.GetRelativePath(outputDirectory, file);
-				_ = zip.CreateEntryFromFile(file, relativePath);
-			}
+		foreach (var file in markdownFiles)
+		{
+			var relativePath = Path.GetRelativePath(outputDirectory, file);
+			_ = zip.CreateEntryFromFile(file, relativePath);
 		}
 
-		return ValueTask.FromResult(true);
+		return true;
 	}
 
 	public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext, Cancel ctx)
@@ -49,10 +79,13 @@ public async ValueTask<bool> ExportAsync(MarkdownExportFileContext fileContext,
 		var outputFile = GetLlmOutputFile(fileContext);
 		if (outputFile.Directory is { Exists: false })
 			outputFile.Directory.Create();
-		var contentWithMetadata = CreateLlmContentWithMetadata(fileContext, llmMarkdown);
+
+		// The root index page gets the llms.txt boilerplate instead of its rendered markdown
+		var content = IsRootIndexFile(fileContext) ? LlmsTxtTemplate : CreateLlmContentWithMetadata(fileContext, llmMarkdown);
+
 		await fileContext.SourceFile.SourceFile.FileSystem.File.WriteAllTextAsync(
 			outputFile.FullName,
-			contentWithMetadata,
+			content,
 			Encoding.UTF8,
 			ctx
 		);
@@ -65,6 +98,16 @@ public static string ConvertToLlmMarkdown(MarkdownDocument document, BuildContex
 			_ = renderer.Render(obj);
 		});
 
+	/// <summary>
+	/// Returns true when the file's default output is <c>index.html</c> directly under the build output directory.
+	/// </summary>
+	private static bool IsRootIndexFile(MarkdownExportFileContext fileContext)
+	{
+		var fs = fileContext.BuildContext.ReadFileSystem;
+		var expected = fs.FileInfo.New(Path.Combine(fileContext.BuildContext.OutputDirectory.FullName, "index.html"));
+		return fileContext.DefaultOutputFile.FullName == expected.FullName;
+	}
+
 	private static IFileInfo GetLlmOutputFile(MarkdownExportFileContext fileContext)
 	{
 		var source = fileContext.SourceFile.SourceFile;
diff --git a/src/Elastic.Markdown/Myst/Renderers/LlmMarkdown/LlmBlockRenderers.cs b/src/Elastic.Markdown/Myst/Renderers/LlmMarkdown/LlmBlockRenderers.cs
index 7cb3c3a24..75cb698d2 100644
--- a/src/Elastic.Markdown/Myst/Renderers/LlmMarkdown/LlmBlockRenderers.cs
+++ b/src/Elastic.Markdown/Myst/Renderers/LlmMarkdown/LlmBlockRenderers.cs
@@ -39,19 +39,43 @@ public static void RenderBlockWithIndentation(LlmMarkdownRenderer renderer, Mark
 	}
 
 	/// <summary>
-	/// Converts relative URLs to absolute URLs using BuildContext.CanonicalBaseUrl for better LLM consumption
+	/// Converts relative URLs to absolute URLs using BuildContext.CanonicalBaseUrl for better LLM consumption.
+	/// Also converts localhost URLs to canonical URLs.
 	/// </summary>
 	public static string? MakeAbsoluteUrl(LlmMarkdownRenderer renderer, string? url)
+	{
+		var canonicalBaseUrl = renderer.BuildContext.CanonicalBaseUrl;
+		if (canonicalBaseUrl == null)
+			return url;
+
+		// Rewrite links that point at a local docs server (http://localhost[:port]/docs/...) onto the canonical host.
+		// The host is compared after parsing so look-alike hosts such as localhost.example.com are left untouched.
+		if (Uri.TryCreate(url, UriKind.Absolute, out var localhostUri)
+			&& localhostUri.Scheme == Uri.UriSchemeHttp
+			&& string.Equals(localhostUri.Host, "localhost", StringComparison.OrdinalIgnoreCase)
+			&& localhostUri.AbsolutePath.StartsWith("/docs/", StringComparison.Ordinal))
+		{
+			// PathAndQuery + Fragment keeps ?query and #anchor, which AbsolutePath alone would drop
+			var canonicalUrl = new Uri(canonicalBaseUrl, localhostUri.PathAndQuery + localhostUri.Fragment);
+			return canonicalUrl.ToString();
+		}
+
+		return MakeAbsoluteUrl(canonicalBaseUrl, url);
+	}
+
+	/// <summary>
+	/// Converts relative URLs to absolute URLs for LLM consumption
+	/// </summary>
+	public static string? MakeAbsoluteUrl(Uri? baseUri, string? url)
 	{
 		if (
 			string.IsNullOrEmpty(url)
-			|| renderer.BuildContext.CanonicalBaseUrl == null
+			|| baseUri == null
 			|| Uri.IsWellFormedUriString(url, UriKind.Absolute)
 			|| !Uri.IsWellFormedUriString(url, UriKind.Relative))
 			return url;
 		try
 		{
-			var baseUri = renderer.BuildContext.CanonicalBaseUrl;
 			var absoluteUri = new Uri(baseUri, url);
 			return absoluteUri.ToString();
 		}
diff --git a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs 
b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs
index 6a18791f3..a9c9aa77e 100644
--- a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs
+++ b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs
@@ -4,6 +4,7 @@
 
 using System.Collections.Frozen;
 using System.IO.Abstractions;
+using System.Text;
 using Actions.Core.Services;
 using Elastic.Documentation.Assembler.Navigation;
 using Elastic.Documentation.Assembler.Sourcing;
@@ -101,9 +102,43 @@ Cancel ctx
 			sitemapBuilder.Generate();
 		}
 
+		if (exporters.Contains(Exporter.LLMText))
+		{
+			_logger.LogInformation("Enhancing llms.txt with navigation structure");
+			var llmsEnhancer = new LlmsNavigationEnhancer();
+			await EnhanceLlmsTxtFile(assembleContext, navigation, llmsEnhancer, ctx);
+		}
+
+		await collector.StopAsync(ctx);
+
 		_logger.LogInformation("Finished building and exporting exporters {Exporters}", exporters);
 
 		return strict.Value ? collector.Errors + collector.Warnings == 0 : collector.Errors == 0;
 	}
 
+	/// <summary>
+	/// Appends the generated navigation sections to the <c>docs/llms.txt</c> file in the output directory.
+	/// Does nothing when that file does not exist.
+	/// </summary>
+	private static async Task EnhanceLlmsTxtFile(AssembleContext context, GlobalNavigation navigation, LlmsNavigationEnhancer enhancer, Cancel ctx)
+	{
+		var llmsTxtPath = Path.Combine(context.OutputDirectory.FullName, "docs", "llms.txt");
+
+		var readFs = context.ReadFileSystem;
+		if (!readFs.File.Exists(llmsTxtPath))
+			return; // No llms.txt file to enhance
+
+		var existingContent = await readFs.File.ReadAllTextAsync(llmsTxtPath, ctx);
+		// Assembler always uses the production URL as canonical base URL
+		var canonicalBaseUrl = new Uri(context.Environment.Uri);
+		var navigationSections = enhancer.GenerateNavigationSections(navigation, canonicalBaseUrl);
+
+		// Append the navigation sections to the existing boilerplate
+		var enhancedContent = existingContent + Environment.NewLine + navigationSections;
+
+		// Encoding.UTF8 would prepend a byte order mark; keep llms.txt BOM-free like the file the exporter wrote
+		var utf8WithoutBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
+		var writeFs = context.WriteFileSystem;
+		await writeFs.File.WriteAllTextAsync(llmsTxtPath, enhancedContent, utf8WithoutBom, ctx);
+	}
 }
diff --git a/src/services/Elastic.Documentation.Assembler/Navigation/LlmsNavigationEnhancer.cs b/src/services/Elastic.Documentation.Assembler/Navigation/LlmsNavigationEnhancer.cs
new file mode 100644
index 000000000..e269cb7d3
--- /dev/null
+++ b/src/services/Elastic.Documentation.Assembler/Navigation/LlmsNavigationEnhancer.cs
@@ -0,0 +1,110 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using System;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using Elastic.Documentation.Assembler;
+using Elastic.Documentation.Assembler.Navigation;
+using Elastic.Documentation.Site.Navigation;
+using Elastic.Markdown.IO;
+using Elastic.Markdown.IO.Navigation;
+using Elastic.Markdown.Myst.Renderers.LlmMarkdown;
+
+namespace Elastic.Documentation.Assembler.Navigation;
+
+/// <summary>
+/// Generates enhanced navigation sections for the llms.txt file
+/// </summary>
+public class LlmsNavigationEnhancer
+{
+	/// <summary>
+	/// Builds one H2 section per visible top-level documentation group. Each section lists the group's
+	/// visible first-level children as markdown links, followed by their frontmatter description when present.
+	/// </summary>
+	/// <param name="navigation">Global navigation whose top-level items are turned into sections.</param>
+	/// <param name="canonicalBaseUrl">Base URL used to make the navigation URLs absolute.</param>
+	/// <returns>The generated markdown; empty when there is no visible documentation group.</returns>
+	public string GenerateNavigationSections(GlobalNavigation navigation, Uri canonicalBaseUrl)
+	{
+		var content = new StringBuilder();
+
+		// Only visible documentation groups become sections; other top-level item types are skipped
+		var groups = navigation.TopLevelItems.Where(item => !item.Hidden).OfType<DocumentationGroup>();
+		foreach (var group in groups)
+		{
+			// H2 heading for the category - H1 title if available, navigation title otherwise
+			_ = content.AppendLine(CultureInfo.InvariantCulture, $"## {GetBestTitle(group)}");
+			_ = content.AppendLine();
+
+			// Materialize once: the filtered children are both tested for emptiness and iterated
+			var children = group.NavigationItems.Where(i => !i.Hidden).ToArray();
+			if (children.Length == 0)
+				continue;
+
+			foreach (var child in children)
+			{
+				var title = GetBestTitle(child);
+				var url = LlmRenderingHelpers.MakeAbsoluteUrl(canonicalBaseUrl, child.Url);
+				var description = GetDescription(child);
+
+				_ = string.IsNullOrEmpty(description)
+					? content.AppendLine(CultureInfo.InvariantCulture, $"* [{title}]({url})")
+					: content.AppendLine(CultureInfo.InvariantCulture, $"* [{title}]({url}): {description}");
+			}
+			_ = content.AppendLine();
+		}
+
+		return content.ToString();
+	}
+
+	/// <summary>
+	/// Gets the best title for a navigation item, preferring H1 content over navigation title
+	/// </summary>
+	private static string GetBestTitle(INavigationItem navigationItem) => navigationItem switch
+	{
+		// For file navigation items, prefer the H1 title from the markdown content
+		FileNavigationItem fileItem when !string.IsNullOrEmpty(fileItem.Model.Title)
+			=> fileItem.Model.Title,
+		FileNavigationItem fileItem
+			=> fileItem.NavigationTitle,
+
+		// For documentation groups, prefer the H1 title from the index file
+		DocumentationGroup group when !string.IsNullOrEmpty(group.Index?.Title)
+			=> group.Index.Title,
+		DocumentationGroup group
+			=> group.NavigationTitle,
+
+		// For other navigation item types, use the navigation title
+		_ => navigationItem.NavigationTitle
+	};
+
+	/// <summary>
+	/// Gets the frontmatter description of a navigation item, or null when the item has none.
+	/// </summary>
+	/// <exception cref="InvalidOperationException">The navigation item type is not one of the handled kinds.</exception>
+	private static string? GetDescription(INavigationItem navigationItem) => navigationItem switch
+	{
+		// Files: read the description from the markdown frontmatter
+		FileNavigationItem { Model: MarkdownFile markdownFile } => markdownFile.YamlFrontMatter?.Description,
+		// A file item without a markdown model is a known type that simply has no description
+		FileNavigationItem => null,
+
+		// Groups: read the description from the index file. This arm also covers TableOfContentsTree,
+		// which derives from DocumentationGroup, so it needs no arm of its own.
+		DocumentationGroup { Index: MarkdownFile indexFile } => indexFile.YamlFrontMatter?.Description,
+		DocumentationGroup => null,
+
+		// Cross-repository links don't have descriptions in frontmatter
+		CrossLinkNavigationItem => null,
+
+		// API-related navigation items (these don't have markdown frontmatter)
+		// Check by namespace to avoid direct assembly references
+		INavigationItem item when item.GetType().FullName?.StartsWith("Elastic.ApiExplorer.", StringComparison.Ordinal) == true => null,
+
+		// Throw exception for any unhandled navigation item types
+		_ => throw new InvalidOperationException($"Unhandled navigation item type: {navigationItem.GetType().FullName}")
+	};
+}