diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 612b835d5..c66493fe3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,7 +94,17 @@ jobs: run: dotnet run --project build -c release -- unit-test - name: Publish AOT + if: ${{ matrix.os != 'ubuntu-latest' }} # publish containers already validates AOT build run: dotnet run --project build -c release -- publishbinaries + + - name: Publish Containers + if: ${{ matrix.os == 'ubuntu-latest' }} + run: dotnet run --project build -c release -- publishcontainers + + - name: Run Container + if: ${{ matrix.os == 'ubuntu-latest' }} + run: docker run elastic/docs-builder:ci-${{ github.event.pull_request.number }} --help + integration: if: false diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7a39bd772..493395e69 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -67,7 +67,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Publish Containers - run: ./build.sh publishcontainers\ + run: ./build.sh publishcontainers build-lambda: needs: diff --git a/Directory.Packages.props b/Directory.Packages.props index d785e6180..6003ac145 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -23,8 +23,9 @@ + - + diff --git a/build/Targets.fs b/build/Targets.fs index 47baaf75a..b82b6c7fe 100644 --- a/build/Targets.fs +++ b/build/Targets.fs @@ -77,32 +77,40 @@ let private publishZip _ = let private publishContainers _ = let createImage project = + let ci = Environment.environVarOrNone "GITHUB_ACTIONS" + let pr = + match Environment.environVarOrNone "GITHUB_REF_NAME" with + | None -> None + | Some s when s.EndsWith "/merge" -> Some (s.Split('/') |> Seq.head) + | _ -> None let imageTag = match project with - | "docs-builder" -> "jammy-chiseled-aot" - | _ -> "jammy-chiseled-aot" + | _ -> "9.0-noble-chiseled-aot" let labels = let exitCode = exec { validExitCode (fun _ -> true) exit_code_of "git" "describe" "--tags" "--exact-match" "HEAD" } - match exitCode with | 0 -> "edge;latest" | _ -> "edge" + match (exitCode, pr) with + | 0, _ -> "edge;latest" + | _, None -> "edge" + | _, Some pr -> $"ci-%s{pr}" let args = ["publish"; $"src/tooling/%s{project}/%s{project}.csproj"] @ [ "/t:PublishContainer"; "-p"; "DebugType=none"; - "-p"; $"ContainerBaseImage=mcr.microsoft.com/dotnet/nightly/runtime-deps:8.0-%s{imageTag}"; + "-p"; $"ContainerBaseImage=mcr.microsoft.com/dotnet/nightly/runtime-deps:%s{imageTag}"; "-p"; $"ContainerImageTags=\"%s{labels};%s{Software.Version.Normalize()}\"" "-p"; $"ContainerRepository=elastic/%s{project}" ] let registry = - match Environment.environVarOrNone "GITHUB_ACTIONS" with - | None -> [] - | Some _ -> [ - "-p"; "ContainerRegistry=ghcr.io" - "-p"; "ContainerUser=1001:1001"; - ] + match (ci, pr) with + | Some _, None -> [ + "-p"; "ContainerRegistry=ghcr.io" + "-p"; "ContainerUser=1001:1001"; + ] + | _, _ -> [] exec { run "dotnet" (args @ registry) } createImage "docs-builder" createImage "docs-assembler" diff --git a/src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/AskAi/AskAiSuggestions.tsx b/src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/AskAi/AskAiSuggestions.tsx index 0af28a82c..7399095dd 100644 --- a/src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/AskAi/AskAiSuggestions.tsx +++ b/src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/AskAi/AskAiSuggestions.tsx @@ -1,5 +1,11 @@ import { useSearchActions, useSearchTerm } from '../search.store' -import { EuiButton, EuiSpacer, EuiText, useEuiTheme } from '@elastic/eui' +import { + EuiButton, + EuiIcon, + EuiSpacer, + EuiText, + useEuiTheme, +} from '@elastic/eui' import { css } from '@emotion/react' import * as React from 'react' @@ -26,7 +32,16 @@ export const AskAiSuggestions = (props: Props) => { ` return ( <> - Ask Elastic Docs AI Assistant +
+ + Ask Elastic Docs AI Assistant +
{searchTerm && ( { const searchTerm = useSearchTerm() - const { data, error, isLoading } = useSearchQuery() + const [activePage, setActivePage] = useState(0) + const debouncedSearchTerm = useDebounce(searchTerm, 300) + useEffect(() => { + setActivePage(0) + }, [debouncedSearchTerm]) + const { data, error, isLoading, isFetching } = useSearchQuery({ + searchTerm, + pageNumber: activePage + 1, + }) const { euiTheme } = useEuiTheme() if (!searchTerm) { @@ -23,88 +37,193 @@ export const SearchResults = () => { return
Error loading search results: {error.message}
} - if (isLoading) { - return ( -
- Loading search results... + return ( +
+
+ {isLoading || isFetching ? ( + + ) : ( + + )} + + Search results for{' '} + + {searchTerm} + +
- ) - } + + {data && ( + <> +
    + {data.results.map((result) => ( + + ))} +
+
+ + setActivePage(activePage) + } + /> +
+ + )} +
+ ) +} - if (!data || data.results.length === 0) { - return No results found for "{searchTerm}" - } +interface SearchResultListItemProps { + item: SearchResultItem +} + +function SearchResultListItem({ item: result }: SearchResultListItemProps) { + const { euiTheme } = useEuiTheme() + const searchTerm = useSearchTerm() + const highlightSearchTerms = useMemo( + () => searchTerm.toLowerCase().split(' '), + [searchTerm] + ) - const buttonCss = css` - border: none; - vertical-align: top; - justify-content: flex-start; - block-size: 100%; - padding-block: 4px; - & > span { - justify-content: flex-start; - align-items: flex-start; - } - svg { - color: ${euiTheme.colors.textSubdued}; - } - .euiIcon { - margin-top: 4px; - } - ` + if (highlightSearchTerms.includes('esql')) { + highlightSearchTerms.push('es|ql') + } - const trimDescription = (description: string) => { - const limit = 200 - return description.length > limit - ? description.slice(0, limit) + '...' - : description + if (highlightSearchTerms.includes('dotnet')) { + highlightSearchTerms.push('.net') } + return ( +
  • +
    + +
    + + + {result.title} + + + +
    +
    +
  • + ) +} +function Breadcrumbs({ + parents, + highlightSearchTerms, +}: { + parents: SearchResultItem['parents'] + highlightSearchTerms: string[] +}) { + const { euiTheme } = useEuiTheme() + const { fontSize: smallFontsize } = useEuiFontSize('xs') return ( -
    - Search Results for "{searchTerm}" - -
      - {data.results.map((result) => ( -
    • - -
      ( +
    • + + - {result.title} - - - {trimDescription(result.description)} - -
    - - {/**/} - {/*{result.title}*/} + {parent.title} + + + ))} - -
    + ) } diff --git a/src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/Search/useSearchQuery.ts b/src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/Search/useSearchQuery.ts index d9cd3455f..f3ab3ad5f 100644 --- a/src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/Search/useSearchQuery.ts +++ b/src/Elastic.Documentation.Site/Assets/web-components/SearchOrAskAi/Search/useSearchQuery.ts @@ -1,36 +1,56 @@ -import { useSearchTerm } from '../search.store' -import { useQuery } from '@tanstack/react-query' +import { keepPreviousData, useQuery } from '@tanstack/react-query' import { useDebounce } from '@uidotdev/usehooks' import * as z from 'zod' +const SearchResultItemParent = z.object({ + url: z.string(), + title: z.string(), +}) + const SearchResultItem = z.object({ url: z.string(), title: z.string(), description: z.string(), score: z.number(), + parents: z.array(SearchResultItemParent), }) +export type SearchResultItem = z.infer + const SearchResponse = z.object({ results: z.array(SearchResultItem), totalResults: z.number(), + pageCount: z.number(), + pageNumber: z.number(), + pageSize: z.number(), }) -type SearchResponse = z.infer +export type SearchResponse = z.infer + +type Props = { + searchTerm: string + pageNumber?: number +} -export const useSearchQuery = () => { - const searchTerm = useSearchTerm() +export const useSearchQuery = ({ searchTerm, pageNumber = 1 }: Props) => { const trimmedSearchTerm = searchTerm.trim() const debouncedSearchTerm = useDebounce(trimmedSearchTerm, 300) return useQuery({ - queryKey: ['search', { searchTerm: debouncedSearchTerm }], + queryKey: [ + 'search', + { searchTerm: debouncedSearchTerm.toLowerCase(), pageNumber }, + ], queryFn: async () => { if (!debouncedSearchTerm || debouncedSearchTerm.length < 1) { return SearchResponse.parse({ results: [], totalResults: 0 }) } + const params = new URLSearchParams({ + q: debouncedSearchTerm, + page: pageNumber.toString(), + }) const response = await fetch( - '/docs/_api/v1/search?q=' + - encodeURIComponent(debouncedSearchTerm) + '/docs/_api/v1/search?' + params.toString() ) if (!response.ok) { throw new Error( @@ -42,6 +62,7 @@ export const useSearchQuery = () => { }, enabled: !!trimmedSearchTerm && trimmedSearchTerm.length >= 1, refetchOnWindowFocus: false, - staleTime: 1000 * 60 * 10, // 10 minutes + placeholderData: keepPreviousData, + staleTime: 1000 * 60 * 5, // 5 minutes }) } diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs index 2053ec6ee..145ab0d66 100644 --- a/src/Elastic.Documentation/Search/DocumentationDocument.cs +++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs @@ -8,6 +8,15 @@ namespace Elastic.Documentation.Search; +public record ParentDocument +{ + [JsonPropertyName("title")] + public string? Title { get; set; } + + [JsonPropertyName("url")] + public string? Url { get; set; } +} + public record DocumentationDocument { [JsonPropertyName("title")] @@ -31,6 +40,12 @@ public record DocumentationDocument [JsonPropertyName("body")] public string? Body { get; set; } + [JsonPropertyName("url_segment_count")] + public int? UrlSegmentCount { get; set; } + [JsonPropertyName("abstract")] public string? Abstract { get; set; } + + [JsonPropertyName("parents")] + public ParentDocument[] Parents { get; set; } = []; } diff --git a/src/Elastic.Markdown/DocumentationGenerator.cs b/src/Elastic.Markdown/DocumentationGenerator.cs index b4133f990..25ea85351 100644 --- a/src/Elastic.Markdown/DocumentationGenerator.cs +++ b/src/Elastic.Markdown/DocumentationGenerator.cs @@ -265,7 +265,8 @@ private async Task ProcessFile(HashSet offendingFiles, DocumentationFile Resolvers = DocumentationSet.MarkdownParser.Resolvers, Document = document, SourceFile = markdown, - DefaultOutputFile = outputFile + DefaultOutputFile = outputFile, + DocumentationSet = DocumentationSet }, ctx); } } diff --git a/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs index 570544141..492df8ced 100644 --- a/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs +++ b/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs @@ -18,6 +18,7 @@ public record MarkdownExportFileContext public required MarkdownFile SourceFile { get; init; } public required IFileInfo DefaultOutputFile { get; init; } public string? LLMText { get; set; } + public required DocumentationSet DocumentationSet { get; init; } } public interface IMarkdownExporter diff --git a/src/Elastic.Markdown/IO/DocumentationSet.cs b/src/Elastic.Markdown/IO/DocumentationSet.cs index 7c8c5784d..e848ae8a6 100644 --- a/src/Elastic.Markdown/IO/DocumentationSet.cs +++ b/src/Elastic.Markdown/IO/DocumentationSet.cs @@ -78,7 +78,9 @@ INavigationItem[] GetParents(INavigationItem current) { if (parent is null) continue; - parents.Add(parent); + if (parents.All(i => i.Url != parent.Url)) + parents.Add(parent); + parent = parent.Parent; } while (parent != null); diff --git a/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs b/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs index ec1badef7..745b0b16b 100644 --- a/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs +++ b/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs @@ -10,6 +10,7 @@ public class SearchUsecase(ISearchGateway searchGateway) { public async Task Search(SearchRequest request, Cancel ctx = default) { + // var validationResult = validator.Validate(request); // if (!validationResult.IsValid) // throw new ArgumentException(validationResult.Message); @@ -17,13 +18,17 @@ public async Task Search(SearchRequest request, Cancel ctx = def var (totalHits, results) = await searchGateway.SearchAsync( request.Query, request.PageNumber, - request.PageSize, ctx + request.PageSize, + ctx ); + return new SearchResponse { Results = results, - TotalResults = totalHits + TotalResults = totalHits, + PageNumber = request.PageNumber, + PageSize = request.PageSize, }; } } @@ -32,13 +37,24 @@ public record SearchRequest { public required string Query { get; init; } public int PageNumber { get; init; } = 1; - public int PageSize { get; init; } = 10; + public int PageSize { get; init; } = 5; } public record SearchResponse { public required IEnumerable Results { get; init; } public required int TotalResults { get; init; } + public required int PageNumber { get; init; } + public required int PageSize { get; init; } + public int PageCount => TotalResults > 0 + ? (int)Math.Ceiling((double)TotalResults / PageSize) + : 0; +} + +public record SearchResultItemParent +{ + public required string Title { get; init; } + public required string Url { get; init; } } public record SearchResultItem @@ -46,5 +62,6 @@ public record SearchResultItem public required string Url { get; init; } public required string Title { get; init; } public required string Description { get; init; } - public required double Score { get; init; } + public required SearchResultItemParent[] Parents { get; init; } + public float Score { get; init; } } diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/LlmGatewayAskAiGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/LlmGatewayAskAiGateway.cs index fb7236c72..3430ecec5 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/LlmGatewayAskAiGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/AskAi/LlmGatewayAskAiGateway.cs @@ -40,7 +40,7 @@ string ThreadId public static LlmGatewayRequest CreateFromRequest(AskAiRequest request) => new( UserContext: new UserContext("elastic-docs-v3@invalid"), - PlatformContext: new PlatformContext("support_portal", "support_assistant", []), + PlatformContext: new PlatformContext("docs_site", "docs_assistant", []), Input: [ new ChatInput("user", AskAiRequest.SystemPrompt), diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs new file mode 100644 index 000000000..0f14f4f2f --- /dev/null +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs @@ -0,0 +1,196 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Text.Json.Serialization; +using Elastic.Clients.Elasticsearch; +using Elastic.Clients.Elasticsearch.QueryDsl; +using Elastic.Clients.Elasticsearch.Serialization; +using Elastic.Documentation.Api.Core.Search; +using Elastic.Transport; +using Microsoft.Extensions.Logging; + +namespace Elastic.Documentation.Api.Infrastructure.Adapters.Search; + +internal sealed record DocumentDto +{ + [JsonPropertyName("title")] + public required string Title { get; init; } + + [JsonPropertyName("url")] + public required string Url { get; init; } + + [JsonPropertyName("description")] + public string? Description { get; init; } + + [JsonPropertyName("body")] + public string? Body { get; init; } + + [JsonPropertyName("abstract")] + public required string Abstract { get; init; } + + [JsonPropertyName("url_segment_count")] + public int UrlSegmentCount { get; init; } + + [JsonPropertyName("parents")] + public ParentDocumentDto[] Parents { get; init; } = []; +} + +internal sealed record ParentDocumentDto +{ + [JsonPropertyName("title")] + public required string Title { get; init; } + + [JsonPropertyName("url")] + public required string Url { get; init; } +} + +public class ElasticsearchGateway : ISearchGateway +{ + private readonly ElasticsearchClient _client; + private readonly ElasticsearchOptions _elasticsearchOptions; + private readonly ILogger _logger; + + public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger logger) + { + _logger = logger; + _elasticsearchOptions = elasticsearchOptions; + var nodePool = new SingleNodePool(new Uri(elasticsearchOptions.Url.Trim())); + var clientSettings = new ElasticsearchClientSettings( + nodePool, + sourceSerializer: (_, settings) => new DefaultSourceSerializer(settings, EsJsonContext.Default) + ) + .DefaultIndex(elasticsearchOptions.IndexName) + .Authentication(new ApiKey(elasticsearchOptions.ApiKey)); + + _client = new ElasticsearchClient(clientSettings); + } + + public async Task<(int TotalHits, List Results)> SearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) => + await ExactSearchAsync(query, pageNumber, pageSize, ctx); + + public async Task<(int TotalHits, List Results)> ExactSearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) + { + _logger.LogInformation("Starting search for '{Query}' with pageNumber={PageNumber}, pageSize={PageSize}", query, pageNumber, pageSize); + + var searchQuery = query.Replace("dotnet", "net", StringComparison.InvariantCultureIgnoreCase); + + try + { + var response = await _client.SearchAsync(s => s + .Indices(_elasticsearchOptions.IndexName) + .Query(q => q + .Bool(b => b + .Should( + // Tier 1: Exact/Prefix matches (highest boost) + sh => sh.Prefix(p => p + .Field("title.keyword") + .Value(searchQuery) + .CaseInsensitive(true) + .Boost(300.0f) + ), + + // Tier 2: Semantic search (combined into one clause) + sh => sh.DisMax(dm => dm + .Queries( + dq => dq.Semantic(sem => sem + .Field("title.semantic_text") + .Query(searchQuery) + ), + dq => dq.Semantic(sem => sem + .Field("abstract") + .Query(searchQuery) + ) + ) + .Boost(200.0f) + ), + + // Tier 3: Standard text matching + sh => sh.DisMax(dm => dm + .Queries( + dq => dq.MatchBoolPrefix(m => m + .Field(f => f.Title) + .Query(searchQuery) + ), + dq => dq.Match(m => m + .Field(f => f.Title) + .Query(searchQuery) + .Operator(Operator.And) + ), + dq => dq.Match(m => m + .Field(f => f.Abstract) + .Query(searchQuery) + ) + ) + .Boost(100.0f) + ), + + // Tier 4: Parent matching + sh => sh.Match(m => m + .Field("parents.title") + .Query(searchQuery) + .Boost(75.0f) + ), + + // Tier 5: Fuzzy fallback + sh => sh.Match(m => m + .Field(f => f.Title) + .Query(searchQuery) + .Fuzziness(1) // Reduced from 2 + .Boost(25.0f) + ) + ) + .MustNot(mn => mn.Terms(t => t + .Field("url.keyword") + .Terms(factory => factory.Value("/docs", "/docs/", "/docs/404", "/docs/404/")) + )) + .MinimumShouldMatch(1) + ) + ) + .From((pageNumber - 1) * pageSize) + .Size(pageSize), ctx); + + if (!response.IsValidResponse) + { + _logger.LogWarning("Elasticsearch search response was not valid. Reason: {Reason}", + response.ElasticsearchServerError?.Error?.Reason ?? "Unknown"); + } + else + { + _logger.LogInformation("Search completed for '{Query}'. Total hits: {TotalHits}", query, response.Total); + } + + return ProcessSearchResponse(response); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error occurred during Elasticsearch search for '{Query}'", query); + throw; + } + } + + + private static (int TotalHits, List Results) ProcessSearchResponse(SearchResponse response) + { + var totalHits = (int)response.Total; + + var results = response.Documents.Select((doc, index) => new SearchResultItem + { + Url = doc.Url, + Title = doc.Title, + Description = doc.Description ?? string.Empty, + Parents = doc.Parents.Select(parent => new SearchResultItemParent + { + Title = parent.Title, + Url = parent.Url + }).ToArray(), + Score = (float)(response.Hits.ElementAtOrDefault(index)?.Score ?? 0.0) + }).ToList(); + + return (totalHits, results); + } +} + +[JsonSerializable(typeof(DocumentDto))] +[JsonSerializable(typeof(ParentDocumentDto))] +internal sealed partial class EsJsonContext : JsonSerializerContext; diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchOptions.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchOptions.cs new file mode 100644 index 000000000..d30a3083a --- /dev/null +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchOptions.cs @@ -0,0 +1,14 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using Elastic.Documentation.Api.Infrastructure.Aws; + +namespace Elastic.Documentation.Api.Infrastructure.Adapters.Search; + +public class ElasticsearchOptions(IParameterProvider parameterProvider) +{ + public string Url { get; } = parameterProvider.GetParam("docs-elasticsearch-url").GetAwaiter().GetResult(); + public string ApiKey { get; } = parameterProvider.GetParam("docs-elasticsearch-apikey").GetAwaiter().GetResult(); + public string IndexName { get; } = parameterProvider.GetParam("docs-elasticsearch-index").GetAwaiter().GetResult() ?? "documentation-latest"; +} diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/MockSearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/MockSearchGateway.cs index 608b83201..2e5705986 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/MockSearchGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/MockSearchGateway.cs @@ -16,14 +16,14 @@ public class MockSearchGateway : ISearchGateway Title = "Kibana: Explore, Visualize, Discover Data", Description = "Run data analytics at speed and scale for observability, security, and search with Kibana. Powerful analysis on any data from any source.", - Score = 0.92 + Parents = [] }, new SearchResultItem { Url = "https://www.elastic.co/docs/explore-analyze", Title = "Explore and analyze | Elastic Docs", Description = "Kibana provides a comprehensive suite of tools to help you search, interact with, explore, and analyze your data effectively.", - Score = 0.86 + Parents = [] }, new SearchResultItem { @@ -31,7 +31,7 @@ public class MockSearchGateway : ISearchGateway Title = "Install Kibana | Elastic Docs", Description = "Information on how to set up Kibana and get it running, including downloading, enrollment with Elasticsearch cluster, and configuration.", - Score = 0.75 + Parents = [] }, new SearchResultItem { @@ -39,7 +39,7 @@ public class MockSearchGateway : ISearchGateway Title = "Kibana Lens – Data visualization. Simply.", Description = "Kibana Lens simplifies the process of data visualization through a drag‑and‑drop experience, ideal for exploring logs, trends, and metrics.", - Score = 0.70 + Parents = [] }, new SearchResultItem { @@ -47,7 +47,7 @@ public class MockSearchGateway : ISearchGateway Title = "Elastic Docs – Elastic products, guides & reference", Description = "Official Elastic documentation. Explore guides for Elastic Cloud (hosted & on‑prem), product documentation, how‑to guides and API reference.", - Score = 0.88 + Parents = [] }, new SearchResultItem { @@ -55,14 +55,14 @@ public class MockSearchGateway : ISearchGateway Title = "Get started | Elastic Docs", Description = "Use Elasticsearch to search, index, store, and analyze data of all shapes and sizes in near real time. Kibana is the graphical user interface for Elasticsearch.", - Score = 0.85 + Parents = [] }, new SearchResultItem { Url = "https://www.elastic.co/docs/solutions/search/elasticsearch-basics-quickstart", Title = "Elasticsearch basics quickstart", Description = "Hands‑on introduction to fundamental Elasticsearch concepts: indices, documents, mappings, and search via Console syntax.", - Score = 0.80 + Parents = [] }, new SearchResultItem { @@ -70,21 +70,26 @@ public class MockSearchGateway : ISearchGateway Title = "Elasticsearch API documentation", Description = "Elastic provides REST APIs that are used by the UI components and can be called directly to configure and access Elasticsearch features.", - Score = 0.78 + Parents = [] } ]; public async Task<(int TotalHits, List Results)> SearchAsync(string query, int pageNumber, int pageSize, CancellationToken ctx = default) { var filteredResults = Results - .Where(item => query.Split(' ') - .All(token => item.Title.Contains(token, StringComparison.OrdinalIgnoreCase) || - item.Description.Contains(token, StringComparison.OrdinalIgnoreCase))) + .Where(item => + item.Title.Equals(query, StringComparison.OrdinalIgnoreCase) || + item.Description?.Equals(query, StringComparison.OrdinalIgnoreCase) == true) + .ToList(); + + var pagedResults = filteredResults .Skip((pageNumber - 1) * pageSize) .Take(pageSize) .ToList(); + Console.WriteLine($"MockSearchGateway: Paged results count: {pagedResults.Count}"); + return await Task.Delay(1000, ctx) - .ContinueWith(_ => (TotalHits: filteredResults.Count, Results: filteredResults), ctx); + .ContinueWith(_ => (TotalHits: filteredResults.Count, Results: pagedResults), ctx); } } diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs index 66d817d61..f4fea9f47 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs @@ -12,10 +12,7 @@ public async Task GetParam(string name, bool withDecryption = true, Canc { case "llm-gateway-service-account": { - const string envName = "LLM_GATEWAY_SERVICE_ACCOUNT_KEY_PATH"; - var serviceAccountKeyPath = Environment.GetEnvironmentVariable(envName); - if (string.IsNullOrEmpty(serviceAccountKeyPath)) - throw new ArgumentException($"Environment variable '{envName}' not found."); + var serviceAccountKeyPath = GetEnv("LLM_GATEWAY_SERVICE_ACCOUNT_KEY_PATH"); if (!File.Exists(serviceAccountKeyPath)) throw new ArgumentException($"Service account key file not found at '{serviceAccountKeyPath}'."); var serviceAccountKey = await File.ReadAllTextAsync(serviceAccountKeyPath, ctx); @@ -23,11 +20,19 @@ public async Task GetParam(string name, bool withDecryption = true, Canc } case "llm-gateway-function-url": { - const string envName = "LLM_GATEWAY_FUNCTION_URL"; - var value = Environment.GetEnvironmentVariable(envName); - if (string.IsNullOrEmpty(value)) - throw new ArgumentException($"Environment variable '{envName}' not found."); - return value; + return GetEnv("LLM_GATEWAY_FUNCTION_URL"); + } + case "docs-elasticsearch-url": + { + return GetEnv("DOCUMENTATION_ELASTIC_URL"); + } + case "docs-elasticsearch-apikey": + { + return GetEnv("DOCUMENTATION_ELASTIC_APIKEY"); + } + case "docs-elasticsearch-index": + { + return "semantic-documentation-latest"; } default: { @@ -35,4 +40,12 @@ public async Task GetParam(string name, bool withDecryption = true, Canc } } } + + private static string GetEnv(string name) + { + var value = Environment.GetEnvironmentVariable(name); + if (string.IsNullOrEmpty(value)) + throw new ArgumentException($"Environment variable '{name}' not found."); + return value; + } } diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Elastic.Documentation.Api.Infrastructure.csproj b/src/api/Elastic.Documentation.Api.Infrastructure/Elastic.Documentation.Api.Infrastructure.csproj index effd23891..c7fc40d3a 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Elastic.Documentation.Api.Infrastructure.csproj +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Elastic.Documentation.Api.Infrastructure.csproj @@ -16,7 +16,8 @@ + - + diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/MappingsExstension.cs b/src/api/Elastic.Documentation.Api.Infrastructure/MappingsExstension.cs index fadc1afd5..1dcede6d9 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/MappingsExstension.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/MappingsExstension.cs @@ -32,14 +32,21 @@ private static void MapAskAiEndpoint(IEndpointRouteBuilder group) private static void MapSearchEndpoint(IEndpointRouteBuilder group) { var searchGroup = group.MapGroup("/search"); - _ = searchGroup.MapGet("/", async ([FromQuery(Name = "q")] string query, SearchUsecase searchUsecase, Cancel ctx) => - { - var searchRequest = new SearchRequest + _ = searchGroup.MapGet("/", + async ( + [FromQuery(Name = "q")] string query, + [FromQuery(Name = "page")] int? pageNumber, + SearchUsecase searchUsecase, + Cancel ctx + ) => { - Query = query - }; - var searchResponse = await searchUsecase.Search(searchRequest, ctx); - return Results.Ok(searchResponse); - }); + var searchRequest = new SearchRequest + { + Query = query, + PageNumber = pageNumber ?? 1 + }; + var searchResponse = await searchUsecase.Search(searchRequest, ctx); + return Results.Ok(searchResponse); + }); } } diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs b/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs index 43862f029..293547b92 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs @@ -113,7 +113,8 @@ private static void AddSearchUsecase(IServiceCollection services, AppEnv appEnv) { var logger = GetLogger(services); logger?.LogInformation("Configuring Search use case for environment {AppEnvironment}", appEnv); - _ = services.AddScoped(); + _ = services.AddScoped(); + _ = services.AddScoped(); _ = services.AddScoped(); } } diff --git a/src/tooling/Elastic.Documentation.Tooling/Exporters/ElasticsearchMarkdownExporter.cs b/src/tooling/Elastic.Documentation.Tooling/Exporters/ElasticsearchMarkdownExporter.cs index 0055ee866..2d9725bee 100644 --- a/src/tooling/Elastic.Documentation.Tooling/Exporters/ElasticsearchMarkdownExporter.cs +++ b/src/tooling/Elastic.Documentation.Tooling/Exporters/ElasticsearchMarkdownExporter.cs @@ -12,8 +12,10 @@ using Elastic.Ingest.Elasticsearch.Catalog; using Elastic.Ingest.Elasticsearch.Semantic; using Elastic.Markdown.Exporters; +using Elastic.Markdown.IO; using Elastic.Transport; using Elastic.Transport.Products.Elasticsearch; +using Markdig.Syntax; using Microsoft.Extensions.Logging; namespace Elastic.Documentation.Tooling.Exporters; @@ -33,6 +35,7 @@ public class ElasticsearchMarkdownExporter(ILoggerFactory logFactory, IDiagnosti /// protected override CatalogIndexChannel NewChannel(CatalogIndexChannelOptions options) => new(options); } + public class ElasticsearchMarkdownSemanticExporter(ILoggerFactory logFactory, IDiagnosticsCollector collector, DocumentationEndpoints endpoints) : ElasticsearchMarkdownExporterBase, SemanticIndexChannel> (logFactory, collector, endpoints) @@ -41,10 +44,11 @@ public class ElasticsearchMarkdownSemanticExporter(ILoggerFactory logFactory, ID protected override SemanticIndexChannelOptions NewOptions(DistributedTransport transport) => new(transport) { GetMapping = (inferenceId, _) => CreateMapping(inferenceId), + GetMappingSettings = (_, _) => CreateMappingSetting(), IndexFormat = "semantic-documentation-{0:yyyy.MM.dd.HHmmss}", ActiveSearchAlias = "semantic-documentation", IndexNumThreads = IndexNumThreads, - InferenceCreateTimeout = TimeSpan.FromMinutes(4), + InferenceCreateTimeout = TimeSpan.FromMinutes(4) }; /// @@ -67,34 +71,87 @@ public abstract class ElasticsearchMarkdownExporterBase 8; - protected static string CreateMapping(string? inferenceId) => - // langugage=json - $$""" + protected static string CreateMappingSetting() => + // language=json + """ { - "properties": { - "title": { "type": "text" }, - "body": { "type": "text" } - {{(!string.IsNullOrWhiteSpace(inferenceId) ? AbstractInferenceMapping(inferenceId) : AbstractMapping())}} + "analysis": { + "analyzer": { + "synonyms_analyzer": { + "tokenizer": "whitespace", + "filter": [ + "lowercase", + "synonyms_filter" + ] + } + }, + "filter": { + "synonyms_filter": { + "type": "synonym", + "synonyms_set": "docs", + "updateable": true + } + } } } """; + protected static string CreateMapping(string? inferenceId) => + // langugage=json + $$""" + { + "properties": { + "title": { + "type": "text", + "search_analyzer": "synonyms_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + {{(!string.IsNullOrWhiteSpace(inferenceId) ? $$""", "semantic_text": {{{InferenceMapping(inferenceId)}}}""" : "")}} + } + }, + "url": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "url_segment_count": { + "type": "integer" + }, + "body": { + "type": "text" + } + {{(!string.IsNullOrWhiteSpace(inferenceId) ? AbstractInferenceMapping(inferenceId) : AbstractMapping())}} + } + } + """; + private static string AbstractMapping() => // langugage=json """ , "abstract": { - "type": "text", + "type": "text" } """; + private static string InferenceMapping(string inferenceId) => + // langugage=json + $""" + "type": "semantic_text", + "inference_id": "{inferenceId}" + """; + private static string AbstractInferenceMapping(string inferenceId) => // langugage=json $$""" - , "abstract": { - "type": "semantic_text", - "inference_id": "{{inferenceId}}" - } - """; + , "abstract": { + {{InferenceMapping(inferenceId)}} + } + """; public async ValueTask StartAsync(Cancel ctx = default) { @@ -112,6 +169,7 @@ public async ValueTask StartAsync(Cancel ctx = default) }; var transport = new DistributedTransport(configuration); + //The max num threads per allocated node, from testing its best to limit our max concurrency //producing to this number as well var options = NewOptions(transport); @@ -182,18 +240,41 @@ public async ValueTask ExportAsync(MarkdownExportFileContext fileContext, var url = file.Url; + if (url is "/docs" or "/docs/404") + { + // Skip the root and 404 pages + _logger.LogInformation("Skipping export for {Url}", url); + return true; + } + + IPositionalNavigation navigation = fileContext.DocumentationSet; + //use LLM text if it was already provided (because we run with both llm and elasticsearch output) - var body = fileContext.LLMText ??= LlmMarkdownExporter.ConvertToLlmMarkdown(fileContext.Document, fileContext.BuildContext); + var body = fileContext.LLMText ??= LlmMarkdownExporter.ConvertToLlmMarkdown(document, fileContext.BuildContext); + + var headings = fileContext.Document.Descendants() + .Select(h => (h.GetData("header") as string) ?? string.Empty) + .Where(text => !string.IsNullOrEmpty(text)) + .ToArray(); + var doc = new DocumentationDocument { Title = file.Title, Url = url, Body = body, Description = fileContext.SourceFile.YamlFrontMatter?.Description, + Abstract = !string.IsNullOrEmpty(body) - ? body[..Math.Min(body.Length, 400)] + ? body[..Math.Min(body.Length, 400)] + " " + string.Join(" \n- ", headings) : string.Empty, Applies = fileContext.SourceFile.YamlFrontMatter?.AppliesTo, + UrlSegmentCount = url.Split('/', StringSplitOptions.RemoveEmptyEntries).Length, + Parents = navigation.GetParentsOfMarkdownFile(file).Select(i => new ParentDocument + { + Title = i.NavigationTitle, + Url = i.Url + }).Reverse().ToArray(), + Headings = headings }; return await TryWrite(doc, ctx); }