diff --git a/aspire/AppHost.cs b/aspire/AppHost.cs index d35dbd809..a4c26ecec 100644 --- a/aspire/AppHost.cs +++ b/aspire/AppHost.cs @@ -14,7 +14,7 @@ // ReSharper disable once RedundantLambdaParameterType // ReSharper disable once VariableHidesOuterVariable -async Task BuildAspireHost(bool startElasticsearch, bool assumeCloned, bool skipPrivateRepositories, Cancel ctx) +async Task BuildAspireHost(bool startElasticsearch, bool assumeCloned, bool assumeBuild, bool skipPrivateRepositories, Cancel ctx) { var builder = DistributedApplication.CreateBuilder(args); @@ -28,8 +28,10 @@ async Task BuildAspireHost(bool startElasticsearch, bool assumeCloned, bool skip string[] cloneArgs = assumeCloned ? ["--assume-cloned"] : []; cloneAll = cloneAll.WithArgs(["assembler", "clone", .. globalArguments, .. cloneArgs]); - var buildAll = builder.AddProject(AssemblerBuild) - .WithArgs(["assembler", "build", .. globalArguments]) + var buildAll = builder.AddProject(AssemblerBuild); + string[] buildArgs = assumeBuild ? ["--assume-build"] : []; + buildAll = buildAll + .WithArgs(["assembler", "build", .. globalArguments, .. buildArgs]) .WaitForCompletion(cloneAll) .WithParentRelationship(cloneAll); @@ -62,8 +64,8 @@ async Task BuildAspireHost(bool startElasticsearch, bool assumeCloned, bool skip var indexElasticsearch = builder.AddProject(ElasticsearchIngest) .WithArgs(["assembler", "index", .. 
globalArguments]) - .WithExplicitStart() - .WaitForCompletion(cloneAll); + .WaitForCompletion(cloneAll) + .WithExplicitStart(); // ReSharper disable once RedundantAssignment indexElasticsearch = startElasticsearch diff --git a/aspire/README.md b/aspire/README.md index 1d1ac620f..621a7c895 100644 --- a/aspire/README.md +++ b/aspire/README.md @@ -81,9 +81,9 @@ dotnet user-secrets --project aspire list Should have these secrets -> Parameters:LlmGatewayUrl = https://**** -> Parameters:LlmGatewayServiceAccountPath = -> Parameters:DocumentationElasticUrl = https://*.elastic.cloud:443 +> Parameters:LlmGatewayUrl = https://**** +> Parameters:LlmGatewayServiceAccountPath = +> Parameters:DocumentationElasticUrl = https://*.elastic.cloud:443 > Parameters:DocumentationElasticApiKey = **** To set them: @@ -94,3 +94,59 @@ dotnet user-secrets --project aspire set Parameters:DocumentationElasticApiKey < Do note `dotnet user-secrets` should only be used on local development machines and not on CI. +## Integration Tests + +The `Elastic.Assembler.IntegrationTests` project includes integration tests for various components, including the search functionality. + +### Search Integration Tests + +The search integration tests (`Search/SearchIntegrationTests.cs`) verify that the ElasticsearchGateway correctly processes queries through the `/docs/_api/v1/search` endpoint. + +**Optimized Indexing**: The test base class (`SearchTestBase`) intelligently checks if the remote Elasticsearch instance already contains up-to-date indexed data. If the index exists with sufficient documents and a valid template, indexing is automatically skipped to improve test performance. Otherwise, the Elasticsearch indexer runs before the tests execute. + +#### Prerequisites + +The tests require a valid Elasticsearch instance. Choose one of these options: + +1. 
**External Elasticsearch** - Set up user secrets: + ```bash + dotnet user-secrets --project aspire set Parameters:DocumentationElasticUrl <elasticsearch-url> + dotnet user-secrets --project aspire set Parameters:DocumentationElasticApiKey <api-key> + ``` + +2. **Local Elasticsearch** - The `--start-elasticsearch` flag will be automatically handled by the test fixture: + ```bash + # Tests will use the configured Elasticsearch (local or remote) + dotnet test tests-integration/Elastic.Assembler.IntegrationTests --filter "FullyQualifiedName~SearchIntegrationTests" + ``` + +#### Running the Tests + +```bash +# Run all integration tests +dotnet test tests-integration/Elastic.Assembler.IntegrationTests + +# Run only search integration tests +dotnet test tests-integration/Elastic.Assembler.IntegrationTests --filter "FullyQualifiedName~SearchIntegrationTests" +``` + +#### Test Execution Flow + +1. The test fixture starts all Aspire resources (clone, build, serve, API) +2. **Intelligent indexing check**: The test queries the remote Elasticsearch to check: + - If the semantic index template exists and has a valid version + - If the index contains sufficient documents (> 100) + - If both conditions are met, indexing is **skipped** for faster test execution +3. If indexing is needed, the Elasticsearch indexer runs automatically in test mode (up to 10 minutes timeout) +4. Search queries are executed against the indexed data +5. 
Results are validated against expected URLs + +**Notes**: +- The search tests use data-driven theory tests that verify expected search results +- Tests may initially fail if: + - The Elasticsearch index is empty or not populated correctly + - The expected URLs don't match the actual indexed content + - Network connectivity issues with Elasticsearch +- **Performance optimization**: Subsequent test runs against the same Elasticsearch instance are significantly faster because indexing is skipped when data is already up-to-date +- The base class `SearchTestBase` can be extended for additional search-related tests, providing consistent initialization and intelligent indexing behavior + diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs index 670948149..6d943a884 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs @@ -83,6 +83,41 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger Results)> SearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) => await HybridSearchWithRrfAsync(query, pageNumber, pageSize, ctx); + /// + /// Builds the lexical search query for the given search term. 
+ /// + private static Query BuildLexicalQuery(string searchQuery) => + ((Query)new PrefixQuery(Infer.Field(f => f.Title.Suffix("keyword")), searchQuery) { Boost = 10.0f, CaseInsensitive = true } + || new MatchPhrasePrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 9.0f } + || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Operator = Operator.And, Boost = 8.0f } + || new MatchBoolPrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 6.0f } + || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Operator = Operator.And, Boost = 5.0f } + || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Operator = Operator.And, Boost = 4.5f } + || new MatchQuery(Infer.Field(f => f.Headings), searchQuery) { Operator = Operator.And, Boost = 4.5f } + || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Operator = Operator.Or, Boost = 4.0f } + || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Operator = Operator.Or, Boost = 3.0f } + || new MatchQuery(Infer.Field(f => f.Headings), searchQuery) { Operator = Operator.Or, Boost = 3.0f } + || new MatchQuery(Infer.Field(f => f.Parents.First().Title), searchQuery) { Boost = 2.0f } + || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Fuzziness = 1, Boost = 1.0f } + ) + && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])); + + /// + /// Builds the semantic search query for the given search term. + /// + private static Query BuildSemanticQuery(string searchQuery) => + ((Query)new SemanticQuery("title.semantic_text", searchQuery) { Boost = 5.0f } + || new SemanticQuery("abstract.semantic_text", searchQuery) { Boost = 3.0f } + ) + && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), + new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])); + + /// + /// Normalizes the search query by replacing "dotnet" with "net". 
+ /// + private static string NormalizeSearchQuery(string query) => + query.Replace("dotnet", "net", StringComparison.InvariantCultureIgnoreCase); + public async Task<(int TotalHits, List Results)> HybridSearchWithRrfAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) { _logger.LogInformation("Starting RRF hybrid search for '{Query}' with pageNumber={PageNumber}, pageSize={PageSize}", query, pageNumber, pageSize); @@ -90,31 +125,9 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger(f => f.Title.Suffix("keyword")), searchQuery) { Boost = 10.0f, CaseInsensitive = true } - || new MatchPhrasePrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 9.0f } - || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Operator = Operator.And, Boost = 8.0f } - || new MatchBoolPrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 6.0f } - || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Operator = Operator.And, Boost = 5.0f } - || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Operator = Operator.And, Boost = 4.5f } - || new MatchQuery(Infer.Field(f => f.Headings), searchQuery) { Operator = Operator.And, Boost = 4.5f } - || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Operator = Operator.Or, Boost = 4.0f } - || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Operator = Operator.Or, Boost = 3.0f } - || new MatchQuery(Infer.Field(f => f.Headings), searchQuery) { Operator = Operator.Or, Boost = 3.0f } - || new MatchQuery(Infer.Field(f => f.Parents.First().Title), searchQuery) { Boost = 2.0f } - || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Fuzziness = 1, Boost = 1.0f } - ) - && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])) - ; - var semanticSearchRetriever = - ((Query)new SemanticQuery("title.semantic_text", searchQuery) { Boost = 5.0f } - || new 
SemanticQuery("abstract.semantic_text", searchQuery) { Boost = 3.0f } - ) - && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), - new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])) - ; + var searchQuery = NormalizeSearchQuery(query); + var lexicalSearchRetriever = BuildLexicalQuery(searchQuery); + var semanticSearchRetriever = BuildSemanticQuery(searchQuery); try { @@ -220,6 +233,144 @@ private static (int TotalHits, List Results) ProcessSearchResp return (totalHits, results); } + + /// + /// Explains why a document did or didn't match for a given query. + /// Returns detailed scoring information using Elasticsearch's _explain API. + /// + public async Task ExplainDocumentAsync(string query, string documentUrl, Cancel ctx = default) + { + var searchQuery = NormalizeSearchQuery(query); + var lexicalQuery = BuildLexicalQuery(searchQuery); + var semanticQuery = BuildSemanticQuery(searchQuery); + + // Combine queries with bool should to match RRF behavior + var combinedQuery = (Query)new BoolQuery + { + Should = [lexicalQuery, semanticQuery], + MinimumShouldMatch = 1 + }; + + try + { + // First, find the document by URL + var getDocResponse = await _client.SearchAsync(s => s + .Indices(_elasticsearchOptions.IndexName) + .Query(q => q.Term(t => t.Field(f => f.Url).Value(documentUrl))) + .Size(1), ctx); + + if (!getDocResponse.IsValidResponse || getDocResponse.Documents.Count == 0) + { + return new ExplainResult + { + DocumentUrl = documentUrl, + Found = false, + Explanation = $"Document with URL '{documentUrl}' not found in index" + }; + } + + var documentId = getDocResponse.Hits.First().Id; + + // Now explain why this document matches (or doesn't match) the query + var explainResponse = await _client.ExplainAsync(_elasticsearchOptions.IndexName, documentId, e => e + .Query(combinedQuery), ctx); + + if (!explainResponse.IsValidResponse) + { + return new ExplainResult + { + DocumentUrl = documentUrl, + Found = true, + Matched = false, + 
Explanation = $"Error explaining document: {explainResponse.ElasticsearchServerError?.Error?.Reason ?? "Unknown error"}" + }; + } + + return new ExplainResult + { + DocumentUrl = documentUrl, + Found = true, + Matched = explainResponse.Matched, + Score = explainResponse.Explanation?.Value ?? 0, + Explanation = FormatExplanation(explainResponse.Explanation, 0) + }; + } + catch (Exception ex) + { + _logger.LogError(ex, "Error explaining document '{Url}' for query '{Query}'", documentUrl, query); + return new ExplainResult + { + DocumentUrl = documentUrl, + Found = false, + Explanation = $"Exception during explain: {ex.Message}" + }; + } + } + + /// + /// Formats the Elasticsearch explanation into a readable string with indentation. + /// + private static string FormatExplanation(Elastic.Clients.Elasticsearch.Core.Explain.ExplanationDetail? explanation, int indent) + { + if (explanation == null) + return string.Empty; + + var indentStr = new string(' ', indent * 2); + var value = explanation.Value.ToString("F4", System.Globalization.CultureInfo.InvariantCulture); + var desc = explanation.Description ?? "No description"; + var result = $"{indentStr}{value} - {desc}\n"; + + if (explanation.Details != null && explanation.Details.Count > 0) + { + foreach (var detail in explanation.Details) + result += FormatExplanation(detail, indent + 1); + } + + return result; + } + + /// + /// Explains both the top search result and an expected document for comparison. + /// Returns detailed scoring information for both documents. 
+ /// + public async Task<(ExplainResult TopResult, ExplainResult ExpectedResult)> ExplainTopResultAndExpectedAsync( + string query, + string expectedDocumentUrl, + Cancel ctx = default) + { + // First, get the top result + var searchResults = await HybridSearchWithRrfAsync(query, 1, 1, ctx); + var topResultUrl = searchResults.Results.FirstOrDefault()?.Url; + + if (string.IsNullOrEmpty(topResultUrl)) + { + var emptyResult = new ExplainResult + { + DocumentUrl = "N/A", + Found = false, + Explanation = "No search results returned" + }; + return (emptyResult, await ExplainDocumentAsync(query, expectedDocumentUrl, ctx)); + } + + // Explain both documents + var topResultExplain = await ExplainDocumentAsync(query, topResultUrl, ctx); + var expectedResultExplain = await ExplainDocumentAsync(query, expectedDocumentUrl, ctx); + + return (topResultExplain, expectedResultExplain); + } +} + +/// +/// Result of explaining why a document matched or didn't match a query. +/// +public sealed record ExplainResult +{ + public required string DocumentUrl { get; init; } + public bool Found { get; init; } + public bool Matched { get; init; } + public double Score { get; init; } + public string Explanation { get; init; } = string.Empty; } [JsonSerializable(typeof(DocumentDto))] diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs b/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs index 328bff119..704ddf40d 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs @@ -112,8 +112,7 @@ private static void AddParameterProvider(IServiceCollection services, AppEnv app } default: { - throw new ArgumentOutOfRangeException(nameof(appEnv), appEnv, - "Unsupported environment for parameter provider."); + throw new ArgumentOutOfRangeException(nameof(appEnv), appEnv, "Unsupported environment for parameter provider."); } } } diff --git 
a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs index 3726a0e7d..5e72221a7 100644 --- a/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs +++ b/src/services/Elastic.Documentation.Assembler/Building/AssemblerBuildService.cs @@ -33,6 +33,7 @@ public async Task BuildAll( bool? metadataOnly, bool? showHints, IReadOnlySet? exporters, + bool? assumeBuild, FileSystem fs, Cancel ctx ) @@ -44,6 +45,8 @@ Cancel ctx if (exporters.Contains(Exporter.DocumentationState)) exporters = new HashSet(exporters.Except([Exporter.DocumentationState])); + var elasticsearchExportOnly = exporters.SetEquals([Exporter.Elasticsearch]); + var githubEnvironmentInput = githubActionsService.GetInput("environment"); environment ??= !string.IsNullOrEmpty(githubEnvironmentInput) ? githubEnvironmentInput : "dev"; @@ -53,10 +56,29 @@ Cancel ctx var assembleContext = new AssembleContext(assemblyConfiguration, configurationContext, environment, collector, fs, fs, null, null); + // Early return if --assume-build is specified and output already exists + if (assumeBuild.GetValueOrDefault(false)) + { + var indexHtmlPath = Path.Combine(assembleContext.OutputDirectory.FullName, "docs", "index.html"); + if (assembleContext.OutputDirectory.Exists && fs.File.Exists(indexHtmlPath)) + { + _logger.LogInformation("Assuming build already exists (--assume-build). Found index.html at {Path}. Skipping build.", indexHtmlPath); + return true; + } + _logger.LogInformation("--assume-build specified but output directory does not exist or is incomplete. Proceeding with build."); + } + if (assembleContext.OutputDirectory.Exists) { - _logger.LogInformation("Cleaning target output directory"); - assembleContext.OutputDirectory.Delete(true); + if (elasticsearchExportOnly) + { + _logger.LogInformation("Elasticsearch export only. 
Skipping clean up of target output directory"); + } + else + { + _logger.LogInformation("Cleaning target output directory"); + assembleContext.OutputDirectory.Delete(true); + } } _logger.LogInformation("Get all clone directory information"); diff --git a/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs b/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs index 5a1137c6f..93779d259 100644 --- a/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs +++ b/src/services/Elastic.Documentation.Assembler/Indexing/AssemblerIndexService.cs @@ -144,6 +144,6 @@ public async Task Index(IDiagnosticsCollector collector, var exporters = new HashSet { Elasticsearch }; - return await BuildAll(collector, strict: false, environment, metadataOnly: true, showHints: false, exporters, fileSystem, ctx); + return await BuildAll(collector, strict: false, environment, metadataOnly: true, showHints: false, exporters, assumeBuild: false, fileSystem, ctx); } } diff --git a/src/tooling/docs-builder/Commands/Assembler/AssemblerCommands.cs b/src/tooling/docs-builder/Commands/Assembler/AssemblerCommands.cs index 6fa4b2b84..ce8266892 100644 --- a/src/tooling/docs-builder/Commands/Assembler/AssemblerCommands.cs +++ b/src/tooling/docs-builder/Commands/Assembler/AssemblerCommands.cs @@ -31,6 +31,7 @@ ICoreService githubActionsService /// The environment to build /// If true, fetch the latest commit of the branch instead of the link registry entry ref /// If true, assume the repository folder already exists on disk assume it's cloned already, primarily used for testing + /// If true, assume the build output already exists and skip building if index.html exists, primarily used for testing /// Only emit documentation metadata to output, ignored if 'exporters' is also set /// Show hints from all documentation sets during assembler build /// Set available exporters: @@ -45,6 +46,7 @@ public async Task CloneAndBuild( string? 
environment = null, bool? fetchLatest = null, bool? assumeCloned = null, + bool? assumeBuild = null, bool? metadataOnly = null, bool? showHints = null, [ExporterParser] IReadOnlySet? exporters = null, @@ -61,9 +63,9 @@ static async (s, collector, state, ctx) => await s.CloneAll(collector, state.str var buildService = new AssemblerBuildService(logFactory, assemblyConfiguration, configurationContext, githubActionsService); var fs = new FileSystem(); - serviceInvoker.AddCommand(buildService, (strict, environment, metadataOnly, showHints, exporters, fs), strict ?? false, + serviceInvoker.AddCommand(buildService, (strict, environment, metadataOnly, showHints, exporters, assumeBuild, fs), strict ?? false, static async (s, collector, state, ctx) => - await s.BuildAll(collector, state.strict, state.environment, state.metadataOnly, state.showHints, state.exporters, state.fs, ctx) + await s.BuildAll(collector, state.strict, state.environment, state.metadataOnly, state.showHints, state.exporters, state.assumeBuild, state.fs, ctx) ); var result = await serviceInvoker.InvokeAsync(ctx); @@ -116,6 +118,7 @@ static async (s, collector, state, ctx) => await s.CloneAll(collector, state.str /// Builds all repositories /// Treat warnings as errors and fail the build on warnings /// The environment to build + /// If true, assume the build output already exists and skip building if index.html exists, primarily used for testing /// Only emit documentation metadata to output, ignored if 'exporters' is also set /// Show hints from all documentation sets during assembler build /// Set available exporters: @@ -127,6 +130,7 @@ static async (s, collector, state, ctx) => await s.CloneAll(collector, state.str public async Task BuildAll( bool? strict = null, string? environment = null, + bool? assumeBuild = null, bool? metadataOnly = null, bool? showHints = null, [ExporterParser] IReadOnlySet? 
exporters = null, @@ -137,9 +141,9 @@ public async Task BuildAll( var fs = new FileSystem(); var service = new AssemblerBuildService(logFactory, assemblyConfiguration, configurationContext, githubActionsService); - serviceInvoker.AddCommand(service, (strict, environment, metadataOnly, showHints, exporters, fs), strict ?? false, + serviceInvoker.AddCommand(service, (strict, environment, assumeBuild, metadataOnly, showHints, exporters, fs), strict ?? false, static async (s, collector, state, ctx) => - await s.BuildAll(collector, state.strict, state.environment, state.metadataOnly, state.showHints, state.exporters, state.fs, ctx) + await s.BuildAll(collector, state.strict, state.environment, state.metadataOnly, state.showHints, state.exporters, state.assumeBuild, state.fs, ctx) ); return await serviceInvoker.InvokeAsync(ctx); diff --git a/src/tooling/docs-builder/Http/StaticWebHost.cs b/src/tooling/docs-builder/Http/StaticWebHost.cs index aae268b52..131ce3271 100644 --- a/src/tooling/docs-builder/Http/StaticWebHost.cs +++ b/src/tooling/docs-builder/Http/StaticWebHost.cs @@ -57,14 +57,32 @@ public StaticWebHost(int port, string? 
path) private void SetUpRoutes() { + _ = WebApplication.Use(async (context, next) => + { + try + { + await next(context); + } + catch (Exception ex) + { + Console.WriteLine($"[UNHANDLED EXCEPTION] {ex.GetType().Name}: {ex.Message}"); + Console.WriteLine($"[STACK TRACE] {ex.StackTrace}"); + if (ex.InnerException != null) + Console.WriteLine($"[INNER EXCEPTION] {ex.InnerException.GetType().Name}: {ex.InnerException.Message}"); + + throw; // Re-throw to let ASP.NET Core handle it + } + }); _ = WebApplication + .UseDeveloperExceptionPage(new DeveloperExceptionPageOptions()) .UseRouting(); _ = WebApplication.MapGet("/", (Cancel _) => Results.Redirect("docs")); _ = WebApplication.MapGet("{**slug}", ServeDocumentationFile); + var apiV1 = WebApplication.MapGroup("/docs/_api/v1"); #if DEBUG apiV1.MapElasticDocsApiEndpoints(); diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/AssembleFixture.cs b/tests-integration/Elastic.Assembler.IntegrationTests/AssembleFixture.cs index 20a11747f..28da7d798 100644 --- a/tests-integration/Elastic.Assembler.IntegrationTests/AssembleFixture.cs +++ b/tests-integration/Elastic.Assembler.IntegrationTests/AssembleFixture.cs @@ -7,6 +7,7 @@ using Aspire.Hosting.Testing; using Elastic.Documentation.ServiceDefaults; using InMemLogger; +using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using static Elastic.Documentation.Aspire.ResourceNames; @@ -27,7 +28,12 @@ public static TBuilder WithEmptyParameters(this TBuilder builder) foreach (var parameter in parameters) builder.Configuration[$"Parameters:{parameter.Name}"] = string.Empty; - builder.Configuration[$"Parameters:DocumentationElasticUrl"] = "http://localhost.example:9200"; + var configBuilder = new ConfigurationBuilder(); + _ = configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); + var config = configBuilder.Build(); + + builder.Configuration[$"Parameters:DocumentationElasticUrl"] = 
config["Parameters:DocumentationElasticUrl"] ?? "http://localhost.example:9200"; + builder.Configuration[$"Parameters:DocumentationElasticApiKey"] = config["Parameters:DocumentationElasticApiKey"] ?? "not-configured"; return builder; } } @@ -43,7 +49,7 @@ public class DocumentationFixture : IAsyncLifetime public async ValueTask InitializeAsync() { var builder = await DistributedApplicationTestingBuilder.CreateAsync( - ["--skip-private-repositories", "--assume-cloned"], + ["--skip-private-repositories", "--assume-cloned", "--assume-build"], (options, _) => { options.DisableDashboard = true; diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Elastic.Assembler.IntegrationTests.csproj b/tests-integration/Elastic.Assembler.IntegrationTests/Elastic.Assembler.IntegrationTests.csproj index 80b41db08..a6be43951 100644 --- a/tests-integration/Elastic.Assembler.IntegrationTests/Elastic.Assembler.IntegrationTests.csproj +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Elastic.Assembler.IntegrationTests.csproj @@ -11,6 +11,9 @@ + + + diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchIntegrationTests.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchIntegrationTests.cs new file mode 100644 index 000000000..f863618ac --- /dev/null +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchIntegrationTests.cs @@ -0,0 +1,126 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Net.Http.Json; +using Elastic.Documentation.Api.Core.Search; +using FluentAssertions; + +namespace Elastic.Assembler.IntegrationTests.Search; + +/// +/// Integration tests for the search endpoint exposed through MapSearchEndpoint. +/// These tests verify that the ElasticsearchGateway correctly processes queries +/// and returns expected results. 
Inherits from SearchTestBase which handles +/// conditional indexing based on remote Elasticsearch state. +/// +[Collection(SearchBootstrapFixture.Collection)] +public class SearchIntegrationTests(SearchBootstrapFixture searchFixture, ITestOutputHelper output) : SearchTestBase +{ + /// + /// Theory data for search queries mapped to expected first hit URLs. + /// Format: (query, expectedFirstResultUrl) + /// Note: These URLs reflect the actual search results from the indexed documentation. + /// + public static TheoryData SearchQueryTestCases => new() + { + //TODO these results reflect todays result, we still have some work to do to improve the relevance of the search results + + // Elasticsearch specific queries + { "elasticsearch getting started", "/docs/reference/elasticsearch/clients/java/getting-started" }, + { "apm", "/docs/reference/apm/observability/apm" }, + { "kibana dashboard", "/docs/reference/beats/auditbeat/configuration-dashboards" }, + + // .NET specific queries (testing dotnet -> net replacement) + { "dotnet client", "/docs/reference/elasticsearch/clients/dotnet/using-net-client" }, + { ".net apm agent", "/docs/reference/apm/agents/dotnet" }, + + // General queries + { "machine learning", "/docs/reference/machine-learning" }, + { "ingest pipeline", "/docs/reference/beats/metricbeat/configuring-ingest-node" }, + }; + + [Theory] + [MemberData(nameof(SearchQueryTestCases))] + public async Task SearchEndpointReturnsExpectedFirstResult(string query, string expectedFirstResultUrl) + { + Assert.SkipUnless(searchFixture.Connected, "Elasticsearch is not connected"); + + // Arrange + searchFixture.HttpClient.Should().NotBeNull("HTTP client should be initialized"); + + // Act + var response = await searchFixture.HttpClient.GetAsync($"/docs/_api/v1/search?q={Uri.EscapeDataString(query)}&page=1", TestContext.Current.CancellationToken); + + // Assert - Response should be successful + response.EnsureSuccessStatusCode(); + + var searchResponse = await 
response.Content.ReadFromJsonAsync(cancellationToken: TestContext.Current.CancellationToken); + searchResponse.Should().NotBeNull("Search response should be deserialized"); + + // Log results for debugging + output.WriteLine($"Query: {query}"); + output.WriteLine($"Total results: {searchResponse.TotalResults}"); + output.WriteLine($"Results returned: {searchResponse.Results.Count()}"); + + if (searchResponse.Results.Any()) + { + output.WriteLine("First result:"); + var firstResult = searchResponse.Results.First(); + output.WriteLine($" Title: {firstResult.Title}"); + output.WriteLine($" URL: {firstResult.Url}"); + output.WriteLine($" Score: {firstResult.Score}"); + } + + // Assert - Should have at least one result + searchResponse.Results.Should().NotBeEmpty($"Search for '{query}' should return results"); + + // Assert - First result should match expected URL + var actualFirstResultUrl = searchResponse.Results.First().Url; + actualFirstResultUrl.Should().Be(expectedFirstResultUrl, + $"First result for query '{query}' should be the expected documentation page"); + } + + [Fact] + public async Task SearchEndpointWithPaginationReturnsCorrectPage() + { + // Arrange + searchFixture.HttpClient.Should().NotBeNull("HTTP client should be initialized"); + const string query = "elasticsearch"; + + // Act - Get first page + var page1Response = await searchFixture.HttpClient!.GetAsync($"/docs/_api/v1/search?q={Uri.EscapeDataString(query)}&page=1", TestContext.Current.CancellationToken); + page1Response.EnsureSuccessStatusCode(); + var page1Data = await page1Response.Content.ReadFromJsonAsync(cancellationToken: TestContext.Current.CancellationToken); + + // Act - Get second page + var page2Response = await searchFixture.HttpClient.GetAsync($"/docs/_api/v1/search?q={Uri.EscapeDataString(query)}&page=2", TestContext.Current.CancellationToken); + page2Response.EnsureSuccessStatusCode(); + var page2Data = await page2Response.Content.ReadFromJsonAsync(cancellationToken: 
TestContext.Current.CancellationToken); + + // Assert + page1Data.Should().NotBeNull(); + page2Data.Should().NotBeNull(); + page1Data.PageNumber.Should().Be(1); + page2Data.PageNumber.Should().Be(2); + page1Data.TotalResults.Should().Be(page2Data.TotalResults, "Total results should be the same across pages"); + + // Results on different pages should be different + var page1Urls = page1Data.Results.Select(r => r.Url).ToHashSet(); + var page2Urls = page2Data.Results.Select(r => r.Url).ToHashSet(); + page1Urls.Should().NotIntersectWith(page2Urls, "Different pages should contain different results"); + } + + [Fact] + public async Task SearchEndpointWithEmptyQueryReturnsError() + { + // Arrange + searchFixture.HttpClient.Should().NotBeNull("HTTP client should be initialized"); + + // Act + var response = await searchFixture.HttpClient.GetAsync("/docs/_api/v1/search?q=&page=1", TestContext.Current.CancellationToken); + + // Assert - Should return bad request for empty query + response.IsSuccessStatusCode.Should().BeFalse("Empty query should not be allowed"); + } +} diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs new file mode 100644 index 000000000..4247b459d --- /dev/null +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs @@ -0,0 +1,203 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using Elastic.Documentation.Api.Infrastructure.Adapters.Search; +using Elastic.Documentation.Api.Infrastructure.Aws; +using Elastic.Documentation.Configuration; +using FluentAssertions; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; + +namespace Elastic.Assembler.IntegrationTests.Search; + +/// +/// Integration tests for search relevance that use ElasticsearchGateway directly +/// to provide detailed explanations of search results using Elasticsearch's _explain API. +/// These tests help understand and improve search ranking by showing detailed scoring breakdowns. +/// +[Collection(SearchBootstrapFixture.Collection)] +public class SearchRelevanceTests(SearchBootstrapFixture searchFixture, DocumentationFixture documentationFixture, ITestOutputHelper output) : SearchTestBase +{ + /// + /// Theory data for search queries mapped to expected first hit URLs. + /// Same as SearchIntegrationTests but with detailed explain output on failures. 
+ /// + public static TheoryData SearchQueryTestCases => new() + { + //TODO these results reflect today's result, we still have some work to do to improve the relevance of the search results + + // Elasticsearch specific queries + { "elasticsearch getting started", "/docs/reference/elasticsearch/clients/java/getting-started" }, + { "apm", "/docs/reference/apm/observability/apm" }, + { "kibana dashboard", "/docs/reference/beats/auditbeat/configuration-dashboards" }, + + // .NET specific queries (testing dotnet -> net replacement) + { "dotnet client", "/docs/reference/elasticsearch/clients/dotnet/using-net-client" }, + { ".net apm agent", "/docs/reference/apm/agents/dotnet" }, + + // General queries + { "machine learning", "/docs/reference/machine-learning" }, + { "ingest pipeline", "/docs/reference/beats/metricbeat/configuring-ingest-node" }, + }; + + [Theory] + [MemberData(nameof(SearchQueryTestCases))] + public async Task SearchReturnsExpectedFirstResultWithExplain(string query, string expectedFirstResultUrl) + { + Assert.SkipUnless(searchFixture.Connected, "Elasticsearch is not connected"); + + // Arrange - Create ElasticsearchGateway directly + var gateway = CreateElasticsearchGateway(); + + // Act - Perform the search + var (totalHits, results) = await gateway.HybridSearchWithRrfAsync(query, 1, 5, TestContext.Current.CancellationToken); + + // Log basic results + output.WriteLine($"Query: {query}"); + output.WriteLine($"Total hits: {totalHits}"); + output.WriteLine($"Results returned: {results.Count}"); + + results.Should().NotBeEmpty($"Search for '{query}' should return results"); + + var actualFirstResultUrl = results.First().Url; + + // If the first result doesn't match expectations, use _explain API for detailed analysis + if (actualFirstResultUrl != expectedFirstResultUrl) + { + output.WriteLine("\n❌ FIRST RESULT MISMATCH - Fetching detailed explanations...\n"); + + // Get explain for both the actual top result and the expected result + var 
(topResultExplain, expectedResultExplain) = await gateway.ExplainTopResultAndExpectedAsync( + query, + expectedFirstResultUrl, + TestContext.Current.CancellationToken); + + // Output the actual top result explanation + output.WriteLine("═══════════════════════════════════════════════════════════════"); + output.WriteLine($"ACTUAL TOP RESULT: {topResultExplain.DocumentUrl}"); + output.WriteLine($"Score: {topResultExplain.Score:F4}"); + output.WriteLine($"Matched: {topResultExplain.Matched}"); + output.WriteLine("───────────────────────────────────────────────────────────────"); + output.WriteLine("Scoring Breakdown:"); + output.WriteLine(topResultExplain.Explanation); + + // Output the expected result explanation + output.WriteLine("═══════════════════════════════════════════════════════════════"); + output.WriteLine($"EXPECTED RESULT: {expectedResultExplain.DocumentUrl}"); + output.WriteLine($"Score: {expectedResultExplain.Score:F4}"); + output.WriteLine($"Matched: {expectedResultExplain.Matched}"); + output.WriteLine("───────────────────────────────────────────────────────────────"); + output.WriteLine("Scoring Breakdown:"); + output.WriteLine(expectedResultExplain.Explanation); + output.WriteLine("═══════════════════════════════════════════════════════════════\n"); + + // Create a detailed failure message + var scoreDiff = topResultExplain.Score - expectedResultExplain.Score; + var failureMessage = $@" +First result for query '{query}' did not match expectation. + +Expected: {expectedFirstResultUrl} + - Score: {expectedResultExplain.Score:F4} + - Matched: {expectedResultExplain.Matched} + +Actual: {actualFirstResultUrl} + - Score: {topResultExplain.Score:F4} + - Matched: {topResultExplain.Matched} + +Score Difference: {scoreDiff:F4} (actual is {(scoreDiff > 0 ? "higher" : "lower")}) + +See test output above for detailed scoring breakdowns from Elasticsearch's _explain API. 
+"; + + actualFirstResultUrl.Should().Be(expectedFirstResultUrl, failureMessage); + } + else + { + output.WriteLine($"✅ First result matches expected: {actualFirstResultUrl}"); + output.WriteLine($" Score: {results.First().Score:F4}"); + } + } + + [Fact] + public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() + { + // Arrange + var gateway = CreateElasticsearchGateway(); + const string query = "elasticsearch getting started"; + const string expectedUrl = "/docs/reference/elasticsearch/clients/java/getting-started"; + + // Act - Use the ExplainTopResultAndExpectedAsync method which gets top result and explains both + var (topResultExplain, expectedResultExplain) = await gateway.ExplainTopResultAndExpectedAsync( + query, + expectedUrl, + TestContext.Current.CancellationToken); + + // Assert - Top result should have explanation + output.WriteLine($"Query: {query}"); + output.WriteLine($"\nTOP RESULT: {topResultExplain.DocumentUrl}"); + output.WriteLine($"Found: {topResultExplain.Found}"); + output.WriteLine($"Matched: {topResultExplain.Matched}"); + output.WriteLine($"Score: {topResultExplain.Score:F4}"); + output.WriteLine("Explanation:"); + output.WriteLine(topResultExplain.Explanation); + + output.WriteLine($"\nEXPECTED RESULT: {expectedResultExplain.DocumentUrl}"); + output.WriteLine($"Found: {expectedResultExplain.Found}"); + output.WriteLine($"Matched: {expectedResultExplain.Matched}"); + output.WriteLine($"Score: {expectedResultExplain.Score:F4}"); + output.WriteLine("Explanation:"); + output.WriteLine(expectedResultExplain.Explanation); + + // Both results should have explanations (even if scores are different) + topResultExplain.Explanation.Should().NotBeEmpty("Top result should have an explanation"); + expectedResultExplain.Explanation.Should().NotBeEmpty("Expected result should have an explanation"); + } + + /// + /// Creates an ElasticsearchGateway instance using configuration from the distributed application. 
+ /// + private ElasticsearchGateway CreateElasticsearchGateway() + { + var configuration = documentationFixture.DistributedApplication.Services.GetRequiredService(); + var loggerFactory = documentationFixture.DistributedApplication.Services.GetRequiredService(); + + // Build a new ConfigurationBuilder to read user secrets + var configBuilder = new ConfigurationBuilder(); + configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); + var userSecretsConfig = configBuilder.Build(); + + // Get Elasticsearch configuration with fallback chain: user secrets → configuration → environment + var elasticsearchUrl = userSecretsConfig["Parameters:DocumentationElasticUrl"] + ?? configuration["Parameters:DocumentationElasticUrl"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL") + ?? throw new InvalidOperationException("Elasticsearch URL not configured"); + + var elasticsearchApiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] + ?? configuration["Parameters:DocumentationElasticApiKey"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY") + ?? throw new InvalidOperationException("Elasticsearch API key not configured"); + + // Create a test parameter provider with the configuration values + var parameterProvider = new TestParameterProvider(elasticsearchUrl, elasticsearchApiKey, "semantic-docs-dev-latest"); + var options = new ElasticsearchOptions(parameterProvider); + + return new ElasticsearchGateway(options, loggerFactory.CreateLogger()); + } + + /// + /// Simple test implementation of IParameterProvider that returns configured values. 
+ /// + private sealed class TestParameterProvider(string url, string apiKey, string indexName) : IParameterProvider + { + public Task GetParam(string name, bool withDecryption = true, Cancel ctx = default) => + name switch + { + "docs-elasticsearch-url" => Task.FromResult(url), + "docs-elasticsearch-apikey" => Task.FromResult(apiKey), + "docs-elasticsearch-index" => Task.FromResult(indexName), + _ => throw new ArgumentException($"Parameter '{name}' not configured in test provider") + }; + } +} diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs new file mode 100644 index 000000000..2dae77bbb --- /dev/null +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs @@ -0,0 +1,292 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using Aspire.Hosting.ApplicationModel; +using Aspire.Hosting.Testing; +using Documentation.Builder.Diagnostics.Console; +using Elastic.Documentation.Configuration; +using Elastic.Ingest.Elasticsearch; +using Elastic.Markdown.Exporters.Elasticsearch; +using Elastic.Transport; +using Elastic.Transport.Products.Elasticsearch; +using FluentAssertions; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using static Elastic.Documentation.Aspire.ResourceNames; + +namespace Elastic.Assembler.IntegrationTests.Search; + + +[CollectionDefinition(Collection)] +public class SearchBootstrapFixture(DocumentationFixture fixture) : IAsyncLifetime +{ + public const string Collection = "Search"; + public HttpClient HttpClient { get; private set; } = null!; + public bool Connected { get; private set; } + + /// + /// Initializes the test by ensuring AssemblerServe (which hosts the 
API) is healthy and Elasticsearch is indexed. + /// Checks if the remote Elasticsearch already has up-to-date data to avoid unnecessary indexing. + /// + public async ValueTask InitializeAsync() + { + try + { + // Wait for AssemblerServe to be ready (it hosts the embedded Lambda API) + Console.WriteLine("Waiting for AssemblerServe (with embedded API) to become healthy..."); + await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceHealthyAsync(AssemblerServe, cancellationToken: TestContext.Current.CancellationToken) + .WaitAsync(TimeSpan.FromMinutes(2), TestContext.Current.CancellationToken); + + Console.WriteLine("AssemblerServe is healthy. Creating HTTP client..."); + + // Get the HTTP client for AssemblerServe which includes the API endpoints + HttpClient = fixture.DistributedApplication.CreateHttpClient(AssemblerServe, "http"); + HttpClient.Should().NotBeNull("Should be able to create HTTP client for AssemblerServe"); + + // Check if Elasticsearch already has up-to-date data + var indexingNeeded = await IsIndexingNeeded(); + + if (!Connected) + { + Console.WriteLine("Can not connect to Elasticsearch. Skipping indexing."); + return; + } + + if (!indexingNeeded) + { + Console.WriteLine("Elasticsearch already has up-to-date data. Skipping indexing."); + return; + } + + Console.WriteLine("Elasticsearch needs indexing. 
Manually starting indexer..."); + + // The indexer always has WithExplicitStart(), so we must manually start it + // Get the ResourceLoggerService to send the start command + fixture.DistributedApplication.Services + .GetRequiredService(); + + // Get the resource notification service to find the resource + fixture.DistributedApplication.Services + .GetRequiredService(); + + // Wait for the resource to be available + var resourceEvent = await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceAsync(ElasticsearchIngest, _ => true, TestContext.Current.CancellationToken) + .WaitAsync(TimeSpan.FromMinutes(1), TestContext.Current.CancellationToken); + + // Get the resource instance + var resource = resourceEvent.Resource; + + // Execute the start command using ResourceCommandAnnotation + var startCommand = resource.Annotations.OfType() + .FirstOrDefault(a => a.Name == "resource-start"); + + if (startCommand != null) + { + Console.WriteLine($"Executing start command for {ElasticsearchIngest}..."); + + // Create ExecuteCommandContext for the start command + var commandContext = new ExecuteCommandContext + { + ResourceName = resourceEvent.ResourceId, + ServiceProvider = fixture.DistributedApplication.Services, + CancellationToken = TestContext.Current.CancellationToken + }; + + await startCommand.ExecuteCommand(commandContext); + Console.WriteLine($"Start command executed for {ElasticsearchIngest}"); + } + else + { + throw new Exception($"Could not find start command for {ElasticsearchIngest}"); + } + + Console.WriteLine("Waiting for indexer to complete..."); + + // Wait for the indexer to complete + _ = await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceAsync(ElasticsearchIngest, KnownResourceStates.TerminalStates, + cancellationToken: TestContext.Current.CancellationToken) + .WaitAsync(TimeSpan.FromMinutes(10), TestContext.Current.CancellationToken); + + Console.WriteLine("Elasticsearch indexer reached terminal state. 
Validating exit code..."); + + // Validate the indexer completed successfully + await ValidateResourceExitCode(ElasticsearchIngest); + + Console.WriteLine("Elasticsearch indexing completed successfully. Tests can now run."); + } + catch (Exception e) + { + Console.WriteLine($"Failed to initialize test: {e.Message}"); + Console.WriteLine(string.Join(Environment.NewLine, + fixture.InMemoryLogger.RecordedLogs.Reverse().Take(50).Reverse())); + throw; + } + } + + /// + /// Checks if indexing is needed by comparing the channel hash in Elasticsearch + /// with the current semantic exporter channel hash. + /// Uses the same pattern as ElasticsearchMarkdownExporter. + /// + private async ValueTask IsIndexingNeeded() + { + try + { + // Get Elasticsearch configuration from Aspire + var (elasticsearchUrl, apiKey, password, username) = GetElasticsearchConfiguration(); + + if (string.IsNullOrEmpty(elasticsearchUrl)) + { + Console.WriteLine("No Elasticsearch URL configured, indexing will be performed."); + Connected = false; + return false; + } + + Console.WriteLine($"Checking remote Elasticsearch at {elasticsearchUrl} for existing data..."); + + // Create Elasticsearch endpoint configuration + var endpoint = new ElasticsearchEndpoint + { + Uri = new Uri(elasticsearchUrl), + ApiKey = apiKey, + Username = username, + Password = password + }; + + // Create transport configuration (similar to ElasticsearchMarkdownExporter) + var configuration = new ElasticsearchConfiguration(endpoint.Uri) + { + Authentication = endpoint.ApiKey is { } eApiKey + ? new ApiKey(eApiKey) + : endpoint is { Username: { } eUsername, Password: { } ePassword } + ? 
new BasicAuthentication(eUsername, ePassword) + : null, + EnableHttpCompression = true + }; + + var transport = new DistributedTransport(configuration); + Connected = (await transport.HeadAsync("/", TestContext.Current.CancellationToken)).ApiCallDetails.HasSuccessfulStatusCode; + + // Create a logger factory and diagnostics collector + var loggerFactory = fixture.DistributedApplication.Services.GetRequiredService(); + var collector = new ConsoleDiagnosticsCollector(loggerFactory); + + // Create semantic exporter to check channel hash (index namespace is 'dev' for tests) + using var semanticExporter = new ElasticsearchSemanticExporter( + loggerFactory, + collector, + endpoint, + "dev", // index namespace + transport + ); + + // Get the current hash from Elasticsearch index template + var currentSemanticHash = await semanticExporter.Channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? string.Empty; + + // Get the expected channel hash from the semantic exporter + await semanticExporter.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, ctx: TestContext.Current.CancellationToken); + var expectedSemanticHash = semanticExporter.Channel.ChannelHash; + + Console.WriteLine($"Elasticsearch semantic hash: '{currentSemanticHash}'"); + Console.WriteLine($"Expected semantic hash: '{expectedSemanticHash}'"); + + // If hashes match, no indexing needed + if (!string.IsNullOrEmpty(currentSemanticHash) && currentSemanticHash == expectedSemanticHash) + { + Console.WriteLine("Semantic channel hashes match. Skipping indexing."); + return false; + } + + Console.WriteLine("Semantic channel hashes do not match or remote hash is empty. Indexing needed."); + return true; + } + catch (Exception ex) + { + Console.WriteLine($"Error checking Elasticsearch state: {ex.Message}. 
Will proceed with indexing."); + return true; // If we can't check, safer to index + } + } + + private async ValueTask ValidateResourceExitCode(string resourceName) + { + var eventResource = await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceAsync(resourceName, _ => true); + var id = eventResource.ResourceId; + + if (!fixture.DistributedApplication.ResourceNotifications.TryGetCurrentState(id, out var state)) + throw new Exception($"Could not find {resourceName} in the current state"); + + if (state.Snapshot.ExitCode is not 0) + { + var recentLogs = string.Join(Environment.NewLine, + fixture.InMemoryLogger.RecordedLogs.Reverse().Take(100).Reverse()); + throw new Exception( + $"Exit code should be 0 for {resourceName}, but was {state.Snapshot.ExitCode}. Recent logs:{Environment.NewLine}{recentLogs}"); + } + + Console.WriteLine($"{resourceName} completed with exit code 0"); + } + + /// + /// Gets Elasticsearch configuration from Aspire parameters and environment. + /// Manually reads user secrets from the aspire project, then falls back to environment variables. + /// + private (string? Url, string? ApiKey, string? Password, string? Username) GetElasticsearchConfiguration() + { + // Manually read user secrets from the aspire project + // UserSecretsId from aspire.csproj: 72f50f33-6fb9-4d08-bff3-39568fe370b3 + var configBuilder = new ConfigurationBuilder(); + configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); + var userSecretsConfig = configBuilder.Build(); + + // Get URL - try user secrets first, then Aspire configuration, then environment + var url = userSecretsConfig["Parameters:DocumentationElasticUrl"] + ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticUrl"] + ?? 
Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL"); + + // Get API Key - try user secrets first, then Aspire configuration, then environment + var apiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] + ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticApiKey"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY"); + + // Get password for local Elasticsearch (when using --start-elasticsearch) + var password = userSecretsConfig["Parameters:DocumentationElasticPassword"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_PASSWORD"); + + // Get username (defaults to "elastic") + var username = userSecretsConfig["Parameters:DocumentationElasticUsername"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_USERNAME") + ?? "elastic"; + + Console.WriteLine($"Elasticsearch configuration retrieved: URL={url != null}, ApiKey={apiKey != null}, Password={password != null}"); + + return (url, apiKey, password, username); + } + + public ValueTask DisposeAsync() + { + HttpClient?.Dispose(); + + // Only dump logs if test failed + if (TestContext.Current.TestState?.Result is not TestResult.Passed) + { + foreach (var log in fixture.InMemoryLogger.RecordedLogs.Reverse().Take(50).Reverse()) + Console.WriteLine(log.Message); + } + + GC.SuppressFinalize(this); + return default; + } +} +/// +/// Base class for search integration tests that handles initialization +/// and conditional Elasticsearch indexing based on hash comparison. 
+/// +public abstract class SearchTestBase : IClassFixture +{ +} diff --git a/tests-integration/Elastic.Documentation.Api.IntegrationTests/Fixtures/ApiWebApplicationFactory.cs b/tests-integration/Elastic.Documentation.Api.IntegrationTests/Fixtures/ApiWebApplicationFactory.cs index 96c60e7aa..211cc8ec5 100644 --- a/tests-integration/Elastic.Documentation.Api.IntegrationTests/Fixtures/ApiWebApplicationFactory.cs +++ b/tests-integration/Elastic.Documentation.Api.IntegrationTests/Fixtures/ApiWebApplicationFactory.cs @@ -56,7 +56,7 @@ protected override void ConfigureWebHost(IWebHostBuilder builder) => A.CallTo(() => mockAskAiGateway.AskAi(A._, A._)) .ReturnsLazily(() => { - var stream = new MemoryStream(Encoding.UTF8.GetBytes("data: test\n\n")); + var stream = new MemoryStream("data: test\n\n"u8.ToArray()); _mockMemoryStreams.Add(stream); return Task.FromResult(stream); }); @@ -69,7 +69,7 @@ protected override void ConfigureWebHost(IWebHostBuilder builder) => A.CallTo(() => mockTransformer.TransformAsync(A._, A._, A._, A._)) .ReturnsLazily((Stream s, string? _, Activity? activity, Cancel _) => { - // Dispose the activity if provided (simulating what the real transformer does) + // Dispose of the activity if provided (simulating what the real transformer does) activity?.Dispose(); return Task.FromResult(s); });