diff --git a/doc/multi-backend-configuration.md b/doc/multi-backend-configuration.md
new file mode 100644
index 0000000..eebcda8
--- /dev/null
+++ b/doc/multi-backend-configuration.md
@@ -0,0 +1,147 @@
+# Multi-Backend Configuration Example
+
+This example demonstrates how to configure and use the multi-backend retrieval architecture in NLWebNet.
+
+## Single Backend (Default/Legacy)
+
+```csharp
+// Traditional single backend setup - still works
+services.AddNLWebNet(options =>
+{
+    options.DefaultMode = QueryMode.List;
+    options.MaxResultsPerQuery = 20;
+});
+```
+
+## Multi-Backend Configuration
+
+```csharp
+// New multi-backend setup
+services.AddNLWebNetMultiBackend(
+    options =>
+    {
+        options.DefaultMode = QueryMode.List;
+        options.MaxResultsPerQuery = 50;
+    },
+    multiBackendOptions =>
+    {
+        multiBackendOptions.Enabled = true;
+        multiBackendOptions.EnableParallelQuerying = true;
+        multiBackendOptions.EnableResultDeduplication = true;
+        multiBackendOptions.MaxConcurrentQueries = 3;
+        multiBackendOptions.BackendTimeoutSeconds = 30;
+        multiBackendOptions.WriteEndpoint = "primary_backend";
+    });
+```
+
+## Configuration via appsettings.json
+
+```json
+{
+  "NLWebNet": {
+    "DefaultMode": "List",
+    "MaxResultsPerQuery": 50,
+    "MultiBackend": {
+      "Enabled": true,
+      "EnableParallelQuerying": true,
+      "EnableResultDeduplication": true,
+      "MaxConcurrentQueries": 3,
+      "BackendTimeoutSeconds": 30,
+      "WriteEndpoint": "primary_backend",
+      "Endpoints": {
+        "primary_backend": {
+          "Enabled": true,
+          "BackendType": "azure_ai_search",
+          "Priority": 10,
+          "Properties": {
+            "ConnectionString": "your-connection-string",
+            "IndexName": "your-index"
+          }
+        },
+        "secondary_backend": {
+          "Enabled": true,
+          "BackendType": "mock",
+          "Priority": 5,
+          "Properties": {}
+        }
+      }
+    }
+  }
+}
+```
+
+## Usage Example
+
+```csharp
+public class ExampleController : ControllerBase
+{
+    private readonly INLWebService _nlWebService;
+    private readonly IBackendManager _backendManager;
+
+    public ExampleController(INLWebService nlWebService, IBackendManager backendManager)
+    {
+        _nlWebService = nlWebService;
+        _backendManager = backendManager;
+    }
+
+    [HttpPost("search")]
+    public async Task<IActionResult> Search([FromBody] NLWebRequest request)
+    {
+        // Multi-backend search is handled automatically
+        var response = await _nlWebService.ProcessRequestAsync(request);
+        return Ok(response);
+    }
+
+    [HttpGet("backend-info")]
+    public IActionResult GetBackendInfo()
+    {
+        // Get information about configured backends
+        var backendInfo = _backendManager.GetBackendInfo();
+        return Ok(backendInfo);
+    }
+
+    [HttpGet("write-backend-capabilities")]
+    public IActionResult GetWriteBackendCapabilities()
+    {
+        // Access the designated write backend
+        var writeBackend = _backendManager.GetWriteBackend();
+        if (writeBackend == null)
+        {
+            return NotFound("No write backend configured");
+        }
+
+        var capabilities = writeBackend.GetCapabilities();
+        return Ok(capabilities);
+    }
+}
+```
+
+## Key Features
+
+### Parallel Querying
+- Queries execute simultaneously across all enabled backends
+- Configurable concurrency limits and timeouts
+- Graceful handling of backend failures
+
+### Result Deduplication
+- Automatic deduplication based on URL
+- Higher-scoring results from different backends take precedence
+- Can be disabled for scenarios requiring all results
+
+### Write Endpoint
+- Designate one backend as the primary write endpoint
+- Other backends remain read-only for queries
+- Useful for hybrid architectures
+
+### Backward Compatibility
+- Existing
single-backend configurations continue to work +- No breaking changes to existing APIs +- Gradual migration path available + +## Migration from Single Backend + +1. Replace `AddNLWebNet()` with `AddNLWebNetMultiBackend()` +2. Set `MultiBackend.Enabled = false` initially to maintain existing behavior +3. Configure additional backends in the `Endpoints` section +4. Enable multi-backend mode by setting `MultiBackend.Enabled = true` +5. Test and adjust concurrency and timeout settings as needed \ No newline at end of file diff --git a/samples/AspireDemo/AspireHost/Program.cs b/samples/AspireDemo/AspireHost/Program.cs index 71e1221..ad2dcdf 100644 --- a/samples/AspireDemo/AspireHost/Program.cs +++ b/samples/AspireDemo/AspireHost/Program.cs @@ -11,18 +11,18 @@ options.AddFilter("Aspire.Hosting.ApplicationModel", LogLevel.Information); options.AddFilter("Aspire.Hosting", LogLevel.Information); options.AddFilter("Aspire", LogLevel.Warning); - + // Reduce OpenTelemetry noise options.AddFilter("OpenTelemetry", LogLevel.Warning); - + // Keep basic hosting messages options.AddFilter("Microsoft.Extensions.Hosting.Internal.Host", LogLevel.Information); options.AddFilter("Microsoft.Extensions.Hosting", LogLevel.Warning); - + // Reduce ASP.NET Core noise but keep startup messages options.AddFilter("Microsoft.AspNetCore.Hosting.Diagnostics", LogLevel.Information); options.AddFilter("Microsoft.AspNetCore", LogLevel.Warning); - + // Reduce DI and HTTP noise options.AddFilter("Microsoft.Extensions.DependencyInjection", LogLevel.Warning); options.AddFilter("System.Net.Http", LogLevel.Warning); diff --git a/samples/AspireDemo/NLWebNet.AspireApp/Program.cs b/samples/AspireDemo/NLWebNet.AspireApp/Program.cs index a59a625..c2fea50 100644 --- a/samples/AspireDemo/NLWebNet.AspireApp/Program.cs +++ b/samples/AspireDemo/NLWebNet.AspireApp/Program.cs @@ -59,8 +59,9 @@ .WithName("GetHealth") .WithOpenApi(); -app.MapGet("/api/health", () => Results.Ok(new { - Status = "Healthy", +app.MapGet("/api/health", () => Results.Ok(new +{ + Status = "Healthy", Timestamp = DateTimeOffset.UtcNow, Service = "NLWebNet AspireApp API" })) @@ -88,7 +89,7 @@ { // Extract GitHub token from headers if provided for consistent embedding var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault(); - + var count = await ingestionService.IngestDemoFeedsAsync(githubToken); return Results.Ok(new { Message = $"Successfully ingested {count} documents from demo feeds", Count = count }); } @@ -106,10 +107,11 @@ { new { Name = "Microsoft .NET Blog", Url = "https://devblogs.microsoft.com/dotnet/feed/", Note = "Latest 25 articles" } }; - - return Results.Ok(new { + + return Results.Ok(new + { Message = "Demo RSS feed used for focused ingestion (latest 25 articles from .NET blog)", - Feeds = demoFeeds + Feeds = demoFeeds }); }) .WithName("GetDemoFeeds") @@ -137,12 +139,12 @@ { using var activity = System.Diagnostics.Activity.Current?.Source.StartActivity("VectorSearch.SearchDocuments"); var correlationId = Guid.NewGuid().ToString("N")[..8]; - + activity?.SetTag("search.correlation_id", correlationId); activity?.SetTag("search.query", query); activity?.SetTag("search.limit", limit); activity?.SetTag("search.threshold", threshold); - + try { if (string.IsNullOrWhiteSpace(query)) @@ -153,56 +155,56 @@ } var searchLimit = limit ?? 
10; - + // Extract GitHub token from headers if provided var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault(); var hasToken = !string.IsNullOrEmpty(githubToken); - + // Adjust threshold based on embedding type var searchThreshold = threshold ?? (hasToken && IsValidGitHubToken(githubToken) ? 0.1f : 0.03f); - + logger.LogInformation("=== SEARCH REQUEST START [{CorrelationId}] ===", correlationId); - logger.LogInformation("[{CorrelationId}] Search parameters - Query: '{Query}', Limit: {Limit}, Threshold: {Threshold}, HasToken: {HasToken}, TokenLength: {TokenLength}", + logger.LogInformation("[{CorrelationId}] Search parameters - Query: '{Query}', Limit: {Limit}, Threshold: {Threshold}, HasToken: {HasToken}, TokenLength: {TokenLength}", correlationId, query, searchLimit, searchThreshold, hasToken, githubToken?.Length ?? 0); - + activity?.SetTag("auth.has_token", hasToken); activity?.SetTag("auth.token_length", githubToken?.Length ?? 0); activity?.SetTag("search.processed_limit", searchLimit); activity?.SetTag("search.processed_threshold", searchThreshold); - + // Generate embedding for the search query logger.LogInformation("[{CorrelationId}] Generating query embedding...", correlationId); var embeddingStopwatch = System.Diagnostics.Stopwatch.StartNew(); - + var queryEmbedding = await embeddingService.GenerateEmbeddingAsync(query, githubToken); - + embeddingStopwatch.Stop(); - logger.LogInformation("[{CorrelationId}] Query embedding generated - Duration: {Duration}ms, Dimensions: {Dimensions}, EmbeddingType: {EmbeddingType}", + logger.LogInformation("[{CorrelationId}] Query embedding generated - Duration: {Duration}ms, Dimensions: {Dimensions}, EmbeddingType: {EmbeddingType}", correlationId, embeddingStopwatch.ElapsedMilliseconds, queryEmbedding.Length, hasToken ? "GitHub Models" : "Simple Hash"); - + activity?.SetTag("embedding.duration_ms", embeddingStopwatch.ElapsedMilliseconds); activity?.SetTag("embedding.dimensions", queryEmbedding.Length); activity?.SetTag("embedding.type", hasToken ? 
"github_models" : "simple_hash"); - + // Search for similar documents logger.LogInformation("[{CorrelationId}] Performing vector similarity search...", correlationId); var searchStopwatch = System.Diagnostics.Stopwatch.StartNew(); - + var results = await vectorStorage.SearchSimilarAsync(queryEmbedding, searchLimit, searchThreshold); - + searchStopwatch.Stop(); var rawResultCount = results.Count(); - - logger.LogInformation("[{CorrelationId}] Vector search completed - Duration: {Duration}ms, RawResults: {RawResultCount}", + + logger.LogInformation("[{CorrelationId}] Vector search completed - Duration: {Duration}ms, RawResults: {RawResultCount}", correlationId, searchStopwatch.ElapsedMilliseconds, rawResultCount); - + activity?.SetTag("vector_search.duration_ms", searchStopwatch.ElapsedMilliseconds); activity?.SetTag("vector_search.raw_result_count", rawResultCount); - + // Process and format results logger.LogInformation("[{CorrelationId}] Processing search results...", correlationId); var processingStopwatch = System.Diagnostics.Stopwatch.StartNew(); - + var searchResults = results.Select(r => new { Id = r.Document.Id, @@ -212,22 +214,22 @@ PublishedDate = r.Document.IngestedAt, Similarity = Math.Max(0.0, Math.Min(1.0, r.Score)) }).ToList(); - + processingStopwatch.Stop(); - + // Log result statistics if (searchResults.Any()) { var avgSimilarity = searchResults.Average(r => r.Similarity); var maxSimilarity = searchResults.Max(r => r.Similarity); var minSimilarity = searchResults.Min(r => r.Similarity); - - logger.LogInformation("[{CorrelationId}] Result statistics - Count: {Count}, AvgSimilarity: {AvgSimilarity:F3}, MaxSimilarity: {MaxSimilarity:F3}, MinSimilarity: {MinSimilarity:F3}", + + logger.LogInformation("[{CorrelationId}] Result statistics - Count: {Count}, AvgSimilarity: {AvgSimilarity:F3}, MaxSimilarity: {MaxSimilarity:F3}, MinSimilarity: {MinSimilarity:F3}", correlationId, searchResults.Count, avgSimilarity, maxSimilarity, minSimilarity); - - logger.LogInformation("[{CorrelationId}] Top result - Title: '{Title}', Similarity: {Similarity:F3}", + + logger.LogInformation("[{CorrelationId}] Top result - Title: '{Title}', Similarity: {Similarity:F3}", correlationId, searchResults[0].Title, searchResults[0].Similarity); - + activity?.SetTag("results.count", searchResults.Count); activity?.SetTag("results.avg_similarity", avgSimilarity); activity?.SetTag("results.max_similarity", maxSimilarity); @@ -235,30 +237,30 @@ } else { - logger.LogWarning("[{CorrelationId}] No results found for query '{Query}' with threshold {Threshold}", + logger.LogWarning("[{CorrelationId}] No results found for query '{Query}' with threshold {Threshold}", correlationId, query, searchThreshold); activity?.SetTag("results.count", 0); } var totalDuration = embeddingStopwatch.ElapsedMilliseconds + searchStopwatch.ElapsedMilliseconds + processingStopwatch.ElapsedMilliseconds; - - logger.LogInformation("=== SEARCH REQUEST SUCCESS [{CorrelationId}] === Total duration: {TotalDuration}ms, Results: {ResultCount}, EmbeddingType: {EmbeddingType}", + + logger.LogInformation("=== SEARCH REQUEST SUCCESS [{CorrelationId}] === Total duration: {TotalDuration}ms, Results: {ResultCount}, EmbeddingType: {EmbeddingType}", correlationId, totalDuration, searchResults.Count, hasToken ? 
"GitHub Models" : "Simple Hash"); - + activity?.SetTag("search.success", true); activity?.SetTag("search.total_duration_ms", totalDuration); - + return Results.Ok(searchResults); } catch (Exception ex) { logger.LogError(ex, "=== SEARCH REQUEST FAILED [{CorrelationId}] === Query: '{Query}', Error: {Message}", correlationId, query, ex.Message); - + activity?.SetTag("search.success", false); activity?.SetTag("error.type", ex.GetType().Name); activity?.SetTag("error.message", ex.Message); activity?.SetTag("error.stack_trace", ex.StackTrace); - + return Results.BadRequest(new { Error = ex.Message }); } }) @@ -272,11 +274,11 @@ { var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault(); var hasToken = !string.IsNullOrEmpty(githubToken) && IsValidGitHubToken(githubToken); - + logger.LogInformation("Generating embedding for diagnostic - Text: '{Text}', HasToken: {HasToken}", text, hasToken); - + var embedding = await embeddingService.GenerateEmbeddingAsync(text, githubToken); - + var stats = new { Text = text, @@ -294,7 +296,7 @@ NonZeroCount = embedding.Span.ToArray().Count(x => Math.Abs(x) > 0.001f) } }; - + return Results.Ok(stats); } catch (Exception ex) @@ -314,16 +316,16 @@ var searchLimit = limit ?? 10; var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault(); var hasToken = !string.IsNullOrEmpty(githubToken) && IsValidGitHubToken(githubToken); - + logger.LogInformation("=== DIAGNOSTIC SEARCH ==="); logger.LogInformation("Query: '{Query}', HasToken: {HasToken}", query, hasToken); - + // Generate query embedding var queryEmbedding = await embeddingService.GenerateEmbeddingAsync(query, githubToken); - + // Get raw search results with very low threshold var results = await vectorStorage.SearchSimilarAsync(queryEmbedding, searchLimit, 0.0f); - + var diagnosticResults = results.Select((r, index) => new { Rank = index + 1, @@ -337,7 +339,7 @@ TitleMatch = r.Document.Title?.Contains(query, StringComparison.OrdinalIgnoreCase) == true, DescriptionMatch = r.Document.Description?.Contains(query, StringComparison.OrdinalIgnoreCase) == true }).ToList(); - + var analysis = new { Query = query, @@ -355,10 +357,10 @@ LowestSimilarity = diagnosticResults.LastOrDefault()?.Similarity ?? 0, Results = diagnosticResults }; - - logger.LogInformation("Diagnostic complete - {ResultCount} results, {TextMatches} contain query term", + + logger.LogInformation("Diagnostic complete - {ResultCount} results, {TextMatches} contain query term", diagnosticResults.Count, diagnosticResults.Count(r => r.ContainsQueryTerm)); - + return Results.Ok(analysis); } catch (Exception ex) @@ -377,7 +379,7 @@ { var searchLimit = limit ?? 50; var documents = await vectorStorage.GetAllDocumentsAsync(searchLimit); - + var results = documents.Select(doc => new { Id = doc.Id, @@ -388,13 +390,13 @@ TitleMatch = !string.IsNullOrEmpty(search) && doc.Title.Contains(search, StringComparison.OrdinalIgnoreCase), DescriptionMatch = !string.IsNullOrEmpty(search) && !string.IsNullOrEmpty(doc.Description) && doc.Description.Contains(search, StringComparison.OrdinalIgnoreCase) }).ToList(); - + if (!string.IsNullOrEmpty(search)) { // Filter to only documents that contain the search term results = results.Where(r => r.TitleMatch || r.DescriptionMatch).ToList(); } - + return Results.Ok(new { TotalDocuments = documents.Count(), @@ -438,7 +440,7 @@ // Try with the actual token from headers var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault(); ReadOnlyMemory? 
realGithubEmbedding = null; - + if (!string.IsNullOrEmpty(githubToken)) { try @@ -489,7 +491,7 @@ // Helper method for GitHub token validation static bool IsValidGitHubToken(string? token) { - return !string.IsNullOrWhiteSpace(token) && + return !string.IsNullOrWhiteSpace(token) && (token.StartsWith("gho_") || token.StartsWith("ghp_") || token.StartsWith("github_pat_")) && token.Length > 20; } diff --git a/samples/AspireDemo/NLWebNet.AspireApp/Services/CompositeEmbeddingService.cs b/samples/AspireDemo/NLWebNet.AspireApp/Services/CompositeEmbeddingService.cs index f00fdd1..072b794 100644 --- a/samples/AspireDemo/NLWebNet.AspireApp/Services/CompositeEmbeddingService.cs +++ b/samples/AspireDemo/NLWebNet.AspireApp/Services/CompositeEmbeddingService.cs @@ -18,7 +18,7 @@ public CompositeEmbeddingService( { _serviceProvider = serviceProvider ?? throw new ArgumentNullException(nameof(serviceProvider)); _logger = logger ?? throw new ArgumentNullException(nameof(logger)); - + // Always create simple embedding service as fallback var simpleLogger = _serviceProvider.GetRequiredService>(); _simpleEmbeddingService = new SimpleEmbeddingService(simpleLogger); @@ -37,10 +37,10 @@ public async Task> GenerateEmbeddingAsync(string text, str try { _logger.LogDebug("Attempting to use GitHub Models embedding service with provided token"); - + var githubService = CreateGitHubModelsService(githubToken); var result = await githubService.GenerateEmbeddingAsync(text, githubToken, cancellationToken); - + _logger.LogDebug("Successfully generated embedding using GitHub Models"); return result; } @@ -63,14 +63,14 @@ private GitHubModelsEmbeddingService CreateGitHubModelsService(string githubToke { var httpClientFactory = _serviceProvider.GetRequiredService(); var httpClient = httpClientFactory.CreateClient("GitHubModels"); - + // Configure the HttpClient for this request httpClient.BaseAddress = new Uri("https://models.inference.ai.azure.com/"); - httpClient.DefaultRequestHeaders.Authorization = + httpClient.DefaultRequestHeaders.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", githubToken); httpClient.DefaultRequestHeaders.Add("User-Agent", "NLWebNet-AspireDemo"); httpClient.Timeout = TimeSpan.FromSeconds(30); - + var logger = _serviceProvider.GetRequiredService>(); return new GitHubModelsEmbeddingService(httpClient, "text-embedding-3-small", logger); } @@ -79,7 +79,7 @@ private static bool IsValidGitHubToken(string token) { // Basic validation for GitHub token format // Real tokens start with 'gho_', 'ghp_', or 'github_pat_' - return !string.IsNullOrWhiteSpace(token) && + return !string.IsNullOrWhiteSpace(token) && (token.StartsWith("gho_") || token.StartsWith("ghp_") || token.StartsWith("github_pat_")) && token.Length > 20; // GitHub tokens are typically much longer } diff --git a/samples/AspireDemo/NLWebNet.AspireApp/Services/EmbeddingService.cs b/samples/AspireDemo/NLWebNet.AspireApp/Services/EmbeddingService.cs index 2c14045..6cdab39 100644 --- a/samples/AspireDemo/NLWebNet.AspireApp/Services/EmbeddingService.cs +++ b/samples/AspireDemo/NLWebNet.AspireApp/Services/EmbeddingService.cs @@ -100,15 +100,15 @@ private static ReadOnlyMemory GenerateSimpleEmbedding(string text) // Create a simple hash-based embedding for demo purposes // This is NOT suitable for production use var embedding = new float[EmbeddingSize]; - + var hash = text.GetHashCode(); var random = new Random(hash); - + for (int i = 0; i < EmbeddingSize; i++) { embedding[i] = (float)(random.NextDouble() * 2.0 - 1.0); // 
Range: -1 to 1 } - + // Normalize the embedding vector var magnitude = Math.Sqrt(embedding.Sum(x => x * x)); if (magnitude > 0) @@ -118,7 +118,7 @@ private static ReadOnlyMemory GenerateSimpleEmbedding(string text) embedding[i] = (float)(embedding[i] / magnitude); } } - + return new ReadOnlyMemory(embedding); } } diff --git a/samples/AspireDemo/NLWebNet.AspireApp/Services/GitHubModelsEmbeddingService.cs b/samples/AspireDemo/NLWebNet.AspireApp/Services/GitHubModelsEmbeddingService.cs index af2f8e4..f6302a0 100644 --- a/samples/AspireDemo/NLWebNet.AspireApp/Services/GitHubModelsEmbeddingService.cs +++ b/samples/AspireDemo/NLWebNet.AspireApp/Services/GitHubModelsEmbeddingService.cs @@ -12,7 +12,7 @@ public class GitHubModelsEmbeddingService : IEmbeddingService private readonly HttpClient _httpClient; private readonly string _model; private readonly ILogger _logger; - + private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower, @@ -20,7 +20,7 @@ public class GitHubModelsEmbeddingService : IEmbeddingService }; public GitHubModelsEmbeddingService( - HttpClient httpClient, + HttpClient httpClient, string model, ILogger logger) { diff --git a/samples/AspireDemo/NLWebNet.AspireApp/Services/IVectorStorageService.cs b/samples/AspireDemo/NLWebNet.AspireApp/Services/IVectorStorageService.cs index 21a984c..c42e90d 100644 --- a/samples/AspireDemo/NLWebNet.AspireApp/Services/IVectorStorageService.cs +++ b/samples/AspireDemo/NLWebNet.AspireApp/Services/IVectorStorageService.cs @@ -30,9 +30,9 @@ public interface IVectorStorageService /// Cancellation token /// List of similar documents with their similarity scores Task> SearchSimilarAsync( - ReadOnlyMemory queryEmbedding, - int limit = 10, - float threshold = 0.7f, + ReadOnlyMemory queryEmbedding, + int limit = 10, + float threshold = 0.7f, CancellationToken cancellationToken = default); /// diff --git a/samples/AspireDemo/NLWebNet.AspireApp/Services/QdrantVectorStorageService.cs b/samples/AspireDemo/NLWebNet.AspireApp/Services/QdrantVectorStorageService.cs index 866bdd5..09222a9 100644 --- a/samples/AspireDemo/NLWebNet.AspireApp/Services/QdrantVectorStorageService.cs +++ b/samples/AspireDemo/NLWebNet.AspireApp/Services/QdrantVectorStorageService.cs @@ -27,7 +27,7 @@ public async Task InitializeAsync(CancellationToken cancellationToken = default) try { // Check if collection exists by trying to get its info - try + try { await _qdrantClient.GetCollectionInfoAsync(CollectionName, cancellationToken); _logger.LogInformation("Qdrant collection already exists: {CollectionName}", CollectionName); @@ -50,7 +50,7 @@ await _qdrantClient.CreateCollectionAsync( }, cancellationToken: cancellationToken); - _logger.LogInformation("Created Qdrant collection: {CollectionName} with vector size: {VectorSize}", + _logger.LogInformation("Created Qdrant collection: {CollectionName} with vector size: {VectorSize}", CollectionName, VectorSize); _isInitialized = true; } @@ -64,7 +64,7 @@ await _qdrantClient.CreateCollectionAsync( public async Task StoreDocumentAsync(DocumentRecord document, CancellationToken cancellationToken = default) { ArgumentNullException.ThrowIfNull(document); - + if (!_isInitialized) await InitializeAsync(cancellationToken); @@ -78,7 +78,7 @@ public async Task StoreDocumentAsync(DocumentRecord document, Cancellati // Convert ReadOnlyMemory to float array for Qdrant var embeddingArray = document.Embedding.ToArray(); - + var point = new PointStruct { Id = new PointId { Uuid = document.Id }, @@ 
-118,9 +118,9 @@ public async Task StoreDocumentAsync(DocumentRecord document, Cancellati } public async Task> SearchSimilarAsync( - ReadOnlyMemory queryEmbedding, - int limit = 10, - float threshold = 0.7f, + ReadOnlyMemory queryEmbedding, + int limit = 10, + float threshold = 0.7f, CancellationToken cancellationToken = default) { if (!_isInitialized) @@ -129,7 +129,7 @@ public async Task StoreDocumentAsync(DocumentRecord document, Cancellati try { var embeddingArray = queryEmbedding.ToArray(); - + var searchResponse = await _qdrantClient.SearchAsync( collectionName: CollectionName, vector: embeddingArray, @@ -236,11 +236,11 @@ public async Task ClearAllDocumentsAsync(CancellationToken cancellationTok // Delete the collection and recreate it await _qdrantClient.DeleteCollectionAsync(CollectionName, cancellationToken: cancellationToken); _logger.LogInformation("Deleted Qdrant collection: {CollectionName}", CollectionName); - + // Recreate the collection _isInitialized = false; await InitializeAsync(cancellationToken); - + _logger.LogInformation("Cleared all documents from Qdrant collection: {CollectionName}", CollectionName); return true; } diff --git a/samples/AspireDemo/NLWebNet.AspireApp/Services/RssFeedIngestionService.cs b/samples/AspireDemo/NLWebNet.AspireApp/Services/RssFeedIngestionService.cs index 7089e80..c5ffd86 100644 --- a/samples/AspireDemo/NLWebNet.AspireApp/Services/RssFeedIngestionService.cs +++ b/samples/AspireDemo/NLWebNet.AspireApp/Services/RssFeedIngestionService.cs @@ -61,21 +61,21 @@ public async Task IngestFeedAsync(string feedUrl, string? githubToken, Canc var request = new HttpRequestMessage(HttpMethod.Get, feedUrl); request.Headers.Add("User-Agent", "NLWebNet RSS Ingestion Service 1.0"); request.Headers.Add("Accept", "application/rss+xml, application/xml, text/xml"); - + var response = await _httpClient.SendAsync(request, cancellationToken); - + _logger.LogInformation("RSS feed response: {StatusCode} for {FeedUrl}", response.StatusCode, feedUrl); - + if (!response.IsSuccessStatusCode) { var errorContent = await response.Content.ReadAsStringAsync(cancellationToken); - _logger.LogError("Failed to fetch RSS feed {FeedUrl}. Status: {StatusCode}. Content: {Content}", + _logger.LogError("Failed to fetch RSS feed {FeedUrl}. Status: {StatusCode}. Content: {Content}", feedUrl, response.StatusCode, errorContent); throw new HttpRequestException($"Failed to fetch RSS feed from {feedUrl}. Status: {response.StatusCode}"); } var content = await response.Content.ReadAsStringAsync(cancellationToken); - + // Parse the RSS feed with proper XML settings using var stringReader = new StringReader(content); var xmlSettings = new XmlReaderSettings @@ -84,7 +84,7 @@ public async Task IngestFeedAsync(string feedUrl, string? githubToken, Canc XmlResolver = null // Disable external entity resolution for security }; using var xmlReader = XmlReader.Create(stringReader, xmlSettings); - + var feed = SyndicationFeed.Load(xmlReader); if (feed == null) { @@ -97,7 +97,7 @@ public async Task IngestFeedAsync(string feedUrl, string? githubToken, Canc // Process only the latest 25 items to keep ingestion fast and focused var itemsToProcess = feed.Items.Take(25); - _logger.LogInformation("Processing latest {ItemCount} items from feed: {SiteName}", + _logger.LogInformation("Processing latest {ItemCount} items from feed: {SiteName}", itemsToProcess.Count(), siteName); // Process each item in the feed @@ -111,7 +111,7 @@ public async Task IngestFeedAsync(string feedUrl, string? 
githubToken, Canc // Generate semantic embedding for the document using the provided GitHub token var textToEmbed = $"{document.Title} {document.Description}"; document.Embedding = await _embeddingService.GenerateEmbeddingAsync(textToEmbed, githubToken, cancellationToken); - + await _vectorStorage.StoreDocumentAsync(document, cancellationToken); processedCount++; } @@ -122,9 +122,9 @@ public async Task IngestFeedAsync(string feedUrl, string? githubToken, Canc } } - _logger.LogInformation("Successfully ingested {ProcessedCount} items from feed: {FeedUrl}", + _logger.LogInformation("Successfully ingested {ProcessedCount} items from feed: {FeedUrl}", processedCount, feedUrl); - + return processedCount; } catch (Exception ex) @@ -137,7 +137,7 @@ public async Task IngestFeedAsync(string feedUrl, string? githubToken, Canc public async Task IngestDemoFeedsAsync(CancellationToken cancellationToken = default) { _logger.LogInformation("Starting ingestion of demo RSS feeds"); - + int totalProcessed = 0; var tasks = new List>(); @@ -150,8 +150,8 @@ public async Task IngestDemoFeedsAsync(CancellationToken cancellationToken { var results = await Task.WhenAll(tasks); totalProcessed = results.Sum(); - - _logger.LogInformation("Successfully ingested {TotalProcessed} items from {FeedCount} demo feeds", + + _logger.LogInformation("Successfully ingested {TotalProcessed} items from {FeedCount} demo feeds", totalProcessed, _demoFeeds.Length); } catch (Exception ex) @@ -166,7 +166,7 @@ public async Task IngestDemoFeedsAsync(CancellationToken cancellationToken public async Task IngestDemoFeedsAsync(string? githubToken, CancellationToken cancellationToken = default) { _logger.LogInformation("Starting focused ingestion of .NET blog RSS feed with GitHub token: {HasToken}", !string.IsNullOrEmpty(githubToken)); - + int totalProcessed = 0; // Process feeds sequentially to reduce server load and improve reliability @@ -186,9 +186,9 @@ public async Task IngestDemoFeedsAsync(string? githubToken, CancellationTok } } - _logger.LogInformation("Demo ingestion completed: {TotalProcessed} items from {FeedCount} feeds", + _logger.LogInformation("Demo ingestion completed: {TotalProcessed} items from {FeedCount} feeds", totalProcessed, _demoFeeds.Length); - + return totalProcessed; } diff --git a/samples/AspireDemo/NLWebNet.Frontend/Services/ApiService.cs b/samples/AspireDemo/NLWebNet.Frontend/Services/ApiService.cs index 64e8ba1..c0a54d3 100644 --- a/samples/AspireDemo/NLWebNet.Frontend/Services/ApiService.cs +++ b/samples/AspireDemo/NLWebNet.Frontend/Services/ApiService.cs @@ -32,32 +32,32 @@ public async Task SearchAsync(string query, string? 
githubTok activity?.SetTag("search.limit", limit); var correlationId = Guid.NewGuid().ToString("N")[..8]; - + try { _logger.LogInformation("=== API SERVICE SEARCH START [{CorrelationId}] ===", correlationId); - _logger.LogInformation("[{CorrelationId}] SearchAsync called - Query: '{Query}', HasToken: {HasToken}, Threshold: {Threshold}, Limit: {Limit}", + _logger.LogInformation("[{CorrelationId}] SearchAsync called - Query: '{Query}', HasToken: {HasToken}, Threshold: {Threshold}, Limit: {Limit}", correlationId, query, !string.IsNullOrEmpty(githubToken), threshold, limit); - + var queryParams = new List { $"query={Uri.EscapeDataString(query)}" }; - + if (threshold.HasValue) queryParams.Add($"threshold={threshold.Value}"); - + if (limit.HasValue) queryParams.Add($"limit={limit.Value}"); - + var queryString = string.Join("&", queryParams); var requestUrl = $"/api/search?{queryString}"; - + _logger.LogInformation("[{CorrelationId}] Building HTTP request - URL: {RequestUrl}", correlationId, requestUrl); _logger.LogInformation("[{CorrelationId}] HttpClient BaseAddress: {BaseAddress}", correlationId, _httpClient.BaseAddress?.ToString() ?? "null"); - + activity?.SetTag("http.url", requestUrl); activity?.SetTag("http.base_address", _httpClient.BaseAddress?.ToString()); - + var request = new HttpRequestMessage(HttpMethod.Get, requestUrl); - + if (!string.IsNullOrEmpty(githubToken)) { request.Headers.Add("X-GitHub-Token", githubToken); @@ -71,76 +71,76 @@ public async Task SearchAsync(string query, string? githubTok _logger.LogInformation("[{CorrelationId}] Sending HTTP request...", correlationId); var httpStopwatch = Stopwatch.StartNew(); - + var response = await _httpClient.SendAsync(request); - + httpStopwatch.Stop(); - _logger.LogInformation("[{CorrelationId}] HTTP Response received - Duration: {Duration}ms, StatusCode: {StatusCode}, ReasonPhrase: '{ReasonPhrase}'", + _logger.LogInformation("[{CorrelationId}] HTTP Response received - Duration: {Duration}ms, StatusCode: {StatusCode}, ReasonPhrase: '{ReasonPhrase}'", correlationId, httpStopwatch.ElapsedMilliseconds, response.StatusCode, response.ReasonPhrase); - + activity?.SetTag("http.status_code", (int)response.StatusCode); activity?.SetTag("http.duration_ms", httpStopwatch.ElapsedMilliseconds); - + if (response.IsSuccessStatusCode) { _logger.LogInformation("[{CorrelationId}] Reading JSON response...", correlationId); var jsonStopwatch = Stopwatch.StartNew(); - + var responseContent = await response.Content.ReadAsStringAsync(); - - _logger.LogInformation("[{CorrelationId}] Raw response content - Length: {Length} chars, Sample: {Sample}", - correlationId, responseContent.Length, + + _logger.LogInformation("[{CorrelationId}] Raw response content - Length: {Length} chars, Sample: {Sample}", + correlationId, responseContent.Length, responseContent.Length > 200 ? responseContent.Substring(0, 200) + "..." : responseContent); - + var results = await response.Content.ReadFromJsonAsync(); - + jsonStopwatch.Stop(); var resultCount = results?.Length ?? 
0; - - _logger.LogInformation("[{CorrelationId}] JSON deserialization completed - Duration: {Duration}ms, ResultCount: {ResultCount}", + + _logger.LogInformation("[{CorrelationId}] JSON deserialization completed - Duration: {Duration}ms, ResultCount: {ResultCount}", correlationId, jsonStopwatch.ElapsedMilliseconds, resultCount); - + activity?.SetTag("response.result_count", resultCount); activity?.SetTag("response.json_parse_duration_ms", jsonStopwatch.ElapsedMilliseconds); - + if (results != null && results.Length > 0) { - _logger.LogInformation("[{CorrelationId}] First result details - Title: '{Title}', Similarity: {Similarity:F3}", + _logger.LogInformation("[{CorrelationId}] First result details - Title: '{Title}', Similarity: {Similarity:F3}", correlationId, results[0].Title, results[0].Similarity); - + activity?.SetTag("response.first_result_similarity", results[0].Similarity); - + // Log similarity score distribution var highScores = results.Count(r => r.Similarity >= 0.7); var mediumScores = results.Count(r => r.Similarity >= 0.4 && r.Similarity < 0.7); var lowScores = results.Count(r => r.Similarity < 0.4); - - _logger.LogInformation("[{CorrelationId}] Similarity distribution - High (≥0.7): {High}, Medium (0.4-0.7): {Medium}, Low (<0.4): {Low}", + + _logger.LogInformation("[{CorrelationId}] Similarity distribution - High (≥0.7): {High}, Medium (0.4-0.7): {Medium}, Low (<0.4): {Low}", correlationId, highScores, mediumScores, lowScores); - + activity?.SetTag("results.high_similarity_count", highScores); activity?.SetTag("results.medium_similarity_count", mediumScores); activity?.SetTag("results.low_similarity_count", lowScores); } - - _logger.LogInformation("=== API SERVICE SEARCH SUCCESS [{CorrelationId}] === Total duration: {TotalDuration}ms", + + _logger.LogInformation("=== API SERVICE SEARCH SUCCESS [{CorrelationId}] === Total duration: {TotalDuration}ms", correlationId, httpStopwatch.ElapsedMilliseconds + jsonStopwatch.ElapsedMilliseconds); - + activity?.SetTag("search.success", true); activity?.SetTag("search.total_duration_ms", httpStopwatch.ElapsedMilliseconds + jsonStopwatch.ElapsedMilliseconds); - + return results ?? Array.Empty(); } else { var errorContent = await response.Content.ReadAsStringAsync(); - _logger.LogError("[{CorrelationId}] API Error - StatusCode: {StatusCode}, ReasonPhrase: '{ReasonPhrase}', Content: {Content}", + _logger.LogError("[{CorrelationId}] API Error - StatusCode: {StatusCode}, ReasonPhrase: '{ReasonPhrase}', Content: {Content}", correlationId, response.StatusCode, response.ReasonPhrase, errorContent); - + activity?.SetTag("search.success", false); activity?.SetTag("error.http_status", (int)response.StatusCode); activity?.SetTag("error.content", errorContent); - + _logger.LogInformation("=== API SERVICE SEARCH FAILED [{CorrelationId}] ===", correlationId); return Array.Empty(); } @@ -148,12 +148,12 @@ public async Task SearchAsync(string query, string? 
githubTok catch (Exception ex) { _logger.LogError(ex, "=== API SERVICE SEARCH EXCEPTION [{CorrelationId}] === Query: '{Query}', Error: {Message}", correlationId, query, ex.Message); - + activity?.SetTag("search.success", false); activity?.SetTag("error.type", ex.GetType().Name); activity?.SetTag("error.message", ex.Message); activity?.SetTag("error.stack_trace", ex.StackTrace); - + return Array.Empty(); } } @@ -163,7 +163,7 @@ public async Task TestConnectionAsync(string githubToken) try { var request = new HttpRequestMessage(HttpMethod.Get, "/api/health"); - + if (!string.IsNullOrEmpty(githubToken)) { request.Headers.Add("X-GitHub-Token", githubToken); diff --git a/samples/AspireDemo/NLWebNet.Frontend/Services/EmbeddingConfigurationService.cs b/samples/AspireDemo/NLWebNet.Frontend/Services/EmbeddingConfigurationService.cs index a0711de..fb67b22 100644 --- a/samples/AspireDemo/NLWebNet.Frontend/Services/EmbeddingConfigurationService.cs +++ b/samples/AspireDemo/NLWebNet.Frontend/Services/EmbeddingConfigurationService.cs @@ -40,7 +40,7 @@ public async Task InitializeAsync() { // Try to restore token from session storage var storedToken = await _jsRuntime.InvokeAsync("sessionStorage.getItem", "github-token"); - + if (!string.IsNullOrEmpty(storedToken)) { _githubToken = storedToken; @@ -53,7 +53,7 @@ public async Task InitializeAsync() _logger.LogWarning(ex, "Failed to restore token from session storage"); // Session storage not available or error - continue without token } - + _initialized = true; } @@ -68,12 +68,12 @@ public async Task ConfigureGitHubTokenAsync(string token) } _githubToken = token; - + // Store in session storage await _jsRuntime.InvokeVoidAsync("sessionStorage.setItem", "github-token", token); - + _logger.LogInformation("GitHub Models token configured and stored in session storage"); - + ConfigurationChanged?.Invoke(this, true); return true; } @@ -89,12 +89,12 @@ public async Task ClearConfigurationAsync() try { _githubToken = null; - + // Remove from session storage await _jsRuntime.InvokeVoidAsync("sessionStorage.removeItem", "github-token"); - + _logger.LogInformation("GitHub Models configuration cleared from session storage"); - + ConfigurationChanged?.Invoke(this, false); } catch (Exception ex) diff --git a/src/NLWebNet/Extensions/ServiceCollectionExtensions.cs b/src/NLWebNet/Extensions/ServiceCollectionExtensions.cs index d247166..4e618fd 100644 --- a/src/NLWebNet/Extensions/ServiceCollectionExtensions.cs +++ b/src/NLWebNet/Extensions/ServiceCollectionExtensions.cs @@ -1,5 +1,8 @@ using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Diagnostics.HealthChecks; +using Microsoft.Extensions.AI; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; using NLWebNet.Models; using NLWebNet.Services; using NLWebNet.MCP; @@ -94,4 +97,106 @@ public static IServiceCollection AddNLWebNet(this IServiceCollecti return services; } + + /// + /// Adds NLWebNet services with multi-backend support + /// + /// The service collection + /// Optional configuration callback + /// Multi-backend configuration callback + /// The service collection for chaining + public static IServiceCollection AddNLWebNetMultiBackend(this IServiceCollection services, + Action? configureOptions = null, + Action? 
configureMultiBackend = null) + { + // Configure options + if (configureOptions != null) + { + services.Configure(configureOptions); + } + + // Configure multi-backend options + if (configureMultiBackend != null) + { + services.Configure(configureMultiBackend); + } + + // Add logging (required for the services) + services.AddLogging(); + + // Register core NLWebNet services + services.AddScoped(provider => + { + var options = provider.GetRequiredService>(); + var multiBackendOptions = provider.GetRequiredService>(); + if (multiBackendOptions.Value.Enabled) + { + // Use multi-backend constructor + return new NLWebService( + provider.GetRequiredService(), + provider.GetRequiredService(), + provider.GetRequiredService(), + provider.GetRequiredService>(), + options); + } + else + { + // Use single backend constructor for backward compatibility + return new NLWebService( + provider.GetRequiredService(), + provider.GetRequiredService(), + provider.GetRequiredService(), + provider.GetRequiredService>(), + options); + } + }); + + services.AddScoped(); + services.AddScoped(provider => + { + var options = provider.GetRequiredService>(); + var multiBackendOptions = provider.GetRequiredService>(); + if (multiBackendOptions.Value.Enabled) + { + // Use multi-backend constructor + return new ResultGenerator( + provider.GetRequiredService(), + provider.GetRequiredService>(), + options, + provider.GetService()); + } + else + { + // Use single backend constructor for backward compatibility + return new ResultGenerator( + provider.GetRequiredService(), + provider.GetRequiredService>(), + options, + provider.GetService()); + } + }); + + // Register MCP services + services.AddScoped(); + + // Register multi-backend services + services.AddScoped(); + + // Register default data backend (can be overridden) + services.AddScoped(); + + // Add health checks + services.AddHealthChecks() + .AddCheck("nlweb") + .AddCheck("data-backend") + .AddCheck("ai-service"); + + // Add metrics + services.AddMetrics(); + + // Add rate limiting + services.AddSingleton(); + + return services; + } } diff --git a/src/NLWebNet/Models/MultiBackendOptions.cs b/src/NLWebNet/Models/MultiBackendOptions.cs new file mode 100644 index 0000000..e4a3b5a --- /dev/null +++ b/src/NLWebNet/Models/MultiBackendOptions.cs @@ -0,0 +1,78 @@ +using System.ComponentModel.DataAnnotations; + +namespace NLWebNet.Models; + +/// +/// Configuration options for multi-backend retrieval architecture. +/// +public class MultiBackendOptions +{ + /// + /// The configuration section name for binding from appsettings.json. + /// + public const string SectionName = "NLWebNet:MultiBackend"; + + /// + /// Whether multi-backend mode is enabled. When false, uses single backend for backward compatibility. + /// + public bool Enabled { get; set; } = false; + + /// + /// The identifier of the backend to use as the primary write endpoint. + /// + public string? WriteEndpoint { get; set; } + + /// + /// Configuration for individual backend endpoints. + /// + public Dictionary Endpoints { get; set; } = new(); + + /// + /// Whether to enable parallel querying across backends. + /// + public bool EnableParallelQuerying { get; set; } = true; + + /// + /// Whether to enable automatic result deduplication. + /// + public bool EnableResultDeduplication { get; set; } = true; + + /// + /// Maximum number of concurrent backend queries. + /// + [Range(1, 10)] + public int MaxConcurrentQueries { get; set; } = 5; + + /// + /// Timeout for individual backend queries in seconds. 
+ /// + [Range(1, 120)] + public int BackendTimeoutSeconds { get; set; } = 30; +} + +/// +/// Configuration for an individual backend endpoint. +/// +public class BackendEndpointOptions +{ + /// + /// Whether this backend endpoint is enabled. + /// + public bool Enabled { get; set; } = true; + + /// + /// The type of backend (e.g., "azure_ai_search", "mock", "custom"). + /// + public string BackendType { get; set; } = string.Empty; + + /// + /// Priority for this backend (higher values = higher priority). + /// Used for ordering results when deduplication is disabled. + /// + public int Priority { get; set; } = 0; + + /// + /// Backend-specific configuration properties. + /// + public Dictionary Properties { get; set; } = new(); +} \ No newline at end of file diff --git a/src/NLWebNet/Models/NLWebOptions.cs b/src/NLWebNet/Models/NLWebOptions.cs index 891f64c..db1c7df 100644 --- a/src/NLWebNet/Models/NLWebOptions.cs +++ b/src/NLWebNet/Models/NLWebOptions.cs @@ -71,4 +71,9 @@ public class NLWebOptions /// Rate limiting configuration /// public RateLimitingOptions RateLimiting { get; set; } = new(); + + /// + /// Multi-backend configuration options. When enabled, overrides single backend behavior. + /// + public MultiBackendOptions MultiBackend { get; set; } = new(); } diff --git a/src/NLWebNet/Services/BackendManager.cs b/src/NLWebNet/Services/BackendManager.cs new file mode 100644 index 0000000..a280b88 --- /dev/null +++ b/src/NLWebNet/Services/BackendManager.cs @@ -0,0 +1,261 @@ +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using NLWebNet.Models; +using System.Collections.Concurrent; + +namespace NLWebNet.Services; + +/// +/// Implementation of backend manager that coordinates operations across multiple data backends. +/// +public class BackendManager : IBackendManager +{ + private readonly IEnumerable _backends; + private readonly MultiBackendOptions _options; + private readonly ILogger _logger; + private readonly Dictionary _backendsByName; + private readonly IDataBackend? _writeBackend; + + public BackendManager( + IEnumerable backends, + IOptions options, + ILogger logger) + { + _backends = backends ?? throw new ArgumentNullException(nameof(backends)); + _options = options?.Value ?? throw new ArgumentNullException(nameof(options)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + + // For this initial implementation, we'll work with the backends provided via DI + // In a full implementation, this would use a factory pattern to create backends + // based on the configuration + _backendsByName = new Dictionary(); + + // Assign backends names based on configured endpoints if available, + // otherwise fall back to generic names for backward compatibility + var backendArray = _backends.ToArray(); + var configuredEndpoints = _options.Endpoints?.Where(e => e.Value.Enabled).ToList() ?? 
new List>(); + + if (configuredEndpoints.Count > 0 && configuredEndpoints.Count == backendArray.Length) + { + // Use configured endpoint identifiers + for (int i = 0; i < backendArray.Length; i++) + { + var endpointName = configuredEndpoints[i].Key; + _backendsByName[endpointName] = backendArray[i]; + } + } + else + { + // Fall back to generic names for backward compatibility + for (int i = 0; i < backendArray.Length; i++) + { + _backendsByName[$"backend_{i}"] = backendArray[i]; + } + } + + // Set write backend - for now use the first backend if writeEndpoint is configured + if (!string.IsNullOrEmpty(_options.WriteEndpoint) && + _backendsByName.TryGetValue(_options.WriteEndpoint, out var writeBackend)) + { + _writeBackend = writeBackend; + } + else if (backendArray.Length > 0) + { + _writeBackend = backendArray[0]; // Default to first backend + } + + _logger.LogInformation("BackendManager initialized with {BackendCount} backends, WriteEndpoint: {WriteEndpoint}", + _backendsByName.Count, _options.WriteEndpoint ?? "default"); + } + + /// + public async Task> SearchAsync(string query, string? site = null, int maxResults = 10, CancellationToken cancellationToken = default) + { + if (!_options.Enabled || !_backends.Any()) + { + // Fall back to single backend behavior if multi-backend is disabled + var firstBackend = _backends.FirstOrDefault(); + if (firstBackend == null) + { + return Enumerable.Empty(); + } + return await firstBackend.SearchAsync(query, site, maxResults, cancellationToken); + } + + _logger.LogDebug("Starting parallel search across {BackendCount} backends for query: {Query}", + _backends.Count(), query); + + var results = new ConcurrentBag(); + var semaphore = new SemaphoreSlim(_options.MaxConcurrentQueries, _options.MaxConcurrentQueries); + + if (_options.EnableParallelQuerying) + { + // Execute searches in parallel + var tasks = _backends.Select(async backend => + { + await semaphore.WaitAsync(cancellationToken); + try + { + using var timeoutCts = new CancellationTokenSource(TimeSpan.FromSeconds(_options.BackendTimeoutSeconds)); + using var combinedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCts.Token); + + var backendResults = await backend.SearchAsync(query, site, maxResults, combinedCts.Token); + foreach (var result in backendResults) + { + results.Add(result); + } + _logger.LogDebug("Backend search completed with {ResultCount} results", backendResults.Count()); + } + catch (OperationCanceledException) + { + _logger.LogWarning("Backend search timed out or was cancelled"); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error during backend search"); + } + finally + { + semaphore.Release(); + } + }); + + await Task.WhenAll(tasks); + } + else + { + // Execute searches sequentially + foreach (var backend in _backends) + { + try + { + using var timeoutCts = new CancellationTokenSource(TimeSpan.FromSeconds(_options.BackendTimeoutSeconds)); + using var combinedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCts.Token); + + var backendResults = await backend.SearchAsync(query, site, maxResults, combinedCts.Token); + foreach (var result in backendResults) + { + results.Add(result); + } + } + catch (OperationCanceledException) + { + _logger.LogWarning("Backend search timed out or was cancelled"); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error during backend search"); + } + } + } + + var allResults = results.ToList(); + _logger.LogDebug("Collected {TotalResults} results from all backends", 
allResults.Count); + + // Apply deduplication if enabled + if (_options.EnableResultDeduplication) + { + allResults = DeduplicateResults(allResults); + _logger.LogDebug("After deduplication: {DeduplicatedResults} results", allResults.Count); + } + + // Sort by relevance score and take the requested number of results + return allResults + .OrderByDescending(r => r.Score) + .Take(maxResults); + } + + /// + public async Task> GetAvailableSitesAsync(CancellationToken cancellationToken = default) + { + var allSites = new HashSet(); + + foreach (var backend in _backends) + { + try + { + var sites = await backend.GetAvailableSitesAsync(cancellationToken); + foreach (var site in sites) + { + allSites.Add(site); + } + } + catch (Exception ex) + { + _logger.LogError(ex, "Error getting available sites from backend"); + } + } + + return allSites; + } + + /// + public async Task GetItemByUrlAsync(string url, CancellationToken cancellationToken = default) + { + foreach (var backend in _backends) + { + try + { + var result = await backend.GetItemByUrlAsync(url, cancellationToken); + if (result != null) + { + return result; + } + } + catch (Exception ex) + { + _logger.LogError(ex, "Error getting item by URL from backend"); + } + } + + return null; + } + + /// + public IDataBackend? GetWriteBackend() + { + return _writeBackend; + } + + /// + public IEnumerable GetBackendInfo() + { + return _backendsByName.Select(kvp => new BackendInfo + { + Id = kvp.Key, + Enabled = true, // All registered backends are considered enabled + IsWriteEndpoint = kvp.Value == _writeBackend, + Capabilities = kvp.Value.GetCapabilities(), + Priority = 0 // Default priority, would be configurable in full implementation + }); + } + + /// + /// Deduplicates results based on URL and title similarity. + /// + private List DeduplicateResults(List results) + { + // Use Dictionary for O(n) performance instead of O(n²) + var resultsByUrl = new Dictionary(); + + foreach (var result in results) + { + // Check if we've seen this URL before + if (resultsByUrl.TryGetValue(result.Url, out var existing)) + { + // Keep the result with the higher score + if (result.Score > existing.Score) + { + resultsByUrl[result.Url] = result; + } + } + else + { + // First time seeing this URL + resultsByUrl[result.Url] = result; + } + } + + return resultsByUrl.Values.ToList(); + } +} \ No newline at end of file diff --git a/src/NLWebNet/Services/IBackendManager.cs b/src/NLWebNet/Services/IBackendManager.cs new file mode 100644 index 0000000..e9791d8 --- /dev/null +++ b/src/NLWebNet/Services/IBackendManager.cs @@ -0,0 +1,78 @@ +using NLWebNet.Models; + +namespace NLWebNet.Services; + +/// +/// Interface for managing multiple data backends and coordinating operations across them. +/// +public interface IBackendManager +{ + /// + /// Searches across all enabled backends in parallel, with automatic result deduplication. + /// + /// The search query + /// Optional site filter to restrict search scope + /// Maximum number of results to return + /// Cancellation token for async operations + /// A collection of deduplicated search results with relevance scores + Task> SearchAsync(string query, string? site = null, int maxResults = 10, CancellationToken cancellationToken = default); + + /// + /// Gets available sites/scopes from all backends. 
+ /// + /// Cancellation token for async operations + /// A collection of all available site identifiers across all backends + Task> GetAvailableSitesAsync(CancellationToken cancellationToken = default); + + /// + /// Gets detailed information about a specific item by its URL or ID from all backends. + /// Returns the first match found across all backends. + /// + /// The URL or identifier of the item + /// Cancellation token for async operations + /// The detailed item information, or null if not found in any backend + Task GetItemByUrlAsync(string url, CancellationToken cancellationToken = default); + + /// + /// Gets the primary write backend for operations that require a single endpoint. + /// + /// The primary write backend, or null if not configured + IDataBackend? GetWriteBackend(); + + /// + /// Gets information about all configured backends and their capabilities. + /// + /// A collection of backend information including their capabilities + IEnumerable GetBackendInfo(); +} + +/// +/// Information about a configured backend. +/// +public record BackendInfo +{ + /// + /// The unique identifier for this backend. + /// + public string Id { get; init; } = string.Empty; + + /// + /// Whether this backend is currently enabled. + /// + public bool Enabled { get; init; } + + /// + /// Whether this is the designated write endpoint. + /// + public bool IsWriteEndpoint { get; init; } + + /// + /// The backend's capabilities. + /// + public BackendCapabilities Capabilities { get; init; } = new(); + + /// + /// The priority of this backend. + /// + public int Priority { get; init; } +} \ No newline at end of file diff --git a/src/NLWebNet/Services/NLWebService.cs b/src/NLWebNet/Services/NLWebService.cs index b2ad774..e2f4273 100644 --- a/src/NLWebNet/Services/NLWebService.cs +++ b/src/NLWebNet/Services/NLWebService.cs @@ -12,10 +12,14 @@ public class NLWebService : INLWebService { private readonly IQueryProcessor _queryProcessor; private readonly IResultGenerator _resultGenerator; - private readonly IDataBackend _dataBackend; + private readonly IDataBackend? _dataBackend; + private readonly IBackendManager? _backendManager; private readonly ILogger _logger; private readonly NLWebOptions _options; + /// + /// Constructor for single-backend mode (backward compatibility). + /// public NLWebService( IQueryProcessor queryProcessor, IResultGenerator resultGenerator, @@ -30,6 +34,23 @@ public NLWebService( _options = options?.Value ?? throw new ArgumentNullException(nameof(options)); } + /// + /// Constructor for multi-backend mode. + /// + public NLWebService( + IQueryProcessor queryProcessor, + IResultGenerator resultGenerator, + IBackendManager backendManager, + ILogger logger, + IOptions options) + { + _queryProcessor = queryProcessor ?? throw new ArgumentNullException(nameof(queryProcessor)); + _resultGenerator = resultGenerator ?? throw new ArgumentNullException(nameof(resultGenerator)); + _backendManager = backendManager ?? throw new ArgumentNullException(nameof(backendManager)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + _options = options?.Value ?? 
throw new ArgumentNullException(nameof(options)); + } + /// public async Task ProcessRequestAsync(NLWebRequest request, CancellationToken cancellationToken = default) { diff --git a/src/NLWebNet/Services/ResultGenerator.cs b/src/NLWebNet/Services/ResultGenerator.cs index c38ad68..c702e24 100644 --- a/src/NLWebNet/Services/ResultGenerator.cs +++ b/src/NLWebNet/Services/ResultGenerator.cs @@ -11,10 +11,16 @@ namespace NLWebNet.Services; /// public class ResultGenerator : IResultGenerator { - private readonly IDataBackend _dataBackend; + private readonly IDataBackend? _dataBackend; + private readonly IBackendManager? _backendManager; private readonly ILogger _logger; private readonly NLWebOptions _options; - private readonly IChatClient? _chatClient; public ResultGenerator( + private readonly IChatClient? _chatClient; + + /// + /// Constructor for single-backend mode (backward compatibility). + /// + public ResultGenerator( IDataBackend dataBackend, ILogger logger, IOptions options, @@ -25,7 +31,25 @@ public class ResultGenerator : IResultGenerator _options = options?.Value ?? throw new ArgumentNullException(nameof(options)); _chatClient = chatClient; - _logger.LogDebug("ResultGenerator initialized with ChatClient: {ChatClientType}", + _logger.LogDebug("ResultGenerator initialized with single backend and ChatClient: {ChatClientType}", + _chatClient?.GetType().Name ?? "null"); + } + + /// + /// Constructor for multi-backend mode. + /// + public ResultGenerator( + IBackendManager backendManager, + ILogger logger, + IOptions options, + IChatClient? chatClient = null) + { + _backendManager = backendManager ?? throw new ArgumentNullException(nameof(backendManager)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + _options = options?.Value ?? throw new ArgumentNullException(nameof(options)); + _chatClient = chatClient; + + _logger.LogDebug("ResultGenerator initialized with multi-backend manager and ChatClient: {ChatClientType}", _chatClient?.GetType().Name ?? 
"null"); } @@ -34,7 +58,22 @@ public async Task> GenerateListAsync(string query, stri { _logger.LogDebug("Generating list results for query: {Query}", query); - var results = await _dataBackend.SearchAsync(query, site, _options.MaxResultsPerQuery, cancellationToken); + IEnumerable results; + + if (_backendManager != null) + { + // Use multi-backend manager + results = await _backendManager.SearchAsync(query, site, _options.MaxResultsPerQuery, cancellationToken); + } + else if (_dataBackend != null) + { + // Use single backend for backward compatibility + results = await _dataBackend.SearchAsync(query, site, _options.MaxResultsPerQuery, cancellationToken); + } + else + { + throw new InvalidOperationException("Neither backend manager nor single backend is configured"); + } _logger.LogDebug("Generated {Count} list results", results.Count()); return results; diff --git a/tests/NLWebNet.Tests/Extensions/MultiBackendExtensionsTests.cs b/tests/NLWebNet.Tests/Extensions/MultiBackendExtensionsTests.cs new file mode 100644 index 0000000..516ef8e --- /dev/null +++ b/tests/NLWebNet.Tests/Extensions/MultiBackendExtensionsTests.cs @@ -0,0 +1,108 @@ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Options; +using NLWebNet.Models; +using NLWebNet.Services; + +namespace NLWebNet.Tests.Extensions; + +[TestClass] +public class MultiBackendExtensionsTests +{ + [TestMethod] + public void AddNLWebNetMultiBackend_RegistersRequiredServices() + { + // Arrange + var services = new ServiceCollection(); + + // Act + services.AddNLWebNetMultiBackend( + configureOptions: null, + configureMultiBackend: options => + { + options.Enabled = true; + }); + + // Assert + var serviceProvider = services.BuildServiceProvider(); + + // Check that all required services are registered + Assert.IsNotNull(serviceProvider.GetService()); + Assert.IsNotNull(serviceProvider.GetService()); + Assert.IsNotNull(serviceProvider.GetService()); + Assert.IsNotNull(serviceProvider.GetService()); + Assert.IsNotNull(serviceProvider.GetService()); + } + + [TestMethod] + public void AddNLWebNetMultiBackend_WithMultiBackendDisabled_UsesBackwardCompatibility() + { + // Arrange + var services = new ServiceCollection(); + + // Act + services.AddNLWebNetMultiBackend( + configureOptions: null, + configureMultiBackend: options => + { + options.Enabled = false; + }); + + // Assert + var serviceProvider = services.BuildServiceProvider(); + var multiBackendOptions = serviceProvider.GetRequiredService>(); + + Assert.IsFalse(multiBackendOptions.Value.Enabled); + + // Should still be able to get the main service + var nlWebService = serviceProvider.GetService(); + Assert.IsNotNull(nlWebService); + } + + [TestMethod] + public void AddNLWebNetMultiBackend_ConfiguresMultiBackendOptions() + { + // Arrange + var services = new ServiceCollection(); + + // Act + services.AddNLWebNetMultiBackend( + options => options.DefaultMode = QueryMode.Summarize, + multiBackendOptions => + { + multiBackendOptions.Enabled = true; + multiBackendOptions.EnableParallelQuerying = false; + multiBackendOptions.MaxConcurrentQueries = 3; + }); + + // Assert + var serviceProvider = services.BuildServiceProvider(); + var options = serviceProvider.GetRequiredService>(); + var multiBackendOptions = serviceProvider.GetRequiredService>(); + + Assert.AreEqual(QueryMode.Summarize, options.Value.DefaultMode); + Assert.IsTrue(multiBackendOptions.Value.Enabled); + Assert.IsFalse(multiBackendOptions.Value.EnableParallelQuerying); + Assert.AreEqual(3, 
multiBackendOptions.Value.MaxConcurrentQueries); + } + + [TestMethod] + public void AddNLWebNetMultiBackend_WithoutConfiguration_UsesDefaults() + { + // Arrange + var services = new ServiceCollection(); + + // Act + services.AddNLWebNetMultiBackend(); + + // Assert + var serviceProvider = services.BuildServiceProvider(); + var options = serviceProvider.GetRequiredService>(); + var multiBackendOptions = serviceProvider.GetRequiredService>(); + + // Should use default values + Assert.AreEqual(QueryMode.List, options.Value.DefaultMode); + Assert.IsFalse(multiBackendOptions.Value.Enabled); // Default is false for backward compatibility + Assert.IsTrue(multiBackendOptions.Value.EnableParallelQuerying); + Assert.AreEqual(5, multiBackendOptions.Value.MaxConcurrentQueries); + } +} \ No newline at end of file diff --git a/tests/NLWebNet.Tests/Integration/MultiBackendIntegrationTests.cs b/tests/NLWebNet.Tests/Integration/MultiBackendIntegrationTests.cs new file mode 100644 index 0000000..4afa4f1 --- /dev/null +++ b/tests/NLWebNet.Tests/Integration/MultiBackendIntegrationTests.cs @@ -0,0 +1,179 @@ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using NLWebNet.Models; +using NLWebNet.Services; + +namespace NLWebNet.Tests.Integration; + +[TestClass] +public class MultiBackendIntegrationTests +{ + [TestMethod] + public async Task EndToEnd_MultiBackendSearch_WorksCorrectly() + { + // Arrange - Set up a complete multi-backend service configuration + var services = new ServiceCollection(); + + services.AddNLWebNetMultiBackend( + options => + { + options.DefaultMode = QueryMode.List; + options.MaxResultsPerQuery = 20; + options.EnableDecontextualization = false; // Simplify for test + }, + multiBackendOptions => + { + multiBackendOptions.Enabled = true; + multiBackendOptions.EnableParallelQuerying = true; + multiBackendOptions.EnableResultDeduplication = true; + multiBackendOptions.MaxConcurrentQueries = 2; + multiBackendOptions.BackendTimeoutSeconds = 10; + }); + + var serviceProvider = services.BuildServiceProvider(); + var nlWebService = serviceProvider.GetRequiredService(); + var backendManager = serviceProvider.GetRequiredService(); + + // Act - Perform a search using the NLWebService + var request = new NLWebRequest + { + QueryId = "test-001", + Query = "millennium falcon", + Mode = QueryMode.List, + Site = null + }; + + var response = await nlWebService.ProcessRequestAsync(request); + + // Assert - Verify the response contains results from multiple backends + Assert.IsNotNull(response); + Assert.AreEqual("test-001", response.QueryId); + Assert.IsNull(response.Error, "Response should not have an error"); + Assert.IsNotNull(response.Results); + Assert.IsTrue(response.Results.Any(), "Should return search results"); + + // Verify backend manager provides information about backends + var backendInfo = backendManager.GetBackendInfo().ToList(); + Assert.IsTrue(backendInfo.Count >= 1, "Should have at least one backend configured"); + Assert.IsTrue(backendInfo.Any(b => b.IsWriteEndpoint), "Should have a write endpoint designated"); + + // Verify write backend is accessible + var writeBackend = backendManager.GetWriteBackend(); + Assert.IsNotNull(writeBackend, "Should have a write backend available"); + + var capabilities = writeBackend.GetCapabilities(); + Assert.IsNotNull(capabilities, "Write backend should have capabilities"); + } + + [TestMethod] + public async Task EndToEnd_MultiBackendDisabled_FallsBackToSingleBackend() + { + // 
Arrange - Set up multi-backend service but with multi-backend disabled + var services = new ServiceCollection(); + + services.AddNLWebNetMultiBackend( + options => + { + options.DefaultMode = QueryMode.List; + options.MaxResultsPerQuery = 20; + options.EnableDecontextualization = false; + options.MultiBackend.Enabled = false; // Disabled for backward compatibility + }); + + var serviceProvider = services.BuildServiceProvider(); + var nlWebService = serviceProvider.GetRequiredService(); + + // Act - Perform a search + var request = new NLWebRequest + { + QueryId = "test-002", + Query = "millennium falcon", + Mode = QueryMode.List + }; + + var response = await nlWebService.ProcessRequestAsync(request); + + // Assert - Should still work in single-backend mode + Assert.IsNotNull(response); + Assert.AreEqual("test-002", response.QueryId); + Assert.IsNull(response.Error, "Response should not have an error"); + + // Verify configuration + var options = serviceProvider.GetRequiredService>(); + Assert.IsFalse(options.Value.MultiBackend.Enabled, "Multi-backend should be disabled"); + } + + [TestMethod] + public async Task EndToEnd_StreamingResponse_WorksWithMultiBackend() + { + // Arrange + var services = new ServiceCollection(); + + services.AddNLWebNetMultiBackend(options => + { + options.EnableStreaming = true; + options.MultiBackend.Enabled = true; + }); + + var serviceProvider = services.BuildServiceProvider(); + var nlWebService = serviceProvider.GetRequiredService(); + + // Act - Test streaming response + var request = new NLWebRequest + { + QueryId = "test-003", + Query = "millennium falcon", + Mode = QueryMode.List, + Streaming = true + }; + + var responseCount = 0; + await foreach (var response in nlWebService.ProcessRequestStreamAsync(request)) + { + responseCount++; + Assert.IsNotNull(response); + Assert.AreEqual("test-003", response.QueryId); + + // Break after a few responses to avoid long test + if (responseCount >= 3) break; + } + + Assert.IsTrue(responseCount > 0, "Should receive streaming responses"); + } + + [TestMethod] + public async Task EndToEnd_DeduplicationAcrossBackends_WorksCorrectly() + { + // Arrange + var services = new ServiceCollection(); + + services.AddNLWebNetMultiBackend(options => + { + options.MultiBackend.Enabled = true; + options.MultiBackend.EnableResultDeduplication = true; + }); + + var serviceProvider = services.BuildServiceProvider(); + var backendManager = serviceProvider.GetRequiredService(); + + // Act - Direct test of backend manager deduplication + var results = await backendManager.SearchAsync("millennium falcon", maxResults: 20); + + // Assert + var resultList = results.ToList(); + var uniqueUrls = resultList.Select(r => r.Url).Distinct().Count(); + + Assert.AreEqual(resultList.Count, uniqueUrls, + "Results should be deduplicated - no duplicate URLs"); + + if (resultList.Count > 1) + { + // Verify results are sorted by score + var scores = resultList.Select(r => r.Score).ToList(); + var sortedScores = scores.OrderByDescending(s => s).ToList(); + CollectionAssert.AreEqual(sortedScores, scores, + "Results should be sorted by relevance score"); + } + } +} \ No newline at end of file diff --git a/tests/NLWebNet.Tests/Services/BackendManagerTests.cs b/tests/NLWebNet.Tests/Services/BackendManagerTests.cs new file mode 100644 index 0000000..e863706 --- /dev/null +++ b/tests/NLWebNet.Tests/Services/BackendManagerTests.cs @@ -0,0 +1,251 @@ +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using NLWebNet.Models; +using 
NLWebNet.Services; + +namespace NLWebNet.Tests.Services; + +[TestClass] +public class BackendManagerTests +{ + private ILogger _logger = null!; + private MockDataBackend _backend1 = null!; + private MockDataBackend _backend2 = null!; + private MultiBackendOptions _options = null!; + + [TestInitialize] + public void Initialize() + { + _logger = new TestLogger(); + _backend1 = new MockDataBackend(new TestLogger()); + _backend2 = new MockDataBackend(new TestLogger()); + + _options = new MultiBackendOptions + { + Enabled = true, + EnableParallelQuerying = true, + EnableResultDeduplication = true, + MaxConcurrentQueries = 5, + BackendTimeoutSeconds = 30 + }; + } + + [TestMethod] + public async Task SearchAsync_WithMultipleBackends_ReturnsCombinedResults() + { + // Arrange + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(_options); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Act - use a query that will match the sample data + var results = await manager.SearchAsync("millennium falcon", maxResults: 10); + + // Assert + Assert.IsNotNull(results); + var resultList = results.ToList(); + Assert.IsTrue(resultList.Count > 0, "Should return results from multiple backends"); + } + + [TestMethod] + public async Task SearchAsync_WithDeduplicationEnabled_RemovesDuplicateUrls() + { + // Arrange + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(_options); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Both backends should return some overlapping results due to MockDataBackend implementation + // Act + var results = await manager.SearchAsync("test", maxResults: 20); + + // Assert + var resultList = results.ToList(); + var uniqueUrls = resultList.Select(r => r.Url).Distinct().Count(); + Assert.AreEqual(resultList.Count, uniqueUrls, "Results should be deduplicated by URL"); + } + + [TestMethod] + public async Task SearchAsync_WithDeduplicationDisabled_ReturnsAllResults() + { + // Arrange + _options.EnableResultDeduplication = false; + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(_options); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Act - use a query that will match the sample data + var results = await manager.SearchAsync("millennium", maxResults: 50); + + // Assert + var resultList = results.ToList(); + // Should have results when using a valid query + Assert.IsTrue(resultList.Count > 0, "Should return results even without deduplication"); + } + + [TestMethod] + public async Task SearchAsync_WithMultiBackendDisabled_UsesFirstBackendOnly() + { + // Arrange + _options.Enabled = false; + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(_options); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Act - use a query that will match the sample data + var results = await manager.SearchAsync("millennium", maxResults: 10); + + // Assert + Assert.IsNotNull(results); + // Should work even when multi-backend is disabled + Assert.IsTrue(results.Any(), "Should return results from single backend fallback"); + } + + [TestMethod] + public async Task GetAvailableSitesAsync_CombinesAllBackendSites() + { + // Arrange + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(_options); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Act + var sites = await manager.GetAvailableSitesAsync(); + + // 
Assert + Assert.IsNotNull(sites); + var siteList = sites.ToList(); + Assert.IsTrue(siteList.Count > 0, "Should return sites from all backends"); + } + + [TestMethod] + public async Task GetItemByUrlAsync_ReturnsFirstMatch() + { + // Arrange + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(_options); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Act - use an actual URL from the mock data + var result = await manager.GetItemByUrlAsync("https://galactic-shipyards.com/millennium-falcon"); + + // Assert + // MockDataBackend should return a result for this URL + Assert.IsNotNull(result, "Should return item from first backend that has it"); + } + + [TestMethod] + public void GetWriteBackend_ReturnsFirstBackendByDefault() + { + // Arrange + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(_options); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Act + var writeBackend = manager.GetWriteBackend(); + + // Assert + Assert.IsNotNull(writeBackend, "Should return a write backend"); + Assert.AreSame(_backend1, writeBackend, "Should return first backend as default write backend"); + } + + [TestMethod] + public void GetBackendInfo_ReturnsInfoForAllBackends() + { + // Arrange + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(_options); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Act + var backendInfo = manager.GetBackendInfo(); + + // Assert + Assert.IsNotNull(backendInfo); + var infoList = backendInfo.ToList(); + Assert.AreEqual(2, infoList.Count, "Should return info for all backends"); + Assert.IsTrue(infoList.All(info => info.Enabled), "All backends should be marked as enabled"); + Assert.IsTrue(infoList.Any(info => info.IsWriteEndpoint), "One backend should be marked as write endpoint"); + } + + [TestMethod] + public async Task SearchAsync_WithParallelQueryingDisabled_StillWorks() + { + // Arrange + _options.EnableParallelQuerying = false; + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(_options); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Act - use a query that will match the sample data + var results = await manager.SearchAsync("millennium", maxResults: 10); + + // Assert + Assert.IsNotNull(results); + Assert.IsTrue(results.Any(), "Should return results even with sequential querying"); + } + + [TestMethod] + public void GetBackendInfo_UsesConfiguredEndpointNames_WhenEndpointsProvided() + { + // Arrange + var optionsWithEndpoints = new MultiBackendOptions + { + Enabled = true, + EnableParallelQuerying = true, + EnableResultDeduplication = true, + MaxConcurrentQueries = 5, + BackendTimeoutSeconds = 30, + WriteEndpoint = "primary_backend", + Endpoints = new Dictionary + { + ["primary_backend"] = new() { Enabled = true, BackendType = "mock", Priority = 10 }, + ["secondary_backend"] = new() { Enabled = true, BackendType = "mock", Priority = 5 } + } + }; + + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(optionsWithEndpoints); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Act + var backendInfo = manager.GetBackendInfo(); + + // Assert + Assert.IsNotNull(backendInfo); + var infoList = backendInfo.ToList(); + Assert.AreEqual(2, infoList.Count, "Should return info for all backends"); + + // Verify that configured endpoint names are used instead of generic 
backend_0, backend_1 + var backendIds = infoList.Select(info => info.Id).OrderBy(id => id).ToList(); + CollectionAssert.AreEqual(new[] { "primary_backend", "secondary_backend" }, backendIds, + "Backend IDs should use configured endpoint names"); + + // Verify write endpoint identification works with configured names + var writeBackend = infoList.Single(info => info.IsWriteEndpoint); + Assert.AreEqual("primary_backend", writeBackend.Id, "Primary backend should be identified as write endpoint"); + } + + [TestMethod] + public void GetBackendInfo_FallsBackToGenericNames_WhenNoEndpointsConfigured() + { + // Arrange - use original options without configured endpoints + var backends = new[] { _backend1, _backend2 }; + var optionsWrapper = Options.Create(_options); + var manager = new BackendManager(backends, optionsWrapper, _logger); + + // Act + var backendInfo = manager.GetBackendInfo(); + + // Assert + Assert.IsNotNull(backendInfo); + var infoList = backendInfo.ToList(); + Assert.AreEqual(2, infoList.Count, "Should return info for all backends"); + + // Verify that generic names are used as fallback + var backendIds = infoList.Select(info => info.Id).OrderBy(id => id).ToList(); + CollectionAssert.AreEqual(new[] { "backend_0", "backend_1" }, backendIds, + "Backend IDs should fall back to generic names when no endpoints configured"); + } +} \ No newline at end of file
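
---

Note on the deduplication behaviour exercised by `SearchAsync_WithDeduplicationEnabled_RemovesDuplicateUrls` and `EndToEnd_DeduplicationAcrossBackends_WorksCorrectly`: those tests only pin down the observable contract (no duplicate URLs across backends, results ordered by descending score). The snippet below is a minimal illustrative sketch of a merge step that satisfies that contract; `SearchHit` and `MergeResults` are hypothetical names used for illustration and are not part of the NLWebNet API.

```csharp
using System.Collections.Generic;
using System.Linq;

// Illustrative sketch only - not the NLWebNet implementation.
// Shows one way to combine per-backend results so that each URL appears once,
// keeping the highest-scoring hit, with the merged list ordered by score.
public static class ResultMergeSketch
{
    public sealed record SearchHit(string Url, string Title, double Score);

    public static IReadOnlyList<SearchHit> MergeResults(
        IEnumerable<IReadOnlyCollection<SearchHit>> perBackendResults)
    {
        return perBackendResults
            .SelectMany(results => results)                          // flatten hits from all backends
            .GroupBy(hit => hit.Url)                                 // deduplicate by URL
            .Select(g => g.OrderByDescending(h => h.Score).First())  // keep the best-scoring hit per URL
            .OrderByDescending(hit => hit.Score)                     // final relevance ordering
            .ToList();
    }
}
```

Keeping the highest score per URL is what allows the descending-score assertion in `EndToEnd_DeduplicationAcrossBackends_WorksCorrectly` to hold even when two backends rank the same document differently.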