|
59 | 59 | .WithName("GetHealth") |
60 | 60 | .WithOpenApi(); |
61 | 61 |
|
62 | | -app.MapGet("/api/health", () => Results.Ok(new { |
63 | | - Status = "Healthy", |
| 62 | +app.MapGet("/api/health", () => Results.Ok(new |
| 63 | +{ |
| 64 | + Status = "Healthy", |
64 | 65 | Timestamp = DateTimeOffset.UtcNow, |
65 | 66 | Service = "NLWebNet AspireApp API" |
66 | 67 | })) |
|
88 | 89 | { |
89 | 90 | // Extract GitHub token from headers if provided for consistent embedding |
90 | 91 | var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault(); |
91 | | - |
| 92 | + |
92 | 93 | var count = await ingestionService.IngestDemoFeedsAsync(githubToken); |
93 | 94 | return Results.Ok(new { Message = $"Successfully ingested {count} documents from demo feeds", Count = count }); |
94 | 95 | } |
|
106 | 107 | { |
107 | 108 | new { Name = "Microsoft .NET Blog", Url = "https://devblogs.microsoft.com/dotnet/feed/", Note = "Latest 25 articles" } |
108 | 109 | }; |
109 | | - |
110 | | - return Results.Ok(new { |
| 110 | + |
| 111 | + return Results.Ok(new |
| 112 | + { |
111 | 113 | Message = "Demo RSS feed used for focused ingestion (latest 25 articles from .NET blog)", |
112 | | - Feeds = demoFeeds |
| 114 | + Feeds = demoFeeds |
113 | 115 | }); |
114 | 116 | }) |
115 | 117 | .WithName("GetDemoFeeds") |
|
137 | 139 | { |
138 | 140 | using var activity = System.Diagnostics.Activity.Current?.Source.StartActivity("VectorSearch.SearchDocuments"); |
139 | 141 | var correlationId = Guid.NewGuid().ToString("N")[..8]; |
140 | | - |
| 142 | + |
141 | 143 | activity?.SetTag("search.correlation_id", correlationId); |
142 | 144 | activity?.SetTag("search.query", query); |
143 | 145 | activity?.SetTag("search.limit", limit); |
144 | 146 | activity?.SetTag("search.threshold", threshold); |
145 | | - |
| 147 | + |
146 | 148 | try |
147 | 149 | { |
148 | 150 | if (string.IsNullOrWhiteSpace(query)) |
|
153 | 155 | } |
154 | 156 |
|
155 | 157 | var searchLimit = limit ?? 10; |
156 | | - |
| 158 | + |
157 | 159 | // Extract GitHub token from headers if provided |
158 | 160 | var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault(); |
159 | 161 | var hasToken = !string.IsNullOrEmpty(githubToken); |
160 | | - |
| 162 | + |
161 | 163 | // Adjust threshold based on embedding type |
162 | 164 | var searchThreshold = threshold ?? (hasToken && IsValidGitHubToken(githubToken) ? 0.1f : 0.03f); |
163 | | - |
| 165 | + |
164 | 166 | logger.LogInformation("=== SEARCH REQUEST START [{CorrelationId}] ===", correlationId); |
165 | | - logger.LogInformation("[{CorrelationId}] Search parameters - Query: '{Query}', Limit: {Limit}, Threshold: {Threshold}, HasToken: {HasToken}, TokenLength: {TokenLength}", |
| 167 | + logger.LogInformation("[{CorrelationId}] Search parameters - Query: '{Query}', Limit: {Limit}, Threshold: {Threshold}, HasToken: {HasToken}, TokenLength: {TokenLength}", |
166 | 168 | correlationId, query, searchLimit, searchThreshold, hasToken, githubToken?.Length ?? 0); |
167 | | - |
| 169 | + |
168 | 170 | activity?.SetTag("auth.has_token", hasToken); |
169 | 171 | activity?.SetTag("auth.token_length", githubToken?.Length ?? 0); |
170 | 172 | activity?.SetTag("search.processed_limit", searchLimit); |
171 | 173 | activity?.SetTag("search.processed_threshold", searchThreshold); |
172 | | - |
| 174 | + |
173 | 175 | // Generate embedding for the search query |
174 | 176 | logger.LogInformation("[{CorrelationId}] Generating query embedding...", correlationId); |
175 | 177 | var embeddingStopwatch = System.Diagnostics.Stopwatch.StartNew(); |
176 | | - |
| 178 | + |
177 | 179 | var queryEmbedding = await embeddingService.GenerateEmbeddingAsync(query, githubToken); |
178 | | - |
| 180 | + |
179 | 181 | embeddingStopwatch.Stop(); |
180 | | - logger.LogInformation("[{CorrelationId}] Query embedding generated - Duration: {Duration}ms, Dimensions: {Dimensions}, EmbeddingType: {EmbeddingType}", |
| 182 | + logger.LogInformation("[{CorrelationId}] Query embedding generated - Duration: {Duration}ms, Dimensions: {Dimensions}, EmbeddingType: {EmbeddingType}", |
181 | 183 | correlationId, embeddingStopwatch.ElapsedMilliseconds, queryEmbedding.Length, hasToken ? "GitHub Models" : "Simple Hash"); |
182 | | - |
| 184 | + |
183 | 185 | activity?.SetTag("embedding.duration_ms", embeddingStopwatch.ElapsedMilliseconds); |
184 | 186 | activity?.SetTag("embedding.dimensions", queryEmbedding.Length); |
185 | 187 | activity?.SetTag("embedding.type", hasToken ? "github_models" : "simple_hash"); |
186 | | - |
| 188 | + |
187 | 189 | // Search for similar documents |
188 | 190 | logger.LogInformation("[{CorrelationId}] Performing vector similarity search...", correlationId); |
189 | 191 | var searchStopwatch = System.Diagnostics.Stopwatch.StartNew(); |
190 | | - |
| 192 | + |
191 | 193 | var results = await vectorStorage.SearchSimilarAsync(queryEmbedding, searchLimit, searchThreshold); |
192 | | - |
| 194 | + |
193 | 195 | searchStopwatch.Stop(); |
194 | 196 | var rawResultCount = results.Count(); |
195 | | - |
196 | | - logger.LogInformation("[{CorrelationId}] Vector search completed - Duration: {Duration}ms, RawResults: {RawResultCount}", |
| 197 | + |
| 198 | + logger.LogInformation("[{CorrelationId}] Vector search completed - Duration: {Duration}ms, RawResults: {RawResultCount}", |
197 | 199 | correlationId, searchStopwatch.ElapsedMilliseconds, rawResultCount); |
198 | | - |
| 200 | + |
199 | 201 | activity?.SetTag("vector_search.duration_ms", searchStopwatch.ElapsedMilliseconds); |
200 | 202 | activity?.SetTag("vector_search.raw_result_count", rawResultCount); |
201 | | - |
| 203 | + |
202 | 204 | // Process and format results |
203 | 205 | logger.LogInformation("[{CorrelationId}] Processing search results...", correlationId); |
204 | 206 | var processingStopwatch = System.Diagnostics.Stopwatch.StartNew(); |
205 | | - |
| 207 | + |
206 | 208 | var searchResults = results.Select(r => new |
207 | 209 | { |
208 | 210 | Id = r.Document.Id, |
|
212 | 214 | PublishedDate = r.Document.IngestedAt, |
213 | 215 | Similarity = Math.Max(0.0, Math.Min(1.0, r.Score)) |
214 | 216 | }).ToList(); |
215 | | - |
| 217 | + |
216 | 218 | processingStopwatch.Stop(); |
217 | | - |
| 219 | + |
218 | 220 | // Log result statistics |
219 | 221 | if (searchResults.Any()) |
220 | 222 | { |
221 | 223 | var avgSimilarity = searchResults.Average(r => r.Similarity); |
222 | 224 | var maxSimilarity = searchResults.Max(r => r.Similarity); |
223 | 225 | var minSimilarity = searchResults.Min(r => r.Similarity); |
224 | | - |
225 | | - logger.LogInformation("[{CorrelationId}] Result statistics - Count: {Count}, AvgSimilarity: {AvgSimilarity:F3}, MaxSimilarity: {MaxSimilarity:F3}, MinSimilarity: {MinSimilarity:F3}", |
| 226 | + |
| 227 | + logger.LogInformation("[{CorrelationId}] Result statistics - Count: {Count}, AvgSimilarity: {AvgSimilarity:F3}, MaxSimilarity: {MaxSimilarity:F3}, MinSimilarity: {MinSimilarity:F3}", |
226 | 228 | correlationId, searchResults.Count, avgSimilarity, maxSimilarity, minSimilarity); |
227 | | - |
228 | | - logger.LogInformation("[{CorrelationId}] Top result - Title: '{Title}', Similarity: {Similarity:F3}", |
| 229 | + |
| 230 | + logger.LogInformation("[{CorrelationId}] Top result - Title: '{Title}', Similarity: {Similarity:F3}", |
229 | 231 | correlationId, searchResults[0].Title, searchResults[0].Similarity); |
230 | | - |
| 232 | + |
231 | 233 | activity?.SetTag("results.count", searchResults.Count); |
232 | 234 | activity?.SetTag("results.avg_similarity", avgSimilarity); |
233 | 235 | activity?.SetTag("results.max_similarity", maxSimilarity); |
234 | 236 | activity?.SetTag("results.min_similarity", minSimilarity); |
235 | 237 | } |
236 | 238 | else |
237 | 239 | { |
238 | | - logger.LogWarning("[{CorrelationId}] No results found for query '{Query}' with threshold {Threshold}", |
| 240 | + logger.LogWarning("[{CorrelationId}] No results found for query '{Query}' with threshold {Threshold}", |
239 | 241 | correlationId, query, searchThreshold); |
240 | 242 | activity?.SetTag("results.count", 0); |
241 | 243 | } |
242 | 244 |
|
243 | 245 | var totalDuration = embeddingStopwatch.ElapsedMilliseconds + searchStopwatch.ElapsedMilliseconds + processingStopwatch.ElapsedMilliseconds; |
244 | | - |
245 | | - logger.LogInformation("=== SEARCH REQUEST SUCCESS [{CorrelationId}] === Total duration: {TotalDuration}ms, Results: {ResultCount}, EmbeddingType: {EmbeddingType}", |
| 246 | + |
| 247 | + logger.LogInformation("=== SEARCH REQUEST SUCCESS [{CorrelationId}] === Total duration: {TotalDuration}ms, Results: {ResultCount}, EmbeddingType: {EmbeddingType}", |
246 | 248 | correlationId, totalDuration, searchResults.Count, hasToken ? "GitHub Models" : "Simple Hash"); |
247 | | - |
| 249 | + |
248 | 250 | activity?.SetTag("search.success", true); |
249 | 251 | activity?.SetTag("search.total_duration_ms", totalDuration); |
250 | | - |
| 252 | + |
251 | 253 | return Results.Ok(searchResults); |
252 | 254 | } |
253 | 255 | catch (Exception ex) |
254 | 256 | { |
255 | 257 | logger.LogError(ex, "=== SEARCH REQUEST FAILED [{CorrelationId}] === Query: '{Query}', Error: {Message}", correlationId, query, ex.Message); |
256 | | - |
| 258 | + |
257 | 259 | activity?.SetTag("search.success", false); |
258 | 260 | activity?.SetTag("error.type", ex.GetType().Name); |
259 | 261 | activity?.SetTag("error.message", ex.Message); |
260 | 262 | activity?.SetTag("error.stack_trace", ex.StackTrace); |
261 | | - |
| 263 | + |
262 | 264 | return Results.BadRequest(new { Error = ex.Message }); |
263 | 265 | } |
264 | 266 | }) |
|
272 | 274 | { |
273 | 275 | var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault(); |
274 | 276 | var hasToken = !string.IsNullOrEmpty(githubToken) && IsValidGitHubToken(githubToken); |
275 | | - |
| 277 | + |
276 | 278 | logger.LogInformation("Generating embedding for diagnostic - Text: '{Text}', HasToken: {HasToken}", text, hasToken); |
277 | | - |
| 279 | + |
278 | 280 | var embedding = await embeddingService.GenerateEmbeddingAsync(text, githubToken); |
279 | | - |
| 281 | + |
280 | 282 | var stats = new |
281 | 283 | { |
282 | 284 | Text = text, |
|
294 | 296 | NonZeroCount = embedding.Span.ToArray().Count(x => Math.Abs(x) > 0.001f) |
295 | 297 | } |
296 | 298 | }; |
297 | | - |
| 299 | + |
298 | 300 | return Results.Ok(stats); |
299 | 301 | } |
300 | 302 | catch (Exception ex) |
|
314 | 316 | var searchLimit = limit ?? 10; |
315 | 317 | var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault(); |
316 | 318 | var hasToken = !string.IsNullOrEmpty(githubToken) && IsValidGitHubToken(githubToken); |
317 | | - |
| 319 | + |
318 | 320 | logger.LogInformation("=== DIAGNOSTIC SEARCH ==="); |
319 | 321 | logger.LogInformation("Query: '{Query}', HasToken: {HasToken}", query, hasToken); |
320 | | - |
| 322 | + |
321 | 323 | // Generate query embedding |
322 | 324 | var queryEmbedding = await embeddingService.GenerateEmbeddingAsync(query, githubToken); |
323 | | - |
| 325 | + |
324 | 326 | // Get raw search results with very low threshold |
325 | 327 | var results = await vectorStorage.SearchSimilarAsync(queryEmbedding, searchLimit, 0.0f); |
326 | | - |
| 328 | + |
327 | 329 | var diagnosticResults = results.Select((r, index) => new |
328 | 330 | { |
329 | 331 | Rank = index + 1, |
|
337 | 339 | TitleMatch = r.Document.Title?.Contains(query, StringComparison.OrdinalIgnoreCase) == true, |
338 | 340 | DescriptionMatch = r.Document.Description?.Contains(query, StringComparison.OrdinalIgnoreCase) == true |
339 | 341 | }).ToList(); |
340 | | - |
| 342 | + |
341 | 343 | var analysis = new |
342 | 344 | { |
343 | 345 | Query = query, |
|
355 | 357 | LowestSimilarity = diagnosticResults.LastOrDefault()?.Similarity ?? 0, |
356 | 358 | Results = diagnosticResults |
357 | 359 | }; |
358 | | - |
359 | | - logger.LogInformation("Diagnostic complete - {ResultCount} results, {TextMatches} contain query term", |
| 360 | + |
| 361 | + logger.LogInformation("Diagnostic complete - {ResultCount} results, {TextMatches} contain query term", |
360 | 362 | diagnosticResults.Count, diagnosticResults.Count(r => r.ContainsQueryTerm)); |
361 | | - |
| 363 | + |
362 | 364 | return Results.Ok(analysis); |
363 | 365 | } |
364 | 366 | catch (Exception ex) |
|
377 | 379 | { |
378 | 380 | var searchLimit = limit ?? 50; |
379 | 381 | var documents = await vectorStorage.GetAllDocumentsAsync(searchLimit); |
380 | | - |
| 382 | + |
381 | 383 | var results = documents.Select(doc => new |
382 | 384 | { |
383 | 385 | Id = doc.Id, |
|
388 | 390 | TitleMatch = !string.IsNullOrEmpty(search) && doc.Title.Contains(search, StringComparison.OrdinalIgnoreCase), |
389 | 391 | DescriptionMatch = !string.IsNullOrEmpty(search) && !string.IsNullOrEmpty(doc.Description) && doc.Description.Contains(search, StringComparison.OrdinalIgnoreCase) |
390 | 392 | }).ToList(); |
391 | | - |
| 393 | + |
392 | 394 | if (!string.IsNullOrEmpty(search)) |
393 | 395 | { |
394 | 396 | // Filter to only documents that contain the search term |
395 | 397 | results = results.Where(r => r.TitleMatch || r.DescriptionMatch).ToList(); |
396 | 398 | } |
397 | | - |
| 399 | + |
398 | 400 | return Results.Ok(new |
399 | 401 | { |
400 | 402 | TotalDocuments = documents.Count(), |
|
438 | 440 | // Try with the actual token from headers |
439 | 441 | var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault(); |
440 | 442 | ReadOnlyMemory<float>? realGithubEmbedding = null; |
441 | | - |
| 443 | + |
442 | 444 | if (!string.IsNullOrEmpty(githubToken)) |
443 | 445 | { |
444 | 446 | try |
|
489 | 491 | // Helper method for GitHub token validation |
490 | 492 | static bool IsValidGitHubToken(string? token) |
491 | 493 | { |
492 | | - return !string.IsNullOrWhiteSpace(token) && |
| 494 | + return !string.IsNullOrWhiteSpace(token) && |
493 | 495 | (token.StartsWith("gho_") || token.StartsWith("ghp_") || token.StartsWith("github_pat_")) && |
494 | 496 | token.Length > 20; |
495 | 497 | } |
|
0 commit comments