Skip to content

Commit 6b8c356

Browse files
committed
feat: Add diagnostic endpoints for embedding and search analysis, and implement document browsing functionality
1 parent 8075a6a commit 6b8c356

File tree

4 files changed

+455
-0
lines changed

4 files changed

+455
-0
lines changed

samples/AspireDemo/NLWebNet.AspireApp/Program.cs

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,152 @@
265265
.WithName("SearchDocuments")
266266
.WithOpenApi();
267267

268+
// Diagnostic endpoint for analyzing embeddings and search quality
269+
// Generates an embedding for the `text` query parameter and returns summary statistics about it.
// The optional X-GitHub-Token request header is forwarded to the embedding service as-is.
// Responds 200 with the statistics, or 400 carrying the exception message if anything throws.
app.MapGet("/api/diagnostics/embedding", async (HttpContext context, string text, IEmbeddingService embeddingService, ILogger<Program> logger) =>
{
    try
    {
        var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault();
        var hasToken = !string.IsNullOrEmpty(githubToken) && IsValidGitHubToken(githubToken);

        logger.LogInformation("Generating embedding for diagnostic - Text: '{Text}', HasToken: {HasToken}", text, hasToken);

        var embedding = await embeddingService.GenerateEmbeddingAsync(text, githubToken);

        // Copy the vector out of the ReadOnlyMemory once; every statistic below reads this array
        // instead of re-copying the span for each aggregate.
        var values = embedding.ToArray();
        var hasValues = values.Length > 0;

        var stats = new
        {
            Text = text,
            EmbeddingType = hasToken ? "GitHub Models" : "Simple Hash",
            HasGitHubToken = hasToken,
            TokenLength = githubToken?.Length ?? 0,
            EmbeddingDimensions = values.Length,
            EmbeddingSample = values[..Math.Min(10, values.Length)], // First 10 values
            EmbeddingMagnitude = Math.Sqrt(values.Sum(x => x * x)),
            EmbeddingStats = new
            {
                // Min/Max/Average throw on an empty sequence, so an empty embedding reports zeros.
                Min = hasValues ? values.Min() : 0f,
                Max = hasValues ? values.Max() : 0f,
                Average = hasValues ? values.Average() : 0f,
                NonZeroCount = values.Count(x => Math.Abs(x) > 0.001f)
            }
        };

        return Results.Ok(stats);
    }
    catch (Exception ex)
    {
        logger.LogError(ex, "Error generating diagnostic embedding for text: {Text}", text);
        return Results.BadRequest(new { Error = ex.Message });
    }
})
.WithName("DiagnosticEmbedding")
.WithOpenApi();
308+
309+
// Diagnostic endpoint for searching with detailed analysis
310+
// Runs a vector search for the `query` parameter with a similarity threshold of 0.0 and returns
// each hit annotated with rank, score and whether the literal query text occurs in its title or
// description. `limit` defaults to 10. The optional X-GitHub-Token header is forwarded to the
// embedding service. Responds 200 with the analysis, or 400 with the exception message on failure.
app.MapGet("/api/diagnostics/search", async (HttpContext context, string query, int? limit, IVectorStorageService vectorStorage, IEmbeddingService embeddingService, ILogger<Program> logger) =>
{
    try
    {
        const int maxDescriptionLength = 200;

        var searchLimit = limit ?? 10;
        var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault();
        var hasToken = !string.IsNullOrEmpty(githubToken) && IsValidGitHubToken(githubToken);

        logger.LogInformation("=== DIAGNOSTIC SEARCH ===");
        logger.LogInformation("Query: '{Query}', HasToken: {HasToken}", query, hasToken);

        // Generate query embedding
        var queryEmbedding = await embeddingService.GenerateEmbeddingAsync(query, githubToken);

        // Get raw search results with very low threshold
        var results = await vectorStorage.SearchSimilarAsync(queryEmbedding, searchLimit, 0.0f);

        var diagnosticResults = results.Select((r, index) =>
        {
            var description = r.Document.Description;
            var titleMatch = r.Document.Title?.Contains(query, StringComparison.OrdinalIgnoreCase) == true;
            var descriptionMatch = description?.Contains(query, StringComparison.OrdinalIgnoreCase) == true;

            return new
            {
                Rank = index + 1,
                Id = r.Document.Id,
                Title = r.Document.Title,
                // Append the ellipsis only when text was actually cut off; short descriptions are
                // returned unchanged and a null description stays null.
                Description = description?.Length > maxDescriptionLength
                    ? description.Substring(0, maxDescriptionLength) + "..."
                    : description,
                Similarity = r.Score,
                SimilarityPercent = Math.Round(r.Score * 100, 2),
                ContainsQueryTerm = titleMatch || descriptionMatch,
                TitleMatch = titleMatch,
                DescriptionMatch = descriptionMatch
            };
        }).ToList();

        // Copy the query vector once for the statistics below.
        var queryValues = queryEmbedding.ToArray();
        var textMatchCount = diagnosticResults.Count(r => r.ContainsQueryTerm);

        var analysis = new
        {
            Query = query,
            EmbeddingType = hasToken ? "GitHub Models" : "Simple Hash",
            HasGitHubToken = hasToken,
            QueryEmbeddingStats = new
            {
                Dimensions = queryValues.Length,
                Magnitude = Math.Sqrt(queryValues.Sum(x => x * x)),
                Sample = queryValues[..Math.Min(5, queryValues.Length)]
            },
            TotalResults = diagnosticResults.Count,
            ResultsWithTextMatch = textMatchCount,
            // NOTE(review): first/last are reported as highest/lowest, which assumes
            // SearchSimilarAsync returns hits ordered by descending score — confirm.
            HighestSimilarity = diagnosticResults.FirstOrDefault()?.Similarity ?? 0,
            LowestSimilarity = diagnosticResults.LastOrDefault()?.Similarity ?? 0,
            Results = diagnosticResults
        };

        logger.LogInformation("Diagnostic complete - {ResultCount} results, {TextMatches} contain query term",
            diagnosticResults.Count, textMatchCount);

        return Results.Ok(analysis);
    }
    catch (Exception ex)
    {
        logger.LogError(ex, "Error in diagnostic search for query: {Query}", query);
        return Results.BadRequest(new { Error = ex.Message });
    }
})
.WithName("DiagnosticSearch")
.WithOpenApi();
372+
373+
// Diagnostic endpoint to browse ingested documents
374+
// Lists stored documents for browsing. Fetches up to `limit` documents (default 50) and, when
// `search` is supplied, keeps only those whose title or description contains it (case-insensitive).
// The filter is applied to the fetched page, not to the whole store.
// Responds 200 with the listing, or 400 with the exception message on failure.
app.MapGet("/api/documents", async (IVectorStorageService vectorStorage, string? search = null, int? limit = null) =>
{
    try
    {
        var searchLimit = limit ?? 50;

        // Materialize once: the service returns an IEnumerable that is both projected and counted below.
        var documents = (await vectorStorage.GetAllDocumentsAsync(searchLimit)).ToList();

        var results = documents.Select(doc => new
        {
            Id = doc.Id,
            Title = doc.Title,
            Description = doc.Description?.Length > 200 ? doc.Description.Substring(0, 200) + "..." : doc.Description,
            Url = doc.Url,
            IngestedAt = doc.IngestedAt,
            // Null-conditional access so a document without a title counts as "no match" instead of throwing.
            TitleMatch = !string.IsNullOrEmpty(search) && doc.Title?.Contains(search, StringComparison.OrdinalIgnoreCase) == true,
            DescriptionMatch = !string.IsNullOrEmpty(search) && doc.Description?.Contains(search, StringComparison.OrdinalIgnoreCase) == true
        }).ToList();

        if (!string.IsNullOrEmpty(search))
        {
            // Filter to only documents that contain the search term
            results = results.Where(r => r.TitleMatch || r.DescriptionMatch).ToList();
        }

        return Results.Ok(new
        {
            // Number of documents fetched (bounded by the limit), before the search filter.
            TotalDocuments = documents.Count,
            SearchTerm = search,
            MatchingDocuments = results.Count,
            Documents = results
        });
    }
    catch (Exception ex)
    {
        return Results.BadRequest(new { Error = ex.Message });
    }
})
.WithName("BrowseDocuments")
.WithOpenApi();
413+
268414
app.MapDelete("/vector/clear", async (IVectorStorageService vectorStorage) =>
269415
{
270416
try
@@ -280,6 +426,64 @@
280426
.WithName("ClearVectors")
281427
.WithOpenApi();
282428

429+
// Diagnostic endpoint to test embedding consistency
430+
// Generates embeddings for `text` three ways — with no token, with the placeholder "dummy_token",
// and (when present) with the X-GitHub-Token request header — and reports dimensions, a 10-value
// sample and magnitude for each, plus whether the vectors are element-wise equal.
// A failure of the real-token call is logged as a warning and reported as a null RealGithubEmbedding;
// any other failure is logged and answered with 400 and the exception message.
app.MapGet("/api/embedding-test", async (HttpContext context, string text, IEmbeddingService embeddingService, ILogger<Program> logger) =>
{
    // Shared projection so all three embeddings are summarized identically, with one array copy each.
    var summarize = (ReadOnlyMemory<float> embedding) =>
    {
        var values = embedding.ToArray();
        return new
        {
            Dimensions = values.Length,
            Sample = values[..Math.Min(10, values.Length)],
            Magnitude = Math.Sqrt(values.Sum(x => x * x))
        };
    };

    try
    {
        // Test both with and without GitHub token
        var simpleEmbedding = await embeddingService.GenerateEmbeddingAsync(text, null);
        var githubEmbedding = await embeddingService.GenerateEmbeddingAsync(text, "dummy_token"); // Will use simple if token is invalid

        // Try with the actual token from headers
        var githubToken = context.Request.Headers["X-GitHub-Token"].FirstOrDefault();
        ReadOnlyMemory<float>? realGithubEmbedding = null;

        if (!string.IsNullOrEmpty(githubToken))
        {
            try
            {
                realGithubEmbedding = await embeddingService.GenerateEmbeddingAsync(text, githubToken);
            }
            catch (Exception ex)
            {
                logger.LogWarning(ex, "Failed to generate embedding with real GitHub token");
            }
        }

        return Results.Ok(new
        {
            Text = text,
            SimpleEmbedding = summarize(simpleEmbedding),
            GithubEmbedding = summarize(githubEmbedding),
            RealGithubEmbedding = realGithubEmbedding.HasValue ? summarize(realGithubEmbedding.Value) : null,
            AreSimpleAndGithubSame = simpleEmbedding.Span.SequenceEqual(githubEmbedding.Span),
            AreGithubEmbeddingsDifferent = realGithubEmbedding.HasValue && !githubEmbedding.Span.SequenceEqual(realGithubEmbedding.Value.Span)
        });
    }
    catch (Exception ex)
    {
        // Log before answering so the failure is not lost, matching the other diagnostic endpoints.
        logger.LogError(ex, "Error testing embedding consistency for text: {Text}", text);
        return Results.BadRequest(new { Error = ex.Message });
    }
})
.WithName("TestEmbeddingConsistency")
.WithOpenApi();
486+
283487
app.MapDefaultEndpoints();
284488

285489
// Helper method for GitHub token validation

samples/AspireDemo/NLWebNet.AspireApp/Services/IVectorStorageService.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@ public interface IVectorStorageService
4242
/// <returns>Total document count</returns>
4343
Task<int> GetDocumentCountAsync(CancellationToken cancellationToken = default);
4444

45+
/// <summary>
46+
/// Get up to <paramref name="limit"/> documents from the vector storage (for debugging/browsing).
47+
/// </summary>
48+
/// <param name="limit">Maximum number of documents to return (defaults to 100)</param>
49+
/// <param name="cancellationToken">Cancellation token</param>
50+
/// <returns>The retrieved documents; at most <paramref name="limit"/> of them</returns>
51+
Task<IEnumerable<DocumentRecord>> GetAllDocumentsAsync(int limit = 100, CancellationToken cancellationToken = default);
52+
4553
/// <summary>
4654
/// Delete all documents from the vector storage
4755
/// </summary>

samples/AspireDemo/NLWebNet.AspireApp/Services/QdrantVectorStorageService.cs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,48 @@ public async Task<int> GetDocumentCountAsync(CancellationToken cancellationToken
184184
}
185185
}
186186

187+
/// <summary>
/// Retrieves up to <paramref name="limit"/> documents from the Qdrant collection using a single
/// scroll request that returns payloads but not vectors.
/// </summary>
/// <param name="limit">Maximum number of documents to return; zero or a negative value yields an empty result.</param>
/// <param name="cancellationToken">Token passed to initialization and to the scroll request.</param>
/// <returns>
/// The mapped documents. Payload fields that are absent map to an empty string (or 0 for the score,
/// or the default timestamp for an unparseable <c>ingested_at</c>). If the request throws, the
/// failure is logged and an empty list is returned.
/// </returns>
public async Task<IEnumerable<DocumentRecord>> GetAllDocumentsAsync(int limit = 100, CancellationToken cancellationToken = default)
{
    // A negative limit would wrap around to a huge value in the uint cast below.
    if (limit <= 0)
    {
        return new List<DocumentRecord>();
    }

    if (!_isInitialized)
    {
        await InitializeAsync(cancellationToken);
    }

    try
    {
        // Use ScrollAsync with collection name and scroll parameters
        var response = await _qdrantClient.ScrollAsync(
            collectionName: CollectionName,
            limit: (uint)limit,
            payloadSelector: true,
            vectorsSelector: false,
            cancellationToken: cancellationToken);

        var documents = new List<DocumentRecord>();
        foreach (var point in response.Result)
        {
            var payload = point.Payload;

            // Look fields up with TryGetValue so one point with a missing key does not throw
            // and cause the whole listing to be discarded by the catch below.
            string Text(string key) => payload.TryGetValue(key, out var field) ? field.StringValue : string.Empty;

            var score = payload.TryGetValue("score", out var scoreField) ? (float)scoreField.DoubleValue : 0f;

            // Parse culture-independently; an unparseable timestamp falls back to the default value.
            var ingestedAt = DateTimeOffset.TryParse(
                Text("ingested_at"),
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var parsedIngestedAt)
                ? parsedIngestedAt
                : default;

            documents.Add(new DocumentRecord
            {
                Id = point.Id.Uuid,
                Url = Text("url"),
                Title = Text("title"),
                Site = Text("site"),
                Description = Text("description"),
                Score = score,
                IngestedAt = ingestedAt,
                SourceType = Text("source_type")
            });
        }

        _logger.LogDebug("Retrieved {Count} documents from Qdrant", documents.Count);
        return documents;
    }
    catch (Exception ex)
    {
        // Best-effort browsing: report the failure and return nothing rather than propagate.
        _logger.LogError(ex, "Failed to get all documents from Qdrant");
        return new List<DocumentRecord>();
    }
}
228+
187229
public async Task<bool> ClearAllDocumentsAsync(CancellationToken cancellationToken = default)
188230
{
189231
if (!_isInitialized)

0 commit comments

Comments
 (0)