Skip to content

Commit 04d777c

Browse files
committed
Improve docx parsing, chunk ordering, and DB config
- Add null check for Document in DocxContentDecoder to prevent exceptions.
- Set DocumentChunk.Id to auto-generate in ApplicationDbContext.
- Order vector search results by cosine similarity for relevance.
1 parent 1ae1db2 commit 04d777c

File tree

3 files changed

+3
-3
lines changed

3 files changed

+3
-3
lines changed

SqlDatabaseVectorSearch/ContentDecoders/DocxContentDecoder.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ public Task<IEnumerable<Chunk>> DecodeAsync(Stream stream, string contentType, C
1414
// Open a Word document for read-only access.
1515
using var document = WordprocessingDocument.Open(stream, false);
1616

17-
var body = document.MainDocumentPart?.Document.Body;
17+
var body = document.MainDocumentPart?.Document?.Body;
1818
var content = new StringBuilder();
1919

2020
foreach (var p in body?.Descendants<Paragraph>() ?? [])

SqlDatabaseVectorSearch/Data/ApplicationDbContext.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ protected override void OnModelCreating(ModelBuilder modelBuilder)
3434
modelBuilder.Entity<DocumentChunk>(entity =>
3535
{
3636
entity.ToTable("DocumentChunks");
37-
entity.HasKey(e => e.Id);
37+
entity.HasKey(e => e.Id);
3838

3939
entity.Property(e => e.Id).ValueGeneratedOnAdd();
4040
entity.Property(e => e.Content).IsRequired();

SqlDatabaseVectorSearch/Services/VectorSearchService.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ public async IAsyncEnumerable<Response> AskStreamingAsync(Question question, boo
152152
var questionEmbedding = await embeddingGenerator.GenerateVectorAsync(reformulatedQuestion.Text!, cancellationToken: cancellationToken);
153153
var embeddingVector = new SqlVector<float>(questionEmbedding);
154154

155-
var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
155+
var chunks = await dbContext.DocumentChunks.Include(c => c.Document)
156156
.OrderBy(c => EF.Functions.VectorDistance("cosine", c.Embedding, embeddingVector))
157157
.Take(appSettings.MaxRelevantChunks)
158158
.ToListAsync(cancellationToken);

0 commit comments

Comments
 (0)