Skip to content

Commit 712d9aa

Browse files
Replace JSON vector store with SQLite (#6438)
Co-authored-by: Jeff Handley <[email protected]>
1 parent 6ae09b8 commit 712d9aa

File tree

59 files changed

+296
-963
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

59 files changed

+296
-963
lines changed

src/ProjectTemplates/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ package-lock.json
99
*/src/**/*.sln
1010
*/src/**/NuGet.config
1111
*/src/**/Directory.Build.targets
12+
*/src/**/Directory.Build.props
1213
*/src/**/ingestioncache.*
1314

1415
# launchSettings.json files are required for the templates.

src/ProjectTemplates/GeneratedContent.targets

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,8 @@
3535
<TemplatePackageVersion_AzureSearchDocuments>11.6.0</TemplatePackageVersion_AzureSearchDocuments>
3636
<TemplatePackageVersion_CommunityToolkitAspire>9.4.1-beta.277</TemplatePackageVersion_CommunityToolkitAspire>
3737
<TemplatePackageVersion_MicrosoftExtensionsServiceDiscovery>9.2.0</TemplatePackageVersion_MicrosoftExtensionsServiceDiscovery>
38-
<TemplatePackageVersion_MicrosoftSemanticKernel>1.50.0</TemplatePackageVersion_MicrosoftSemanticKernel>
39-
<TemplatePackageVersion_MicrosoftSemanticKernel_Preview>1.50.0-preview</TemplatePackageVersion_MicrosoftSemanticKernel_Preview>
38+
<TemplatePackageVersion_MicrosoftSemanticKernel>1.52.1</TemplatePackageVersion_MicrosoftSemanticKernel>
39+
<TemplatePackageVersion_MicrosoftSemanticKernel_Preview>1.52.1-preview</TemplatePackageVersion_MicrosoftSemanticKernel_Preview>
4040
<TemplatePackageVersion_OllamaSharp>5.1.16</TemplatePackageVersion_OllamaSharp>
4141
<TemplatePackageVersion_OpenTelemetry>1.9.0</TemplatePackageVersion_OpenTelemetry>
4242
<TemplatePackageVersion_PdfPig>0.1.10</TemplatePackageVersion_PdfPig>
@@ -85,6 +85,9 @@
8585
<GeneratedContent
8686
Include="$(_ChatWithCustomDataContentRoot)Directory.Build.targets.in"
8787
OutputPath="$(_ChatWithCustomDataContentRoot)Directory.Build.targets" />
88+
<GeneratedContent
89+
Include="$(_ChatWithCustomDataContentRoot)Directory.Build.props.in"
90+
OutputPath="$(_ChatWithCustomDataContentRoot)Directory.Build.props" />
8891
<GeneratedContent
8992
Include="$(_ChatWithCustomDataContentRoot)ChatWithCustomData-CSharp.Web\ChatWithCustomData-CSharp.Web.csproj.in"
9093
OutputPath="$(_ChatWithCustomDataContentRoot)ChatWithCustomData-CSharp.Web\ChatWithCustomData-CSharp.Web.csproj" />

src/ProjectTemplates/Microsoft.Extensions.AI.Templates/Microsoft.Extensions.AI.Templates.csproj

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
</ItemGroup>
4545

4646
<ItemGroup>
47-
<!-- Keep the exclude patterns below in sync with those in AichatwebTemplatesTests.cs -->
47+
<!-- Keep the exclude patterns below in sync with those in AIChatWebSnapshotTests.cs -->
4848
<Content
4949
Include="src\ChatWithCustomData\**\*"
5050
Exclude="
@@ -59,7 +59,8 @@
5959
**\package-lock.json;
6060
**\ingestioncache.*;
6161
**\NuGet.config;
62-
**\Directory.Build.targets;" />
62+
**\Directory.Build.targets;
63+
**\Directory.Build.props;" />
6364
<None Include="THIRD-PARTY-NOTICES.TXT" Pack="true" PackagePath="." />
6465
<Compile Remove="**\*" />
6566
</ItemGroup>

src/ProjectTemplates/Microsoft.Extensions.AI.Templates/src/ChatWithCustomData/ChatWithCustomData-CSharp.Web/ChatWithCustomData-CSharp.Web.csproj.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
#elif (UseQdrant)-->
4646
<PackageReference Include="Aspire.Qdrant.Client" Version="${TemplatePackageVersion_Aspire}" />
4747
<PackageReference Include="Microsoft.SemanticKernel.Connectors.Qdrant" Version="${TemplatePackageVersion_MicrosoftSemanticKernel_Preview}" />
48+
<!--#elif (UseLocalVectorStore)-->
49+
<PackageReference Include="Microsoft.SemanticKernel.Connectors.SqliteVec" Version="${TemplatePackageVersion_MicrosoftSemanticKernel_Preview}" />
4850
<!--#endif -->
4951
</ItemGroup>
5052
<!--#if (IsAspire) -->

src/ProjectTemplates/Microsoft.Extensions.AI.Templates/src/ChatWithCustomData/ChatWithCustomData-CSharp.Web/Program.Aspire.cs

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,11 @@
11
using Microsoft.Extensions.AI;
2-
using Microsoft.Extensions.VectorData;
32
using ChatWithCustomData_CSharp.Web.Components;
43
using ChatWithCustomData_CSharp.Web.Services;
54
using ChatWithCustomData_CSharp.Web.Services.Ingestion;
65
#if (IsOllama)
76
#else // IsAzureOpenAI || IsOpenAI || IsGHModels
87
using OpenAI;
98
#endif
10-
#if (UseAzureAISearch)
11-
using Microsoft.SemanticKernel.Connectors.AzureAISearch;
12-
#elif (UseQdrant)
13-
using Microsoft.SemanticKernel.Connectors.Qdrant;
14-
#endif
159

1610
var builder = WebApplication.CreateBuilder(args);
1711
builder.AddServiceDefaults();
@@ -41,15 +35,17 @@
4135

4236
#if (UseAzureAISearch)
4337
builder.AddAzureSearchClient("azureAISearch");
44-
45-
builder.Services.AddSingleton<IVectorStore, AzureAISearchVectorStore>();
38+
builder.Services.AddAzureAISearchCollection<IngestedChunk>("data-ChatWithCustomData-CSharp.Web-chunks");
39+
builder.Services.AddAzureAISearchCollection<IngestedDocument>("data-ChatWithCustomData-CSharp.Web-documents");
4640
#elif (UseQdrant)
4741
builder.AddQdrantClient("vectordb");
48-
49-
builder.Services.AddSingleton<IVectorStore, QdrantVectorStore>();
42+
builder.Services.AddQdrantCollection<Guid, IngestedChunk>("data-ChatWithCustomData-CSharp.Web-chunks");
43+
builder.Services.AddQdrantCollection<Guid, IngestedDocument>("data-ChatWithCustomData-CSharp.Web-documents");
5044
#else // UseLocalVectorStore
51-
var vectorStore = new JsonVectorStore(Path.Combine(AppContext.BaseDirectory, "vector-store"));
52-
builder.Services.AddSingleton<IVectorStore>(vectorStore);
45+
var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db");
46+
var vectorStoreConnectionString = $"Data Source={vectorStorePath}";
47+
builder.Services.AddSqliteCollection<string, IngestedChunk>("data-ChatWithCustomData-CSharp.Web-chunks", vectorStoreConnectionString);
48+
builder.Services.AddSqliteCollection<string, IngestedDocument>("data-ChatWithCustomData-CSharp.Web-documents", vectorStoreConnectionString);
5349
#endif
5450
builder.Services.AddScoped<DataIngestor>();
5551
builder.Services.AddSingleton<SemanticSearch>();

src/ProjectTemplates/Microsoft.Extensions.AI.Templates/src/ChatWithCustomData/ChatWithCustomData-CSharp.Web/Program.cs

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using Microsoft.Extensions.AI;
2-
using Microsoft.Extensions.VectorData;
32
using ChatWithCustomData_CSharp.Web.Components;
43
using ChatWithCustomData_CSharp.Web.Services;
54
using ChatWithCustomData_CSharp.Web.Services.Ingestion;
@@ -18,10 +17,6 @@
1817
using Azure.AI.OpenAI;
1918
using System.ClientModel;
2019
#endif
21-
#if (UseAzureAISearch)
22-
using Azure.Search.Documents.Indexes;
23-
using Microsoft.SemanticKernel.Connectors.AzureAISearch;
24-
#endif
2520

2621
var builder = WebApplication.CreateBuilder(args);
2722
builder.Services.AddRazorComponents().AddInteractiveServerComponents();
@@ -83,19 +78,23 @@
8378
#if (!UseManagedIdentity)
8479
// dotnet user-secrets set AzureAISearch:Key YOUR-API-KEY
8580
#endif
86-
var vectorStore = new AzureAISearchVectorStore(
87-
new SearchIndexClient(
88-
new Uri(builder.Configuration["AzureAISearch:Endpoint"] ?? throw new InvalidOperationException("Missing configuration: AzureAISearch:Endpoint. See the README for details.")),
81+
var azureAISearchEndpoint = new Uri(builder.Configuration["AzureAISearch:Endpoint"]
82+
?? throw new InvalidOperationException("Missing configuration: AzureAISearch:Endpoint. See the README for details."));
8983
#if (UseManagedIdentity)
90-
new DefaultAzureCredential()));
84+
var azureAISearchCredential = new DefaultAzureCredential();
9185
#else
92-
new AzureKeyCredential(builder.Configuration["AzureAISearch:Key"] ?? throw new InvalidOperationException("Missing configuration: AzureAISearch:Key. See the README for details."))));
86+
var azureAISearchCredential = new AzureKeyCredential(builder.Configuration["AzureAISearch:Key"]
87+
?? throw new InvalidOperationException("Missing configuration: AzureAISearch:Key. See the README for details."));
9388
#endif
89+
builder.Services.AddAzureAISearchCollection<IngestedChunk>("data-ChatWithCustomData-CSharp.Web-chunks", azureAISearchEndpoint, azureAISearchCredential);
90+
builder.Services.AddAzureAISearchCollection<IngestedDocument>("data-ChatWithCustomData-CSharp.Web-documents", azureAISearchEndpoint, azureAISearchCredential);
9491
#else // UseLocalVectorStore
95-
var vectorStore = new JsonVectorStore(Path.Combine(AppContext.BaseDirectory, "vector-store"));
92+
var vectorStorePath = Path.Combine(AppContext.BaseDirectory, "vector-store.db");
93+
var vectorStoreConnectionString = $"Data Source={vectorStorePath}";
94+
builder.Services.AddSqliteCollection<string, IngestedChunk>("data-ChatWithCustomData-CSharp.Web-chunks", vectorStoreConnectionString);
95+
builder.Services.AddSqliteCollection<string, IngestedDocument>("data-ChatWithCustomData-CSharp.Web-documents", vectorStoreConnectionString);
9696
#endif
9797

98-
builder.Services.AddSingleton<IVectorStore>(vectorStore);
9998
builder.Services.AddScoped<DataIngestor>();
10099
builder.Services.AddSingleton<SemanticSearch>();
101100
builder.Services.AddChatClient(chatClient).UseFunctionInvocation().UseLogging();

src/ProjectTemplates/Microsoft.Extensions.AI.Templates/src/ChatWithCustomData/ChatWithCustomData-CSharp.Web/Services/IngestedChunk.cs

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,33 @@ namespace ChatWithCustomData_CSharp.Web.Services;
44

55
public class IngestedChunk
66
{
7-
[VectorStoreRecordKey]
7+
#if (IsOllama)
8+
private const int VectorDimensions = 384; // 384 is the default vector size for the all-minilm embedding model
9+
#else
10+
private const int VectorDimensions = 1536; // 1536 is the default vector size for the OpenAI text-embedding-3-small model
11+
#endif
12+
#if (UseAzureAISearch || UseQdrant)
13+
private const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
14+
#else
15+
private const string VectorDistanceFunction = DistanceFunction.CosineDistance;
16+
#endif
17+
18+
[VectorStoreKey]
819
#if (UseQdrant)
920
public required Guid Key { get; set; }
1021
#else
1122
public required string Key { get; set; }
1223
#endif
1324

14-
[VectorStoreRecordData(IsIndexed = true)]
25+
[VectorStoreData(IsIndexed = true)]
1526
public required string DocumentId { get; set; }
1627

17-
[VectorStoreRecordData]
28+
[VectorStoreData]
1829
public int PageNumber { get; set; }
1930

20-
[VectorStoreRecordData]
31+
[VectorStoreData]
2132
public required string Text { get; set; }
2233

23-
#if (IsOllama)
24-
[VectorStoreRecordVector(384, DistanceFunction = DistanceFunction.CosineSimilarity)] // 384 is the default vector size for the all-minilm embedding model
25-
#else
26-
[VectorStoreRecordVector(1536, DistanceFunction = DistanceFunction.CosineSimilarity)] // 1536 is the default vector size for the OpenAI text-embedding-3-small model
27-
#endif
28-
public ReadOnlyMemory<float> Vector { get; set; }
34+
[VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
35+
public string? Vector => Text;
2936
}

src/ProjectTemplates/Microsoft.Extensions.AI.Templates/src/ChatWithCustomData/ChatWithCustomData-CSharp.Web/Services/IngestedDocument.cs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,30 @@ namespace ChatWithCustomData_CSharp.Web.Services;
44

55
public class IngestedDocument
66
{
7-
[VectorStoreRecordKey]
7+
private const int VectorDimensions = 2;
8+
#if (UseAzureAISearch || UseQdrant)
9+
private const string VectorDistanceFunction = DistanceFunction.CosineSimilarity;
10+
#else
11+
private const string VectorDistanceFunction = DistanceFunction.CosineDistance;
12+
#endif
13+
14+
[VectorStoreKey]
815
#if (UseQdrant)
916
public required Guid Key { get; set; }
1017
#else
1118
public required string Key { get; set; }
1219
#endif
1320

14-
[VectorStoreRecordData(IsIndexed = true)]
21+
[VectorStoreData(IsIndexed = true)]
1522
public required string SourceId { get; set; }
1623

17-
[VectorStoreRecordData]
24+
[VectorStoreData]
1825
public required string DocumentId { get; set; }
1926

20-
[VectorStoreRecordData]
27+
[VectorStoreData]
2128
public required string DocumentVersion { get; set; }
2229

2330
// The vector is not used but required for some vector databases
24-
[VectorStoreRecordVector(2, DistanceFunction = DistanceFunction.CosineSimilarity)]
31+
[VectorStoreVector(VectorDimensions, DistanceFunction = VectorDistanceFunction)]
2532
public ReadOnlyMemory<float> Vector { get; set; } = new ReadOnlyMemory<float>([0, 0]);
2633
}

src/ProjectTemplates/Microsoft.Extensions.AI.Templates/src/ChatWithCustomData/ChatWithCustomData-CSharp.Web/Services/Ingestion/DataIngestor.cs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,13 @@ namespace ChatWithCustomData_CSharp.Web.Services.Ingestion;
55

66
public class DataIngestor(
77
ILogger<DataIngestor> logger,
8-
IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator,
9-
IVectorStore vectorStore)
8+
#if (UseQdrant)
9+
VectorStoreCollection<Guid, IngestedChunk> chunksCollection,
10+
VectorStoreCollection<Guid, IngestedDocument> documentsCollection)
11+
#else
12+
VectorStoreCollection<string, IngestedChunk> chunksCollection,
13+
VectorStoreCollection<string, IngestedDocument> documentsCollection)
14+
#endif
1015
{
1116
public static async Task IngestDataAsync(IServiceProvider services, IIngestionSource source)
1217
{
@@ -17,15 +22,8 @@ public static async Task IngestDataAsync(IServiceProvider services, IIngestionSo
1722

1823
public async Task IngestDataAsync(IIngestionSource source)
1924
{
20-
#if (UseQdrant)
21-
var chunksCollection = vectorStore.GetCollection<Guid, IngestedChunk>("data-ChatWithCustomData-CSharp.Web-chunks");
22-
var documentsCollection = vectorStore.GetCollection<Guid, IngestedDocument>("data-ChatWithCustomData-CSharp.Web-documents");
23-
#else
24-
var chunksCollection = vectorStore.GetCollection<string, IngestedChunk>("data-ChatWithCustomData-CSharp.Web-chunks");
25-
var documentsCollection = vectorStore.GetCollection<string, IngestedDocument>("data-ChatWithCustomData-CSharp.Web-documents");
26-
#endif
27-
await chunksCollection.CreateCollectionIfNotExistsAsync();
28-
await documentsCollection.CreateCollectionIfNotExistsAsync();
25+
await chunksCollection.EnsureCollectionExistsAsync();
26+
await documentsCollection.EnsureCollectionExistsAsync();
2927

3028
var sourceId = source.SourceId;
3129
var documentsForSource = await documentsCollection.GetAsync(doc => doc.SourceId == sourceId, top: int.MaxValue).ToListAsync();
@@ -46,7 +44,7 @@ public async Task IngestDataAsync(IIngestionSource source)
4644

4745
await documentsCollection.UpsertAsync(modifiedDocument);
4846

49-
var newRecords = await source.CreateChunksForDocumentAsync(embeddingGenerator, modifiedDocument);
47+
var newRecords = await source.CreateChunksForDocumentAsync(modifiedDocument);
5048
await chunksCollection.UpsertAsync(newRecords);
5149
}
5250

Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
1-
using Microsoft.Extensions.AI;
2-
3-
namespace ChatWithCustomData_CSharp.Web.Services.Ingestion;
1+
namespace ChatWithCustomData_CSharp.Web.Services.Ingestion;
42

53
public interface IIngestionSource
64
{
@@ -10,5 +8,5 @@ public interface IIngestionSource
108

119
Task<IEnumerable<IngestedDocument>> GetDeletedDocumentsAsync(IReadOnlyList<IngestedDocument> existingDocuments);
1210

13-
Task<IEnumerable<IngestedChunk>> CreateChunksForDocumentAsync(IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator, IngestedDocument document);
11+
Task<IEnumerable<IngestedChunk>> CreateChunksForDocumentAsync(IngestedDocument document);
1412
}

0 commit comments

Comments
 (0)