Skip to content

Commit 69d835c

Browse files
authored
DuckDB as memory storage (#1074)
### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> DuckDB is becoming a popular option for in-memory OLAP storage. This PR contributes a MemoryStorage implementation on DuckDB that supports both file-based and in-memory setups. ### Description <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> DuckDB memory storage implementation
1 parent d095e5a commit 69d835c

File tree

8 files changed

+1239
-1
lines changed

8 files changed

+1239
-1
lines changed

dotnet/Directory.Packages.props

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
<!-- Skills -->
3232
<PackageVersion Include="DocumentFormat.OpenXml" Version="2.20.0" />
3333
<PackageVersion Include="Microsoft.Data.Sqlite" Version="[7.0, )" />
34+
<PackageVersion Include="DuckDB.NET.Data.Full" Version="[0.8, )" />
35+
<PackageVersion Include="DuckDB.NET.Data" Version="[0.8, )" />
3436
<PackageVersion Include="Microsoft.Graph" Version="[4.51.0, 5)" />
3537
<PackageVersion Include="Microsoft.Azure.Cosmos" Version="[3.32.3, )" />
3638
<PackageVersion Include="Microsoft.Identity.Client.Extensions.Msal" Version="[2.28.0, )" />

dotnet/SK-dotnet.sln

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "InternalUtilities", "Intern
103103
EndProject
104104
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "OpenApiSkillsExample", "..\samples\dotnet\openapi-skills\OpenApiSkillsExample.csproj", "{4D91A3E0-C404-495B-AD4A-411C4E83CF54}"
105105
EndProject
106+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connectors.Memory.DuckDB", "src\Connectors\Connectors.Memory.DuckDB\Connectors.Memory.DuckDB.csproj", "{50FAE231-6F24-4779-9D02-12ABBC9A49E2}"
107+
EndProject
106108
Global
107109
GlobalSection(SolutionConfigurationPlatforms) = preSolution
108110
Debug|Any CPU = Debug|Any CPU
@@ -294,6 +296,12 @@ Global
294296
{4D91A3E0-C404-495B-AD4A-411C4E83CF54}.Publish|Any CPU.Build.0 = Release|Any CPU
295297
{4D91A3E0-C404-495B-AD4A-411C4E83CF54}.Release|Any CPU.ActiveCfg = Release|Any CPU
296298
{4D91A3E0-C404-495B-AD4A-411C4E83CF54}.Release|Any CPU.Build.0 = Release|Any CPU
299+
{50FAE231-6F24-4779-9D02-12ABBC9A49E2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
300+
{50FAE231-6F24-4779-9D02-12ABBC9A49E2}.Debug|Any CPU.Build.0 = Debug|Any CPU
301+
{50FAE231-6F24-4779-9D02-12ABBC9A49E2}.Publish|Any CPU.ActiveCfg = Publish|Any CPU
302+
{50FAE231-6F24-4779-9D02-12ABBC9A49E2}.Publish|Any CPU.Build.0 = Publish|Any CPU
303+
{50FAE231-6F24-4779-9D02-12ABBC9A49E2}.Release|Any CPU.ActiveCfg = Release|Any CPU
304+
{50FAE231-6F24-4779-9D02-12ABBC9A49E2}.Release|Any CPU.Build.0 = Release|Any CPU
297305
EndGlobalSection
298306
GlobalSection(SolutionProperties) = preSolution
299307
HideSolutionNode = FALSE
@@ -336,6 +344,7 @@ Global
336344
{136823BE-8665-4D57-87E0-EF41535539E2} = {0247C2C9-86C3-45BA-8873-28B0948EDC0C}
337345
{4D3DAE63-41C6-4E1C-A35A-E77BDFC40675} = {831DDCA2-7D2C-4C31-80DB-6BDB3E1F7AE0}
338346
{4D91A3E0-C404-495B-AD4A-411C4E83CF54} = {FA3720F1-C99A-49B2-9577-A940257098BF}
347+
{50FAE231-6F24-4779-9D02-12ABBC9A49E2} = {0247C2C9-86C3-45BA-8873-28B0948EDC0C}
339348
EndGlobalSection
340349
GlobalSection(ExtensibilityGlobals) = postSolution
341350
SolutionGuid = {FBDC56A3-86AD-4323-AA0F-201E59123B83}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<!-- THIS PROPERTY GROUP MUST COME FIRST -->
5+
<AssemblyName>Microsoft.SemanticKernel.Connectors.Memory.DuckDB</AssemblyName>
6+
<RootNamespace>$(AssemblyName)</RootNamespace>
7+
<TargetFramework>netstandard2.0</TargetFramework>
8+
</PropertyGroup>
9+
10+
<!-- IMPORT NUGET PACKAGE SHARED PROPERTIES -->
11+
<Import Project="$(RepoRoot)/dotnet/nuget/nuget-package.props" />
12+
13+
<PropertyGroup>
14+
<!-- NuGet Package Settings -->
15+
<Title>Semantic Kernel - DuckDB Connector</Title>
16+
<Description>DuckDB connector for Semantic Kernel skills and semantic memory</Description>
17+
</PropertyGroup>
18+
19+
<ItemGroup>
20+
<PackageReference Include="DuckDB.NET.Data.Full" />
21+
<PackageReference Include="DuckDB.NET.Data" />
22+
</ItemGroup>
23+
24+
<ItemGroup>
25+
<ProjectReference Include="..\..\SemanticKernel\SemanticKernel.csproj" />
26+
</ItemGroup>
27+
28+
</Project>
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System.Collections.Generic;
4+
using System.Linq;
5+
using System.Runtime.CompilerServices;
6+
using System.Threading;
7+
using System.Threading.Tasks;
8+
using DuckDB.NET.Data;
9+
10+
namespace Microsoft.SemanticKernel.Connectors.Memory.DuckDB;
11+
12+
/// <summary>
/// One row of the memory table as handed back by <c>Database.ReadAsync</c> and
/// <c>Database.ReadAllAsync</c>; every column is carried as its raw string form.
/// </summary>
internal struct DatabaseEntry
{
    /// <summary>Value of the <c>key</c> column.</summary>
    public string Key { get; set; }

    /// <summary>Value of the <c>metadata</c> column.</summary>
    public string MetadataString { get; set; }

    /// <summary>Value of the <c>embedding</c> column.</summary>
    public string EmbeddingString { get; set; }

    /// <summary>Value of the <c>timestamp</c> column; declared nullable.</summary>
    public string? Timestamp { get; set; }
}
22+
23+
/// <summary>
/// Thin SQL layer over a single DuckDB table that stores memory records
/// (collection, key, metadata, embedding, timestamp) as text columns.
/// Every method runs one command on the connection supplied by the caller;
/// this class holds no connection or other state of its own.
/// </summary>
internal sealed class Database
{
    private const string TableName = "SKMemoryTable";

    public Database() { }

    /// <summary>
    /// Creates the memory table if it does not exist yet.
    /// </summary>
    /// <param name="conn">Connection to run the command on.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task CreateTableAsync(DuckDBConnection conn, CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            CREATE TABLE IF NOT EXISTS {TableName}(
                collection TEXT,
                key TEXT,
                metadata TEXT,
                embedding TEXT,
                timestamp TEXT,
                PRIMARY KEY(collection, key))";

        // Awaited (rather than returned) so the command is disposed only after execution has finished.
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Registers a collection by inserting a placeholder row whose key, metadata,
    /// embedding and timestamp are all empty strings. Does nothing when the
    /// collection is already present.
    /// </summary>
    /// <param name="conn">Connection to run the commands on.</param>
    /// <param name="collectionName">Name of the collection to create.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task CreateCollectionAsync(DuckDBConnection conn, string collectionName, CancellationToken cancellationToken = default)
    {
        if (await this.DoesCollectionExistsAsync(conn, collectionName, cancellationToken).ConfigureAwait(false))
        {
            // Collection already exists
            return;
        }

        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            INSERT INTO {TableName} VALUES (?1,?2,?3,?4,?5 ); ";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));
        cmd.Parameters.Add(new DuckDBParameter(string.Empty));
        cmd.Parameters.Add(new DuckDBParameter(string.Empty));
        cmd.Parameters.Add(new DuckDBParameter(string.Empty));
        cmd.Parameters.Add(new DuckDBParameter(string.Empty));

        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Inserts a record, or overwrites metadata, embedding and timestamp when a row
    /// with the same (collection, key) already exists.
    /// </summary>
    /// <param name="conn">Connection to run the command on.</param>
    /// <param name="collection">Collection the record belongs to.</param>
    /// <param name="key">Record key within the collection.</param>
    /// <param name="metadata">Serialized metadata; <c>null</c> is stored as an empty string.</param>
    /// <param name="embedding">Serialized embedding; <c>null</c> is stored as an empty string.</param>
    /// <param name="timestamp">Serialized timestamp; <c>null</c> is stored as an empty string.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task UpdateOrInsertAsync(DuckDBConnection conn,
        string collection, string key, string? metadata, string? embedding, string? timestamp, CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            INSERT INTO {TableName} VALUES(?1, ?2, ?3, ?4, ?5)
            ON CONFLICT (collection, key) DO UPDATE SET metadata=?3, embedding=?4, timestamp=?5; ";
        cmd.Parameters.Add(new DuckDBParameter(collection));
        cmd.Parameters.Add(new DuckDBParameter(key));
        cmd.Parameters.Add(new DuckDBParameter(metadata ?? string.Empty));
        cmd.Parameters.Add(new DuckDBParameter(embedding ?? string.Empty));
        cmd.Parameters.Add(new DuckDBParameter(timestamp ?? string.Empty));
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Checks whether any row is stored under the given collection name.
    /// </summary>
    /// <param name="conn">Connection to run the query on.</param>
    /// <param name="collectionName">Collection name to look for.</param>
    /// <param name="cancellationToken">Token forwarded to the query.</param>
    /// <returns><c>true</c> when the name is among the stored collection names.</returns>
    public async Task<bool> DoesCollectionExistsAsync(DuckDBConnection conn,
        string collectionName,
        CancellationToken cancellationToken = default)
    {
        // Stops enumerating at the first match instead of buffering every collection name first.
        return await this.GetCollectionsAsync(conn, cancellationToken)
            .ContainsAsync(collectionName, cancellationToken)
            .ConfigureAwait(false);
    }

    /// <summary>
    /// Streams the distinct collection names present in the table.
    /// </summary>
    /// <param name="conn">Connection to run the query on.</param>
    /// <param name="cancellationToken">Token forwarded to the query and to each row read.</param>
    public async IAsyncEnumerable<string> GetCollectionsAsync(DuckDBConnection conn,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            SELECT DISTINCT collection
            FROM {TableName};";

        using var dataReader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await dataReader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            yield return dataReader.GetString("collection");
        }
    }

    /// <summary>
    /// Streams every record of a collection, skipping rows whose key is null, empty
    /// or whitespace (such as the placeholder row written by <see cref="CreateCollectionAsync"/>).
    /// </summary>
    /// <param name="conn">Connection to run the query on.</param>
    /// <param name="collectionName">Collection to read.</param>
    /// <param name="cancellationToken">Token forwarded to the query and to each row read.</param>
    public async IAsyncEnumerable<DatabaseEntry> ReadAllAsync(DuckDBConnection conn,
        string collectionName,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            SELECT * FROM {TableName}
            WHERE collection=?1;";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));

        using var dataReader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await dataReader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            string key = dataReader.GetString("key");
            if (string.IsNullOrWhiteSpace(key))
            {
                continue;
            }

            string metadata = dataReader.GetString("metadata");
            string embedding = dataReader.GetString("embedding");
            string timestamp = dataReader.GetString("timestamp");
            yield return new DatabaseEntry() { Key = key, MetadataString = metadata, EmbeddingString = embedding, Timestamp = timestamp };
        }
    }

    /// <summary>
    /// Reads a single record identified by collection and key.
    /// </summary>
    /// <param name="conn">Connection to run the query on.</param>
    /// <param name="collectionName">Collection the record belongs to.</param>
    /// <param name="key">Key of the record.</param>
    /// <param name="cancellationToken">Token forwarded to the query and the row read.</param>
    /// <returns>The matching entry, or <c>null</c> when the query returns no row.</returns>
    public async Task<DatabaseEntry?> ReadAsync(DuckDBConnection conn,
        string collectionName,
        string key,
        CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            SELECT * FROM {TableName}
            WHERE collection=?1
            AND key=?2; ";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));
        cmd.Parameters.Add(new DuckDBParameter(key));

        using var dataReader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (!await dataReader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            return null;
        }

        // Same by-name column access as ReadAllAsync.
        return new DatabaseEntry()
        {
            Key = key,
            MetadataString = dataReader.GetString("metadata"),
            EmbeddingString = dataReader.GetString("embedding"),
            Timestamp = dataReader.GetString("timestamp")
        };
    }

    /// <summary>
    /// Deletes every row stored under the given collection.
    /// </summary>
    /// <param name="conn">Connection to run the command on.</param>
    /// <param name="collectionName">Collection to remove.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task DeleteCollectionAsync(DuckDBConnection conn, string collectionName, CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            DELETE FROM {TableName}
            WHERE collection=?;";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));

        // Awaited (rather than returned) so the command is disposed only after execution has finished.
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Deletes the record identified by collection and key.
    /// </summary>
    /// <param name="conn">Connection to run the command on.</param>
    /// <param name="collectionName">Collection the record belongs to.</param>
    /// <param name="key">Key of the record to delete.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task DeleteAsync(DuckDBConnection conn, string collectionName, string key, CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            DELETE FROM {TableName}
            WHERE collection=?1
            AND key=?2; ";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));
        cmd.Parameters.Add(new DuckDBParameter(key));

        // Awaited (rather than returned) so the command is disposed only after execution has finished.
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Deletes the rows of a collection whose key is NULL.
    /// </summary>
    /// <param name="conn">Connection to run the command on.</param>
    /// <param name="collectionName">Collection to clean up.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task DeleteEmptyAsync(DuckDBConnection conn, string collectionName, CancellationToken cancellationToken = default)
    {
        // NOTE(review): CreateCollectionAsync writes string.Empty (not NULL) as the placeholder key and
        // key is part of the PRIMARY KEY, so this predicate presumably never matches a row.
        // Confirm against the callers whether `key = ''` was intended before changing the SQL.
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            DELETE FROM {TableName}
            WHERE collection=?1
            AND key IS NULL";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));

        // Awaited (rather than returned) so the command is disposed only after execution has finished.
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System.Data.Common;
4+
5+
namespace Microsoft.SemanticKernel.Connectors.Memory.DuckDB;
6+
7+
/// <summary>
/// Convenience helpers for reading columns from a <see cref="DbDataReader"/>.
/// </summary>
internal static class DuckDBExtensions
{
    /// <summary>
    /// Reads the string value of the column called <paramref name="fieldName"/> from the current row.
    /// </summary>
    /// <param name="reader">Reader positioned on the row to read.</param>
    /// <param name="fieldName">Column name, resolved to its ordinal through <see cref="DbDataReader.GetOrdinal(string)"/>.</param>
    /// <returns>The column value as returned by <see cref="DbDataReader.GetString(int)"/>.</returns>
    public static string GetString(this DbDataReader reader, string fieldName)
        => reader.GetString(reader.GetOrdinal(fieldName));
}

0 commit comments

Comments
 (0)