Skip to content

Commit b8fe07c

Browse files
committed
refactor: use composite primary key in embedding cache
Changed from a single TEXT key to a composite primary key with individual columns (provider, model, dimensions, normalized, text_hash) for:
- Easier debugging when inspecting SQLite database contents
- Selective SELECT/DELETE with individual discriminators
- Better query flexibility for cache management

Added indexes on provider and (provider, model) for common query patterns.
1 parent df9091c commit b8fe07c

File tree

1 file changed

+37
-15
lines changed

1 file changed

+37
-15
lines changed

src/Core/Embeddings/Cache/SqliteEmbeddingCache.cs

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,33 @@ public sealed class SqliteEmbeddingCache : IEmbeddingCache, IDisposable
1515
{
1616
private const string CreateTableSql = """
1717
CREATE TABLE IF NOT EXISTS embeddings_cache (
18-
key TEXT PRIMARY KEY,
18+
provider TEXT NOT NULL,
19+
model TEXT NOT NULL,
20+
dimensions INTEGER NOT NULL,
21+
normalized INTEGER NOT NULL,
22+
text_length INTEGER NOT NULL,
23+
text_hash TEXT NOT NULL,
1924
vector BLOB NOT NULL,
2025
token_count INTEGER NULL,
21-
timestamp TEXT NOT NULL
26+
timestamp TEXT NOT NULL,
27+
PRIMARY KEY (provider, model, dimensions, normalized, text_hash)
2228
);
2329
CREATE INDEX IF NOT EXISTS idx_timestamp ON embeddings_cache(timestamp);
30+
CREATE INDEX IF NOT EXISTS idx_provider ON embeddings_cache(provider);
31+
CREATE INDEX IF NOT EXISTS idx_model ON embeddings_cache(provider, model);
32+
""";
33+
34+
private const string SelectSql = """
35+
SELECT vector, token_count, timestamp FROM embeddings_cache
36+
WHERE provider = @provider AND model = @model AND dimensions = @dimensions
37+
AND normalized = @normalized AND text_hash = @textHash
2438
""";
2539

26-
private const string SelectSql = "SELECT vector, token_count, timestamp FROM embeddings_cache WHERE key = @key";
2740
private const string UpsertSql = """
28-
INSERT INTO embeddings_cache (key, vector, token_count, timestamp) VALUES (@key, @vector, @tokenCount, @timestamp)
29-
ON CONFLICT(key) DO UPDATE SET vector = @vector, token_count = @tokenCount, timestamp = @timestamp
41+
INSERT INTO embeddings_cache (provider, model, dimensions, normalized, text_length, text_hash, vector, token_count, timestamp)
42+
VALUES (@provider, @model, @dimensions, @normalized, @textLength, @textHash, @vector, @tokenCount, @timestamp)
43+
ON CONFLICT(provider, model, dimensions, normalized, text_hash)
44+
DO UPDATE SET vector = @vector, token_count = @tokenCount, timestamp = @timestamp
3045
""";
3146

3247
private readonly SqliteConnection _connection;
@@ -100,20 +115,23 @@ public SqliteEmbeddingCache(string dbPath, CacheModes mode, ILogger<SqliteEmbedd
100115
return null;
101116
}
102117

103-
var compositeKey = key.ToCompositeKey();
104-
105118
var command = this._connection.CreateCommand();
106119
await using (command.ConfigureAwait(false))
107120
{
108121
command.CommandText = SelectSql;
109-
command.Parameters.AddWithValue("@key", compositeKey);
122+
command.Parameters.AddWithValue("@provider", key.Provider);
123+
command.Parameters.AddWithValue("@model", key.Model);
124+
command.Parameters.AddWithValue("@dimensions", key.VectorDimensions);
125+
command.Parameters.AddWithValue("@normalized", key.IsNormalized ? 1 : 0);
126+
command.Parameters.AddWithValue("@textHash", key.TextHash);
110127

111128
var reader = await command.ExecuteReaderAsync(ct).ConfigureAwait(false);
112129
await using (reader.ConfigureAwait(false))
113130
{
114131
if (!await reader.ReadAsync(ct).ConfigureAwait(false))
115132
{
116-
this._logger.LogTrace("Cache miss for key: {KeyPrefix}...", compositeKey[..Math.Min(50, compositeKey.Length)]);
133+
this._logger.LogTrace("Cache miss for {Provider}/{Model} hash: {HashPrefix}...",
134+
key.Provider, key.Model, key.TextHash[..Math.Min(16, key.TextHash.Length)]);
117135
return null;
118136
}
119137

@@ -123,8 +141,8 @@ public SqliteEmbeddingCache(string dbPath, CacheModes mode, ILogger<SqliteEmbedd
123141
int? tokenCount = reader["token_count"] == DBNull.Value ? null : Convert.ToInt32(reader["token_count"], CultureInfo.InvariantCulture);
124142
var timestamp = DateTimeOffset.Parse((string)reader["timestamp"], CultureInfo.InvariantCulture);
125143

126-
this._logger.LogTrace("Cache hit for key: {KeyPrefix}..., vector dimensions: {Dimensions}",
127-
compositeKey[..Math.Min(50, compositeKey.Length)], vector.Length);
144+
this._logger.LogTrace("Cache hit for {Provider}/{Model} hash: {HashPrefix}..., dimensions: {Dimensions}",
145+
key.Provider, key.Model, key.TextHash[..Math.Min(16, key.TextHash.Length)], vector.Length);
128146

129147
return new CachedEmbedding
130148
{
@@ -148,23 +166,27 @@ public async Task StoreAsync(EmbeddingCacheKey key, float[] vector, int? tokenCo
148166
return;
149167
}
150168

151-
var compositeKey = key.ToCompositeKey();
152169
var vectorBlob = FloatArrayToBytes(vector);
153170
var timestamp = DateTimeOffset.UtcNow.ToString("o", CultureInfo.InvariantCulture);
154171

155172
var command = this._connection.CreateCommand();
156173
await using (command.ConfigureAwait(false))
157174
{
158175
command.CommandText = UpsertSql;
159-
command.Parameters.AddWithValue("@key", compositeKey);
176+
command.Parameters.AddWithValue("@provider", key.Provider);
177+
command.Parameters.AddWithValue("@model", key.Model);
178+
command.Parameters.AddWithValue("@dimensions", key.VectorDimensions);
179+
command.Parameters.AddWithValue("@normalized", key.IsNormalized ? 1 : 0);
180+
command.Parameters.AddWithValue("@textLength", key.TextLength);
181+
command.Parameters.AddWithValue("@textHash", key.TextHash);
160182
command.Parameters.AddWithValue("@vector", vectorBlob);
161183
command.Parameters.AddWithValue("@tokenCount", tokenCount.HasValue ? tokenCount.Value : DBNull.Value);
162184
command.Parameters.AddWithValue("@timestamp", timestamp);
163185

164186
await command.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
165187

166-
this._logger.LogTrace("Stored embedding in cache: {KeyPrefix}..., vector dimensions: {Dimensions}",
167-
compositeKey[..Math.Min(50, compositeKey.Length)], vector.Length);
188+
this._logger.LogTrace("Stored embedding in cache: {Provider}/{Model} hash: {HashPrefix}..., dimensions: {Dimensions}",
189+
key.Provider, key.Model, key.TextHash[..Math.Min(16, key.TextHash.Length)], vector.Length);
168190
}
169191
}
170192

0 commit comments

Comments
 (0)