Skip to content

Commit b9ec398

Browse files
committed
fix: address PR review comments for embedding cache
Changes: - Allow null text in EmbeddingCacheKey.Create(), normalize to empty string - Remove unnecessary indexes (idx_timestamp, idx_provider, idx_model) - Rename 'normalized' column to 'is_normalized' for clarity - Rename 'timestamp' column to 'created_at' for clarity - Simplify timestamp documentation (debugging only, no invalidation)
1 parent b8fe07c commit b9ec398

File tree

6 files changed

+42
-26
lines changed

6 files changed

+42
-26
lines changed

docs

Submodule docs updated from 03362c9 to 23845bf

dotnet-tools.json

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"version": 1,
3+
"isRoot": true,
4+
"tools": {
5+
"dotnet-reportgenerator-globaltool": {
6+
"version": "5.5.0",
7+
"commands": [
8+
"reportgenerator"
9+
],
10+
"rollForward": false
11+
}
12+
}
13+
}

src/Core/Embeddings/Cache/CachedEmbedding.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ public sealed class CachedEmbedding
2525

2626
/// <summary>
2727
/// Timestamp when this embedding was stored in the cache.
28+
/// Useful for debugging and diagnostics.
2829
/// </summary>
2930
public required DateTimeOffset Timestamp { get; init; }
3031
}

src/Core/Embeddings/Cache/EmbeddingCacheKey.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,20 +53,22 @@ public sealed class EmbeddingCacheKey
5353
/// <param name="isNormalized">Whether vectors are normalized.</param>
5454
/// <param name="text">The text to hash.</param>
5555
/// <returns>A new EmbeddingCacheKey instance.</returns>
56-
/// <exception cref="ArgumentNullException">When provider, model, or text is null.</exception>
56+
/// <exception cref="ArgumentNullException">When provider or model is null.</exception>
5757
/// <exception cref="ArgumentOutOfRangeException">When vectorDimensions is less than 1.</exception>
5858
public static EmbeddingCacheKey Create(
5959
string provider,
6060
string model,
6161
int vectorDimensions,
6262
bool isNormalized,
63-
string text)
63+
string? text)
6464
{
6565
ArgumentNullException.ThrowIfNull(provider, nameof(provider));
6666
ArgumentNullException.ThrowIfNull(model, nameof(model));
67-
ArgumentNullException.ThrowIfNull(text, nameof(text));
6867
ArgumentOutOfRangeException.ThrowIfLessThan(vectorDimensions, 1, nameof(vectorDimensions));
6968

69+
// Normalize null to empty string for consistent hashing
70+
text ??= string.Empty;
71+
7072
return new EmbeddingCacheKey
7173
{
7274
Provider = provider,

src/Core/Embeddings/Cache/SqliteEmbeddingCache.cs

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,30 +18,27 @@ CREATE TABLE IF NOT EXISTS embeddings_cache (
1818
provider TEXT NOT NULL,
1919
model TEXT NOT NULL,
2020
dimensions INTEGER NOT NULL,
21-
normalized INTEGER NOT NULL,
21+
is_normalized INTEGER NOT NULL,
2222
text_length INTEGER NOT NULL,
2323
text_hash TEXT NOT NULL,
2424
vector BLOB NOT NULL,
2525
token_count INTEGER NULL,
26-
timestamp TEXT NOT NULL,
27-
PRIMARY KEY (provider, model, dimensions, normalized, text_hash)
26+
created_at TEXT NOT NULL,
27+
PRIMARY KEY (provider, model, dimensions, is_normalized, text_hash)
2828
);
29-
CREATE INDEX IF NOT EXISTS idx_timestamp ON embeddings_cache(timestamp);
30-
CREATE INDEX IF NOT EXISTS idx_provider ON embeddings_cache(provider);
31-
CREATE INDEX IF NOT EXISTS idx_model ON embeddings_cache(provider, model);
3229
""";
3330

3431
private const string SelectSql = """
35-
SELECT vector, token_count, timestamp FROM embeddings_cache
32+
SELECT vector, token_count, created_at FROM embeddings_cache
3633
WHERE provider = @provider AND model = @model AND dimensions = @dimensions
37-
AND normalized = @normalized AND text_hash = @textHash
34+
AND is_normalized = @isNormalized AND text_hash = @textHash
3835
""";
3936

4037
private const string UpsertSql = """
41-
INSERT INTO embeddings_cache (provider, model, dimensions, normalized, text_length, text_hash, vector, token_count, timestamp)
42-
VALUES (@provider, @model, @dimensions, @normalized, @textLength, @textHash, @vector, @tokenCount, @timestamp)
43-
ON CONFLICT(provider, model, dimensions, normalized, text_hash)
44-
DO UPDATE SET vector = @vector, token_count = @tokenCount, timestamp = @timestamp
38+
INSERT INTO embeddings_cache (provider, model, dimensions, is_normalized, text_length, text_hash, vector, token_count, created_at)
39+
VALUES (@provider, @model, @dimensions, @isNormalized, @textLength, @textHash, @vector, @tokenCount, @createdAt)
40+
ON CONFLICT(provider, model, dimensions, is_normalized, text_hash)
41+
DO UPDATE SET vector = @vector, token_count = @tokenCount, created_at = @createdAt
4542
""";
4643

4744
private readonly SqliteConnection _connection;
@@ -122,7 +119,7 @@ public SqliteEmbeddingCache(string dbPath, CacheModes mode, ILogger<SqliteEmbedd
122119
command.Parameters.AddWithValue("@provider", key.Provider);
123120
command.Parameters.AddWithValue("@model", key.Model);
124121
command.Parameters.AddWithValue("@dimensions", key.VectorDimensions);
125-
command.Parameters.AddWithValue("@normalized", key.IsNormalized ? 1 : 0);
122+
command.Parameters.AddWithValue("@isNormalized", key.IsNormalized ? 1 : 0);
126123
command.Parameters.AddWithValue("@textHash", key.TextHash);
127124

128125
var reader = await command.ExecuteReaderAsync(ct).ConfigureAwait(false);
@@ -139,7 +136,7 @@ public SqliteEmbeddingCache(string dbPath, CacheModes mode, ILogger<SqliteEmbedd
139136
var vector = BytesToFloatArray(vectorBlob);
140137

141138
int? tokenCount = reader["token_count"] == DBNull.Value ? null : Convert.ToInt32(reader["token_count"], CultureInfo.InvariantCulture);
142-
var timestamp = DateTimeOffset.Parse((string)reader["timestamp"], CultureInfo.InvariantCulture);
139+
var createdAt = DateTimeOffset.Parse((string)reader["created_at"], CultureInfo.InvariantCulture);
143140

144141
this._logger.LogTrace("Cache hit for {Provider}/{Model} hash: {HashPrefix}..., dimensions: {Dimensions}",
145142
key.Provider, key.Model, key.TextHash[..Math.Min(16, key.TextHash.Length)], vector.Length);
@@ -148,7 +145,7 @@ public SqliteEmbeddingCache(string dbPath, CacheModes mode, ILogger<SqliteEmbedd
148145
{
149146
Vector = vector,
150147
TokenCount = tokenCount,
151-
Timestamp = timestamp
148+
Timestamp = createdAt
152149
};
153150
}
154151
}
@@ -167,7 +164,7 @@ public async Task StoreAsync(EmbeddingCacheKey key, float[] vector, int? tokenCo
167164
}
168165

169166
var vectorBlob = FloatArrayToBytes(vector);
170-
var timestamp = DateTimeOffset.UtcNow.ToString("o", CultureInfo.InvariantCulture);
167+
var createdAt = DateTimeOffset.UtcNow.ToString("o", CultureInfo.InvariantCulture);
171168

172169
var command = this._connection.CreateCommand();
173170
await using (command.ConfigureAwait(false))
@@ -176,12 +173,12 @@ public async Task StoreAsync(EmbeddingCacheKey key, float[] vector, int? tokenCo
176173
command.Parameters.AddWithValue("@provider", key.Provider);
177174
command.Parameters.AddWithValue("@model", key.Model);
178175
command.Parameters.AddWithValue("@dimensions", key.VectorDimensions);
179-
command.Parameters.AddWithValue("@normalized", key.IsNormalized ? 1 : 0);
176+
command.Parameters.AddWithValue("@isNormalized", key.IsNormalized ? 1 : 0);
180177
command.Parameters.AddWithValue("@textLength", key.TextLength);
181178
command.Parameters.AddWithValue("@textHash", key.TextHash);
182179
command.Parameters.AddWithValue("@vector", vectorBlob);
183180
command.Parameters.AddWithValue("@tokenCount", tokenCount.HasValue ? tokenCount.Value : DBNull.Value);
184-
command.Parameters.AddWithValue("@timestamp", timestamp);
181+
command.Parameters.AddWithValue("@createdAt", createdAt);
185182

186183
await command.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
187184

tests/Core.Tests/Embeddings/EmbeddingCacheKeyTests.cs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,14 @@ public void Create_WithLongText_ShouldCaptureCorrectLength()
189189
}
190190

191191
[Fact]
192-
public void Create_WithNullText_ShouldThrowArgumentNullException()
192+
public void Create_WithNullText_ShouldNormalizeToEmptyString()
193193
{
194-
// Arrange & Act & Assert
195-
Assert.Throws<ArgumentNullException>(() =>
196-
EmbeddingCacheKey.Create("OpenAI", "model", 1536, true, null!));
194+
// Arrange & Act
195+
var key = EmbeddingCacheKey.Create("OpenAI", "model", 1536, true, null);
196+
197+
// Assert - null should be normalized to empty string
198+
Assert.Equal(0, key.TextLength);
199+
Assert.NotEmpty(key.TextHash); // Hash of empty string
197200
}
198201

199202
[Fact]

0 commit comments

Comments
 (0)