@@ -15,18 +15,33 @@ public sealed class SqliteEmbeddingCache : IEmbeddingCache, IDisposable
1515{
// Schema for the embeddings_cache table plus its secondary indexes.
// In this diff the lines marked `-` are the previous single-column `key` schema and the
// lines marked `+` are its replacement: one row per
// (provider, model, dimensions, normalized, text_hash). text_length is stored alongside
// but is not part of the primary key.
// NOTE(review): idx_provider and idx_model cover leading prefixes of the composite primary
// key, which SQLite already backs with its own index - they look redundant; confirm before keeping.
// NOTE(review): CREATE TABLE IF NOT EXISTS leaves an already-existing table untouched, so a
// database created with the old `key` column would not gain the new columns - confirm a
// migration or cache reset is handled elsewhere.
1616 private const string CreateTableSql = """
1717 CREATE TABLE IF NOT EXISTS embeddings_cache (
18- key TEXT PRIMARY KEY,
18+ provider TEXT NOT NULL,
19+ model TEXT NOT NULL,
20+ dimensions INTEGER NOT NULL,
21+ normalized INTEGER NOT NULL,
22+ text_length INTEGER NOT NULL,
23+ text_hash TEXT NOT NULL,
1924 vector BLOB NOT NULL,
2025 token_count INTEGER NULL,
21- timestamp TEXT NOT NULL
26+ timestamp TEXT NOT NULL,
27+ PRIMARY KEY (provider, model, dimensions, normalized, text_hash)
2228 );
2329 CREATE INDEX IF NOT EXISTS idx_timestamp ON embeddings_cache(timestamp);
30+ CREATE INDEX IF NOT EXISTS idx_provider ON embeddings_cache(provider);
31+ CREATE INDEX IF NOT EXISTS idx_model ON embeddings_cache(provider, model);
32+ """ ;
33+
// Point lookup of a cached embedding: returns vector, token_count and timestamp for the row
// matching all five key columns (provider, model, dimensions, normalized, text_hash).
// text_length is not part of the filter.
34+ private const string SelectSql = """
35+ SELECT vector, token_count, timestamp FROM embeddings_cache
36+ WHERE provider = @provider AND model = @model AND dimensions = @dimensions
37+ AND normalized = @normalized AND text_hash = @textHash
2438 """ ;
2539
26- private const string SelectSql = "SELECT vector, token_count, timestamp FROM embeddings_cache WHERE key = @key" ;
// Insert-or-update statement for a cache entry. In this diff the `-` lines are the previous
// single-`key` form and the `+` lines are the replacement.
// The conflict target lists the same five columns as the table's primary key; on conflict only
// vector, token_count and timestamp are overwritten, so the text_length of an existing row is
// left as originally stored.
2740 private const string UpsertSql = """
28- INSERT INTO embeddings_cache (key, vector, token_count, timestamp) VALUES (@key, @vector, @tokenCount, @timestamp)
29- ON CONFLICT(key) DO UPDATE SET vector = @vector, token_count = @tokenCount, timestamp = @timestamp
41+ INSERT INTO embeddings_cache (provider, model, dimensions, normalized, text_length, text_hash, vector, token_count, timestamp)
42+ VALUES (@provider, @model, @dimensions, @normalized, @textLength, @textHash, @vector, @tokenCount, @timestamp)
43+ ON CONFLICT(provider, model, dimensions, normalized, text_hash)
44+ DO UPDATE SET vector = @vector, token_count = @tokenCount, timestamp = @timestamp
3045 """ ;
3146
// SQLite connection from which the lookup and upsert commands are created.
3247 private readonly SqliteConnection _connection ;
@@ -100,20 +115,23 @@ public SqliteEmbeddingCache(string dbPath, CacheModes mode, ILogger<SqliteEmbedd
100115 return null ;
101116 }
102117
103- var compositeKey = key . ToCompositeKey ( ) ;
104-
105118 var command = this . _connection . CreateCommand ( ) ;
106119 await using ( command . ConfigureAwait ( false ) )
107120 {
108121 command . CommandText = SelectSql ;
109- command . Parameters . AddWithValue ( "@key" , compositeKey ) ;
122+ command . Parameters . AddWithValue ( "@provider" , key . Provider ) ;
123+ command . Parameters . AddWithValue ( "@model" , key . Model ) ;
124+ command . Parameters . AddWithValue ( "@dimensions" , key . VectorDimensions ) ;
125+ command . Parameters . AddWithValue ( "@normalized" , key . IsNormalized ? 1 : 0 ) ;
126+ command . Parameters . AddWithValue ( "@textHash" , key . TextHash ) ;
110127
111128 var reader = await command . ExecuteReaderAsync ( ct ) . ConfigureAwait ( false ) ;
112129 await using ( reader . ConfigureAwait ( false ) )
113130 {
114131 if ( ! await reader . ReadAsync ( ct ) . ConfigureAwait ( false ) )
115132 {
116- this . _logger . LogTrace ( "Cache miss for key: {KeyPrefix}..." , compositeKey [ ..Math . Min ( 50 , compositeKey . Length ) ] ) ;
133+ this . _logger . LogTrace ( "Cache miss for {Provider}/{Model} hash: {HashPrefix}..." ,
134+ key . Provider , key . Model , key . TextHash [ ..Math . Min ( 16 , key . TextHash . Length ) ] ) ;
117135 return null ;
118136 }
119137
@@ -123,8 +141,8 @@ public SqliteEmbeddingCache(string dbPath, CacheModes mode, ILogger<SqliteEmbedd
123141 int ? tokenCount = reader [ "token_count" ] == DBNull . Value ? null : Convert . ToInt32 ( reader [ "token_count" ] , CultureInfo . InvariantCulture ) ;
124142 var timestamp = DateTimeOffset . Parse ( ( string ) reader [ "timestamp" ] , CultureInfo . InvariantCulture ) ;
125143
126- this . _logger . LogTrace ( "Cache hit for key : {KeyPrefix }..., vector dimensions: {Dimensions}" ,
127- compositeKey [ ..Math . Min ( 50 , compositeKey . Length ) ] , vector . Length ) ;
144+ this . _logger . LogTrace ( "Cache hit for {Provider}/{Model} hash : {HashPrefix }..., dimensions: {Dimensions}" ,
145+ key . Provider , key . Model , key . TextHash [ ..Math . Min ( 16 , key . TextHash . Length ) ] , vector . Length ) ;
128146
129147 return new CachedEmbedding
130148 {
@@ -148,23 +166,27 @@ public async Task StoreAsync(EmbeddingCacheKey key, float[] vector, int? tokenCo
148166 return ;
149167 }
150168
151- var compositeKey = key . ToCompositeKey ( ) ;
152169 var vectorBlob = FloatArrayToBytes ( vector ) ;
153170 var timestamp = DateTimeOffset . UtcNow . ToString ( "o" , CultureInfo . InvariantCulture ) ;
154171
155172 var command = this . _connection . CreateCommand ( ) ;
156173 await using ( command . ConfigureAwait ( false ) )
157174 {
158175 command . CommandText = UpsertSql ;
159- command . Parameters . AddWithValue ( "@key" , compositeKey ) ;
176+ command . Parameters . AddWithValue ( "@provider" , key . Provider ) ;
177+ command . Parameters . AddWithValue ( "@model" , key . Model ) ;
178+ command . Parameters . AddWithValue ( "@dimensions" , key . VectorDimensions ) ;
179+ command . Parameters . AddWithValue ( "@normalized" , key . IsNormalized ? 1 : 0 ) ;
180+ command . Parameters . AddWithValue ( "@textLength" , key . TextLength ) ;
181+ command . Parameters . AddWithValue ( "@textHash" , key . TextHash ) ;
160182 command . Parameters . AddWithValue ( "@vector" , vectorBlob ) ;
161183 command . Parameters . AddWithValue ( "@tokenCount" , tokenCount . HasValue ? tokenCount . Value : DBNull . Value ) ;
162184 command . Parameters . AddWithValue ( "@timestamp" , timestamp ) ;
163185
164186 await command . ExecuteNonQueryAsync ( ct ) . ConfigureAwait ( false ) ;
165187
166- this . _logger . LogTrace ( "Stored embedding in cache: {KeyPrefix} ..., vector dimensions: {Dimensions}" ,
167- compositeKey [ ..Math . Min ( 50 , compositeKey . Length ) ] , vector . Length ) ;
188+ this . _logger . LogTrace ( "Stored embedding in cache: {Provider}/{Model} hash: {HashPrefix} ..., dimensions: {Dimensions}" ,
189+ key . Provider , key . Model , key . TextHash [ ..Math . Min ( 16 , key . TextHash . Length ) ] , vector . Length ) ;
168190 }
169191 }
170192
0 commit comments