@@ -263,6 +263,11 @@ func (c *MilvusCache) createCollection() error {
263
263
PrimaryKey : true ,
264
264
TypeParams : map [string ]string {"max_length" : "64" },
265
265
},
266
+ {
267
+ Name : "request_id" ,
268
+ DataType : entity .FieldTypeVarChar ,
269
+ TypeParams : map [string ]string {"max_length" : "64" },
270
+ },
266
271
{
267
272
Name : "model" ,
268
273
DataType : entity .FieldTypeVarChar ,
@@ -328,50 +333,45 @@ func (c *MilvusCache) IsEnabled() bool {
328
333
}
329
334
330
335
// AddPendingRequest stores a request that is awaiting its response
331
- func (c * MilvusCache ) AddPendingRequest (model string , query string , requestBody []byte ) ( string , error ) {
336
+ func (c * MilvusCache ) AddPendingRequest (requestID string , model string , query string , requestBody []byte ) error {
332
337
start := time .Now ()
333
338
334
339
if ! c .enabled {
335
- return query , nil
340
+ return nil
336
341
}
337
342
338
343
// Store incomplete entry for later completion with response
339
- result , err := c .addEntry (model , query , requestBody , nil )
344
+ err := c .addEntry ("" , requestID , model , query , requestBody , nil )
340
345
341
346
if err != nil {
342
347
metrics .RecordCacheOperation ("milvus" , "add_pending" , "error" , time .Since (start ).Seconds ())
343
348
} else {
344
349
metrics .RecordCacheOperation ("milvus" , "add_pending" , "success" , time .Since (start ).Seconds ())
345
350
}
346
351
347
- return result , err
352
+ return err
348
353
}
349
354
350
355
// UpdateWithResponse completes a pending request by adding the response
351
- func (c * MilvusCache ) UpdateWithResponse (query string , responseBody []byte ) error {
356
+ func (c * MilvusCache ) UpdateWithResponse (requestID string , responseBody []byte ) error {
352
357
start := time .Now ()
353
358
354
359
if ! c .enabled {
355
360
return nil
356
361
}
357
362
358
- queryPreview := query
359
- if len (query ) > 50 {
360
- queryPreview = query [:50 ] + "..."
361
- }
362
-
363
- observability .Debugf ("MilvusCache.UpdateWithResponse: updating pending entry (query: %s, response_size: %d)" ,
364
- queryPreview , len (responseBody ))
363
+ observability .Debugf ("MilvusCache.UpdateWithResponse: updating pending entry (request_id: %s, response_size: %d)" ,
364
+ requestID , len (responseBody ))
365
365
366
366
// Find the pending entry and complete it with the response
367
367
// Query for the incomplete entry to retrieve its metadata
368
368
ctx := context .Background ()
369
- queryExpr := fmt .Sprintf ("query == \" %s\" && response_body == \" \" " , query )
369
+ queryExpr := fmt .Sprintf ("request_id == \" %s\" && response_body == \" \" " , requestID )
370
370
371
371
observability .Debugf ("MilvusCache.UpdateWithResponse: searching for pending entry with expr: %s" , queryExpr )
372
372
373
373
results , err := c .client .Query (ctx , c .collectionName , []string {}, queryExpr ,
374
- []string {"model" , "request_body" })
374
+ []string {"id" , " model" , "query " , "request_body" })
375
375
376
376
if err != nil {
377
377
observability .Debugf ("MilvusCache.UpdateWithResponse: query failed: %v" , err )
@@ -380,29 +380,27 @@ func (c *MilvusCache) UpdateWithResponse(query string, responseBody []byte) erro
380
380
}
381
381
382
382
if len (results ) == 0 {
383
- observability .Debugf ("MilvusCache.UpdateWithResponse: no pending entry found, adding as new complete entry" )
384
- // Create new complete entry when no pending entry exists
385
- _ , err := c .addEntry ("unknown" , query , []byte ("" ), responseBody )
386
- if err != nil {
387
- metrics .RecordCacheOperation ("milvus" , "update_response" , "error" , time .Since (start ).Seconds ())
388
- } else {
389
- metrics .RecordCacheOperation ("milvus" , "update_response" , "success" , time .Since (start ).Seconds ())
390
- }
391
- return err
383
+ observability .Debugf ("MilvusCache.UpdateWithResponse: no pending entry found" )
384
+ metrics .RecordCacheOperation ("milvus" , "update_response" , "error" , time .Since (start ).Seconds ())
385
+ return fmt .Errorf ("no pending entry found" )
392
386
}
393
387
394
388
// Get the model and request body from the pending entry
395
- modelColumn := results [0 ].(* entity.ColumnVarChar )
396
- requestColumn := results [1 ].(* entity.ColumnVarChar )
389
+ idColumn := results [0 ].(* entity.ColumnVarChar )
390
+ modelColumn := results [1 ].(* entity.ColumnVarChar )
391
+ queryColumn := results [2 ].(* entity.ColumnVarChar )
392
+ requestColumn := results [3 ].(* entity.ColumnVarChar )
397
393
398
- if modelColumn .Len () > 0 {
394
+ if idColumn .Len () > 0 {
395
+ id := idColumn .Data ()[0 ]
399
396
model := modelColumn .Data ()[0 ]
397
+ query := queryColumn .Data ()[0 ]
400
398
requestBody := requestColumn .Data ()[0 ]
401
399
402
- observability .Debugf ("MilvusCache.UpdateWithResponse: found pending entry, adding complete entry (model: %s)" , model )
400
+ observability .Debugf ("MilvusCache.UpdateWithResponse: found pending entry, adding complete entry (id: %s, model: %s)" , id , model )
403
401
404
402
// Create the complete entry with response data
405
- _ , err := c .addEntry (model , query , []byte (requestBody ), responseBody )
403
+ err := c .addEntry (id , requestID , model , query , []byte (requestBody ), responseBody )
406
404
if err != nil {
407
405
metrics .RecordCacheOperation ("milvus" , "update_response" , "error" , time .Since (start ).Seconds ())
408
406
return fmt .Errorf ("failed to add complete entry: %w" , err )
@@ -416,14 +414,14 @@ func (c *MilvusCache) UpdateWithResponse(query string, responseBody []byte) erro
416
414
}
417
415
418
416
// AddEntry stores a complete request-response pair in the cache
419
- func (c * MilvusCache ) AddEntry (model string , query string , requestBody , responseBody []byte ) error {
417
+ func (c * MilvusCache ) AddEntry (requestID string , model string , query string , requestBody , responseBody []byte ) error {
420
418
start := time .Now ()
421
419
422
420
if ! c .enabled {
423
421
return nil
424
422
}
425
423
426
- _ , err := c .addEntry (model , query , requestBody , responseBody )
424
+ err := c .addEntry ("" , requestID , model , query , requestBody , responseBody )
427
425
428
426
if err != nil {
429
427
metrics .RecordCacheOperation ("milvus" , "add_entry" , "error" , time .Since (start ).Seconds ())
@@ -435,20 +433,23 @@ func (c *MilvusCache) AddEntry(model string, query string, requestBody, response
435
433
}
436
434
437
435
// addEntry handles the internal logic for storing entries in Milvus
438
- func (c * MilvusCache ) addEntry (model string , query string , requestBody , responseBody []byte ) ( string , error ) {
436
+ func (c * MilvusCache ) addEntry (id string , requestID string , model string , query string , requestBody , responseBody []byte ) error {
439
437
// Generate semantic embedding for the query
440
438
embedding , err := candle_binding .GetEmbedding (query , 0 ) // Auto-detect dimension
441
439
if err != nil {
442
- return "" , fmt .Errorf ("failed to generate embedding: %w" , err )
440
+ return fmt .Errorf ("failed to generate embedding: %w" , err )
443
441
}
444
442
445
- // Generate unique ID
446
- id := fmt .Sprintf ("%x" , md5 .Sum ([]byte (fmt .Sprintf ("%s_%s_%d" , model , query , time .Now ().UnixNano ()))))
443
+ // Generate unique ID if not provided
444
+ if id == "" {
445
+ id = fmt .Sprintf ("%x" , md5 .Sum (fmt .Appendf (nil , "%s_%s_%d" , model , query , time .Now ().UnixNano ())))
446
+ }
447
447
448
448
ctx := context .Background ()
449
449
450
- // Prepare data for insertion
450
+ // Prepare data for upsert
451
451
ids := []string {id }
452
+ requestIDs := []string {requestID }
452
453
models := []string {model }
453
454
queries := []string {query }
454
455
requestBodies := []string {string (requestBody )}
@@ -458,20 +459,21 @@ func (c *MilvusCache) addEntry(model string, query string, requestBody, response
458
459
459
460
// Create columns
460
461
idColumn := entity .NewColumnVarChar ("id" , ids )
462
+ requestIDColumn := entity .NewColumnVarChar ("request_id" , requestIDs )
461
463
modelColumn := entity .NewColumnVarChar ("model" , models )
462
464
queryColumn := entity .NewColumnVarChar ("query" , queries )
463
465
requestColumn := entity .NewColumnVarChar ("request_body" , requestBodies )
464
466
responseColumn := entity .NewColumnVarChar ("response_body" , responseBodies )
465
467
embeddingColumn := entity .NewColumnFloatVector (c .config .Collection .VectorField .Name , len (embedding ), embeddings )
466
468
timestampColumn := entity .NewColumnInt64 ("timestamp" , timestamps )
467
469
468
- // Insert the entry into the collection
469
- observability .Debugf ("MilvusCache.addEntry: inserting entry into collection '%s' (embedding_dim: %d, request_size: %d, response_size: %d)" ,
470
+ // Upsert the entry into the collection
471
+ observability .Debugf ("MilvusCache.addEntry: upserting entry into collection '%s' (embedding_dim: %d, request_size: %d, response_size: %d)" ,
470
472
c .collectionName , len (embedding ), len (requestBody ), len (responseBody ))
471
- _ , err = c .client .Insert (ctx , c .collectionName , "" , idColumn , modelColumn , queryColumn , requestColumn , responseColumn , embeddingColumn , timestampColumn )
473
+ _ , err = c .client .Upsert (ctx , c .collectionName , "" , idColumn , requestIDColumn , modelColumn , queryColumn , requestColumn , responseColumn , embeddingColumn , timestampColumn )
472
474
if err != nil {
473
- observability .Debugf ("MilvusCache.addEntry: insert failed: %v" , err )
474
- return "" , fmt .Errorf ("failed to insert cache entry: %w" , err )
475
+ observability .Debugf ("MilvusCache.addEntry: upsert failed: %v" , err )
476
+ return fmt .Errorf ("failed to upsert cache entry: %w" , err )
475
477
}
476
478
477
479
// Ensure data is persisted to storage
@@ -483,11 +485,12 @@ func (c *MilvusCache) addEntry(model string, query string, requestBody, response
483
485
observability .LogEvent ("cache_entry_added" , map [string ]interface {}{
484
486
"backend" : "milvus" ,
485
487
"collection" : c .collectionName ,
488
+ "request_id" : requestID ,
486
489
"query" : query ,
487
490
"model" : model ,
488
491
"embedding_dimension" : len (embedding ),
489
492
})
490
- return query , nil
493
+ return nil
491
494
}
492
495
493
496
// FindSimilar searches for semantically similar cached requests
0 commit comments