From 2c5ee46d6b10ddb2e0c76c38f96420dc2a1e4aee Mon Sep 17 00:00:00 2001 From: cryo Date: Wed, 8 Oct 2025 14:49:05 +0000 Subject: [PATCH 1/2] fix: keep memory cache metrics accurate Signed-off-by: cryo --- .../pkg/cache/inmemory_cache.go | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/semantic-router/pkg/cache/inmemory_cache.go b/src/semantic-router/pkg/cache/inmemory_cache.go index 75298e19..c76e92e5 100644 --- a/src/semantic-router/pkg/cache/inmemory_cache.go +++ b/src/semantic-router/pkg/cache/inmemory_cache.go @@ -324,6 +324,10 @@ func (c *InMemoryCache) Close() error { // Clear all entries to free memory c.entries = nil + + // Zero cache entries metrics + metrics.UpdateCacheEntries("memory", 0) + return nil } @@ -355,7 +359,7 @@ func (c *InMemoryCache) GetStats() CacheStats { return stats } -// cleanupExpiredEntries removes entries that have exceeded their TTL +// cleanupExpiredEntries removes entries that have exceeded their TTL and updates the cache entry count metric to keep metrics in sync. // Caller must hold a write lock func (c *InMemoryCache) cleanupExpiredEntries() { if c.ttlSeconds <= 0 { @@ -372,20 +376,25 @@ func (c *InMemoryCache) cleanupExpiredEntries() { } } - if len(validEntries) < len(c.entries) { - expiredCount := len(c.entries) - len(validEntries) - observability.Debugf("InMemoryCache: TTL cleanup removed %d expired entries (remaining: %d)", - expiredCount, len(validEntries)) - observability.LogEvent("cache_cleanup", map[string]interface{}{ - "backend": "memory", - "expired_count": expiredCount, - "remaining_count": len(validEntries), - "ttl_seconds": c.ttlSeconds, - }) - c.entries = validEntries - cleanupTime := time.Now() - c.lastCleanupTime = &cleanupTime + if len(validEntries) == len(c.entries) { + return } + + expiredCount := len(c.entries) - len(validEntries) + observability.Debugf("InMemoryCache: TTL cleanup removed %d expired entries (remaining: %d)", + expiredCount, len(validEntries)) + observability.LogEvent("cache_cleanup", map[string]interface{}{ + "backend": "memory", + "expired_count": expiredCount, + "remaining_count": len(validEntries), + "ttl_seconds": c.ttlSeconds, + }) + c.entries = validEntries + cleanupTime := time.Now() + c.lastCleanupTime = &cleanupTime + + // Update metrics after cleanup + metrics.UpdateCacheEntries("memory", len(c.entries)) } // cleanupExpiredEntriesReadOnly identifies expired entries without modifying the cache From c93d0fdc07b84324a9049b4c6f0dd2a9d0e54fdb Mon Sep 17 00:00:00 2001 From: cryo Date: Wed, 8 Oct 2025 16:40:55 +0000 Subject: [PATCH 2/2] test: add test for metrics fix for UpdateWithResponse Signed-off-by: cryo --- src/semantic-router/go.mod | 1 + src/semantic-router/pkg/cache/cache_test.go | 30 +++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/semantic-router/go.mod b/src/semantic-router/go.mod index 20bf1da0..a12034d2 100644 --- a/src/semantic-router/go.mod +++ b/src/semantic-router/go.mod @@ -59,6 +59,7 @@ require ( github.com/json-iterator/go v1.1.12 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect diff --git a/src/semantic-router/pkg/cache/cache_test.go b/src/semantic-router/pkg/cache/cache_test.go index 8e6104ee..d99d2450 100644 --- a/src/semantic-router/pkg/cache/cache_test.go +++ b/src/semantic-router/pkg/cache/cache_test.go @@ -5,9 +5,13 @@ import ( "path/filepath" "strings" "testing" + "time" candle_binding "github.com/vllm-project/semantic-router/candle-binding" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics" + + "github.com/prometheus/client_golang/prometheus/testutil" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -501,6 +505,32 @@ development: Expect(response).To(Equal([]byte("response"))) }) + It("should update cache entries metric when cleanup occurs during UpdateWithResponse", func() { + // Reset gauge defensively so the assertion stands alone even if other specs fail early + metrics.UpdateCacheEntries("memory", 0) + + Expect(inMemoryCache.Close()).NotTo(HaveOccurred()) + inMemoryCache = cache.NewInMemoryCache(cache.InMemoryCacheOptions{ + Enabled: true, + SimilarityThreshold: 0.8, + MaxEntries: 100, + TTLSeconds: 1, + }) + + err := inMemoryCache.AddPendingRequest("expired-request-id", "test-model", "stale query", []byte("request")) + Expect(err).NotTo(HaveOccurred()) + Expect(testutil.ToFloat64(metrics.CacheEntriesTotal.WithLabelValues("memory"))).To(Equal(float64(1))) + + // Wait for TTL to expire before triggering the update path + time.Sleep(2 * time.Second) + + err = inMemoryCache.UpdateWithResponse("expired-request-id", []byte("response")) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("no pending request")) + + Expect(testutil.ToFloat64(metrics.CacheEntriesTotal.WithLabelValues("memory"))).To(BeZero()) + }) + It("should respect similarity threshold", func() { // Add entry with a very high similarity threshold highThresholdOptions := cache.InMemoryCacheOptions{