From 53bb8e9a68874bb27e9fe8c08823293850192099 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 9 Mar 2026 16:49:02 +0530 Subject: [PATCH 01/46] feat: expensive cache --- router/core/expensive_query_cache.go | 93 +++++++++++ router/core/graph_server.go | 25 ++- router/core/operation_planner.go | 97 ++++++++---- router/core/operation_planner_test.go | 145 ++++++++++++++++++ router/core/reload_persistent_state.go | 54 ++++++- router/pkg/config/config.go | 2 + router/pkg/config/config.schema.json | 14 ++ .../pkg/config/testdata/config_defaults.json | 2 + router/pkg/config/testdata/config_full.json | 2 + 9 files changed, 404 insertions(+), 30 deletions(-) create mode 100644 router/core/expensive_query_cache.go create mode 100644 router/core/operation_planner_test.go diff --git a/router/core/expensive_query_cache.go b/router/core/expensive_query_cache.go new file mode 100644 index 0000000000..3b115a9b5d --- /dev/null +++ b/router/core/expensive_query_cache.go @@ -0,0 +1,93 @@ +package core + +import ( + "sync" + "time" +) + +// expensivePlanEntry holds a cached plan and the duration it took to plan. +type expensivePlanEntry struct { + plan *planWithMetaData + duration time.Duration +} + +// expensivePlanCache is a bounded, mutex-protected map that holds expensive plans +// that should not be subject to TinyLFU eviction in the main cache. +type expensivePlanCache struct { + mu sync.RWMutex + entries map[uint64]*expensivePlanEntry + maxSize int +} + +func newExpensivePlanCache(maxSize int) *expensivePlanCache { + return &expensivePlanCache{ + entries: make(map[uint64]*expensivePlanEntry, maxSize), + maxSize: maxSize, + } +} + +func (c *expensivePlanCache) Get(key uint64) (*planWithMetaData, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + + entry, ok := c.entries[key] + if !ok { + return nil, false + } + return entry.plan, true +} + +// Set stores a plan in the expensive cache. 
When at capacity, it only adds the +// new entry if its duration exceeds the current minimum; otherwise, it is skipped. +func (c *expensivePlanCache) Set(key uint64, plan *planWithMetaData, duration time.Duration) { + c.mu.Lock() + defer c.mu.Unlock() + + // If key already exists, update it + if _, ok := c.entries[key]; ok { + c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} + return + } + + // If not at capacity, just add + if len(c.entries) < c.maxSize { + c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} + return + } + + // At capacity: find the minimum and only evict if new entry is more expensive + var minKey uint64 + var minDur time.Duration + first := true + for k, e := range c.entries { + if first || e.duration < minDur { + minKey = k + minDur = e.duration + first = false + } + } + + if duration > minDur { + delete(c.entries, minKey) + c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} + } +} + +func (c *expensivePlanCache) IterValues(cb func(v *planWithMetaData) bool) { + c.mu.RLock() + defer c.mu.RUnlock() + + for _, e := range c.entries { + if cb(e.plan) { + return + } + } +} + +func (c *expensivePlanCache) Close() { + c.mu.Lock() + defer c.mu.Unlock() + + c.entries = nil +} + diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 325ec74c25..843d32e382 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -544,6 +544,7 @@ func (s *graphServer) setupEngineStatistics(baseAttributes []attribute.KeyValue) type graphMux struct { mux *chi.Mux + operationPlanner *OperationPlanner planCache *ristretto.Cache[uint64, *planWithMetaData] persistedOperationCache *ristretto.Cache[uint64, NormalizationCacheEntry] normalizationCache *ristretto.Cache[uint64, NormalizationCacheEntry] @@ -582,6 +583,15 @@ func (s *graphMux) buildOperationCaches(srv *graphServer) (computeSha256 bool, e IgnoreInternalCost: true, BufferItems: 64, } + if srv.cacheWarmup != nil && 
srv.cacheWarmup.Enabled && srv.cacheWarmup.InMemoryFallback { + planCacheConfig.OnEvict = func(item *ristretto.Item[*planWithMetaData]) { + if s.operationPlanner != nil && s.operationPlanner.expensiveCache != nil { + if s.operationPlanner.threshold > 0 && item.Value.planningDuration >= s.operationPlanner.threshold && item.Value.content != "" { + s.operationPlanner.expensiveCache.Set(item.Key, item.Value, item.Value.planningDuration) + } + } + } + } s.planCache, err = ristretto.NewCache[uint64, *planWithMetaData](planCacheConfig) if err != nil { return computeSha256, fmt.Errorf("failed to create planner cache: %w", err) @@ -782,6 +792,9 @@ func (s *graphMux) configureCacheMetrics(srv *graphServer, baseOtelAttributes [] } func (s *graphMux) Shutdown(ctx context.Context) error { + if s.operationPlanner != nil { + s.operationPlanner.Close() + } s.planCache.Close() s.persistedOperationCache.Close() s.normalizationCache.Close() @@ -1332,7 +1345,15 @@ func (s *graphServer) buildGraphMux( ComplexityLimits: s.securityConfiguration.ComplexityLimits, }) - operationPlanner := NewOperationPlanner(executor, gm.planCache, opts.ReloadPersistentState.inMemoryPlanCacheFallback.IsEnabled()) + operationPlanner := NewOperationPlanner( + s.logger, + executor, + gm.planCache, + opts.ReloadPersistentState.inMemoryPlanCacheFallback.IsEnabled(), + int(s.engineExecutionConfiguration.ExpensiveQueryCacheSize), + s.engineExecutionConfiguration.ExpensiveQueryThreshold, + ) + gm.operationPlanner = operationPlanner // We support the MCP only on the base graph. Feature flags are not supported yet. if opts.IsBaseGraph() && s.mcpServer != nil { @@ -1392,6 +1413,7 @@ func (s *graphServer) buildGraphMux( // and then reset the plan cache to the new plan cache for this start afterwards. 
warmupConfig.Source = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, gm.planCache) + opts.ReloadPersistentState.inMemoryPlanCacheFallback.setExpensiveCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) case s.cacheWarmup.Source.CdnSource.Enabled: if s.graphApiToken == "" { return nil, fmt.Errorf("graph token is required for cache warmup in order to communicate with the CDN") @@ -1402,6 +1424,7 @@ func (s *graphServer) buildGraphMux( if s.cacheWarmup.InMemoryFallback { warmupConfig.FallbackSource = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, gm.planCache) + opts.ReloadPersistentState.inMemoryPlanCacheFallback.setExpensiveCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) } cdnSource, err := NewCDNSource(s.cdnConfig.URL, s.graphApiToken, s.logger) if err != nil { diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index 12ff2f6929..de87e10022 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -2,7 +2,9 @@ package core import ( "errors" + "go.uber.org/zap" "strconv" + "time" "golang.org/x/sync/singleflight" @@ -22,14 +24,20 @@ type planWithMetaData struct { typeFieldUsageInfo []*graphqlschemausage.TypeFieldUsageInfo argumentUsageInfo []*graphqlmetricsv1.ArgumentUsageInfo content string + operationName string + planningDuration time.Duration } type OperationPlanner struct { - sf singleflight.Group - planCache ExecutionPlanCache[uint64, *planWithMetaData] - executor *Executor - trackUsageInfo bool - operationContent bool + sf singleflight.Group + planCache ExecutionPlanCache[uint64, *planWithMetaData] + expensiveCache *expensivePlanCache + executor *Executor + 
trackUsageInfo bool + useFallback bool + logger *zap.Logger + + threshold time.Duration } type operationPlannerOpts struct { @@ -47,17 +55,35 @@ type ExecutionPlanCache[K any, V any] interface { Close() } -func NewOperationPlanner(executor *Executor, planCache ExecutionPlanCache[uint64, *planWithMetaData], storeContent bool) *OperationPlanner { - return &OperationPlanner{ - planCache: planCache, - executor: executor, - trackUsageInfo: executor.TrackUsageInfo, - operationContent: storeContent, +func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache ExecutionPlanCache[uint64, *planWithMetaData], inMemoryPlanCacheFallback bool, expensiveCacheSize int, threshold time.Duration) *OperationPlanner { + p := &OperationPlanner{ + logger: logger, + planCache: planCache, + executor: executor, + trackUsageInfo: executor.TrackUsageInfo, + useFallback: inMemoryPlanCacheFallback, + } + + if inMemoryPlanCacheFallback { + p.expensiveCache = newExpensivePlanCache(expensiveCacheSize) + p.threshold = threshold } + + return p } -func (p *OperationPlanner) preparePlan(ctx *operationContext, opts operationPlannerOpts) (*planWithMetaData, error) { - doc, report := astparser.ParseGraphqlDocumentString(ctx.content) +// Close releases expensive cache resources. +func (p *OperationPlanner) Close() { + if !p.useFallback { + return + } + p.expensiveCache.Close() +} + +// planOperation performs the core planning work: parse, plan, and postprocess. +// This is the single source of truth for query planning logic. 
+func (p *OperationPlanner) planOperation(content string, name string, includeQueryPlan bool) (*planWithMetaData, error) { + doc, report := astparser.ParseGraphqlDocumentString(content) if report.HasErrors() { return nil, &reportError{report: &report} } @@ -67,16 +93,11 @@ func (p *OperationPlanner) preparePlan(ctx *operationContext, opts operationPlan return nil, err } - var ( - preparedPlan plan.Plan - ) - - // create and postprocess the plan - // planning uses the router schema - if ctx.executionOptions.IncludeQueryPlanInResponse { - preparedPlan = planner.Plan(&doc, p.executor.RouterSchema, ctx.name, &report, plan.IncludeQueryPlanInResponse()) + var preparedPlan plan.Plan + if includeQueryPlan { + preparedPlan = planner.Plan(&doc, p.executor.RouterSchema, name, &report, plan.IncludeQueryPlanInResponse()) } else { - preparedPlan = planner.Plan(&doc, p.executor.RouterSchema, ctx.name, &report) + preparedPlan = planner.Plan(&doc, p.executor.RouterSchema, name, &report) } if report.HasErrors() { return nil, &reportError{report: &report} @@ -84,19 +105,28 @@ func (p *OperationPlanner) preparePlan(ctx *operationContext, opts operationPlan post := postprocess.NewProcessor(postprocess.CollectDataSourceInfo()) post.Process(preparedPlan) - out := &planWithMetaData{ + return &planWithMetaData{ preparedPlan: preparedPlan, operationDocument: &doc, schemaDocument: p.executor.RouterSchema, + }, nil +} + +func (p *OperationPlanner) preparePlan(ctx *operationContext, opts operationPlannerOpts) (*planWithMetaData, error) { + out, err := p.planOperation(ctx.content, ctx.name, ctx.executionOptions.IncludeQueryPlanInResponse) + if err != nil { + return nil, err } + out.operationName = ctx.name + if opts.operationContent { out.content = ctx.Content() } if p.trackUsageInfo { - out.typeFieldUsageInfo = graphqlschemausage.GetTypeFieldUsageInfo(preparedPlan) - out.argumentUsageInfo, err = graphqlschemausage.GetArgumentUsageInfo(&doc, p.executor.RouterSchema, ctx.variables, preparedPlan, 
ctx.remapVariables) + out.typeFieldUsageInfo = graphqlschemausage.GetTypeFieldUsageInfo(out.preparedPlan) + out.argumentUsageInfo, err = graphqlschemausage.GetArgumentUsageInfo(out.operationDocument, p.executor.RouterSchema, ctx.variables, out.preparedPlan, ctx.remapVariables) if err != nil { return nil, err } @@ -139,19 +169,32 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions // try to get a prepared plan for this operation ID from the cache cachedPlan, ok := p.planCache.Get(operationID) if ok && cachedPlan != nil { - // re-use a prepared plan + // re-use a prepared plan from the main cache opContext.preparedPlan = cachedPlan opContext.planCacheHit = true + } else if p.useFallback { + if cachedPlan, ok = p.expensiveCache.Get(operationID); ok { + // found in the expensive query cache — re-use and re-insert into main cache + opContext.preparedPlan = cachedPlan + opContext.planCacheHit = true + p.planCache.Set(operationID, cachedPlan, 1) + } } else { // prepare a new plan using single flight // this ensures that we only prepare the plan once for this operation ID operationIDStr := strconv.FormatUint(operationID, 10) sharedPreparedPlan, err, _ := p.sf.Do(operationIDStr, func() (interface{}, error) { - prepared, err := p.preparePlan(opContext, operationPlannerOpts{operationContent: p.operationContent}) + start := time.Now() + prepared, err := p.preparePlan(opContext, operationPlannerOpts{operationContent: p.useFallback}) if err != nil { return nil, err } + prepared.planningDuration = time.Since(start) + p.planCache.Set(operationID, prepared, 1) + if p.useFallback && p.threshold > 0 && prepared.planningDuration >= p.threshold && prepared.content != "" { + p.expensiveCache.Set(operationID, prepared, prepared.planningDuration) + } return prepared, nil }) if err != nil { diff --git a/router/core/operation_planner_test.go b/router/core/operation_planner_test.go new file mode 100644 index 0000000000..72c25508d9 --- /dev/null +++ 
b/router/core/operation_planner_test.go @@ -0,0 +1,145 @@ +package core + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestExpensivePlanCache_GetSet(t *testing.T) { + c := newExpensivePlanCache(10) + + plan1 := &planWithMetaData{content: "query { a }"} + plan2 := &planWithMetaData{content: "query { b }"} + + // Miss + _, ok := c.Get(1) + require.False(t, ok) + + // Set and get + c.Set(1, plan1, 10*time.Millisecond) + got, ok := c.Get(1) + require.True(t, ok) + require.Equal(t, plan1, got) + + // Different key + c.Set(2, plan2, 20*time.Millisecond) + got, ok = c.Get(2) + require.True(t, ok) + require.Equal(t, plan2, got) + + // Original still there + got, ok = c.Get(1) + require.True(t, ok) + require.Equal(t, plan1, got) +} + +func TestExpensivePlanCache_BoundedSize(t *testing.T) { + c := newExpensivePlanCache(3) + + c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) + c.Set(3, &planWithMetaData{content: "q3"}, 30*time.Millisecond) + + // Cache is full (3/3). 
Adding a 4th with higher duration should evict the shortest (key=1, 10ms) + c.Set(4, &planWithMetaData{content: "q4"}, 25*time.Millisecond) + + // Key 1 should be evicted (it had the shortest duration: 10ms) + _, ok := c.Get(1) + require.False(t, ok, "key 1 should have been evicted") + + // Keys 2, 3, 4 should remain + _, ok = c.Get(2) + require.True(t, ok) + _, ok = c.Get(3) + require.True(t, ok) + _, ok = c.Get(4) + require.True(t, ok) +} + +func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { + c := newExpensivePlanCache(3) + + c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Second) + c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Second) + c.Set(3, &planWithMetaData{content: "q3"}, 30*time.Second) + + // Try to add a cheaper entry (5s < 10s minimum) — should be rejected + c.Set(4, &planWithMetaData{content: "q4"}, 5*time.Second) + + _, ok := c.Get(4) + require.False(t, ok, "cheaper entry should not be added when cache is full") + + // All original entries should remain + _, ok = c.Get(1) + require.True(t, ok) + _, ok = c.Get(2) + require.True(t, ok) + _, ok = c.Get(3) + require.True(t, ok) +} + +func TestExpensivePlanCache_UpdateExisting(t *testing.T) { + c := newExpensivePlanCache(2) + + plan1 := &planWithMetaData{content: "q1"} + plan1Updated := &planWithMetaData{content: "q1-updated"} + + c.Set(1, plan1, 10*time.Millisecond) + c.Set(1, plan1Updated, 50*time.Millisecond) + + got, ok := c.Get(1) + require.True(t, ok) + require.Equal(t, "q1-updated", got.content) + + // Updating an existing key should not increase the count + c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) + _, ok = c.Get(1) + require.True(t, ok, "key 1 should still exist after adding key 2 (capacity is 2)") + _, ok = c.Get(2) + require.True(t, ok) +} + +func TestExpensivePlanCache_IterValues(t *testing.T) { + c := newExpensivePlanCache(10) + + c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + c.Set(2, &planWithMetaData{content: "q2"}, 
20*time.Millisecond) + c.Set(3, &planWithMetaData{content: "q3"}, 30*time.Millisecond) + + var contents []string + c.IterValues(func(v *planWithMetaData) bool { + contents = append(contents, v.content) + return false + }) + require.Len(t, contents, 3) + require.ElementsMatch(t, []string{"q1", "q2", "q3"}, contents) +} + +func TestExpensivePlanCache_IterValues_EarlyStop(t *testing.T) { + c := newExpensivePlanCache(10) + + c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) + c.Set(3, &planWithMetaData{content: "q3"}, 30*time.Millisecond) + + count := 0 + c.IterValues(func(v *planWithMetaData) bool { + count++ + return true // stop after first + }) + require.Equal(t, 1, count) +} + +func TestExpensivePlanCache_Close(t *testing.T) { + c := newExpensivePlanCache(10) + c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + + c.Close() + + // After close, entries map should be nil + _, ok := c.Get(1) + require.False(t, ok) +} + diff --git a/router/core/reload_persistent_state.go b/router/core/reload_persistent_state.go index d842b239a5..54a41acc71 100644 --- a/router/core/reload_persistent_state.go +++ b/router/core/reload_persistent_state.go @@ -49,6 +49,7 @@ func (s *ReloadPersistentState) OnRouterConfigReload() { type InMemoryPlanCacheFallback struct { mu sync.RWMutex queriesForFeatureFlag map[string]any + expensiveCaches map[string]*expensivePlanCache logger *zap.Logger } @@ -67,11 +68,15 @@ func (c *InMemoryPlanCacheFallback) updateStateFromConfig(config *Config) { if c.queriesForFeatureFlag == nil { c.queriesForFeatureFlag = make(map[string]any) } + if c.expensiveCaches == nil { + c.expensiveCaches = make(map[string]*expensivePlanCache) + } return } // Reset the map to free up memory c.queriesForFeatureFlag = nil + c.expensiveCaches = nil } // IsEnabled returns whether the in-memory fallback cache is enabled @@ -117,7 +122,20 @@ func (c *InMemoryPlanCacheFallback) 
setPlanCacheForFF(featureFlagKey string, cac c.queriesForFeatureFlag[featureFlagKey] = cache } -// extractQueriesAndOverridePlanCache extracts the queries from the plan cache and overrides the internal map +// setExpensiveCacheForFF stores the expensive plan cache reference for a feature flag key +// so that expensive query entries survive config reloads. +func (c *InMemoryPlanCacheFallback) setExpensiveCacheForFF(featureFlagKey string, cache *expensivePlanCache) { + c.mu.Lock() + defer c.mu.Unlock() + + if c.expensiveCaches == nil || cache == nil { + return + } + c.expensiveCaches[featureFlagKey] = cache +} + +// extractQueriesAndOverridePlanCache extracts the queries from the plan cache and overrides the internal map. +// It also merges entries from the expensive plan cache so they survive config reloads. func (c *InMemoryPlanCacheFallback) extractQueriesAndOverridePlanCache() { c.mu.Lock() defer c.mu.Unlock() @@ -129,10 +147,16 @@ func (c *InMemoryPlanCacheFallback) extractQueriesAndOverridePlanCache() { fallbackMap := make(map[string]any) for k, v := range c.queriesForFeatureFlag { if cache, ok := v.(planCache); ok { - fallbackMap[k] = convertToNodeOperation(cache) + ops := convertToNodeOperation(cache) + // Merge expensive cache entries that may not be in the main cache + if expCache, hasExp := c.expensiveCaches[k]; hasExp { + ops = mergeExpensiveCacheOperations(ops, expCache) + } + fallbackMap[k] = ops } } c.queriesForFeatureFlag = fallbackMap + c.expensiveCaches = make(map[string]*expensivePlanCache) } // cleanupUnusedFeatureFlags removes any feature flags that were removed from the execution config @@ -152,8 +176,10 @@ func (c *InMemoryPlanCacheFallback) cleanupUnusedFeatureFlags(routerCfg *nodev1. 
} if routerCfg.FeatureFlagConfigs == nil { delete(c.queriesForFeatureFlag, ffName) + delete(c.expensiveCaches, ffName) } else if _, exists := routerCfg.FeatureFlagConfigs.ConfigByFeatureFlagName[ffName]; !exists { delete(c.queriesForFeatureFlag, ffName) + delete(c.expensiveCaches, ffName) } } } @@ -169,3 +195,27 @@ func convertToNodeOperation(data planCache) []*nodev1.Operation { }) return items } + +// mergeExpensiveCacheOperations appends operations from the expensive cache that +// are not already present in the existing operations list. +func mergeExpensiveCacheOperations(ops []*nodev1.Operation, expCache *expensivePlanCache) []*nodev1.Operation { + seen := make(map[string]struct{}, len(ops)) + for _, op := range ops { + if op.Request != nil { + seen[op.Request.Query] = struct{}{} + } + } + expCache.IterValues(func(v *planWithMetaData) bool { + if v.content != "" { + if _, exists := seen[v.content]; !exists { + ops = append(ops, &nodev1.Operation{ + Request: &nodev1.OperationRequest{Query: v.content}, + }) + seen[v.content] = struct{}{} + } + } + return false + }) + return ops +} + diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index e0998c6e4c..1f822fab1a 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -425,6 +425,8 @@ type EngineExecutionConfiguration struct { WebSocketClientPingTimeout time.Duration `envDefault:"30s" env:"ENGINE_WEBSOCKET_CLIENT_PING_TIMEOUT" yaml:"websocket_client_ping_timeout,omitempty"` WebSocketClientFrameTimeout time.Duration `envDefault:"100ms" env:"ENGINE_WEBSOCKET_CLIENT_FRAME_TIMEOUT" yaml:"websocket_client_frame_timeout,omitempty"` ExecutionPlanCacheSize int64 `envDefault:"1024" env:"ENGINE_EXECUTION_PLAN_CACHE_SIZE" yaml:"execution_plan_cache_size,omitempty"` + ExpensiveQueryCacheSize int64 `envDefault:"100" env:"ENGINE_EXPENSIVE_QUERY_CACHE_SIZE" yaml:"expensive_query_cache_size,omitempty"` + ExpensiveQueryThreshold time.Duration `envDefault:"5s" 
env:"ENGINE_EXPENSIVE_QUERY_THRESHOLD" yaml:"expensive_query_threshold,omitempty"` MinifySubgraphOperations bool `envDefault:"true" env:"ENGINE_MINIFY_SUBGRAPH_OPERATIONS" yaml:"minify_subgraph_operations"` EnablePersistedOperationsCache bool `envDefault:"true" env:"ENGINE_ENABLE_PERSISTED_OPERATIONS_CACHE" yaml:"enable_persisted_operations_cache"` EnableNormalizationCache bool `envDefault:"true" env:"ENGINE_ENABLE_NORMALIZATION_CACHE" yaml:"enable_normalization_cache"` diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 167cb875bd..6f2b6fc08f 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -3286,6 +3286,20 @@ "default": 1024, "description": "The size of the execution plan cache." }, + "expensive_query_cache_size": { + "type": "integer", + "default": 100, + "description": "The maximum number of entries in the expensive query plan cache. Expensive queries are protected from TinyLFU eviction in the main plan cache." + }, + "expensive_query_threshold": { + "type": "string", + "format": "go-duration", + "description": "The minimum planning duration for a query to be considered expensive and protected from TinyLFU cache eviction. 
Queries exceeding this threshold are re-validated in the background before promotion to the expensive cache.", + "default": "5s", + "duration": { + "minimum": "100ms" + } + }, "operation_hash_cache_size": { "type": "integer", "default": 2048, diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index 710630ae04..d8583a3660 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -419,6 +419,8 @@ "WebSocketClientPingTimeout": 30000000000, "WebSocketClientFrameTimeout": 100000000, "ExecutionPlanCacheSize": 1024, + "ExpensiveQueryCacheSize": 100, + "ExpensiveQueryThreshold": 5000000000, "MinifySubgraphOperations": true, "EnablePersistedOperationsCache": true, "EnableNormalizationCache": true, diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index fa60cb18e8..1f7ab012a0 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -818,6 +818,8 @@ "WebSocketClientPingTimeout": 30000000000, "WebSocketClientFrameTimeout": 100000000, "ExecutionPlanCacheSize": 1024, + "ExpensiveQueryCacheSize": 100, + "ExpensiveQueryThreshold": 5000000000, "MinifySubgraphOperations": true, "EnablePersistedOperationsCache": true, "EnableNormalizationCache": true, From fa033d7699895213921c462e42f7fcb5a8b0c21f Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 9 Mar 2026 18:52:52 +0530 Subject: [PATCH 02/46] feat: expensive query cache --- router-tests/expensive_query_cache_test.go | 523 ++++++++++++++++++ router/core/context.go | 4 +- router/core/expensive_query_cache.go | 4 + ..._test.go => expensive_query_cache_test.go} | 130 +++++ router/core/graph_server.go | 22 +- router/core/graphql_handler.go | 8 + router/core/graphql_prehandler.go | 6 + router/core/operation_planner.go | 8 +- router/pkg/otel/attributes.go | 1 + 9 files changed, 694 insertions(+), 12 deletions(-) 
create mode 100644 router-tests/expensive_query_cache_test.go rename router/core/{operation_planner_test.go => expensive_query_cache_test.go} (54%) diff --git a/router-tests/expensive_query_cache_test.go b/router-tests/expensive_query_cache_test.go new file mode 100644 index 0000000000..d5d065b5e5 --- /dev/null +++ b/router-tests/expensive_query_cache_test.go @@ -0,0 +1,523 @@ +package integration + +import ( + "context" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/wundergraph/cosmo/router-tests/testenv" + "github.com/wundergraph/cosmo/router/core" + nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/cosmo/router/pkg/config" + "github.com/wundergraph/cosmo/router/pkg/controlplane/configpoller" + "github.com/wundergraph/cosmo/router/pkg/otel" + "github.com/wundergraph/cosmo/router/pkg/trace/tracetest" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" +) + +func TestExpensiveQueryCache(t *testing.T) { + t.Parallel() + + // distinctQueries are queries that normalize to different plans, used to overflow a small main cache. + distinctQueries := []testenv.GraphQLRequest{ + {Query: `{ employees { id } }`}, + {Query: `query { employees { id details { forename } } }`}, + {Query: `query { employees { id details { forename surname } } }`}, + {Query: `query m($id: Int!){ employee(id: $id) { id details { forename surname } } }`, Variables: []byte(`{"id": 1}`)}, + } + + // waitForExpensiveCacheHits sends all distinctQueries, retrying until each one + // is served from either the main or expensive cache. Then it does a single + // final pass and returns the number of expensive cache hits. 
+ waitForExpensiveCacheHits := func(t *testing.T, xEnv *testenv.Environment, queries []testenv.GraphQLRequest, extraChecks ...func(*assert.CollectT, *testenv.TestResponse)) int { + t.Helper() + + // Wait until every query is served from some cache + for _, q := range queries { + require.EventuallyWithT(t, func(ct *assert.CollectT) { + res := xEnv.MakeGraphQLRequestOK(q) + assert.Equal(ct, 200, res.Response.StatusCode) + planHit := res.Response.Header.Get("x-wg-execution-plan-cache") == "HIT" + expensiveHit := res.Response.Header.Get("X-WG-Expensive-Plan-Cache") == "HIT" + assert.True(ct, planHit || expensiveHit, "expected plan to be served from main or expensive cache") + for _, check := range extraChecks { + check(ct, res) + } + }, 2*time.Second, 100*time.Millisecond) + } + + // Single pass to count expensive cache hits + expensiveCacheHits := 0 + for _, q := range queries { + res := xEnv.MakeGraphQLRequestOK(q) + if res.Response.Header.Get("X-WG-Expensive-Plan-Cache") == "HIT" { + expensiveCacheHits++ + } + } + return expensiveCacheHits + } + + t.Run("expensive cache serves evicted plans from small main cache", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + // Tiny main cache: only 1 plan fits in Ristretto + cfg.ExecutionPlanCacheSize = 1 + // All plans qualify as expensive (threshold effectively zero) + cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Send all distinct queries — each is a MISS and gets planned via singleflight. 
+ for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + hits := waitForExpensiveCacheHits(t, xEnv, distinctQueries) + require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache") + }) + }) + + t.Run("evicted plans survive config reload via expensive cache with small main cache", func(t *testing.T) { + t.Parallel() + + pm := ConfigPollerMock{ + ready: make(chan struct{}), + } + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + Source: config.CacheWarmupSource{ + CdnSource: config.CacheWarmupCDNSource{ + Enabled: true, + }, + }, + }), + core.WithConfigVersionHeader(true), + }, + RouterConfig: &testenv.RouterConfig{ + ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { + pm.initConfig = config + return &pm + }, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Populate caches with multiple distinct queries + for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + // Trigger config reload — new Ristretto cache is created (size 1). + <-pm.ready + pm.initConfig.Version = "updated" + require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) + + // After reload, all queries should still be available via expensive cache. 
+ hits := waitForExpensiveCacheHits(t, xEnv, distinctQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { + assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) + }) + require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache after config reload") + }) + }) + + t.Run("plans survive multiple config reloads with small main cache", func(t *testing.T) { + t.Parallel() + + pm := ConfigPollerMock{ + ready: make(chan struct{}), + } + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + Source: config.CacheWarmupSource{ + CdnSource: config.CacheWarmupCDNSource{ + Enabled: true, + }, + }, + }), + core.WithConfigVersionHeader(true), + }, + RouterConfig: &testenv.RouterConfig{ + ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { + pm.initConfig = config + return &pm + }, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warm up with distinct queries + for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + <-pm.ready + + // First reload + pm.initConfig.Version = "v2" + require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) + + waitForExpensiveCacheHits(t, xEnv, distinctQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { + assert.Equal(ct, "v2", res.Response.Header.Get("X-Router-Config-Version")) + }) + + // Second reload + pm.initConfig.Version = "v3" + require.NoError(t, pm.updateConfig(pm.initConfig, "v2")) + + hits := waitForExpensiveCacheHits(t, xEnv, distinctQueries, func(ct *assert.CollectT, res 
*testenv.TestResponse) { + assert.Equal(ct, "v3", res.Response.Header.Get("X-Router-Config-Version")) + }) + require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache after multiple reloads") + }) + }) + + t.Run("expensive cache works without config reload", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryCacheSize = 10 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Send multiple distinct queries to overflow the tiny main cache + for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + hits := waitForExpensiveCacheHits(t, xEnv, distinctQueries) + require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache") + }) + }) + + t.Run("router shuts down cleanly with expensive cache enabled", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryCacheSize = 50 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Make some requests to populate both caches + for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + } + // testenv.Run handles shutdown — 
test verifies no panic or hang + }) + }) + + t.Run("expensive cache hit is recorded in span attributes", func(t *testing.T) { + t.Parallel() + + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + + testenv.Run(t, &testenv.Config{ + TraceExporter: exporter, + MetricReader: metricReader, + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Send multiple distinct queries to overflow the tiny main cache + for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + // Wait for caches to converge, then reset spans for a clean measurement + waitForExpensiveCacheHits(t, xEnv, distinctQueries) + exporter.Reset() + + // Final pass to generate spans with known state + for _, q := range distinctQueries { + xEnv.MakeGraphQLRequestOK(q) + } + + // Verify spans contain the expensive_plan_cache_hit attribute + sn := exporter.GetSpans().Snapshots() + expensiveHitSpanFound := false + for _, span := range sn { + if span.Name() == "Operation - Plan" { + for _, attr := range span.Attributes() { + if attr.Key == otel.WgEngineExpensivePlanCacheHit && attr.Value.AsBool() { + expensiveHitSpanFound = true + // plan_cache_hit should be false for expensive cache hits + require.Contains(t, span.Attributes(), otel.WgEnginePlanCacheHit.Bool(false)) + } + } + } + } + require.True(t, expensiveHitSpanFound, "expected at least one 'Operation - Plan' span with wg.engine.expensive_plan_cache_hit=true") + + // Verify OTEL metrics include the expensive_plan_cache_hit attribute + rm := 
metricdata.ResourceMetrics{} + err := metricReader.Collect(context.Background(), &rm) + require.NoError(t, err) + + metricScope := GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router") + require.NotNil(t, metricScope) + + planningMetric := GetMetricByName(metricScope, "router.graphql.operation.planning_time") + require.NotNil(t, planningMetric) + + hist := planningMetric.Data.(metricdata.Histogram[float64]) + expensiveHitMetricFound := false + for _, dp := range hist.DataPoints { + val, found := dp.Attributes.Value(otel.WgEngineExpensivePlanCacheHit) + if found && val.AsBool() { + expensiveHitMetricFound = true + // plan_cache_hit should be false for expensive cache hits + planVal, planFound := dp.Attributes.Value(otel.WgEnginePlanCacheHit) + require.True(t, planFound) + require.False(t, planVal.AsBool()) + break + } + } + require.True(t, expensiveHitMetricFound, "expected planning_time metric with wg.engine.expensive_plan_cache_hit=true") + }) + }) + + t.Run("expensive cache hit is recorded in Prometheus metrics", func(t *testing.T) { + t.Parallel() + + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + + testenv.Run(t, &testenv.Config{ + TraceExporter: exporter, + MetricReader: metricReader, + PrometheusRegistry: promRegistry, + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Overflow the tiny main cache + for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + // Wait for caches to converge, then make a final pass 
for Prometheus + waitForExpensiveCacheHits(t, xEnv, distinctQueries) + + for _, q := range distinctQueries { + xEnv.MakeGraphQLRequestOK(q) + } + + // Gather Prometheus metrics + mf, err := promRegistry.Gather() + require.NoError(t, err) + + planningTime := findMetricFamilyByName(mf, "router_graphql_operation_planning_time") + require.NotNil(t, planningTime, "expected router_graphql_operation_planning_time metric") + + // Verify the expensive_plan_cache_hit label exists + expensiveHitFound := false + for _, m := range planningTime.GetMetric() { + for _, label := range m.GetLabel() { + if label.GetName() == "wg_engine_expensive_plan_cache_hit" && label.GetValue() == "true" { + expensiveHitFound = true + // plan_cache_hit should be false for expensive cache hits + for _, subLabel := range m.GetLabel() { + if subLabel.GetName() == "wg_engine_plan_cache_hit" { + require.Equal(t, "false", subLabel.GetValue(), "plan_cache_hit should be false when expensive_plan_cache_hit is true") + } + } + } + } + } + require.True(t, expensiveHitFound, "expected Prometheus metric with wg_engine_expensive_plan_cache_hit=true") + + // Also verify that the false value exists (from initial MISS requests) + expensiveMissFound := false + for _, m := range planningTime.GetMetric() { + for _, label := range m.GetLabel() { + if label.GetName() == "wg_engine_expensive_plan_cache_hit" && label.GetValue() == "false" { + expensiveMissFound = true + } + } + } + require.True(t, expensiveMissFound, "expected Prometheus metric with wg_engine_expensive_plan_cache_hit=false") + }) + }) + + t.Run("no expensive cache header or telemetry when feature is disabled", func(t *testing.T) { + t.Parallel() + + exporter := tracetest.NewInMemoryExporter(t) + metricReader := metric.NewManualReader() + promRegistry := prometheus.NewRegistry() + + testenv.Run(t, &testenv.Config{ + TraceExporter: exporter, + MetricReader: metricReader, + PrometheusRegistry: promRegistry, + // InMemoryFallback is NOT set — expensive cache 
is disabled + }, func(t *testing.T, xEnv *testenv.Environment) { + for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + // Header must be absent when feature is disabled + require.Empty(t, res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), + "X-WG-Expensive-Plan-Cache header should not be present when InMemoryFallback is disabled") + } + + // Second pass — cache hits + for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Empty(t, res.Response.Header.Get("X-WG-Expensive-Plan-Cache")) + } + + // Verify spans do NOT contain the expensive_plan_cache_hit attribute + sn := exporter.GetSpans().Snapshots() + for _, span := range sn { + if span.Name() == "Operation - Plan" { + for _, attr := range span.Attributes() { + require.NotEqual(t, otel.WgEngineExpensivePlanCacheHit, attr.Key, + "wg.engine.expensive_plan_cache_hit attribute should not be present when feature is disabled") + } + } + } + + // Verify OTEL metrics do NOT contain the attribute + rm := metricdata.ResourceMetrics{} + err := metricReader.Collect(context.Background(), &rm) + require.NoError(t, err) + + metricScope := GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router") + if metricScope != nil { + planningMetric := GetMetricByName(metricScope, "router.graphql.operation.planning_time") + if planningMetric != nil { + hist := planningMetric.Data.(metricdata.Histogram[float64]) + for _, dp := range hist.DataPoints { + _, found := dp.Attributes.Value(otel.WgEngineExpensivePlanCacheHit) + require.False(t, found, + "wg.engine.expensive_plan_cache_hit attribute should not be present in OTEL metrics when feature is disabled") + } + } + } + + // Verify Prometheus metrics do NOT contain the label + mf, err := promRegistry.Gather() + require.NoError(t, err) + + planningTime := findMetricFamilyByName(mf, "router_graphql_operation_planning_time") + if planningTime != nil { + for _, m := range planningTime.GetMetric() { 
+ for _, label := range m.GetLabel() { + require.NotEqual(t, "wg_engine_expensive_plan_cache_hit", label.GetName(), + "wg_engine_expensive_plan_cache_hit label should not be present in Prometheus when feature is disabled") + } + } + } + }) + }) + + t.Run("high threshold prevents fast plans from entering expensive cache", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + // Threshold so high no plan will qualify + cfg.ExpensiveQueryThreshold = 1 * time.Hour + cfg.ExpensiveQueryCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Populate — all plans are fast (well under 1h threshold) + for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + // Feature is enabled so header is present, but should be MISS + require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache")) + } + + // Wait for Ristretto eviction + time.Sleep(200 * time.Millisecond) + + // Re-query — with main cache size 1, most are evicted from Ristretto. + // Since no plan met the 1h threshold, the expensive cache is empty. + // These should be re-planned (MISS on both caches). 
+ for _, q := range distinctQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), + "no plan should be in the expensive cache with a 1h threshold") + } + }) + }) +} diff --git a/router/core/context.go b/router/core/context.go index 408abfeac9..78371833f3 100644 --- a/router/core/context.go +++ b/router/core/context.go @@ -620,7 +620,9 @@ type operationContext struct { preparedPlan *planWithMetaData traceOptions resolve.TraceOptions executionOptions resolve.ExecutionOptions - planCacheHit bool + planCacheHit bool + expensivePlanCacheHit bool + expensiveCacheEnabled bool initialPayload []byte extensions []byte persistedID string diff --git a/router/core/expensive_query_cache.go b/router/core/expensive_query_cache.go index 3b115a9b5d..785d4da530 100644 --- a/router/core/expensive_query_cache.go +++ b/router/core/expensive_query_cache.go @@ -43,6 +43,10 @@ func (c *expensivePlanCache) Set(key uint64, plan *planWithMetaData, duration ti c.mu.Lock() defer c.mu.Unlock() + if c.entries == nil { + return + } + // If key already exists, update it if _, ok := c.entries[key]; ok { c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} diff --git a/router/core/operation_planner_test.go b/router/core/expensive_query_cache_test.go similarity index 54% rename from router/core/operation_planner_test.go rename to router/core/expensive_query_cache_test.go index 72c25508d9..7f8bc28787 100644 --- a/router/core/operation_planner_test.go +++ b/router/core/expensive_query_cache_test.go @@ -143,3 +143,133 @@ func TestExpensivePlanCache_Close(t *testing.T) { require.False(t, ok) } +func TestExpensivePlanCache_SetAfterClose(t *testing.T) { + c := newExpensivePlanCache(10) + c.Close() + + // Set after Close should not panic + c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + + _, ok := c.Get(1) + require.False(t, ok) +} + +func 
TestExpensivePlanCache_IterValuesEmpty(t *testing.T) { + c := newExpensivePlanCache(10) + + count := 0 + c.IterValues(func(v *planWithMetaData) bool { + count++ + return false + }) + require.Equal(t, 0, count) +} + +func TestExpensivePlanCache_IterValuesAfterClose(t *testing.T) { + c := newExpensivePlanCache(10) + c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + c.Close() + + count := 0 + c.IterValues(func(v *planWithMetaData) bool { + count++ + return false + }) + require.Equal(t, 0, count) +} + +func TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { + c := newExpensivePlanCache(2) + + c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) + + // Same duration as minimum (10ms) — should NOT evict (requires strictly greater) + c.Set(3, &planWithMetaData{content: "q3"}, 10*time.Millisecond) + + _, ok := c.Get(3) + require.False(t, ok, "entry with equal duration should not replace minimum") + _, ok = c.Get(1) + require.True(t, ok) + _, ok = c.Get(2) + require.True(t, ok) +} + +func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { + c := newExpensivePlanCache(1) + + c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + got, ok := c.Get(1) + require.True(t, ok) + require.Equal(t, "q1", got.content) + + // Adding a more expensive entry should evict the only entry + c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) + _, ok = c.Get(1) + require.False(t, ok) + got, ok = c.Get(2) + require.True(t, ok) + require.Equal(t, "q2", got.content) + + // Adding a cheaper entry should be rejected + c.Set(3, &planWithMetaData{content: "q3"}, 5*time.Millisecond) + _, ok = c.Get(3) + require.False(t, ok) + _, ok = c.Get(2) + require.True(t, ok) +} + +func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { + c := newExpensivePlanCache(100) + done := make(chan struct{}) + + // Concurrent writers — each goroutine writes to its own key range + for i := 0; 
i < 10; i++ { + go func(id int) { + defer func() { done <- struct{}{} }() + for j := 0; j < 100; j++ { + key := uint64(id*100 + j) + c.Set(key, &planWithMetaData{content: "q"}, time.Duration(j)*time.Millisecond) + } + }(i) + } + + // Concurrent readers + for i := 0; i < 10; i++ { + go func(id int) { + defer func() { done <- struct{}{} }() + for j := 0; j < 100; j++ { + c.Get(uint64(id*100 + j)) + } + }(i) + } + + // Concurrent iterators + for i := 0; i < 5; i++ { + go func() { + defer func() { done <- struct{}{} }() + c.IterValues(func(v *planWithMetaData) bool { + return false + }) + }() + } + + // Wait for all goroutines + for i := 0; i < 25; i++ { + <-done + } + + // Cache should be at capacity and all entries should be retrievable + count := 0 + c.IterValues(func(v *planWithMetaData) bool { + count++ + return false + }) + require.Equal(t, 100, count, "cache should be at max capacity") + + // Every entry in the cache should be gettable + c.IterValues(func(v *planWithMetaData) bool { + require.Equal(t, "q", v.content) + return false + }) +} diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 843d32e382..82a2c4d46b 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -1381,19 +1381,23 @@ func (s *graphServer) buildGraphMux( } warmupConfig.AfterOperation = func(item *CacheWarmupOperationPlanResult) { + attrs := []attribute.KeyValue{ + otel.WgOperationName.String(item.OperationName), + otel.WgClientName.String(item.ClientName), + otel.WgClientVersion.String(item.ClientVersion), + otel.WgFeatureFlag.String(opts.FeatureFlagName), + otel.WgOperationHash.String(item.OperationHash), + otel.WgOperationType.String(item.OperationType), + otel.WgEnginePlanCacheHit.Bool(false), + } + if operationPlanner.useFallback { + attrs = append(attrs, otel.WgEngineExpensivePlanCacheHit.Bool(false)) + } gm.metricStore.MeasureOperationPlanningTime(ctx, item.PlanningTime, nil, otelmetric.WithAttributes( - append([]attribute.KeyValue{ - 
otel.WgOperationName.String(item.OperationName), - otel.WgClientName.String(item.ClientName), - otel.WgClientVersion.String(item.ClientVersion), - otel.WgFeatureFlag.String(opts.FeatureFlagName), - otel.WgOperationHash.String(item.OperationHash), - otel.WgOperationType.String(item.OperationType), - otel.WgEnginePlanCacheHit.Bool(false), - }, baseMetricAttributes...)..., + append(attrs, baseMetricAttributes...)..., ), ) } diff --git a/router/core/graphql_handler.go b/router/core/graphql_handler.go index 1fab6a7732..1469945774 100644 --- a/router/core/graphql_handler.go +++ b/router/core/graphql_handler.go @@ -34,6 +34,7 @@ var ( const ( ExecutionPlanCacheHeader = "X-WG-Execution-Plan-Cache" + ExpensivePlanCacheHeader = "X-WG-Expensive-Plan-Cache" PersistedOperationCacheHeader = "X-WG-Persisted-Operation-Cache" NormalizationCacheHeader = "X-WG-Normalization-Cache" VariablesNormalizationCacheHeader = "X-WG-Variables-Normalization-Cache" @@ -520,5 +521,12 @@ func (h *GraphQLHandler) setDebugCacheHeaders(w http.ResponseWriter, opCtx *oper } else { w.Header().Set(ExecutionPlanCacheHeader, "MISS") } + if opCtx.expensiveCacheEnabled { + if opCtx.expensivePlanCacheHit { + w.Header().Set(ExpensivePlanCacheHeader, "HIT") + } else { + w.Header().Set(ExpensivePlanCacheHeader, "MISS") + } + } } } diff --git a/router/core/graphql_prehandler.go b/router/core/graphql_prehandler.go index ee9eb10fc5..ce9508faa2 100644 --- a/router/core/graphql_prehandler.go +++ b/router/core/graphql_prehandler.go @@ -1089,10 +1089,16 @@ func (h *PreHandler) handleOperation(req *http.Request, httpOperation *httpOpera setTelemetryAttributes(planCtx, requestContext, expr.BucketPlanningTime) enginePlanSpan.SetAttributes(otel.WgEnginePlanCacheHit.Bool(requestContext.operation.planCacheHit)) + if requestContext.operation.expensiveCacheEnabled { + enginePlanSpan.SetAttributes(otel.WgEngineExpensivePlanCacheHit.Bool(requestContext.operation.expensivePlanCacheHit)) + } enginePlanSpan.End() planningAttrs := 
*requestContext.telemetry.AcquireAttributes() planningAttrs = append(planningAttrs, otel.WgEnginePlanCacheHit.Bool(requestContext.operation.planCacheHit)) + if requestContext.operation.expensiveCacheEnabled { + planningAttrs = append(planningAttrs, otel.WgEngineExpensivePlanCacheHit.Bool(requestContext.operation.expensivePlanCacheHit)) + } planningAttrs = append(planningAttrs, requestContext.telemetry.metricAttrs...) httpOperation.operationMetrics.routerMetrics.MetricStore().MeasureOperationPlanningTime( diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index de87e10022..5a7467d65c 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -146,6 +146,8 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions // if we have tracing enabled or want to include a query plan in the response we always prepare a new plan // this is because in case of tracing, we're writing trace data to the plan // in case of including the query plan, we don't want to cache this additional overhead + opContext.expensiveCacheEnabled = p.useFallback + skipCache := options.TraceOptions.Enable || options.ExecutionOptions.IncludeQueryPlanInResponse if skipCache { @@ -176,10 +178,12 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions if cachedPlan, ok = p.expensiveCache.Get(operationID); ok { // found in the expensive query cache — re-use and re-insert into main cache opContext.preparedPlan = cachedPlan - opContext.planCacheHit = true + opContext.expensivePlanCacheHit = true p.planCache.Set(operationID, cachedPlan, 1) } - } else { + } + + if opContext.preparedPlan == nil { // prepare a new plan using single flight // this ensures that we only prepare the plan once for this operation ID operationIDStr := strconv.FormatUint(operationID, 10) diff --git a/router/pkg/otel/attributes.go b/router/pkg/otel/attributes.go index 07e0deaab2..de209294b6 100644 --- 
a/router/pkg/otel/attributes.go +++ b/router/pkg/otel/attributes.go @@ -27,6 +27,7 @@ const ( WgRequestError = attribute.Key("wg.request.error") WgOperationPersistedID = attribute.Key("wg.operation.persisted_id") WgEnginePlanCacheHit = attribute.Key("wg.engine.plan_cache_hit") + WgEngineExpensivePlanCacheHit = attribute.Key("wg.engine.expensive_plan_cache_hit") WgEnginePersistedOperationCacheHit = attribute.Key("wg.engine.persisted_operation_cache_hit") WgEngineRequestTracingEnabled = attribute.Key("wg.engine.request_tracing_enabled") WgRouterRootSpan = attribute.Key("wg.router.root_span") From 4378c80d8005fa145b2b887a00ed1c03d751ee70 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 9 Mar 2026 19:41:39 +0530 Subject: [PATCH 03/46] fix: review comments --- router/core/expensive_query_cache.go | 8 +++- router/core/expensive_query_cache_test.go | 47 ++++++++++++++++------- router/core/graph_server.go | 6 ++- router/core/operation_planner.go | 13 +++++-- 4 files changed, 53 insertions(+), 21 deletions(-) diff --git a/router/core/expensive_query_cache.go b/router/core/expensive_query_cache.go index 785d4da530..aa744c432d 100644 --- a/router/core/expensive_query_cache.go +++ b/router/core/expensive_query_cache.go @@ -1,6 +1,7 @@ package core import ( + "fmt" "sync" "time" ) @@ -19,11 +20,14 @@ type expensivePlanCache struct { maxSize int } -func newExpensivePlanCache(maxSize int) *expensivePlanCache { +func newExpensivePlanCache(maxSize int) (*expensivePlanCache, error) { + if maxSize < 1 { + return nil, fmt.Errorf("expensive query cache size must be at least 1, got %d", maxSize) + } return &expensivePlanCache{ entries: make(map[uint64]*expensivePlanEntry, maxSize), maxSize: maxSize, - } + }, nil } func (c *expensivePlanCache) Get(key uint64) (*planWithMetaData, bool) { diff --git a/router/core/expensive_query_cache_test.go b/router/core/expensive_query_cache_test.go index 7f8bc28787..b888aab57f 100644 --- a/router/core/expensive_query_cache_test.go +++ 
b/router/core/expensive_query_cache_test.go @@ -8,7 +8,8 @@ import ( ) func TestExpensivePlanCache_GetSet(t *testing.T) { - c := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10) + require.NoError(t, err) plan1 := &planWithMetaData{content: "query { a }"} plan2 := &planWithMetaData{content: "query { b }"} @@ -36,7 +37,8 @@ func TestExpensivePlanCache_GetSet(t *testing.T) { } func TestExpensivePlanCache_BoundedSize(t *testing.T) { - c := newExpensivePlanCache(3) + c, err := newExpensivePlanCache(3) + require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) @@ -59,7 +61,8 @@ func TestExpensivePlanCache_BoundedSize(t *testing.T) { } func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { - c := newExpensivePlanCache(3) + c, err := newExpensivePlanCache(3) + require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Second) c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Second) @@ -81,7 +84,8 @@ func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { } func TestExpensivePlanCache_UpdateExisting(t *testing.T) { - c := newExpensivePlanCache(2) + c, err := newExpensivePlanCache(2) + require.NoError(t, err) plan1 := &planWithMetaData{content: "q1"} plan1Updated := &planWithMetaData{content: "q1-updated"} @@ -102,7 +106,8 @@ func TestExpensivePlanCache_UpdateExisting(t *testing.T) { } func TestExpensivePlanCache_IterValues(t *testing.T) { - c := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10) + require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) @@ -118,7 +123,8 @@ func TestExpensivePlanCache_IterValues(t *testing.T) { } func TestExpensivePlanCache_IterValues_EarlyStop(t *testing.T) { - c := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10) + require.NoError(t, err) c.Set(1, 
&planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) @@ -133,7 +139,8 @@ func TestExpensivePlanCache_IterValues_EarlyStop(t *testing.T) { } func TestExpensivePlanCache_Close(t *testing.T) { - c := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10) + require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Close() @@ -144,7 +151,8 @@ func TestExpensivePlanCache_Close(t *testing.T) { } func TestExpensivePlanCache_SetAfterClose(t *testing.T) { - c := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10) + require.NoError(t, err) c.Close() // Set after Close should not panic @@ -155,7 +163,8 @@ func TestExpensivePlanCache_SetAfterClose(t *testing.T) { } func TestExpensivePlanCache_IterValuesEmpty(t *testing.T) { - c := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10) + require.NoError(t, err) count := 0 c.IterValues(func(v *planWithMetaData) bool { @@ -166,7 +175,8 @@ func TestExpensivePlanCache_IterValuesEmpty(t *testing.T) { } func TestExpensivePlanCache_IterValuesAfterClose(t *testing.T) { - c := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10) + require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Close() @@ -179,7 +189,8 @@ func TestExpensivePlanCache_IterValuesAfterClose(t *testing.T) { } func TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { - c := newExpensivePlanCache(2) + c, err := newExpensivePlanCache(2) + require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) @@ -196,7 +207,8 @@ func TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { } func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { - c := newExpensivePlanCache(1) + c, err := newExpensivePlanCache(1) + require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) got, 
ok := c.Get(1) @@ -220,7 +232,8 @@ func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { } func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { - c := newExpensivePlanCache(100) + c, err := newExpensivePlanCache(100) + require.NoError(t, err) done := make(chan struct{}) // Concurrent writers — each goroutine writes to its own key range @@ -273,3 +286,11 @@ func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { return false }) } + +func TestExpensivePlanCache_InvalidSize(t *testing.T) { + _, err := newExpensivePlanCache(0) + require.Error(t, err) + + _, err = newExpensivePlanCache(-1) + require.Error(t, err) +} diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 82a2c4d46b..7e20708bb7 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -1345,7 +1345,7 @@ func (s *graphServer) buildGraphMux( ComplexityLimits: s.securityConfiguration.ComplexityLimits, }) - operationPlanner := NewOperationPlanner( + operationPlanner, err := NewOperationPlanner( s.logger, executor, gm.planCache, @@ -1353,6 +1353,9 @@ func (s *graphServer) buildGraphMux( int(s.engineExecutionConfiguration.ExpensiveQueryCacheSize), s.engineExecutionConfiguration.ExpensiveQueryThreshold, ) + if err != nil { + return nil, fmt.Errorf("failed to create operation planner: %w", err) + } gm.operationPlanner = operationPlanner // We support the MCP only on the base graph. Feature flags are not supported yet. 
@@ -1385,7 +1388,6 @@ func (s *graphServer) buildGraphMux( otel.WgOperationName.String(item.OperationName), otel.WgClientName.String(item.ClientName), otel.WgClientVersion.String(item.ClientVersion), - otel.WgFeatureFlag.String(opts.FeatureFlagName), otel.WgOperationHash.String(item.OperationHash), otel.WgOperationType.String(item.OperationType), otel.WgEnginePlanCacheHit.Bool(false), diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index 5a7467d65c..f6d944c0c5 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -2,10 +2,11 @@ package core import ( "errors" - "go.uber.org/zap" "strconv" "time" + "go.uber.org/zap" + "golang.org/x/sync/singleflight" "github.com/wundergraph/graphql-go-tools/v2/pkg/ast" @@ -55,7 +56,7 @@ type ExecutionPlanCache[K any, V any] interface { Close() } -func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache ExecutionPlanCache[uint64, *planWithMetaData], inMemoryPlanCacheFallback bool, expensiveCacheSize int, threshold time.Duration) *OperationPlanner { +func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache ExecutionPlanCache[uint64, *planWithMetaData], inMemoryPlanCacheFallback bool, expensiveCacheSize int, threshold time.Duration) (*OperationPlanner, error) { p := &OperationPlanner{ logger: logger, planCache: planCache, @@ -65,11 +66,15 @@ func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache Execu } if inMemoryPlanCacheFallback { - p.expensiveCache = newExpensivePlanCache(expensiveCacheSize) + var err error + p.expensiveCache, err = newExpensivePlanCache(expensiveCacheSize) + if err != nil { + return nil, err + } p.threshold = threshold } - return p + return p, nil } // Close releases expensive cache resources. 
From 4f644cf9baeaa26c68ab02cf788acbb2cc10fbff Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 9 Mar 2026 20:15:13 +0530 Subject: [PATCH 04/46] fix: improvements --- router/core/expensive_query_cache.go | 41 +++++++++++++++++++--------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/router/core/expensive_query_cache.go b/router/core/expensive_query_cache.go index aa744c432d..d6694821ae 100644 --- a/router/core/expensive_query_cache.go +++ b/router/core/expensive_query_cache.go @@ -14,10 +14,13 @@ type expensivePlanEntry struct { // expensivePlanCache is a bounded, mutex-protected map that holds expensive plans // that should not be subject to TinyLFU eviction in the main cache. +// It tracks the minimum-duration entry so that rejection of cheaper entries is O(1). type expensivePlanCache struct { mu sync.RWMutex entries map[uint64]*expensivePlanEntry maxSize int + minKey uint64 + minDur time.Duration } func newExpensivePlanCache(maxSize int) (*expensivePlanCache, error) { @@ -54,31 +57,44 @@ func (c *expensivePlanCache) Set(key uint64, plan *planWithMetaData, duration ti // If key already exists, update it if _, ok := c.entries[key]; ok { c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} + // If this was the tracked min, or the new duration is lower, refresh the min + if key == c.minKey || duration < c.minDur { + c.refreshMin() + } return } - // If not at capacity, just add + // If not at capacity, just add and update min tracking if len(c.entries) < c.maxSize { c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} + if len(c.entries) == 1 || duration < c.minDur { + c.minKey = key + c.minDur = duration + } + return + } + + // At capacity: reject if new entry is not more expensive than the current minimum + if duration <= c.minDur { return } - // At capacity: find the minimum and only evict if new entry is more expensive - var minKey uint64 - var minDur time.Duration + // Evict the minimum and insert the new 
entry + delete(c.entries, c.minKey) + c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} + c.refreshMin() +} + +// refreshMin rescans the entries to find the new minimum. Must be called with mu held. +func (c *expensivePlanCache) refreshMin() { first := true for k, e := range c.entries { - if first || e.duration < minDur { - minKey = k - minDur = e.duration + if first || e.duration < c.minDur { + c.minKey = k + c.minDur = e.duration first = false } } - - if duration > minDur { - delete(c.entries, minKey) - c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} - } } func (c *expensivePlanCache) IterValues(cb func(v *planWithMetaData) bool) { @@ -98,4 +114,3 @@ func (c *expensivePlanCache) Close() { c.entries = nil } - From 2515b07ff61e72658361e5885bb23f91aba17773 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 9 Mar 2026 20:30:38 +0530 Subject: [PATCH 05/46] fix: review comments --- router/core/graph_server.go | 13 ++++++------- router/core/operation_planner.go | 2 +- router/pkg/config/config.schema.json | 1 + 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 7e20708bb7..fff92cf12f 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -585,10 +585,11 @@ func (s *graphMux) buildOperationCaches(srv *graphServer) (computeSha256 bool, e } if srv.cacheWarmup != nil && srv.cacheWarmup.Enabled && srv.cacheWarmup.InMemoryFallback { planCacheConfig.OnEvict = func(item *ristretto.Item[*planWithMetaData]) { - if s.operationPlanner != nil && s.operationPlanner.expensiveCache != nil { - if s.operationPlanner.threshold > 0 && item.Value.planningDuration >= s.operationPlanner.threshold && item.Value.content != "" { - s.operationPlanner.expensiveCache.Set(item.Key, item.Value, item.Value.planningDuration) - } + if s.operationPlanner == nil || s.operationPlanner.expensiveCache == nil || item.Value.content == "" { + return + } + if 
s.operationPlanner.threshold > 0 && item.Value.planningDuration >= s.operationPlanner.threshold { + s.operationPlanner.expensiveCache.Set(item.Key, item.Value, item.Value.planningDuration) } } } @@ -792,10 +793,8 @@ func (s *graphMux) configureCacheMetrics(srv *graphServer, baseOtelAttributes [] } func (s *graphMux) Shutdown(ctx context.Context) error { - if s.operationPlanner != nil { - s.operationPlanner.Close() - } s.planCache.Close() + s.operationPlanner.Close() s.persistedOperationCache.Close() s.normalizationCache.Close() s.variablesNormalizationCache.Close() diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index f6d944c0c5..cb93690842 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -79,7 +79,7 @@ func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache Execu // Close releases expensive cache resources. func (p *OperationPlanner) Close() { - if !p.useFallback { + if p == nil || !p.useFallback { return } p.expensiveCache.Close() diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index de2b87f5ae..f451247ffa 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -3322,6 +3322,7 @@ }, "expensive_query_cache_size": { "type": "integer", + "minimum": 1, "default": 100, "description": "The maximum number of entries in the expensive query plan cache. Expensive queries are protected from TinyLFU eviction in the main plan cache." 
}, From adf64982543a988fb85ed96d4df388503bca27b8 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 11 Mar 2026 16:49:56 +0530 Subject: [PATCH 06/46] fix: updates --- router-tests/expensive_query_cache_test.go | 200 ++++++++++++++++---- router/core/expensive_query_cache.go | 79 ++++++-- router/core/expensive_query_cache_test.go | 44 +++-- router/core/graph_server.go | 5 +- router/core/operation_planner.go | 18 +- router/core/reload_persistent_state.go | 26 ++- router/core/reload_persistent_state_test.go | 8 +- router/core/router.go | 8 + router/core/router_config.go | 1 + 9 files changed, 304 insertions(+), 85 deletions(-) diff --git a/router-tests/expensive_query_cache_test.go b/router-tests/expensive_query_cache_test.go index d5d065b5e5..4e8a21ef5c 100644 --- a/router-tests/expensive_query_cache_test.go +++ b/router-tests/expensive_query_cache_test.go @@ -2,6 +2,7 @@ package integration import ( "context" + "strings" "testing" "time" @@ -22,15 +23,32 @@ import ( func TestExpensiveQueryCache(t *testing.T) { t.Parallel() - // distinctQueries are queries that normalize to different plans, used to overflow a small main cache. - distinctQueries := []testenv.GraphQLRequest{ + // slowQueries are queries whose planning duration is overridden to exceed the threshold. + slowQueries := []testenv.GraphQLRequest{ {Query: `{ employees { id } }`}, {Query: `query { employees { id details { forename } } }`}, + } + + // fastQueries are queries whose planning duration stays below the threshold. + fastQueries := []testenv.GraphQLRequest{ {Query: `query { employees { id details { forename surname } } }`}, {Query: `query m($id: Int!){ employee(id: $id) { id details { forename surname } } }`, Variables: []byte(`{"id": 1}`)}, } - // waitForExpensiveCacheHits sends all distinctQueries, retrying until each one + allQueries := append(slowQueries, fastQueries...) + + expensiveThreshold := 1 * time.Second + + // The override function receives the normalized (minified) query content. 
+ // Both slow queries lack "surname", while all fast queries contain it. + planningDurationOverride := core.WithPlanningDurationOverride(func(content string) time.Duration { + if !strings.Contains(content, "surname") { + return 10 * time.Second + } + return 0 + }) + + // waitForExpensiveCacheHits sends all queries, retrying until each one // is served from either the main or expensive cache. Then it does a single // final pass and returns the number of expensive cache hits. waitForExpensiveCacheHits := func(t *testing.T, xEnv *testenv.Environment, queries []testenv.GraphQLRequest, extraChecks ...func(*assert.CollectT, *testenv.TestResponse)) int { @@ -66,10 +84,8 @@ func TestExpensiveQueryCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - // Tiny main cache: only 1 plan fits in Ristretto cfg.ExecutionPlanCacheSize = 1 - // All plans qualify as expensive (threshold effectively zero) - cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryThreshold = expensiveThreshold cfg.ExpensiveQueryCacheSize = 100 }, RouterOptions: []core.Option{ @@ -77,17 +93,54 @@ func TestExpensiveQueryCache(t *testing.T) { Enabled: true, InMemoryFallback: true, }), + planningDurationOverride, }, }, func(t *testing.T, xEnv *testenv.Environment) { - // Send all distinct queries — each is a MISS and gets planned via singleflight. - for _, q := range distinctQueries { + // Send all queries — each is a MISS and gets planned via singleflight. 
+ for _, q := range allQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, 200, res.Response.StatusCode) require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) } - hits := waitForExpensiveCacheHits(t, xEnv, distinctQueries) - require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache") + // Only slow queries should end up in the expensive cache + hits := waitForExpensiveCacheHits(t, xEnv, slowQueries) + require.Greater(t, hits, 0, "expected at least one slow query to be served from the expensive cache") + }) + }) + + t.Run("fast queries do not enter expensive cache", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.ExpensiveQueryThreshold = expensiveThreshold + cfg.ExpensiveQueryCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + planningDurationOverride, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Send all queries + for _, q := range allQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + } + + // Wait for Ristretto eviction + time.Sleep(200 * time.Millisecond) + + // Fast queries should never be served from the expensive cache + for _, q := range fastQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), + "fast query should not be in the expensive cache") + } }) }) @@ -101,7 +154,7 @@ func TestExpensiveQueryCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryThreshold = expensiveThreshold 
cfg.ExpensiveQueryCacheSize = 100 }, RouterOptions: []core.Option{ @@ -115,6 +168,7 @@ func TestExpensiveQueryCache(t *testing.T) { }, }), core.WithConfigVersionHeader(true), + planningDurationOverride, }, RouterConfig: &testenv.RouterConfig{ ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { @@ -123,8 +177,8 @@ func TestExpensiveQueryCache(t *testing.T) { }, }, }, func(t *testing.T, xEnv *testenv.Environment) { - // Populate caches with multiple distinct queries - for _, q := range distinctQueries { + // Populate caches with slow queries + for _, q := range slowQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, 200, res.Response.StatusCode) require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) @@ -135,8 +189,8 @@ func TestExpensiveQueryCache(t *testing.T) { pm.initConfig.Version = "updated" require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) - // After reload, all queries should still be available via expensive cache. - hits := waitForExpensiveCacheHits(t, xEnv, distinctQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { + // After reload, slow queries should still be available via expensive cache. 
+ hits := waitForExpensiveCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) }) require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache after config reload") @@ -153,7 +207,7 @@ func TestExpensiveQueryCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryThreshold = expensiveThreshold cfg.ExpensiveQueryCacheSize = 100 }, RouterOptions: []core.Option{ @@ -167,6 +221,7 @@ func TestExpensiveQueryCache(t *testing.T) { }, }), core.WithConfigVersionHeader(true), + planningDurationOverride, }, RouterConfig: &testenv.RouterConfig{ ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { @@ -175,8 +230,8 @@ func TestExpensiveQueryCache(t *testing.T) { }, }, }, func(t *testing.T, xEnv *testenv.Environment) { - // Warm up with distinct queries - for _, q := range distinctQueries { + // Warm up with slow queries + for _, q := range slowQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) } @@ -187,7 +242,7 @@ func TestExpensiveQueryCache(t *testing.T) { pm.initConfig.Version = "v2" require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) - waitForExpensiveCacheHits(t, xEnv, distinctQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { + waitForExpensiveCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { assert.Equal(ct, "v2", res.Response.Header.Get("X-Router-Config-Version")) }) @@ -195,7 +250,7 @@ func TestExpensiveQueryCache(t *testing.T) { pm.initConfig.Version = "v3" require.NoError(t, pm.updateConfig(pm.initConfig, "v2")) - hits := waitForExpensiveCacheHits(t, xEnv, distinctQueries, func(ct 
*assert.CollectT, res *testenv.TestResponse) { + hits := waitForExpensiveCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { assert.Equal(ct, "v3", res.Response.Header.Get("X-Router-Config-Version")) }) require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache after multiple reloads") @@ -208,7 +263,7 @@ func TestExpensiveQueryCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryThreshold = expensiveThreshold cfg.ExpensiveQueryCacheSize = 10 }, RouterOptions: []core.Option{ @@ -216,16 +271,17 @@ func TestExpensiveQueryCache(t *testing.T) { Enabled: true, InMemoryFallback: true, }), + planningDurationOverride, }, }, func(t *testing.T, xEnv *testenv.Environment) { - // Send multiple distinct queries to overflow the tiny main cache - for _, q := range distinctQueries { + // Send slow queries to overflow the tiny main cache + for _, q := range slowQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, 200, res.Response.StatusCode) require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) } - hits := waitForExpensiveCacheHits(t, xEnv, distinctQueries) + hits := waitForExpensiveCacheHits(t, xEnv, slowQueries) require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache") }) }) @@ -236,7 +292,7 @@ func TestExpensiveQueryCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryThreshold = expensiveThreshold cfg.ExpensiveQueryCacheSize = 50 }, RouterOptions: []core.Option{ @@ -244,10 +300,11 @@ func TestExpensiveQueryCache(t *testing.T) { Enabled: true, InMemoryFallback: 
true, }), + planningDurationOverride, }, }, func(t *testing.T, xEnv *testenv.Environment) { // Make some requests to populate both caches - for _, q := range distinctQueries { + for _, q := range allQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, 200, res.Response.StatusCode) } @@ -266,7 +323,7 @@ func TestExpensiveQueryCache(t *testing.T) { MetricReader: metricReader, ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryThreshold = expensiveThreshold cfg.ExpensiveQueryCacheSize = 100 }, RouterOptions: []core.Option{ @@ -274,20 +331,21 @@ func TestExpensiveQueryCache(t *testing.T) { Enabled: true, InMemoryFallback: true, }), + planningDurationOverride, }, }, func(t *testing.T, xEnv *testenv.Environment) { - // Send multiple distinct queries to overflow the tiny main cache - for _, q := range distinctQueries { + // Send slow queries to overflow the tiny main cache + for _, q := range slowQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) } // Wait for caches to converge, then reset spans for a clean measurement - waitForExpensiveCacheHits(t, xEnv, distinctQueries) + waitForExpensiveCacheHits(t, xEnv, slowQueries) exporter.Reset() // Final pass to generate spans with known state - for _, q := range distinctQueries { + for _, q := range slowQueries { xEnv.MakeGraphQLRequestOK(q) } @@ -348,7 +406,7 @@ func TestExpensiveQueryCache(t *testing.T) { PrometheusRegistry: promRegistry, ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = 1 * time.Nanosecond + cfg.ExpensiveQueryThreshold = expensiveThreshold cfg.ExpensiveQueryCacheSize = 100 }, RouterOptions: []core.Option{ @@ -356,18 +414,19 @@ func TestExpensiveQueryCache(t *testing.T) { Enabled: true, 
InMemoryFallback: true, }), + planningDurationOverride, }, }, func(t *testing.T, xEnv *testenv.Environment) { - // Overflow the tiny main cache - for _, q := range distinctQueries { + // Overflow the tiny main cache with slow queries + for _, q := range slowQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) } // Wait for caches to converge, then make a final pass for Prometheus - waitForExpensiveCacheHits(t, xEnv, distinctQueries) + waitForExpensiveCacheHits(t, xEnv, slowQueries) - for _, q := range distinctQueries { + for _, q := range slowQueries { xEnv.MakeGraphQLRequestOK(q) } @@ -421,7 +480,7 @@ func TestExpensiveQueryCache(t *testing.T) { PrometheusRegistry: promRegistry, // InMemoryFallback is NOT set — expensive cache is disabled }, func(t *testing.T, xEnv *testenv.Environment) { - for _, q := range distinctQueries { + for _, q := range allQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, 200, res.Response.StatusCode) // Header must be absent when feature is disabled @@ -430,7 +489,7 @@ func TestExpensiveQueryCache(t *testing.T) { } // Second pass — cache hits - for _, q := range distinctQueries { + for _, q := range allQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Empty(t, res.Response.Header.Get("X-WG-Expensive-Plan-Cache")) } @@ -480,13 +539,71 @@ func TestExpensiveQueryCache(t *testing.T) { }) }) + t.Run("expensive cache entries survive static execution config reload", func(t *testing.T) { + t.Parallel() + + configFile := t.TempDir() + "/config.json" + writeTestConfig(t, "initial", configFile) + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.ExpensiveQueryThreshold = expensiveThreshold + cfg.ExpensiveQueryCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithConfigVersionHeader(true), + core.WithExecutionConfig(&core.ExecutionConfig{ + 
Path: configFile, + Watch: true, + WatchInterval: 100 * time.Millisecond, + }), + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + // Override the hello query to be slow + core.WithPlanningDurationOverride(func(content string) time.Duration { + return 10 * time.Second + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + q := testenv.GraphQLRequest{Query: `query { hello }`} + + // First request is a MISS + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "initial", res.Response.Header.Get("X-Router-Config-Version")) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + + // Wait for the expensive cache to pick it up + require.EventuallyWithT(t, func(ct *assert.CollectT) { + res = xEnv.MakeGraphQLRequestOK(q) + planHit := res.Response.Header.Get("x-wg-execution-plan-cache") == "HIT" + expensiveHit := res.Response.Header.Get("X-WG-Expensive-Plan-Cache") == "HIT" + assert.True(ct, planHit || expensiveHit, "expected plan to be cached") + }, 2*time.Second, 100*time.Millisecond) + + // Trigger schema reload by writing a new config version + writeTestConfig(t, "updated", configFile) + + // After reload, the plan should still be available (carried forward via extractQueriesAndOverridePlanCache) + require.EventuallyWithT(t, func(ct *assert.CollectT) { + res = xEnv.MakeGraphQLRequestOK(q) + assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) + planHit := res.Response.Header.Get("x-wg-execution-plan-cache") == "HIT" + expensiveHit := res.Response.Header.Get("X-WG-Expensive-Plan-Cache") == "HIT" + assert.True(ct, planHit || expensiveHit, "expected plan to survive schema reload via expensive cache merge") + }, 2*time.Second, 100*time.Millisecond) + }) + }) + t.Run("high threshold prevents fast plans from entering expensive cache", func(t *testing.T) { t.Parallel() testenv.Run(t, 
&testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - // Threshold so high no plan will qualify cfg.ExpensiveQueryThreshold = 1 * time.Hour cfg.ExpensiveQueryCacheSize = 100 }, @@ -495,10 +612,11 @@ func TestExpensiveQueryCache(t *testing.T) { Enabled: true, InMemoryFallback: true, }), + // No planning duration override — all plans are fast }, }, func(t *testing.T, xEnv *testenv.Environment) { // Populate — all plans are fast (well under 1h threshold) - for _, q := range distinctQueries { + for _, q := range allQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, 200, res.Response.StatusCode) require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) @@ -512,7 +630,7 @@ func TestExpensiveQueryCache(t *testing.T) { // Re-query — with main cache size 1, most are evicted from Ristretto. // Since no plan met the 1h threshold, the expensive cache is empty. // These should be re-planned (MISS on both caches). - for _, q := range distinctQueries { + for _, q := range allQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, 200, res.Response.StatusCode) require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), diff --git a/router/core/expensive_query_cache.go b/router/core/expensive_query_cache.go index d6694821ae..9314f10eef 100644 --- a/router/core/expensive_query_cache.go +++ b/router/core/expensive_query_cache.go @@ -12,8 +12,17 @@ type expensivePlanEntry struct { duration time.Duration } -// expensivePlanCache is a bounded, mutex-protected map that holds expensive plans +type setRequest struct { + key uint64 + plan *planWithMetaData + duration time.Duration + waitCh chan struct{} // if non-nil, closed after this request is processed +} + +// expensivePlanCache is a bounded map that holds expensive plans // that should not be subject to TinyLFU eviction in the main cache. 
+// Writes are buffered through a channel and applied asynchronously by a +// background goroutine, making Set non-blocking. Reads are protected by a RWMutex. // It tracks the minimum-duration entry so that rejection of cheaper entries is O(1). type expensivePlanCache struct { mu sync.RWMutex @@ -21,16 +30,46 @@ type expensivePlanCache struct { maxSize int minKey uint64 minDur time.Duration + + writeCh chan setRequest + stop chan struct{} + done chan struct{} } +// We use the same value as ristretto (this would be the buffer size if we used ristretto as the backing cache) +const defaultWriteBufferSize = 32 * 1024 + func newExpensivePlanCache(maxSize int) (*expensivePlanCache, error) { if maxSize < 1 { return nil, fmt.Errorf("expensive query cache size must be at least 1, got %d", maxSize) } - return &expensivePlanCache{ + c := &expensivePlanCache{ entries: make(map[uint64]*expensivePlanEntry, maxSize), maxSize: maxSize, - }, nil + writeCh: make(chan setRequest, defaultWriteBufferSize), + stop: make(chan struct{}), + done: make(chan struct{}), + } + go c.processWrites() + return c, nil +} + +// processWrites drains the write channel and applies sets under the write lock. +// It exits when the stop channel is closed. +func (c *expensivePlanCache) processWrites() { + for { + select { + case req := <-c.writeCh: + if req.waitCh != nil { + close(req.waitCh) + continue + } + c.applySet(req.key, req.plan, req.duration) + case <-c.stop: + c.done <- struct{}{} + return + } + } } func (c *expensivePlanCache) Get(key uint64) (*planWithMetaData, bool) { @@ -44,20 +83,30 @@ func (c *expensivePlanCache) Get(key uint64) (*planWithMetaData, bool) { return entry.plan, true } -// Set stores a plan in the expensive cache. When at capacity, it only adds the -// new entry if its duration exceeds the current minimum; otherwise, it is skipped. +// Set enqueues a write to the cache. The write is applied asynchronously. +// If the write buffer is full, the entry is silently dropped. 
func (c *expensivePlanCache) Set(key uint64, plan *planWithMetaData, duration time.Duration) { + select { + case c.writeCh <- setRequest{key: key, plan: plan, duration: duration}: + default: + } +} + +// Wait blocks until all pending writes in the buffer have been processed. +func (c *expensivePlanCache) Wait() { + ch := make(chan struct{}) + c.writeCh <- setRequest{waitCh: ch} + <-ch +} + +// applySet performs the actual cache mutation. Must only be called from processWrites. +func (c *expensivePlanCache) applySet(key uint64, plan *planWithMetaData, duration time.Duration) { c.mu.Lock() defer c.mu.Unlock() - if c.entries == nil { - return - } - // If key already exists, update it if _, ok := c.entries[key]; ok { c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} - // If this was the tracked min, or the new duration is lower, refresh the min if key == c.minKey || duration < c.minDur { c.refreshMin() } @@ -79,6 +128,7 @@ func (c *expensivePlanCache) Set(key uint64, plan *planWithMetaData, duration ti return } + // When at max capacity // Evict the minimum and insert the new entry delete(c.entries, c.minKey) c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} @@ -108,9 +158,14 @@ func (c *expensivePlanCache) IterValues(cb func(v *planWithMetaData) bool) { } } +// Close stops the background goroutine and releases resources. +// Pending writes in the buffer may be dropped. 
func (c *expensivePlanCache) Close() { - c.mu.Lock() - defer c.mu.Unlock() + close(c.stop) + <-c.done + close(c.done) + c.mu.Lock() c.entries = nil + c.mu.Unlock() } diff --git a/router/core/expensive_query_cache_test.go b/router/core/expensive_query_cache_test.go index b888aab57f..af2e122674 100644 --- a/router/core/expensive_query_cache_test.go +++ b/router/core/expensive_query_cache_test.go @@ -10,6 +10,7 @@ import ( func TestExpensivePlanCache_GetSet(t *testing.T) { c, err := newExpensivePlanCache(10) require.NoError(t, err) + defer c.Close() plan1 := &planWithMetaData{content: "query { a }"} plan2 := &planWithMetaData{content: "query { b }"} @@ -20,12 +21,14 @@ func TestExpensivePlanCache_GetSet(t *testing.T) { // Set and get c.Set(1, plan1, 10*time.Millisecond) + c.Wait() got, ok := c.Get(1) require.True(t, ok) require.Equal(t, plan1, got) // Different key c.Set(2, plan2, 20*time.Millisecond) + c.Wait() got, ok = c.Get(2) require.True(t, ok) require.Equal(t, plan2, got) @@ -39,6 +42,7 @@ func TestExpensivePlanCache_GetSet(t *testing.T) { func TestExpensivePlanCache_BoundedSize(t *testing.T) { c, err := newExpensivePlanCache(3) require.NoError(t, err) + defer c.Close() c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) @@ -46,6 +50,7 @@ func TestExpensivePlanCache_BoundedSize(t *testing.T) { // Cache is full (3/3). 
Adding a 4th with higher duration should evict the shortest (key=1, 10ms) c.Set(4, &planWithMetaData{content: "q4"}, 25*time.Millisecond) + c.Wait() // Key 1 should be evicted (it had the shortest duration: 10ms) _, ok := c.Get(1) @@ -63,6 +68,7 @@ func TestExpensivePlanCache_BoundedSize(t *testing.T) { func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { c, err := newExpensivePlanCache(3) require.NoError(t, err) + defer c.Close() c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Second) c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Second) @@ -70,6 +76,7 @@ func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { // Try to add a cheaper entry (5s < 10s minimum) — should be rejected c.Set(4, &planWithMetaData{content: "q4"}, 5*time.Second) + c.Wait() _, ok := c.Get(4) require.False(t, ok, "cheaper entry should not be added when cache is full") @@ -86,12 +93,14 @@ func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { func TestExpensivePlanCache_UpdateExisting(t *testing.T) { c, err := newExpensivePlanCache(2) require.NoError(t, err) + defer c.Close() plan1 := &planWithMetaData{content: "q1"} plan1Updated := &planWithMetaData{content: "q1-updated"} c.Set(1, plan1, 10*time.Millisecond) c.Set(1, plan1Updated, 50*time.Millisecond) + c.Wait() got, ok := c.Get(1) require.True(t, ok) @@ -99,6 +108,7 @@ func TestExpensivePlanCache_UpdateExisting(t *testing.T) { // Updating an existing key should not increase the count c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) + c.Wait() _, ok = c.Get(1) require.True(t, ok, "key 1 should still exist after adding key 2 (capacity is 2)") _, ok = c.Get(2) @@ -108,10 +118,12 @@ func TestExpensivePlanCache_UpdateExisting(t *testing.T) { func TestExpensivePlanCache_IterValues(t *testing.T) { c, err := newExpensivePlanCache(10) require.NoError(t, err) + defer c.Close() c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Set(2, &planWithMetaData{content: "q2"}, 
20*time.Millisecond) c.Set(3, &planWithMetaData{content: "q3"}, 30*time.Millisecond) + c.Wait() var contents []string c.IterValues(func(v *planWithMetaData) bool { @@ -125,10 +137,12 @@ func TestExpensivePlanCache_IterValues(t *testing.T) { func TestExpensivePlanCache_IterValues_EarlyStop(t *testing.T) { c, err := newExpensivePlanCache(10) require.NoError(t, err) + defer c.Close() c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) c.Set(3, &planWithMetaData{content: "q3"}, 30*time.Millisecond) + c.Wait() count := 0 c.IterValues(func(v *planWithMetaData) bool { @@ -155,8 +169,10 @@ func TestExpensivePlanCache_SetAfterClose(t *testing.T) { require.NoError(t, err) c.Close() - // Set after Close should not panic - c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + // Set after Close should not panic — buffer drops silently + require.NotPanics(t, func() { + c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + }) _, ok := c.Get(1) require.False(t, ok) @@ -165,6 +181,7 @@ func TestExpensivePlanCache_SetAfterClose(t *testing.T) { func TestExpensivePlanCache_IterValuesEmpty(t *testing.T) { c, err := newExpensivePlanCache(10) require.NoError(t, err) + defer c.Close() count := 0 c.IterValues(func(v *planWithMetaData) bool { @@ -191,12 +208,14 @@ func TestExpensivePlanCache_IterValuesAfterClose(t *testing.T) { func TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { c, err := newExpensivePlanCache(2) require.NoError(t, err) + defer c.Close() c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) // Same duration as minimum (10ms) — should NOT evict (requires strictly greater) c.Set(3, &planWithMetaData{content: "q3"}, 10*time.Millisecond) + c.Wait() _, ok := c.Get(3) require.False(t, ok, "entry with equal duration should not replace minimum") @@ -209,14 +228,17 @@ func 
TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { c, err := newExpensivePlanCache(1) require.NoError(t, err) + defer c.Close() c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + c.Wait() got, ok := c.Get(1) require.True(t, ok) require.Equal(t, "q1", got.content) // Adding a more expensive entry should evict the only entry c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) + c.Wait() _, ok = c.Get(1) require.False(t, ok) got, ok = c.Get(2) @@ -225,6 +247,7 @@ func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { // Adding a cheaper entry should be rejected c.Set(3, &planWithMetaData{content: "q3"}, 5*time.Millisecond) + c.Wait() _, ok = c.Get(3) require.False(t, ok) _, ok = c.Get(2) @@ -234,9 +257,10 @@ func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { c, err := newExpensivePlanCache(100) require.NoError(t, err) + defer c.Close() done := make(chan struct{}) - // Concurrent writers — each goroutine writes to its own key range + // Concurrent writers for i := 0; i < 10; i++ { go func(id int) { defer func() { done <- struct{}{} }() @@ -271,20 +295,6 @@ func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { for i := 0; i < 25; i++ { <-done } - - // Cache should be at capacity and all entries should be retrievable - count := 0 - c.IterValues(func(v *planWithMetaData) bool { - count++ - return false - }) - require.Equal(t, 100, count, "cache should be at max capacity") - - // Every entry in the cache should be gettable - c.IterValues(func(v *planWithMetaData) bool { - require.Equal(t, "q", v.content) - return false - }) } func TestExpensivePlanCache_InvalidSize(t *testing.T) { diff --git a/router/core/graph_server.go b/router/core/graph_server.go index fff92cf12f..12b0ee9e35 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -1355,6 +1355,7 @@ func (s *graphServer) buildGraphMux( 
if err != nil { return nil, fmt.Errorf("failed to create operation planner: %w", err) } + operationPlanner.planningDurationOverride = s.planningDurationOverride gm.operationPlanner = operationPlanner // We support the MCP only on the base graph. Feature flags are not supported yet. @@ -1416,7 +1417,7 @@ func (s *graphServer) buildGraphMux( case s.cacheWarmup.InMemoryFallback && (s.selfRegister == nil || !s.cacheWarmup.Source.CdnSource.Enabled): // We first utilize the existing plan cache (if it was already set, i.e., not on the first start) to create a list of queries // and then reset the plan cache to the new plan cache for this start afterwards. - warmupConfig.Source = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) + warmupConfig.Source = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getCachedOperationsForFF(opts.FeatureFlagName)) opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, gm.planCache) opts.ReloadPersistentState.inMemoryPlanCacheFallback.setExpensiveCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) case s.cacheWarmup.Source.CdnSource.Enabled: @@ -1427,7 +1428,7 @@ func (s *graphServer) buildGraphMux( // We use the in-memory cache as a fallback if enabled // This is useful for when an issue occurs with the CDN when retrieving the required manifest if s.cacheWarmup.InMemoryFallback { - warmupConfig.FallbackSource = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) + warmupConfig.FallbackSource = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getCachedOperationsForFF(opts.FeatureFlagName)) opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, gm.planCache) opts.ReloadPersistentState.inMemoryPlanCacheFallback.setExpensiveCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) } diff --git 
a/router/core/operation_planner.go b/router/core/operation_planner.go index cb93690842..468b4ca19f 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -39,6 +39,10 @@ type OperationPlanner struct { logger *zap.Logger threshold time.Duration + + // planningDurationOverride, when set, replaces the measured planning duration. + // This is used in tests to simulate slow queries. + planningDurationOverride func(content string) time.Duration } type operationPlannerOpts struct { @@ -201,9 +205,19 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions prepared.planningDuration = time.Since(start) p.planCache.Set(operationID, prepared, 1) - if p.useFallback && p.threshold > 0 && prepared.planningDuration >= p.threshold && prepared.content != "" { - p.expensiveCache.Set(operationID, prepared, prepared.planningDuration) + + // Only run this when we care about expensive cache items + if p.useFallback { + // This is only used for test cases + if p.planningDurationOverride != nil { + prepared.planningDuration = p.planningDurationOverride(prepared.content) + } + + if prepared.planningDuration >= p.threshold { + p.expensiveCache.Set(operationID, prepared, prepared.planningDuration) + } } + return prepared, nil }) if err != nil { diff --git a/router/core/reload_persistent_state.go b/router/core/reload_persistent_state.go index 54a41acc71..cd47102f2e 100644 --- a/router/core/reload_persistent_state.go +++ b/router/core/reload_persistent_state.go @@ -87,8 +87,9 @@ func (c *InMemoryPlanCacheFallback) IsEnabled() bool { return c.queriesForFeatureFlag != nil } -// getPlanCacheForFF gets the plan cache in the []*nodev1.Operation format for a specific feature flag key -func (c *InMemoryPlanCacheFallback) getPlanCacheForFF(featureFlagKey string) []*nodev1.Operation { +// getCachedOperationsForFF returns all cached operations for a feature flag key, +// including entries from both the main plan cache and the expensive query 
cache. +func (c *InMemoryPlanCacheFallback) getCachedOperationsForFF(featureFlagKey string) []*nodev1.Operation { c.mu.RLock() defer c.mu.RUnlock() @@ -96,19 +97,25 @@ func (c *InMemoryPlanCacheFallback) getPlanCacheForFF(featureFlagKey string) []* return nil } + var ops []*nodev1.Operation + switch cache := c.queriesForFeatureFlag[featureFlagKey].(type) { case planCache: - return convertToNodeOperation(cache) + ops = convertToNodeOperation(cache) case []*nodev1.Operation: - return cache + ops = cache // This would occur during the first start (we add this case to specifically log any other cases) case nil: - return nil // This should not happen as we cannot have any types other than the above default: c.logger.Error("unexpected type") - return nil } + + if expCache, ok := c.expensiveCaches[featureFlagKey]; ok { + ops = mergeExpensiveCacheOperations(ops, expCache) + } + + return ops } // setPlanCacheForFF sets the plan cache for a specific feature flag key @@ -144,6 +151,12 @@ func (c *InMemoryPlanCacheFallback) extractQueriesAndOverridePlanCache() { return } + // Wait for all pending writes from expensive caches so that + // IterValues sees a complete snapshot before we extract. 
+ for _, expCache := range c.expensiveCaches { + expCache.Wait() + } + fallbackMap := make(map[string]any) for k, v := range c.queriesForFeatureFlag { if cache, ok := v.(planCache); ok { @@ -218,4 +231,3 @@ func mergeExpensiveCacheOperations(ops []*nodev1.Operation, expCache *expensiveP }) return ops } - diff --git a/router/core/reload_persistent_state_test.go b/router/core/reload_persistent_state_test.go index 4ec5e0638b..ac3adf31c1 100644 --- a/router/core/reload_persistent_state_test.go +++ b/router/core/reload_persistent_state_test.go @@ -142,7 +142,7 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { } cache.queriesForFeatureFlag["test-ff"] = mockCache - result := cache.getPlanCacheForFF("test-ff") + result := cache.getCachedOperationsForFF("test-ff") require.NotNil(t, result) require.IsType(t, []*nodev1.Operation{}, result) @@ -168,7 +168,7 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { } cache.queriesForFeatureFlag["test-ff"] = expectedOps - result := cache.getPlanCacheForFF("test-ff") + result := cache.getCachedOperationsForFF("test-ff") require.NotNil(t, result) require.Equal(t, expectedOps, result) @@ -181,7 +181,7 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { queriesForFeatureFlag: make(map[string]any), } - result := cache.getPlanCacheForFF("non-existent") + result := cache.getCachedOperationsForFF("non-existent") require.Nil(t, result) }) @@ -191,7 +191,7 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { queriesForFeatureFlag: nil, } - result := cache.getPlanCacheForFF("test-ff") + result := cache.getCachedOperationsForFF("test-ff") require.Nil(t, result) }) diff --git a/router/core/router.go b/router/core/router.go index dd6d14e17d..5e8a505f8b 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -2236,6 +2236,14 @@ func WithCacheWarmupConfig(cfg *config.CacheWarmupConfiguration) Option { } } +// WithPlanningDurationOverride sets a function that 
overrides the measured planning duration. +// Used in tests to simulate slow queries that exceed the expensive query threshold. +func WithPlanningDurationOverride(fn func(content string) time.Duration) Option { + return func(r *Router) { + r.planningDurationOverride = fn + } +} + func WithMCP(cfg config.MCPConfiguration) Option { return func(r *Router) { r.mcp = cfg diff --git a/router/core/router_config.go b/router/core/router_config.go index bdb126614f..6580e1e639 100644 --- a/router/core/router_config.go +++ b/router/core/router_config.go @@ -139,6 +139,7 @@ type Config struct { subgraphErrorPropagation config.SubgraphErrorPropagationConfiguration clientHeader config.ClientHeader cacheWarmup *config.CacheWarmupConfiguration + planningDurationOverride func(content string) time.Duration subscriptionHeartbeatInterval time.Duration hostName string mcp config.MCPConfiguration From e36f2b3a747226a566feb8052a9b1053b3074d2a Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 11 Mar 2026 17:06:11 +0530 Subject: [PATCH 07/46] fix: updates --- router-tests/expensive_query_cache_test.go | 14 +++++++----- router/core/expensive_query_cache_test.go | 26 +++++++++++++++++----- router/core/graph_server.go | 4 ++-- router/core/operation_planner.go | 11 ++++----- 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/router-tests/expensive_query_cache_test.go b/router-tests/expensive_query_cache_test.go index 4e8a21ef5c..4a575d62c1 100644 --- a/router-tests/expensive_query_cache_test.go +++ b/router-tests/expensive_query_cache_test.go @@ -35,7 +35,9 @@ func TestExpensiveQueryCache(t *testing.T) { {Query: `query m($id: Int!){ employee(id: $id) { id details { forename surname } } }`, Variables: []byte(`{"id": 1}`)}, } - allQueries := append(slowQueries, fastQueries...) + allQueries := make([]testenv.GraphQLRequest, 0, len(slowQueries)+len(fastQueries)) + allQueries = append(allQueries, slowQueries...) + allQueries = append(allQueries, fastQueries...) 
expensiveThreshold := 1 * time.Second @@ -105,7 +107,7 @@ func TestExpensiveQueryCache(t *testing.T) { // Only slow queries should end up in the expensive cache hits := waitForExpensiveCacheHits(t, xEnv, slowQueries) - require.Greater(t, hits, 0, "expected at least one slow query to be served from the expensive cache") + require.Positive(t, hits, "expected at least one slow query to be served from the expensive cache") }) }) @@ -193,7 +195,7 @@ func TestExpensiveQueryCache(t *testing.T) { hits := waitForExpensiveCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) }) - require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache after config reload") + require.Positive(t, hits, "expected at least one query to be served from the expensive cache after config reload") }) }) @@ -253,7 +255,7 @@ func TestExpensiveQueryCache(t *testing.T) { hits := waitForExpensiveCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { assert.Equal(ct, "v3", res.Response.Header.Get("X-Router-Config-Version")) }) - require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache after multiple reloads") + require.Positive(t, hits, "expected at least one query to be served from the expensive cache after multiple reloads") }) }) @@ -282,7 +284,7 @@ func TestExpensiveQueryCache(t *testing.T) { } hits := waitForExpensiveCacheHits(t, xEnv, slowQueries) - require.Greater(t, hits, 0, "expected at least one query to be served from the expensive cache") + require.Positive(t, hits, "expected at least one query to be served from the expensive cache") }) }) @@ -563,7 +565,7 @@ func TestExpensiveQueryCache(t *testing.T) { InMemoryFallback: true, }), // Override the hello query to be slow - core.WithPlanningDurationOverride(func(content string) time.Duration { + 
core.WithPlanningDurationOverride(func(_ string) time.Duration { return 10 * time.Second }), }, diff --git a/router/core/expensive_query_cache_test.go b/router/core/expensive_query_cache_test.go index af2e122674..e9fd3b8000 100644 --- a/router/core/expensive_query_cache_test.go +++ b/router/core/expensive_query_cache_test.go @@ -8,6 +8,7 @@ import ( ) func TestExpensivePlanCache_GetSet(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(10) require.NoError(t, err) defer c.Close() @@ -40,6 +41,7 @@ func TestExpensivePlanCache_GetSet(t *testing.T) { } func TestExpensivePlanCache_BoundedSize(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(3) require.NoError(t, err) defer c.Close() @@ -66,6 +68,7 @@ func TestExpensivePlanCache_BoundedSize(t *testing.T) { } func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(3) require.NoError(t, err) defer c.Close() @@ -91,6 +94,7 @@ func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { } func TestExpensivePlanCache_UpdateExisting(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(2) require.NoError(t, err) defer c.Close() @@ -116,6 +120,7 @@ func TestExpensivePlanCache_UpdateExisting(t *testing.T) { } func TestExpensivePlanCache_IterValues(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(10) require.NoError(t, err) defer c.Close() @@ -135,6 +140,7 @@ func TestExpensivePlanCache_IterValues(t *testing.T) { } func TestExpensivePlanCache_IterValues_EarlyStop(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(10) require.NoError(t, err) defer c.Close() @@ -145,7 +151,7 @@ func TestExpensivePlanCache_IterValues_EarlyStop(t *testing.T) { c.Wait() count := 0 - c.IterValues(func(v *planWithMetaData) bool { + c.IterValues(func(_ *planWithMetaData) bool { count++ return true // stop after first }) @@ -153,6 +159,7 @@ func TestExpensivePlanCache_IterValues_EarlyStop(t *testing.T) { } func 
TestExpensivePlanCache_Close(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(10) require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) @@ -165,6 +172,7 @@ func TestExpensivePlanCache_Close(t *testing.T) { } func TestExpensivePlanCache_SetAfterClose(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(10) require.NoError(t, err) c.Close() @@ -179,12 +187,13 @@ func TestExpensivePlanCache_SetAfterClose(t *testing.T) { } func TestExpensivePlanCache_IterValuesEmpty(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(10) require.NoError(t, err) defer c.Close() count := 0 - c.IterValues(func(v *planWithMetaData) bool { + c.IterValues(func(_ *planWithMetaData) bool { count++ return false }) @@ -192,13 +201,14 @@ func TestExpensivePlanCache_IterValuesEmpty(t *testing.T) { } func TestExpensivePlanCache_IterValuesAfterClose(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(10) require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Close() count := 0 - c.IterValues(func(v *planWithMetaData) bool { + c.IterValues(func(_ *planWithMetaData) bool { count++ return false }) @@ -206,6 +216,7 @@ func TestExpensivePlanCache_IterValuesAfterClose(t *testing.T) { } func TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(2) require.NoError(t, err) defer c.Close() @@ -226,6 +237,7 @@ func TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { } func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(1) require.NoError(t, err) defer c.Close() @@ -255,6 +267,7 @@ func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { } func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { + t.Parallel() c, err := newExpensivePlanCache(100) require.NoError(t, err) defer c.Close() @@ -265,7 +278,7 @@ func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { go func(id int) 
{ defer func() { done <- struct{}{} }() for j := 0; j < 100; j++ { - key := uint64(id*100 + j) + key := uint64(id*100 + j) //nolint:gosec // test code, no overflow risk c.Set(key, &planWithMetaData{content: "q"}, time.Duration(j)*time.Millisecond) } }(i) @@ -276,7 +289,7 @@ func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { go func(id int) { defer func() { done <- struct{}{} }() for j := 0; j < 100; j++ { - c.Get(uint64(id*100 + j)) + c.Get(uint64(id*100 + j)) //nolint:gosec // test code, no overflow risk } }(i) } @@ -285,7 +298,7 @@ func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { for i := 0; i < 5; i++ { go func() { defer func() { done <- struct{}{} }() - c.IterValues(func(v *planWithMetaData) bool { + c.IterValues(func(_ *planWithMetaData) bool { return false }) }() @@ -298,6 +311,7 @@ func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { } func TestExpensivePlanCache_InvalidSize(t *testing.T) { + t.Parallel() _, err := newExpensivePlanCache(0) require.Error(t, err) diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 12b0ee9e35..2a68eac662 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -585,10 +585,10 @@ func (s *graphMux) buildOperationCaches(srv *graphServer) (computeSha256 bool, e } if srv.cacheWarmup != nil && srv.cacheWarmup.Enabled && srv.cacheWarmup.InMemoryFallback { planCacheConfig.OnEvict = func(item *ristretto.Item[*planWithMetaData]) { - if s.operationPlanner == nil || s.operationPlanner.expensiveCache == nil || item.Value.content == "" { + if s.operationPlanner == nil || s.operationPlanner.expensiveCache == nil { return } - if s.operationPlanner.threshold > 0 && item.Value.planningDuration >= s.operationPlanner.threshold { + if item.Value.planningDuration >= s.operationPlanner.threshold { s.operationPlanner.expensiveCache.Set(item.Key, item.Value, item.Value.planningDuration) } } diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index 
468b4ca19f..2e7f681ae6 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -5,18 +5,15 @@ import ( "strconv" "time" - "go.uber.org/zap" - - "golang.org/x/sync/singleflight" - + graphqlmetricsv1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/graphqlmetrics/v1" + "github.com/wundergraph/cosmo/router/pkg/graphqlschemausage" "github.com/wundergraph/graphql-go-tools/v2/pkg/ast" "github.com/wundergraph/graphql-go-tools/v2/pkg/astparser" "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/postprocess" "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" - - graphqlmetricsv1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/graphqlmetrics/v1" - "github.com/wundergraph/cosmo/router/pkg/graphqlschemausage" + "go.uber.org/zap" + "golang.org/x/sync/singleflight" ) type planWithMetaData struct { From 80c559478fe75d797b9d244dd97e620c16fbe4e4 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 11 Mar 2026 20:25:34 +0530 Subject: [PATCH 08/46] fix: only persist the expensive cache elements --- router-tests/cache_warmup_test.go | 15 +- router/core/expensive_query_cache.go | 13 +- router/core/graph_server.go | 2 - router/core/reload_persistent_state.go | 134 +++------ router/core/reload_persistent_state_test.go | 287 ++++++++------------ 5 files changed, 167 insertions(+), 284 deletions(-) diff --git a/router-tests/cache_warmup_test.go b/router-tests/cache_warmup_test.go index 7692eaa9d4..f9a6206869 100644 --- a/router-tests/cache_warmup_test.go +++ b/router-tests/cache_warmup_test.go @@ -984,6 +984,9 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { }, }), core.WithConfigVersionHeader(true), + core.WithPlanningDurationOverride(func(content string) time.Duration { + return 10 * time.Second + }), }, RouterConfig: &testenv.RouterConfig{ ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { @@ -1130,6 +1133,9 @@ func 
TestInMemoryPlanCacheFallback(t *testing.T) { Enabled: true, InMemoryFallback: true, }), + core.WithPlanningDurationOverride(func(content string) time.Duration { + return 10 * time.Second + }), }, }, func(t *testing.T, xEnv *testenv.Environment) { res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ @@ -1183,6 +1189,9 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { }, }, }), + core.WithPlanningDurationOverride(func(content string) time.Duration { + return 10 * time.Second + }), }, }, func(t *testing.T, xEnv *testenv.Environment) { res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ @@ -1236,6 +1245,9 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { }, }, }), + core.WithPlanningDurationOverride(func(content string) time.Duration { + return 10 * time.Second + }), }, }, func(t *testing.T, xEnv *testenv.Environment) { res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ @@ -1286,7 +1298,8 @@ cache_warmup: cdn: enabled: false -engine: +engine: + expensive_query_threshold: "0s" debug: enable_cache_response_headers: true ` diff --git a/router/core/expensive_query_cache.go b/router/core/expensive_query_cache.go index 9314f10eef..9cd218d213 100644 --- a/router/core/expensive_query_cache.go +++ b/router/core/expensive_query_cache.go @@ -105,10 +105,15 @@ func (c *expensivePlanCache) applySet(key uint64, plan *planWithMetaData, durati defer c.mu.Unlock() // If key already exists, update it - if _, ok := c.entries[key]; ok { - c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} - if key == c.minKey || duration < c.minDur { - c.refreshMin() + if currEntry, ok := c.entries[key]; ok { + // Consider worst case, if the previous run was faster then increase + if currEntry.duration < duration { + c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} + + // If the minKey duration was increased, there can be a new minKey + if c.minKey == key { + c.refreshMin() + } } return } diff --git a/router/core/graph_server.go 
b/router/core/graph_server.go index 2a68eac662..44d6c9b1a5 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -1418,7 +1418,6 @@ func (s *graphServer) buildGraphMux( // We first utilize the existing plan cache (if it was already set, i.e., not on the first start) to create a list of queries // and then reset the plan cache to the new plan cache for this start afterwards. warmupConfig.Source = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getCachedOperationsForFF(opts.FeatureFlagName)) - opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, gm.planCache) opts.ReloadPersistentState.inMemoryPlanCacheFallback.setExpensiveCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) case s.cacheWarmup.Source.CdnSource.Enabled: if s.graphApiToken == "" { @@ -1429,7 +1428,6 @@ func (s *graphServer) buildGraphMux( // This is useful for when an issue occurs with the CDN when retrieving the required manifest if s.cacheWarmup.InMemoryFallback { warmupConfig.FallbackSource = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getCachedOperationsForFF(opts.FeatureFlagName)) - opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, gm.planCache) opts.ReloadPersistentState.inMemoryPlanCacheFallback.setExpensiveCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) } cdnSource, err := NewCDNSource(s.cdnConfig.URL, s.graphApiToken, s.logger) diff --git a/router/core/reload_persistent_state.go b/router/core/reload_persistent_state.go index cd47102f2e..8a2754d222 100644 --- a/router/core/reload_persistent_state.go +++ b/router/core/reload_persistent_state.go @@ -3,13 +3,10 @@ package core import ( "sync" - "github.com/dgraph-io/ristretto/v2" nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" "go.uber.org/zap" ) -type planCache = *ristretto.Cache[uint64, *planWithMetaData] - // ReloadPersistentState This file 
describes any configuration which should persist or be shared across router restarts type ReloadPersistentState struct { inMemoryPlanCacheFallback *InMemoryPlanCacheFallback @@ -45,12 +42,14 @@ func (s *ReloadPersistentState) OnRouterConfigReload() { s.inMemoryPlanCacheFallback.extractQueriesAndOverridePlanCache() } -// InMemoryPlanCacheFallback is a store that stores either queries or references to the planner cache for use with the cache warmer +// InMemoryPlanCacheFallback is a store that stores expensive query cache references or extracted operations +// for use with the cache warmer across config reloads. Only expensive queries (planning duration >= threshold) +// are persisted. type InMemoryPlanCacheFallback struct { - mu sync.RWMutex - queriesForFeatureFlag map[string]any - expensiveCaches map[string]*expensivePlanCache - logger *zap.Logger + mu sync.RWMutex + expensiveCaches map[string]*expensivePlanCache // live references during runtime + cachedOps map[string][]*nodev1.Operation // extracted snapshots after reload + logger *zap.Logger } // updateStateFromConfig updates the internal state of the in-memory fallback cache based on the provided config @@ -65,18 +64,18 @@ func (c *InMemoryPlanCacheFallback) updateStateFromConfig(config *Config) { // If the configuration change occurred which disabled or enabled the fallback cache, we need to update the internal state if enabled { // Only initialize if its nil because its a first start, we dont want to override any old data in a map - if c.queriesForFeatureFlag == nil { - c.queriesForFeatureFlag = make(map[string]any) - } if c.expensiveCaches == nil { c.expensiveCaches = make(map[string]*expensivePlanCache) } + if c.cachedOps == nil { + c.cachedOps = make(map[string][]*nodev1.Operation) + } return } - // Reset the map to free up memory - c.queriesForFeatureFlag = nil + // Reset the maps to free up memory c.expensiveCaches = nil + c.cachedOps = nil } // IsEnabled returns whether the in-memory fallback cache is 
enabled @@ -84,49 +83,19 @@ func (c *InMemoryPlanCacheFallback) IsEnabled() bool { c.mu.RLock() defer c.mu.RUnlock() - return c.queriesForFeatureFlag != nil + return c.expensiveCaches != nil } -// getCachedOperationsForFF returns all cached operations for a feature flag key, -// including entries from both the main plan cache and the expensive query cache. +// getCachedOperationsForFF returns all cached operations for a feature flag key. func (c *InMemoryPlanCacheFallback) getCachedOperationsForFF(featureFlagKey string) []*nodev1.Operation { c.mu.RLock() defer c.mu.RUnlock() - if c.queriesForFeatureFlag == nil { + if c.cachedOps == nil { return nil } - var ops []*nodev1.Operation - - switch cache := c.queriesForFeatureFlag[featureFlagKey].(type) { - case planCache: - ops = convertToNodeOperation(cache) - case []*nodev1.Operation: - ops = cache - // This would occur during the first start (we add this case to specifically log any other cases) - case nil: - // This should not happen as we cannot have any types other than the above - default: - c.logger.Error("unexpected type") - } - - if expCache, ok := c.expensiveCaches[featureFlagKey]; ok { - ops = mergeExpensiveCacheOperations(ops, expCache) - } - - return ops -} - -// setPlanCacheForFF sets the plan cache for a specific feature flag key -func (c *InMemoryPlanCacheFallback) setPlanCacheForFF(featureFlagKey string, cache planCache) { - c.mu.Lock() - defer c.mu.Unlock() - - if c.queriesForFeatureFlag == nil || cache == nil { - return - } - c.queriesForFeatureFlag[featureFlagKey] = cache + return c.cachedOps[featureFlagKey] } // setExpensiveCacheForFF stores the expensive plan cache reference for a feature flag key @@ -141,13 +110,13 @@ func (c *InMemoryPlanCacheFallback) setExpensiveCacheForFF(featureFlagKey string c.expensiveCaches[featureFlagKey] = cache } -// extractQueriesAndOverridePlanCache extracts the queries from the plan cache and overrides the internal map. 
-// It also merges entries from the expensive plan cache so they survive config reloads. +// extractQueriesAndOverridePlanCache extracts operations from the expensive plan caches +// and stores them in cachedOps so they survive config reloads. func (c *InMemoryPlanCacheFallback) extractQueriesAndOverridePlanCache() { c.mu.Lock() defer c.mu.Unlock() - if c.queriesForFeatureFlag == nil { + if c.expensiveCaches == nil { return } @@ -157,18 +126,22 @@ func (c *InMemoryPlanCacheFallback) extractQueriesAndOverridePlanCache() { expCache.Wait() } - fallbackMap := make(map[string]any) - for k, v := range c.queriesForFeatureFlag { - if cache, ok := v.(planCache); ok { - ops := convertToNodeOperation(cache) - // Merge expensive cache entries that may not be in the main cache - if expCache, hasExp := c.expensiveCaches[k]; hasExp { - ops = mergeExpensiveCacheOperations(ops, expCache) + cachedOps := make(map[string][]*nodev1.Operation) + for k, expCache := range c.expensiveCaches { + var ops []*nodev1.Operation + expCache.IterValues(func(v *planWithMetaData) bool { + if v.content != "" { + ops = append(ops, &nodev1.Operation{ + Request: &nodev1.OperationRequest{Query: v.content}, + }) } - fallbackMap[k] = ops + return false + }) + if len(ops) > 0 { + cachedOps[k] = ops } } - c.queriesForFeatureFlag = fallbackMap + c.cachedOps = cachedOps c.expensiveCaches = make(map[string]*expensivePlanCache) } @@ -178,56 +151,21 @@ func (c *InMemoryPlanCacheFallback) cleanupUnusedFeatureFlags(routerCfg *nodev1. 
c.mu.Lock() defer c.mu.Unlock() - if c.queriesForFeatureFlag == nil { + if c.expensiveCaches == nil { return } - for ffName := range c.queriesForFeatureFlag { + for ffName := range c.expensiveCaches { // Skip the base which is "" if ffName == "" { continue } if routerCfg.FeatureFlagConfigs == nil { - delete(c.queriesForFeatureFlag, ffName) delete(c.expensiveCaches, ffName) + delete(c.cachedOps, ffName) } else if _, exists := routerCfg.FeatureFlagConfigs.ConfigByFeatureFlagName[ffName]; !exists { - delete(c.queriesForFeatureFlag, ffName) delete(c.expensiveCaches, ffName) + delete(c.cachedOps, ffName) } } } - -func convertToNodeOperation(data planCache) []*nodev1.Operation { - items := make([]*nodev1.Operation, 0) - - data.IterValues(func(v *planWithMetaData) (stop bool) { - items = append(items, &nodev1.Operation{ - Request: &nodev1.OperationRequest{Query: v.content}, - }) - return false - }) - return items -} - -// mergeExpensiveCacheOperations appends operations from the expensive cache that -// are not already present in the existing operations list. 
-func mergeExpensiveCacheOperations(ops []*nodev1.Operation, expCache *expensivePlanCache) []*nodev1.Operation { - seen := make(map[string]struct{}, len(ops)) - for _, op := range ops { - if op.Request != nil { - seen[op.Request.Query] = struct{}{} - } - } - expCache.IterValues(func(v *planWithMetaData) bool { - if v.content != "" { - if _, exists := seen[v.content]; !exists { - ops = append(ops, &nodev1.Operation{ - Request: &nodev1.OperationRequest{Query: v.content}, - }) - seen[v.content] = struct{}{} - } - } - return false - }) - return ops -} diff --git a/router/core/reload_persistent_state_test.go b/router/core/reload_persistent_state_test.go index ac3adf31c1..fb2a7f9a5a 100644 --- a/router/core/reload_persistent_state_test.go +++ b/router/core/reload_persistent_state_test.go @@ -3,7 +3,6 @@ package core import ( "testing" - "github.com/dgraph-io/ristretto/v2" "github.com/stretchr/testify/require" nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" "github.com/wundergraph/cosmo/router/pkg/config" @@ -24,16 +23,18 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.NotNil(t, cache.queriesForFeatureFlag) - require.Empty(t, cache.queriesForFeatureFlag) + require.NotNil(t, cache.expensiveCaches) + require.Empty(t, cache.expensiveCaches) + require.NotNil(t, cache.cachedOps) + require.Empty(t, cache.cachedOps) }) t.Run("disable cache from enabled state", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), + expensiveCaches: make(map[string]*expensivePlanCache), + cachedOps: make(map[string][]*nodev1.Operation), } - cache.queriesForFeatureFlag["test"] = nil cfg := &Config{ cacheWarmup: &config.CacheWarmupConfiguration{ @@ -43,16 +44,18 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.Nil(t, cache.queriesForFeatureFlag) + 
require.Nil(t, cache.expensiveCaches) + require.Nil(t, cache.cachedOps) }) t.Run("update when already enabled keeps existing data", func(t *testing.T) { t.Parallel() - existingMap := make(map[string]any) - existingMap["test"] = nil + existingCaches := make(map[string]*expensivePlanCache) + existingCaches["test"] = nil cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: existingMap, + expensiveCaches: existingCaches, + cachedOps: make(map[string][]*nodev1.Operation), } cfg := &Config{ @@ -64,15 +67,15 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.NotNil(t, cache.queriesForFeatureFlag) - require.Len(t, cache.queriesForFeatureFlag, 1) - require.Contains(t, cache.queriesForFeatureFlag, "test") + require.NotNil(t, cache.expensiveCaches) + require.Len(t, cache.expensiveCaches, 1) + require.Contains(t, cache.expensiveCaches, "test") }) t.Run("update when already disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: nil, + expensiveCaches: nil, } cfg := &Config{ @@ -83,13 +86,14 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.Nil(t, cache.queriesForFeatureFlag) + require.Nil(t, cache.expensiveCaches) }) t.Run("nil cacheWarmup config disables cache", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), + expensiveCaches: make(map[string]*expensivePlanCache), + cachedOps: make(map[string][]*nodev1.Operation), } cfg := &Config{ @@ -98,7 +102,8 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.Nil(t, cache.queriesForFeatureFlag) + require.Nil(t, cache.expensiveCaches) + require.Nil(t, cache.cachedOps) }) t.Run("cacheWarmup enabled but InMemoryFallback disabled", func(t *testing.T) { @@ -114,49 +119,14 @@ func 
TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.Nil(t, cache.queriesForFeatureFlag) + require.Nil(t, cache.expensiveCaches) }) } func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { t.Parallel() - t.Run("returns operations for existing feature flag when enabled with ristretto cache", func(t *testing.T) { - t.Parallel() - mockCache, err := ristretto.NewCache(&ristretto.Config[uint64, *planWithMetaData]{ - MaxCost: 10000, - NumCounters: 10000000, - IgnoreInternalCost: true, - BufferItems: 64, - }) - require.NoError(t, err) - - query1 := "query { test1 }" - query2 := "query { test2 }" - - mockCache.Set(1, &planWithMetaData{content: query1}, 1) - mockCache.Set(2, &planWithMetaData{content: query2}, 1) - mockCache.Wait() - - cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), - } - cache.queriesForFeatureFlag["test-ff"] = mockCache - - result := cache.getCachedOperationsForFF("test-ff") - - require.NotNil(t, result) - require.IsType(t, []*nodev1.Operation{}, result) - require.Len(t, result, 2) - - // Verify the operations contain the expected queries (order may vary) - queries := make([]string, len(result)) - for i, op := range result { - queries[i] = op.Request.Query - } - require.ElementsMatch(t, []string{query1, query2}, queries) - }) - t.Run("returns operations for existing feature flag when enabled with operation slice", func(t *testing.T) { + t.Run("returns operations for existing feature flag from cachedOps", func(t *testing.T) { t.Parallel() expectedOps := []*nodev1.Operation{ {Request: &nodev1.OperationRequest{Query: "query { test1 }"}}, @@ -164,9 +134,10 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { } cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), + cachedOps: map[string][]*nodev1.Operation{ + "test-ff": expectedOps, + }, } - cache.queriesForFeatureFlag["test-ff"] = expectedOps 
result := cache.getCachedOperationsForFF("test-ff") @@ -174,11 +145,11 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { require.Equal(t, expectedOps, result) }) - t.Run("returns empty slice for non-existent feature flag", func(t *testing.T) { + t.Run("returns nil for non-existent feature flag", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - logger: zap.NewNop(), - queriesForFeatureFlag: make(map[string]any), + logger: zap.NewNop(), + cachedOps: make(map[string][]*nodev1.Operation), } result := cache.getCachedOperationsForFF("non-existent") @@ -188,7 +159,7 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { t.Run("returns nil when cache is disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: nil, + cachedOps: nil, } result := cache.getCachedOperationsForFF("test-ff") @@ -197,68 +168,22 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { }) } -func TestInMemoryPlanCacheFallback_SetPlanCacheForFF(t *testing.T) { - t.Parallel() - t.Run("sets cache for feature flag when enabled", func(t *testing.T) { - t.Parallel() - mockCache, err := ristretto.NewCache(&ristretto.Config[uint64, *planWithMetaData]{ - MaxCost: 100, - NumCounters: 10000, - BufferItems: 64, - }) - require.NoError(t, err) - - cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), - } - - cache.setPlanCacheForFF("test-ff", mockCache) - - require.Contains(t, cache.queriesForFeatureFlag, "test-ff") - // Verify it's the same cache by comparing the underlying pointer - require.Equal(t, cache.queriesForFeatureFlag["test-ff"], mockCache) - }) - - t.Run("does not set cache when disabled", func(t *testing.T) { - t.Parallel() - mockCache, err := ristretto.NewCache(&ristretto.Config[uint64, *planWithMetaData]{ - MaxCost: 100, - NumCounters: 10000, - BufferItems: 64, - }) - require.NoError(t, err) - - cache := &InMemoryPlanCacheFallback{ - 
queriesForFeatureFlag: nil, - } - - cache.setPlanCacheForFF("test-ff", mockCache) - - require.Nil(t, cache.queriesForFeatureFlag) - }) - - t.Run("does not set nil cache", func(t *testing.T) { - t.Parallel() - cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), - } - - cache.setPlanCacheForFF("test-ff", nil) - - require.NotContains(t, cache.queriesForFeatureFlag, "test-ff") - }) -} - func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { t.Parallel() t.Run("removes unused feature flags", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), + expensiveCaches: map[string]*expensivePlanCache{ + "ff1": nil, + "ff2": nil, + "ff3": nil, + }, + cachedOps: map[string][]*nodev1.Operation{ + "ff1": nil, + "ff2": nil, + "ff3": nil, + }, } - cache.queriesForFeatureFlag["ff1"] = nil - cache.queriesForFeatureFlag["ff2"] = nil - cache.queriesForFeatureFlag["ff3"] = nil routerCfg := &nodev1.RouterConfig{ FeatureFlagConfigs: &nodev1.FeatureFlagRouterExecutionConfigs{ @@ -271,19 +196,26 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { cache.cleanupUnusedFeatureFlags(routerCfg) - require.Len(t, cache.queriesForFeatureFlag, 2) - require.Contains(t, cache.queriesForFeatureFlag, "ff1") - require.Contains(t, cache.queriesForFeatureFlag, "ff2") - require.NotContains(t, cache.queriesForFeatureFlag, "ff3") + require.Len(t, cache.expensiveCaches, 2) + require.Contains(t, cache.expensiveCaches, "ff1") + require.Contains(t, cache.expensiveCaches, "ff2") + require.NotContains(t, cache.expensiveCaches, "ff3") + require.Len(t, cache.cachedOps, 2) + require.NotContains(t, cache.cachedOps, "ff3") }) t.Run("keeps empty string feature flag", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), + expensiveCaches: map[string]*expensivePlanCache{ + "": nil, + "ff1": nil, + }, + cachedOps: 
map[string][]*nodev1.Operation{ + "": nil, + "ff1": nil, + }, } - cache.queriesForFeatureFlag[""] = nil - cache.queriesForFeatureFlag["ff1"] = nil routerCfg := &nodev1.RouterConfig{ FeatureFlagConfigs: &nodev1.FeatureFlagRouterExecutionConfigs{ @@ -293,15 +225,15 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { cache.cleanupUnusedFeatureFlags(routerCfg) - require.Len(t, cache.queriesForFeatureFlag, 1) - require.Contains(t, cache.queriesForFeatureFlag, "") - require.NotContains(t, cache.queriesForFeatureFlag, "ff1") + require.Len(t, cache.expensiveCaches, 1) + require.Contains(t, cache.expensiveCaches, "") + require.NotContains(t, cache.expensiveCaches, "ff1") }) t.Run("does nothing when cache is disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: nil, + expensiveCaches: nil, } routerCfg := &nodev1.RouterConfig{ @@ -313,20 +245,27 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { cache.cleanupUnusedFeatureFlags(routerCfg) // Should still be nil because cleanup is skipped when disabled - require.Nil(t, cache.queriesForFeatureFlag) + require.Nil(t, cache.expensiveCaches) }) t.Run("removes feature flags when not in ConfigByFeatureFlagName", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), + expensiveCaches: map[string]*expensivePlanCache{ + "": nil, // base should be kept + "ff1": nil, + "ff2": nil, + "ff3": nil, + "ff4": nil, + "ff5": nil, + }, + cachedOps: map[string][]*nodev1.Operation{ + "": nil, + "ff1": nil, + "ff2": nil, + "ff3": nil, + }, } - cache.queriesForFeatureFlag[""] = nil // base should be kept - cache.queriesForFeatureFlag["ff1"] = nil - cache.queriesForFeatureFlag["ff2"] = nil - cache.queriesForFeatureFlag["ff3"] = nil - cache.queriesForFeatureFlag["ff4"] = nil - cache.queriesForFeatureFlag["ff5"] = nil routerCfg := &nodev1.RouterConfig{ FeatureFlagConfigs: nil, @@ 
-334,86 +273,77 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { cache.cleanupUnusedFeatureFlags(routerCfg) - require.Len(t, cache.queriesForFeatureFlag, 1) - require.Contains(t, cache.queriesForFeatureFlag, "") - require.NotContains(t, cache.queriesForFeatureFlag, "ff1") - require.NotContains(t, cache.queriesForFeatureFlag, "ff2") - require.NotContains(t, cache.queriesForFeatureFlag, "ff3") + require.Len(t, cache.expensiveCaches, 1) + require.Contains(t, cache.expensiveCaches, "") + require.NotContains(t, cache.expensiveCaches, "ff1") + require.NotContains(t, cache.expensiveCaches, "ff2") + require.NotContains(t, cache.expensiveCaches, "ff3") }) } func TestInMemoryPlanCacheFallback_ProcessOnConfigChangeRestart(t *testing.T) { t.Parallel() - t.Run("converts ristretto caches to operation slices", func(t *testing.T) { + t.Run("extracts expensive cache entries to cachedOps", func(t *testing.T) { t.Parallel() - mockCache1, err := ristretto.NewCache(&ristretto.Config[uint64, *planWithMetaData]{ - MaxCost: 10000, - NumCounters: 10000000, - IgnoreInternalCost: true, - BufferItems: 64, - }) - require.NoError(t, err) - - mockCache2, err := ristretto.NewCache(&ristretto.Config[uint64, *planWithMetaData]{ - MaxCost: 10000, - NumCounters: 10000000, - IgnoreInternalCost: true, - BufferItems: 64, - }) - require.NoError(t, err) query1 := "query { test1 }" query2 := "query { test2 }" - mockCache1.Set(1, &planWithMetaData{content: query1}, 1) - mockCache1.Wait() - mockCache2.Set(2, &planWithMetaData{content: query2}, 1) - mockCache2.Wait() + expCache1, err := newExpensivePlanCache(100) + require.NoError(t, err) + expCache2, err := newExpensivePlanCache(100) + require.NoError(t, err) + + expCache1.Set(1, &planWithMetaData{content: query1}, 5*1e9) + expCache1.Wait() + expCache2.Set(2, &planWithMetaData{content: query2}, 5*1e9) + expCache2.Wait() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), + expensiveCaches: 
map[string]*expensivePlanCache{ + "ff1": expCache1, + "ff2": expCache2, + }, + cachedOps: make(map[string][]*nodev1.Operation), } - cache.queriesForFeatureFlag["ff1"] = mockCache1 - cache.queriesForFeatureFlag["ff2"] = mockCache2 cache.extractQueriesAndOverridePlanCache() - // Verify both caches have been converted to operation slices - require.IsType(t, []*nodev1.Operation{}, cache.queriesForFeatureFlag["ff1"]) - require.IsType(t, []*nodev1.Operation{}, cache.queriesForFeatureFlag["ff2"]) + // Verify both caches have been extracted to cachedOps + require.Len(t, cache.cachedOps["ff1"], 1) + require.Len(t, cache.cachedOps["ff2"], 1) + require.Equal(t, query1, cache.cachedOps["ff1"][0].Request.Query) + require.Equal(t, query2, cache.cachedOps["ff2"][0].Request.Query) - ff1Ops := cache.queriesForFeatureFlag["ff1"].([]*nodev1.Operation) - ff2Ops := cache.queriesForFeatureFlag["ff2"].([]*nodev1.Operation) - - require.Len(t, ff1Ops, 1) - require.Len(t, ff2Ops, 1) - require.Equal(t, query1, ff1Ops[0].Request.Query) - require.Equal(t, query2, ff2Ops[0].Request.Query) + // expensiveCaches should be reset to empty map + require.NotNil(t, cache.expensiveCaches) + require.Empty(t, cache.expensiveCaches) }) t.Run("does nothing when cache is disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: nil, + expensiveCaches: nil, } cache.extractQueriesAndOverridePlanCache() // Should remain nil since processing is skipped - require.Nil(t, cache.queriesForFeatureFlag) + require.Nil(t, cache.expensiveCaches) }) t.Run("handles empty cache", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), + expensiveCaches: make(map[string]*expensivePlanCache), + cachedOps: make(map[string][]*nodev1.Operation), } require.NotPanics(t, func() { cache.extractQueriesAndOverridePlanCache() }) - require.Empty(t, cache.queriesForFeatureFlag) + require.Empty(t, cache.cachedOps) }) } @@ -422,7 
+352,7 @@ func TestInMemoryPlanCacheFallback_IsEnabled(t *testing.T) { t.Run("returns true when cache is enabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: make(map[string]any), + expensiveCaches: make(map[string]*expensivePlanCache), } require.True(t, cache.IsEnabled()) @@ -431,10 +361,9 @@ func TestInMemoryPlanCacheFallback_IsEnabled(t *testing.T) { t.Run("returns false when cache is disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - queriesForFeatureFlag: nil, + expensiveCaches: nil, } require.False(t, cache.IsEnabled()) }) - } From ed60ea517658b0d0b1039604170ff330237e9ac8 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 11 Mar 2026 20:46:18 +0530 Subject: [PATCH 09/46] fix: updates --- router-tests/cache_warmup_test.go | 13 +++++++++ router/core/reload_persistent_state.go | 28 +++++++++++++++++-- router/core/reload_persistent_state_test.go | 31 +++++++++++++++++++-- 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/router-tests/cache_warmup_test.go b/router-tests/cache_warmup_test.go index f9a6206869..bb756ed21f 100644 --- a/router-tests/cache_warmup_test.go +++ b/router-tests/cache_warmup_test.go @@ -973,6 +973,9 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { } testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExpensiveQueryCacheSize = 100 + }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ Enabled: true, @@ -1122,6 +1125,9 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { writeTestConfig(t, "initial", configFile) testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExpensiveQueryCacheSize = 100 + }, RouterOptions: []core.Option{ core.WithConfigVersionHeader(true), core.WithExecutionConfig(&core.ExecutionConfig{ @@ -1169,6 +1175,9 @@ func 
TestInMemoryPlanCacheFallback(t *testing.T) { var impl *fakeSelfRegister = nil testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExpensiveQueryCacheSize = 100 + }, CdnSever: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNotFound) })), @@ -1225,6 +1234,9 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { var impl *fakeSelfRegister = nil testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExpensiveQueryCacheSize = 100 + }, CdnSever: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusUnauthorized) })), @@ -1300,6 +1312,7 @@ cache_warmup: engine: expensive_query_threshold: "0s" + expensive_query_cache_size: 100 debug: enable_cache_response_headers: true ` diff --git a/router/core/reload_persistent_state.go b/router/core/reload_persistent_state.go index 8a2754d222..e0132cffef 100644 --- a/router/core/reload_persistent_state.go +++ b/router/core/reload_persistent_state.go @@ -87,15 +87,39 @@ func (c *InMemoryPlanCacheFallback) IsEnabled() bool { } // getCachedOperationsForFF returns all cached operations for a feature flag key. +// It first checks extracted snapshots (cachedOps), then falls back to reading +// from live expensive cache references (used during execution config updates +// where extractQueriesAndOverridePlanCache has not been called). 
func (c *InMemoryPlanCacheFallback) getCachedOperationsForFF(featureFlagKey string) []*nodev1.Operation { c.mu.RLock() defer c.mu.RUnlock() - if c.cachedOps == nil { + if c.expensiveCaches == nil { return nil } - return c.cachedOps[featureFlagKey] + // Check extracted snapshots first (populated after router config reloads) + if ops, ok := c.cachedOps[featureFlagKey]; ok { + return ops + } + + // Fall back to reading from live expensive cache references + // This path is used during execution config updates where the previous + // expensive cache is still valid and hasn't been extracted yet. + if expCache, ok := c.expensiveCaches[featureFlagKey]; ok { + var ops []*nodev1.Operation + expCache.IterValues(func(v *planWithMetaData) bool { + if v.content != "" { + ops = append(ops, &nodev1.Operation{ + Request: &nodev1.OperationRequest{Query: v.content}, + }) + } + return false + }) + return ops + } + + return nil } // setExpensiveCacheForFF stores the expensive plan cache reference for a feature flag key diff --git a/router/core/reload_persistent_state_test.go b/router/core/reload_persistent_state_test.go index fb2a7f9a5a..79d1c02ded 100644 --- a/router/core/reload_persistent_state_test.go +++ b/router/core/reload_persistent_state_test.go @@ -134,6 +134,7 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { } cache := &InMemoryPlanCacheFallback{ + expensiveCaches: make(map[string]*expensivePlanCache), cachedOps: map[string][]*nodev1.Operation{ "test-ff": expectedOps, }, @@ -145,13 +146,36 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { require.Equal(t, expectedOps, result) }) - t.Run("returns nil for non-existent feature flag", func(t *testing.T) { + t.Run("returns operations from live expensive cache when cachedOps has no entry", func(t *testing.T) { t.Parallel() + + expCache, err := newExpensivePlanCache(100) + require.NoError(t, err) + expCache.Set(1, &planWithMetaData{content: "query { fromExpensive }"}, 5*1e9) + 
expCache.Wait() + cache := &InMemoryPlanCacheFallback{ - logger: zap.NewNop(), + expensiveCaches: map[string]*expensivePlanCache{ + "test-ff": expCache, + }, cachedOps: make(map[string][]*nodev1.Operation), } + result := cache.getCachedOperationsForFF("test-ff") + + require.NotNil(t, result) + require.Len(t, result, 1) + require.Equal(t, "query { fromExpensive }", result[0].Request.Query) + }) + + t.Run("returns nil for non-existent feature flag", func(t *testing.T) { + t.Parallel() + cache := &InMemoryPlanCacheFallback{ + logger: zap.NewNop(), + expensiveCaches: make(map[string]*expensivePlanCache), + cachedOps: make(map[string][]*nodev1.Operation), + } + result := cache.getCachedOperationsForFF("non-existent") require.Nil(t, result) }) @@ -159,7 +183,8 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { t.Run("returns nil when cache is disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - cachedOps: nil, + expensiveCaches: nil, + cachedOps: nil, } result := cache.getCachedOperationsForFF("test-ff") From 56f65b3b569a1cc354c57c06fdbc82e616126d10 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 11 Mar 2026 21:10:44 +0530 Subject: [PATCH 10/46] fix: updates --- router/core/expensive_query_cache.go | 29 ++++++----- router/core/expensive_query_cache_test.go | 55 +++++++++++++++------ router/core/graph_server.go | 4 +- router/core/operation_planner.go | 9 +--- router/core/reload_persistent_state_test.go | 6 +-- router/pkg/config/config.schema.json | 4 +- 6 files changed, 66 insertions(+), 41 deletions(-) diff --git a/router/core/expensive_query_cache.go b/router/core/expensive_query_cache.go index 9cd218d213..3c99d855ba 100644 --- a/router/core/expensive_query_cache.go +++ b/router/core/expensive_query_cache.go @@ -25,11 +25,12 @@ type setRequest struct { // background goroutine, making Set non-blocking. Reads are protected by a RWMutex. 
// It tracks the minimum-duration entry so that rejection of cheaper entries is O(1). type expensivePlanCache struct { - mu sync.RWMutex - entries map[uint64]*expensivePlanEntry - maxSize int - minKey uint64 - minDur time.Duration + mu sync.RWMutex + entries map[uint64]*expensivePlanEntry + maxSize int + threshold time.Duration + minKey uint64 + minDur time.Duration writeCh chan setRequest stop chan struct{} @@ -39,16 +40,17 @@ type expensivePlanCache struct { // We use the same value as ristretto (this would be the buffer size if we used ristretto as the backing cache) const defaultWriteBufferSize = 32 * 1024 -func newExpensivePlanCache(maxSize int) (*expensivePlanCache, error) { +func newExpensivePlanCache(maxSize int, threshold time.Duration) (*expensivePlanCache, error) { if maxSize < 1 { return nil, fmt.Errorf("expensive query cache size must be at least 1, got %d", maxSize) } c := &expensivePlanCache{ - entries: make(map[uint64]*expensivePlanEntry, maxSize), - maxSize: maxSize, - writeCh: make(chan setRequest, defaultWriteBufferSize), - stop: make(chan struct{}), - done: make(chan struct{}), + entries: make(map[uint64]*expensivePlanEntry, maxSize), + maxSize: maxSize, + threshold: threshold, + writeCh: make(chan setRequest, defaultWriteBufferSize), + stop: make(chan struct{}), + done: make(chan struct{}), } go c.processWrites() return c, nil @@ -101,6 +103,11 @@ func (c *expensivePlanCache) Wait() { // applySet performs the actual cache mutation. Must only be called from processWrites. 
func (c *expensivePlanCache) applySet(key uint64, plan *planWithMetaData, duration time.Duration) { + // Reject entries that don't meet the threshold + if duration < c.threshold { + return + } + c.mu.Lock() defer c.mu.Unlock() diff --git a/router/core/expensive_query_cache_test.go b/router/core/expensive_query_cache_test.go index e9fd3b8000..d1e5fda834 100644 --- a/router/core/expensive_query_cache_test.go +++ b/router/core/expensive_query_cache_test.go @@ -9,7 +9,7 @@ import ( func TestExpensivePlanCache_GetSet(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10, 0) require.NoError(t, err) defer c.Close() @@ -42,7 +42,7 @@ func TestExpensivePlanCache_GetSet(t *testing.T) { func TestExpensivePlanCache_BoundedSize(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(3) + c, err := newExpensivePlanCache(3, 0) require.NoError(t, err) defer c.Close() @@ -69,7 +69,7 @@ func TestExpensivePlanCache_BoundedSize(t *testing.T) { func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(3) + c, err := newExpensivePlanCache(3, 0) require.NoError(t, err) defer c.Close() @@ -95,7 +95,7 @@ func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { func TestExpensivePlanCache_UpdateExisting(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(2) + c, err := newExpensivePlanCache(2, 0) require.NoError(t, err) defer c.Close() @@ -121,7 +121,7 @@ func TestExpensivePlanCache_UpdateExisting(t *testing.T) { func TestExpensivePlanCache_IterValues(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10, 0) require.NoError(t, err) defer c.Close() @@ -141,7 +141,7 @@ func TestExpensivePlanCache_IterValues(t *testing.T) { func TestExpensivePlanCache_IterValues_EarlyStop(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10, 0) require.NoError(t, err) defer 
c.Close() @@ -160,7 +160,7 @@ func TestExpensivePlanCache_IterValues_EarlyStop(t *testing.T) { func TestExpensivePlanCache_Close(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10, 0) require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) @@ -173,7 +173,7 @@ func TestExpensivePlanCache_Close(t *testing.T) { func TestExpensivePlanCache_SetAfterClose(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10, 0) require.NoError(t, err) c.Close() @@ -188,7 +188,7 @@ func TestExpensivePlanCache_SetAfterClose(t *testing.T) { func TestExpensivePlanCache_IterValuesEmpty(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10, 0) require.NoError(t, err) defer c.Close() @@ -202,7 +202,7 @@ func TestExpensivePlanCache_IterValuesEmpty(t *testing.T) { func TestExpensivePlanCache_IterValuesAfterClose(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10) + c, err := newExpensivePlanCache(10, 0) require.NoError(t, err) c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) c.Close() @@ -217,7 +217,7 @@ func TestExpensivePlanCache_IterValuesAfterClose(t *testing.T) { func TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(2) + c, err := newExpensivePlanCache(2, 0) require.NoError(t, err) defer c.Close() @@ -238,7 +238,7 @@ func TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(1) + c, err := newExpensivePlanCache(1, 0) require.NoError(t, err) defer c.Close() @@ -268,7 +268,7 @@ func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(100) + c, err := newExpensivePlanCache(100, 0) require.NoError(t, err) defer 
c.Close() done := make(chan struct{}) @@ -312,9 +312,34 @@ func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { func TestExpensivePlanCache_InvalidSize(t *testing.T) { t.Parallel() - _, err := newExpensivePlanCache(0) + _, err := newExpensivePlanCache(0, 0) require.Error(t, err) - _, err = newExpensivePlanCache(-1) + _, err = newExpensivePlanCache(-1, 0) require.Error(t, err) } + +func TestExpensivePlanCache_ThresholdRejectsBelow(t *testing.T) { + t.Parallel() + c, err := newExpensivePlanCache(10, 100*time.Millisecond) + require.NoError(t, err) + defer c.Close() + + // Below threshold — should be rejected + c.Set(1, &planWithMetaData{content: "q1"}, 50*time.Millisecond) + c.Wait() + _, ok := c.Get(1) + require.False(t, ok, "entry below threshold should be rejected") + + // At threshold — should be accepted + c.Set(2, &planWithMetaData{content: "q2"}, 100*time.Millisecond) + c.Wait() + _, ok = c.Get(2) + require.True(t, ok, "entry at threshold should be accepted") + + // Above threshold — should be accepted + c.Set(3, &planWithMetaData{content: "q3"}, 200*time.Millisecond) + c.Wait() + _, ok = c.Get(3) + require.True(t, ok, "entry above threshold should be accepted") +} diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 44d6c9b1a5..660d3f95ad 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -588,9 +588,7 @@ func (s *graphMux) buildOperationCaches(srv *graphServer) (computeSha256 bool, e if s.operationPlanner == nil || s.operationPlanner.expensiveCache == nil { return } - if item.Value.planningDuration >= s.operationPlanner.threshold { - s.operationPlanner.expensiveCache.Set(item.Key, item.Value, item.Value.planningDuration) - } + s.operationPlanner.expensiveCache.Set(item.Key, item.Value, item.Value.planningDuration) } } s.planCache, err = ristretto.NewCache[uint64, *planWithMetaData](planCacheConfig) diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index 
2e7f681ae6..cedcafcc0e 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -35,8 +35,6 @@ type OperationPlanner struct { useFallback bool logger *zap.Logger - threshold time.Duration - // planningDurationOverride, when set, replaces the measured planning duration. // This is used in tests to simulate slow queries. planningDurationOverride func(content string) time.Duration @@ -68,11 +66,10 @@ func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache Execu if inMemoryPlanCacheFallback { var err error - p.expensiveCache, err = newExpensivePlanCache(expensiveCacheSize) + p.expensiveCache, err = newExpensivePlanCache(expensiveCacheSize, threshold) if err != nil { return nil, err } - p.threshold = threshold } return p, nil @@ -210,9 +207,7 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions prepared.planningDuration = p.planningDurationOverride(prepared.content) } - if prepared.planningDuration >= p.threshold { - p.expensiveCache.Set(operationID, prepared, prepared.planningDuration) - } + p.expensiveCache.Set(operationID, prepared, prepared.planningDuration) } return prepared, nil diff --git a/router/core/reload_persistent_state_test.go b/router/core/reload_persistent_state_test.go index 79d1c02ded..e2b0721c06 100644 --- a/router/core/reload_persistent_state_test.go +++ b/router/core/reload_persistent_state_test.go @@ -149,7 +149,7 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { t.Run("returns operations from live expensive cache when cachedOps has no entry", func(t *testing.T) { t.Parallel() - expCache, err := newExpensivePlanCache(100) + expCache, err := newExpensivePlanCache(100, 0) require.NoError(t, err) expCache.Set(1, &planWithMetaData{content: "query { fromExpensive }"}, 5*1e9) expCache.Wait() @@ -314,9 +314,9 @@ func TestInMemoryPlanCacheFallback_ProcessOnConfigChangeRestart(t *testing.T) { query1 := "query { test1 }" query2 := "query { test2 }" - 
expCache1, err := newExpensivePlanCache(100) + expCache1, err := newExpensivePlanCache(100, 0) require.NoError(t, err) - expCache2, err := newExpensivePlanCache(100) + expCache2, err := newExpensivePlanCache(100, 0) require.NoError(t, err) expCache1.Set(1, &planWithMetaData{content: query1}, 5*1e9) diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index bd884539f8..a9df9d0666 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -3342,7 +3342,7 @@ "expensive_query_cache_size": { "type": "integer", "minimum": 1, - "default": 100, + "default": 256, "description": "The maximum number of entries in the expensive query plan cache. Expensive queries are protected from TinyLFU eviction in the main plan cache." }, "expensive_query_threshold": { @@ -3351,7 +3351,7 @@ "description": "The minimum planning duration for a query to be considered expensive and protected from TinyLFU cache eviction. Queries exceeding this threshold are re-validated in the background before promotion to the expensive cache.", "default": "5s", "duration": { - "minimum": "100ms" + "minimum": "1ns" } }, "operation_hash_cache_size": { From 83314cee506e7d04d861b528b7e3bc9923ff6cba Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 11 Mar 2026 21:13:11 +0530 Subject: [PATCH 11/46] fix: temporary skip --- router-tests/cache_warmup_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/router-tests/cache_warmup_test.go b/router-tests/cache_warmup_test.go index bb756ed21f..8d1fab98a5 100644 --- a/router-tests/cache_warmup_test.go +++ b/router-tests/cache_warmup_test.go @@ -1282,6 +1282,8 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { }) t.Run("Successfully persists cache across config change restarts", func(t *testing.T) { + t.Skip() + t.Parallel() updateConfig := func(t *testing.T, xEnv *testenv.Environment, ctx context.Context, listenString string, config string) { From c495de1a30c4f7d0dbecb2894e76588b686aeb91 
Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 11 Mar 2026 21:47:25 +0530 Subject: [PATCH 12/46] fix: tests --- router/core/graph_server.go | 1 + 1 file changed, 1 insertion(+) diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 660d3f95ad..8d989ea46d 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -1386,6 +1386,7 @@ func (s *graphServer) buildGraphMux( otel.WgOperationName.String(item.OperationName), otel.WgClientName.String(item.ClientName), otel.WgClientVersion.String(item.ClientVersion), + otel.WgFeatureFlag.String(opts.FeatureFlagName), otel.WgOperationHash.String(item.OperationHash), otel.WgOperationType.String(item.OperationType), otel.WgEnginePlanCacheHit.Bool(false), From ed895d87641a6fda27ff71960282b5c00423759c Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 11 Mar 2026 22:08:02 +0530 Subject: [PATCH 13/46] fix: tests --- router-tests/cache_warmup_test.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/router-tests/cache_warmup_test.go b/router-tests/cache_warmup_test.go index 8d1fab98a5..f735c05303 100644 --- a/router-tests/cache_warmup_test.go +++ b/router-tests/cache_warmup_test.go @@ -1282,8 +1282,6 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { }) t.Run("Successfully persists cache across config change restarts", func(t *testing.T) { - t.Skip() - t.Parallel() updateConfig := func(t *testing.T, xEnv *testenv.Environment, ctx context.Context, listenString string, config string) { @@ -1313,7 +1311,7 @@ cache_warmup: enabled: false engine: - expensive_query_threshold: "0s" + expensive_query_threshold: "1ns" expensive_query_cache_size: 100 debug: enable_cache_response_headers: true From 46c1fb5d78a7de41fea45fa7eea7cbbd5eb889e5 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 11 Mar 2026 22:25:08 +0530 Subject: [PATCH 14/46] fix: updates --- router/core/operation_planner.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git 
a/router/core/operation_planner.go b/router/core/operation_planner.go index cedcafcc0e..de4c6292b2 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -198,15 +198,16 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions } prepared.planningDuration = time.Since(start) + // This is only used for test cases + if p.planningDurationOverride != nil { + prepared.planningDuration = p.planningDurationOverride(prepared.content) + } + + // Set into the main cache after planningDuration is finalized, + // because the OnEvict callback reads planningDuration concurrently. p.planCache.Set(operationID, prepared, 1) - // Only run this when we care about expensive cache items if p.useFallback { - // This is only used for test cases - if p.planningDurationOverride != nil { - prepared.planningDuration = p.planningDurationOverride(prepared.content) - } - p.expensiveCache.Set(operationID, prepared, prepared.planningDuration) } From 8f2818d83b4e9c00d7d3ad12a4a0d72a0f2ea41d Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 01:06:05 +0530 Subject: [PATCH 15/46] fix: updates --- router-tests/config_hot_reload_test.go | 21 +++- router-tests/expensive_query_cache_test.go | 123 +++++++++++++++++---- 2 files changed, 121 insertions(+), 23 deletions(-) diff --git a/router-tests/config_hot_reload_test.go b/router-tests/config_hot_reload_test.go index 91b5c9e11f..057df0a716 100644 --- a/router-tests/config_hot_reload_test.go +++ b/router-tests/config_hot_reload_test.go @@ -599,13 +599,32 @@ func writeTestConfig(t *testing.T, version string, path string) { }, Id: "0", }, + { + Kind: nodev1.DataSourceKind_STATIC, + RootNodes: []*nodev1.TypeField{ + { + TypeName: "Query", + FieldNames: []string{"world"}, + }, + }, + CustomStatic: &nodev1.DataSourceCustom_Static{ + Data: &nodev1.ConfigurationVariable{ + StaticVariableContent: `{"world": "World!"}`, + }, + }, + Id: "1", + }, }, - GraphqlSchema: "schema {\n query: 
Query\n}\ntype Query {\n hello: String\n}", + GraphqlSchema: "schema {\n query: Query\n}\ntype Query {\n hello: String\n world: String\n}", FieldConfigurations: []*nodev1.FieldConfiguration{ { TypeName: "Query", FieldName: "hello", }, + { + TypeName: "Query", + FieldName: "world", + }, }, }, } diff --git a/router-tests/expensive_query_cache_test.go b/router-tests/expensive_query_cache_test.go index 4a575d62c1..03ab84222f 100644 --- a/router-tests/expensive_query_cache_test.go +++ b/router-tests/expensive_query_cache_test.go @@ -199,6 +199,80 @@ func TestExpensiveQueryCache(t *testing.T) { }) }) + t.Run("only expensive queries persist across config reload, fast queries do not", func(t *testing.T) { + t.Parallel() + + pm := ConfigPollerMock{ + ready: make(chan struct{}), + } + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + // Large enough to hold all queries — no evictions before reload + cfg.ExecutionPlanCacheSize = 1024 + cfg.ExpensiveQueryThreshold = expensiveThreshold + cfg.ExpensiveQueryCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + Source: config.CacheWarmupSource{ + CdnSource: config.CacheWarmupCDNSource{ + Enabled: true, + }, + }, + }), + core.WithConfigVersionHeader(true), + planningDurationOverride, + }, + RouterConfig: &testenv.RouterConfig{ + ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { + pm.initConfig = config + return &pm + }, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Populate caches with both slow and fast queries + for _, q := range allQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + // Verify all queries are cached in the main plan cache before reload + for _, q := range 
allQueries { + require.EventuallyWithT(t, func(ct *assert.CollectT) { + res := xEnv.MakeGraphQLRequestOK(q) + assert.Equal(ct, "HIT", res.Response.Header.Get("x-wg-execution-plan-cache")) + }, 2*time.Second, 100*time.Millisecond) + } + + // Trigger config reload — main plan cache is reset. + <-pm.ready + pm.initConfig.Version = "updated" + require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) + + // Wait for reload to complete by checking a slow query (which will be + // served from the expensive cache, confirming the new server is active). + require.EventuallyWithT(t, func(ct *assert.CollectT) { + res := xEnv.MakeGraphQLRequestOK(slowQueries[0]) + assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) + }, 2*time.Second, 100*time.Millisecond) + + // After reload, fast queries must not be persisted anywhere — the first + // request on the new server should be a MISS on both caches. + for _, q := range fastQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, "updated", res.Response.Header.Get("X-Router-Config-Version")) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache"), + "fast query should not be in main plan cache after config reload") + require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), + "fast query should not be in expensive cache after config reload") + } + }) + }) + t.Run("plans survive multiple config reloads with small main cache", func(t *testing.T) { t.Parallel() @@ -549,7 +623,7 @@ func TestExpensiveQueryCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1 + cfg.ExecutionPlanCacheSize = 1024 cfg.ExpensiveQueryThreshold = expensiveThreshold cfg.ExpensiveQueryCacheSize = 100 }, @@ -564,39 +638,44 @@ func TestExpensiveQueryCache(t *testing.T) { Enabled: true, InMemoryFallback: true, }), - // Override the hello query to be slow - 
core.WithPlanningDurationOverride(func(_ string) time.Duration { - return 10 * time.Second + // "hello" is slow (enters expensive cache), "world" is fast (does not) + core.WithPlanningDurationOverride(func(content string) time.Duration { + if strings.Contains(content, "hello") { + return 10 * time.Second + } + return 0 }), }, }, func(t *testing.T, xEnv *testenv.Environment) { - q := testenv.GraphQLRequest{Query: `query { hello }`} - - // First request is a MISS - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - require.Equal(t, "initial", res.Response.Header.Get("X-Router-Config-Version")) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + slowQ := testenv.GraphQLRequest{Query: `query { hello }`} + fastQ := testenv.GraphQLRequest{Query: `query { world }`} - // Wait for the expensive cache to pick it up - require.EventuallyWithT(t, func(ct *assert.CollectT) { - res = xEnv.MakeGraphQLRequestOK(q) - planHit := res.Response.Header.Get("x-wg-execution-plan-cache") == "HIT" - expensiveHit := res.Response.Header.Get("X-WG-Expensive-Plan-Cache") == "HIT" - assert.True(ct, planHit || expensiveHit, "expected plan to be cached") - }, 2*time.Second, 100*time.Millisecond) + // Plan both queries + for _, q := range []testenv.GraphQLRequest{slowQ, fastQ} { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "initial", res.Response.Header.Get("X-Router-Config-Version")) + } - // Trigger schema reload by writing a new config version + // Trigger schema reload writeTestConfig(t, "updated", configFile) - // After reload, the plan should still be available (carried forward via extractQueriesAndOverridePlanCache) + // Wait for reload to complete — slow query should survive via expensive cache require.EventuallyWithT(t, func(ct *assert.CollectT) { - res = xEnv.MakeGraphQLRequestOK(q) + res := xEnv.MakeGraphQLRequestOK(slowQ) assert.Equal(ct, "updated", 
res.Response.Header.Get("X-Router-Config-Version")) planHit := res.Response.Header.Get("x-wg-execution-plan-cache") == "HIT" expensiveHit := res.Response.Header.Get("X-WG-Expensive-Plan-Cache") == "HIT" - assert.True(ct, planHit || expensiveHit, "expected plan to survive schema reload via expensive cache merge") + assert.True(ct, planHit || expensiveHit, "expected slow plan to survive schema reload") }, 2*time.Second, 100*time.Millisecond) + + // Fast query must not be persisted anywhere after reload + res := xEnv.MakeGraphQLRequestOK(fastQ) + require.Equal(t, "updated", res.Response.Header.Get("X-Router-Config-Version")) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache"), + "fast query should not be in main plan cache after schema reload") + require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), + "fast query should not survive schema reload via expensive cache") }) }) From 731829decb678cec8693886d2a23682b971aff61 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 01:12:00 +0530 Subject: [PATCH 16/46] fix: tests --- router-tests/expensive_query_cache_test.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/router-tests/expensive_query_cache_test.go b/router-tests/expensive_query_cache_test.go index 03ab84222f..7e1e4e2024 100644 --- a/router-tests/expensive_query_cache_test.go +++ b/router-tests/expensive_query_cache_test.go @@ -62,7 +62,7 @@ func TestExpensiveQueryCache(t *testing.T) { res := xEnv.MakeGraphQLRequestOK(q) assert.Equal(ct, 200, res.Response.StatusCode) planHit := res.Response.Header.Get("x-wg-execution-plan-cache") == "HIT" - expensiveHit := res.Response.Header.Get("X-WG-Expensive-Plan-Cache") == "HIT" + expensiveHit := res.Response.Header.Get("x-wg-expensive-plan-cache") == "HIT" assert.True(ct, planHit || expensiveHit, "expected plan to be served from main or expensive cache") for _, check := range extraChecks { check(ct, res) @@ -74,7 
+74,7 @@ func TestExpensiveQueryCache(t *testing.T) { expensiveCacheHits := 0 for _, q := range queries { res := xEnv.MakeGraphQLRequestOK(q) - if res.Response.Header.Get("X-WG-Expensive-Plan-Cache") == "HIT" { + if res.Response.Header.Get("x-wg-expensive-plan-cache") == "HIT" { expensiveCacheHits++ } } @@ -140,7 +140,7 @@ func TestExpensiveQueryCache(t *testing.T) { // Fast queries should never be served from the expensive cache for _, q := range fastQueries { res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-expensive-plan-cache"), "fast query should not be in the expensive cache") } }) @@ -267,7 +267,7 @@ func TestExpensiveQueryCache(t *testing.T) { require.Equal(t, "updated", res.Response.Header.Get("X-Router-Config-Version")) require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache"), "fast query should not be in main plan cache after config reload") - require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-expensive-plan-cache"), "fast query should not be in expensive cache after config reload") } }) @@ -560,14 +560,14 @@ func TestExpensiveQueryCache(t *testing.T) { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, 200, res.Response.StatusCode) // Header must be absent when feature is disabled - require.Empty(t, res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), - "X-WG-Expensive-Plan-Cache header should not be present when InMemoryFallback is disabled") + require.Empty(t, res.Response.Header.Get("x-wg-expensive-plan-cache"), + "x-wg-expensive-plan-cache header should not be present when InMemoryFallback is disabled") } // Second pass — cache hits for _, q := range allQueries { res := xEnv.MakeGraphQLRequestOK(q) - require.Empty(t, res.Response.Header.Get("X-WG-Expensive-Plan-Cache")) + require.Empty(t, 
res.Response.Header.Get("x-wg-expensive-plan-cache")) } // Verify spans do NOT contain the expensive_plan_cache_hit attribute @@ -665,7 +665,7 @@ func TestExpensiveQueryCache(t *testing.T) { res := xEnv.MakeGraphQLRequestOK(slowQ) assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) planHit := res.Response.Header.Get("x-wg-execution-plan-cache") == "HIT" - expensiveHit := res.Response.Header.Get("X-WG-Expensive-Plan-Cache") == "HIT" + expensiveHit := res.Response.Header.Get("x-wg-expensive-plan-cache") == "HIT" assert.True(ct, planHit || expensiveHit, "expected slow plan to survive schema reload") }, 2*time.Second, 100*time.Millisecond) @@ -674,7 +674,7 @@ func TestExpensiveQueryCache(t *testing.T) { require.Equal(t, "updated", res.Response.Header.Get("X-Router-Config-Version")) require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache"), "fast query should not be in main plan cache after schema reload") - require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-expensive-plan-cache"), "fast query should not survive schema reload via expensive cache") }) }) @@ -702,7 +702,7 @@ func TestExpensiveQueryCache(t *testing.T) { require.Equal(t, 200, res.Response.StatusCode) require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) // Feature is enabled so header is present, but should be MISS - require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache")) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-expensive-plan-cache")) } // Wait for Ristretto eviction @@ -714,7 +714,7 @@ func TestExpensiveQueryCache(t *testing.T) { for _, q := range allQueries { res := xEnv.MakeGraphQLRequestOK(q) require.Equal(t, 200, res.Response.StatusCode) - require.Equal(t, "MISS", res.Response.Header.Get("X-WG-Expensive-Plan-Cache"), + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-expensive-plan-cache"), "no plan 
should be in the expensive cache with a 1h threshold") } }) From 4a0fed7d621f4b7bcef641ce34c41a2675adb191 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 01:26:17 +0530 Subject: [PATCH 17/46] fix: updates --- router/core/graph_server.go | 8 +- router/core/reload_persistent_state.go | 134 +++++++-------- router/core/reload_persistent_state_test.go | 171 ++++++++------------ 3 files changed, 136 insertions(+), 177 deletions(-) diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 8d989ea46d..05a07c17eb 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -1416,8 +1416,8 @@ func (s *graphServer) buildGraphMux( case s.cacheWarmup.InMemoryFallback && (s.selfRegister == nil || !s.cacheWarmup.Source.CdnSource.Enabled): // We first utilize the existing plan cache (if it was already set, i.e., not on the first start) to create a list of queries // and then reset the plan cache to the new plan cache for this start afterwards. - warmupConfig.Source = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getCachedOperationsForFF(opts.FeatureFlagName)) - opts.ReloadPersistentState.inMemoryPlanCacheFallback.setExpensiveCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) + warmupConfig.Source = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) + opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) case s.cacheWarmup.Source.CdnSource.Enabled: if s.graphApiToken == "" { return nil, fmt.Errorf("graph token is required for cache warmup in order to communicate with the CDN") @@ -1426,8 +1426,8 @@ func (s *graphServer) buildGraphMux( // We use the in-memory cache as a fallback if enabled // This is useful for when an issue occurs with the CDN when retrieving the required manifest if s.cacheWarmup.InMemoryFallback { - warmupConfig.FallbackSource = 
NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getCachedOperationsForFF(opts.FeatureFlagName)) - opts.ReloadPersistentState.inMemoryPlanCacheFallback.setExpensiveCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) + warmupConfig.FallbackSource = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) + opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) } cdnSource, err := NewCDNSource(s.cdnConfig.URL, s.graphApiToken, s.logger) if err != nil { diff --git a/router/core/reload_persistent_state.go b/router/core/reload_persistent_state.go index e0132cffef..9962435ddc 100644 --- a/router/core/reload_persistent_state.go +++ b/router/core/reload_persistent_state.go @@ -42,14 +42,12 @@ func (s *ReloadPersistentState) OnRouterConfigReload() { s.inMemoryPlanCacheFallback.extractQueriesAndOverridePlanCache() } -// InMemoryPlanCacheFallback is a store that stores expensive query cache references or extracted operations -// for use with the cache warmer across config reloads. Only expensive queries (planning duration >= threshold) -// are persisted. +// InMemoryPlanCacheFallback is a store that stores either queries or references to the planner cache for use with the cache warmer. +// Only expensive queries (planning duration >= threshold) are persisted. 
type InMemoryPlanCacheFallback struct { - mu sync.RWMutex - expensiveCaches map[string]*expensivePlanCache // live references during runtime - cachedOps map[string][]*nodev1.Operation // extracted snapshots after reload - logger *zap.Logger + mu sync.RWMutex + queriesForFeatureFlag map[string]any + logger *zap.Logger } // updateStateFromConfig updates the internal state of the in-memory fallback cache based on the provided config @@ -64,18 +62,14 @@ func (c *InMemoryPlanCacheFallback) updateStateFromConfig(config *Config) { // If the configuration change occurred which disabled or enabled the fallback cache, we need to update the internal state if enabled { // Only initialize if its nil because its a first start, we dont want to override any old data in a map - if c.expensiveCaches == nil { - c.expensiveCaches = make(map[string]*expensivePlanCache) - } - if c.cachedOps == nil { - c.cachedOps = make(map[string][]*nodev1.Operation) + if c.queriesForFeatureFlag == nil { + c.queriesForFeatureFlag = make(map[string]any) } return } - // Reset the maps to free up memory - c.expensiveCaches = nil - c.cachedOps = nil + // Reset the map to free up memory + c.queriesForFeatureFlag = nil } // IsEnabled returns whether the in-memory fallback cache is enabled @@ -83,90 +77,72 @@ func (c *InMemoryPlanCacheFallback) IsEnabled() bool { c.mu.RLock() defer c.mu.RUnlock() - return c.expensiveCaches != nil + return c.queriesForFeatureFlag != nil } -// getCachedOperationsForFF returns all cached operations for a feature flag key. -// It first checks extracted snapshots (cachedOps), then falls back to reading -// from live expensive cache references (used during execution config updates -// where extractQueriesAndOverridePlanCache has not been called). -func (c *InMemoryPlanCacheFallback) getCachedOperationsForFF(featureFlagKey string) []*nodev1.Operation { +// getPlanCacheForFF gets the plan cache in the []*nodev1.Operation format for a specific feature flag key. 
+// It handles both live expensive cache references and already-extracted operation snapshots. +func (c *InMemoryPlanCacheFallback) getPlanCacheForFF(featureFlagKey string) []*nodev1.Operation { c.mu.RLock() defer c.mu.RUnlock() - if c.expensiveCaches == nil { + if c.queriesForFeatureFlag == nil { return nil } - // Check extracted snapshots first (populated after router config reloads) - if ops, ok := c.cachedOps[featureFlagKey]; ok { - return ops - } - - // Fall back to reading from live expensive cache references - // This path is used during execution config updates where the previous - // expensive cache is still valid and hasn't been extracted yet. - if expCache, ok := c.expensiveCaches[featureFlagKey]; ok { - var ops []*nodev1.Operation - expCache.IterValues(func(v *planWithMetaData) bool { - if v.content != "" { - ops = append(ops, &nodev1.Operation{ - Request: &nodev1.OperationRequest{Query: v.content}, - }) - } - return false - }) - return ops + switch cache := c.queriesForFeatureFlag[featureFlagKey].(type) { + case *expensivePlanCache: + return convertToNodeOperation(cache) + case []*nodev1.Operation: + return cache + // This would occur during the first start (we add this case to specifically log any other cases) + case nil: + return nil + // This should not happen as we cannot have any types other than the above + default: + c.logger.Error("unexpected type") + return nil } - - return nil } -// setExpensiveCacheForFF stores the expensive plan cache reference for a feature flag key -// so that expensive query entries survive config reloads. 
-func (c *InMemoryPlanCacheFallback) setExpensiveCacheForFF(featureFlagKey string, cache *expensivePlanCache) { +// setPlanCacheForFF sets the plan cache for a specific feature flag key +func (c *InMemoryPlanCacheFallback) setPlanCacheForFF(featureFlagKey string, cache *expensivePlanCache) { c.mu.Lock() defer c.mu.Unlock() - if c.expensiveCaches == nil || cache == nil { + if c.queriesForFeatureFlag == nil || cache == nil { return } - c.expensiveCaches[featureFlagKey] = cache + c.queriesForFeatureFlag[featureFlagKey] = cache } -// extractQueriesAndOverridePlanCache extracts operations from the expensive plan caches -// and stores them in cachedOps so they survive config reloads. +// extractQueriesAndOverridePlanCache extracts the queries from the plan cache and overrides the internal map func (c *InMemoryPlanCacheFallback) extractQueriesAndOverridePlanCache() { c.mu.Lock() defer c.mu.Unlock() - if c.expensiveCaches == nil { + if c.queriesForFeatureFlag == nil { return } // Wait for all pending writes from expensive caches so that // IterValues sees a complete snapshot before we extract. 
- for _, expCache := range c.expensiveCaches { - expCache.Wait() + for _, v := range c.queriesForFeatureFlag { + if expCache, ok := v.(*expensivePlanCache); ok { + expCache.Wait() + } } - cachedOps := make(map[string][]*nodev1.Operation) - for k, expCache := range c.expensiveCaches { - var ops []*nodev1.Operation - expCache.IterValues(func(v *planWithMetaData) bool { - if v.content != "" { - ops = append(ops, &nodev1.Operation{ - Request: &nodev1.OperationRequest{Query: v.content}, - }) + fallbackMap := make(map[string]any) + for k, v := range c.queriesForFeatureFlag { + if cache, ok := v.(*expensivePlanCache); ok { + ops := convertToNodeOperation(cache) + if len(ops) > 0 { + fallbackMap[k] = ops } - return false - }) - if len(ops) > 0 { - cachedOps[k] = ops } } - c.cachedOps = cachedOps - c.expensiveCaches = make(map[string]*expensivePlanCache) + c.queriesForFeatureFlag = fallbackMap } // cleanupUnusedFeatureFlags removes any feature flags that were removed from the execution config @@ -175,21 +151,33 @@ func (c *InMemoryPlanCacheFallback) cleanupUnusedFeatureFlags(routerCfg *nodev1. 
c.mu.Lock() defer c.mu.Unlock() - if c.expensiveCaches == nil { + if c.queriesForFeatureFlag == nil { return } - for ffName := range c.expensiveCaches { + for ffName := range c.queriesForFeatureFlag { // Skip the base which is "" if ffName == "" { continue } if routerCfg.FeatureFlagConfigs == nil { - delete(c.expensiveCaches, ffName) - delete(c.cachedOps, ffName) + delete(c.queriesForFeatureFlag, ffName) } else if _, exists := routerCfg.FeatureFlagConfigs.ConfigByFeatureFlagName[ffName]; !exists { - delete(c.expensiveCaches, ffName) - delete(c.cachedOps, ffName) + delete(c.queriesForFeatureFlag, ffName) } } } + +func convertToNodeOperation(data *expensivePlanCache) []*nodev1.Operation { + items := make([]*nodev1.Operation, 0) + + data.IterValues(func(v *planWithMetaData) (stop bool) { + if v.content != "" { + items = append(items, &nodev1.Operation{ + Request: &nodev1.OperationRequest{Query: v.content}, + }) + } + return false + }) + return items +} diff --git a/router/core/reload_persistent_state_test.go b/router/core/reload_persistent_state_test.go index e2b0721c06..3be8fd5ca4 100644 --- a/router/core/reload_persistent_state_test.go +++ b/router/core/reload_persistent_state_test.go @@ -23,17 +23,14 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.NotNil(t, cache.expensiveCaches) - require.Empty(t, cache.expensiveCaches) - require.NotNil(t, cache.cachedOps) - require.Empty(t, cache.cachedOps) + require.NotNil(t, cache.queriesForFeatureFlag) + require.Empty(t, cache.queriesForFeatureFlag) }) t.Run("disable cache from enabled state", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: make(map[string]*expensivePlanCache), - cachedOps: make(map[string][]*nodev1.Operation), + queriesForFeatureFlag: make(map[string]any), } cfg := &Config{ @@ -44,18 +41,16 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t 
cache.updateStateFromConfig(cfg) - require.Nil(t, cache.expensiveCaches) - require.Nil(t, cache.cachedOps) + require.Nil(t, cache.queriesForFeatureFlag) }) t.Run("update when already enabled keeps existing data", func(t *testing.T) { t.Parallel() - existingCaches := make(map[string]*expensivePlanCache) - existingCaches["test"] = nil + existing := make(map[string]any) + existing["test"] = (*expensivePlanCache)(nil) cache := &InMemoryPlanCacheFallback{ - expensiveCaches: existingCaches, - cachedOps: make(map[string][]*nodev1.Operation), + queriesForFeatureFlag: existing, } cfg := &Config{ @@ -67,15 +62,15 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.NotNil(t, cache.expensiveCaches) - require.Len(t, cache.expensiveCaches, 1) - require.Contains(t, cache.expensiveCaches, "test") + require.NotNil(t, cache.queriesForFeatureFlag) + require.Len(t, cache.queriesForFeatureFlag, 1) + require.Contains(t, cache.queriesForFeatureFlag, "test") }) t.Run("update when already disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: nil, + queriesForFeatureFlag: nil, } cfg := &Config{ @@ -86,14 +81,13 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.Nil(t, cache.expensiveCaches) + require.Nil(t, cache.queriesForFeatureFlag) }) t.Run("nil cacheWarmup config disables cache", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: make(map[string]*expensivePlanCache), - cachedOps: make(map[string][]*nodev1.Operation), + queriesForFeatureFlag: make(map[string]any), } cfg := &Config{ @@ -102,8 +96,7 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.Nil(t, cache.expensiveCaches) - require.Nil(t, cache.cachedOps) + require.Nil(t, cache.queriesForFeatureFlag) }) t.Run("cacheWarmup 
enabled but InMemoryFallback disabled", func(t *testing.T) { @@ -119,14 +112,14 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t cache.updateStateFromConfig(cfg) - require.Nil(t, cache.expensiveCaches) + require.Nil(t, cache.queriesForFeatureFlag) }) } func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { t.Parallel() - t.Run("returns operations for existing feature flag from cachedOps", func(t *testing.T) { + t.Run("returns operations for existing feature flag from extracted ops", func(t *testing.T) { t.Parallel() expectedOps := []*nodev1.Operation{ {Request: &nodev1.OperationRequest{Query: "query { test1 }"}}, @@ -134,19 +127,18 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { } cache := &InMemoryPlanCacheFallback{ - expensiveCaches: make(map[string]*expensivePlanCache), - cachedOps: map[string][]*nodev1.Operation{ + queriesForFeatureFlag: map[string]any{ "test-ff": expectedOps, }, } - result := cache.getCachedOperationsForFF("test-ff") + result := cache.getPlanCacheForFF("test-ff") require.NotNil(t, result) require.Equal(t, expectedOps, result) }) - t.Run("returns operations from live expensive cache when cachedOps has no entry", func(t *testing.T) { + t.Run("returns operations from live expensive cache reference", func(t *testing.T) { t.Parallel() expCache, err := newExpensivePlanCache(100, 0) @@ -155,13 +147,12 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { expCache.Wait() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: map[string]*expensivePlanCache{ + queriesForFeatureFlag: map[string]any{ "test-ff": expCache, }, - cachedOps: make(map[string][]*nodev1.Operation), } - result := cache.getCachedOperationsForFF("test-ff") + result := cache.getPlanCacheForFF("test-ff") require.NotNil(t, result) require.Len(t, result, 1) @@ -171,23 +162,21 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { t.Run("returns nil for non-existent feature flag", 
func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - logger: zap.NewNop(), - expensiveCaches: make(map[string]*expensivePlanCache), - cachedOps: make(map[string][]*nodev1.Operation), + logger: zap.NewNop(), + queriesForFeatureFlag: make(map[string]any), } - result := cache.getCachedOperationsForFF("non-existent") + result := cache.getPlanCacheForFF("non-existent") require.Nil(t, result) }) t.Run("returns nil when cache is disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: nil, - cachedOps: nil, + queriesForFeatureFlag: nil, } - result := cache.getCachedOperationsForFF("test-ff") + result := cache.getPlanCacheForFF("test-ff") require.Nil(t, result) }) @@ -198,15 +187,10 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { t.Run("removes unused feature flags", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: map[string]*expensivePlanCache{ - "ff1": nil, - "ff2": nil, - "ff3": nil, - }, - cachedOps: map[string][]*nodev1.Operation{ - "ff1": nil, - "ff2": nil, - "ff3": nil, + queriesForFeatureFlag: map[string]any{ + "ff1": (*expensivePlanCache)(nil), + "ff2": (*expensivePlanCache)(nil), + "ff3": (*expensivePlanCache)(nil), }, } @@ -221,24 +205,18 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { cache.cleanupUnusedFeatureFlags(routerCfg) - require.Len(t, cache.expensiveCaches, 2) - require.Contains(t, cache.expensiveCaches, "ff1") - require.Contains(t, cache.expensiveCaches, "ff2") - require.NotContains(t, cache.expensiveCaches, "ff3") - require.Len(t, cache.cachedOps, 2) - require.NotContains(t, cache.cachedOps, "ff3") + require.Len(t, cache.queriesForFeatureFlag, 2) + require.Contains(t, cache.queriesForFeatureFlag, "ff1") + require.Contains(t, cache.queriesForFeatureFlag, "ff2") + require.NotContains(t, cache.queriesForFeatureFlag, "ff3") }) t.Run("keeps empty string feature flag", func(t *testing.T) { 
t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: map[string]*expensivePlanCache{ - "": nil, - "ff1": nil, - }, - cachedOps: map[string][]*nodev1.Operation{ - "": nil, - "ff1": nil, + queriesForFeatureFlag: map[string]any{ + "": (*expensivePlanCache)(nil), + "ff1": (*expensivePlanCache)(nil), }, } @@ -250,15 +228,15 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { cache.cleanupUnusedFeatureFlags(routerCfg) - require.Len(t, cache.expensiveCaches, 1) - require.Contains(t, cache.expensiveCaches, "") - require.NotContains(t, cache.expensiveCaches, "ff1") + require.Len(t, cache.queriesForFeatureFlag, 1) + require.Contains(t, cache.queriesForFeatureFlag, "") + require.NotContains(t, cache.queriesForFeatureFlag, "ff1") }) t.Run("does nothing when cache is disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: nil, + queriesForFeatureFlag: nil, } routerCfg := &nodev1.RouterConfig{ @@ -270,25 +248,19 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { cache.cleanupUnusedFeatureFlags(routerCfg) // Should still be nil because cleanup is skipped when disabled - require.Nil(t, cache.expensiveCaches) + require.Nil(t, cache.queriesForFeatureFlag) }) t.Run("removes feature flags when not in ConfigByFeatureFlagName", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: map[string]*expensivePlanCache{ - "": nil, // base should be kept - "ff1": nil, - "ff2": nil, - "ff3": nil, - "ff4": nil, - "ff5": nil, - }, - cachedOps: map[string][]*nodev1.Operation{ - "": nil, - "ff1": nil, - "ff2": nil, - "ff3": nil, + queriesForFeatureFlag: map[string]any{ + "": (*expensivePlanCache)(nil), // base should be kept + "ff1": (*expensivePlanCache)(nil), + "ff2": (*expensivePlanCache)(nil), + "ff3": (*expensivePlanCache)(nil), + "ff4": (*expensivePlanCache)(nil), + "ff5": (*expensivePlanCache)(nil), }, } @@ -298,17 +270,17 @@ func 
TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { cache.cleanupUnusedFeatureFlags(routerCfg) - require.Len(t, cache.expensiveCaches, 1) - require.Contains(t, cache.expensiveCaches, "") - require.NotContains(t, cache.expensiveCaches, "ff1") - require.NotContains(t, cache.expensiveCaches, "ff2") - require.NotContains(t, cache.expensiveCaches, "ff3") + require.Len(t, cache.queriesForFeatureFlag, 1) + require.Contains(t, cache.queriesForFeatureFlag, "") + require.NotContains(t, cache.queriesForFeatureFlag, "ff1") + require.NotContains(t, cache.queriesForFeatureFlag, "ff2") + require.NotContains(t, cache.queriesForFeatureFlag, "ff3") }) } func TestInMemoryPlanCacheFallback_ProcessOnConfigChangeRestart(t *testing.T) { t.Parallel() - t.Run("extracts expensive cache entries to cachedOps", func(t *testing.T) { + t.Run("extracts expensive cache entries to operations", func(t *testing.T) { t.Parallel() query1 := "query { test1 }" @@ -325,50 +297,49 @@ func TestInMemoryPlanCacheFallback_ProcessOnConfigChangeRestart(t *testing.T) { expCache2.Wait() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: map[string]*expensivePlanCache{ + queriesForFeatureFlag: map[string]any{ "ff1": expCache1, "ff2": expCache2, }, - cachedOps: make(map[string][]*nodev1.Operation), } cache.extractQueriesAndOverridePlanCache() - // Verify both caches have been extracted to cachedOps - require.Len(t, cache.cachedOps["ff1"], 1) - require.Len(t, cache.cachedOps["ff2"], 1) - require.Equal(t, query1, cache.cachedOps["ff1"][0].Request.Query) - require.Equal(t, query2, cache.cachedOps["ff2"][0].Request.Query) + // Verify both caches have been extracted to operations + ff1Ops, ok := cache.queriesForFeatureFlag["ff1"].([]*nodev1.Operation) + require.True(t, ok) + require.Len(t, ff1Ops, 1) + require.Equal(t, query1, ff1Ops[0].Request.Query) - // expensiveCaches should be reset to empty map - require.NotNil(t, cache.expensiveCaches) - require.Empty(t, cache.expensiveCaches) + ff2Ops, ok 
:= cache.queriesForFeatureFlag["ff2"].([]*nodev1.Operation) + require.True(t, ok) + require.Len(t, ff2Ops, 1) + require.Equal(t, query2, ff2Ops[0].Request.Query) }) t.Run("does nothing when cache is disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: nil, + queriesForFeatureFlag: nil, } cache.extractQueriesAndOverridePlanCache() // Should remain nil since processing is skipped - require.Nil(t, cache.expensiveCaches) + require.Nil(t, cache.queriesForFeatureFlag) }) t.Run("handles empty cache", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: make(map[string]*expensivePlanCache), - cachedOps: make(map[string][]*nodev1.Operation), + queriesForFeatureFlag: make(map[string]any), } require.NotPanics(t, func() { cache.extractQueriesAndOverridePlanCache() }) - require.Empty(t, cache.cachedOps) + require.Empty(t, cache.queriesForFeatureFlag) }) } @@ -377,7 +348,7 @@ func TestInMemoryPlanCacheFallback_IsEnabled(t *testing.T) { t.Run("returns true when cache is enabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: make(map[string]*expensivePlanCache), + queriesForFeatureFlag: make(map[string]any), } require.True(t, cache.IsEnabled()) @@ -386,7 +357,7 @@ func TestInMemoryPlanCacheFallback_IsEnabled(t *testing.T) { t.Run("returns false when cache is disabled", func(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ - expensiveCaches: nil, + queriesForFeatureFlag: nil, } require.False(t, cache.IsEnabled()) From 2d804557a3e7e43eebc7ec88ec081f247b8fa74a Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 01:28:41 +0530 Subject: [PATCH 18/46] fix: updates --- router/core/reload_persistent_state.go | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/router/core/reload_persistent_state.go b/router/core/reload_persistent_state.go index 9962435ddc..9d8f9593fb 100644 --- 
a/router/core/reload_persistent_state.go +++ b/router/core/reload_persistent_state.go @@ -125,21 +125,10 @@ func (c *InMemoryPlanCacheFallback) extractQueriesAndOverridePlanCache() { return } - // Wait for all pending writes from expensive caches so that - // IterValues sees a complete snapshot before we extract. - for _, v := range c.queriesForFeatureFlag { - if expCache, ok := v.(*expensivePlanCache); ok { - expCache.Wait() - } - } - fallbackMap := make(map[string]any) for k, v := range c.queriesForFeatureFlag { if cache, ok := v.(*expensivePlanCache); ok { - ops := convertToNodeOperation(cache) - if len(ops) > 0 { - fallbackMap[k] = ops - } + fallbackMap[k] = convertToNodeOperation(cache) } } c.queriesForFeatureFlag = fallbackMap @@ -172,11 +161,9 @@ func convertToNodeOperation(data *expensivePlanCache) []*nodev1.Operation { items := make([]*nodev1.Operation, 0) data.IterValues(func(v *planWithMetaData) (stop bool) { - if v.content != "" { - items = append(items, &nodev1.Operation{ - Request: &nodev1.OperationRequest{Query: v.content}, - }) - } + items = append(items, &nodev1.Operation{ + Request: &nodev1.OperationRequest{Query: v.content}, + }) return false }) return items From c708af5248949bff9c84c180aa4509aa5b922ed9 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 16:24:39 +0530 Subject: [PATCH 19/46] fix: review comments --- router-tests/cache_warmup_test.go | 12 +- router-tests/expensive_query_cache_test.go | 722 ------------------ router-tests/plan_fallback_cache_test.go | 465 +++++++++++ router/core/context.go | 2 - router/core/graph_server.go | 15 +- router/core/graphql_handler.go | 8 - router/core/graphql_prehandler.go | 6 - router/core/operation_planner.go | 20 +- router/core/reload_persistent_state.go | 9 +- router/core/reload_persistent_state_test.go | 55 +- router/pkg/config/config.go | 4 +- router/pkg/config/config.schema.json | 8 +- .../pkg/config/testdata/config_defaults.json | 4 +- router/pkg/config/testdata/config_full.json | 
4 +- router/pkg/otel/attributes.go | 1 - .../planfallbackcache/plan_fallback_cache.go} | 71 +- .../plan_fallback_cache_test.go} | 142 ++-- 17 files changed, 639 insertions(+), 909 deletions(-) delete mode 100644 router-tests/expensive_query_cache_test.go create mode 100644 router-tests/plan_fallback_cache_test.go rename router/{core/expensive_query_cache.go => pkg/planfallbackcache/plan_fallback_cache.go} (63%) rename router/{core/expensive_query_cache_test.go => pkg/planfallbackcache/plan_fallback_cache_test.go} (56%) diff --git a/router-tests/cache_warmup_test.go b/router-tests/cache_warmup_test.go index f735c05303..4719dc3853 100644 --- a/router-tests/cache_warmup_test.go +++ b/router-tests/cache_warmup_test.go @@ -974,7 +974,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExpensiveQueryCacheSize = 100 + cfg.PlanFallbackCacheSize = 100 }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ @@ -1126,7 +1126,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExpensiveQueryCacheSize = 100 + cfg.PlanFallbackCacheSize = 100 }, RouterOptions: []core.Option{ core.WithConfigVersionHeader(true), @@ -1176,7 +1176,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExpensiveQueryCacheSize = 100 + cfg.PlanFallbackCacheSize = 100 }, CdnSever: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNotFound) @@ -1235,7 +1235,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - 
cfg.ExpensiveQueryCacheSize = 100 + cfg.PlanFallbackCacheSize = 100 }, CdnSever: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusUnauthorized) @@ -1311,8 +1311,8 @@ cache_warmup: enabled: false engine: - expensive_query_threshold: "1ns" - expensive_query_cache_size: 100 + plan_fallback_threshold: "1ns" + plan_fallback_cache_size: 100 debug: enable_cache_response_headers: true ` diff --git a/router-tests/expensive_query_cache_test.go b/router-tests/expensive_query_cache_test.go deleted file mode 100644 index 7e1e4e2024..0000000000 --- a/router-tests/expensive_query_cache_test.go +++ /dev/null @@ -1,722 +0,0 @@ -package integration - -import ( - "context" - "strings" - "testing" - "time" - - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/wundergraph/cosmo/router-tests/testenv" - "github.com/wundergraph/cosmo/router/core" - nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" - "github.com/wundergraph/cosmo/router/pkg/config" - "github.com/wundergraph/cosmo/router/pkg/controlplane/configpoller" - "github.com/wundergraph/cosmo/router/pkg/otel" - "github.com/wundergraph/cosmo/router/pkg/trace/tracetest" - "go.opentelemetry.io/otel/sdk/metric" - "go.opentelemetry.io/otel/sdk/metric/metricdata" -) - -func TestExpensiveQueryCache(t *testing.T) { - t.Parallel() - - // slowQueries are queries whose planning duration is overridden to exceed the threshold. - slowQueries := []testenv.GraphQLRequest{ - {Query: `{ employees { id } }`}, - {Query: `query { employees { id details { forename } } }`}, - } - - // fastQueries are queries whose planning duration stays below the threshold. 
- fastQueries := []testenv.GraphQLRequest{ - {Query: `query { employees { id details { forename surname } } }`}, - {Query: `query m($id: Int!){ employee(id: $id) { id details { forename surname } } }`, Variables: []byte(`{"id": 1}`)}, - } - - allQueries := make([]testenv.GraphQLRequest, 0, len(slowQueries)+len(fastQueries)) - allQueries = append(allQueries, slowQueries...) - allQueries = append(allQueries, fastQueries...) - - expensiveThreshold := 1 * time.Second - - // The override function receives the normalized (minified) query content. - // Both slow queries lack "surname", while all fast queries contain it. - planningDurationOverride := core.WithPlanningDurationOverride(func(content string) time.Duration { - if !strings.Contains(content, "surname") { - return 10 * time.Second - } - return 0 - }) - - // waitForExpensiveCacheHits sends all queries, retrying until each one - // is served from either the main or expensive cache. Then it does a single - // final pass and returns the number of expensive cache hits. 
- waitForExpensiveCacheHits := func(t *testing.T, xEnv *testenv.Environment, queries []testenv.GraphQLRequest, extraChecks ...func(*assert.CollectT, *testenv.TestResponse)) int { - t.Helper() - - // Wait until every query is served from some cache - for _, q := range queries { - require.EventuallyWithT(t, func(ct *assert.CollectT) { - res := xEnv.MakeGraphQLRequestOK(q) - assert.Equal(ct, 200, res.Response.StatusCode) - planHit := res.Response.Header.Get("x-wg-execution-plan-cache") == "HIT" - expensiveHit := res.Response.Header.Get("x-wg-expensive-plan-cache") == "HIT" - assert.True(ct, planHit || expensiveHit, "expected plan to be served from main or expensive cache") - for _, check := range extraChecks { - check(ct, res) - } - }, 2*time.Second, 100*time.Millisecond) - } - - // Single pass to count expensive cache hits - expensiveCacheHits := 0 - for _, q := range queries { - res := xEnv.MakeGraphQLRequestOK(q) - if res.Response.Header.Get("x-wg-expensive-plan-cache") == "HIT" { - expensiveCacheHits++ - } - } - return expensiveCacheHits - } - - t.Run("expensive cache serves evicted plans from small main cache", func(t *testing.T) { - t.Parallel() - - testenv.Run(t, &testenv.Config{ - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = expensiveThreshold - cfg.ExpensiveQueryCacheSize = 100 - }, - RouterOptions: []core.Option{ - core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - }), - planningDurationOverride, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - // Send all queries — each is a MISS and gets planned via singleflight. 
- for _, q := range allQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) - } - - // Only slow queries should end up in the expensive cache - hits := waitForExpensiveCacheHits(t, xEnv, slowQueries) - require.Positive(t, hits, "expected at least one slow query to be served from the expensive cache") - }) - }) - - t.Run("fast queries do not enter expensive cache", func(t *testing.T) { - t.Parallel() - - testenv.Run(t, &testenv.Config{ - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = expensiveThreshold - cfg.ExpensiveQueryCacheSize = 100 - }, - RouterOptions: []core.Option{ - core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - }), - planningDurationOverride, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - // Send all queries - for _, q := range allQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - } - - // Wait for Ristretto eviction - time.Sleep(200 * time.Millisecond) - - // Fast queries should never be served from the expensive cache - for _, q := range fastQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-expensive-plan-cache"), - "fast query should not be in the expensive cache") - } - }) - }) - - t.Run("evicted plans survive config reload via expensive cache with small main cache", func(t *testing.T) { - t.Parallel() - - pm := ConfigPollerMock{ - ready: make(chan struct{}), - } - - testenv.Run(t, &testenv.Config{ - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = expensiveThreshold - cfg.ExpensiveQueryCacheSize = 100 - }, - RouterOptions: []core.Option{ - 
core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - Source: config.CacheWarmupSource{ - CdnSource: config.CacheWarmupCDNSource{ - Enabled: true, - }, - }, - }), - core.WithConfigVersionHeader(true), - planningDurationOverride, - }, - RouterConfig: &testenv.RouterConfig{ - ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { - pm.initConfig = config - return &pm - }, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - // Populate caches with slow queries - for _, q := range slowQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) - } - - // Trigger config reload — new Ristretto cache is created (size 1). - <-pm.ready - pm.initConfig.Version = "updated" - require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) - - // After reload, slow queries should still be available via expensive cache. 
- hits := waitForExpensiveCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { - assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) - }) - require.Positive(t, hits, "expected at least one query to be served from the expensive cache after config reload") - }) - }) - - t.Run("only expensive queries persist across config reload, fast queries do not", func(t *testing.T) { - t.Parallel() - - pm := ConfigPollerMock{ - ready: make(chan struct{}), - } - - testenv.Run(t, &testenv.Config{ - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - // Large enough to hold all queries — no evictions before reload - cfg.ExecutionPlanCacheSize = 1024 - cfg.ExpensiveQueryThreshold = expensiveThreshold - cfg.ExpensiveQueryCacheSize = 100 - }, - RouterOptions: []core.Option{ - core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - Source: config.CacheWarmupSource{ - CdnSource: config.CacheWarmupCDNSource{ - Enabled: true, - }, - }, - }), - core.WithConfigVersionHeader(true), - planningDurationOverride, - }, - RouterConfig: &testenv.RouterConfig{ - ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { - pm.initConfig = config - return &pm - }, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - // Populate caches with both slow and fast queries - for _, q := range allQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) - } - - // Verify all queries are cached in the main plan cache before reload - for _, q := range allQueries { - require.EventuallyWithT(t, func(ct *assert.CollectT) { - res := xEnv.MakeGraphQLRequestOK(q) - assert.Equal(ct, "HIT", res.Response.Header.Get("x-wg-execution-plan-cache")) - }, 2*time.Second, 100*time.Millisecond) - } - - // Trigger config reload — main 
plan cache is reset. - <-pm.ready - pm.initConfig.Version = "updated" - require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) - - // Wait for reload to complete by checking a slow query (which will be - // served from the expensive cache, confirming the new server is active). - require.EventuallyWithT(t, func(ct *assert.CollectT) { - res := xEnv.MakeGraphQLRequestOK(slowQueries[0]) - assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) - }, 2*time.Second, 100*time.Millisecond) - - // After reload, fast queries must not be persisted anywhere — the first - // request on the new server should be a MISS on both caches. - for _, q := range fastQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, "updated", res.Response.Header.Get("X-Router-Config-Version")) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache"), - "fast query should not be in main plan cache after config reload") - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-expensive-plan-cache"), - "fast query should not be in expensive cache after config reload") - } - }) - }) - - t.Run("plans survive multiple config reloads with small main cache", func(t *testing.T) { - t.Parallel() - - pm := ConfigPollerMock{ - ready: make(chan struct{}), - } - - testenv.Run(t, &testenv.Config{ - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = expensiveThreshold - cfg.ExpensiveQueryCacheSize = 100 - }, - RouterOptions: []core.Option{ - core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - Source: config.CacheWarmupSource{ - CdnSource: config.CacheWarmupCDNSource{ - Enabled: true, - }, - }, - }), - core.WithConfigVersionHeader(true), - planningDurationOverride, - }, - RouterConfig: &testenv.RouterConfig{ - ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { - 
pm.initConfig = config - return &pm - }, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - // Warm up with slow queries - for _, q := range slowQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) - } - - <-pm.ready - - // First reload - pm.initConfig.Version = "v2" - require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) - - waitForExpensiveCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { - assert.Equal(ct, "v2", res.Response.Header.Get("X-Router-Config-Version")) - }) - - // Second reload - pm.initConfig.Version = "v3" - require.NoError(t, pm.updateConfig(pm.initConfig, "v2")) - - hits := waitForExpensiveCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { - assert.Equal(ct, "v3", res.Response.Header.Get("X-Router-Config-Version")) - }) - require.Positive(t, hits, "expected at least one query to be served from the expensive cache after multiple reloads") - }) - }) - - t.Run("expensive cache works without config reload", func(t *testing.T) { - t.Parallel() - - testenv.Run(t, &testenv.Config{ - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = expensiveThreshold - cfg.ExpensiveQueryCacheSize = 10 - }, - RouterOptions: []core.Option{ - core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - }), - planningDurationOverride, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - // Send slow queries to overflow the tiny main cache - for _, q := range slowQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) - } - - hits := waitForExpensiveCacheHits(t, xEnv, slowQueries) - require.Positive(t, hits, "expected at least one query to be 
served from the expensive cache") - }) - }) - - t.Run("router shuts down cleanly with expensive cache enabled", func(t *testing.T) { - t.Parallel() - - testenv.Run(t, &testenv.Config{ - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = expensiveThreshold - cfg.ExpensiveQueryCacheSize = 50 - }, - RouterOptions: []core.Option{ - core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - }), - planningDurationOverride, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - // Make some requests to populate both caches - for _, q := range allQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - } - // testenv.Run handles shutdown — test verifies no panic or hang - }) - }) - - t.Run("expensive cache hit is recorded in span attributes", func(t *testing.T) { - t.Parallel() - - exporter := tracetest.NewInMemoryExporter(t) - metricReader := metric.NewManualReader() - - testenv.Run(t, &testenv.Config{ - TraceExporter: exporter, - MetricReader: metricReader, - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = expensiveThreshold - cfg.ExpensiveQueryCacheSize = 100 - }, - RouterOptions: []core.Option{ - core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - }), - planningDurationOverride, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - // Send slow queries to overflow the tiny main cache - for _, q := range slowQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) - } - - // Wait for caches to converge, then reset spans for a clean measurement - waitForExpensiveCacheHits(t, xEnv, slowQueries) - exporter.Reset() - - // Final pass to generate spans with 
known state - for _, q := range slowQueries { - xEnv.MakeGraphQLRequestOK(q) - } - - // Verify spans contain the expensive_plan_cache_hit attribute - sn := exporter.GetSpans().Snapshots() - expensiveHitSpanFound := false - for _, span := range sn { - if span.Name() == "Operation - Plan" { - for _, attr := range span.Attributes() { - if attr.Key == otel.WgEngineExpensivePlanCacheHit && attr.Value.AsBool() { - expensiveHitSpanFound = true - // plan_cache_hit should be false for expensive cache hits - require.Contains(t, span.Attributes(), otel.WgEnginePlanCacheHit.Bool(false)) - } - } - } - } - require.True(t, expensiveHitSpanFound, "expected at least one 'Operation - Plan' span with wg.engine.expensive_plan_cache_hit=true") - - // Verify OTEL metrics include the expensive_plan_cache_hit attribute - rm := metricdata.ResourceMetrics{} - err := metricReader.Collect(context.Background(), &rm) - require.NoError(t, err) - - metricScope := GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router") - require.NotNil(t, metricScope) - - planningMetric := GetMetricByName(metricScope, "router.graphql.operation.planning_time") - require.NotNil(t, planningMetric) - - hist := planningMetric.Data.(metricdata.Histogram[float64]) - expensiveHitMetricFound := false - for _, dp := range hist.DataPoints { - val, found := dp.Attributes.Value(otel.WgEngineExpensivePlanCacheHit) - if found && val.AsBool() { - expensiveHitMetricFound = true - // plan_cache_hit should be false for expensive cache hits - planVal, planFound := dp.Attributes.Value(otel.WgEnginePlanCacheHit) - require.True(t, planFound) - require.False(t, planVal.AsBool()) - break - } - } - require.True(t, expensiveHitMetricFound, "expected planning_time metric with wg.engine.expensive_plan_cache_hit=true") - }) - }) - - t.Run("expensive cache hit is recorded in Prometheus metrics", func(t *testing.T) { - t.Parallel() - - exporter := tracetest.NewInMemoryExporter(t) - metricReader := metric.NewManualReader() - promRegistry := 
prometheus.NewRegistry() - - testenv.Run(t, &testenv.Config{ - TraceExporter: exporter, - MetricReader: metricReader, - PrometheusRegistry: promRegistry, - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = expensiveThreshold - cfg.ExpensiveQueryCacheSize = 100 - }, - RouterOptions: []core.Option{ - core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - }), - planningDurationOverride, - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - // Overflow the tiny main cache with slow queries - for _, q := range slowQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) - } - - // Wait for caches to converge, then make a final pass for Prometheus - waitForExpensiveCacheHits(t, xEnv, slowQueries) - - for _, q := range slowQueries { - xEnv.MakeGraphQLRequestOK(q) - } - - // Gather Prometheus metrics - mf, err := promRegistry.Gather() - require.NoError(t, err) - - planningTime := findMetricFamilyByName(mf, "router_graphql_operation_planning_time") - require.NotNil(t, planningTime, "expected router_graphql_operation_planning_time metric") - - // Verify the expensive_plan_cache_hit label exists - expensiveHitFound := false - for _, m := range planningTime.GetMetric() { - for _, label := range m.GetLabel() { - if label.GetName() == "wg_engine_expensive_plan_cache_hit" && label.GetValue() == "true" { - expensiveHitFound = true - // plan_cache_hit should be false for expensive cache hits - for _, subLabel := range m.GetLabel() { - if subLabel.GetName() == "wg_engine_plan_cache_hit" { - require.Equal(t, "false", subLabel.GetValue(), "plan_cache_hit should be false when expensive_plan_cache_hit is true") - } - } - } - } - } - require.True(t, expensiveHitFound, "expected Prometheus metric with wg_engine_expensive_plan_cache_hit=true") - - // Also verify 
that the false value exists (from initial MISS requests) - expensiveMissFound := false - for _, m := range planningTime.GetMetric() { - for _, label := range m.GetLabel() { - if label.GetName() == "wg_engine_expensive_plan_cache_hit" && label.GetValue() == "false" { - expensiveMissFound = true - } - } - } - require.True(t, expensiveMissFound, "expected Prometheus metric with wg_engine_expensive_plan_cache_hit=false") - }) - }) - - t.Run("no expensive cache header or telemetry when feature is disabled", func(t *testing.T) { - t.Parallel() - - exporter := tracetest.NewInMemoryExporter(t) - metricReader := metric.NewManualReader() - promRegistry := prometheus.NewRegistry() - - testenv.Run(t, &testenv.Config{ - TraceExporter: exporter, - MetricReader: metricReader, - PrometheusRegistry: promRegistry, - // InMemoryFallback is NOT set — expensive cache is disabled - }, func(t *testing.T, xEnv *testenv.Environment) { - for _, q := range allQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - // Header must be absent when feature is disabled - require.Empty(t, res.Response.Header.Get("x-wg-expensive-plan-cache"), - "x-wg-expensive-plan-cache header should not be present when InMemoryFallback is disabled") - } - - // Second pass — cache hits - for _, q := range allQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Empty(t, res.Response.Header.Get("x-wg-expensive-plan-cache")) - } - - // Verify spans do NOT contain the expensive_plan_cache_hit attribute - sn := exporter.GetSpans().Snapshots() - for _, span := range sn { - if span.Name() == "Operation - Plan" { - for _, attr := range span.Attributes() { - require.NotEqual(t, otel.WgEngineExpensivePlanCacheHit, attr.Key, - "wg.engine.expensive_plan_cache_hit attribute should not be present when feature is disabled") - } - } - } - - // Verify OTEL metrics do NOT contain the attribute - rm := metricdata.ResourceMetrics{} - err := metricReader.Collect(context.Background(), &rm) 
- require.NoError(t, err) - - metricScope := GetMetricScopeByName(rm.ScopeMetrics, "cosmo.router") - if metricScope != nil { - planningMetric := GetMetricByName(metricScope, "router.graphql.operation.planning_time") - if planningMetric != nil { - hist := planningMetric.Data.(metricdata.Histogram[float64]) - for _, dp := range hist.DataPoints { - _, found := dp.Attributes.Value(otel.WgEngineExpensivePlanCacheHit) - require.False(t, found, - "wg.engine.expensive_plan_cache_hit attribute should not be present in OTEL metrics when feature is disabled") - } - } - } - - // Verify Prometheus metrics do NOT contain the label - mf, err := promRegistry.Gather() - require.NoError(t, err) - - planningTime := findMetricFamilyByName(mf, "router_graphql_operation_planning_time") - if planningTime != nil { - for _, m := range planningTime.GetMetric() { - for _, label := range m.GetLabel() { - require.NotEqual(t, "wg_engine_expensive_plan_cache_hit", label.GetName(), - "wg_engine_expensive_plan_cache_hit label should not be present in Prometheus when feature is disabled") - } - } - } - }) - }) - - t.Run("expensive cache entries survive static execution config reload", func(t *testing.T) { - t.Parallel() - - configFile := t.TempDir() + "/config.json" - writeTestConfig(t, "initial", configFile) - - testenv.Run(t, &testenv.Config{ - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1024 - cfg.ExpensiveQueryThreshold = expensiveThreshold - cfg.ExpensiveQueryCacheSize = 100 - }, - RouterOptions: []core.Option{ - core.WithConfigVersionHeader(true), - core.WithExecutionConfig(&core.ExecutionConfig{ - Path: configFile, - Watch: true, - WatchInterval: 100 * time.Millisecond, - }), - core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - }), - // "hello" is slow (enters expensive cache), "world" is fast (does not) - core.WithPlanningDurationOverride(func(content string) 
time.Duration { - if strings.Contains(content, "hello") { - return 10 * time.Second - } - return 0 - }), - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - slowQ := testenv.GraphQLRequest{Query: `query { hello }`} - fastQ := testenv.GraphQLRequest{Query: `query { world }`} - - // Plan both queries - for _, q := range []testenv.GraphQLRequest{slowQ, fastQ} { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - require.Equal(t, "initial", res.Response.Header.Get("X-Router-Config-Version")) - } - - // Trigger schema reload - writeTestConfig(t, "updated", configFile) - - // Wait for reload to complete — slow query should survive via expensive cache - require.EventuallyWithT(t, func(ct *assert.CollectT) { - res := xEnv.MakeGraphQLRequestOK(slowQ) - assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) - planHit := res.Response.Header.Get("x-wg-execution-plan-cache") == "HIT" - expensiveHit := res.Response.Header.Get("x-wg-expensive-plan-cache") == "HIT" - assert.True(ct, planHit || expensiveHit, "expected slow plan to survive schema reload") - }, 2*time.Second, 100*time.Millisecond) - - // Fast query must not be persisted anywhere after reload - res := xEnv.MakeGraphQLRequestOK(fastQ) - require.Equal(t, "updated", res.Response.Header.Get("X-Router-Config-Version")) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache"), - "fast query should not be in main plan cache after schema reload") - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-expensive-plan-cache"), - "fast query should not survive schema reload via expensive cache") - }) - }) - - t.Run("high threshold prevents fast plans from entering expensive cache", func(t *testing.T) { - t.Parallel() - - testenv.Run(t, &testenv.Config{ - ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.ExecutionPlanCacheSize = 1 - cfg.ExpensiveQueryThreshold = 1 * time.Hour - 
cfg.ExpensiveQueryCacheSize = 100 - }, - RouterOptions: []core.Option{ - core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ - Enabled: true, - InMemoryFallback: true, - }), - // No planning duration override — all plans are fast - }, - }, func(t *testing.T, xEnv *testenv.Environment) { - // Populate — all plans are fast (well under 1h threshold) - for _, q := range allQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) - // Feature is enabled so header is present, but should be MISS - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-expensive-plan-cache")) - } - - // Wait for Ristretto eviction - time.Sleep(200 * time.Millisecond) - - // Re-query — with main cache size 1, most are evicted from Ristretto. - // Since no plan met the 1h threshold, the expensive cache is empty. - // These should be re-planned (MISS on both caches). - for _, q := range allQueries { - res := xEnv.MakeGraphQLRequestOK(q) - require.Equal(t, 200, res.Response.StatusCode) - require.Equal(t, "MISS", res.Response.Header.Get("x-wg-expensive-plan-cache"), - "no plan should be in the expensive cache with a 1h threshold") - } - }) - }) -} diff --git a/router-tests/plan_fallback_cache_test.go b/router-tests/plan_fallback_cache_test.go new file mode 100644 index 0000000000..041ba0314e --- /dev/null +++ b/router-tests/plan_fallback_cache_test.go @@ -0,0 +1,465 @@ +package integration + +import ( + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/wundergraph/cosmo/router-tests/testenv" + "github.com/wundergraph/cosmo/router/core" + nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/cosmo/router/pkg/config" + "github.com/wundergraph/cosmo/router/pkg/controlplane/configpoller" +) + +func TestPlanFallbackCache(t *testing.T) { + t.Parallel() + 
+ // slowQueries are queries whose planning duration is overridden to exceed the threshold. + slowQueries := []testenv.GraphQLRequest{ + {Query: `{ employees { id } }`}, + {Query: `query { employees { id details { forename } } }`}, + } + + // fastQueries are queries whose planning duration stays below the threshold. + fastQueries := []testenv.GraphQLRequest{ + {Query: `query { employees { id details { forename surname } } }`}, + {Query: `query m($id: Int!){ employee(id: $id) { id details { forename surname } } }`, Variables: []byte(`{"id": 1}`)}, + } + + allQueries := make([]testenv.GraphQLRequest, 0, len(slowQueries)+len(fastQueries)) + allQueries = append(allQueries, slowQueries...) + allQueries = append(allQueries, fastQueries...) + + fallbackThreshold := 1 * time.Second + + // The override function receives the normalized (minified) query content. + // Both slow queries lack "surname", while all fast queries contain it. + planningDurationOverride := core.WithPlanningDurationOverride(func(content string) time.Duration { + if !strings.Contains(content, "surname") { + return 10 * time.Second + } + return 0 + }) + + // waitForPlanCacheHits sends all queries, retrying until each one + // is served from the plan cache (which includes fallback cache promotions). 
+ waitForPlanCacheHits := func(t *testing.T, xEnv *testenv.Environment, queries []testenv.GraphQLRequest, extraChecks ...func(*assert.CollectT, *testenv.TestResponse)) { + t.Helper() + + for _, q := range queries { + require.EventuallyWithT(t, func(ct *assert.CollectT) { + res := xEnv.MakeGraphQLRequestOK(q) + assert.Equal(ct, 200, res.Response.StatusCode) + assert.Equal(ct, "HIT", res.Response.Header.Get("x-wg-execution-plan-cache"), + "expected plan to be served from cache") + for _, check := range extraChecks { + check(ct, res) + } + }, 2*time.Second, 100*time.Millisecond) + } + } + + t.Run("fallback cache serves evicted plans from small main cache", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.PlanFallbackThreshold = fallbackThreshold + cfg.PlanFallbackCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + planningDurationOverride, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Send all queries — each is a MISS and gets planned via singleflight. 
+ for _, q := range allQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + // Slow queries should be served from cache (via fallback promotion) + waitForPlanCacheHits(t, xEnv, slowQueries) + }) + }) + + t.Run("fast queries do not enter fallback cache", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.PlanFallbackThreshold = fallbackThreshold + cfg.PlanFallbackCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + planningDurationOverride, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Send all queries + for _, q := range allQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + } + + // Wait for Ristretto eviction + time.Sleep(200 * time.Millisecond) + + // Fast queries should not be cached after eviction from the tiny main cache + for _, q := range fastQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache"), + "fast query should not be in cache after eviction") + } + }) + }) + + t.Run("evicted plans survive config reload via fallback cache with small main cache", func(t *testing.T) { + t.Parallel() + + pm := ConfigPollerMock{ + ready: make(chan struct{}), + } + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.PlanFallbackThreshold = fallbackThreshold + cfg.PlanFallbackCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + Source: 
config.CacheWarmupSource{ + CdnSource: config.CacheWarmupCDNSource{ + Enabled: true, + }, + }, + }), + core.WithConfigVersionHeader(true), + planningDurationOverride, + }, + RouterConfig: &testenv.RouterConfig{ + ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { + pm.initConfig = config + return &pm + }, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Populate caches with slow queries + for _, q := range slowQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + // Trigger config reload — new Ristretto cache is created (size 1). + <-pm.ready + pm.initConfig.Version = "updated" + require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) + + // After reload, slow queries should still be available via fallback cache. + waitForPlanCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { + assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) + }) + }) + }) + + t.Run("only slow queries persist across config reload, fast queries do not", func(t *testing.T) { + t.Parallel() + + pm := ConfigPollerMock{ + ready: make(chan struct{}), + } + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + // Large enough to hold all queries — no evictions before reload + cfg.ExecutionPlanCacheSize = 1024 + cfg.PlanFallbackThreshold = fallbackThreshold + cfg.PlanFallbackCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + Source: config.CacheWarmupSource{ + CdnSource: config.CacheWarmupCDNSource{ + Enabled: true, + }, + }, + }), + core.WithConfigVersionHeader(true), + planningDurationOverride, + }, + RouterConfig: &testenv.RouterConfig{ + ConfigPollerFactory: func(config 
*nodev1.RouterConfig) configpoller.ConfigPoller { + pm.initConfig = config + return &pm + }, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Populate caches with both slow and fast queries + for _, q := range allQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + // Verify all queries are cached in the main plan cache before reload + for _, q := range allQueries { + require.EventuallyWithT(t, func(ct *assert.CollectT) { + res := xEnv.MakeGraphQLRequestOK(q) + assert.Equal(ct, "HIT", res.Response.Header.Get("x-wg-execution-plan-cache")) + }, 2*time.Second, 100*time.Millisecond) + } + + // Trigger config reload — main plan cache is reset. + <-pm.ready + pm.initConfig.Version = "updated" + require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) + + // Wait for reload to complete by checking a slow query (which will be + // served from the fallback cache, confirming the new server is active). + require.EventuallyWithT(t, func(ct *assert.CollectT) { + res := xEnv.MakeGraphQLRequestOK(slowQueries[0]) + assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) + }, 2*time.Second, 100*time.Millisecond) + + // After reload, fast queries must not be persisted anywhere — the first + // request on the new server should be a MISS on both caches. 
+ for _, q := range fastQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, "updated", res.Response.Header.Get("X-Router-Config-Version")) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache"), + "fast query should not be in plan cache after config reload") + } + }) + }) + + t.Run("plans survive multiple config reloads with small main cache", func(t *testing.T) { + t.Parallel() + + pm := ConfigPollerMock{ + ready: make(chan struct{}), + } + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.PlanFallbackThreshold = fallbackThreshold + cfg.PlanFallbackCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + Source: config.CacheWarmupSource{ + CdnSource: config.CacheWarmupCDNSource{ + Enabled: true, + }, + }, + }), + core.WithConfigVersionHeader(true), + planningDurationOverride, + }, + RouterConfig: &testenv.RouterConfig{ + ConfigPollerFactory: func(config *nodev1.RouterConfig) configpoller.ConfigPoller { + pm.initConfig = config + return &pm + }, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warm up with slow queries + for _, q := range slowQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + <-pm.ready + + // First reload + pm.initConfig.Version = "v2" + require.NoError(t, pm.updateConfig(pm.initConfig, "old-1")) + + waitForPlanCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { + assert.Equal(ct, "v2", res.Response.Header.Get("X-Router-Config-Version")) + }) + + // Second reload + pm.initConfig.Version = "v3" + require.NoError(t, pm.updateConfig(pm.initConfig, "v2")) + + waitForPlanCacheHits(t, xEnv, slowQueries, func(ct *assert.CollectT, res *testenv.TestResponse) { + 
assert.Equal(ct, "v3", res.Response.Header.Get("X-Router-Config-Version")) + }) + }) + }) + + t.Run("fallback cache works without config reload", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.PlanFallbackThreshold = fallbackThreshold + cfg.PlanFallbackCacheSize = 10 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + planningDurationOverride, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Send slow queries to overflow the tiny main cache + for _, q := range slowQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + waitForPlanCacheHits(t, xEnv, slowQueries) + }) + }) + + t.Run("router shuts down cleanly with fallback cache enabled", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.PlanFallbackThreshold = fallbackThreshold + cfg.PlanFallbackCacheSize = 50 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + planningDurationOverride, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Make some requests to populate both caches + for _, q := range allQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + } + // testenv.Run handles shutdown — test verifies no panic or hang + }) + }) + + t.Run("fallback cache entries survive static execution config reload", func(t *testing.T) { + t.Parallel() + + configFile := t.TempDir() + "/config.json" + writeTestConfig(t, "initial", configFile) + 
+ testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1024 + cfg.PlanFallbackThreshold = fallbackThreshold + cfg.PlanFallbackCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithConfigVersionHeader(true), + core.WithExecutionConfig(&core.ExecutionConfig{ + Path: configFile, + Watch: true, + WatchInterval: 100 * time.Millisecond, + }), + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + // "hello" is slow (enters fallback cache), "world" is fast (does not) + core.WithPlanningDurationOverride(func(content string) time.Duration { + if strings.Contains(content, "hello") { + return 10 * time.Second + } + return 0 + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + slowQ := testenv.GraphQLRequest{Query: `query { hello }`} + fastQ := testenv.GraphQLRequest{Query: `query { world }`} + + // Plan both queries + for _, q := range []testenv.GraphQLRequest{slowQ, fastQ} { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "initial", res.Response.Header.Get("X-Router-Config-Version")) + } + + // Trigger schema reload + writeTestConfig(t, "updated", configFile) + + // Wait for reload to complete — slow query should survive via fallback cache + require.EventuallyWithT(t, func(ct *assert.CollectT) { + res := xEnv.MakeGraphQLRequestOK(slowQ) + assert.Equal(ct, "updated", res.Response.Header.Get("X-Router-Config-Version")) + assert.Equal(ct, "HIT", res.Response.Header.Get("x-wg-execution-plan-cache"), + "expected slow plan to survive schema reload") + }, 2*time.Second, 100*time.Millisecond) + + // Fast query must not be persisted anywhere after reload + res := xEnv.MakeGraphQLRequestOK(fastQ) + require.Equal(t, "updated", res.Response.Header.Get("X-Router-Config-Version")) + require.Equal(t, "MISS", 
res.Response.Header.Get("x-wg-execution-plan-cache"), + "fast query should not be in plan cache after schema reload") + }) + }) + + t.Run("high threshold prevents fast plans from entering fallback cache", func(t *testing.T) { + t.Parallel() + + testenv.Run(t, &testenv.Config{ + ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { + cfg.ExecutionPlanCacheSize = 1 + cfg.PlanFallbackThreshold = 1 * time.Hour + cfg.PlanFallbackCacheSize = 100 + }, + RouterOptions: []core.Option{ + core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ + Enabled: true, + InMemoryFallback: true, + }), + // No planning duration override — all plans are fast + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Populate — all plans are fast (well under 1h threshold) + for _, q := range allQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache")) + } + + // Wait for Ristretto eviction + time.Sleep(200 * time.Millisecond) + + // Re-query — with main cache size 1, most are evicted from Ristretto. + // Since no plan met the 1h threshold, the fallback cache is empty. + // These should be re-planned (MISS). 
+ for _, q := range allQueries { + res := xEnv.MakeGraphQLRequestOK(q) + require.Equal(t, 200, res.Response.StatusCode) + require.Equal(t, "MISS", res.Response.Header.Get("x-wg-execution-plan-cache"), + "no plan should be cached with a 1h threshold") + } + }) + }) +} diff --git a/router/core/context.go b/router/core/context.go index 78371833f3..53e12d0fc7 100644 --- a/router/core/context.go +++ b/router/core/context.go @@ -621,8 +621,6 @@ type operationContext struct { traceOptions resolve.TraceOptions executionOptions resolve.ExecutionOptions planCacheHit bool - expensivePlanCacheHit bool - expensiveCacheEnabled bool initialPayload []byte extensions []byte persistedID string diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 05a07c17eb..7972ccaef3 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -585,10 +585,10 @@ func (s *graphMux) buildOperationCaches(srv *graphServer) (computeSha256 bool, e } if srv.cacheWarmup != nil && srv.cacheWarmup.Enabled && srv.cacheWarmup.InMemoryFallback { planCacheConfig.OnEvict = func(item *ristretto.Item[*planWithMetaData]) { - if s.operationPlanner == nil || s.operationPlanner.expensiveCache == nil { + if s.operationPlanner == nil || s.operationPlanner.fallbackCache == nil { return } - s.operationPlanner.expensiveCache.Set(item.Key, item.Value, item.Value.planningDuration) + s.operationPlanner.fallbackCache.Set(item.Key, item.Value, item.Value.planningDuration) } } s.planCache, err = ristretto.NewCache[uint64, *planWithMetaData](planCacheConfig) @@ -1347,8 +1347,8 @@ func (s *graphServer) buildGraphMux( executor, gm.planCache, opts.ReloadPersistentState.inMemoryPlanCacheFallback.IsEnabled(), - int(s.engineExecutionConfiguration.ExpensiveQueryCacheSize), - s.engineExecutionConfiguration.ExpensiveQueryThreshold, + int(s.engineExecutionConfiguration.PlanFallbackCacheSize), + s.engineExecutionConfiguration.PlanFallbackThreshold, ) if err != nil { return nil, fmt.Errorf("failed to 
create operation planner: %w", err) @@ -1391,9 +1391,6 @@ func (s *graphServer) buildGraphMux( otel.WgOperationType.String(item.OperationType), otel.WgEnginePlanCacheHit.Bool(false), } - if operationPlanner.useFallback { - attrs = append(attrs, otel.WgEngineExpensivePlanCacheHit.Bool(false)) - } gm.metricStore.MeasureOperationPlanningTime(ctx, item.PlanningTime, nil, @@ -1417,7 +1414,7 @@ func (s *graphServer) buildGraphMux( // We first utilize the existing plan cache (if it was already set, i.e., not on the first start) to create a list of queries // and then reset the plan cache to the new plan cache for this start afterwards. warmupConfig.Source = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) - opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) + opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, operationPlanner.fallbackCache) case s.cacheWarmup.Source.CdnSource.Enabled: if s.graphApiToken == "" { return nil, fmt.Errorf("graph token is required for cache warmup in order to communicate with the CDN") @@ -1427,7 +1424,7 @@ func (s *graphServer) buildGraphMux( // This is useful for when an issue occurs with the CDN when retrieving the required manifest if s.cacheWarmup.InMemoryFallback { warmupConfig.FallbackSource = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) - opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, operationPlanner.expensiveCache) + opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, operationPlanner.fallbackCache) } cdnSource, err := NewCDNSource(s.cdnConfig.URL, s.graphApiToken, s.logger) if err != nil { diff --git a/router/core/graphql_handler.go b/router/core/graphql_handler.go index 1469945774..1fab6a7732 100644 --- 
a/router/core/graphql_handler.go +++ b/router/core/graphql_handler.go @@ -34,7 +34,6 @@ var ( const ( ExecutionPlanCacheHeader = "X-WG-Execution-Plan-Cache" - ExpensivePlanCacheHeader = "X-WG-Expensive-Plan-Cache" PersistedOperationCacheHeader = "X-WG-Persisted-Operation-Cache" NormalizationCacheHeader = "X-WG-Normalization-Cache" VariablesNormalizationCacheHeader = "X-WG-Variables-Normalization-Cache" @@ -521,12 +520,5 @@ func (h *GraphQLHandler) setDebugCacheHeaders(w http.ResponseWriter, opCtx *oper } else { w.Header().Set(ExecutionPlanCacheHeader, "MISS") } - if opCtx.expensiveCacheEnabled { - if opCtx.expensivePlanCacheHit { - w.Header().Set(ExpensivePlanCacheHeader, "HIT") - } else { - w.Header().Set(ExpensivePlanCacheHeader, "MISS") - } - } } } diff --git a/router/core/graphql_prehandler.go b/router/core/graphql_prehandler.go index ce9508faa2..ee9eb10fc5 100644 --- a/router/core/graphql_prehandler.go +++ b/router/core/graphql_prehandler.go @@ -1089,16 +1089,10 @@ func (h *PreHandler) handleOperation(req *http.Request, httpOperation *httpOpera setTelemetryAttributes(planCtx, requestContext, expr.BucketPlanningTime) enginePlanSpan.SetAttributes(otel.WgEnginePlanCacheHit.Bool(requestContext.operation.planCacheHit)) - if requestContext.operation.expensiveCacheEnabled { - enginePlanSpan.SetAttributes(otel.WgEngineExpensivePlanCacheHit.Bool(requestContext.operation.expensivePlanCacheHit)) - } enginePlanSpan.End() planningAttrs := *requestContext.telemetry.AcquireAttributes() planningAttrs = append(planningAttrs, otel.WgEnginePlanCacheHit.Bool(requestContext.operation.planCacheHit)) - if requestContext.operation.expensiveCacheEnabled { - planningAttrs = append(planningAttrs, otel.WgEngineExpensivePlanCacheHit.Bool(requestContext.operation.expensivePlanCacheHit)) - } planningAttrs = append(planningAttrs, requestContext.telemetry.metricAttrs...) 
httpOperation.operationMetrics.routerMetrics.MetricStore().MeasureOperationPlanningTime( diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index de4c6292b2..c791e3e511 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -7,6 +7,7 @@ import ( graphqlmetricsv1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/graphqlmetrics/v1" "github.com/wundergraph/cosmo/router/pkg/graphqlschemausage" + "github.com/wundergraph/cosmo/router/pkg/planfallbackcache" "github.com/wundergraph/graphql-go-tools/v2/pkg/ast" "github.com/wundergraph/graphql-go-tools/v2/pkg/astparser" "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" @@ -29,7 +30,7 @@ type planWithMetaData struct { type OperationPlanner struct { sf singleflight.Group planCache ExecutionPlanCache[uint64, *planWithMetaData] - expensiveCache *expensivePlanCache + fallbackCache *planfallbackcache.Cache[*planWithMetaData] executor *Executor trackUsageInfo bool useFallback bool @@ -55,7 +56,7 @@ type ExecutionPlanCache[K any, V any] interface { Close() } -func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache ExecutionPlanCache[uint64, *planWithMetaData], inMemoryPlanCacheFallback bool, expensiveCacheSize int, threshold time.Duration) (*OperationPlanner, error) { +func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache ExecutionPlanCache[uint64, *planWithMetaData], inMemoryPlanCacheFallback bool, fallbackCacheSize int, threshold time.Duration) (*OperationPlanner, error) { p := &OperationPlanner{ logger: logger, planCache: planCache, @@ -66,7 +67,7 @@ func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache Execu if inMemoryPlanCacheFallback { var err error - p.expensiveCache, err = newExpensivePlanCache(expensiveCacheSize, threshold) + p.fallbackCache, err = planfallbackcache.New[*planWithMetaData](fallbackCacheSize, threshold) if err != nil { return nil, err } @@ -75,12 +76,12 @@ func 
NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache Execu return p, nil } -// Close releases expensive cache resources. +// Close releases fallback cache resources. func (p *OperationPlanner) Close() { if p == nil || !p.useFallback { return } - p.expensiveCache.Close() + p.fallbackCache.Close() } // planOperation performs the core planning work: parse, plan, and postprocess. @@ -149,7 +150,6 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions // if we have tracing enabled or want to include a query plan in the response we always prepare a new plan // this is because in case of tracing, we're writing trace data to the plan // in case of including the query plan, we don't want to cache this additional overhead - opContext.expensiveCacheEnabled = p.useFallback skipCache := options.TraceOptions.Enable || options.ExecutionOptions.IncludeQueryPlanInResponse @@ -178,10 +178,10 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions opContext.preparedPlan = cachedPlan opContext.planCacheHit = true } else if p.useFallback { - if cachedPlan, ok = p.expensiveCache.Get(operationID); ok { - // found in the expensive query cache — re-use and re-insert into main cache + if cachedPlan, ok = p.fallbackCache.Get(operationID); ok { + // found in the plan fallback cache — re-use and re-insert into main cache opContext.preparedPlan = cachedPlan - opContext.expensivePlanCacheHit = true + opContext.planCacheHit = true p.planCache.Set(operationID, cachedPlan, 1) } } @@ -208,7 +208,7 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions p.planCache.Set(operationID, prepared, 1) if p.useFallback { - p.expensiveCache.Set(operationID, prepared, prepared.planningDuration) + p.fallbackCache.Set(operationID, prepared, prepared.planningDuration) } return prepared, nil diff --git a/router/core/reload_persistent_state.go b/router/core/reload_persistent_state.go index 9d8f9593fb..7d1ad87b8f 
100644 --- a/router/core/reload_persistent_state.go +++ b/router/core/reload_persistent_state.go @@ -4,6 +4,7 @@ import ( "sync" nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/cosmo/router/pkg/planfallbackcache" "go.uber.org/zap" ) @@ -91,7 +92,7 @@ func (c *InMemoryPlanCacheFallback) getPlanCacheForFF(featureFlagKey string) []* } switch cache := c.queriesForFeatureFlag[featureFlagKey].(type) { - case *expensivePlanCache: + case *planfallbackcache.Cache[*planWithMetaData]: return convertToNodeOperation(cache) case []*nodev1.Operation: return cache @@ -106,7 +107,7 @@ func (c *InMemoryPlanCacheFallback) getPlanCacheForFF(featureFlagKey string) []* } // setPlanCacheForFF sets the plan cache for a specific feature flag key -func (c *InMemoryPlanCacheFallback) setPlanCacheForFF(featureFlagKey string, cache *expensivePlanCache) { +func (c *InMemoryPlanCacheFallback) setPlanCacheForFF(featureFlagKey string, cache *planfallbackcache.Cache[*planWithMetaData]) { c.mu.Lock() defer c.mu.Unlock() @@ -127,7 +128,7 @@ func (c *InMemoryPlanCacheFallback) extractQueriesAndOverridePlanCache() { fallbackMap := make(map[string]any) for k, v := range c.queriesForFeatureFlag { - if cache, ok := v.(*expensivePlanCache); ok { + if cache, ok := v.(*planfallbackcache.Cache[*planWithMetaData]); ok { fallbackMap[k] = convertToNodeOperation(cache) } } @@ -157,7 +158,7 @@ func (c *InMemoryPlanCacheFallback) cleanupUnusedFeatureFlags(routerCfg *nodev1. 
} } -func convertToNodeOperation(data *expensivePlanCache) []*nodev1.Operation { +func convertToNodeOperation(data *planfallbackcache.Cache[*planWithMetaData]) []*nodev1.Operation { items := make([]*nodev1.Operation, 0) data.IterValues(func(v *planWithMetaData) (stop bool) { diff --git a/router/core/reload_persistent_state_test.go b/router/core/reload_persistent_state_test.go index 3be8fd5ca4..1c79841077 100644 --- a/router/core/reload_persistent_state_test.go +++ b/router/core/reload_persistent_state_test.go @@ -6,6 +6,7 @@ import ( "github.com/stretchr/testify/require" nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" "github.com/wundergraph/cosmo/router/pkg/config" + "github.com/wundergraph/cosmo/router/pkg/planfallbackcache" "go.uber.org/zap" ) @@ -47,7 +48,7 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t t.Run("update when already enabled keeps existing data", func(t *testing.T) { t.Parallel() existing := make(map[string]any) - existing["test"] = (*expensivePlanCache)(nil) + existing["test"] = (*planfallbackcache.Cache[*planWithMetaData])(nil) cache := &InMemoryPlanCacheFallback{ queriesForFeatureFlag: existing, @@ -138,17 +139,17 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { require.Equal(t, expectedOps, result) }) - t.Run("returns operations from live expensive cache reference", func(t *testing.T) { + t.Run("returns operations from live fallback cache reference", func(t *testing.T) { t.Parallel() - expCache, err := newExpensivePlanCache(100, 0) + fallbackCache, err := planfallbackcache.New[*planWithMetaData](100, 0) require.NoError(t, err) - expCache.Set(1, &planWithMetaData{content: "query { fromExpensive }"}, 5*1e9) - expCache.Wait() + fallbackCache.Set(1, &planWithMetaData{content: "query { fromFallback }"}, 5*1e9) + fallbackCache.Wait() cache := &InMemoryPlanCacheFallback{ queriesForFeatureFlag: map[string]any{ - "test-ff": expCache, + "test-ff": fallbackCache, }, } @@ 
-156,7 +157,7 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { require.NotNil(t, result) require.Len(t, result, 1) - require.Equal(t, "query { fromExpensive }", result[0].Request.Query) + require.Equal(t, "query { fromFallback }", result[0].Request.Query) }) t.Run("returns nil for non-existent feature flag", func(t *testing.T) { @@ -188,9 +189,9 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ queriesForFeatureFlag: map[string]any{ - "ff1": (*expensivePlanCache)(nil), - "ff2": (*expensivePlanCache)(nil), - "ff3": (*expensivePlanCache)(nil), + "ff1": (*planfallbackcache.Cache[*planWithMetaData])(nil), + "ff2": (*planfallbackcache.Cache[*planWithMetaData])(nil), + "ff3": (*planfallbackcache.Cache[*planWithMetaData])(nil), }, } @@ -215,8 +216,8 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ queriesForFeatureFlag: map[string]any{ - "": (*expensivePlanCache)(nil), - "ff1": (*expensivePlanCache)(nil), + "": (*planfallbackcache.Cache[*planWithMetaData])(nil), + "ff1": (*planfallbackcache.Cache[*planWithMetaData])(nil), }, } @@ -255,12 +256,12 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ queriesForFeatureFlag: map[string]any{ - "": (*expensivePlanCache)(nil), // base should be kept - "ff1": (*expensivePlanCache)(nil), - "ff2": (*expensivePlanCache)(nil), - "ff3": (*expensivePlanCache)(nil), - "ff4": (*expensivePlanCache)(nil), - "ff5": (*expensivePlanCache)(nil), + "": (*planfallbackcache.Cache[*planWithMetaData])(nil), // base should be kept + "ff1": (*planfallbackcache.Cache[*planWithMetaData])(nil), + "ff2": (*planfallbackcache.Cache[*planWithMetaData])(nil), + "ff3": (*planfallbackcache.Cache[*planWithMetaData])(nil), + "ff4": (*planfallbackcache.Cache[*planWithMetaData])(nil), + "ff5": 
(*planfallbackcache.Cache[*planWithMetaData])(nil), }, } @@ -280,26 +281,26 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { func TestInMemoryPlanCacheFallback_ProcessOnConfigChangeRestart(t *testing.T) { t.Parallel() - t.Run("extracts expensive cache entries to operations", func(t *testing.T) { + t.Run("extracts fallback cache entries to operations", func(t *testing.T) { t.Parallel() query1 := "query { test1 }" query2 := "query { test2 }" - expCache1, err := newExpensivePlanCache(100, 0) + fallbackCache1, err := planfallbackcache.New[*planWithMetaData](100, 0) require.NoError(t, err) - expCache2, err := newExpensivePlanCache(100, 0) + fallbackCache2, err := planfallbackcache.New[*planWithMetaData](100, 0) require.NoError(t, err) - expCache1.Set(1, &planWithMetaData{content: query1}, 5*1e9) - expCache1.Wait() - expCache2.Set(2, &planWithMetaData{content: query2}, 5*1e9) - expCache2.Wait() + fallbackCache1.Set(1, &planWithMetaData{content: query1}, 5*1e9) + fallbackCache1.Wait() + fallbackCache2.Set(2, &planWithMetaData{content: query2}, 5*1e9) + fallbackCache2.Wait() cache := &InMemoryPlanCacheFallback{ queriesForFeatureFlag: map[string]any{ - "ff1": expCache1, - "ff2": expCache2, + "ff1": fallbackCache1, + "ff2": fallbackCache2, }, } diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index d1a95278d6..4272b20090 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -430,8 +430,8 @@ type EngineExecutionConfiguration struct { WebSocketClientPingTimeout time.Duration `envDefault:"30s" env:"ENGINE_WEBSOCKET_CLIENT_PING_TIMEOUT" yaml:"websocket_client_ping_timeout,omitempty"` WebSocketClientFrameTimeout time.Duration `envDefault:"100ms" env:"ENGINE_WEBSOCKET_CLIENT_FRAME_TIMEOUT" yaml:"websocket_client_frame_timeout,omitempty"` ExecutionPlanCacheSize int64 `envDefault:"1024" env:"ENGINE_EXECUTION_PLAN_CACHE_SIZE" yaml:"execution_plan_cache_size,omitempty"` - ExpensiveQueryCacheSize int64 
`envDefault:"100" env:"ENGINE_EXPENSIVE_QUERY_CACHE_SIZE" yaml:"expensive_query_cache_size,omitempty"` - ExpensiveQueryThreshold time.Duration `envDefault:"5s" env:"ENGINE_EXPENSIVE_QUERY_THRESHOLD" yaml:"expensive_query_threshold,omitempty"` + PlanFallbackCacheSize int64 `envDefault:"100" env:"ENGINE_PLAN_FALLBACK_CACHE_SIZE" yaml:"plan_fallback_cache_size,omitempty"` + PlanFallbackThreshold time.Duration `envDefault:"5s" env:"ENGINE_PLAN_FALLBACK_THRESHOLD" yaml:"plan_fallback_threshold,omitempty"` MinifySubgraphOperations bool `envDefault:"true" env:"ENGINE_MINIFY_SUBGRAPH_OPERATIONS" yaml:"minify_subgraph_operations"` EnablePersistedOperationsCache bool `envDefault:"true" env:"ENGINE_ENABLE_PERSISTED_OPERATIONS_CACHE" yaml:"enable_persisted_operations_cache"` EnableNormalizationCache bool `envDefault:"true" env:"ENGINE_ENABLE_NORMALIZATION_CACHE" yaml:"enable_normalization_cache"` diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 4841386751..7bd2344132 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -3325,16 +3325,16 @@ "default": 1024, "description": "The size of the execution plan cache." }, - "expensive_query_cache_size": { + "plan_fallback_cache_size": { "type": "integer", "minimum": 1, "default": 256, - "description": "The maximum number of entries in the expensive query plan cache. Expensive queries are protected from TinyLFU eviction in the main plan cache." + "description": "The maximum number of entries in the plan fallback cache. Plans that exceed the planning threshold are protected from TinyLFU eviction in the main plan cache." }, - "expensive_query_threshold": { + "plan_fallback_threshold": { "type": "string", "format": "go-duration", - "description": "The minimum planning duration for a query to be considered expensive and protected from TinyLFU cache eviction. 
Queries exceeding this threshold are re-validated in the background before promotion to the expensive cache.", + "description": "The minimum planning duration for a query plan to be stored in the fallback cache, protecting it from TinyLFU cache eviction.", "default": "5s", "duration": { "minimum": "1ns" diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index 1f064bda34..86da402710 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -425,8 +425,8 @@ "WebSocketClientPingTimeout": 30000000000, "WebSocketClientFrameTimeout": 100000000, "ExecutionPlanCacheSize": 1024, - "ExpensiveQueryCacheSize": 100, - "ExpensiveQueryThreshold": 5000000000, + "PlanFallbackCacheSize": 100, + "PlanFallbackThreshold": 5000000000, "MinifySubgraphOperations": true, "EnablePersistedOperationsCache": true, "EnableNormalizationCache": true, diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index 831b83fc9e..5bc6864904 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -829,8 +829,8 @@ "WebSocketClientPingTimeout": 30000000000, "WebSocketClientFrameTimeout": 100000000, "ExecutionPlanCacheSize": 1024, - "ExpensiveQueryCacheSize": 100, - "ExpensiveQueryThreshold": 5000000000, + "PlanFallbackCacheSize": 100, + "PlanFallbackThreshold": 5000000000, "MinifySubgraphOperations": true, "EnablePersistedOperationsCache": true, "EnableNormalizationCache": true, diff --git a/router/pkg/otel/attributes.go b/router/pkg/otel/attributes.go index de209294b6..07e0deaab2 100644 --- a/router/pkg/otel/attributes.go +++ b/router/pkg/otel/attributes.go @@ -27,7 +27,6 @@ const ( WgRequestError = attribute.Key("wg.request.error") WgOperationPersistedID = attribute.Key("wg.operation.persisted_id") WgEnginePlanCacheHit = attribute.Key("wg.engine.plan_cache_hit") - 
WgEngineExpensivePlanCacheHit = attribute.Key("wg.engine.expensive_plan_cache_hit") WgEnginePersistedOperationCacheHit = attribute.Key("wg.engine.persisted_operation_cache_hit") WgEngineRequestTracingEnabled = attribute.Key("wg.engine.request_tracing_enabled") WgRouterRootSpan = attribute.Key("wg.router.root_span") diff --git a/router/core/expensive_query_cache.go b/router/pkg/planfallbackcache/plan_fallback_cache.go similarity index 63% rename from router/core/expensive_query_cache.go rename to router/pkg/planfallbackcache/plan_fallback_cache.go index 3c99d855ba..5d4c3ab8a4 100644 --- a/router/core/expensive_query_cache.go +++ b/router/pkg/planfallbackcache/plan_fallback_cache.go @@ -1,4 +1,4 @@ -package core +package planfallbackcache import ( "fmt" @@ -6,33 +6,33 @@ import ( "time" ) -// expensivePlanEntry holds a cached plan and the duration it took to plan. -type expensivePlanEntry struct { - plan *planWithMetaData +// Entry holds a cached value and the duration it took to produce. +type Entry[V any] struct { + value V duration time.Duration } -type setRequest struct { - key uint64 - plan *planWithMetaData - duration time.Duration - waitCh chan struct{} // if non-nil, closed after this request is processed +type setRequest[V any] struct { + key uint64 + value V + dur time.Duration + waitCh chan struct{} // if non-nil, closed after this request is processed } -// expensivePlanCache is a bounded map that holds expensive plans +// Cache is a bounded map that holds expensive-to-compute values // that should not be subject to TinyLFU eviction in the main cache. // Writes are buffered through a channel and applied asynchronously by a // background goroutine, making Set non-blocking. Reads are protected by a RWMutex. // It tracks the minimum-duration entry so that rejection of cheaper entries is O(1). 
-type expensivePlanCache struct { +type Cache[V any] struct { mu sync.RWMutex - entries map[uint64]*expensivePlanEntry + entries map[uint64]*Entry[V] maxSize int threshold time.Duration minKey uint64 minDur time.Duration - writeCh chan setRequest + writeCh chan setRequest[V] stop chan struct{} done chan struct{} } @@ -40,15 +40,15 @@ type expensivePlanCache struct { // We use the same value as ristretto (this would be the buffer size if we used ristretto as the backing cache) const defaultWriteBufferSize = 32 * 1024 -func newExpensivePlanCache(maxSize int, threshold time.Duration) (*expensivePlanCache, error) { +func New[V any](maxSize int, threshold time.Duration) (*Cache[V], error) { if maxSize < 1 { - return nil, fmt.Errorf("expensive query cache size must be at least 1, got %d", maxSize) + return nil, fmt.Errorf("plan fallback cache size must be at least 1, got %d", maxSize) } - c := &expensivePlanCache{ - entries: make(map[uint64]*expensivePlanEntry, maxSize), + c := &Cache[V]{ + entries: make(map[uint64]*Entry[V], maxSize), maxSize: maxSize, threshold: threshold, - writeCh: make(chan setRequest, defaultWriteBufferSize), + writeCh: make(chan setRequest[V], defaultWriteBufferSize), stop: make(chan struct{}), done: make(chan struct{}), } @@ -58,7 +58,7 @@ func newExpensivePlanCache(maxSize int, threshold time.Duration) (*expensivePlan // processWrites drains the write channel and applies sets under the write lock. // It exits when the stop channel is closed. 
-func (c *expensivePlanCache) processWrites() { +func (c *Cache[V]) processWrites() { for { select { case req := <-c.writeCh: @@ -66,7 +66,7 @@ func (c *expensivePlanCache) processWrites() { close(req.waitCh) continue } - c.applySet(req.key, req.plan, req.duration) + c.applySet(req.key, req.value, req.dur) case <-c.stop: c.done <- struct{}{} return @@ -74,35 +74,36 @@ func (c *expensivePlanCache) processWrites() { } } -func (c *expensivePlanCache) Get(key uint64) (*planWithMetaData, bool) { +func (c *Cache[V]) Get(key uint64) (V, bool) { c.mu.RLock() defer c.mu.RUnlock() entry, ok := c.entries[key] if !ok { - return nil, false + var zero V + return zero, false } - return entry.plan, true + return entry.value, true } // Set enqueues a write to the cache. The write is applied asynchronously. // If the write buffer is full, the entry is silently dropped. -func (c *expensivePlanCache) Set(key uint64, plan *planWithMetaData, duration time.Duration) { +func (c *Cache[V]) Set(key uint64, value V, duration time.Duration) { select { - case c.writeCh <- setRequest{key: key, plan: plan, duration: duration}: + case c.writeCh <- setRequest[V]{key: key, value: value, dur: duration}: default: } } // Wait blocks until all pending writes in the buffer have been processed. -func (c *expensivePlanCache) Wait() { +func (c *Cache[V]) Wait() { ch := make(chan struct{}) - c.writeCh <- setRequest{waitCh: ch} + c.writeCh <- setRequest[V]{waitCh: ch} <-ch } // applySet performs the actual cache mutation. Must only be called from processWrites. 
-func (c *expensivePlanCache) applySet(key uint64, plan *planWithMetaData, duration time.Duration) { +func (c *Cache[V]) applySet(key uint64, value V, duration time.Duration) { // Reject entries that don't meet the threshold if duration < c.threshold { return @@ -115,7 +116,7 @@ func (c *expensivePlanCache) applySet(key uint64, plan *planWithMetaData, durati if currEntry, ok := c.entries[key]; ok { // Consider worst case, if the previous run was faster then increase if currEntry.duration < duration { - c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} + c.entries[key] = &Entry[V]{value: value, duration: duration} // If the minKey duration was increased, there can be a new minKey if c.minKey == key { @@ -127,7 +128,7 @@ func (c *expensivePlanCache) applySet(key uint64, plan *planWithMetaData, durati // If not at capacity, just add and update min tracking if len(c.entries) < c.maxSize { - c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} + c.entries[key] = &Entry[V]{value: value, duration: duration} if len(c.entries) == 1 || duration < c.minDur { c.minKey = key c.minDur = duration @@ -143,12 +144,12 @@ func (c *expensivePlanCache) applySet(key uint64, plan *planWithMetaData, durati // When at max capacity // Evict the minimum and insert the new entry delete(c.entries, c.minKey) - c.entries[key] = &expensivePlanEntry{plan: plan, duration: duration} + c.entries[key] = &Entry[V]{value: value, duration: duration} c.refreshMin() } // refreshMin rescans the entries to find the new minimum. Must be called with mu held. 
-func (c *expensivePlanCache) refreshMin() { +func (c *Cache[V]) refreshMin() { first := true for k, e := range c.entries { if first || e.duration < c.minDur { @@ -159,12 +160,12 @@ func (c *expensivePlanCache) refreshMin() { } } -func (c *expensivePlanCache) IterValues(cb func(v *planWithMetaData) bool) { +func (c *Cache[V]) IterValues(cb func(v V) bool) { c.mu.RLock() defer c.mu.RUnlock() for _, e := range c.entries { - if cb(e.plan) { + if cb(e.value) { return } } @@ -172,7 +173,7 @@ func (c *expensivePlanCache) IterValues(cb func(v *planWithMetaData) bool) { // Close stops the background goroutine and releases resources. // Pending writes in the buffer may be dropped. -func (c *expensivePlanCache) Close() { +func (c *Cache[V]) Close() { close(c.stop) <-c.done diff --git a/router/core/expensive_query_cache_test.go b/router/pkg/planfallbackcache/plan_fallback_cache_test.go similarity index 56% rename from router/core/expensive_query_cache_test.go rename to router/pkg/planfallbackcache/plan_fallback_cache_test.go index d1e5fda834..2cad5ee20f 100644 --- a/router/core/expensive_query_cache_test.go +++ b/router/pkg/planfallbackcache/plan_fallback_cache_test.go @@ -1,4 +1,4 @@ -package core +package planfallbackcache import ( "testing" @@ -7,14 +7,18 @@ import ( "github.com/stretchr/testify/require" ) -func TestExpensivePlanCache_GetSet(t *testing.T) { +type testPlan struct { + content string +} + +func TestCache_GetSet(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10, 0) + c, err := New[*testPlan](10, 0) require.NoError(t, err) defer c.Close() - plan1 := &planWithMetaData{content: "query { a }"} - plan2 := &planWithMetaData{content: "query { b }"} + plan1 := &testPlan{content: "query { a }"} + plan2 := &testPlan{content: "query { b }"} // Miss _, ok := c.Get(1) @@ -40,18 +44,18 @@ func TestExpensivePlanCache_GetSet(t *testing.T) { require.Equal(t, plan1, got) } -func TestExpensivePlanCache_BoundedSize(t *testing.T) { +func TestCache_BoundedSize(t 
*testing.T) { t.Parallel() - c, err := newExpensivePlanCache(3, 0) + c, err := New[*testPlan](3, 0) require.NoError(t, err) defer c.Close() - c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) - c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) - c.Set(3, &planWithMetaData{content: "q3"}, 30*time.Millisecond) + c.Set(1, &testPlan{content: "q1"}, 10*time.Millisecond) + c.Set(2, &testPlan{content: "q2"}, 20*time.Millisecond) + c.Set(3, &testPlan{content: "q3"}, 30*time.Millisecond) // Cache is full (3/3). Adding a 4th with higher duration should evict the shortest (key=1, 10ms) - c.Set(4, &planWithMetaData{content: "q4"}, 25*time.Millisecond) + c.Set(4, &testPlan{content: "q4"}, 25*time.Millisecond) c.Wait() // Key 1 should be evicted (it had the shortest duration: 10ms) @@ -67,18 +71,18 @@ func TestExpensivePlanCache_BoundedSize(t *testing.T) { require.True(t, ok) } -func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { +func TestCache_BoundedSize_SkipsCheaper(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(3, 0) + c, err := New[*testPlan](3, 0) require.NoError(t, err) defer c.Close() - c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Second) - c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Second) - c.Set(3, &planWithMetaData{content: "q3"}, 30*time.Second) + c.Set(1, &testPlan{content: "q1"}, 10*time.Second) + c.Set(2, &testPlan{content: "q2"}, 20*time.Second) + c.Set(3, &testPlan{content: "q3"}, 30*time.Second) // Try to add a cheaper entry (5s < 10s minimum) — should be rejected - c.Set(4, &planWithMetaData{content: "q4"}, 5*time.Second) + c.Set(4, &testPlan{content: "q4"}, 5*time.Second) c.Wait() _, ok := c.Get(4) @@ -93,14 +97,14 @@ func TestExpensivePlanCache_BoundedSize_SkipsCheaper(t *testing.T) { require.True(t, ok) } -func TestExpensivePlanCache_UpdateExisting(t *testing.T) { +func TestCache_UpdateExisting(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(2, 0) + c, err := 
New[*testPlan](2, 0) require.NoError(t, err) defer c.Close() - plan1 := &planWithMetaData{content: "q1"} - plan1Updated := &planWithMetaData{content: "q1-updated"} + plan1 := &testPlan{content: "q1"} + plan1Updated := &testPlan{content: "q1-updated"} c.Set(1, plan1, 10*time.Millisecond) c.Set(1, plan1Updated, 50*time.Millisecond) @@ -111,7 +115,7 @@ func TestExpensivePlanCache_UpdateExisting(t *testing.T) { require.Equal(t, "q1-updated", got.content) // Updating an existing key should not increase the count - c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) + c.Set(2, &testPlan{content: "q2"}, 20*time.Millisecond) c.Wait() _, ok = c.Get(1) require.True(t, ok, "key 1 should still exist after adding key 2 (capacity is 2)") @@ -119,19 +123,19 @@ func TestExpensivePlanCache_UpdateExisting(t *testing.T) { require.True(t, ok) } -func TestExpensivePlanCache_IterValues(t *testing.T) { +func TestCache_IterValues(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10, 0) + c, err := New[*testPlan](10, 0) require.NoError(t, err) defer c.Close() - c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) - c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) - c.Set(3, &planWithMetaData{content: "q3"}, 30*time.Millisecond) + c.Set(1, &testPlan{content: "q1"}, 10*time.Millisecond) + c.Set(2, &testPlan{content: "q2"}, 20*time.Millisecond) + c.Set(3, &testPlan{content: "q3"}, 30*time.Millisecond) c.Wait() var contents []string - c.IterValues(func(v *planWithMetaData) bool { + c.IterValues(func(v *testPlan) bool { contents = append(contents, v.content) return false }) @@ -139,30 +143,30 @@ func TestExpensivePlanCache_IterValues(t *testing.T) { require.ElementsMatch(t, []string{"q1", "q2", "q3"}, contents) } -func TestExpensivePlanCache_IterValues_EarlyStop(t *testing.T) { +func TestCache_IterValues_EarlyStop(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10, 0) + c, err := New[*testPlan](10, 0) require.NoError(t, err) 
defer c.Close() - c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) - c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) - c.Set(3, &planWithMetaData{content: "q3"}, 30*time.Millisecond) + c.Set(1, &testPlan{content: "q1"}, 10*time.Millisecond) + c.Set(2, &testPlan{content: "q2"}, 20*time.Millisecond) + c.Set(3, &testPlan{content: "q3"}, 30*time.Millisecond) c.Wait() count := 0 - c.IterValues(func(_ *planWithMetaData) bool { + c.IterValues(func(_ *testPlan) bool { count++ return true // stop after first }) require.Equal(t, 1, count) } -func TestExpensivePlanCache_Close(t *testing.T) { +func TestCache_Close(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10, 0) + c, err := New[*testPlan](10, 0) require.NoError(t, err) - c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + c.Set(1, &testPlan{content: "q1"}, 10*time.Millisecond) c.Close() @@ -171,61 +175,61 @@ func TestExpensivePlanCache_Close(t *testing.T) { require.False(t, ok) } -func TestExpensivePlanCache_SetAfterClose(t *testing.T) { +func TestCache_SetAfterClose(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10, 0) + c, err := New[*testPlan](10, 0) require.NoError(t, err) c.Close() // Set after Close should not panic — buffer drops silently require.NotPanics(t, func() { - c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + c.Set(1, &testPlan{content: "q1"}, 10*time.Millisecond) }) _, ok := c.Get(1) require.False(t, ok) } -func TestExpensivePlanCache_IterValuesEmpty(t *testing.T) { +func TestCache_IterValuesEmpty(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10, 0) + c, err := New[*testPlan](10, 0) require.NoError(t, err) defer c.Close() count := 0 - c.IterValues(func(_ *planWithMetaData) bool { + c.IterValues(func(_ *testPlan) bool { count++ return false }) require.Equal(t, 0, count) } -func TestExpensivePlanCache_IterValuesAfterClose(t *testing.T) { +func TestCache_IterValuesAfterClose(t *testing.T) { 
t.Parallel() - c, err := newExpensivePlanCache(10, 0) + c, err := New[*testPlan](10, 0) require.NoError(t, err) - c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + c.Set(1, &testPlan{content: "q1"}, 10*time.Millisecond) c.Close() count := 0 - c.IterValues(func(_ *planWithMetaData) bool { + c.IterValues(func(_ *testPlan) bool { count++ return false }) require.Equal(t, 0, count) } -func TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { +func TestCache_EqualDurationNotEvicted(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(2, 0) + c, err := New[*testPlan](2, 0) require.NoError(t, err) defer c.Close() - c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) - c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) + c.Set(1, &testPlan{content: "q1"}, 10*time.Millisecond) + c.Set(2, &testPlan{content: "q2"}, 20*time.Millisecond) // Same duration as minimum (10ms) — should NOT evict (requires strictly greater) - c.Set(3, &planWithMetaData{content: "q3"}, 10*time.Millisecond) + c.Set(3, &testPlan{content: "q3"}, 10*time.Millisecond) c.Wait() _, ok := c.Get(3) @@ -236,20 +240,20 @@ func TestExpensivePlanCache_EqualDurationNotEvicted(t *testing.T) { require.True(t, ok) } -func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { +func TestCache_MaxSizeOne(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(1, 0) + c, err := New[*testPlan](1, 0) require.NoError(t, err) defer c.Close() - c.Set(1, &planWithMetaData{content: "q1"}, 10*time.Millisecond) + c.Set(1, &testPlan{content: "q1"}, 10*time.Millisecond) c.Wait() got, ok := c.Get(1) require.True(t, ok) require.Equal(t, "q1", got.content) // Adding a more expensive entry should evict the only entry - c.Set(2, &planWithMetaData{content: "q2"}, 20*time.Millisecond) + c.Set(2, &testPlan{content: "q2"}, 20*time.Millisecond) c.Wait() _, ok = c.Get(1) require.False(t, ok) @@ -258,7 +262,7 @@ func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { require.Equal(t, 
"q2", got.content) // Adding a cheaper entry should be rejected - c.Set(3, &planWithMetaData{content: "q3"}, 5*time.Millisecond) + c.Set(3, &testPlan{content: "q3"}, 5*time.Millisecond) c.Wait() _, ok = c.Get(3) require.False(t, ok) @@ -266,9 +270,9 @@ func TestExpensivePlanCache_MaxSizeOne(t *testing.T) { require.True(t, ok) } -func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { +func TestCache_ConcurrentAccess(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(100, 0) + c, err := New[*testPlan](100, 0) require.NoError(t, err) defer c.Close() done := make(chan struct{}) @@ -279,7 +283,7 @@ func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { defer func() { done <- struct{}{} }() for j := 0; j < 100; j++ { key := uint64(id*100 + j) //nolint:gosec // test code, no overflow risk - c.Set(key, &planWithMetaData{content: "q"}, time.Duration(j)*time.Millisecond) + c.Set(key, &testPlan{content: "q"}, time.Duration(j)*time.Millisecond) } }(i) } @@ -298,7 +302,7 @@ func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { for i := 0; i < 5; i++ { go func() { defer func() { done <- struct{}{} }() - c.IterValues(func(_ *planWithMetaData) bool { + c.IterValues(func(_ *testPlan) bool { return false }) }() @@ -310,35 +314,35 @@ func TestExpensivePlanCache_ConcurrentAccess(t *testing.T) { } } -func TestExpensivePlanCache_InvalidSize(t *testing.T) { +func TestCache_InvalidSize(t *testing.T) { t.Parallel() - _, err := newExpensivePlanCache(0, 0) + _, err := New[*testPlan](0, 0) require.Error(t, err) - _, err = newExpensivePlanCache(-1, 0) + _, err = New[*testPlan](-1, 0) require.Error(t, err) } -func TestExpensivePlanCache_ThresholdRejectsBelow(t *testing.T) { +func TestCache_ThresholdRejectsBelow(t *testing.T) { t.Parallel() - c, err := newExpensivePlanCache(10, 100*time.Millisecond) + c, err := New[*testPlan](10, 100*time.Millisecond) require.NoError(t, err) defer c.Close() // Below threshold — should be rejected - c.Set(1, 
&planWithMetaData{content: "q1"}, 50*time.Millisecond) + c.Set(1, &testPlan{content: "q1"}, 50*time.Millisecond) c.Wait() _, ok := c.Get(1) require.False(t, ok, "entry below threshold should be rejected") // At threshold — should be accepted - c.Set(2, &planWithMetaData{content: "q2"}, 100*time.Millisecond) + c.Set(2, &testPlan{content: "q2"}, 100*time.Millisecond) c.Wait() _, ok = c.Get(2) require.True(t, ok, "entry at threshold should be accepted") // Above threshold — should be accepted - c.Set(3, &planWithMetaData{content: "q3"}, 200*time.Millisecond) + c.Set(3, &testPlan{content: "q3"}, 200*time.Millisecond) c.Wait() _, ok = c.Get(3) require.True(t, ok, "entry above threshold should be accepted") From 64fa885732df4f5b23a28508d5b996e9dc26526d Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 16:34:54 +0530 Subject: [PATCH 20/46] fix: review comments --- router/core/context.go | 2 +- router/core/graph_server.go | 57 ++++++++++--------- router/core/operation_planner.go | 25 ++------ .../planfallbackcache/plan_fallback_cache.go | 6 ++ 4 files changed, 40 insertions(+), 50 deletions(-) diff --git a/router/core/context.go b/router/core/context.go index 53e12d0fc7..408abfeac9 100644 --- a/router/core/context.go +++ b/router/core/context.go @@ -620,7 +620,7 @@ type operationContext struct { preparedPlan *planWithMetaData traceOptions resolve.TraceOptions executionOptions resolve.ExecutionOptions - planCacheHit bool + planCacheHit bool initialPayload []byte extensions []byte persistedID string diff --git a/router/core/graph_server.go b/router/core/graph_server.go index 7972ccaef3..b4833f2386 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -54,6 +54,7 @@ import ( "github.com/wundergraph/cosmo/router/pkg/logging" rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "github.com/wundergraph/cosmo/router/pkg/otel" + "github.com/wundergraph/cosmo/router/pkg/planfallbackcache" 
"github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" "github.com/wundergraph/cosmo/router/pkg/statistics" rtrace "github.com/wundergraph/cosmo/router/pkg/trace" @@ -542,10 +543,9 @@ func (s *graphServer) setupEngineStatistics(baseAttributes []attribute.KeyValue) } type graphMux struct { - mux *chi.Mux - - operationPlanner *OperationPlanner + mux *chi.Mux planCache *ristretto.Cache[uint64, *planWithMetaData] + planFallbackCache *planfallbackcache.Cache[*planWithMetaData] persistedOperationCache *ristretto.Cache[uint64, NormalizationCacheEntry] normalizationCache *ristretto.Cache[uint64, NormalizationCacheEntry] complexityCalculationCache *ristretto.Cache[uint64, ComplexityCacheEntry] @@ -585,10 +585,7 @@ func (s *graphMux) buildOperationCaches(srv *graphServer) (computeSha256 bool, e } if srv.cacheWarmup != nil && srv.cacheWarmup.Enabled && srv.cacheWarmup.InMemoryFallback { planCacheConfig.OnEvict = func(item *ristretto.Item[*planWithMetaData]) { - if s.operationPlanner == nil || s.operationPlanner.fallbackCache == nil { - return - } - s.operationPlanner.fallbackCache.Set(item.Key, item.Value, item.Value.planningDuration) + s.planFallbackCache.Set(item.Key, item.Value, item.Value.planningDuration) } } s.planCache, err = ristretto.NewCache[uint64, *planWithMetaData](planCacheConfig) @@ -792,7 +789,7 @@ func (s *graphMux) configureCacheMetrics(srv *graphServer, baseOtelAttributes [] func (s *graphMux) Shutdown(ctx context.Context) error { s.planCache.Close() - s.operationPlanner.Close() + s.planFallbackCache.Close() s.persistedOperationCache.Close() s.normalizationCache.Close() s.variablesNormalizationCache.Close() @@ -1342,19 +1339,24 @@ func (s *graphServer) buildGraphMux( ComplexityLimits: s.securityConfiguration.ComplexityLimits, }) - operationPlanner, err := NewOperationPlanner( + if opts.ReloadPersistentState.inMemoryPlanCacheFallback.IsEnabled() { + var err error + gm.planFallbackCache, err = planfallbackcache.New[*planWithMetaData]( + 
int(s.engineExecutionConfiguration.PlanFallbackCacheSize), + s.engineExecutionConfiguration.PlanFallbackThreshold, + ) + if err != nil { + return nil, fmt.Errorf("failed to create plan fallback cache: %w", err) + } + } + + operationPlanner := NewOperationPlanner( s.logger, executor, gm.planCache, - opts.ReloadPersistentState.inMemoryPlanCacheFallback.IsEnabled(), - int(s.engineExecutionConfiguration.PlanFallbackCacheSize), - s.engineExecutionConfiguration.PlanFallbackThreshold, + gm.planFallbackCache, ) - if err != nil { - return nil, fmt.Errorf("failed to create operation planner: %w", err) - } operationPlanner.planningDurationOverride = s.planningDurationOverride - gm.operationPlanner = operationPlanner // We support the MCP only on the base graph. Feature flags are not supported yet. if opts.IsBaseGraph() && s.mcpServer != nil { @@ -1382,20 +1384,19 @@ func (s *graphServer) buildGraphMux( } warmupConfig.AfterOperation = func(item *CacheWarmupOperationPlanResult) { - attrs := []attribute.KeyValue{ - otel.WgOperationName.String(item.OperationName), - otel.WgClientName.String(item.ClientName), - otel.WgClientVersion.String(item.ClientVersion), - otel.WgFeatureFlag.String(opts.FeatureFlagName), - otel.WgOperationHash.String(item.OperationHash), - otel.WgOperationType.String(item.OperationType), - otel.WgEnginePlanCacheHit.Bool(false), - } gm.metricStore.MeasureOperationPlanningTime(ctx, item.PlanningTime, nil, otelmetric.WithAttributes( - append(attrs, baseMetricAttributes...)..., + append([]attribute.KeyValue{ + otel.WgOperationName.String(item.OperationName), + otel.WgClientName.String(item.ClientName), + otel.WgClientVersion.String(item.ClientVersion), + otel.WgFeatureFlag.String(opts.FeatureFlagName), + otel.WgOperationHash.String(item.OperationHash), + otel.WgOperationType.String(item.OperationType), + otel.WgEnginePlanCacheHit.Bool(false), + }, baseMetricAttributes...)..., ), ) } @@ -1414,7 +1415,7 @@ func (s *graphServer) buildGraphMux( // We first utilize 
the existing plan cache (if it was already set, i.e., not on the first start) to create a list of queries // and then reset the plan cache to the new plan cache for this start afterwards. warmupConfig.Source = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) - opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, operationPlanner.fallbackCache) + opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, gm.planFallbackCache) case s.cacheWarmup.Source.CdnSource.Enabled: if s.graphApiToken == "" { return nil, fmt.Errorf("graph token is required for cache warmup in order to communicate with the CDN") @@ -1424,7 +1425,7 @@ func (s *graphServer) buildGraphMux( // This is useful for when an issue occurs with the CDN when retrieving the required manifest if s.cacheWarmup.InMemoryFallback { warmupConfig.FallbackSource = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) - opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, operationPlanner.fallbackCache) + opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, gm.planFallbackCache) } cdnSource, err := NewCDNSource(s.cdnConfig.URL, s.graphApiToken, s.logger) if err != nil { diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index c791e3e511..2a317deeea 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -56,32 +56,15 @@ type ExecutionPlanCache[K any, V any] interface { Close() } -func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache ExecutionPlanCache[uint64, *planWithMetaData], inMemoryPlanCacheFallback bool, fallbackCacheSize int, threshold time.Duration) (*OperationPlanner, error) { - p := &OperationPlanner{ +func NewOperationPlanner(logger *zap.Logger, executor 
*Executor, planCache ExecutionPlanCache[uint64, *planWithMetaData], fallbackCache *planfallbackcache.Cache[*planWithMetaData]) *OperationPlanner { + return &OperationPlanner{ logger: logger, planCache: planCache, executor: executor, trackUsageInfo: executor.TrackUsageInfo, - useFallback: inMemoryPlanCacheFallback, + useFallback: fallbackCache != nil, + fallbackCache: fallbackCache, } - - if inMemoryPlanCacheFallback { - var err error - p.fallbackCache, err = planfallbackcache.New[*planWithMetaData](fallbackCacheSize, threshold) - if err != nil { - return nil, err - } - } - - return p, nil -} - -// Close releases fallback cache resources. -func (p *OperationPlanner) Close() { - if p == nil || !p.useFallback { - return - } - p.fallbackCache.Close() } // planOperation performs the core planning work: parse, plan, and postprocess. diff --git a/router/pkg/planfallbackcache/plan_fallback_cache.go b/router/pkg/planfallbackcache/plan_fallback_cache.go index 5d4c3ab8a4..a2d5c0ba22 100644 --- a/router/pkg/planfallbackcache/plan_fallback_cache.go +++ b/router/pkg/planfallbackcache/plan_fallback_cache.go @@ -89,6 +89,9 @@ func (c *Cache[V]) Get(key uint64) (V, bool) { // Set enqueues a write to the cache. The write is applied asynchronously. // If the write buffer is full, the entry is silently dropped. func (c *Cache[V]) Set(key uint64, value V, duration time.Duration) { + if c == nil { + return + } select { case c.writeCh <- setRequest[V]{key: key, value: value, dur: duration}: default: @@ -174,6 +177,9 @@ func (c *Cache[V]) IterValues(cb func(v V) bool) { // Close stops the background goroutine and releases resources. // Pending writes in the buffer may be dropped. 
func (c *Cache[V]) Close() { + if c == nil { + return + } close(c.stop) <-c.done From 40dc67fb03063e13a04ede1f5b415a914d869cff Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 16:39:40 +0530 Subject: [PATCH 21/46] fix: review comments --- router/core/graph_server.go | 11 +++-------- router/core/operation_planner.go | 21 ++++++++++++--------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/router/core/graph_server.go b/router/core/graph_server.go index b4833f2386..937c77ad19 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -543,7 +543,8 @@ func (s *graphServer) setupEngineStatistics(baseAttributes []attribute.KeyValue) } type graphMux struct { - mux *chi.Mux + mux *chi.Mux + planCache *ristretto.Cache[uint64, *planWithMetaData] planFallbackCache *planfallbackcache.Cache[*planWithMetaData] persistedOperationCache *ristretto.Cache[uint64, NormalizationCacheEntry] @@ -1350,13 +1351,7 @@ func (s *graphServer) buildGraphMux( } } - operationPlanner := NewOperationPlanner( - s.logger, - executor, - gm.planCache, - gm.planFallbackCache, - ) - operationPlanner.planningDurationOverride = s.planningDurationOverride + operationPlanner := NewOperationPlanner(executor, gm.planCache, gm.planFallbackCache, s.planningDurationOverride) // We support the MCP only on the base graph. Feature flags are not supported yet. 
if opts.IsBaseGraph() && s.mcpServer != nil { diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index 2a317deeea..fedc0e05f4 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -13,7 +13,6 @@ import ( "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/postprocess" "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" - "go.uber.org/zap" "golang.org/x/sync/singleflight" ) @@ -34,7 +33,6 @@ type OperationPlanner struct { executor *Executor trackUsageInfo bool useFallback bool - logger *zap.Logger // planningDurationOverride, when set, replaces the measured planning duration. // This is used in tests to simulate slow queries. @@ -56,14 +54,19 @@ type ExecutionPlanCache[K any, V any] interface { Close() } -func NewOperationPlanner(logger *zap.Logger, executor *Executor, planCache ExecutionPlanCache[uint64, *planWithMetaData], fallbackCache *planfallbackcache.Cache[*planWithMetaData]) *OperationPlanner { +func NewOperationPlanner( + executor *Executor, + planCache ExecutionPlanCache[uint64, *planWithMetaData], + fallbackCache *planfallbackcache.Cache[*planWithMetaData], + planningDurationOverride func(content string) time.Duration, +) *OperationPlanner { return &OperationPlanner{ - logger: logger, - planCache: planCache, - executor: executor, - trackUsageInfo: executor.TrackUsageInfo, - useFallback: fallbackCache != nil, - fallbackCache: fallbackCache, + planCache: planCache, + executor: executor, + trackUsageInfo: executor.TrackUsageInfo, + useFallback: fallbackCache != nil, + fallbackCache: fallbackCache, + planningDurationOverride: planningDurationOverride, } } From 6e0923c081239db7bdeec6c782bb01d65b9b99b6 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 16:46:49 +0530 Subject: [PATCH 22/46] fix: cleanup --- router-tests/cache_warmup_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff 
--git a/router-tests/cache_warmup_test.go b/router-tests/cache_warmup_test.go index 4719dc3853..a9d1dbe26e 100644 --- a/router-tests/cache_warmup_test.go +++ b/router-tests/cache_warmup_test.go @@ -987,7 +987,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { }, }), core.WithConfigVersionHeader(true), - core.WithPlanningDurationOverride(func(content string) time.Duration { + core.WithPlanningDurationOverride(func(_ string) time.Duration { return 10 * time.Second }), }, @@ -1139,7 +1139,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { Enabled: true, InMemoryFallback: true, }), - core.WithPlanningDurationOverride(func(content string) time.Duration { + core.WithPlanningDurationOverride(func(_ string) time.Duration { return 10 * time.Second }), }, @@ -1198,7 +1198,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { }, }, }), - core.WithPlanningDurationOverride(func(content string) time.Duration { + core.WithPlanningDurationOverride(func(_ string) time.Duration { return 10 * time.Second }), }, @@ -1257,7 +1257,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { }, }, }), - core.WithPlanningDurationOverride(func(content string) time.Duration { + core.WithPlanningDurationOverride(func(_ string) time.Duration { return 10 * time.Second }), }, From 424c9996ddfaf5b85f8317f7e3ca1df78e0d7df6 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 16:51:13 +0530 Subject: [PATCH 23/46] fix: review comments --- router/core/operation_planner.go | 1 - 1 file changed, 1 deletion(-) diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index fedc0e05f4..3f820102c8 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -71,7 +71,6 @@ func NewOperationPlanner( } // planOperation performs the core planning work: parse, plan, and postprocess. -// This is the single source of truth for query planning logic. 
func (p *OperationPlanner) planOperation(content string, name string, includeQueryPlan bool) (*planWithMetaData, error) { doc, report := astparser.ParseGraphqlDocumentString(content) if report.HasErrors() { From 478e3de5175aab7952f17e4470d52a86ff82c46b Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 18:46:13 +0530 Subject: [PATCH 24/46] fix: review comments --- router-tests/cache_warmup_test.go | 12 +++---- router-tests/plan_fallback_cache_test.go | 36 +++++++++---------- router/core/graph_server.go | 12 +++---- router/core/operation_planner.go | 6 ++-- router/core/reload_persistent_state.go | 10 +++--- router/core/reload_persistent_state_test.go | 32 ++++++++--------- router/pkg/config/config.go | 4 +-- router/pkg/config/config.schema.json | 8 ++--- .../pkg/config/testdata/config_defaults.json | 4 +-- router/pkg/config/testdata/config_full.json | 4 +-- .../slow_plan_cache.go} | 4 +-- .../slow_plan_cache_test.go} | 2 +- 12 files changed, 67 insertions(+), 67 deletions(-) rename router/pkg/{planfallbackcache/plan_fallback_cache.go => slowplancache/slow_plan_cache.go} (97%) rename router/pkg/{planfallbackcache/plan_fallback_cache_test.go => slowplancache/slow_plan_cache_test.go} (99%) diff --git a/router-tests/cache_warmup_test.go b/router-tests/cache_warmup_test.go index 92a78e593b..52802eb7ac 100644 --- a/router-tests/cache_warmup_test.go +++ b/router-tests/cache_warmup_test.go @@ -976,7 +976,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.PlanFallbackCacheSize = 100 + cfg.SlowPlanCacheSize = 100 }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ @@ -1128,7 +1128,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.PlanFallbackCacheSize = 100 + 
cfg.SlowPlanCacheSize = 100 }, RouterOptions: []core.Option{ core.WithConfigVersionHeader(true), @@ -1178,7 +1178,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.PlanFallbackCacheSize = 100 + cfg.SlowPlanCacheSize = 100 }, CdnSever: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNotFound) @@ -1237,7 +1237,7 @@ func TestInMemoryPlanCacheFallback(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { - cfg.PlanFallbackCacheSize = 100 + cfg.SlowPlanCacheSize = 100 }, CdnSever: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusUnauthorized) @@ -1313,8 +1313,8 @@ cache_warmup: enabled: false engine: - plan_fallback_threshold: "1ns" - plan_fallback_cache_size: 100 + slow_plan_cache_threshold: "1ns" + slow_plan_cache_size: 100 debug: enable_cache_response_headers: true ` diff --git a/router-tests/plan_fallback_cache_test.go b/router-tests/plan_fallback_cache_test.go index 041ba0314e..1b00ce0181 100644 --- a/router-tests/plan_fallback_cache_test.go +++ b/router-tests/plan_fallback_cache_test.go @@ -68,8 +68,8 @@ func TestPlanFallbackCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.PlanFallbackThreshold = fallbackThreshold - cfg.PlanFallbackCacheSize = 100 + cfg.SlowPlanCacheThreshold = fallbackThreshold + cfg.SlowPlanCacheSize = 100 }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ @@ -97,8 +97,8 @@ func TestPlanFallbackCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize 
= 1 - cfg.PlanFallbackThreshold = fallbackThreshold - cfg.PlanFallbackCacheSize = 100 + cfg.SlowPlanCacheThreshold = fallbackThreshold + cfg.SlowPlanCacheSize = 100 }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ @@ -136,8 +136,8 @@ func TestPlanFallbackCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.PlanFallbackThreshold = fallbackThreshold - cfg.PlanFallbackCacheSize = 100 + cfg.SlowPlanCacheThreshold = fallbackThreshold + cfg.SlowPlanCacheSize = 100 }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ @@ -189,8 +189,8 @@ func TestPlanFallbackCache(t *testing.T) { ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { // Large enough to hold all queries — no evictions before reload cfg.ExecutionPlanCacheSize = 1024 - cfg.PlanFallbackThreshold = fallbackThreshold - cfg.PlanFallbackCacheSize = 100 + cfg.SlowPlanCacheThreshold = fallbackThreshold + cfg.SlowPlanCacheSize = 100 }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ @@ -260,8 +260,8 @@ func TestPlanFallbackCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.PlanFallbackThreshold = fallbackThreshold - cfg.PlanFallbackCacheSize = 100 + cfg.SlowPlanCacheThreshold = fallbackThreshold + cfg.SlowPlanCacheSize = 100 }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ @@ -315,8 +315,8 @@ func TestPlanFallbackCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.PlanFallbackThreshold = fallbackThreshold - cfg.PlanFallbackCacheSize = 10 + 
cfg.SlowPlanCacheThreshold = fallbackThreshold + cfg.SlowPlanCacheSize = 10 }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ @@ -343,8 +343,8 @@ func TestPlanFallbackCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.PlanFallbackThreshold = fallbackThreshold - cfg.PlanFallbackCacheSize = 50 + cfg.SlowPlanCacheThreshold = fallbackThreshold + cfg.SlowPlanCacheSize = 50 }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ @@ -372,8 +372,8 @@ func TestPlanFallbackCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1024 - cfg.PlanFallbackThreshold = fallbackThreshold - cfg.PlanFallbackCacheSize = 100 + cfg.SlowPlanCacheThreshold = fallbackThreshold + cfg.SlowPlanCacheSize = 100 }, RouterOptions: []core.Option{ core.WithConfigVersionHeader(true), @@ -430,8 +430,8 @@ func TestPlanFallbackCache(t *testing.T) { testenv.Run(t, &testenv.Config{ ModifyEngineExecutionConfiguration: func(cfg *config.EngineExecutionConfiguration) { cfg.ExecutionPlanCacheSize = 1 - cfg.PlanFallbackThreshold = 1 * time.Hour - cfg.PlanFallbackCacheSize = 100 + cfg.SlowPlanCacheThreshold = 1 * time.Hour + cfg.SlowPlanCacheSize = 100 }, RouterOptions: []core.Option{ core.WithCacheWarmupConfig(&config.CacheWarmupConfiguration{ diff --git a/router/core/graph_server.go b/router/core/graph_server.go index a2c7f0f0ba..ad65925900 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -54,8 +54,8 @@ import ( "github.com/wundergraph/cosmo/router/pkg/logging" rmetric "github.com/wundergraph/cosmo/router/pkg/metric" "github.com/wundergraph/cosmo/router/pkg/otel" - "github.com/wundergraph/cosmo/router/pkg/planfallbackcache" 
"github.com/wundergraph/cosmo/router/pkg/pubsub/datasource" + "github.com/wundergraph/cosmo/router/pkg/slowplancache" "github.com/wundergraph/cosmo/router/pkg/statistics" rtrace "github.com/wundergraph/cosmo/router/pkg/trace" ) @@ -546,7 +546,7 @@ type graphMux struct { mux *chi.Mux planCache *ristretto.Cache[uint64, *planWithMetaData] - planFallbackCache *planfallbackcache.Cache[*planWithMetaData] + planFallbackCache *slowplancache.Cache[*planWithMetaData] persistedOperationCache *ristretto.Cache[uint64, NormalizationCacheEntry] normalizationCache *ristretto.Cache[uint64, NormalizationCacheEntry] complexityCalculationCache *ristretto.Cache[uint64, ComplexityCacheEntry] @@ -1346,9 +1346,9 @@ func (s *graphServer) buildGraphMux( if opts.ReloadPersistentState.inMemoryPlanCacheFallback.IsEnabled() { var err error - gm.planFallbackCache, err = planfallbackcache.New[*planWithMetaData]( - int(s.engineExecutionConfiguration.PlanFallbackCacheSize), - s.engineExecutionConfiguration.PlanFallbackThreshold, + gm.planFallbackCache, err = slowplancache.New[*planWithMetaData]( + int(s.engineExecutionConfiguration.SlowPlanCacheSize), + s.engineExecutionConfiguration.SlowPlanCacheThreshold, ) if err != nil { return nil, fmt.Errorf("failed to create plan fallback cache: %w", err) @@ -1411,7 +1411,7 @@ func (s *graphServer) buildGraphMux( // - Using static execution config (not Cosmo): s.selfRegister == nil // - OR CDN cache warmer is explictly disabled case s.cacheWarmup.InMemoryFallback && (s.selfRegister == nil || !s.cacheWarmup.Source.CdnSource.Enabled): - // We first utilize the existing plan cache (if it was already set, i.e., not on the first start) to create a list of queries + // We first utilize the existing cache (if it was already set, i.e., not on the first start) to create a list of queries // and then reset the plan cache to the new plan cache for this start afterwards. 
warmupConfig.Source = NewPlanSource(opts.ReloadPersistentState.inMemoryPlanCacheFallback.getPlanCacheForFF(opts.FeatureFlagName)) opts.ReloadPersistentState.inMemoryPlanCacheFallback.setPlanCacheForFF(opts.FeatureFlagName, gm.planFallbackCache) diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index 3f820102c8..69fd188b2d 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -7,7 +7,7 @@ import ( graphqlmetricsv1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/graphqlmetrics/v1" "github.com/wundergraph/cosmo/router/pkg/graphqlschemausage" - "github.com/wundergraph/cosmo/router/pkg/planfallbackcache" + "github.com/wundergraph/cosmo/router/pkg/slowplancache" "github.com/wundergraph/graphql-go-tools/v2/pkg/ast" "github.com/wundergraph/graphql-go-tools/v2/pkg/astparser" "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" @@ -29,7 +29,7 @@ type planWithMetaData struct { type OperationPlanner struct { sf singleflight.Group planCache ExecutionPlanCache[uint64, *planWithMetaData] - fallbackCache *planfallbackcache.Cache[*planWithMetaData] + fallbackCache *slowplancache.Cache[*planWithMetaData] executor *Executor trackUsageInfo bool useFallback bool @@ -57,7 +57,7 @@ type ExecutionPlanCache[K any, V any] interface { func NewOperationPlanner( executor *Executor, planCache ExecutionPlanCache[uint64, *planWithMetaData], - fallbackCache *planfallbackcache.Cache[*planWithMetaData], + fallbackCache *slowplancache.Cache[*planWithMetaData], planningDurationOverride func(content string) time.Duration, ) *OperationPlanner { return &OperationPlanner{ diff --git a/router/core/reload_persistent_state.go b/router/core/reload_persistent_state.go index 7d1ad87b8f..7722a176c6 100644 --- a/router/core/reload_persistent_state.go +++ b/router/core/reload_persistent_state.go @@ -4,7 +4,7 @@ import ( "sync" nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" - 
"github.com/wundergraph/cosmo/router/pkg/planfallbackcache" + "github.com/wundergraph/cosmo/router/pkg/slowplancache" "go.uber.org/zap" ) @@ -92,7 +92,7 @@ func (c *InMemoryPlanCacheFallback) getPlanCacheForFF(featureFlagKey string) []* } switch cache := c.queriesForFeatureFlag[featureFlagKey].(type) { - case *planfallbackcache.Cache[*planWithMetaData]: + case *slowplancache.Cache[*planWithMetaData]: return convertToNodeOperation(cache) case []*nodev1.Operation: return cache @@ -107,7 +107,7 @@ func (c *InMemoryPlanCacheFallback) getPlanCacheForFF(featureFlagKey string) []* } // setPlanCacheForFF sets the plan cache for a specific feature flag key -func (c *InMemoryPlanCacheFallback) setPlanCacheForFF(featureFlagKey string, cache *planfallbackcache.Cache[*planWithMetaData]) { +func (c *InMemoryPlanCacheFallback) setPlanCacheForFF(featureFlagKey string, cache *slowplancache.Cache[*planWithMetaData]) { c.mu.Lock() defer c.mu.Unlock() @@ -128,7 +128,7 @@ func (c *InMemoryPlanCacheFallback) extractQueriesAndOverridePlanCache() { fallbackMap := make(map[string]any) for k, v := range c.queriesForFeatureFlag { - if cache, ok := v.(*planfallbackcache.Cache[*planWithMetaData]); ok { + if cache, ok := v.(*slowplancache.Cache[*planWithMetaData]); ok { fallbackMap[k] = convertToNodeOperation(cache) } } @@ -158,7 +158,7 @@ func (c *InMemoryPlanCacheFallback) cleanupUnusedFeatureFlags(routerCfg *nodev1. 
} } -func convertToNodeOperation(data *planfallbackcache.Cache[*planWithMetaData]) []*nodev1.Operation { +func convertToNodeOperation(data *slowplancache.Cache[*planWithMetaData]) []*nodev1.Operation { items := make([]*nodev1.Operation, 0) data.IterValues(func(v *planWithMetaData) (stop bool) { diff --git a/router/core/reload_persistent_state_test.go b/router/core/reload_persistent_state_test.go index 1c79841077..247091dba0 100644 --- a/router/core/reload_persistent_state_test.go +++ b/router/core/reload_persistent_state_test.go @@ -6,7 +6,7 @@ import ( "github.com/stretchr/testify/require" nodev1 "github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" "github.com/wundergraph/cosmo/router/pkg/config" - "github.com/wundergraph/cosmo/router/pkg/planfallbackcache" + "github.com/wundergraph/cosmo/router/pkg/slowplancache" "go.uber.org/zap" ) @@ -48,7 +48,7 @@ func TestInMemoryPlanCacheFallback_UpdateInMemoryFallbackCacheForConfigChanges(t t.Run("update when already enabled keeps existing data", func(t *testing.T) { t.Parallel() existing := make(map[string]any) - existing["test"] = (*planfallbackcache.Cache[*planWithMetaData])(nil) + existing["test"] = (*slowplancache.Cache[*planWithMetaData])(nil) cache := &InMemoryPlanCacheFallback{ queriesForFeatureFlag: existing, @@ -142,7 +142,7 @@ func TestInMemoryPlanCacheFallback_GetPlanCacheForFF(t *testing.T) { t.Run("returns operations from live fallback cache reference", func(t *testing.T) { t.Parallel() - fallbackCache, err := planfallbackcache.New[*planWithMetaData](100, 0) + fallbackCache, err := slowplancache.New[*planWithMetaData](100, 0) require.NoError(t, err) fallbackCache.Set(1, &planWithMetaData{content: "query { fromFallback }"}, 5*1e9) fallbackCache.Wait() @@ -189,9 +189,9 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ queriesForFeatureFlag: map[string]any{ - "ff1": (*planfallbackcache.Cache[*planWithMetaData])(nil), - 
"ff2": (*planfallbackcache.Cache[*planWithMetaData])(nil), - "ff3": (*planfallbackcache.Cache[*planWithMetaData])(nil), + "ff1": (*slowplancache.Cache[*planWithMetaData])(nil), + "ff2": (*slowplancache.Cache[*planWithMetaData])(nil), + "ff3": (*slowplancache.Cache[*planWithMetaData])(nil), }, } @@ -216,8 +216,8 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ queriesForFeatureFlag: map[string]any{ - "": (*planfallbackcache.Cache[*planWithMetaData])(nil), - "ff1": (*planfallbackcache.Cache[*planWithMetaData])(nil), + "": (*slowplancache.Cache[*planWithMetaData])(nil), + "ff1": (*slowplancache.Cache[*planWithMetaData])(nil), }, } @@ -256,12 +256,12 @@ func TestInMemoryPlanCacheFallback_CleanupUnusedFeatureFlags(t *testing.T) { t.Parallel() cache := &InMemoryPlanCacheFallback{ queriesForFeatureFlag: map[string]any{ - "": (*planfallbackcache.Cache[*planWithMetaData])(nil), // base should be kept - "ff1": (*planfallbackcache.Cache[*planWithMetaData])(nil), - "ff2": (*planfallbackcache.Cache[*planWithMetaData])(nil), - "ff3": (*planfallbackcache.Cache[*planWithMetaData])(nil), - "ff4": (*planfallbackcache.Cache[*planWithMetaData])(nil), - "ff5": (*planfallbackcache.Cache[*planWithMetaData])(nil), + "": (*slowplancache.Cache[*planWithMetaData])(nil), // base should be kept + "ff1": (*slowplancache.Cache[*planWithMetaData])(nil), + "ff2": (*slowplancache.Cache[*planWithMetaData])(nil), + "ff3": (*slowplancache.Cache[*planWithMetaData])(nil), + "ff4": (*slowplancache.Cache[*planWithMetaData])(nil), + "ff5": (*slowplancache.Cache[*planWithMetaData])(nil), }, } @@ -287,9 +287,9 @@ func TestInMemoryPlanCacheFallback_ProcessOnConfigChangeRestart(t *testing.T) { query1 := "query { test1 }" query2 := "query { test2 }" - fallbackCache1, err := planfallbackcache.New[*planWithMetaData](100, 0) + fallbackCache1, err := slowplancache.New[*planWithMetaData](100, 0) require.NoError(t, err) - fallbackCache2, 
err := planfallbackcache.New[*planWithMetaData](100, 0) + fallbackCache2, err := slowplancache.New[*planWithMetaData](100, 0) require.NoError(t, err) fallbackCache1.Set(1, &planWithMetaData{content: query1}, 5*1e9) diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index 4272b20090..fa12bb3cbc 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -430,8 +430,8 @@ type EngineExecutionConfiguration struct { WebSocketClientPingTimeout time.Duration `envDefault:"30s" env:"ENGINE_WEBSOCKET_CLIENT_PING_TIMEOUT" yaml:"websocket_client_ping_timeout,omitempty"` WebSocketClientFrameTimeout time.Duration `envDefault:"100ms" env:"ENGINE_WEBSOCKET_CLIENT_FRAME_TIMEOUT" yaml:"websocket_client_frame_timeout,omitempty"` ExecutionPlanCacheSize int64 `envDefault:"1024" env:"ENGINE_EXECUTION_PLAN_CACHE_SIZE" yaml:"execution_plan_cache_size,omitempty"` - PlanFallbackCacheSize int64 `envDefault:"100" env:"ENGINE_PLAN_FALLBACK_CACHE_SIZE" yaml:"plan_fallback_cache_size,omitempty"` - PlanFallbackThreshold time.Duration `envDefault:"5s" env:"ENGINE_PLAN_FALLBACK_THRESHOLD" yaml:"plan_fallback_threshold,omitempty"` + SlowPlanCacheSize int64 `envDefault:"100" env:"ENGINE_SLOW_PLAN_CACHE_SIZE" yaml:"slow_plan_cache_size,omitempty"` + SlowPlanCacheThreshold time.Duration `envDefault:"5s" env:"ENGINE_SLOW_PLAN_CACHE_THRESHOLD" yaml:"slow_plan_cache_threshold,omitempty"` MinifySubgraphOperations bool `envDefault:"true" env:"ENGINE_MINIFY_SUBGRAPH_OPERATIONS" yaml:"minify_subgraph_operations"` EnablePersistedOperationsCache bool `envDefault:"true" env:"ENGINE_ENABLE_PERSISTED_OPERATIONS_CACHE" yaml:"enable_persisted_operations_cache"` EnableNormalizationCache bool `envDefault:"true" env:"ENGINE_ENABLE_NORMALIZATION_CACHE" yaml:"enable_normalization_cache"` diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 7bd2344132..e7e1137e4b 100644 --- a/router/pkg/config/config.schema.json +++ 
b/router/pkg/config/config.schema.json @@ -3325,16 +3325,16 @@ "default": 1024, "description": "The size of the execution plan cache." }, - "plan_fallback_cache_size": { + "slow_plan_cache_size": { "type": "integer", "minimum": 1, "default": 256, - "description": "The maximum number of entries in the plan fallback cache. Plans that exceed the planning threshold are protected from TinyLFU eviction in the main plan cache." + "description": "The maximum number of entries in the slow plan cache. Plans that exceed the planning threshold are protected from TinyLFU eviction in the main plan cache." }, - "plan_fallback_threshold": { + "slow_plan_cache_threshold": { "type": "string", "format": "go-duration", - "description": "The minimum planning duration for a query plan to be stored in the fallback cache, protecting it from TinyLFU cache eviction.", + "description": "The minimum planning duration for a query plan to be stored in the slow plan cache, protecting it from TinyLFU cache eviction.", "default": "5s", "duration": { "minimum": "1ns" diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index 86da402710..ca84871212 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -425,8 +425,8 @@ "WebSocketClientPingTimeout": 30000000000, "WebSocketClientFrameTimeout": 100000000, "ExecutionPlanCacheSize": 1024, - "PlanFallbackCacheSize": 100, - "PlanFallbackThreshold": 5000000000, + "SlowPlanCacheSize": 100, + "SlowPlanCacheThreshold": 5000000000, "MinifySubgraphOperations": true, "EnablePersistedOperationsCache": true, "EnableNormalizationCache": true, diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index 5bc6864904..ed2f406d4b 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -829,8 +829,8 @@ "WebSocketClientPingTimeout": 30000000000, 
"WebSocketClientFrameTimeout": 100000000, "ExecutionPlanCacheSize": 1024, - "PlanFallbackCacheSize": 100, - "PlanFallbackThreshold": 5000000000, + "SlowPlanCacheSize": 100, + "SlowPlanCacheThreshold": 5000000000, "MinifySubgraphOperations": true, "EnablePersistedOperationsCache": true, "EnableNormalizationCache": true, diff --git a/router/pkg/planfallbackcache/plan_fallback_cache.go b/router/pkg/slowplancache/slow_plan_cache.go similarity index 97% rename from router/pkg/planfallbackcache/plan_fallback_cache.go rename to router/pkg/slowplancache/slow_plan_cache.go index a2d5c0ba22..515284d5d4 100644 --- a/router/pkg/planfallbackcache/plan_fallback_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -1,4 +1,4 @@ -package planfallbackcache +package slowplancache import ( "fmt" @@ -42,7 +42,7 @@ const defaultWriteBufferSize = 32 * 1024 func New[V any](maxSize int, threshold time.Duration) (*Cache[V], error) { if maxSize < 1 { - return nil, fmt.Errorf("plan fallback cache size must be at least 1, got %d", maxSize) + return nil, fmt.Errorf("slow plan cache size must be at least 1, got %d", maxSize) } c := &Cache[V]{ entries: make(map[uint64]*Entry[V], maxSize), diff --git a/router/pkg/planfallbackcache/plan_fallback_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go similarity index 99% rename from router/pkg/planfallbackcache/plan_fallback_cache_test.go rename to router/pkg/slowplancache/slow_plan_cache_test.go index 2cad5ee20f..3def0e30c7 100644 --- a/router/pkg/planfallbackcache/plan_fallback_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -1,4 +1,4 @@ -package planfallbackcache +package slowplancache import ( "testing" From 87ac36d39aa36527e42535530aae5c90e82df998 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 21:05:18 +0530 Subject: [PATCH 25/46] fix: changes --- router/pkg/config/config.schema.json | 4 +- router/pkg/slowplancache/slow_plan_cache.go | 91 ++++++++++++++----- 
.../pkg/slowplancache/slow_plan_cache_test.go | 28 ++++++ 3 files changed, 98 insertions(+), 25 deletions(-) diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index e7e1137e4b..427e4f437f 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -3328,8 +3328,8 @@ "slow_plan_cache_size": { "type": "integer", "minimum": 1, - "default": 256, - "description": "The maximum number of entries in the slow plan cache. Plans that exceed the planning threshold are protected from TinyLFU eviction in the main plan cache." + "default": 100, + "description": "The maximum number of entries in the slow plan cache." }, "slow_plan_cache_threshold": { "type": "string", diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index 515284d5d4..8201ae33fc 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -3,6 +3,7 @@ package slowplancache import ( "fmt" "sync" + "sync/atomic" "time" ) @@ -32,9 +33,11 @@ type Cache[V any] struct { minKey uint64 minDur time.Duration - writeCh chan setRequest[V] - stop chan struct{} - done chan struct{} + writeCh chan setRequest[V] + stop chan struct{} + done chan struct{} + closeOnce sync.Once + closed atomic.Bool } // We use the same value as ristretto (this would be the buffer size if we used ristretto as the backing cache) @@ -59,6 +62,8 @@ func New[V any](maxSize int, threshold time.Duration) (*Cache[V], error) { // processWrites drains the write channel and applies sets under the write lock. // It exits when the stop channel is closed. 
func (c *Cache[V]) processWrites() { + defer close(c.done) + for { select { case req := <-c.writeCh: @@ -68,13 +73,17 @@ func (c *Cache[V]) processWrites() { } c.applySet(req.key, req.value, req.dur) case <-c.stop: - c.done <- struct{}{} return } } } func (c *Cache[V]) Get(key uint64) (V, bool) { + if c == nil || c.closed.Load() { + var zero V + return zero, false + } + c.mu.RLock() defer c.mu.RUnlock() @@ -83,15 +92,17 @@ func (c *Cache[V]) Get(key uint64) (V, bool) { var zero V return zero, false } + return entry.value, true } // Set enqueues a write to the cache. The write is applied asynchronously. -// If the write buffer is full, the entry is silently dropped. +// If the write buffer is full or the cache is closed, the entry is silently dropped. func (c *Cache[V]) Set(key uint64, value V, duration time.Duration) { - if c == nil { + if c == nil || c.closed.Load() { return } + select { case c.writeCh <- setRequest[V]{key: key, value: value, dur: duration}: default: @@ -99,10 +110,19 @@ func (c *Cache[V]) Set(key uint64, value V, duration time.Duration) { } // Wait blocks until all pending writes in the buffer have been processed. +// Returns immediately if the cache is closed. func (c *Cache[V]) Wait() { + if c == nil || c.closed.Load() { + return + } + ch := make(chan struct{}) - c.writeCh <- setRequest[V]{waitCh: ch} - <-ch + + select { + case c.writeCh <- setRequest[V]{waitCh: ch}: + <-ch + case <-c.done: + } } // applySet performs the actual cache mutation. Must only be called from processWrites. @@ -153,38 +173,63 @@ func (c *Cache[V]) applySet(key uint64, value V, duration time.Duration) { // refreshMin rescans the entries to find the new minimum. Must be called with mu held. 
func (c *Cache[V]) refreshMin() { - first := true + var ( + minKey uint64 + minDur time.Duration + first = true + ) + for k, e := range c.entries { - if first || e.duration < c.minDur { - c.minKey = k - c.minDur = e.duration + if first || e.duration < minDur { + minKey = k + minDur = e.duration first = false } } + + if !first { + c.minKey = minKey + c.minDur = minDur + } } +// IterValues iterates over all cached values. The callback is invoked outside +// the read lock to avoid holding it during user code execution. func (c *Cache[V]) IterValues(cb func(v V) bool) { - c.mu.RLock() - defer c.mu.RUnlock() + if c == nil || c.closed.Load() { + return + } + c.mu.RLock() + values := make([]V, 0, len(c.entries)) for _, e := range c.entries { - if cb(e.value) { + values = append(values, e.value) + } + c.mu.RUnlock() + + for _, v := range values { + if cb(v) { return } } } // Close stops the background goroutine and releases resources. -// Pending writes in the buffer may be dropped. +// Pending writes in the buffer may be dropped. Safe to call multiple times. 
func (c *Cache[V]) Close() { - if c == nil { + if c == nil || c.closed.Load() { return } - close(c.stop) - <-c.done - close(c.done) - c.mu.Lock() - c.entries = nil - c.mu.Unlock() + c.closeOnce.Do(func() { + c.closed.Store(true) + + close(c.stop) + <-c.done + close(c.writeCh) + + c.mu.Lock() + c.entries = nil + c.mu.Unlock() + }) } diff --git a/router/pkg/slowplancache/slow_plan_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go index 3def0e30c7..fe807151dc 100644 --- a/router/pkg/slowplancache/slow_plan_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -347,3 +347,31 @@ func TestCache_ThresholdRejectsBelow(t *testing.T) { _, ok = c.Get(3) require.True(t, ok, "entry above threshold should be accepted") } + +func TestCache_WaitAfterClose(t *testing.T) { + t.Parallel() + c, err := New[*testPlan](10, 0) + require.NoError(t, err) + + c.Set(1, &testPlan{content: "q1"}, 10*time.Millisecond) + c.Close() + + // Wait after Close should not deadlock or panic + require.NotPanics(t, func() { + c.Wait() + }) +} + +func TestCache_DoubleClose(t *testing.T) { + t.Parallel() + c, err := New[*testPlan](10, 0) + require.NoError(t, err) + + c.Set(1, &testPlan{content: "q1"}, 10*time.Millisecond) + + // Double Close should not panic + require.NotPanics(t, func() { + c.Close() + c.Close() + }) +} From cce5c7f289a7f76c32bfeee7bb1d2c55cf42d50e Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 21:19:14 +0530 Subject: [PATCH 26/46] fix: review comments --- router/core/graph_server.go | 3 +++ router/core/operation_planner.go | 10 +++------- router/core/reload_persistent_state.go | 1 + 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/router/core/graph_server.go b/router/core/graph_server.go index ad65925900..e9c0e992cc 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -586,6 +586,9 @@ func (s *graphMux) buildOperationCaches(srv *graphServer) (computeSha256 bool, e } if srv.cacheWarmup != nil && 
srv.cacheWarmup.Enabled && srv.cacheWarmup.InMemoryFallback { planCacheConfig.OnEvict = func(item *ristretto.Item[*planWithMetaData]) { + // This could be called before planFallbackCache is set, but it's not a problem + // because there is a nil guard inside, as well as items should not really be evicted + // on startup s.planFallbackCache.Set(item.Key, item.Value, item.Value.planningDuration) } } diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index 69fd188b2d..bfee69dabe 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -32,7 +32,6 @@ type OperationPlanner struct { fallbackCache *slowplancache.Cache[*planWithMetaData] executor *Executor trackUsageInfo bool - useFallback bool // planningDurationOverride, when set, replaces the measured planning duration. // This is used in tests to simulate slow queries. @@ -64,7 +63,6 @@ func NewOperationPlanner( planCache: planCache, executor: executor, trackUsageInfo: executor.TrackUsageInfo, - useFallback: fallbackCache != nil, fallbackCache: fallbackCache, planningDurationOverride: planningDurationOverride, } @@ -162,7 +160,7 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions // re-use a prepared plan from the main cache opContext.preparedPlan = cachedPlan opContext.planCacheHit = true - } else if p.useFallback { + } else if p.fallbackCache != nil { if cachedPlan, ok = p.fallbackCache.Get(operationID); ok { // found in the plan fallback cache — re-use and re-insert into main cache opContext.preparedPlan = cachedPlan @@ -177,7 +175,7 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions operationIDStr := strconv.FormatUint(operationID, 10) sharedPreparedPlan, err, _ := p.sf.Do(operationIDStr, func() (interface{}, error) { start := time.Now() - prepared, err := p.preparePlan(opContext, operationPlannerOpts{operationContent: p.useFallback}) + prepared, err := p.preparePlan(opContext, 
operationPlannerOpts{operationContent: p.fallbackCache != nil}) if err != nil { return nil, err } @@ -192,9 +190,7 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions // because the OnEvict callback reads planningDuration concurrently. p.planCache.Set(operationID, prepared, 1) - if p.useFallback { - p.fallbackCache.Set(operationID, prepared, prepared.planningDuration) - } + p.fallbackCache.Set(operationID, prepared, prepared.planningDuration) return prepared, nil }) diff --git a/router/core/reload_persistent_state.go b/router/core/reload_persistent_state.go index 7722a176c6..db269fc2d4 100644 --- a/router/core/reload_persistent_state.go +++ b/router/core/reload_persistent_state.go @@ -32,6 +32,7 @@ func (s *ReloadPersistentState) CleanupFeatureFlags(routerCfg *nodev1.RouterConf s.inMemoryPlanCacheFallback.cleanupUnusedFeatureFlags(routerCfg) } +// This should always be called before graphMux.Shutdown() as ordering matters func (s *ReloadPersistentState) OnRouterConfigReload() { // For cases of router config changes (not execution config), we shut down before creating the // graph mux, because we need to initialize everything from the start From 830dfc0fa0b26febed2215937bba4d50ceb9632c Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 21:25:23 +0530 Subject: [PATCH 27/46] fix: refactoring --- router/core/operation_planner.go | 13 ++++++------- router/pkg/slowplancache/slow_plan_cache.go | 5 +++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/router/core/operation_planner.go b/router/core/operation_planner.go index bfee69dabe..8b38d97a89 100644 --- a/router/core/operation_planner.go +++ b/router/core/operation_planner.go @@ -29,7 +29,7 @@ type planWithMetaData struct { type OperationPlanner struct { sf singleflight.Group planCache ExecutionPlanCache[uint64, *planWithMetaData] - fallbackCache *slowplancache.Cache[*planWithMetaData] + slowPlanCache *slowplancache.Cache[*planWithMetaData] executor 
*Executor trackUsageInfo bool @@ -63,7 +63,7 @@ func NewOperationPlanner( planCache: planCache, executor: executor, trackUsageInfo: executor.TrackUsageInfo, - fallbackCache: fallbackCache, + slowPlanCache: fallbackCache, planningDurationOverride: planningDurationOverride, } } @@ -160,8 +160,8 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions // re-use a prepared plan from the main cache opContext.preparedPlan = cachedPlan opContext.planCacheHit = true - } else if p.fallbackCache != nil { - if cachedPlan, ok = p.fallbackCache.Get(operationID); ok { + } else if p.slowPlanCache != nil { + if cachedPlan, ok = p.slowPlanCache.Get(operationID); ok { // found in the plan fallback cache — re-use and re-insert into main cache opContext.preparedPlan = cachedPlan opContext.planCacheHit = true @@ -175,7 +175,7 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions operationIDStr := strconv.FormatUint(operationID, 10) sharedPreparedPlan, err, _ := p.sf.Do(operationIDStr, func() (interface{}, error) { start := time.Now() - prepared, err := p.preparePlan(opContext, operationPlannerOpts{operationContent: p.fallbackCache != nil}) + prepared, err := p.preparePlan(opContext, operationPlannerOpts{operationContent: p.slowPlanCache != nil}) if err != nil { return nil, err } @@ -189,8 +189,7 @@ func (p *OperationPlanner) plan(opContext *operationContext, options PlanOptions // Set into the main cache after planningDuration is finalized, // because the OnEvict callback reads planningDuration concurrently. 
p.planCache.Set(operationID, prepared, 1) - - p.fallbackCache.Set(operationID, prepared, prepared.planningDuration) + p.slowPlanCache.Set(operationID, prepared, prepared.planningDuration) return prepared, nil }) diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index 8201ae33fc..f2d595fe7a 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -226,6 +226,11 @@ func (c *Cache[V]) Close() { close(c.stop) <-c.done + + // This downside is also there in ristretto + // where a user can call set after close, since it hasnt affected us + // however we let in flight requests finish first before swapping + // and we have got no complains about panics from customers close(c.writeCh) c.mu.Lock() From 238c379cd5c18bbefdcf0377048409331d9f63e7 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 21:40:33 +0530 Subject: [PATCH 28/46] fix: updates --- router/pkg/slowplancache/slow_plan_cache.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index f2d595fe7a..c86530961d 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -227,10 +227,8 @@ func (c *Cache[V]) Close() { close(c.stop) <-c.done - // This downside is also there in ristretto - // where a user can call set after close, since it hasnt affected us - // however we let in flight requests finish first before swapping - // and we have got no complains about panics from customers + // This downside is also there in ristretto (if set is called after) + // it is even documented in the ristretto code as a comment close(c.writeCh) c.mu.Lock() From 91c0b79330681aba9bc6b22ee5407172638e8cb7 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Thu, 12 Mar 2026 21:40:57 +0530 Subject: [PATCH 29/46] fix: comments --- 
router/pkg/slowplancache/slow_plan_cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index c86530961d..9c7e4dcc06 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -227,7 +227,7 @@ func (c *Cache[V]) Close() { close(c.stop) <-c.done - // This downside is also there in ristretto (if set is called after) + // This downside is also there in ristretto (if set is called concurrently) // it is even documented in the ristretto code as a comment close(c.writeCh) From 426093f308579b0aaf4253f45f68ea07f6552d0b Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Sun, 15 Mar 2026 22:29:16 +0530 Subject: [PATCH 30/46] feat: documentation --- docs-website/concepts/cache-warmer.mdx | 75 +++++++++++++++---- docs-website/router/configuration.mdx | 4 + .../router/metrics-and-monitoring.mdx | 2 +- 3 files changed, 67 insertions(+), 14 deletions(-) diff --git a/docs-website/concepts/cache-warmer.mdx b/docs-website/concepts/cache-warmer.mdx index 985e5b40a3..0b63b73dd8 100644 --- a/docs-website/concepts/cache-warmer.mdx +++ b/docs-website/concepts/cache-warmer.mdx @@ -84,19 +84,16 @@ Users can manually recompute slow queries from the Cosmo Studio. Currently, reco ## In-Memory Fallback Cache Warming -The in-memory fallback cache warming feature preserves the planner cache across hot config reloads and schema changes, allowing it to be rewarmed automatically and reducing latency spikes during restarts. +The in-memory fallback cache warming feature uses the **[slow plan cache](#slow-plan-cache)** to preserve query plans across hot config reloads and schema changes, reducing latency spikes during restarts. ### How It Works -After the router has started, the router can be reloaded for two reasons: either a config change or a schema change. 
Due to the structure of the router internals, we have two slight variations on how we handle the in-memory switchover cache warming: +The in-memory fallback relies on the slow plan cache — a secondary, bounded cache that tracks queries whose planning time exceeds a configurable threshold (`slow_plan_cache_threshold`, default 5s). During normal operation, this cache is populated in two ways: -1. **Before Reload**: In case of config changes (from hot config reloading), the router extracts all queries from the current plan cache, preserving the queries that were in the planner cache before the cache is cleared for reloading. +1. **On first plan**: When a query is planned and its planning duration exceeds the threshold, the plan is stored in both the main cache and the slow plan cache. +2. **On eviction**: If the main TinyLFU cache evicts a plan that is in the slow plan cache, the query plan won't be recomputed and would simply be served from the slow plan cache. -2. **During Reload**: The router with the updated config receives the queries from the previous plan cache that existed before reloading, and uses them to warm up its current plan cache before serving traffic. - -3. **Result**: The updated router reloads with a fully warmed cache, eliminating latency spikes that would normally occur during cold starts. - -**Important Limitation:** When using the in-memory fallback, the first start will still experience a cold start, as there is no prior populated planner cache. *Only subsequent reloads* will benefit from the in-memory fallback. This is why it works best when combined with CDN cache warming (the default configuration). +When the router reloads, the slow plan cache contents are used to rewarm the cache. 
### When to Use the In-Memory Fallback @@ -106,7 +103,7 @@ When the in-memory fallback is used with the Cosmo Cloud CDN cache warmer, the f * Getting the list of operations from the CDN fails * The request to the CDN succeeds but does not return a list of operations (either no operations are cached or the manifest has not been created yet) -In these cases, the router will use the fallback and load the list of operations from the in-memory fallback (if any operations exist). +In these cases, the router will use the fallback and load the list of operations from the slow plan cache (if any operations exist). The in-memory fallback cannot be used as a fallback for sources other than the Cosmo Cloud CDN cache warmer. @@ -115,13 +112,11 @@ In these cases, the router will use the fallback and load the list of operations ### Key Characteristics of In-Memory Fallback **Advantages:** -- **Comprehensive coverage**: After the initial start, all queries that have been executed are preserved and warmed on reload, including both slow and fast queries. This provides broader coverage than CDN cache warming. -- **Eliminates reload spikes**: You won't experience query planning spikes after configuration or schema reloads, as the cache persists across these changes. -- **Built-in feature**: No enterprise plan required; it's available to all users and enabled by default. +- **Coverage of expensive queries**: By default, queries with planning times above the threshold (5s) are preserved and warmed on reload, protecting slow-to-plan queries from cold-start latency. Users can lower the threshold to any positive duration (e.g., `slow_plan_cache_threshold: 100ms`) to capture all queries. Users can also set the duration to 1 nanosecond (`slow_plan_cache_threshold: 1ns`), this would ensure that all queries are cached in the fallback, and thus would be available to rewarm the cache upon reloads. 
+- **Eliminates reload spikes for expensive queries**: You won't experience query planning spikes for queries above the threshold after configuration or schema reloads. Users can tune the threshold to cover more or fewer queries. **Tradeoffs:** - **Cold start on first start**: The first router start will experience normal cache warming latency, as there's no existing cache to preserve. -- **Cache can accumulate stale entries**: Without a full restart, the planner cache can eventually fill up with query plans for outdated or rarely-used queries. However, the cache uses a LFU (Least Frequently Used) eviction policy, ensuring that older, less-used items are removed when the cache reaches capacity. ### Configuration @@ -157,3 +152,57 @@ cache_warmup: cdn: enabled: false ``` + +## Slow Plan Cache + +When in-memory fallback is enabled, the cache the in memory fallback uses is the **Slow Plan Cache**. This is different from the main query plan cache which uses a TinyLFU (Least Frequently Used) eviction policy, which is optimized for frequently accessed items. However, this can cause problems for queries that are slow to plan but infrequently accessed — the LFU policy may evict them in favor of cheaper, more frequent queries. When an expensive query is evicted and re-requested, the router must re-plan it from scratch, causing a latency spike. + +The slow plan cache is a secondary cache that protects these slow-to-plan queries from eviction. It is automatically enabled when `in_memory_fallback` is set to `true`. + +### How It Works + +1. When a query is planned for the first time, its planning duration is measured. +2. If the planning duration exceeds the configured threshold (`slow_plan_cache_threshold`, default 5s), the query plan is stored in both the main cache and the slow plan cache. +3. If the main cache later evicts this plan (due to LFU pressure from more frequent queries), the OnEvict hook pushes it to the slow plan cache (if it meets the threshold). +4. 
On subsequent requests, if the plan is not found in the main cache, the router checks the slow plan cache before re-planning. If found, the plan is served immediately and re-inserted into the main cache. +5. During config reloads, slow plan cache entries are used as the warmup source, ensuring slow queries survive cache rebuilds. + +### Cache Size and Eviction + +The slow plan cache has a configurable maximum size (`slow_plan_cache_size`, default 100). When the cache is full and a new expensive query needs to be added: + +- The new query's planning duration is compared to the shortest duration in the cache. +- If the new query is more expensive (took longer to plan), it replaces the least expensive entry. +- If the new query is cheaper or equal, it is not added. This ensures the cache always contains the most expensive queries. + + +Whenever an item that already exists in the cache is added again while the cache is full, we will not remove the entry and will only update its plan time duration if it was higher than the previous duration it took to plan. This way we only consider the worst case planning duration. + + +### Configuration + +The slow plan cache is configured through the engine configuration: + +```yaml +engine: + slow_plan_cache_size: 100 # Maximum entries (default: 100) + slow_plan_cache_threshold: 5s # Minimum planning time to qualify (default: 5s) + +cache_warmup: + enabled: true + in_memory_fallback: true # Required to enable the slow plan cache +``` + +For the full list of engine configuration options, see [Router Engine Configuration](/router/configuration#router-engine-configuration). + +### Tuning + +You can tune the threshold and cache size to control warmup coverage: + +- **Lower threshold → more queries protected**: Setting `slow_plan_cache_threshold: 1ns` captures all queries regardless of planning time. This gives you full "carry forward everything" behaviour similar to preserving the entire plan cache. 
+- **Higher cache size → more entries held**: Increase `slow_plan_cache_size` to hold more entries. For full coverage, set it to match or exceed `execution_plan_cache_size`. +- **Tradeoff**: Lower thresholds and larger cache sizes increase memory usage but provide broader warmup coverage. + +### Observability + +Slow plan cache hits are counted as regular plan cache hits — the `wg.engine.plan_cache_hit` attribute is set to `true` for hits from either the main cache or the slow plan cache. There is no separate observability signal for slow plan cache hits. diff --git a/docs-website/router/configuration.mdx b/docs-website/router/configuration.mdx index 7cd14b7d78..56329a03b7 100644 --- a/docs-website/router/configuration.mdx +++ b/docs-website/router/configuration.mdx @@ -1768,6 +1768,8 @@ Configure the GraphQL Execution Engine of the Router. | ENGINE_WEBSOCKET_CLIENT_PING_TIMEOUT | websocket_client_ping_timeout | | The Websocket client ping timeout to the subgraph. Defines how long the router will wait for a ping response from the subgraph. The timeout is specified as a string with a number and a unit, e.g. 10ms, 1s, 1m, 1h. The supported units are 'ms', 's', 'm', 'h'. | 30s | | ENGINE_WEBSOCKET_CLIENT_FRAME_TIMEOUT | websocket_client_frame_timeout | | The Websocket client frame timeout to the subgraph. Defines how long the router will wait for a frame response from the subgraph. The timeout is specified as a string with a number and a unit, e.g. 10ms, 1s, 1m, 1h. The supported units are 'ms', 's', 'm', 'h'. | 100ms | | ENGINE_EXECUTION_PLAN_CACHE_SIZE | execution_plan_cache_size | | Define how many GraphQL Operations should be stored in the execution plan cache. A low number will lead to more frequent cache misses, which will lead to increased latency. | 1024 | +| ENGINE_SLOW_PLAN_CACHE_SIZE | slow_plan_cache_size | | The maximum number of entries in the slow plan cache. 
This cache protects slow-to-plan queries from being evicted by the main plan cache's LFU policy. Only used when `in_memory_fallback` is enabled. See [Slow Plan Cache](/concepts/cache-warmer#slow-plan-cache). | 100 | +| ENGINE_SLOW_PLAN_CACHE_THRESHOLD | slow_plan_cache_threshold | | The minimum planning duration for a query to be promoted into the slow plan cache. Queries that take longer than this threshold to plan are considered expensive and protected from eviction. The period is specified as a string with a number and a unit, e.g. 10ms, 1s, 5s. The supported units are 'ms', 's', 'm', 'h'. | 5s | | ENGINE_MINIFY_SUBGRAPH_OPERATIONS | minify_subgraph_operations | | Minify the subgraph operations. If the value is true, GraphQL Operations get minified after planning. This reduces the amount of GraphQL AST nodes the Subgraph has to parse, which ultimately saves CPU time and memory, resulting in faster response times. | false | | ENGINE_ENABLE_PERSISTED_OPERATIONS_CACHE | enable_persisted_operations_cache | | Enable the persisted operations cache. The persisted operations cache is used to cache normalized persisted operations to improve performance. | true | | ENGINE_ENABLE_NORMALIZATION_CACHE | enable_normalization_cache | | Enable the normalization cache. The normalization cache is used to cache normalized operations to improve performance. 
| true | @@ -1802,6 +1804,8 @@ engine: websocket_client_ping_timeout: "30s" websocket_client_frame_timeout: "100ms" execution_plan_cache_size: 10000 + slow_plan_cache_size: 100 + slow_plan_cache_threshold: 5s minify_subgraph_operations: true enable_persisted_operations_cache: true enable_normalization_cache: true diff --git a/docs-website/router/metrics-and-monitoring.mdx b/docs-website/router/metrics-and-monitoring.mdx index a5da03ec5b..4f574d22ea 100644 --- a/docs-website/router/metrics-and-monitoring.mdx +++ b/docs-website/router/metrics-and-monitoring.mdx @@ -69,7 +69,7 @@ All the below mentioned metrics have the `wg.subgraph.name` dimensions. Do note #### GraphQL specific metrics -* `router.graphql.operation.planning_time`: Time taken to plan the operation. An additional attribute `wg.engine.plan_cache_hit` indicates if the plan was served from the cache. +* `router.graphql.operation.planning_time`: Time taken to plan the operation. An additional attribute `wg.engine.plan_cache_hit` indicates if the plan was served from the main execution plan cache or the plan fallback cache. 
#### Cost Control metrics From 09a4df6f8fc24b51836903c6184a14190d25315a Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Sun, 15 Mar 2026 22:40:28 +0530 Subject: [PATCH 31/46] fix: updates --- router-tests/{ => operations}/plan_fallback_cache_test.go | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename router-tests/{ => operations}/plan_fallback_cache_test.go (100%) diff --git a/router-tests/plan_fallback_cache_test.go b/router-tests/operations/plan_fallback_cache_test.go similarity index 100% rename from router-tests/plan_fallback_cache_test.go rename to router-tests/operations/plan_fallback_cache_test.go From 6f3cfd0467a3e45920be176d07ad88c42bd2c0d3 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Sun, 15 Mar 2026 22:55:37 +0530 Subject: [PATCH 32/46] fix: updates --- router-tests/operations/cache_warmup_test.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/router-tests/operations/cache_warmup_test.go b/router-tests/operations/cache_warmup_test.go index b5867edffd..2f9c662abd 100644 --- a/router-tests/operations/cache_warmup_test.go +++ b/router-tests/operations/cache_warmup_test.go @@ -1378,23 +1378,27 @@ func writeTestConfig(t *testing.T, version string, path string) { RootNodes: []*nodev1.TypeField{ { TypeName: "Query", - FieldNames: []string{"hello"}, + FieldNames: []string{"hello", "world"}, }, }, CustomStatic: &nodev1.DataSourceCustom_Static{ Data: &nodev1.ConfigurationVariable{ - StaticVariableContent: `{"hello": "Hello!"}`, + StaticVariableContent: `{"hello": "Hello!", "world": "World!"}`, }, }, Id: "0", }, }, - GraphqlSchema: "schema {\n query: Query\n}\ntype Query {\n hello: String\n}", + GraphqlSchema: "schema {\n query: Query\n}\ntype Query {\n hello: String\n world: String\n}", FieldConfigurations: []*nodev1.FieldConfiguration{ { TypeName: "Query", FieldName: "hello", }, + { + TypeName: "Query", + FieldName: "world", + }, }, }, } From 9e7540e7219de42c39a218d7028dee2d59ccb87b Mon Sep 17 00:00:00 2001 From: 
Milinda Dias Date: Mon, 16 Mar 2026 20:07:26 +0530 Subject: [PATCH 33/46] fix: iterators --- router/core/reload_persistent_state.go | 5 +-- router/pkg/slowplancache/slow_plan_cache.go | 37 +++++++++++-------- .../pkg/slowplancache/slow_plan_cache_test.go | 34 ++++++++--------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/router/core/reload_persistent_state.go b/router/core/reload_persistent_state.go index db269fc2d4..c8a5af6ecc 100644 --- a/router/core/reload_persistent_state.go +++ b/router/core/reload_persistent_state.go @@ -162,11 +162,10 @@ func (c *InMemoryPlanCacheFallback) cleanupUnusedFeatureFlags(routerCfg *nodev1. func convertToNodeOperation(data *slowplancache.Cache[*planWithMetaData]) []*nodev1.Operation { items := make([]*nodev1.Operation, 0) - data.IterValues(func(v *planWithMetaData) (stop bool) { + for v := range data.Values() { items = append(items, &nodev1.Operation{ Request: &nodev1.OperationRequest{Query: v.content}, }) - return false - }) + } return items } diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index 9c7e4dcc06..a9b3d8b9e8 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -2,6 +2,7 @@ package slowplancache import ( "fmt" + "iter" "sync" "sync/atomic" "time" @@ -193,23 +194,29 @@ func (c *Cache[V]) refreshMin() { } } -// IterValues iterates over all cached values. The callback is invoked outside -// the read lock to avoid holding it during user code execution. -func (c *Cache[V]) IterValues(cb func(v V) bool) { - if c == nil || c.closed.Load() { - return - } +// Values returns an iterator over all cached values. The snapshot is taken +// under the read lock, but iteration happens outside the lock to avoid +// holding it during user code execution. 
+func (c *Cache[V]) Values() iter.Seq[V] { + return func(yield func(V) bool) { + if c == nil || c.closed.Load() { + return + } - c.mu.RLock() - values := make([]V, 0, len(c.entries)) - for _, e := range c.entries { - values = append(values, e.value) - } - c.mu.RUnlock() + // We extract this to a separate slice so we don't need to hold the lock + // since this would be expensive based on what the iterator is doing + c.mu.RLock() + values := make([]V, 0, len(c.entries)) + for _, e := range c.entries { + values = append(values, e.value) + } + c.mu.RUnlock() - for _, v := range values { - if cb(v) { - return + for _, v := range values { + b := yield(v) + if !b { + return + } } } } diff --git a/router/pkg/slowplancache/slow_plan_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go index fe807151dc..3cb7e72ea0 100644 --- a/router/pkg/slowplancache/slow_plan_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -123,7 +123,7 @@ func TestCache_UpdateExisting(t *testing.T) { require.True(t, ok) } -func TestCache_IterValues(t *testing.T) { +func TestCache_Values(t *testing.T) { t.Parallel() c, err := New[*testPlan](10, 0) require.NoError(t, err) @@ -135,15 +135,14 @@ func TestCache_IterValues(t *testing.T) { c.Wait() var contents []string - c.IterValues(func(v *testPlan) bool { + for v := range c.Values() { contents = append(contents, v.content) - return false - }) + } require.Len(t, contents, 3) require.ElementsMatch(t, []string{"q1", "q2", "q3"}, contents) } -func TestCache_IterValues_EarlyStop(t *testing.T) { +func TestCache_Values_EarlyStop(t *testing.T) { t.Parallel() c, err := New[*testPlan](10, 0) require.NoError(t, err) @@ -155,10 +154,10 @@ func TestCache_IterValues_EarlyStop(t *testing.T) { c.Wait() count := 0 - c.IterValues(func(_ *testPlan) bool { + for range c.Values() { count++ - return true // stop after first - }) + break // stop after first + } require.Equal(t, 1, count) } @@ -190,21 +189,20 @@ func TestCache_SetAfterClose(t 
*testing.T) { require.False(t, ok) } -func TestCache_IterValuesEmpty(t *testing.T) { +func TestCache_ValuesEmpty(t *testing.T) { t.Parallel() c, err := New[*testPlan](10, 0) require.NoError(t, err) defer c.Close() count := 0 - c.IterValues(func(_ *testPlan) bool { + for range c.Values() { count++ - return false - }) + } require.Equal(t, 0, count) } -func TestCache_IterValuesAfterClose(t *testing.T) { +func TestCache_ValuesAfterClose(t *testing.T) { t.Parallel() c, err := New[*testPlan](10, 0) require.NoError(t, err) @@ -212,10 +210,9 @@ func TestCache_IterValuesAfterClose(t *testing.T) { c.Close() count := 0 - c.IterValues(func(_ *testPlan) bool { + for range c.Values() { count++ - return false - }) + } require.Equal(t, 0, count) } @@ -302,9 +299,8 @@ func TestCache_ConcurrentAccess(t *testing.T) { for i := 0; i < 5; i++ { go func() { defer func() { done <- struct{}{} }() - c.IterValues(func(_ *testPlan) bool { - return false - }) + for range c.Values() { + } }() } From bac502b3ea231d3eac9ae6c361eb49e067497bf0 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 16 Mar 2026 20:17:01 +0530 Subject: [PATCH 34/46] fix: refactor --- router/pkg/slowplancache/slow_plan_cache.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index a9b3d8b9e8..a274c1bda1 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -213,8 +213,7 @@ func (c *Cache[V]) Values() iter.Seq[V] { c.mu.RUnlock() for _, v := range values { - b := yield(v) - if !b { + if !yield(v) { return } } From e99824ea3e5bbea99def6f83e8313b690daf50d4 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Mon, 16 Mar 2026 21:24:29 +0530 Subject: [PATCH 35/46] fix: updates --- docs-website/concepts/cache-warmer.mdx | 12 ++++++------ docs-website/router/configuration.mdx | 8 ++++---- router/pkg/config/config.go | 4 ++-- router/pkg/config/config.schema.json | 4 
++-- router/pkg/config/testdata/config_defaults.json | 4 ++-- router/pkg/config/testdata/config_full.json | 4 ++-- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs-website/concepts/cache-warmer.mdx b/docs-website/concepts/cache-warmer.mdx index 0b63b73dd8..c3f4f35ed4 100644 --- a/docs-website/concepts/cache-warmer.mdx +++ b/docs-website/concepts/cache-warmer.mdx @@ -88,7 +88,7 @@ The in-memory fallback cache warming feature uses the **[slow plan cache](#slow- ### How It Works -The in-memory fallback relies on the slow plan cache — a secondary, bounded cache that tracks queries whose planning time exceeds a configurable threshold (`slow_plan_cache_threshold`, default 5s). During normal operation, this cache is populated in two ways: +The in-memory fallback relies on the slow plan cache — a secondary, bounded cache that tracks queries whose planning time exceeds a configurable threshold (`slow_plan_cache_threshold`, default 100ms). During normal operation, this cache is populated in two ways: 1. **On first plan**: When a query is planned and its planning duration exceeds the threshold, the plan is stored in both the main cache and the slow plan cache. 2. **On eviction**: If the main TinyLFU cache evicts a plan that is in the slow plan cache, the query plan won't be recomputed and would simply be served from the slow plan cache. @@ -112,7 +112,7 @@ In these cases, the router will use the fallback and load the list of operations ### Key Characteristics of In-Memory Fallback **Advantages:** -- **Coverage of expensive queries**: By default, queries with planning times above the threshold (5s) are preserved and warmed on reload, protecting slow-to-plan queries from cold-start latency. Users can lower the threshold to any positive duration (e.g., `slow_plan_cache_threshold: 100ms`) to capture all queries. 
Users can also set the duration to 1 nanosecond (`slow_plan_cache_threshold: 1ns`), this would ensure that all queries are cached in the fallback, and thus would be available to rewarm the cache upon reloads. +- **Coverage of expensive queries**: By default, queries with planning times above the threshold (100ms) are preserved and warmed on reload, protecting slow-to-plan queries from cold-start latency. Users can lower the threshold to any positive duration (e.g., `slow_plan_cache_threshold: 10ms`) to capture more queries. Users can also set the duration to 1 nanosecond (`slow_plan_cache_threshold: 1ns`); this would ensure that all queries are cached in the fallback, and thus would be available to rewarm the cache upon reloads. - **Eliminates reload spikes for expensive queries**: You won't experience query planning spikes for queries above the threshold after configuration or schema reloads. Users can tune the threshold to cover more or fewer queries. **Tradeoffs:** @@ -162,14 +162,14 @@ ### How It Works 1. When a query is planned for the first time, its planning duration is measured. -2. If the planning duration exceeds the configured threshold (`slow_plan_cache_threshold`, default 5s), the query plan is stored in both the main cache and the slow plan cache. +2. If the planning duration exceeds the configured threshold (`slow_plan_cache_threshold`, default 100ms), the query plan is stored in both the main cache and the slow plan cache. 3. If the main cache later evicts this plan (due to LFU pressure from more frequent queries), the OnEvict hook pushes it to the slow plan cache (if it meets the threshold). 4. On subsequent requests, if the plan is not found in the main cache, the router checks the slow plan cache before re-planning. If found, the plan is served immediately and re-inserted into the main cache. 5. 
During config reloads, slow plan cache entries are used as the warmup source, ensuring slow queries survive cache rebuilds. ### Cache Size and Eviction -The slow plan cache has a configurable maximum size (`slow_plan_cache_size`, default 100). When the cache is full and a new expensive query needs to be added: +The slow plan cache has a configurable maximum size (`slow_plan_cache_size`, default 300). When the cache is full and a new expensive query needs to be added: - The new query's planning duration is compared to the shortest duration in the cache. - If the new query is more expensive (took longer to plan), it replaces the least expensive entry. @@ -185,8 +185,8 @@ The slow plan cache is configured through the engine configuration: ```yaml engine: - slow_plan_cache_size: 100 # Maximum entries (default: 100) - slow_plan_cache_threshold: 5s # Minimum planning time to qualify (default: 5s) + slow_plan_cache_size: 300 # Maximum entries (default: 300) + slow_plan_cache_threshold: 100ms # Minimum planning time to qualify (default: 100ms) cache_warmup: enabled: true diff --git a/docs-website/router/configuration.mdx b/docs-website/router/configuration.mdx index 56329a03b7..b0df8644f7 100644 --- a/docs-website/router/configuration.mdx +++ b/docs-website/router/configuration.mdx @@ -1768,8 +1768,8 @@ Configure the GraphQL Execution Engine of the Router. | ENGINE_WEBSOCKET_CLIENT_PING_TIMEOUT | websocket_client_ping_timeout | | The Websocket client ping timeout to the subgraph. Defines how long the router will wait for a ping response from the subgraph. The timeout is specified as a string with a number and a unit, e.g. 10ms, 1s, 1m, 1h. The supported units are 'ms', 's', 'm', 'h'. | 30s | | ENGINE_WEBSOCKET_CLIENT_FRAME_TIMEOUT | websocket_client_frame_timeout | | The Websocket client frame timeout to the subgraph. Defines how long the router will wait for a frame response from the subgraph. The timeout is specified as a string with a number and a unit, e.g. 
10ms, 1s, 1m, 1h. The supported units are 'ms', 's', 'm', 'h'. | 100ms | | ENGINE_EXECUTION_PLAN_CACHE_SIZE | execution_plan_cache_size | | Define how many GraphQL Operations should be stored in the execution plan cache. A low number will lead to more frequent cache misses, which will lead to increased latency. | 1024 | -| ENGINE_SLOW_PLAN_CACHE_SIZE | slow_plan_cache_size | | The maximum number of entries in the slow plan cache. This cache protects slow-to-plan queries from being evicted by the main plan cache's LFU policy. Only used when `in_memory_fallback` is enabled. See [Slow Plan Cache](/concepts/cache-warmer#slow-plan-cache). | 100 | -| ENGINE_SLOW_PLAN_CACHE_THRESHOLD | slow_plan_cache_threshold | | The minimum planning duration for a query to be promoted into the slow plan cache. Queries that take longer than this threshold to plan are considered expensive and protected from eviction. The period is specified as a string with a number and a unit, e.g. 10ms, 1s, 5s. The supported units are 'ms', 's', 'm', 'h'. | 5s | +| ENGINE_SLOW_PLAN_CACHE_SIZE | slow_plan_cache_size | | The maximum number of entries in the slow plan cache. This cache protects slow-to-plan queries from being evicted by the main plan cache's LFU policy. Only used when `in_memory_fallback` is enabled. See [Slow Plan Cache](/concepts/cache-warmer#slow-plan-cache). | 300 | +| ENGINE_SLOW_PLAN_CACHE_THRESHOLD | slow_plan_cache_threshold | | The minimum planning duration for a query to be promoted into the slow plan cache. Queries that take longer than this threshold to plan are considered expensive and protected from eviction. The period is specified as a string with a number and a unit, e.g. 10ms, 1s, 5s. The supported units are 'ms', 's', 'm', 'h'. | 100ms | | ENGINE_MINIFY_SUBGRAPH_OPERATIONS | minify_subgraph_operations | | Minify the subgraph operations. If the value is true, GraphQL Operations get minified after planning. 
This reduces the amount of GraphQL AST nodes the Subgraph has to parse, which ultimately saves CPU time and memory, resulting in faster response times. | false | | ENGINE_ENABLE_PERSISTED_OPERATIONS_CACHE | enable_persisted_operations_cache | | Enable the persisted operations cache. The persisted operations cache is used to cache normalized persisted operations to improve performance. | true | | ENGINE_ENABLE_NORMALIZATION_CACHE | enable_normalization_cache | | Enable the normalization cache. The normalization cache is used to cache normalized operations to improve performance. | true | @@ -1804,8 +1804,8 @@ engine: websocket_client_ping_timeout: "30s" websocket_client_frame_timeout: "100ms" execution_plan_cache_size: 10000 - slow_plan_cache_size: 100 - slow_plan_cache_threshold: 5s + slow_plan_cache_size: 300 + slow_plan_cache_threshold: 100ms minify_subgraph_operations: true enable_persisted_operations_cache: true enable_normalization_cache: true diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index fa12bb3cbc..d34c029ee4 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -430,8 +430,8 @@ type EngineExecutionConfiguration struct { WebSocketClientPingTimeout time.Duration `envDefault:"30s" env:"ENGINE_WEBSOCKET_CLIENT_PING_TIMEOUT" yaml:"websocket_client_ping_timeout,omitempty"` WebSocketClientFrameTimeout time.Duration `envDefault:"100ms" env:"ENGINE_WEBSOCKET_CLIENT_FRAME_TIMEOUT" yaml:"websocket_client_frame_timeout,omitempty"` ExecutionPlanCacheSize int64 `envDefault:"1024" env:"ENGINE_EXECUTION_PLAN_CACHE_SIZE" yaml:"execution_plan_cache_size,omitempty"` - SlowPlanCacheSize int64 `envDefault:"100" env:"ENGINE_SLOW_PLAN_CACHE_SIZE" yaml:"slow_plan_cache_size,omitempty"` - SlowPlanCacheThreshold time.Duration `envDefault:"5s" env:"ENGINE_SLOW_PLAN_CACHE_THRESHOLD" yaml:"slow_plan_cache_threshold,omitempty"` + SlowPlanCacheSize int64 `envDefault:"300" env:"ENGINE_SLOW_PLAN_CACHE_SIZE" 
yaml:"slow_plan_cache_size,omitempty"` + SlowPlanCacheThreshold time.Duration `envDefault:"100ms" env:"ENGINE_SLOW_PLAN_CACHE_THRESHOLD" yaml:"slow_plan_cache_threshold,omitempty"` MinifySubgraphOperations bool `envDefault:"true" env:"ENGINE_MINIFY_SUBGRAPH_OPERATIONS" yaml:"minify_subgraph_operations"` EnablePersistedOperationsCache bool `envDefault:"true" env:"ENGINE_ENABLE_PERSISTED_OPERATIONS_CACHE" yaml:"enable_persisted_operations_cache"` EnableNormalizationCache bool `envDefault:"true" env:"ENGINE_ENABLE_NORMALIZATION_CACHE" yaml:"enable_normalization_cache"` diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 427e4f437f..5ae0892e1b 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -3328,14 +3328,14 @@ "slow_plan_cache_size": { "type": "integer", "minimum": 1, - "default": 100, + "default": 300, "description": "The maximum number of entries in the slow plan cache." }, "slow_plan_cache_threshold": { "type": "string", "format": "go-duration", "description": "The minimum planning duration for a query plan to be stored in the slow plan cache, protecting it from TinyLFU cache eviction.", - "default": "5s", + "default": "100ms", "duration": { "minimum": "1ns" } diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index ca84871212..7e3f9d4281 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -425,8 +425,8 @@ "WebSocketClientPingTimeout": 30000000000, "WebSocketClientFrameTimeout": 100000000, "ExecutionPlanCacheSize": 1024, - "SlowPlanCacheSize": 100, - "SlowPlanCacheThreshold": 5000000000, + "SlowPlanCacheSize": 300, + "SlowPlanCacheThreshold": 100000000, "MinifySubgraphOperations": true, "EnablePersistedOperationsCache": true, "EnableNormalizationCache": true, diff --git a/router/pkg/config/testdata/config_full.json 
b/router/pkg/config/testdata/config_full.json index ed2f406d4b..dec0e9e5d3 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -829,8 +829,8 @@ "WebSocketClientPingTimeout": 30000000000, "WebSocketClientFrameTimeout": 100000000, "ExecutionPlanCacheSize": 1024, - "SlowPlanCacheSize": 100, - "SlowPlanCacheThreshold": 5000000000, + "SlowPlanCacheSize": 300, + "SlowPlanCacheThreshold": 100000000, "MinifySubgraphOperations": true, "EnablePersistedOperationsCache": true, "EnableNormalizationCache": true, From d1ffde4a92d14040cb3b9375dc13c852a92d49d7 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 02:27:27 +0530 Subject: [PATCH 36/46] fix: review comments --- router/pkg/config/config.schema.json | 2 +- router/pkg/slowplancache/slow_plan_cache.go | 10 ++--- .../pkg/slowplancache/slow_plan_cache_test.go | 40 ++++++++++--------- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 5ae0892e1b..a613001f2b 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -3334,7 +3334,7 @@ "slow_plan_cache_threshold": { "type": "string", "format": "go-duration", - "description": "The minimum planning duration for a query plan to be stored in the slow plan cache, protecting it from TinyLFU cache eviction.", + "description": "The minimum planning duration for a query plan to be stored in the slow plan cache.", "default": "100ms", "duration": { "minimum": "1ns" diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index a274c1bda1..fda2696348 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -104,6 +104,10 @@ func (c *Cache[V]) Set(key uint64, value V, duration time.Duration) { return } + if duration < c.threshold { + return + } + select { case c.writeCh <- setRequest[V]{key: 
key, value: value, dur: duration}: default: @@ -127,12 +131,8 @@ func (c *Cache[V]) Wait() { } // applySet performs the actual cache mutation. Must only be called from processWrites. +// This will hold the lock while it is running func (c *Cache[V]) applySet(key uint64, value V, duration time.Duration) { - // Reject entries that don't meet the threshold - if duration < c.threshold { - return - } - c.mu.Lock() defer c.mu.Unlock() diff --git a/router/pkg/slowplancache/slow_plan_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go index 3cb7e72ea0..617adc0f6d 100644 --- a/router/pkg/slowplancache/slow_plan_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -1,6 +1,7 @@ package slowplancache import ( + "sync" "testing" "time" @@ -272,42 +273,43 @@ func TestCache_ConcurrentAccess(t *testing.T) { c, err := New[*testPlan](100, 0) require.NoError(t, err) defer c.Close() - done := make(chan struct{}) + var wg sync.WaitGroup // Concurrent writers - for i := 0; i < 10; i++ { - go func(id int) { - defer func() { done <- struct{}{} }() - for j := 0; j < 100; j++ { - key := uint64(id*100 + j) //nolint:gosec // test code, no overflow risk + for i := range 10 { + wg.Add(1) + go func() { + defer wg.Done() + for j := range 100 { + key := uint64(i*100 + j) //nolint:gosec // test code, no overflow risk c.Set(key, &testPlan{content: "q"}, time.Duration(j)*time.Millisecond) } - }(i) + }() } // Concurrent readers - for i := 0; i < 10; i++ { - go func(id int) { - defer func() { done <- struct{}{} }() - for j := 0; j < 100; j++ { - c.Get(uint64(id*100 + j)) //nolint:gosec // test code, no overflow risk + for i := range 10 { + wg.Add(1) + go func() { + defer wg.Done() + for j := range 100 { + c.Get(uint64(i*100 + j)) //nolint:gosec // test code, no overflow risk } - }(i) + }() } // Concurrent iterators - for i := 0; i < 5; i++ { + for range 5 { + wg.Add(1) go func() { - defer func() { done <- struct{}{} }() + defer wg.Done() for range c.Values() { + _ = 
struct{}{} // prevent loop optimization } }() } - // Wait for all goroutines - for i := 0; i < 25; i++ { - <-done - } + wg.Wait() } func TestCache_InvalidSize(t *testing.T) { From 04ee8b09fdc42a937e8ed5d71bda59b00f28d5ac Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 14:44:37 +0530 Subject: [PATCH 37/46] fix: review comments --- router/pkg/config/config.schema.json | 2 +- router/pkg/slowplancache/slow_plan_cache.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index a613001f2b..0e5248d313 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -3337,7 +3337,7 @@ "description": "The minimum planning duration for a query plan to be stored in the slow plan cache.", "default": "100ms", "duration": { - "minimum": "1ns" + "minimum": "0ns" } }, "operation_hash_cache_size": { diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index fda2696348..4696be98ab 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -18,7 +18,7 @@ type setRequest[V any] struct { key uint64 value V dur time.Duration - waitCh chan struct{} // if non-nil, closed after this request is processed + waitCh chan struct{} // if non-nil, will be closed after previous requests in the buffer are processed } // Cache is a bounded map that holds expensive-to-compute values From 8ded16b1d9bad4c2db4a4215a86bcce3d7b6f25b Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 15:06:43 +0530 Subject: [PATCH 38/46] fix: review comments --- .../pkg/slowplancache/slow_plan_cache_test.go | 83 ++++++++++++++++++- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/router/pkg/slowplancache/slow_plan_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go index 617adc0f6d..f022a7837f 100644 --- 
a/router/pkg/slowplancache/slow_plan_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -281,7 +281,7 @@ func TestCache_ConcurrentAccess(t *testing.T) { go func() { defer wg.Done() for j := range 100 { - key := uint64(i*100 + j) //nolint:gosec // test code, no overflow risk + key := uint64(i*100 + j) // test code, no overflow risk c.Set(key, &testPlan{content: "q"}, time.Duration(j)*time.Millisecond) } }() @@ -293,7 +293,7 @@ func TestCache_ConcurrentAccess(t *testing.T) { go func() { defer wg.Done() for j := range 100 { - c.Get(uint64(i*100 + j)) //nolint:gosec // test code, no overflow risk + c.Get(uint64(i*100 + j)) // test code, no overflow risk } }() } @@ -373,3 +373,82 @@ func TestCache_DoubleClose(t *testing.T) { c.Close() }) } + +func BenchmarkCache_Set(b *testing.B) { + c, err := New[*testPlan](1000, 0) + require.NoError(b, err) + defer c.Close() + + plan := &testPlan{content: "query { benchmarkField }"} + + i := 0 + for b.Loop() { + c.Set(uint64(i), plan, time.Duration(i)*time.Millisecond) + i++ + } + c.Wait() +} + +func BenchmarkCache_Set_Eviction(b *testing.B) { + c, err := New[*testPlan](100, 0) + require.NoError(b, err) + defer c.Close() + + plan := &testPlan{content: "query { benchmarkField }"} + + i := 0 + for b.Loop() { + c.Set(uint64(i), plan, time.Duration(i)*time.Millisecond) + i++ + } + c.Wait() +} + +func BenchmarkCache_Get_Hit(b *testing.B) { + c, err := New[*testPlan](1000, 0) + require.NoError(b, err) + defer c.Close() + + for i := range 1000 { + c.Set(uint64(i), &testPlan{content: "q"}, time.Duration(i+1)*time.Millisecond) + } + c.Wait() + + i := 0 + for b.Loop() { + c.Get(uint64(i % 1000)) + i++ + } +} + +func BenchmarkCache_Get_Miss(b *testing.B) { + c, err := New[*testPlan](1000, 0) + require.NoError(b, err) + defer c.Close() + + i := 0 + for b.Loop() { + c.Get(uint64(i)) + i++ + } +} + +func BenchmarkCache_Mixed(b *testing.B) { + c, err := New[*testPlan](1000, 0) + require.NoError(b, err) + defer c.Close() + + plan 
:= &testPlan{content: "query { benchmarkField }"} + + i := 0 + for b.Loop() { + key := uint64(i % 2000) + if i%3 == 0 { + c.Set(key, plan, time.Duration(i)*time.Millisecond) + } else { + c.Get(key) + } + i++ + } + c.Wait() +} From a9828a0bff6447b015775a18f30b58b29c7f31cf Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 15:15:12 +0530 Subject: [PATCH 39/46] fix: updates --- .../pkg/slowplancache/slow_plan_cache_test.go | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/router/pkg/slowplancache/slow_plan_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go index f022a7837f..04630a298b 100644 --- a/router/pkg/slowplancache/slow_plan_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -374,6 +374,7 @@ func TestCache_DoubleClose(t *testing.T) { }) } +// 3.726 ns/op | 3.695 ns/op | 3.702 ns/op : SyncMap func BenchmarkCache_Set(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) @@ -389,6 +390,7 @@ func BenchmarkCache_Set(b *testing.B) { c.Wait() } +// 4.399 ns/op | 4.602 ns/op | 4.454 ns/op | 4.506 ns/op : SyncMap func BenchmarkCache_Set_Eviction(b *testing.B) { c, err := New[*testPlan](100, 0) require.NoError(b, err) @@ -404,6 +406,7 @@ func BenchmarkCache_Set_Eviction(b *testing.B) { c.Wait() } +// 17.14 ns/op | 17.11 ns/op | 17.65 ns/op : SyncMap func BenchmarkCache_Get_Hit(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) @@ -421,6 +424,7 @@ func BenchmarkCache_Get_Hit(b *testing.B) { } } +// 6.644 ns/op | 6.507 ns/op | 6.496 ns/op : SyncMap func BenchmarkCache_Get_Miss(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) @@ -433,6 +437,26 @@ func BenchmarkCache_Get_Miss(b *testing.B) { } } +// 7.874 ns/op | 8.178 ns/op | 7.957 ns/op : SyncMap +func BenchmarkCache_Set_SameKey(b *testing.B) { + c, err := New[*testPlan](1000, 0) + require.NoError(b, err) + defer c.Close() + + plan := &testPlan{content: "query { benchmarkField }"} + + 
// Pre-populate so the key exists + c.Set(42, plan, 10*time.Millisecond) + c.Wait() + + i := 0 + for b.Loop() { + c.Set(42, plan, time.Duration(i)*time.Millisecond) + i++ + } + c.Wait() +} + func BenchmarkCache_Mixed(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) From 283437c06f4ad952f10a5e0626c361776c4c92c4 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 15:21:09 +0530 Subject: [PATCH 40/46] fix: updates --- router/pkg/slowplancache/slow_plan_cache_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/router/pkg/slowplancache/slow_plan_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go index 04630a298b..5361ec542e 100644 --- a/router/pkg/slowplancache/slow_plan_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -375,6 +375,7 @@ func TestCache_DoubleClose(t *testing.T) { } // 3.726 ns/op | 3.695 ns/op | 3.702 ns/op : SyncMap +// 4.962 | 3.771 ns/op | 5.269 ns/op | 3.947 ns/op | 4.049 ns/op : Normal func BenchmarkCache_Set(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) @@ -391,6 +392,7 @@ func BenchmarkCache_Set(b *testing.B) { } // 4.399 ns/op | 4.602 ns/op | 4.454 ns/op | 4.506 ns/op : SyncMap +// 4.683 ns/op | 5.099 ns/op | 5.055 ns/op | 4.546 ns/op : Mutexes func BenchmarkCache_Set_Eviction(b *testing.B) { c, err := New[*testPlan](100, 0) require.NoError(b, err) @@ -407,6 +409,7 @@ func BenchmarkCache_Set_Eviction(b *testing.B) { } // 17.14 ns/op | 17.11 ns/op | 17.65 ns/op : SyncMap +// 14.79 ns/op | 16.58 ns/op | 15.15 ns/op : Mutexes func BenchmarkCache_Get_Hit(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) @@ -425,6 +428,7 @@ func BenchmarkCache_Get_Hit(b *testing.B) { } // 6.644 ns/op | 6.507 ns/op | 6.496 ns/op : SyncMap +// 15.00 ns/op | 14.83 ns/op | 14.73 ns/op : Mutexes func BenchmarkCache_Get_Miss(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) @@ -438,6 +442,7 @@ func 
BenchmarkCache_Get_Miss(b *testing.B) { } // 7.874 ns/op | 8.178 ns/op | 7.957 ns/op : SyncMap +// 4.882 ns/op | 4.816 ns/op | 5.666 ns/op : Mutexes func BenchmarkCache_Set_SameKey(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) @@ -457,6 +462,8 @@ func BenchmarkCache_Set_SameKey(b *testing.B) { c.Wait() } +// 19.22 ns/op | 21.75 ns/op | 18.95 ns/op : SyncMap +// 3504 ns/op | 4020 ns/op | 3607 ns/op | 2738 ns/op : Mutexes func BenchmarkCache_Mixed(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) From 41f64a61838cd1793b40619dd6fe46739b2b2ecf Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 16:02:15 +0530 Subject: [PATCH 41/46] fix: updates --- router/pkg/slowplancache/slow_plan_cache.go | 25 +++++++++++++------ .../pkg/slowplancache/slow_plan_cache_test.go | 2 +- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index 4696be98ab..57a1c190d6 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -131,8 +131,18 @@ func (c *Cache[V]) Wait() { } // applySet performs the actual cache mutation. Must only be called from processWrites. -// This will hold the lock while it is running func (c *Cache[V]) applySet(key uint64, value V, duration time.Duration) { + needsRefreshMin := c.mutateEntries(key, value, duration) + if needsRefreshMin { + c.mu.RLock() + defer c.mu.RUnlock() + c.refreshMin() + } +} + +// mutateEntries applies the cache mutation under the write lock and returns +// whether refreshMin needs to be called afterwards. 
+func (c *Cache[V]) mutateEntries(key uint64, value V, duration time.Duration) bool { c.mu.Lock() defer c.mu.Unlock() @@ -144,10 +154,10 @@ func (c *Cache[V]) applySet(key uint64, value V, duration time.Duration) { // If the minKey duration was increased, there can be a new minKey if c.minKey == key { - c.refreshMin() + return true } } - return + return false } // If not at capacity, just add and update min tracking @@ -157,22 +167,23 @@ func (c *Cache[V]) applySet(key uint64, value V, duration time.Duration) { c.minKey = key c.minDur = duration } - return + return false } // At capacity: reject if new entry is not more expensive than the current minimum if duration <= c.minDur { - return + return false } // When at max capacity // Evict the minimum and insert the new entry delete(c.entries, c.minKey) c.entries[key] = &Entry[V]{value: value, duration: duration} - c.refreshMin() + return true } -// refreshMin rescans the entries to find the new minimum. Must be called with mu held. +// refreshMin rescans the entries to find the new minimum. Must only be called from processWrites. +// Called without the lock: no writes occur during the scan (sole writer), and concurrent reads from Get are safe. 
func (c *Cache[V]) refreshMin() { var ( minKey uint64 diff --git a/router/pkg/slowplancache/slow_plan_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go index 5361ec542e..5830ced609 100644 --- a/router/pkg/slowplancache/slow_plan_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -463,7 +463,7 @@ func BenchmarkCache_Set_SameKey(b *testing.B) { } // 19.22 ns/op | 21.75 ns/op | 18.95 ns/op : SyncMap -// 3504 ns/op | 4020 ns/op | 3607 ns/op | 2738 ns/op : Mutexes +// 43.91 ns/op | 41.16 ns/op | 39.43 ns/op : Mutexes func BenchmarkCache_Mixed(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) From 4fb68602ae091c2bdc906f8f4a8a9d0fb6f1aa0f Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 16:02:53 +0530 Subject: [PATCH 42/46] fix: bench cleanup --- router/pkg/slowplancache/slow_plan_cache_test.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/router/pkg/slowplancache/slow_plan_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go index 5830ced609..19c2a372f0 100644 --- a/router/pkg/slowplancache/slow_plan_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -374,8 +374,6 @@ func TestCache_DoubleClose(t *testing.T) { }) } -// 3.726 ns/op | 3.695 ns/op | 3.702 ns/op : SyncMap -// 4.962 | 3.771 ns/op | 5.269 ns/op | 3.947 ns/op | 4.049 ns/op : Normal func BenchmarkCache_Set(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) @@ -391,8 +389,6 @@ func BenchmarkCache_Set(b *testing.B) { c.Wait() } -// 4.399 ns/op | 4.602 ns/op | 4.454 ns/op | 4.506 ns/op : SyncMap -// 4.683 ns/op | 5.099 ns/op | 5.055 ns/op | 4.546 ns/op : Mutexes func BenchmarkCache_Set_Eviction(b *testing.B) { c, err := New[*testPlan](100, 0) require.NoError(b, err) @@ -408,8 +404,6 @@ func BenchmarkCache_Set_Eviction(b *testing.B) { c.Wait() } -// 17.14 ns/op | 17.11 ns/op | 17.65 ns/op : SyncMap -// 14.79 ns/op | 16.58 ns/op | 15.15 ns/op : Mutexes func 
BenchmarkCache_Get_Hit(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) @@ -427,8 +421,6 @@ func BenchmarkCache_Get_Hit(b *testing.B) { } } -// 6.644 ns/op | 6.507 ns/op | 6.496 ns/op : SyncMap -// 15.00 ns/op | 14.83 ns/op | 14.73 ns/op : Mutexes func BenchmarkCache_Get_Miss(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) @@ -441,8 +433,6 @@ func BenchmarkCache_Get_Miss(b *testing.B) { } } -// 7.874 ns/op | 8.178 ns/op | 7.957 ns/op : SyncMap -// 4.882 ns/op | 4.816 ns/op | 5.666 ns/op : Mutexes func BenchmarkCache_Set_SameKey(b *testing.B) { c, err := New[*testPlan](1000, 0) require.NoError(b, err) From 9be8a479db506ccbc8dca7c67b4babf97bc54efc Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 16:09:24 +0530 Subject: [PATCH 43/46] fix: changes --- .../pkg/slowplancache/slow_plan_cache_test.go | 89 +++++++++++++------ 1 file changed, 64 insertions(+), 25 deletions(-) diff --git a/router/pkg/slowplancache/slow_plan_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go index 19c2a372f0..1b140333ae 100644 --- a/router/pkg/slowplancache/slow_plan_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -268,6 +268,43 @@ func TestCache_MaxSizeOne(t *testing.T) { require.True(t, ok) } +// runMixedOps exercises all cache operations deterministically based on the counter i. +// Operation distribution: ~29% writes, ~14% same-key writes, ~29% read hits, +// ~14% read misses, ~14% iteration (80% full, 20% early stop) + occasional Wait. 
+func runMixedOps(c *Cache[*testPlan], i int) { + plan := &testPlan{content: "q"} + op := i % 7 + key := uint64(i % 2000) + + switch { + case op < 2: + // ~29% writes with varying keys (triggers eviction when cache is full) + c.Set(key, plan, time.Duration(i%500+1)*time.Millisecond) + case op < 3: + // ~14% writes to same key (triggers update path and possible refreshMin) + c.Set(42, plan, time.Duration(i%500+1)*time.Millisecond) + case op < 5: + // ~29% reads that may hit + c.Get(uint64(i % 500)) + case op < 6: + // ~14% reads that will mostly miss (keys beyond cache capacity) + c.Get(key + 1000) + default: + // ~14% iteration + Wait + if i%5 == 0 { + for range c.Values() { + break + } + } else { + for range c.Values() { + } + } + if i%13 == 0 { + c.Wait() + } + } +} + func TestCache_ConcurrentAccess(t *testing.T) { t.Parallel() c, err := New[*testPlan](100, 0) @@ -275,41 +312,43 @@ func TestCache_ConcurrentAccess(t *testing.T) { defer c.Close() var wg sync.WaitGroup - // Concurrent writers - for i := range 10 { - wg.Add(1) - go func() { - defer wg.Done() - for j := range 100 { - key := uint64(i*100 + j) // test code, no overflow risk - c.Set(key, &testPlan{content: "q"}, time.Duration(j)*time.Millisecond) - } - }() - } + const ( + numGoroutines = 2000 + opsPerRoutine = 5000 + ) - // Concurrent readers - for i := range 10 { + for g := range numGoroutines { wg.Add(1) go func() { defer wg.Done() - for j := range 100 { - c.Get(uint64(i*100 + j)) // test code, no overflow risk + for j := range opsPerRoutine { + runMixedOps(c, g*opsPerRoutine+j) } }() } - // Concurrent iterators - for range 5 { - wg.Add(1) - go func() { - defer wg.Done() - for range c.Values() { - _ = struct{}{} // prevent loop optimization - } - }() + wg.Wait() +} + +func BenchmarkCache_ConcurrentMixed(b *testing.B) { + c, err := New[*testPlan](1000, 0) + require.NoError(b, err) + defer c.Close() + + // Pre-populate half the key space so we get a mix of hits and misses + for i := range 500 { + 
c.Set(uint64(i), &testPlan{content: "q"}, time.Duration(i+1)*time.Millisecond) } + c.Wait() - wg.Wait() + b.RunParallel(func(pb *testing.PB) { + i := 0 + for pb.Next() { + runMixedOps(c, i) + i++ + } + }) + c.Wait() } func TestCache_InvalidSize(t *testing.T) { From e3935885ba0bfea233aa1a40c058ac69536e485b Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 16:28:29 +0530 Subject: [PATCH 44/46] fix: changes --- router/pkg/slowplancache/slow_plan_cache.go | 95 +++++++-------------- 1 file changed, 33 insertions(+), 62 deletions(-) diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index 57a1c190d6..0dcd4518c7 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ b/router/pkg/slowplancache/slow_plan_cache.go @@ -24,12 +24,12 @@ type setRequest[V any] struct { // Cache is a bounded map that holds expensive-to-compute values // that should not be subject to TinyLFU eviction in the main cache. // Writes are buffered through a channel and applied asynchronously by a -// background goroutine, making Set non-blocking. Reads are protected by a RWMutex. +// background goroutine, making Set non-blocking. Reads use sync.Map for lock-free access. // It tracks the minimum-duration entry so that rejection of cheaper entries is O(1). 
type Cache[V any] struct { - mu sync.RWMutex - entries map[uint64]*Entry[V] - maxSize int + entries sync.Map // map[uint64]*Entry[V] + size atomic.Int64 + maxSize int64 threshold time.Duration minKey uint64 minDur time.Duration @@ -49,8 +49,7 @@ func New[V any](maxSize int, threshold time.Duration) (*Cache[V], error) { return nil, fmt.Errorf("slow plan cache size must be at least 1, got %d", maxSize) } c := &Cache[V]{ - entries: make(map[uint64]*Entry[V], maxSize), - maxSize: maxSize, + maxSize: int64(maxSize), threshold: threshold, writeCh: make(chan setRequest[V], defaultWriteBufferSize), stop: make(chan struct{}), @@ -60,7 +59,7 @@ func New[V any](maxSize int, threshold time.Duration) (*Cache[V], error) { return c, nil } -// processWrites drains the write channel and applies sets under the write lock. +// processWrites drains the write channel and applies sets. // It exits when the stop channel is closed. func (c *Cache[V]) processWrites() { defer close(c.done) @@ -85,16 +84,13 @@ func (c *Cache[V]) Get(key uint64) (V, bool) { return zero, false } - c.mu.RLock() - defer c.mu.RUnlock() - - entry, ok := c.entries[key] + val, ok := c.entries.Load(key) if !ok { var zero V return zero, false } - return entry.value, true + return val.(*Entry[V]).value, true } // Set enqueues a write to the cache. The write is applied asynchronously. @@ -132,58 +128,48 @@ func (c *Cache[V]) Wait() { // applySet performs the actual cache mutation. Must only be called from processWrites. func (c *Cache[V]) applySet(key uint64, value V, duration time.Duration) { - needsRefreshMin := c.mutateEntries(key, value, duration) - if needsRefreshMin { - c.mu.RLock() - defer c.mu.RUnlock() - c.refreshMin() - } -} - -// mutateEntries applies the cache mutation under the write lock and returns -// whether refreshMin needs to be called afterwards. 
-func (c *Cache[V]) mutateEntries(key uint64, value V, duration time.Duration) bool { - c.mu.Lock() - defer c.mu.Unlock() + entry := &Entry[V]{value: value, duration: duration} // If key already exists, update it - if currEntry, ok := c.entries[key]; ok { + if existing, ok := c.entries.Load(key); ok { + currEntry := existing.(*Entry[V]) // Consider worst case, if the previous run was faster then increase if currEntry.duration < duration { - c.entries[key] = &Entry[V]{value: value, duration: duration} + c.entries.Store(key, entry) // If the minKey duration was increased, there can be a new minKey if c.minKey == key { - return true + c.refreshMin() } } - return false + return } // If not at capacity, just add and update min tracking - if len(c.entries) < c.maxSize { - c.entries[key] = &Entry[V]{value: value, duration: duration} - if len(c.entries) == 1 || duration < c.minDur { + if c.size.Load() < c.maxSize { + c.entries.Store(key, entry) + newSize := c.size.Add(1) + if newSize == 1 || duration < c.minDur { c.minKey = key c.minDur = duration } - return false + return } // At capacity: reject if new entry is not more expensive than the current minimum if duration <= c.minDur { - return false + return } // When at max capacity // Evict the minimum and insert the new entry - delete(c.entries, c.minKey) - c.entries[key] = &Entry[V]{value: value, duration: duration} - return true + c.entries.Delete(c.minKey) + c.entries.Store(key, entry) + // size stays the same: deleted one, added one + c.refreshMin() } // refreshMin rescans the entries to find the new minimum. Must only be called from processWrites. -// Called without the lock: no writes occur during the scan (sole writer), and concurrent reads from Get are safe. 
func (c *Cache[V]) refreshMin() { var ( minKey uint64 @@ -191,13 +177,15 @@ func (c *Cache[V]) refreshMin() { first = true ) - for k, e := range c.entries { + c.entries.Range(func(k, v any) bool { + e := v.(*Entry[V]) if first || e.duration < minDur { - minKey = k + minKey = k.(uint64) minDur = e.duration first = false } - } + return true + }) if !first { c.minKey = minKey @@ -205,29 +193,16 @@ func (c *Cache[V]) refreshMin() { } } -// Values returns an iterator over all cached values. The snapshot is taken -// under the read lock, but iteration happens outside the lock to avoid -// holding it during user code execution. +// Values returns an iterator over all cached values. func (c *Cache[V]) Values() iter.Seq[V] { return func(yield func(V) bool) { if c == nil || c.closed.Load() { return } - // We extract this to a separate slice so we don't need to hold the lock - // since this would be expensive based on what the iterator is doing - c.mu.RLock() - values := make([]V, 0, len(c.entries)) - for _, e := range c.entries { - values = append(values, e.value) - } - c.mu.RUnlock() - - for _, v := range values { - if !yield(v) { - return - } - } + c.entries.Range(func(_, v any) bool { + return yield(v.(*Entry[V]).value) + }) } } @@ -247,9 +222,5 @@ func (c *Cache[V]) Close() { // This downside is also there in ristretto (if set is called concurrently) // it is even documented in the ristretto code as a comment close(c.writeCh) - - c.mu.Lock() - c.entries = nil - c.mu.Unlock() }) } From 853135becbfca1a2ab0253d55f583295ff4c00cb Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 16:56:11 +0530 Subject: [PATCH 45/46] fix: updates --- router/pkg/slowplancache/slow_plan_cache.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/router/pkg/slowplancache/slow_plan_cache.go b/router/pkg/slowplancache/slow_plan_cache.go index 0dcd4518c7..17fba9aa5f 100644 --- a/router/pkg/slowplancache/slow_plan_cache.go +++ 
b/router/pkg/slowplancache/slow_plan_cache.go @@ -28,7 +28,7 @@ type setRequest[V any] struct { // It tracks the minimum-duration entry so that rejection of cheaper entries is O(1). type Cache[V any] struct { entries sync.Map // map[uint64]*Entry[V] - size atomic.Int64 + size int64 maxSize int64 threshold time.Duration minKey uint64 @@ -146,10 +146,10 @@ func (c *Cache[V]) applySet(key uint64, value V, duration time.Duration) { } // If not at capacity, just add and update min tracking - if c.size.Load() < c.maxSize { + if c.size < c.maxSize { c.entries.Store(key, entry) - newSize := c.size.Add(1) - if newSize == 1 || duration < c.minDur { + c.size++ + if c.size == 1 || duration < c.minDur { c.minKey = key c.minDur = duration } From 6cfe7cf5000d86677ecbc9cc2177c9c62534e570 Mon Sep 17 00:00:00 2001 From: Milinda Dias Date: Wed, 18 Mar 2026 17:03:05 +0530 Subject: [PATCH 46/46] fix: tests --- router/pkg/slowplancache/slow_plan_cache_test.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/router/pkg/slowplancache/slow_plan_cache_test.go b/router/pkg/slowplancache/slow_plan_cache_test.go index 1b140333ae..69734bf771 100644 --- a/router/pkg/slowplancache/slow_plan_cache_test.go +++ b/router/pkg/slowplancache/slow_plan_cache_test.go @@ -318,13 +318,11 @@ func TestCache_ConcurrentAccess(t *testing.T) { ) for g := range numGoroutines { - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { for j := range opsPerRoutine { runMixedOps(c, g*opsPerRoutine+j) } - }() + }) } wg.Wait()