refactor: Integrate prefix-cache configuration into a single knob (#237)

kfirtoledo · web-flow · commit 086c2e0871f5 · 2025-07-16T14:13:30.000+03:00
Simplifies the configuration structure of the prefix-cache-scorer plugin by unifying all mode-specific parameters into a single configuration type. The prefix-cache-scorer now supports a mode option:
- When set to estimate (the default), it uses the GIE prefix cache scorer based on estimation from previous requests.
- When set to cache_tracking, it creates a prefix cache scorer based on KV-events from vLLM.

Signed-off-by: Kfir Toledo <kfir.toledo@ibm.com>
diff --git a/deploy/components/inference-gateway/deployments.yaml b/deploy/components/inference-gateway/deployments.yaml
@@ -19,7 +19,7 @@ spec:
       containers:
       - name: epp
         image: ghcr.io/llm-d/llm-d-inference-scheduler:latest
-        imagePullPolicy: IfNotPresent
+        imagePullPolicy: Always
         args:
         - -poolName
         - "${POOL_NAME}"
diff --git a/deploy/config/epp-prefix-cache-tracking-config.yaml b/deploy/config/epp-prefix-cache-tracking-config.yaml
@@ -5,14 +5,17 @@ kind: EndpointPickerConfig
 plugins:
 - type: single-profile-handler
 - type: decode-filter
-- type: kvcache-aware-scorer
+- type: prefix-cache-scorer
+  parameters:
+    mode: cache_tracking
+    kvCacheRedisAddr: ${REDIS_HOST}:${REDIS_PORT}
 - type: load-aware-scorer
 - type: max-score-picker
 schedulingProfiles:
 - name: default
   plugins:
   - pluginRef: decode-filter
-  - pluginRef: kvcache-aware-scorer
+  - pluginRef: prefix-cache-scorer
     weight: 2.0
   - pluginRef: load-aware-scorer
     weight: 1.0
diff --git a/deploy/config/epp-prefix-estimate-config.yaml b/deploy/config/epp-prefix-estimate-config.yaml
@@ -0,0 +1,19 @@
+# Sample EPP configuration that combines the prefix-cache scorer (in estimate mode) with the load-aware scorer
+#
+apiVersion: inference.networking.x-k8s.io/v1alpha1
+kind: EndpointPickerConfig
+plugins:
+- type: single-profile-handler
+- type: decode-filter
+- type: prefix-cache-scorer
+- type: load-aware-scorer
+- type: max-score-picker
+schedulingProfiles:
+- name: default
+  plugins:
+  - pluginRef: decode-filter
+  - pluginRef: prefix-cache-scorer
+    weight: 2.0
+  - pluginRef: load-aware-scorer
+    weight: 1.0
+  - pluginRef: max-score-picker
diff --git a/deploy/environments/dev/kubernetes-kgateway/patch-deployments.yaml b/deploy/environments/dev/kubernetes-kgateway/patch-deployments.yaml
@@ -24,8 +24,6 @@ spec:
         - --configFile
         - "/etc/epp/epp-config.yaml"
         env:
-          - name: KVCACHE_INDEXER_REDIS_ADDR
-            value: ${REDIS_HOST}:${REDIS_PORT}
           - name: HF_TOKEN
             valueFrom:
               secretKeyRef:
diff --git a/docs/architecture.md b/docs/architecture.md
@@ -211,14 +211,30 @@ with a value of `prefill`.<br>
 *Type:* prefill-filter<br>
 *Parameters:* None<br>
 
-**KvCacheAwareScorer**<br>
-Scores based on real KV-cache state on vLLM. It is more accurate than either the SessionAffinity
-or PrefixCachePlugin, but requires extra computation and cycles to track the current cache state<br>
-*Type:* kvcache-aware-scorer<br>
-*Parameters:* Due to the sensitivity of the parameters of this plugin, the following
-environment variables are used to configure the scorer:<br>
-`KVCACHE_INDEXER_REDIS_ADDR` - the address of the Redis server used<br>
-`HF_TOKEN` - the Hugginface token to be used.<br>
+**PrefixCacheScorer**<br>
+The `prefix-cache-scorer` scores a request based on KV-cache locality.
+It supports two modes: `estimate` and `cache_tracking`.<br>
+
+**`estimate` mode** (default):<br>
+This mode uses the default GIE prefix scorer and scores pods based on how much of the prompt is estimated to be present in the pod’s KV cache.<br>
+*Type*: `prefix-cache-scorer`<br>
+*Parameters:*<br>
+
+\- `hashBlockSize`: Specifies the size of the blocks used to split the input **prompt** when calculating block hashes. Defaults to `64` if not specified.<br>
+\- `maxPrefixBlocksToMatch`: Specifies the maximum number of prefix blocks to match. Defaults to `256` if not specified.<br>
+\- `lruCapacityPerServer`: Specifies the capacity of the LRU indexer, in number of entries per server (pod). Defaults to `31,250` if not specified.<br>
+
+**Note:** Setting `mode: estimate` explicitly is not required, as it is the default.
+
+**`cache_tracking` mode**:<br>
+This mode scores requests based on the actual KV cache state in vLLM. It is more accurate than both `SessionAffinity` and `PrefixCachePlugin` in `estimate` mode,
+but incurs additional computation overhead to track the current cache state.<br>
+*Type*: `prefix-cache-scorer`<br>
+*Parameters:*<br>
+\- `mode: cache_tracking`<br>
+\- `kvCacheRedisAddr`: The address of the Redis instance used for cache tracking.<br>
+Due to the sensitivity of this plugin’s parameters, the following environment variable is required when using `cache_tracking` mode:<br>
+\- `HF_TOKEN`: The Hugging Face token to be used.<br>
 
 **LoadAwareScorer**<br>
 Scores pods based on their load, based on the number of requests concurrently being processed.
diff --git a/go.mod b/go.mod
@@ -5,6 +5,7 @@ go 1.24.1
 toolchain go1.24.2
 
 require (
+	github.com/alicebob/miniredis/v2 v2.34.0
 	github.com/go-logr/logr v1.4.3
 	github.com/google/go-cmp v0.7.0
 	github.com/llm-d/llm-d-kv-cache-manager v0.1.1
@@ -19,6 +20,7 @@ require (
 
 require (
 	cel.dev/expr v0.23.0 // indirect
+	github.com/alicebob/gopher-json v0.0.0-20230218143504-906a9b012302 // indirect
 	github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/blang/semver/v4 v4.0.0 // indirect
@@ -40,20 +42,27 @@ require (
 	github.com/go-openapi/jsonpointer v0.21.0 // indirect
 	github.com/go-openapi/jsonreference v0.21.0 // indirect
 	github.com/go-openapi/swag v0.23.0 // indirect
+	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/google/btree v1.1.3 // indirect
 	github.com/google/cel-go v0.23.2 // indirect
 	github.com/google/gnostic-models v0.6.9 // indirect
+	github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
 	github.com/google/uuid v1.6.0 // indirect
+	github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect
 	github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 // indirect
 	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/moby/spdystream v0.5.0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
+	github.com/onsi/ginkgo/v2 v2.23.4 // indirect
+	github.com/onsi/gomega v1.37.0 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
@@ -65,6 +74,7 @@ require (
 	github.com/spf13/pflag v1.0.6 // indirect
 	github.com/stoewer/go-strcase v1.3.0 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
+	github.com/yuin/gopher-lua v1.1.1 // indirect
 	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
 	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect
 	go.opentelemetry.io/otel v1.35.0 // indirect
@@ -74,6 +84,7 @@ require (
 	go.opentelemetry.io/otel/sdk v1.35.0 // indirect
 	go.opentelemetry.io/otel/trace v1.35.0 // indirect
 	go.opentelemetry.io/proto/otlp v1.4.0 // indirect
+	go.uber.org/automaxprocs v1.6.0 // indirect
 	go.uber.org/multierr v1.11.0 // indirect
 	go.uber.org/zap v1.27.0 // indirect
 	go.yaml.in/yaml/v2 v2.4.2 // indirect
@@ -85,6 +96,7 @@ require (
 	golang.org/x/term v0.32.0 // indirect
 	golang.org/x/text v0.25.0 // indirect
 	golang.org/x/time v0.9.0 // indirect
+	golang.org/x/tools v0.31.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20250428153025-10db94c68c34 // indirect
diff --git a/go.sum b/go.sum
@@ -1,7 +1,13 @@
 cel.dev/expr v0.23.0 h1:wUb94w6OYQS4uXraxo9U+wUAs9jT47Xvl4iPgAwM2ss=
 cel.dev/expr v0.23.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw=
+github.com/alicebob/gopher-json v0.0.0-20230218143504-906a9b012302 h1:uvdUDbHQHO85qeSydJtItA4T55Pw6BtAejd0APRJOCE=
+github.com/alicebob/gopher-json v0.0.0-20230218143504-906a9b012302/go.mod h1:SGnFV6hVsYE877CKEZ6tDNTjaSXYUk6QqoIK6PrAtcc=
+github.com/alicebob/miniredis/v2 v2.34.0 h1:mBFWMaJSNL9RwdGRyEDoAAv8OQc5UlEhLDQggTglU/0=
+github.com/alicebob/miniredis/v2 v2.34.0/go.mod h1:kWShP4b58T1CW0Y5dViCd5ztzrDqRWqM3nksiyXk5s8=
 github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
 github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
+github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
+github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
 github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
@@ -124,6 +130,8 @@ github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
+github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
 github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
 github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
 github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
@@ -158,6 +166,8 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
 github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
+github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
 go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
 go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU=
diff --git a/pkg/plugins/register.go b/pkg/plugins/register.go
@@ -2,6 +2,7 @@ package plugins
 
 import (
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix"
 
 	"github.com/llm-d/llm-d-inference-scheduler/pkg/plugins/filter"
 	prerequest "github.com/llm-d/llm-d-inference-scheduler/pkg/plugins/pre-request"
@@ -17,7 +18,7 @@ func RegisterAllPlugins() {
 	plugins.Register(filter.PrefillFilterType, filter.PrefillFilterFactory)
 	plugins.Register(prerequest.PrefillHeaderHandlerType, prerequest.PrefillHeaderHandlerFactory)
 	plugins.Register(profile.PdProfileHandlerType, profile.PdProfileHandlerFactory)
-	plugins.Register(scorer.KvCacheAwareScorerType, scorer.KvCacheAwareScorerFactory)
+	plugins.Register(prefix.PrefixCachePluginType, scorer.PrefixCachePluginFactory)
 	plugins.Register(scorer.LoadAwareScorerType, scorer.LoadAwareScorerFactory)
 	plugins.Register(scorer.SessionAffinityScorerType, scorer.SessionAffinityScorerFactory)
 }
diff --git a/pkg/plugins/scorer/kvcache_aware.go b/pkg/plugins/scorer/kvcache_aware.go
@@ -3,6 +3,7 @@ package scorer
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os"
 	"strings"
@@ -13,41 +14,85 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
 
-const (
-	// KvCacheAwareScorerType is the type of the KvCacheAwareScorer
-	KvCacheAwareScorerType = "kvcache-aware-scorer"
+// PrefixCachePluginMode defines the mode of the prefix cache plugin. It can be either `estimate` or `cache_tracking`.
+type PrefixCachePluginMode string
 
-	kvCacheRedisEnvVar     = "KVCACHE_INDEXER_REDIS_ADDR"
+const (
+	// PrefixCachePluginModeEstimate is the mode where the plugin uses an estimated prefix cache.
+	PrefixCachePluginModeEstimate PrefixCachePluginMode = "estimate"
+	// PrefixCachePluginModeCacheTracking is the mode where the plugin tracks the cache state using KV-events.
+	PrefixCachePluginModeCacheTracking PrefixCachePluginMode = "cache_tracking"
+	// huggingFaceTokenEnvVar is the environment variable that holds the Hugging Face token.
 	huggingFaceTokenEnvVar = "HF_TOKEN"
 )
 
+// PrefixCachePluginConfig holds the configuration for the PrefixCachePlugin.
+type PrefixCachePluginConfig struct {
+	// Mode defines the mode of the prefix cache plugin.
+	Mode PrefixCachePluginMode `json:"mode"` // "estimate" (default when empty) or "cache_tracking"
+	// Config holds the configuration for the prefix cache plugin.
+	prefix.Config
+	// KVCacheRedisAddr is the address of the Redis instance used for cache tracking.
+	KVCacheRedisAddr string `json:"kvCacheRedisAddr"`
+}
+
 // compile-time type assertion
 var _ framework.Scorer = &KVCacheAwareScorer{}
 
-// KvCacheAwareScorerFactory defines the factory function for the KVCacheAwareScorer
-func KvCacheAwareScorerFactory(name string, _ json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) {
-	plugin, err := NewKVCacheAwareScorer(handle.Context())
-	if err != nil {
-		return nil, err
+// PrefixCachePluginFactory creates a new instance of the PrefixCachePlugin based on the provided configuration.
+func PrefixCachePluginFactory(name string, rawParameters json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) {
+	var cfg PrefixCachePluginConfig
+
+	logger := log.FromContext(handle.Context()).WithName("PrefixCachePluginFactory").V(logutil.DEFAULT)
+	// Fallback to empty JSON if parameters are missing
+	if rawParameters == nil {
+		rawParameters = []byte(`{}`)
+	}
+	// Unmarshal directly into the flat config struct
+	if err := json.Unmarshal(rawParameters, &cfg); err != nil {
+		return nil, fmt.Errorf("failed to parse %s plugin config: %w", prefix.PrefixCachePluginType, err)
+	}
+
+	mode := cfg.Mode
+	if mode == "" {
+		mode = PrefixCachePluginModeEstimate
+	}
+
+	switch mode {
+	case PrefixCachePluginModeEstimate:
+		logger.Info("Creating PrefixCachePlugin in estimate mode", "parameters", rawParameters)
+		return prefix.PrefixCachePluginFactory(name, rawParameters, handle)
+
+	case PrefixCachePluginModeCacheTracking:
+		logger.Info("Creating PrefixCachePluginConfig in cache tracking mode", "parameters", rawParameters)
+
+		plugin, err := NewKVCacheAwareScorer(handle.Context(), &cfg)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create %s plugin: %w", prefix.PrefixCachePluginType, err)
+		}
+		return plugin.WithName(name), nil
+
+	default:
+		return nil, fmt.Errorf("unknown mode for %s plugin: %s", prefix.PrefixCachePluginType, mode)
 	}
-	return plugin.WithName(name), nil
 }
 
 // NewKVCacheAwareScorer creates a new KVCacheAwareScorer instance.
 // It initializes the KVCacheIndexer using the Redis address taken from
 // cfg.KVCacheRedisAddr rather than from an environment variable.
 //
 // If the Redis address is empty, or if the indexer
 // fails to initialize, an error is returned.
-func NewKVCacheAwareScorer(ctx context.Context) (*KVCacheAwareScorer, error) {
+func NewKVCacheAwareScorer(ctx context.Context, cfg *PrefixCachePluginConfig) (*KVCacheAwareScorer, error) {
 	config := kvcache.NewDefaultConfig()
 
-	redisAddr := os.Getenv(kvCacheRedisEnvVar)
+	redisAddr := cfg.KVCacheRedisAddr
 	if redisAddr == "" {
-		return nil, fmt.Errorf("environment variable '%s' is not set", kvCacheRedisEnvVar)
+		return nil, errors.New("environment variable kvCacheRedisAddr is not set")
 	}
 
 	// to keep compatibility with deployments only specifying hostname:port: need to add protocol to front to enable parsing
@@ -76,7 +121,7 @@ func NewKVCacheAwareScorer(ctx context.Context) (*KVCacheAwareScorer, error) {
 	go kvCacheIndexer.Run(ctx)
 
 	return &KVCacheAwareScorer{
-		typedName:      plugins.TypedName{Type: KvCacheAwareScorerType},
+		typedName:      plugins.TypedName{Type: prefix.PrefixCachePluginType},
 		kvCacheIndexer: kvCacheIndexer,
 	}, nil
 }
diff --git a/scripts/kubernetes-dev-env.sh b/scripts/kubernetes-dev-env.sh
@@ -83,7 +83,7 @@ export PD_ENABLED="\"${PD_ENABLED:-false}\""
 # Token length threshold to trigger P/D logic
 export PD_PROMPT_LEN_THRESHOLD="\"${PD_PROMPT_LEN_THRESHOLD:-10}\""
 
-export EPP_CONFIG="${EPP_CONFIG:-deploy/config/epp-kvcache-load-config.yaml}"
+export EPP_CONFIG="${EPP_CONFIG:-deploy/config/epp-prefix-cache-tracking-config.yaml}"
 
 # Redis deployment name
 export REDIS_DEPLOYMENT_NAME="${REDIS_DEPLOYMENT_NAME:-lookup-server}"
@@ -92,7 +92,7 @@ export REDIS_DEPLOYMENT_NAME="${REDIS_DEPLOYMENT_NAME:-lookup-server}"
 export REDIS_SVC_NAME="${REDIS_SVC_NAME:-${REDIS_DEPLOYMENT_NAME}-service}"
 
 # Redis FQDN for internal Kubernetes communication
-export REDIS_HOST="${REDIS_HOST:-${REDIS_SVC_NAME}.${NAMESPACE}.svc.cluster.local}"
+export REDIS_HOST="${REDIS_HOST:-vllm-${REDIS_SVC_NAME}.${NAMESPACE}.svc.cluster.local}"
 
 # Redis port
 export REDIS_PORT="${REDIS_PORT:-8100}"
@@ -191,7 +191,7 @@ helm upgrade --install "$VLLM_HELM_RELEASE_NAME" "$VLLM_CHART_DIR" \
   --set redis.service.port="$REDIS_PORT"
 
 echo "INFO: Deploying Gateway Environment in namespace ${NAMESPACE}, ${POOL_NAME}"
-kubectl -n "${NAMESPACE}" create configmap epp-config --from-file=epp-config.yaml=${EPP_CONFIG}
+kubectl -n "${NAMESPACE}" create configmap epp-config --from-file=epp-config.yaml=<(envsubst < "${EPP_CONFIG}") --dry-run=client -o yaml | kubectl apply -f -
 kustomize build deploy/environments/dev/kubernetes-kgateway | envsubst | kubectl -n "${NAMESPACE}" apply -f -
 echo "INFO: Waiting for resources in namespace ${NAMESPACE} to become ready"
 # Wait for gateway resources
diff --git a/test/config/prefix_cache_mode_test.go b/test/config/prefix_cache_mode_test.go