diff --git a/.github/workflows/check-typos.yaml b/.github/workflows/check-typos.yaml
index cc543edf..25f71cf3 100644
--- a/.github/workflows/check-typos.yaml
+++ b/.github/workflows/check-typos.yaml
@@ -13,5 +13,5 @@ jobs:
         uses: actions/checkout@v5
 
       - name: Check typos
-        uses: crate-ci/typos@v1.36.2
+        uses: crate-ci/typos@v1.38.1
 
diff --git a/deploy/config/sim-epp-no-hit-lru.yaml b/deploy/config/sim-epp-no-hit-lru.yaml
new file mode 100644
index 00000000..8d022441
--- /dev/null
+++ b/deploy/config/sim-epp-no-hit-lru.yaml
@@ -0,0 +1,25 @@
+# Sample EPP configuration for running without P/D (disaggregated Prefill/Decode),
+# pairing prefix-cache-scorer with no-hit-lru-scorer and using a small hash block size for simulation purposes
+apiVersion: inference.networking.x-k8s.io/v1alpha1
+kind: EndpointPickerConfig
+plugins:
+- type: prefix-cache-scorer
+  parameters:
+    hashBlockSize: 5
+    maxPrefixBlocksToMatch: 256
+    lruCapacityPerServer: 31250
+- type: no-hit-lru-scorer
+  parameters:
+    lruSize: 2048
+- type: decode-filter
+- type: max-score-picker
+- type: single-profile-handler
+schedulingProfiles:
+- name: default
+  plugins:
+  - pluginRef: decode-filter
+  - pluginRef: max-score-picker
+  - pluginRef: prefix-cache-scorer
+    weight: 2
+  - pluginRef: no-hit-lru-scorer
+    weight: 1
diff --git a/docs/architecture.md b/docs/architecture.md
index 7465e590..10a9134b 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -364,6 +364,57 @@ used for the same session.
 
 ---
 
+#### NoHitLRUScorer
+
+Scores pods based on least recently used (LRU) ordering for cold requests (requests with no KV cache hits).
+This helps evenly distribute cache growth across pods, since cold requests result in new KV blocks being created.
+
+The scorer integrates with a prefix cache plugin to determine if a request has cache hits:
+- For cold requests (no cache hits): Ranks pods by LRU order, with never-used or least recently used pods
+  receiving higher scores (up to 1.0) and most recently used pods receiving lower scores (approaching 0.0)
+- For warm requests (cache hits): Returns neutral scores (0.5) for all pods to avoid interfering with
+  cache locality optimization
+
+LRU tracking applies to cold requests only: a pod is added to the LRU cache when it serves
+a cold request, not when it serves a request with cache hits.
+
+- **Type**: `no-hit-lru-scorer`
+- **Parameters**:
+  - `prefixPluginName` (optional): The name of the prefix cache plugin to read state from. Defaults to `prefix-cache-scorer`.
+  - `lruSize` (optional): The maximum number of pods to track in the LRU cache. Defaults to 1024.
+
+Example configuration:
+
+```yaml
+plugins:
+  - type: prefix-cache-scorer
+    parameters:
+      hashBlockSize: 5
+      maxPrefixBlocksToMatch: 256
+      lruCapacityPerServer: 31250
+  - type: no-hit-lru-scorer
+    parameters:
+      lruSize: 2048
+  - type: decode-filter
+  - type: max-score-picker
+  - type: single-profile-handler
+schedulingProfiles:
+  - name: default
+    plugins:
+      - pluginRef: decode-filter
+      - pluginRef: max-score-picker
+      - pluginRef: prefix-cache-scorer
+        weight: 2
+      - pluginRef: no-hit-lru-scorer
+        weight: 1
+```
+
+**Note:** This scorer is designed to work alongside a prefix cache scorer (such as `prefix-cache-scorer` or
+`precise-prefix-cache-scorer`). If no prefix cache state is available, all requests are treated as cold.
+When combining the two, list the prefix cache scorer before `no-hit-lru-scorer` in the scheduling profile, as in the example above.
+
+---
+
 ### Sample Disaggregated Prefill/Decode Configuration
 
 The following is an example of what a configuration for disaggregated Prefill/Decode might look like:
diff --git a/go.mod b/go.mod
index cd415a91..16aa6894 100644
--- a/go.mod
+++ b/go.mod
@@ -8,20 +8,21 @@ require (
 	github.com/go-logr/logr v1.4.3
 	github.com/google/go-cmp v0.7.0
 	github.com/google/uuid v1.6.0
+	github.com/hashicorp/golang-lru/v2 v2.0.7
 	github.com/jellydator/ttlcache/v3 v3.4.0
-	github.com/llm-d/llm-d-kv-cache-manager v0.3.1
-	github.com/onsi/ginkgo/v2 v2.25.3
+	github.com/llm-d/llm-d-kv-cache-manager v0.3.2
+	github.com/onsi/ginkgo/v2 v2.26.0
 	github.com/onsi/gomega v1.38.2
 	github.com/openai/openai-go v1.12.0
 	github.com/stretchr/testify v1.11.1
-	google.golang.org/grpc v1.75.1
+	google.golang.org/grpc v1.76.0
 	k8s.io/api v0.34.1
 	k8s.io/apiextensions-apiserver v0.34.1
 	k8s.io/apimachinery v0.34.1
 	k8s.io/client-go v0.34.1
-	sigs.k8s.io/controller-runtime v0.22.1
-	sigs.k8s.io/gateway-api v1.3.0
-	sigs.k8s.io/gateway-api-inference-extension v1.0.0
+	sigs.k8s.io/controller-runtime v0.22.3
+	sigs.k8s.io/gateway-api v1.4.0
+	sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251016181044-831a919943ba
 )
 
 require (
@@ -30,7 +31,7 @@ require (
 	github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/blang/semver/v4 v4.0.0 // indirect
-	github.com/cenkalti/backoff/v5 v5.0.2 // indirect
+	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect
 	github.com/daulet/tokenizers v1.22.1 // indirect
@@ -39,8 +40,8 @@ require (
 	github.com/dgraph-io/ristretto/v2 v2.3.0 // indirect
 	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
-	github.com/emicklei/go-restful/v3 v3.12.2 // indirect
-	github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect
+	github.com/emicklei/go-restful/v3 v3.13.0 // indirect
+	github.com/envoyproxy/go-control-plane/envoy v1.35.0 // indirect
 	github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
 	github.com/evanphx/json-patch/v5 v5.9.11 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
@@ -48,42 +49,40 @@ require (
 	github.com/fxamacker/cbor/v2 v2.9.0 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-logr/zapr v1.3.0 // indirect
-	github.com/go-openapi/jsonpointer v0.21.0 // indirect
+	github.com/go-openapi/jsonpointer v0.21.2 // indirect
 	github.com/go-openapi/jsonreference v0.21.0 // indirect
-	github.com/go-openapi/swag v0.23.0 // indirect
+	github.com/go-openapi/swag v0.23.1 // indirect
 	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/google/btree v1.1.3 // indirect
 	github.com/google/cel-go v0.26.0 // indirect
 	github.com/google/gnostic-models v0.7.0 // indirect
-	github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a // indirect
+	github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6 // indirect
 	github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect
 	github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
-	github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect
-	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
-	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/mailru/easyjson v0.9.0 // indirect
 	github.com/moby/spdystream v0.5.0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
 	github.com/pebbe/zmq4 v1.4.0 // indirect
-	github.com/pkg/errors v0.9.1 // indirect
 	github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
-	github.com/prometheus/client_golang v1.23.0 // indirect
+	github.com/prometheus/client_golang v1.23.2 // indirect
 	github.com/prometheus/client_model v0.6.2 // indirect
-	github.com/prometheus/common v0.65.0 // indirect
-	github.com/prometheus/procfs v0.16.1 // indirect
-	github.com/prometheus/prometheus v0.305.0 // indirect
+	github.com/prometheus/common v0.67.1 // indirect
+	github.com/prometheus/procfs v0.17.0 // indirect
+	github.com/prometheus/prometheus v0.306.0 // indirect
 	github.com/redis/go-redis/v9 v9.11.0 // indirect
 	github.com/spf13/cobra v1.9.1 // indirect
-	github.com/spf13/pflag v1.0.6 // indirect
+	github.com/spf13/pflag v1.0.7 // indirect
 	github.com/stoewer/go-strcase v1.3.0 // indirect
-	github.com/tidwall/gjson v1.14.4 // indirect
+	github.com/tidwall/gjson v1.18.0 // indirect
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.1 // indirect
 	github.com/tidwall/sjson v1.2.5 // indirect
@@ -92,42 +91,44 @@ require (
 	github.com/x448/float16 v0.8.4 // indirect
 	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
 	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
-	go.opentelemetry.io/otel v1.37.0 // indirect
-	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 // indirect
-	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 // indirect
-	go.opentelemetry.io/otel/metric v1.37.0 // indirect
-	go.opentelemetry.io/otel/sdk v1.37.0 // indirect
-	go.opentelemetry.io/otel/trace v1.37.0 // indirect
-	go.opentelemetry.io/proto/otlp v1.6.0 // indirect
+	go.opentelemetry.io/otel v1.38.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect
+	go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 // indirect
+	go.opentelemetry.io/otel/metric v1.38.0 // indirect
+	go.opentelemetry.io/otel/sdk v1.38.0 // indirect
+	go.opentelemetry.io/otel/trace v1.38.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.7.1 // indirect
 	go.uber.org/atomic v1.11.0 // indirect
 	go.uber.org/automaxprocs v1.6.0 // indirect
 	go.uber.org/multierr v1.11.0 // indirect
 	go.uber.org/zap v1.27.0 // indirect
-	go.yaml.in/yaml/v2 v2.4.2 // indirect
+	go.yaml.in/yaml/v2 v2.4.3 // indirect
 	go.yaml.in/yaml/v3 v3.0.4 // indirect
 	golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 // indirect
-	golang.org/x/net v0.43.0 // indirect
-	golang.org/x/oauth2 v0.30.0 // indirect
-	golang.org/x/sync v0.16.0 // indirect
-	golang.org/x/sys v0.35.0 // indirect
-	golang.org/x/term v0.34.0 // indirect
-	golang.org/x/text v0.28.0 // indirect
+	golang.org/x/mod v0.28.0 // indirect
+	golang.org/x/net v0.44.0 // indirect
+	golang.org/x/oauth2 v0.31.0 // indirect
+	golang.org/x/sync v0.17.0 // indirect
+	golang.org/x/sys v0.36.0 // indirect
+	golang.org/x/term v0.35.0 // indirect
+	golang.org/x/text v0.29.0 // indirect
 	golang.org/x/time v0.12.0 // indirect
-	golang.org/x/tools v0.36.0 // indirect
+	golang.org/x/tools v0.37.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
-	google.golang.org/genproto/googleapis/api v0.0.0-20250707201910-8d1bb00bc6a7 // indirect
-	google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 // indirect
-	google.golang.org/protobuf v1.36.7 // indirect
-	gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 // indirect
+	google.golang.org/protobuf v1.36.10 // indirect
+	gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 	k8s.io/apiserver v0.34.1 // indirect
 	k8s.io/component-base v0.34.1 // indirect
 	k8s.io/klog/v2 v2.130.1 // indirect
-	k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect
-	k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect
+	k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect
+	k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d // indirect
 	sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
-	sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
+	sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
 	sigs.k8s.io/randfill v1.0.0 // indirect
 	sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
 	sigs.k8s.io/yaml v1.6.0 // indirect
diff --git a/go.sum b/go.sum
index 256fb72d..5b6f6f25 100644
--- a/go.sum
+++ b/go.sum
@@ -60,8 +60,8 @@ github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
 github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
 github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
 github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
-github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7Zss8=
-github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
+github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
+github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls=
@@ -83,10 +83,10 @@ github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/r
 github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
-github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
-github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
-github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A=
-github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw=
+github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes=
+github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/envoyproxy/go-control-plane/envoy v1.35.0 h1:ixjkELDE+ru6idPxcHLj8LBVc2bFP7iBytj353BoHUo=
+github.com/envoyproxy/go-control-plane/envoy v1.35.0/go.mod h1:09qwbGVuSWWAyN5t/b3iyVfz5+z8QWGrzkoqm/8SbEs=
 github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8=
 github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU=
 github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k=
@@ -99,6 +99,12 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S
 github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
 github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
 github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
+github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs=
+github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo=
+github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M=
+github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk=
+github.com/gkampitakis/go-snaps v0.5.14 h1:3fAqdB6BCPKHDMHAKRwtPUwYexKtGrNuw8HX/T/4neo=
+github.com/gkampitakis/go-snaps v0.5.14/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
 github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
@@ -106,14 +112,16 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
 github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
-github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
-github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
+github.com/go-openapi/jsonpointer v0.21.2 h1:AqQaNADVwq/VnkCmQg6ogE+M3FOsKTytwges0JdwVuA=
+github.com/go-openapi/jsonpointer v0.21.2/go.mod h1:50I1STOfbY1ycR8jGz8DaMeLCdXiI6aDteEdRNNzpdk=
 github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
 github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
-github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
-github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
+github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU=
+github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0=
 github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
 github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
+github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
+github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
@@ -133,8 +141,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
 github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
-github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a h1://KbezygeMJZCSHH+HgUZiTeSoiuFspbMg1ge+eFj18=
-github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a/go.mod h1:5hDyRhoBCxViHszMt12TnOpEI4VVi+U8Gm9iphldiMA=
+github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6 h1:EEHtgt9IwisQ2AZ4pIsMjahcegHh6rmhqxzIRQIyepY=
+github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U=
 github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
 github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
@@ -147,8 +155,8 @@ github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5T
 github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA=
 github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc h1:GN2Lv3MGO7AS6PrRoT6yV5+wkrOpcszoIsO4+4ds248=
 github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk=
-github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo=
-github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs=
 github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
 github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -157,6 +165,8 @@ github.com/jellydator/ttlcache/v3 v3.4.0 h1:YS4P125qQS0tNhtL6aeYkheEaB/m8HCqdMMP
 github.com/jellydator/ttlcache/v3 v3.4.0/go.mod h1:Hw9EgjymziQD3yGsQdf1FqFdpp7YjFMd4Srg5EJlgD4=
 github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
 github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/joshdk/go-junit v1.0.0 h1:S86cUKIdwBHWwA6xCmFlf3RTLfVXYQfvanM5Uh+K6GE=
+github.com/joshdk/go-junit v1.0.0/go.mod h1:TiiV0PqkaNfFXjEiyjWM3XXrhVyCa1K4Zfga6W52ung=
 github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
 github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
 github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
@@ -171,10 +181,14 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
-github.com/llm-d/llm-d-kv-cache-manager v0.3.1 h1:SY3z/kg1RI8tNVDdMSgvNgmVkBQqtsJYY0aVMX24zL0=
-github.com/llm-d/llm-d-kv-cache-manager v0.3.1/go.mod h1:q6u7LnzMxNcHHb5/LRdHNNeZzzGMSENFSP1NGfsJEmA=
-github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
-github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/llm-d/llm-d-kv-cache-manager v0.3.2 h1:omSTXtuII3ol37CaoI9h+2VxE0m8EoeVOor+CkQh99I=
+github.com/llm-d/llm-d-kv-cache-manager v0.3.2/go.mod h1:q6u7LnzMxNcHHb5/LRdHNNeZzzGMSENFSP1NGfsJEmA=
+github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
+github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
+github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo=
+github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg=
+github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE=
+github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A=
 github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU=
 github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -192,8 +206,8 @@ github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+
 github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
 github.com/oklog/ulid/v2 v2.1.1 h1:suPZ4ARWLOJLegGFiZZ1dFAkqzhMjL3J1TzI+5wHz8s=
 github.com/oklog/ulid/v2 v2.1.1/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ=
-github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw=
-github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE=
+github.com/onsi/ginkgo/v2 v2.26.0 h1:1J4Wut1IlYZNEAWIV3ALrT9NfiaGW2cDCJQSFQMs/gE=
+github.com/onsi/ginkgo/v2 v2.26.0/go.mod h1:qhEywmzWTBUY88kfO0BRvX4py7scov9yR+Az2oavUzw=
 github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
 github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
 github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0=
@@ -211,16 +225,16 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
 github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
-github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc=
-github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE=
+github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
+github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
 github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
 github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
-github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE=
-github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
-github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
-github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
-github.com/prometheus/prometheus v0.305.0 h1:UO/LsM32/E9yBDtvQj8tN+WwhbyWKR10lO35vmFLx0U=
-github.com/prometheus/prometheus v0.305.0/go.mod h1:JG+jKIDUJ9Bn97anZiCjwCxRyAx+lpcEQ0QnZlUlbwY=
+github.com/prometheus/common v0.67.1 h1:OTSON1P4DNxzTg4hmKCc37o4ZAZDv0cfXLkOt0oEowI=
+github.com/prometheus/common v0.67.1/go.mod h1:RpmT9v35q2Y+lsieQsdOh5sXZ6ajUGC8NjZAmr8vb0Q=
+github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
+github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
+github.com/prometheus/prometheus v0.306.0 h1:Q0Pvz/ZKS6vVWCa1VSgNyNJlEe8hxdRlKklFg7SRhNw=
+github.com/prometheus/prometheus v0.306.0/go.mod h1:7hMSGyZHt0dcmZ5r4kFPJ/vxPQU99N5/BGwSPDxeZrQ=
 github.com/prometheus/sigv4 v0.2.0 h1:qDFKnHYFswJxdzGeRP63c4HlH3Vbn1Yf/Ao2zabtVXk=
 github.com/prometheus/sigv4 v0.2.0/go.mod h1:D04rqmAaPPEUkjRQxGqjoxdyJuyCh6E0M18fZr0zBiE=
 github.com/redis/go-redis/v9 v9.11.0 h1:E3S08Gl/nJNn5vkxd2i78wZxWAPNZgUNTp8WIJUAiIs=
@@ -230,8 +244,9 @@ github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWN
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo=
 github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0=
-github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
 github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/spf13/pflag v1.0.7 h1:vN6T9TfwStFPFM5XzjsvmzZkLuaLX+HS+0SeFLRgU6M=
+github.com/spf13/pflag v1.0.7/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs=
 github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@@ -246,8 +261,8 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
 github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
-github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM=
-github.com/tidwall/gjson v1.14.4/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
+github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
 github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
 github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
@@ -269,22 +284,24 @@ go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJyS
 go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus=
 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
-go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
-go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
-go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 h1:dNzwXjZKpMpE2JhmO+9HsPl42NIXFIFSUSSs0fiqra0=
-go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0/go.mod h1:90PoxvaEB5n6AOdZvi+yWJQoE95U8Dhhw2bSyRqnTD0=
-go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 h1:JgtbA0xkWHnTmYk7YusopJFX6uleBmAuZ8n05NEh8nQ=
-go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0/go.mod h1:179AK5aar5R3eS9FucPy6rggvU0g52cvKId8pv4+v0c=
-go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
-go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
-go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI=
-go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg=
-go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc=
-go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps=
-go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
-go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
-go.opentelemetry.io/proto/otlp v1.6.0 h1:jQjP+AQyTf+Fe7OKj/MfkDrmK4MNVtw2NpXsf9fefDI=
-go.opentelemetry.io/proto/otlp v1.6.0/go.mod h1:cicgGehlFuNdgZkcALOCh3VE6K/u2tAjzlRhDwmVpZc=
+go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
+go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk=
+go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 h1:kJxSDN4SgWWTjG/hPp3O7LCGLcHXFlvS2/FFOrwL+SE=
+go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0/go.mod h1:mgIOzS7iZeKJdeB8/NYHrJ48fdGc71Llo5bJ1J4DWUE=
+go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
+go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
+go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E=
+go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg=
+go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM=
+go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA=
+go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
+go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
+go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
+go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
 go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
 go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
 go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
@@ -295,51 +312,53 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
 go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
 go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
 go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
-go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
-go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
+go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
+go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
 go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
 go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
-golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
+golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=
+golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=
 golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 h1:yqrTHse8TCMW1M1ZCP+VAR/l0kKxwaAIqN/il7x4voA=
 golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU=
 golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U=
+golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
-golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
-golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
-golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
+golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I=
+golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY=
+golang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo=
+golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
-golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
+golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
-golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
-golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4=
-golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw=
+golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
+golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ=
+golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
-golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
+golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
+golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
 golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
 golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
-golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
+golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE=
+golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -348,21 +367,21 @@ gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw
 gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
 gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
 gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
-google.golang.org/api v0.238.0 h1:+EldkglWIg/pWjkq97sd+XxH7PxakNYoe/rkSTbnvOs=
-google.golang.org/api v0.238.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50=
-google.golang.org/genproto/googleapis/api v0.0.0-20250707201910-8d1bb00bc6a7 h1:FiusG7LWj+4byqhbvmB+Q93B/mOxJLN2DTozDuZm4EU=
-google.golang.org/genproto/googleapis/api v0.0.0-20250707201910-8d1bb00bc6a7/go.mod h1:kXqgZtrWaf6qS3jZOCnCH7WYfrvFjkC51bM8fz3RsCA=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 h1:pFyd6EwwL2TqFf8emdthzeX+gZE1ElRq3iM8pui4KBY=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
-google.golang.org/grpc v1.75.1 h1:/ODCNEuf9VghjgO3rqLcfg8fiOP0nSluljWFlDxELLI=
-google.golang.org/grpc v1.75.1/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ=
-google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A=
-google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
+google.golang.org/api v0.239.0 h1:2hZKUnFZEy81eugPs4e2XzIJ5SOwQg0G82bpXD65Puo=
+google.golang.org/api v0.239.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50=
+google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY=
+google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 h1:pmJpJEvT846VzausCQ5d7KreSROcDqmO388w5YbnltA=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1/go.mod h1:GmFNa4BdJZ2a8G+wCe9Bg3wwThLrJun751XstdJt5Og=
+google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A=
+google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c=
+google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
+google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
-gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
-gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
+gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo=
+gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
 gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
 gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
@@ -384,20 +403,20 @@ k8s.io/component-base v0.34.1 h1:v7xFgG+ONhytZNFpIz5/kecwD+sUhVE6HU7qQUiRM4A=
 k8s.io/component-base v0.34.1/go.mod h1:mknCpLlTSKHzAQJJnnHVKqjxR7gBeHRv0rPXA7gdtQ0=
 k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
 k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
-k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA=
-k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts=
-k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y=
-k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 h1:liMHz39T5dJO1aOKHLvwaCjDbf07wVh6yaUlTpunnkE=
+k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts=
+k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d h1:wAhiDyZ4Tdtt7e46e9M5ZSAJ/MnPGPs+Ki1gHw4w1R0=
+k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
 sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM=
 sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw=
-sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg=
-sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY=
-sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M=
-sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk=
-sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8=
-sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc=
-sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
-sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
+sigs.k8s.io/controller-runtime v0.22.3 h1:I7mfqz/a/WdmDCEnXmSPm8/b/yRTy6JsKKENTijTq8Y=
+sigs.k8s.io/controller-runtime v0.22.3/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8=
+sigs.k8s.io/gateway-api v1.4.0 h1:ZwlNM6zOHq0h3WUX2gfByPs2yAEsy/EenYJB78jpQfQ=
+sigs.k8s.io/gateway-api v1.4.0/go.mod h1:AR5RSqciWP98OPckEjOjh2XJhAe2Na4LHyXD2FUY7Qk=
+sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251016181044-831a919943ba h1:NfFYYePPDs+DOSwm/KerEwm5qETJ1y0dyyN1CpDAlAw=
+sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251016181044-831a919943ba/go.mod h1:f9lu6hnm0Ywrdm7SYSjzBR16UGv7BmwjArQveY9IynM=
+sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=
+sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
 sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
 sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
 sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=
diff --git a/pkg/plugins/profile/pd_profile_handler.go b/pkg/plugins/profile/pd_profile_handler.go
index 3bfe056d..ff0a5f23 100644
--- a/pkg/plugins/profile/pd_profile_handler.go
+++ b/pkg/plugins/profile/pd_profile_handler.go
@@ -42,7 +42,7 @@ func PdProfileHandlerFactory(name string, rawParameters json.RawMessage, _ plugi
 		DecodeProfile:    defaultDecodeProfile,
 		PrefillProfile:   defaultPrefillProfile,
 		PrefixPluginName: defaultPrefixPluginName,
-		HashBlockSize:    prefix.DefaultHashBlockSize,
+		HashBlockSize:    prefix.DefaultBlockSize,
 	}
 	if rawParameters != nil {
 		if err := json.Unmarshal(rawParameters, &parameters); err != nil {
@@ -106,6 +106,12 @@ func (h *PdProfileHandler) Pick(ctx context.Context, cycleState *types.CycleStat
 	}
 
 	if h.pdThreshold > 0 {
+		userInput, err := getUserInputBytes(request)
+		if err != nil {
+			log.FromContext(ctx).V(logutil.DEBUG).Error(err, "Failed to get user input bytes")
+			return nil
+		}
+
 		// if we're here that means decode profile ran successfully, and we have additional profile configured that didn't run yet,
 		// which means PD is enabled (otherwise, prefill profile is not configured at all and this profile handler is not used).
 		// inspect decode execution result to decide if prefill should run or not.
@@ -117,12 +123,12 @@ func (h *PdProfileHandler) Pick(ctx context.Context, cycleState *types.CycleStat
 		} else {
 			decodePod := profileResults[h.decodeProfile].TargetPods[0].GetPod().NamespacedName
 			hitPrefix := max(prefixState.PrefixCacheServers[prefix.ServerID(decodePod)]-1, 0) // The first hit is always the model name
-			hitPercentagePrefix = float64(hitPrefix*h.hashBlockSize) / float64(len(request.Prompt))
+			hitPercentagePrefix = float64(hitPrefix*h.hashBlockSize) / float64(len(userInput))
 			log.FromContext(ctx).V(logutil.DEBUG).Info("Computed hit percentage for prefix cache", "hitPercentage", hitPercentagePrefix,
-				"promptLength", len(request.Prompt))
+				"promptLength", len(userInput))
 		}
 
-		if (1.0-hitPercentagePrefix)*float64(len(request.Prompt)) < float64(h.pdThreshold) {
+		if (1.0-hitPercentagePrefix)*float64(len(userInput)) < float64(h.pdThreshold) {
 			log.FromContext(ctx).Info("Non-cached suffix is smaller than threshold, using decode profile only", "hitPercentage", hitPercentagePrefix)
 			return map[string]*framework.SchedulerProfile{} // do not run prefill
 		}
@@ -160,3 +166,12 @@ func (h *PdProfileHandler) ProcessResults(_ context.Context, _ *types.CycleState
 		},
 	}, nil
 }
+
+// getUserInputBytes returns the prompt bytes of a completions request, or otherwise the
+// JSON encoding of the chat-completions messages.
+func getUserInputBytes(request *types.LLMRequest) ([]byte, error) {
+	if completions := request.Body.Completions; completions != nil { // assumed to be valid if not nil
+		return []byte(completions.Prompt), nil
+	}
+	return json.Marshal(request.Body.ChatCompletions.Messages) // must be chat-completions at this point
+}
diff --git a/pkg/plugins/register.go b/pkg/plugins/register.go
index 6f28a0c9..c3e5a665 100644
--- a/pkg/plugins/register.go
+++ b/pkg/plugins/register.go
@@ -20,4 +20,5 @@ func RegisterAllPlugins() {
 	plugins.Register(scorer.LoadAwareType, scorer.LoadAwareFactory)
 	plugins.Register(scorer.SessionAffinityType, scorer.SessionAffinityFactory)
 	plugins.Register(scorer.ActiveRequestType, scorer.ActiveRequestFactory)
+	plugins.Register(scorer.NoHitLRUType, scorer.NoHitLRUFactory)
 }
diff --git a/pkg/plugins/scorer/active_request.go b/pkg/plugins/scorer/active_request.go
index f4018d96..da9b53c0 100644
--- a/pkg/plugins/scorer/active_request.go
+++ b/pkg/plugins/scorer/active_request.go
@@ -49,6 +49,8 @@ func (r *requestEntry) String() string {
 
 // compile-time type assertion
 var _ framework.Scorer = &ActiveRequest{}
+var _ requestcontrol.PreRequest = &ActiveRequest{}
+var _ requestcontrol.ResponseComplete = &ActiveRequest{}
 
 // ActiveRequestFactory defines the factory function for the ActiveRequest scorer.
 func ActiveRequestFactory(name string, rawParameters json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) {
@@ -90,8 +92,8 @@ func NewActiveRequest(ctx context.Context, params *ActiveRequestParameters) *Act
 		mutex:        &sync.RWMutex{},
 	}
 	// callback to decrement count when requests expire
-	// most requests will be removed in PostResponse, but this ensures
-	// that we don't leak pod counts if PostResponse is not called
+	// most requests will be removed in ResponseComplete, but this ensures
+	// that we don't leak pod counts if ResponseComplete is not called
 	requestCache.OnEviction(func(_ context.Context, reason ttlcache.EvictionReason,
 		item *ttlcache.Item[string, *requestEntry]) {
 		if reason == ttlcache.EvictionReasonExpired {
@@ -187,14 +189,14 @@ func (s *ActiveRequest) PreRequest(ctx context.Context, request *types.LLMReques
 	}
 }
 
-// PostResponse is called after a response is sent to the client.
+// ResponseComplete is called after a response is sent to the client.
 // It removes the specific request entry from the cache and decrements
 // the pod count.
-func (s *ActiveRequest) PostResponse(ctx context.Context, request *types.LLMRequest,
+func (s *ActiveRequest) ResponseComplete(ctx context.Context, request *types.LLMRequest,
 	_ *requestcontrol.Response, targetPod *backend.Pod) {
-	debugLogger := log.FromContext(ctx).V(logutil.DEBUG).WithName("ActiveRequest.PostResponse")
+	debugLogger := log.FromContext(ctx).V(logutil.DEBUG).WithName("ActiveRequest.ResponseComplete")
 	if targetPod == nil {
-		debugLogger.Info("Skipping PostResponse because targetPod is nil")
+		debugLogger.Info("Skipping ResponseComplete because targetPod is nil")
 		return
 	}
 
diff --git a/pkg/plugins/scorer/active_request_test.go b/pkg/plugins/scorer/active_request_test.go
index 72ea0655..11e09b47 100644
--- a/pkg/plugins/scorer/active_request_test.go
+++ b/pkg/plugins/scorer/active_request_test.go
@@ -168,7 +168,7 @@ func TestActiveRequestScorer_PreRequest(t *testing.T) {
 	}
 }
 
-func TestActiveRequestScorer_PostResponse(t *testing.T) {
+func TestActiveRequestScorer_ResponseComplete(t *testing.T) {
 	ctx := context.Background()
 
 	scorer := NewActiveRequest(ctx, nil)
@@ -197,7 +197,7 @@ func TestActiveRequestScorer_PostResponse(t *testing.T) {
 	// Verify initial state
 	compositeKey := "default/pod-a.test-request-1"
 	if !scorer.requestCache.Has(compositeKey) {
-		t.Fatal("Request should be in cache before PostResponse")
+		t.Fatal("Request should be in cache before ResponseComplete")
 	}
 
 	scorer.mutex.RLock()
@@ -208,11 +208,11 @@ func TestActiveRequestScorer_PostResponse(t *testing.T) {
 	}
 
 	// Call PostResponse
-	scorer.PostResponse(ctx, request, &requestcontrol.Response{}, podA.GetPod())
+	scorer.ResponseComplete(ctx, request, &requestcontrol.Response{}, podA.GetPod())
 
 	// Check request is removed from cache
 	if scorer.requestCache.Has(compositeKey) {
-		t.Errorf("Request should be removed from cache after PostResponse")
+		t.Errorf("Request should be removed from cache after ResponseComplete")
 	}
 
 	// Check pod count is decremented and removed (since it was 1)
diff --git a/pkg/plugins/scorer/no_hit_lru.go b/pkg/plugins/scorer/no_hit_lru.go
new file mode 100644
index 00000000..2e9a5fa6
--- /dev/null
+++ b/pkg/plugins/scorer/no_hit_lru.go
@@ -0,0 +1,297 @@
+package scorer
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	lru "github.com/hashicorp/golang-lru/v2"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
+	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
+)
+
+const (
+	// NoHitLRUType is the plugin type name under which the NoHitLRU scorer is registered
+	NoHitLRUType = "no-hit-lru-scorer"
+
+	// defaultLRUSize is the LRU cache capacity (in pods) used when no positive lruSize is configured
+	defaultLRUSize = 1024
+)
+
+// compile-time checks that *NoHitLRU implements the plugin interfaces it is used as
+var _ framework.Scorer = (*NoHitLRU)(nil)
+var _ requestcontrol.PreRequest = (*NoHitLRU)(nil)
+
+// NoHitLRUParameters defines the JSON-configurable parameters for the NoHitLRU scorer.
+type NoHitLRUParameters struct {
+	// PrefixPluginName defines the name of the prefix cache plugin to read state from.
+	// Defaults to prefix.PrefixCachePluginType when left empty.
+	PrefixPluginName string `json:"prefixPluginName"`
+
+	// LRUSize is the maximum number of pods tracked in the LRU cache; values <= 0 select defaultLRUSize.
+	LRUSize int `json:"lruSize"`
+}
+
+// coldRequestState records, for one request, whether the prefix cache reported no hits.
+type coldRequestState struct {
+	isCold bool // true when the request missed the cache
+}
+
+// Clone implements the plugins.StateData interface by returning an independent copy.
+func (c *coldRequestState) Clone() plugins.StateData {
+	dup := *c
+	return &dup
+}
+
+// NoHitLRUFactory defines the factory function for the NoHitLRU scorer.
+// It fails if the raw parameters are not valid JSON or if the scorer cannot be created.
+func NoHitLRUFactory(name string, rawParameters json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) {
+	parameters := NoHitLRUParameters{}
+	if rawParameters != nil {
+		if err := json.Unmarshal(rawParameters, &parameters); err != nil {
+			return nil, fmt.Errorf("failed to parse the parameters of the '%s' scorer - %w", NoHitLRUType, err)
+		}
+	}
+
+	// Note: We don't enforce that the prefix plugin exists here, the scorer treats missing
+	// prefix cache state as a cold request. Parameter defaults are applied by NewNoHitLRU.
+	noHitLRU := NewNoHitLRU(handle.Context(), &parameters)
+	if noHitLRU == nil { // creation failed: calling WithName on a nil scorer would panic
+		return nil, fmt.Errorf("failed to create the '%s' scorer", NoHitLRUType)
+	}
+	return noHitLRU.WithName(name), nil
+}
+
+// NewNoHitLRU creates a new NoHitLRU scorer. A nil params, an empty PrefixPluginName or a
+// non-positive LRUSize select the defaults. It returns nil if the LRU cache cannot be created.
+func NewNoHitLRU(ctx context.Context, params *NoHitLRUParameters) *NoHitLRU {
+	prefixPluginName := prefix.PrefixCachePluginType
+	lruSize := defaultLRUSize
+	if params != nil {
+		if params.PrefixPluginName != "" {
+			prefixPluginName = params.PrefixPluginName
+		}
+		if params.LRUSize > 0 {
+			lruSize = params.LRUSize
+		}
+	}
+
+	lruCache, err := lru.New[string, struct{}](lruSize)
+	if err != nil {
+		// err is attached by the logger itself; keep the message constant and the size structured
+		log.FromContext(ctx).Error(err, "failed to initialize NoHitLRU scorer: could not create LRU cache", "size", lruSize)
+		return nil
+	}
+	return &NoHitLRU{
+		typedName:        plugins.TypedName{Type: NoHitLRUType},
+		lruCache:         lruCache,
+		prefixPluginName: prefixPluginName,
+		pluginState:      plugins.NewPluginState(ctx),
+	}
+}
+
+// NoHitLRU is a scorer that favors pods that were least recently used for cold requests
+// (requests without prefix cache hits). This can help evenly distribute cache growth,
+// since cold requests result in more new KV blocks.
+type NoHitLRU struct {
+	typedName        plugins.TypedName
+	lruCache         *lru.Cache[string, struct{}] // pod NamespacedName string -> dummy value (we only care about order)
+	prefixPluginName string
+	pluginState      *plugins.PluginState
+}
+
+// TypedName returns the plugin's type together with the name set via WithName.
+func (s *NoHitLRU) TypedName() plugins.TypedName {
+	return s.typedName
+}
+
+// WithName sets the name of the plugin and returns the same scorer, so calls can be chained.
+func (s *NoHitLRU) WithName(name string) *NoHitLRU {
+	s.typedName.Name = name
+	return s
+}
+
+// isColdRequest reports whether the request is cold according to the prefix cache state
+// kept in cycleState under the configured prefix plugin name. It returns true when that
+// state lists no prefix cache servers, and also when the state cannot be read at all.
+func (s *NoHitLRU) isColdRequest(ctx context.Context, cycleState *types.CycleState) bool {
+	// The prefix state is only an optimization hint, so a missing state counts as cold.
+	stateKey := plugins.StateKey(s.prefixPluginName)
+	prefixState, err := types.ReadCycleStateKey[*prefix.SchedulingContextState](cycleState, stateKey)
+	if err != nil {
+		debugLog := log.FromContext(ctx).V(logutil.DEBUG)
+		debugLog.Info("No prefix cache state found, treating as cold request for LRU optimization", "error", err)
+		return true
+	}
+
+	// Cold means the prefix cache recorded no servers with hits for this request.
+	hitServers := len(prefixState.PrefixCacheServers)
+	return hitServers == 0
+}
+
+// scoreNeutral gives every pod the same neutral score (0.5).
+// It is used for requests with cache hits, where the LRU ordering should not apply.
+func (s *NoHitLRU) scoreNeutral(pods []types.Pod) map[types.Pod]float64 {
+	neutral := make(map[types.Pod]float64, len(pods))
+	for i := range pods {
+		neutral[pods[i]] = 0.5
+	}
+	return neutral
+}
+
+// getLRUPositions returns, for every pod name tracked by the LRU cache, its index in the
+// cache's key order. Index 0 is the oldest (least recently used) entry.
+func (s *NoHitLRU) getLRUPositions() map[string]int {
+	// Keys() lists the cached keys from oldest to newest, see
+	// https://pkg.go.dev/github.com/hashicorp/golang-lru/v2#Cache.Keys
+	orderedNames := s.lruCache.Keys()
+
+	positions := make(map[string]int, len(orderedNames))
+	for idx := range orderedNames {
+		positions[orderedNames[idx]] = idx
+	}
+	return positions
+}
+
+// partitionPodsByUsage splits pods into usedPods, whose names are keys of lruPosition (they
+// already received a cold request), and neverUsedPods, which are not tracked yet.
+func (s *NoHitLRU) partitionPodsByUsage(pods []types.Pod, lruPosition map[string]int) (usedPods, neverUsedPods []types.Pod) {
+	for _, candidate := range pods {
+		_, tracked := lruPosition[candidate.GetPod().NamespacedName.String()]
+		if !tracked {
+			neverUsedPods = append(neverUsedPods, candidate)
+			continue
+		}
+		usedPods = append(usedPods, candidate)
+	}
+	return usedPods, neverUsedPods
+}
+
+// scoreNeverUsedPods scores the pods that have never received a cold request.
+// The pod at index i gets 1.0 - i/(totalPods-1), so the first never-used pod scores 1.0
+// and each following one scores progressively lower.
+func (s *NoHitLRU) scoreNeverUsedPods(scoredPods map[types.Pod]float64, neverUsedPods []types.Pod, totalPods int) {
+	if totalPods <= 1 {
+		// Avoid possibility of dividing by zero.
+		return
+	}
+	denominator := float64(totalPods - 1)
+	for rank := range neverUsedPods {
+		scoredPods[neverUsedPods[rank]] = 1.0 - float64(rank)/denominator
+	}
+}
+
+// scoreUsedPods scores pods that already received cold requests: the least recently used
+// one scores highest. Ranks are dense among usedPods, so cache entries of other pods cannot
+// zero out every score. lruPosition values must lie in [0, len(lruPosition)).
+func (s *NoHitLRU) scoreUsedPods(scoredPods map[types.Pod]float64, usedPods []types.Pod, lruPosition map[string]int, neverUsedCount, totalPods int) {
+	if totalPods <= 1 { // avoid possibility of dividing by zero
+		return
+	}
+	// After the prefix sum below, older[p] is the number of candidates at LRU positions < p.
+	older := make([]int, len(lruPosition)+1)
+	for _, pod := range usedPods {
+		older[lruPosition[pod.GetPod().NamespacedName.String()]+1]++
+	}
+	for p := 1; p < len(older); p++ {
+		older[p] += older[p-1]
+	}
+	for _, pod := range usedPods {
+		// Never-used pods take ranks below neverUsedCount, so they always score higher.
+		rank := neverUsedCount + older[lruPosition[pod.GetPod().NamespacedName.String()]]
+		scoredPods[pod] = max(1.0-float64(rank)/float64(totalPods-1), 0)
+	}
+}
+
+// scoreColdRequestByLRU scores pods for a cold request by their LRU position.
+// Never-used pods are scored first (highest), then the previously used pods, of which
+// the least recently used ones score higher. A single candidate pod always gets 1.0.
+func (s *NoHitLRU) scoreColdRequestByLRU(pods []types.Pod) map[types.Pod]float64 {
+	podCount := len(pods)
+	scores := make(map[types.Pod]float64, podCount)
+
+	if podCount == 1 {
+		// Nothing to rank; this also avoids the possibility of dividing by zero.
+		scores[pods[0]] = 1.0
+		return scores
+	}
+
+	positions := s.getLRUPositions()
+	used, neverUsed := s.partitionPodsByUsage(pods, positions)
+	neverUsedCount := len(neverUsed)
+
+	s.scoreNeverUsedPods(scores, neverUsed, podCount)
+	s.scoreUsedPods(scores, used, positions, neverUsedCount, podCount)
+	return scores
+}
+
+// Score scores the given pods based on LRU for cold requests.
+// For cache hits it returns the neutral score (0.5) for all pods. For cache misses it
+// ranks the pods by when they last received a cold request: least recently used (or
+// never used) pods get the highest score (1.0), most recently used pods get the lowest
+// score (approaching 0.0).
+// The cold/warm decision is stored in the plugin state so that PreRequest can use it.
+func (s *NoHitLRU) Score(ctx context.Context, cycleState *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
+	debugLog := log.FromContext(ctx).V(logutil.DEBUG)
+
+	cold := s.isColdRequest(ctx, cycleState)
+
+	// Record the decision per request ID, under this plugin's own state key.
+	stateKey := plugins.StateKey(s.typedName.String())
+	s.pluginState.Write(request.RequestId, stateKey, &coldRequestState{isCold: cold})
+
+	if cold {
+		debugLog.Info("Cold request detected, scoring pods by LRU")
+		return s.scoreColdRequestByLRU(pods)
+	}
+
+	debugLog.Info("Cache hit detected, returning neutral scores")
+	return s.scoreNeutral(pods)
+}
+
+// PreRequest is called before a request is sent to the target pod.
+// For cold requests, it updates the LRU cache to track which pods have been used recently.
+func (s *NoHitLRU) PreRequest(ctx context.Context, request *types.LLMRequest, schedulingResult *types.SchedulingResult, _ int) {
+	logger := log.FromContext(ctx).V(logutil.DEBUG)
+
+	if schedulingResult == nil || len(schedulingResult.ProfileResults) == 0 {
+		logger.Info("No scheduling result available")
+		return
+	}
+
+	// Read the cold request state we stored in Score
+	coldState, err := plugins.ReadPluginStateKey[*coldRequestState](s.pluginState, request.RequestId, plugins.StateKey(s.typedName.String()))
+	// After fetching the cold state, drop it from the plugin state immediately (otherwise it will hang around until it becomes stale).
+	s.pluginState.Delete(request.RequestId)
+
+	if err != nil {
+		logger.Info("No cold request state found, treating as non-cold request", "error", err)
+		return
+	}
+
+	if !coldState.isCold {
+		logger.Info("Not a cold request, skipping LRU update")
+		return
+	}
+
+	// Get the primary profile's target pod
+	primaryProfile := schedulingResult.ProfileResults[schedulingResult.PrimaryProfileName]
+	if primaryProfile == nil || len(primaryProfile.TargetPods) == 0 {
+		logger.Info("No target pod in primary profile")
+		return
+	}
+
+	targetPod := primaryProfile.TargetPods[0]
+	podName := targetPod.GetPod().NamespacedName.String()
+
+	// Move the pod to the front of the LRU.
+	var present struct{} // dummy value
+	s.lruCache.Add(podName, present)
+
+	logger.Info("Updated LRU cache for cold request", "pod", podName, "requestId", request.RequestId)
+}
diff --git a/pkg/plugins/scorer/no_hit_lru_test.go b/pkg/plugins/scorer/no_hit_lru_test.go
new file mode 100644
index 00000000..76f44aa2
--- /dev/null
+++ b/pkg/plugins/scorer/no_hit_lru_test.go
@@ -0,0 +1,449 @@
+package scorer_test
+
+import (
+	"context"
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+
+	k8stypes "k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
+	backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
+
+	"github.com/llm-d/llm-d-inference-scheduler/pkg/plugins/scorer"
+)
+
+// fakeHandle is an in-memory plugins.Handle backed by a plain map, for use in tests.
+type fakeHandle struct {
+	ctx     context.Context
+	plugins map[string]plugins.Plugin
+}
+
+var _ plugins.Handle = (*fakeHandle)(nil)
+
+// newFakeHandle returns a fakeHandle bound to ctx with no plugins registered.
+func newFakeHandle(ctx context.Context) *fakeHandle {
+	return &fakeHandle{ctx: ctx, plugins: make(map[string]plugins.Plugin)}
+}
+
+// Context returns the context the handle was created with.
+func (h *fakeHandle) Context() context.Context { return h.ctx }
+
+// Plugin returns the plugin registered under name, or nil when there is none.
+func (h *fakeHandle) Plugin(name string) plugins.Plugin { return h.plugins[name] }
+
+// AddPlugin registers plugin under name, replacing any previous entry.
+func (h *fakeHandle) AddPlugin(name string, plugin plugins.Plugin) { h.plugins[name] = plugin }
+
+// GetAllPlugins returns the registered plugins in map iteration (unspecified) order.
+func (h *fakeHandle) GetAllPlugins() []plugins.Plugin {
+	all := make([]plugins.Plugin, 0, len(h.plugins))
+	for name := range h.plugins {
+		all = append(all, h.plugins[name])
+	}
+	return all
+}
+
+// GetAllPluginsWithNames returns the backing map itself, not a copy.
+func (h *fakeHandle) GetAllPluginsWithNames() map[string]plugins.Plugin { return h.plugins }
+
+// PodList ignores the predicate and always returns an empty, non-nil slice.
+func (h *fakeHandle) PodList(_ func(backendmetrics.PodMetrics) bool) []backendmetrics.PodMetrics {
+	return []backendmetrics.PodMetrics{}
+}
+
+// stubPlugin is a placeholder plugin that only reports a fixed TypedName.
+type stubPlugin struct{ name plugins.TypedName }
+
+// TypedName returns the name the stub was constructed with.
+func (p *stubPlugin) TypedName() plugins.TypedName {
+	return p.name
+}
+
+// TestNoHitLRUFactoryDependencyValidation checks the factory succeeds with or without a prefix cache plugin on the handle.
+func TestNoHitLRUFactoryDependencyValidation(t *testing.T) {
+	tests := []struct {
+		name         string
+		handle       *fakeHandle
+		params       map[string]any
+		expectError  bool
+		errorMessage string
+	}{
+		{
+			name:        "missing prefix cache plugin - should work as optimization",
+			handle:      newFakeHandle(context.Background()),
+			expectError: false,
+		},
+		{
+			name: "prefix plugin present - should work",
+			handle: func() *fakeHandle {
+				h := newFakeHandle(context.Background())
+				h.AddPlugin(prefix.PrefixCachePluginType, &stubPlugin{name: plugins.TypedName{Type: prefix.PrefixCachePluginType, Name: prefix.PrefixCachePluginType}})
+				return h
+			}(),
+			expectError: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) { // one subtest per case, so a failing case cannot hide the others
+			var raw json.RawMessage // stays nil when the case supplies no parameters
+			if tt.params != nil {
+				encoded, err := json.Marshal(tt.params)
+				if err != nil {
+					t.Fatalf("failed to marshal parameters: %v", err)
+				}
+				raw = encoded
+			}
+			plugin, err := scorer.NoHitLRUFactory("test", raw, tt.handle)
+			if tt.expectError {
+				if err == nil {
+					t.Fatalf("expected error for case %q, got none", tt.name)
+				}
+				if tt.errorMessage != "" && !strings.Contains(err.Error(), tt.errorMessage) {
+					t.Fatalf("error message mismatch for case %q: %v", tt.name, err)
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("unexpected error for case %q: %v", tt.name, err)
+			}
+			if plugin == nil {
+				t.Fatalf("expected plugin instance for case %q", tt.name)
+			}
+		})
+	}
+}
+
+func TestNoHitLRUScorer(t *testing.T) {
+	podA := &types.PodMetrics{
+		Pod:          &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-a"}},
+		MetricsState: &backendmetrics.MetricsState{},
+	}
+	podB := &types.PodMetrics{
+		Pod:          &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-b"}},
+		MetricsState: &backendmetrics.MetricsState{},
+	}
+	podC := &types.PodMetrics{
+		Pod:          &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-c"}},
+		MetricsState: &backendmetrics.MetricsState{},
+	}
+	// Table-driven: each case gets its own fresh scorer, and Score's result is compared to wantScores.
+	tests := []struct {
+		name        string
+		scorer      framework.Scorer
+		req         *types.LLMRequest
+		input       []types.Pod
+		prefixState *prefix.SchedulingContextState
+		wantScores  map[types.Pod]float64
+		description string
+	}{
+		{
+			name:   "cold request - all pods never used",
+			scorer: scorer.NewNoHitLRU(context.Background(), nil),
+			req: &types.LLMRequest{
+				TargetModel: "test-model",
+			},
+			input: []types.Pod{podA, podB, podC},
+			prefixState: &prefix.SchedulingContextState{
+				PrefixCacheServers: make(map[prefix.ServerID]int), // empty = cold request
+			},
+			wantScores: map[types.Pod]float64{
+				podA: 1.0, // never-used pods: expected scores step down from 1.0 to 0.0 in input order
+				podB: 0.5,
+				podC: 0.0,
+			},
+			description: "Never-used pods should get high scores for cold requests",
+		},
+		{
+			name:   "cache hit - neutral scores",
+			scorer: scorer.NewNoHitLRU(context.Background(), nil),
+			req: &types.LLMRequest{
+				TargetModel: "test-model",
+			},
+			input: []types.Pod{podA, podB, podC},
+			prefixState: &prefix.SchedulingContextState{
+				PrefixCacheServers: map[prefix.ServerID]int{
+					{Name: "server1", Namespace: "default"}: 5, // non-empty = cache hit
+				},
+			},
+			wantScores: map[types.Pod]float64{
+				podA: 0.5, // every pod is expected to get the same neutral 0.5 on a cache hit
+				podB: 0.5,
+				podC: 0.5,
+			},
+			description: "Cache hits should return neutral scores",
+		},
+		{
+			name:   "single pod - max score",
+			scorer: scorer.NewNoHitLRU(context.Background(), nil),
+			req: &types.LLMRequest{
+				TargetModel: "test-model",
+			},
+			input: []types.Pod{podA},
+			prefixState: &prefix.SchedulingContextState{
+				PrefixCacheServers: make(map[prefix.ServerID]int), // empty = cold request
+			},
+			wantScores: map[types.Pod]float64{
+				podA: 1.0, // a lone candidate is expected to get the maximum score
+			},
+			description: "Single pod should get maximum score",
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			// Build the cycle state; a case without prefixState would leave it empty.
+			cycleState := &types.CycleState{}
+			if test.prefixState != nil {
+				cycleState.Write(plugins.StateKey(prefix.PrefixCachePluginType), test.prefixState)
+			}
+
+			got := test.scorer.Score(context.Background(), cycleState, test.req, test.input)
+
+			if diff := cmp.Diff(test.wantScores, got); diff != "" {
+				t.Errorf("%s: Unexpected output (-want +got): %v", test.description, diff)
+			}
+		})
+	}
+}
+
+// TestNoHitLRUBasicFunctionality is a smoke test: a cold request over two fresh pods
+// must yield one in-range score per pod, and the two scores must differ.
+func TestNoHitLRUBasicFunctionality(t *testing.T) {
+	ctx := context.Background()
+	lruScorer := scorer.NewNoHitLRU(ctx, nil)
+
+	newPod := func(name string) *types.PodMetrics {
+		return &types.PodMetrics{
+			Pod:          &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: name}},
+			MetricsState: &backendmetrics.MetricsState{},
+		}
+	}
+
+	podA, podB := newPod("pod-a"), newPod("pod-b")
+	pods := []types.Pod{podA, podB}
+
+	// An empty PrefixCacheServers map stands for a cold request (no cache hit).
+	cycleState := &types.CycleState{}
+	cycleState.Write(plugins.StateKey(prefix.PrefixCachePluginType), &prefix.SchedulingContextState{
+		PrefixCacheServers: map[prefix.ServerID]int{},
+	})
+
+	scores := lruScorer.Score(ctx, cycleState, &types.LLMRequest{}, pods)
+
+	// Every candidate pod must receive a score.
+	if got := len(scores); got != 2 {
+		t.Errorf("Expected 2 scores, got %d", got)
+	}
+
+	// Scores are expected to stay within [0, 1].
+	for pod, score := range scores {
+		if score < 0 || score > 1 {
+			t.Errorf("Invalid score %f for pod %s", score, pod.GetPod().NamespacedName.String())
+		}
+	}
+
+	// The two never-used pods must not tie, otherwise there is no ordering between them.
+	if a, b := scores[podA], scores[podB]; a == b {
+		t.Errorf("Expected different scores for different pods, both got %f", a)
+	}
+}
+
+// TestNoPrefixCacheStateFound scores against a cycle state that holds no prefix cache entry
+// and expects cold-request scoring, i.e. the lone pod receives 1.0.
+func TestNoPrefixCacheStateFound(t *testing.T) {
+	ctx := context.Background()
+	lruScorer := scorer.NewNoHitLRU(ctx, nil)
+
+	var podA types.Pod = &types.PodMetrics{
+		Pod:          &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-a"}},
+		MetricsState: &backendmetrics.MetricsState{},
+	}
+
+	scores := lruScorer.Score(ctx, &types.CycleState{}, &types.LLMRequest{}, []types.Pod{podA})
+
+	if scores[podA] != 1.0 {
+		t.Errorf("Failure to find a prefix cache should result in scoring as a cold request.")
+	}
+}
+
+// TestNoHitLRUPreferLeastRecentlyUsedAfterColdRequests runs ordered subtests against one shared scorer to check LRU rotation.
+func TestNoHitLRUPreferLeastRecentlyUsedAfterColdRequests(t *testing.T) {
+	ctx := context.Background()
+	scorer := scorer.NewNoHitLRU(ctx, nil)
+
+	podA := &types.PodMetrics{
+		Pod:          &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-a", Namespace: "default"}},
+		MetricsState: &backendmetrics.MetricsState{},
+	}
+	podB := &types.PodMetrics{
+		Pod:          &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-b", Namespace: "default"}},
+		MetricsState: &backendmetrics.MetricsState{},
+	}
+	podC := &types.PodMetrics{
+		Pod:          &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-c", Namespace: "default"}},
+		MetricsState: &backendmetrics.MetricsState{},
+	}
+	pods := []types.Pod{podA, podB, podC}
+
+	primaryProfile := "primary-profile"
+	toPrefixState := func(entries map[prefix.ServerID]int) *types.CycleState {
+		cycle := &types.CycleState{}
+		cycle.Write(plugins.StateKey(prefix.PrefixCachePluginType), &prefix.SchedulingContextState{PrefixCacheServers: entries})
+		return cycle
+	}
+
+	requestToPod := func(target types.Pod) *types.SchedulingResult {
+		return &types.SchedulingResult{
+			PrimaryProfileName: primaryProfile,
+			ProfileResults: map[string]*types.ProfileRunResult{
+				primaryProfile: {
+					TargetPods: []types.Pod{target},
+				},
+			},
+		}
+	}
+
+	// Test LRU behavior indirectly through scoring rather than internal state. The helper takes the
+	// calling subtest's t because Fatalf must run on the goroutine of the test it reports against.
+	assertHighestScoredPod := func(t *testing.T, expectedPod types.Pod, testName string) {
+		t.Helper()
+		coldReq := &types.LLMRequest{RequestId: testName + "-scoring-check"}
+		scores := scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReq, pods)
+
+		highestScore := -1.0
+		var highestPod types.Pod
+		for pod, score := range scores {
+			if score > highestScore {
+				highestScore = score
+				highestPod = pod
+			}
+		}
+
+		want, got := expectedPod.GetPod().NamespacedName.String(), highestPod.GetPod().NamespacedName.String()
+		if got != want {
+			t.Fatalf("expected %s to have highest score for LRU behavior, but %s had highest score (%f). All scores: %+v",
+				want, got, highestScore, scores)
+		}
+	}
+
+	t.Run("initial cold request seeds cache", func(t *testing.T) {
+		coldReqA := &types.LLMRequest{RequestId: "cold-1"}
+		scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqA, pods)
+		scorer.PreRequest(ctx, coldReqA, requestToPod(podA), 0)
+		// After podA handles a cold request, other pods should score higher for new cold requests
+		assertHighestScoredPod(t, podB, "after-podA-used")
+	})
+
+	t.Run("unused pods rank above existing ones", func(t *testing.T) {
+		coldReqCheck := &types.LLMRequest{RequestId: "cold-check"}
+		coldScores := scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqCheck, pods)
+		if coldScores[podB] <= coldScores[podA] {
+			t.Fatalf("expected pod-b to outrank pod-a after pod-a handled previous cold request, scores=%+v", coldScores)
+		}
+		if coldScores[podB] != 1.0 {
+			t.Fatalf("expected pod-b to score 1.0, scores=%+v", coldScores)
+		}
+		if coldScores[podC] != 0.5 {
+			t.Fatalf("expected pod-c to score 0.5, scores=%+v", coldScores)
+		}
+	})
+
+	t.Run("warm request leaves LRU untouched", func(t *testing.T) {
+		warmReq := &types.LLMRequest{RequestId: "warm-1"}
+		warmState := map[prefix.ServerID]int{
+			{Name: "server1", Namespace: "default"}: 1,
+		}
+		warmScores := scorer.Score(ctx, toPrefixState(warmState), warmReq, pods)
+		for _, score := range warmScores {
+			if score != 0.5 {
+				t.Fatalf("expected neutral score for warm request, got %f", score)
+			}
+		}
+		scorer.PreRequest(ctx, warmReq, requestToPod(podB), 0)
+		postWarmReq := &types.LLMRequest{RequestId: "cold-after-warm"}
+		postWarmScores := scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), postWarmReq, pods)
+		if postWarmScores[podB] <= postWarmScores[podA] {
+			t.Fatalf("expected warm request to leave ordering unchanged, scores=%+v", postWarmScores)
+		}
+	})
+
+	t.Run("second cold request rotates to podB", func(t *testing.T) {
+		// Simulate podB handling a cold request
+		coldReqB := &types.LLMRequest{RequestId: "cold-2"}
+		scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqB, pods)
+		scorer.PreRequest(ctx, coldReqB, requestToPod(podB), 0)
+		// Now podC should score highest since both podA and podB have been used
+		assertHighestScoredPod(t, podC, "after-podB-used")
+	})
+
+	t.Run("third cold request rotates back to podA", func(t *testing.T) {
+		// Simulate podC handling a cold request
+		coldReqC := &types.LLMRequest{RequestId: "cold-3"}
+		scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqC, pods)
+		scorer.PreRequest(ctx, coldReqC, requestToPod(podC), 0)
+		// Now podA should score highest again (LRU rotation)
+		assertHighestScoredPod(t, podA, "after-podC-used")
+	})
+}
+
+// TestNoHitLRUEdgeCases covers degenerate pod lists for cold requests:
+// an empty slice, a nil slice, and a slice holding exactly one pod.
+func TestNoHitLRUEdgeCases(t *testing.T) {
+	ctx := context.Background()
+	lruScorer := scorer.NewNoHitLRU(ctx, nil)
+
+	podA := &types.PodMetrics{
+		Pod:          &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-a"}},
+		MetricsState: &backendmetrics.MetricsState{},
+	}
+
+	// scoreCold scores the given pods against a fresh cycle state whose prefix entry is an
+	// empty server map, which stands for a cold request.
+	scoreCold := func(pods []types.Pod) map[types.Pod]float64 {
+		cycleState := &types.CycleState{}
+		cycleState.Write(plugins.StateKey(prefix.PrefixCachePluginType), &prefix.SchedulingContextState{
+			PrefixCacheServers: make(map[prefix.ServerID]int),
+		})
+		return lruScorer.Score(ctx, cycleState, &types.LLMRequest{}, pods)
+	}
+
+	t.Run("empty pods list", func(t *testing.T) {
+		// No candidates in, no scores out.
+		emptyPods := []types.Pod{}
+		scores := scoreCold(emptyPods)
+
+		if n := len(scores); n != 0 {
+			t.Errorf("Expected empty scores for empty pods list, got %d scores", n)
+		}
+	})
+
+	t.Run("nil pods list", func(t *testing.T) {
+		// A nil slice must still produce a usable (non-nil) but empty map.
+		scores := scoreCold(nil)
+
+		if scores == nil {
+			t.Errorf("Expected non-nil scores map for nil pods list")
+		}
+		if n := len(scores); n != 0 {
+			t.Errorf("Expected empty scores for nil pods list, got %d scores", n)
+		}
+	})
+
+	t.Run("single pod returns 1.0", func(t *testing.T) {
+		// With one candidate there is nothing to rank against.
+		pods := []types.Pod{podA}
+		scores := scoreCold(pods)
+
+		if got := scores[podA]; got != 1.0 {
+			t.Errorf("Expected single pod to get score 1.0, got %f", got)
+		}
+	})
+}
diff --git a/pkg/plugins/scorer/precise_prefix_cache.go b/pkg/plugins/scorer/precise_prefix_cache.go
index 636fb288..fe4c2d6d 100644
--- a/pkg/plugins/scorer/precise_prefix_cache.go
+++ b/pkg/plugins/scorer/precise_prefix_cache.go
@@ -121,7 +121,13 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
 		return nil
 	}
 
-	scores, err := s.kvCacheIndexer.GetPodScores(ctx, request.Prompt, request.TargetModel, nil)
+	prompt, err := getUserInput(request)
+	if err != nil {
+		loggerDebug.Error(err, "Failed to get user input")
+		return nil
+	}
+
+	scores, err := s.kvCacheIndexer.GetPodScores(ctx, prompt, request.TargetModel, nil)
 	if err != nil {
 		loggerDebug.Error(err, "Failed to get pod scores")
 		return nil
@@ -139,3 +145,26 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
 
 	return indexedScoresToNormalizedScoredPods(pods, podToKey, scores)
 }
+
+// getUserInput returns the text of the request that the prefix score is computed over.
+// For a completions request this is the prompt; for a chat-completions request it is the
+// JSON encoding of the full message list. An error is returned when the request carries
+// neither kind of body, or when the messages cannot be marshaled.
+func getUserInput(request *types.LLMRequest) (string, error) {
+	if request == nil || request.Body == nil {
+		return "", fmt.Errorf("request has no body")
+	}
+	if request.Body.Completions != nil { // assumed to be valid if not nil
+		return request.Body.Completions.Prompt, nil
+	}
+	if request.Body.ChatCompletions == nil {
+		return "", fmt.Errorf("request body has neither completions nor chat-completions content")
+	}
+
+	// chat-completions request: return the string form of the entire message list
+	data, err := json.Marshal(request.Body.ChatCompletions.Messages)
+	if err != nil {
+		return "", fmt.Errorf("failed to marshal chat-completions messages: %w", err)
+	}
+	return string(data), nil
+}
diff --git a/pkg/plugins/scorer/session_affinity.go b/pkg/plugins/scorer/session_affinity.go
index a20de574..3ac9230c 100644
--- a/pkg/plugins/scorer/session_affinity.go
+++ b/pkg/plugins/scorer/session_affinity.go
@@ -23,7 +23,7 @@ const (
 
 // compile-time type assertion
 var _ framework.Scorer = &SessionAffinity{}
-var _ requestcontrol.PostResponse = &SessionAffinity{}
+var _ requestcontrol.ResponseComplete = &SessionAffinity{}
 
 // SessionAffinityFactory defines the factory function for SessionAffinity scorer.
 func SessionAffinityFactory(name string, _ json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) {
@@ -80,11 +80,11 @@ func (s *SessionAffinity) Score(ctx context.Context, _ *types.CycleState, reques
 	return scoredPods
 }
 
-// PostResponse sets the session header on the response sent to the client
+// ResponseComplete sets the session header on the response sent to the client
 // TODO: this should be using a cookie and ensure not overriding any other
 // cookie values if present.
 // Tracked in https://github.com/llm-d/llm-d-inference-scheduler/issues/28
-func (s *SessionAffinity) PostResponse(ctx context.Context, _ *types.LLMRequest, response *requestcontrol.Response, targetPod *backend.Pod) {
+func (s *SessionAffinity) ResponseComplete(ctx context.Context, _ *types.LLMRequest, response *requestcontrol.Response, targetPod *backend.Pod) {
 	if response == nil || targetPod == nil {
 		reqID := "undefined"
 		if response != nil {
diff --git a/pkg/plugins/scorer/session_affinity_test.go b/pkg/plugins/scorer/session_affinity_test.go
index 8c1844e6..481b209a 100644
--- a/pkg/plugins/scorer/session_affinity_test.go
+++ b/pkg/plugins/scorer/session_affinity_test.go
@@ -94,7 +94,7 @@ func TestSessionAffinity_Score(t *testing.T) {
 	}
 }
 
-func TestSessionAffinity_PostResponse(t *testing.T) {
+func TestSessionAffinity_ResponseComplete(t *testing.T) {
 
 	targetPod := &backend.Pod{
 		NamespacedName: k8stypes.NamespacedName{Name: "pod1"},
@@ -135,7 +135,7 @@ func TestSessionAffinity_PostResponse(t *testing.T) {
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			s.PostResponse(ctx, nil, test.initialResponse, test.targetPod)
+			s.ResponseComplete(ctx, nil, test.initialResponse, test.targetPod)
 
 			if diff := cmp.Diff(test.wantHeaders, test.initialResponse.Headers); diff != "" {
 				t.Errorf("Unexpected output (-want +got): %v", diff)
diff --git a/pkg/scheduling/pd/scheduler_test.go b/pkg/scheduling/pd/scheduler_test.go
index 023708aa..74cf0215 100644
--- a/pkg/scheduling/pd/scheduler_test.go
+++ b/pkg/scheduling/pd/scheduler_test.go
@@ -102,7 +102,11 @@ func TestPDSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				RequestId:   uuid.NewString(),
 				TargetModel: "any-model",
-				Prompt:      "12345678901",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: "12345678901",
+					},
+				},
 			},
 			input: []types.Pod{},
 			err:   true,
@@ -112,7 +116,11 @@ func TestPDSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				RequestId:   uuid.NewString(),
 				TargetModel: "critical",
-				Prompt:      "12345678901",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: "12345678901",
+					},
+				},
 			},
 			// pod2 will be picked because it is the only pod with Decode role
 			input:   []types.Pod{pod2},
@@ -123,7 +131,11 @@ func TestPDSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				RequestId:   uuid.NewString(),
 				TargetModel: "critical",
-				Prompt:      "12345678901",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: "12345678901",
+					},
+				},
 			},
 			// no Decode pod
 			input: []types.Pod{pod1},
@@ -134,7 +146,11 @@ func TestPDSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				RequestId:   uuid.NewString(),
 				TargetModel: "critical",
-				Prompt:      "12345678906",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: "12345678906",
+					},
+				},
 			},
 			// pod2 will be picked in the decode profile result, pod1 will be in the prefill profile result
 			input:    []types.Pod{pod1, pod2},
@@ -146,7 +162,11 @@ func TestPDSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				RequestId:   uuid.NewString(),
 				TargetModel: "critical",
-				Prompt:      "12345",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: "12345",
+					},
+				},
 			},
 			// pod2 will be picked because it is the decode pod, pod1 shouldn't be picked,
 			// because the prompt is too short
@@ -159,7 +179,11 @@ func TestPDSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				RequestId:   uuid.NewString(),
 				TargetModel: "critical",
-				Prompt:      "12345678901",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: "12345678901",
+					},
+				},
 			},
 			input: []types.Pod{pod1, noRolePod1},
 			wantRes: &types.SchedulingResult{
@@ -187,7 +211,11 @@ func TestPDSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				RequestId:   uuid.NewString(),
 				TargetModel: "critical",
-				Prompt:      "12345678906",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: "12345678906",
+					},
+				},
 			},
 			// pod2 will be picked in the decode profile result cause it has higher score than noRolePod1
 			// pod1 will be in the prefill profile result
@@ -204,7 +232,7 @@ func TestPDSchedule(t *testing.T) {
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
 			//  initialize scheduler with config
-			prefixScorer := prefix.New(ctx, prefix.Config{HashBlockSize: 5, MaxPrefixBlocksToMatch: 256, LRUCapacityPerServer: 31250})
+			prefixScorer := prefix.New(ctx, prefix.Config{DefaultBlockSize: 5, MaxPrefixBlocksToMatch: 256, LRUCapacityPerServer: 31250})
 
 			prefillSchedulerProfile := framework.NewSchedulerProfile().
 				WithFilters(filter.NewPrefillRole()).
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
index a69246b2..bfb5c835 100644
--- a/test/e2e/e2e_suite_test.go
+++ b/test/e2e/e2e_suite_test.go
@@ -2,8 +2,10 @@ package e2e
 
 import (
 	"context"
+	"fmt"
 	"io"
 	"os/exec"
+	"runtime"
 	"strings"
 	"testing"
 	"time"
@@ -12,7 +14,7 @@ import (
 	"github.com/onsi/gomega"
 	"github.com/onsi/gomega/gexec"
 	apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
-	"k8s.io/apimachinery/pkg/runtime"
+	k8sruntime "k8s.io/apimachinery/pkg/runtime"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/config"
@@ -58,7 +60,7 @@ var (
 	ctx       = context.Background()
 	k8sClient client.Client
 	port      string
-	scheme    = runtime.NewScheme()
+	scheme    = k8sruntime.NewScheme()
 
 	eppTag            = env.GetEnvString("EPP_TAG", "dev", ginkgo.GinkgoLogr)
 	vllmSimTag        = env.GetEnvString("VLLM_SIMULATOR_TAG", "dev", ginkgo.GinkgoLogr)
@@ -118,20 +120,24 @@ func setupK8sCluster() {
 	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
 	gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0))
 
-	command = exec.Command("kind", "--name", "e2e-tests", "load", "docker-image",
-		"ghcr.io/llm-d/llm-d-inference-sim:"+vllmSimTag)
-	session, err = gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
-	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
-	gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0))
+	kindLoadImage("ghcr.io/llm-d/llm-d-inference-sim:" + vllmSimTag)
+	kindLoadImage("ghcr.io/llm-d/llm-d-inference-scheduler:" + eppTag)
+	kindLoadImage("ghcr.io/llm-d/llm-d-routing-sidecar:" + routingSideCarTag)
+}
 
-	command = exec.Command("kind", "--name", "e2e-tests", "load", "docker-image",
-		"ghcr.io/llm-d/llm-d-inference-scheduler:"+eppTag)
-	session, err = gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
+func kindLoadImage(image string) {
+	tempDir := ginkgo.GinkgoT().TempDir()
+	target := tempDir + "/docker.tar"
+
+	ginkgo.By(fmt.Sprintf("Loading %s into the cluster e2e-tests", image))
+
+	command := exec.Command("docker", "save", "--platform", "linux/"+runtime.GOARCH,
+		"--output", target, image)
+	session, err := gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
 	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
 	gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0))
 
-	command = exec.Command("kind", "--name", "e2e-tests", "load", "docker-image",
-		"ghcr.io/llm-d/llm-d-routing-sidecar:"+routingSideCarTag)
+	command = exec.Command("kind", "--name", "e2e-tests", "load", "image-archive", target)
 	session, err = gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
 	gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
 	gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0))
diff --git a/test/e2e/utils_test.go b/test/e2e/utils_test.go
index a291f926..19ea9425 100644
--- a/test/e2e/utils_test.go
+++ b/test/e2e/utils_test.go
@@ -62,19 +62,30 @@ func createObjsFromYaml(docs []string) []string {
 
 		// Wait for the created object to exist.
 		clientObj := getClientObject(kind)
-		testutils.EventuallyExists(ctx, func() error {
+		testutils.EventuallyExists(&testutils.TestConfig{
+			ExistsTimeout: existsTimeout,
+			Interval:      interval,
+		}, func() error {
 			return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: name}, clientObj)
-		}, existsTimeout, interval)
+		})
 
 		switch kind {
 		case "CustomResourceDefinition":
 			// Wait for the CRD to be established.
-			testutils.CRDEstablished(ctx, k8sClient, clientObj.(*apiextv1.CustomResourceDefinition),
-				readyTimeout, interval)
+			testutils.CRDEstablished(&testutils.TestConfig{
+				Context:      ctx,
+				K8sClient:    k8sClient,
+				ReadyTimeout: readyTimeout,
+				Interval:     interval,
+			}, clientObj.(*apiextv1.CustomResourceDefinition))
 		case "Deployment":
 			// Wait for the deployment to be available.
-			testutils.DeploymentAvailable(ctx, k8sClient, clientObj.(*appsv1.Deployment),
-				modelReadyTimeout, interval)
+			testutils.DeploymentAvailable(&testutils.TestConfig{
+				Context:           ctx,
+				K8sClient:         k8sClient,
+				ModelReadyTimeout: modelReadyTimeout,
+				Interval:          interval,
+			}, clientObj.(*appsv1.Deployment))
 		}
 	}
 	return objNames