diff --git a/.github/workflows/check-typos.yaml b/.github/workflows/check-typos.yaml index cc543edf..25f71cf3 100644 --- a/.github/workflows/check-typos.yaml +++ b/.github/workflows/check-typos.yaml @@ -13,5 +13,5 @@ jobs: uses: actions/checkout@v5 - name: Check typos - uses: crate-ci/typos@v1.36.2 + uses: crate-ci/typos@v1.38.1 diff --git a/deploy/config/sim-epp-no-hit-lru.yaml b/deploy/config/sim-epp-no-hit-lru.yaml new file mode 100644 index 00000000..8d022441 --- /dev/null +++ b/deploy/config/sim-epp-no-hit-lru.yaml @@ -0,0 +1,25 @@ +# Sample EPP configuration for running without P/D +# with small hash block size for simulation purposes +apiVersion: inference.networking.x-k8s.io/v1alpha1 +kind: EndpointPickerConfig +plugins: +- type: prefix-cache-scorer + parameters: + hashBlockSize: 5 + maxPrefixBlocksToMatch: 256 + lruCapacityPerServer: 31250 +- type: no-hit-lru-scorer + parameters: + lruSize: 2048 +- type: decode-filter +- type: max-score-picker +- type: single-profile-handler +schedulingProfiles: +- name: default + plugins: + - pluginRef: decode-filter + - pluginRef: max-score-picker + - pluginRef: prefix-cache-scorer + weight: 2 + - pluginRef: no-hit-lru-scorer + weight: 1 diff --git a/docs/architecture.md b/docs/architecture.md index 7465e590..10a9134b 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -364,6 +364,57 @@ used for the same session. --- +#### NoHitLRUScorer + +Scores pods based on least recently used (LRU) ordering for cold requests (requests with no KV cache hits). +This helps evenly distribute cache growth across pods, since cold requests result in new KV blocks being created. 
+ +The scorer integrates with a prefix cache plugin to determine if a request has cache hits: +- For cold requests (no cache hits): Ranks pods by LRU order, with never-used or least recently used pods + receiving higher scores (up to 1.0) and most recently used pods receiving lower scores (approaching 0.0) +- For warm requests (cache hits): Returns neutral scores (0.5) for all pods to avoid interfering with + cache locality optimization + +The LRU tracking is specific to cold requests only - pods are added to the LRU cache when they serve +a cold request, not when they serve requests with cache hits. + +- **Type**: `no-hit-lru-scorer` +- **Parameters**: + - `prefixPluginName` (optional): The name of the prefix cache plugin to read state from. Defaults to `prefix-cache-scorer`. + - `lruSize` (optional): The maximum number of pods to track in the LRU cache. Defaults to 1024. + +Example configuration: + +```yaml +plugins: + - type: prefix-cache-scorer + parameters: + hashBlockSize: 5 + maxPrefixBlocksToMatch: 256 + lruCapacityPerServer: 31250 + - type: no-hit-lru-scorer + parameters: + lruSize: 2048 + - type: decode-filter + - type: max-score-picker + - type: single-profile-handler +schedulingProfiles: + - name: default + plugins: + - pluginRef: decode-filter + - pluginRef: max-score-picker + - pluginRef: prefix-cache-scorer + weight: 2 + - pluginRef: no-hit-lru-scorer + weight: 1 +``` + +**Note:** This scorer is designed to work alongside a prefix cache scorer (such as `prefix-cache-scorer` or +`precise-prefix-cache-scorer`). If no prefix cache state is available, all requests are treated as cold. +When integrating with a prefix-cache scorer, the prefix-cache scorer should be listed before `no-hit-lru-scorer` in the scheduling profile (as in the example above), so that the prefix cache state is already available when this scorer runs.
+ +--- + ### Sample Disaggregated Prefill/Decode Configuration The following is an example of what a configuration for disaggregated Prefill/Decode might look like: diff --git a/go.mod b/go.mod index cd415a91..16aa6894 100644 --- a/go.mod +++ b/go.mod @@ -8,20 +8,21 @@ require ( github.com/go-logr/logr v1.4.3 github.com/google/go-cmp v0.7.0 github.com/google/uuid v1.6.0 + github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/jellydator/ttlcache/v3 v3.4.0 - github.com/llm-d/llm-d-kv-cache-manager v0.3.1 - github.com/onsi/ginkgo/v2 v2.25.3 + github.com/llm-d/llm-d-kv-cache-manager v0.3.2 + github.com/onsi/ginkgo/v2 v2.26.0 github.com/onsi/gomega v1.38.2 github.com/openai/openai-go v1.12.0 github.com/stretchr/testify v1.11.1 - google.golang.org/grpc v1.75.1 + google.golang.org/grpc v1.76.0 k8s.io/api v0.34.1 k8s.io/apiextensions-apiserver v0.34.1 k8s.io/apimachinery v0.34.1 k8s.io/client-go v0.34.1 - sigs.k8s.io/controller-runtime v0.22.1 - sigs.k8s.io/gateway-api v1.3.0 - sigs.k8s.io/gateway-api-inference-extension v1.0.0 + sigs.k8s.io/controller-runtime v0.22.3 + sigs.k8s.io/gateway-api v1.4.0 + sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251016181044-831a919943ba ) require ( @@ -30,7 +31,7 @@ require ( github.com/antlr4-go/antlr/v4 v4.13.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect - github.com/cenkalti/backoff/v5 v5.0.2 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect github.com/daulet/tokenizers v1.22.1 // indirect @@ -39,8 +40,8 @@ require ( github.com/dgraph-io/ristretto/v2 v2.3.0 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/dustin/go-humanize v1.0.1 // indirect - github.com/emicklei/go-restful/v3 v3.12.2 // indirect - github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect + 
github.com/emicklei/go-restful/v3 v3.13.0 // indirect + github.com/envoyproxy/go-control-plane/envoy v1.35.0 // indirect github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -48,42 +49,40 @@ require ( github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonpointer v0.21.2 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect - github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-openapi/swag v0.23.1 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/google/btree v1.1.3 // indirect github.com/google/cel-go v0.26.0 // indirect github.com/google/gnostic-models v0.7.0 // indirect - github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a // indirect + github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6 // indirect github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect - github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/mailru/easyjson v0.7.7 // indirect + github.com/mailru/easyjson v0.9.0 // indirect github.com/moby/spdystream v0.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mxk/go-flowrate 
v0.0.0-20140419014527-cca7078d478f // indirect github.com/pebbe/zmq4 v1.4.0 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.23.0 // indirect + github.com/prometheus/client_golang v1.23.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.65.0 // indirect - github.com/prometheus/procfs v0.16.1 // indirect - github.com/prometheus/prometheus v0.305.0 // indirect + github.com/prometheus/common v0.67.1 // indirect + github.com/prometheus/procfs v0.17.0 // indirect + github.com/prometheus/prometheus v0.306.0 // indirect github.com/redis/go-redis/v9 v9.11.0 // indirect github.com/spf13/cobra v1.9.1 // indirect - github.com/spf13/pflag v1.0.6 // indirect + github.com/spf13/pflag v1.0.7 // indirect github.com/stoewer/go-strcase v1.3.0 // indirect - github.com/tidwall/gjson v1.14.4 // indirect + github.com/tidwall/gjson v1.18.0 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect github.com/tidwall/sjson v1.2.5 // indirect @@ -92,42 +91,44 @@ require ( github.com/x448/float16 v0.8.4 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect - go.opentelemetry.io/otel v1.37.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 // indirect - go.opentelemetry.io/otel/metric v1.37.0 // indirect - go.opentelemetry.io/otel/sdk v1.37.0 // indirect - go.opentelemetry.io/otel/trace v1.37.0 // indirect - go.opentelemetry.io/proto/otlp v1.6.0 // indirect + go.opentelemetry.io/otel v1.38.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect + 
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect + go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/sdk v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect + go.opentelemetry.io/proto/otlp v1.7.1 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/automaxprocs v1.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - go.yaml.in/yaml/v2 v2.4.2 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 // indirect - golang.org/x/net v0.43.0 // indirect - golang.org/x/oauth2 v0.30.0 // indirect - golang.org/x/sync v0.16.0 // indirect - golang.org/x/sys v0.35.0 // indirect - golang.org/x/term v0.34.0 // indirect - golang.org/x/text v0.28.0 // indirect + golang.org/x/mod v0.28.0 // indirect + golang.org/x/net v0.44.0 // indirect + golang.org/x/oauth2 v0.31.0 // indirect + golang.org/x/sync v0.17.0 // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/term v0.35.0 // indirect + golang.org/x/text v0.29.0 // indirect golang.org/x/time v0.12.0 // indirect - golang.org/x/tools v0.36.0 // indirect + golang.org/x/tools v0.37.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20250707201910-8d1bb00bc6a7 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 // indirect - google.golang.org/protobuf v1.36.7 // indirect - gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 // indirect + google.golang.org/protobuf v1.36.10 // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // 
indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiserver v0.34.1 // indirect k8s.io/component-base v0.34.1 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect - k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect + k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect + k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect - sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect sigs.k8s.io/yaml v1.6.0 // indirect diff --git a/go.sum b/go.sum index 256fb72d..5b6f6f25 100644 --- a/go.sum +++ b/go.sum @@ -60,8 +60,8 @@ github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= -github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7Zss8= -github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls= @@ -83,10 +83,10 @@ github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/r github.com/dgryski/go-rendezvous 
v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= -github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A= -github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw= +github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/envoyproxy/go-control-plane/envoy v1.35.0 h1:ixjkELDE+ru6idPxcHLj8LBVc2bFP7iBytj353BoHUo= +github.com/envoyproxy/go-control-plane/envoy v1.35.0/go.mod h1:09qwbGVuSWWAyN5t/b3iyVfz5+z8QWGrzkoqm/8SbEs= github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= @@ -99,6 +99,12 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs= +github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo= +github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M= 
+github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk= +github.com/gkampitakis/go-snaps v0.5.14 h1:3fAqdB6BCPKHDMHAKRwtPUwYexKtGrNuw8HX/T/4neo= +github.com/gkampitakis/go-snaps v0.5.14/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -106,14 +112,16 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= -github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= -github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonpointer v0.21.2 h1:AqQaNADVwq/VnkCmQg6ogE+M3FOsKTytwges0JdwVuA= +github.com/go-openapi/jsonpointer v0.21.2/go.mod h1:50I1STOfbY1ycR8jGz8DaMeLCdXiI6aDteEdRNNzpdk= github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= -github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= -github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU= +github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= 
+github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= +github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8= @@ -133,8 +141,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a h1://KbezygeMJZCSHH+HgUZiTeSoiuFspbMg1ge+eFj18= -github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a/go.mod h1:5hDyRhoBCxViHszMt12TnOpEI4VVi+U8Gm9iphldiMA= +github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6 h1:EEHtgt9IwisQ2AZ4pIsMjahcegHh6rmhqxzIRQIyepY= +github.com/google/pprof v0.0.0-20250820193118-f64d9cf942d6/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U= github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -147,8 +155,8 @@ github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5T github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc h1:GN2Lv3MGO7AS6PrRoT6yV5+wkrOpcszoIsO4+4ds248= github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 
h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -157,6 +165,8 @@ github.com/jellydator/ttlcache/v3 v3.4.0 h1:YS4P125qQS0tNhtL6aeYkheEaB/m8HCqdMMP github.com/jellydator/ttlcache/v3 v3.4.0/go.mod h1:Hw9EgjymziQD3yGsQdf1FqFdpp7YjFMd4Srg5EJlgD4= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/joshdk/go-junit v1.0.0 h1:S86cUKIdwBHWwA6xCmFlf3RTLfVXYQfvanM5Uh+K6GE= +github.com/joshdk/go-junit v1.0.0/go.mod h1:TiiV0PqkaNfFXjEiyjWM3XXrhVyCa1K4Zfga6W52ung= github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -171,10 +181,14 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/llm-d/llm-d-kv-cache-manager v0.3.1 h1:SY3z/kg1RI8tNVDdMSgvNgmVkBQqtsJYY0aVMX24zL0= -github.com/llm-d/llm-d-kv-cache-manager v0.3.1/go.mod h1:q6u7LnzMxNcHHb5/LRdHNNeZzzGMSENFSP1NGfsJEmA= 
-github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/llm-d/llm-d-kv-cache-manager v0.3.2 h1:omSTXtuII3ol37CaoI9h+2VxE0m8EoeVOor+CkQh99I= +github.com/llm-d/llm-d-kv-cache-manager v0.3.2/go.mod h1:q6u7LnzMxNcHHb5/LRdHNNeZzzGMSENFSP1NGfsJEmA= +github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= +github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= +github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= +github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= +github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE= +github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A= github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU= github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -192,8 +206,8 @@ github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+ github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid/v2 v2.1.1 h1:suPZ4ARWLOJLegGFiZZ1dFAkqzhMjL3J1TzI+5wHz8s= github.com/oklog/ulid/v2 v2.1.1/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= -github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw= -github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE= +github.com/onsi/ginkgo/v2 v2.26.0 h1:1J4Wut1IlYZNEAWIV3ALrT9NfiaGW2cDCJQSFQMs/gE= +github.com/onsi/ginkgo/v2 v2.26.0/go.mod h1:qhEywmzWTBUY88kfO0BRvX4py7scov9yR+Az2oavUzw= github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= github.com/onsi/gomega v1.38.2/go.mod 
h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0= @@ -211,16 +225,16 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= -github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= -github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= -github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= -github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= -github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= -github.com/prometheus/prometheus v0.305.0 h1:UO/LsM32/E9yBDtvQj8tN+WwhbyWKR10lO35vmFLx0U= -github.com/prometheus/prometheus v0.305.0/go.mod h1:JG+jKIDUJ9Bn97anZiCjwCxRyAx+lpcEQ0QnZlUlbwY= +github.com/prometheus/common v0.67.1 h1:OTSON1P4DNxzTg4hmKCc37o4ZAZDv0cfXLkOt0oEowI= +github.com/prometheus/common v0.67.1/go.mod h1:RpmT9v35q2Y+lsieQsdOh5sXZ6ajUGC8NjZAmr8vb0Q= +github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= +github.com/prometheus/procfs v0.17.0/go.mod 
h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= +github.com/prometheus/prometheus v0.306.0 h1:Q0Pvz/ZKS6vVWCa1VSgNyNJlEe8hxdRlKklFg7SRhNw= +github.com/prometheus/prometheus v0.306.0/go.mod h1:7hMSGyZHt0dcmZ5r4kFPJ/vxPQU99N5/BGwSPDxeZrQ= github.com/prometheus/sigv4 v0.2.0 h1:qDFKnHYFswJxdzGeRP63c4HlH3Vbn1Yf/Ao2zabtVXk= github.com/prometheus/sigv4 v0.2.0/go.mod h1:D04rqmAaPPEUkjRQxGqjoxdyJuyCh6E0M18fZr0zBiE= github.com/redis/go-redis/v9 v9.11.0 h1:E3S08Gl/nJNn5vkxd2i78wZxWAPNZgUNTp8WIJUAiIs= @@ -230,8 +244,9 @@ github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWN github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= -github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.7 h1:vN6T9TfwStFPFM5XzjsvmzZkLuaLX+HS+0SeFLRgU6M= +github.com/spf13/pflag v1.0.7/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -246,8 +261,8 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM= -github.com/tidwall/gjson v1.14.4/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= 
+github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= @@ -269,22 +284,24 @@ go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJyS go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 h1:dNzwXjZKpMpE2JhmO+9HsPl42NIXFIFSUSSs0fiqra0= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0/go.mod h1:90PoxvaEB5n6AOdZvi+yWJQoE95U8Dhhw2bSyRqnTD0= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 h1:JgtbA0xkWHnTmYk7YusopJFX6uleBmAuZ8n05NEh8nQ= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0/go.mod h1:179AK5aar5R3eS9FucPy6rggvU0g52cvKId8pv4+v0c= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= -go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= -go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= -go.opentelemetry.io/otel/sdk/metric 
v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= -go.opentelemetry.io/proto/otlp v1.6.0 h1:jQjP+AQyTf+Fe7OKj/MfkDrmK4MNVtw2NpXsf9fefDI= -go.opentelemetry.io/proto/otlp v1.6.0/go.mod h1:cicgGehlFuNdgZkcALOCh3VE6K/u2tAjzlRhDwmVpZc= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0 h1:kJxSDN4SgWWTjG/hPp3O7LCGLcHXFlvS2/FFOrwL+SE= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.38.0/go.mod h1:mgIOzS7iZeKJdeB8/NYHrJ48fdGc71Llo5bJ1J4DWUE= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod 
h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= +go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= @@ -295,51 +312,53 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= -go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= -golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= +golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= +golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 
h1:yqrTHse8TCMW1M1ZCP+VAR/l0kKxwaAIqN/il7x4voA= golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= +golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= -golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= -golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= -golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= +golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= +golang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo= +golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= -golang.org/x/sync v0.16.0/go.mod 
h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= -golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= -golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= -golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= -golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= +golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= +golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -348,21 +367,21 @@ gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/api v0.238.0 h1:+EldkglWIg/pWjkq97sd+XxH7PxakNYoe/rkSTbnvOs= -google.golang.org/api v0.238.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50= -google.golang.org/genproto/googleapis/api v0.0.0-20250707201910-8d1bb00bc6a7 h1:FiusG7LWj+4byqhbvmB+Q93B/mOxJLN2DTozDuZm4EU= -google.golang.org/genproto/googleapis/api v0.0.0-20250707201910-8d1bb00bc6a7/go.mod h1:kXqgZtrWaf6qS3jZOCnCH7WYfrvFjkC51bM8fz3RsCA= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 h1:pFyd6EwwL2TqFf8emdthzeX+gZE1ElRq3iM8pui4KBY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= -google.golang.org/grpc v1.75.1 h1:/ODCNEuf9VghjgO3rqLcfg8fiOP0nSluljWFlDxELLI= 
-google.golang.org/grpc v1.75.1/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= -google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= -google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/api v0.239.0 h1:2hZKUnFZEy81eugPs4e2XzIJ5SOwQg0G82bpXD65Puo= +google.golang.org/api v0.239.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50= +google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY= +google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1 h1:pmJpJEvT846VzausCQ5d7KreSROcDqmO388w5YbnltA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250826171959-ef028d996bc1/go.mod h1:GmFNa4BdJZ2a8G+wCe9Bg3wwThLrJun751XstdJt5Og= +google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A= +google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= -gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 
h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= @@ -384,20 +403,20 @@ k8s.io/component-base v0.34.1 h1:v7xFgG+ONhytZNFpIz5/kecwD+sUhVE6HU7qQUiRM4A= k8s.io/component-base v0.34.1/go.mod h1:mknCpLlTSKHzAQJJnnHVKqjxR7gBeHRv0rPXA7gdtQ0= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA= -k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 h1:liMHz39T5dJO1aOKHLvwaCjDbf07wVh6yaUlTpunnkE= +k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= +k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d h1:wAhiDyZ4Tdtt7e46e9M5ZSAJ/MnPGPs+Ki1gHw4w1R0= +k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= -sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV24Eqg= -sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= -sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M= -sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk= -sigs.k8s.io/gateway-api-inference-extension v1.0.0 
h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8= -sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/controller-runtime v0.22.3 h1:I7mfqz/a/WdmDCEnXmSPm8/b/yRTy6JsKKENTijTq8Y= +sigs.k8s.io/controller-runtime v0.22.3/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= +sigs.k8s.io/gateway-api v1.4.0 h1:ZwlNM6zOHq0h3WUX2gfByPs2yAEsy/EenYJB78jpQfQ= +sigs.k8s.io/gateway-api v1.4.0/go.mod h1:AR5RSqciWP98OPckEjOjh2XJhAe2Na4LHyXD2FUY7Qk= +sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251016181044-831a919943ba h1:NfFYYePPDs+DOSwm/KerEwm5qETJ1y0dyyN1CpDAlAw= +sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251016181044-831a919943ba/go.mod h1:f9lu6hnm0Ywrdm7SYSjzBR16UGv7BmwjArQveY9IynM= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= diff --git a/pkg/plugins/profile/pd_profile_handler.go b/pkg/plugins/profile/pd_profile_handler.go index 3bfe056d..ff0a5f23 100644 --- a/pkg/plugins/profile/pd_profile_handler.go +++ b/pkg/plugins/profile/pd_profile_handler.go @@ -42,7 +42,7 @@ func PdProfileHandlerFactory(name string, rawParameters json.RawMessage, _ plugi DecodeProfile: defaultDecodeProfile, PrefillProfile: defaultPrefillProfile, PrefixPluginName: defaultPrefixPluginName, - HashBlockSize: prefix.DefaultHashBlockSize, + HashBlockSize: prefix.DefaultBlockSize, } if rawParameters != nil { 
if err := json.Unmarshal(rawParameters, &parameters); err != nil { @@ -106,6 +106,12 @@ func (h *PdProfileHandler) Pick(ctx context.Context, cycleState *types.CycleStat } if h.pdThreshold > 0 { + userInput, err := getUserInputBytes(request) + if err != nil { + log.FromContext(ctx).V(logutil.DEBUG).Error(err, "Failed to get user input bytes") + return nil + } + // if we're here that means decode profile ran successfully, and we have additional profile configured that didn't run yet, // which means PD is enabled (otherwise, prefill profile is not configured at all and this profile handler is not used). // inspect decode execution result to decide if prefill should run or not. @@ -117,12 +123,12 @@ func (h *PdProfileHandler) Pick(ctx context.Context, cycleState *types.CycleStat } else { decodePod := profileResults[h.decodeProfile].TargetPods[0].GetPod().NamespacedName hitPrefix := max(prefixState.PrefixCacheServers[prefix.ServerID(decodePod)]-1, 0) // The first hit is always the model name - hitPercentagePrefix = float64(hitPrefix*h.hashBlockSize) / float64(len(request.Prompt)) + hitPercentagePrefix = float64(hitPrefix*h.hashBlockSize) / float64(len(userInput)) log.FromContext(ctx).V(logutil.DEBUG).Info("Computed hit percentage for prefix cache", "hitPercentage", hitPercentagePrefix, - "promptLength", len(request.Prompt)) + "promptLength", len(userInput)) } - if (1.0-hitPercentagePrefix)*float64(len(request.Prompt)) < float64(h.pdThreshold) { + if (1.0-hitPercentagePrefix)*float64(len(userInput)) < float64(h.pdThreshold) { log.FromContext(ctx).Info("Non-cached suffix is smaller than threshold, using decode profile only", "hitPercentage", hitPercentagePrefix) return map[string]*framework.SchedulerProfile{} // do not run prefill } @@ -160,3 +166,12 @@ func (h *PdProfileHandler) ProcessResults(_ context.Context, _ *types.CycleState }, }, nil } + +func getUserInputBytes(request *types.LLMRequest) ([]byte, error) { + if request.Body.Completions != nil { // assumed to be valid
if not nil + return []byte(request.Body.Completions.Prompt), nil + } + + // must be chat-completions request at this point, return bytes of entire messages + return json.Marshal(request.Body.ChatCompletions.Messages) +} diff --git a/pkg/plugins/register.go b/pkg/plugins/register.go index 6f28a0c9..c3e5a665 100644 --- a/pkg/plugins/register.go +++ b/pkg/plugins/register.go @@ -20,4 +20,5 @@ func RegisterAllPlugins() { plugins.Register(scorer.LoadAwareType, scorer.LoadAwareFactory) plugins.Register(scorer.SessionAffinityType, scorer.SessionAffinityFactory) plugins.Register(scorer.ActiveRequestType, scorer.ActiveRequestFactory) + plugins.Register(scorer.NoHitLRUType, scorer.NoHitLRUFactory) } diff --git a/pkg/plugins/scorer/active_request.go b/pkg/plugins/scorer/active_request.go index f4018d96..da9b53c0 100644 --- a/pkg/plugins/scorer/active_request.go +++ b/pkg/plugins/scorer/active_request.go @@ -49,6 +49,8 @@ func (r *requestEntry) String() string { // compile-time type assertion var _ framework.Scorer = &ActiveRequest{} +var _ requestcontrol.PreRequest = &ActiveRequest{} +var _ requestcontrol.ResponseComplete = &ActiveRequest{} // ActiveRequestFactory defines the factory function for the ActiveRequest scorer. 
func ActiveRequestFactory(name string, rawParameters json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) { @@ -90,8 +92,8 @@ func NewActiveRequest(ctx context.Context, params *ActiveRequestParameters) *Act mutex: &sync.RWMutex{}, } // callback to decrement count when requests expire - // most requests will be removed in PostResponse, but this ensures - // that we don't leak pod counts if PostResponse is not called + // most requests will be removed in ResponseComplete, but this ensures + // that we don't leak pod counts if ResponseComplete is not called requestCache.OnEviction(func(_ context.Context, reason ttlcache.EvictionReason, item *ttlcache.Item[string, *requestEntry]) { if reason == ttlcache.EvictionReasonExpired { @@ -187,14 +189,14 @@ func (s *ActiveRequest) PreRequest(ctx context.Context, request *types.LLMReques } } -// PostResponse is called after a response is sent to the client. +// ResponseComplete is called after a response is sent to the client. // It removes the specific request entry from the cache and decrements // the pod count. 
-func (s *ActiveRequest) PostResponse(ctx context.Context, request *types.LLMRequest, +func (s *ActiveRequest) ResponseComplete(ctx context.Context, request *types.LLMRequest, _ *requestcontrol.Response, targetPod *backend.Pod) { - debugLogger := log.FromContext(ctx).V(logutil.DEBUG).WithName("ActiveRequest.PostResponse") + debugLogger := log.FromContext(ctx).V(logutil.DEBUG).WithName("ActiveRequest.ResponseComplete") if targetPod == nil { - debugLogger.Info("Skipping PostResponse because targetPod is nil") + debugLogger.Info("Skipping ResponseComplete because targetPod is nil") return } diff --git a/pkg/plugins/scorer/active_request_test.go b/pkg/plugins/scorer/active_request_test.go index 72ea0655..11e09b47 100644 --- a/pkg/plugins/scorer/active_request_test.go +++ b/pkg/plugins/scorer/active_request_test.go @@ -168,7 +168,7 @@ func TestActiveRequestScorer_PreRequest(t *testing.T) { } } -func TestActiveRequestScorer_PostResponse(t *testing.T) { +func TestActiveRequestScorer_ResponseComplete(t *testing.T) { ctx := context.Background() scorer := NewActiveRequest(ctx, nil) @@ -197,7 +197,7 @@ func TestActiveRequestScorer_PostResponse(t *testing.T) { // Verify initial state compositeKey := "default/pod-a.test-request-1" if !scorer.requestCache.Has(compositeKey) { - t.Fatal("Request should be in cache before PostResponse") + t.Fatal("Request should be in cache before ResponseComplete") } scorer.mutex.RLock() @@ -208,11 +208,11 @@ func TestActiveRequestScorer_PostResponse(t *testing.T) { } // Call PostResponse - scorer.PostResponse(ctx, request, &requestcontrol.Response{}, podA.GetPod()) + scorer.ResponseComplete(ctx, request, &requestcontrol.Response{}, podA.GetPod()) // Check request is removed from cache if scorer.requestCache.Has(compositeKey) { - t.Errorf("Request should be removed from cache after PostResponse") + t.Errorf("Request should be removed from cache after ResponseComplete") } // Check pod count is decremented and removed (since it was 1) diff --git 
a/pkg/plugins/scorer/no_hit_lru.go b/pkg/plugins/scorer/no_hit_lru.go new file mode 100644 index 00000000..2e9a5fa6 --- /dev/null +++ b/pkg/plugins/scorer/no_hit_lru.go @@ -0,0 +1,297 @@ +package scorer + +import ( + "context" + "encoding/json" + "fmt" + + lru "github.com/hashicorp/golang-lru/v2" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" +) + +const ( + // NoHitLRUType is the type of the NoHitLRU scorer + NoHitLRUType = "no-hit-lru-scorer" + + // defaultLRUSize is the maximum number of pods we'll consider in the cache + defaultLRUSize = 1024 +) + +// compile-time type assertions +var _ framework.Scorer = &NoHitLRU{} +var _ requestcontrol.PreRequest = &NoHitLRU{} + +// NoHitLRUParameters defines the parameters for the NoHitLRU scorer. +type NoHitLRUParameters struct { + // PrefixPluginName defines the name of the prefix cache plugin to read state from. + // Defaults to "prefix-cache-scorer". + PrefixPluginName string `json:"prefixPluginName"` + + // LRUSize defines the maximum number of pods to track in the LRU cache. + LRUSize int `json:"lruSize"` +} + +// coldRequestState tracks whether a request triggered a KV cache hit +// when the cache is missed, isCold is true. 
+type coldRequestState struct { + isCold bool +} + +// Clone implements the plugins.StateData interface +func (c *coldRequestState) Clone() plugins.StateData { + return &coldRequestState{isCold: c.isCold} +} + +// NoHitLRUFactory defines the factory function for the NoHitLRU +func NoHitLRUFactory(name string, rawParameters json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) { + parameters := NoHitLRUParameters{} + if rawParameters != nil { + if err := json.Unmarshal(rawParameters, &parameters); err != nil { + return nil, fmt.Errorf("failed to parse the parameters of the '%s' scorer - %w", NoHitLRUType, err) + } + } + + if parameters.PrefixPluginName == "" { + parameters.PrefixPluginName = prefix.PrefixCachePluginType + } + + // Note: We don't enforce that the prefix plugin exists here + // The scorer will gracefully handle missing prefix cache state as an optimization + + return NewNoHitLRU(handle.Context(), &parameters).WithName(name), nil +} + +// NewNoHitLRU creates a new NoHitLRU scorer +func NewNoHitLRU(ctx context.Context, params *NoHitLRUParameters) *NoHitLRU { + prefixPluginName := prefix.PrefixCachePluginType + lruSize := defaultLRUSize + + if params != nil { + if params.PrefixPluginName != "" { + prefixPluginName = params.PrefixPluginName + } + if params.LRUSize > 0 { + lruSize = params.LRUSize + } + } + + lruCache, err := lru.New[string, struct{}](lruSize) + if err != nil { + log.FromContext(ctx).Error(err, fmt.Sprintf("failed to initialize NoHitLRU scorer: could not create LRU cache with size %d: %v", lruSize, err)) + return nil + } + + return &NoHitLRU{ + typedName: plugins.TypedName{Type: NoHitLRUType}, + lruCache: lruCache, + prefixPluginName: prefixPluginName, + pluginState: plugins.NewPluginState(ctx), + } +} + +// NoHitLRU scorer that favors pods that were least recently used for cold requests. +// This can help evenly distribute cache growth, since cold requests result in more +// new KV blocks.
+type NoHitLRU struct { + typedName plugins.TypedName + lruCache *lru.Cache[string, struct{}] // pod name -> dummy value (we only care about order) + prefixPluginName string + pluginState *plugins.PluginState +} + +// TypedName returns the typed name of the plugin. +func (s *NoHitLRU) TypedName() plugins.TypedName { + return s.typedName +} + +// WithName sets the name of the plugin. +func (s *NoHitLRU) WithName(name string) *NoHitLRU { + s.typedName.Name = name + return s +} + +// isColdRequest determines if a request is cold by reading the prefix cache state. +// Returns true if no prefix cache hits were found, or if prefix cache state is unavailable. +func (s *NoHitLRU) isColdRequest(ctx context.Context, cycleState *types.CycleState) bool { + logger := log.FromContext(ctx).V(logutil.DEBUG) + + // Read prefix cache state to determine if this is a cold request + // This is treated as an optimization - if the state isn't available, we assume cold request + prefixState, err := types.ReadCycleStateKey[*prefix.SchedulingContextState](cycleState, plugins.StateKey(s.prefixPluginName)) + + if err != nil { + logger.Info("No prefix cache state found, treating as cold request for LRU optimization", "error", err) + return true + } + + // Check if this is a cold request (no prefix cache hits) + return len(prefixState.PrefixCacheServers) == 0 +} + +// scoreNeutral returns neutral scores (0.5) for all pods. +// Used when a request has cache hits and LRU optimization should not apply. +func (s *NoHitLRU) scoreNeutral(pods []types.Pod) map[types.Pod]float64 { + scoredPods := make(map[types.Pod]float64, len(pods)) + for _, pod := range pods { + scoredPods[pod] = 0.5 + } + return scoredPods +} + +// getLRUPositions returns a map of pod names to their LRU position. +// Position 0 represents the oldest (least recently used) entry. 
+func (s *NoHitLRU) getLRUPositions() map[string]int { + // Get all keys from LRU cache in order (oldest first) + // https://pkg.go.dev/github.com/hashicorp/golang-lru/v2#Cache.Keys + lruKeys := s.lruCache.Keys() + + lruPosition := make(map[string]int, len(lruKeys)) + for i, key := range lruKeys { + lruPosition[key] = i + } + return lruPosition +} + +// partitionPodsByUsage separates pods into those that have received cold requests +// (usedPods) and those that have never received cold requests (neverUsedPods). +func (s *NoHitLRU) partitionPodsByUsage(pods []types.Pod, lruPosition map[string]int) (usedPods, neverUsedPods []types.Pod) { + for _, pod := range pods { + podName := pod.GetPod().NamespacedName.String() + if _, exists := lruPosition[podName]; exists { + usedPods = append(usedPods, pod) + } else { + neverUsedPods = append(neverUsedPods, pod) + } + } + return usedPods, neverUsedPods +} + +// scoreNeverUsedPods assigns scores to pods that have never received a cold request. +// The first never-used pod gets the highest score (1.0), with subsequent pods +// receiving progressively lower scores. +func (s *NoHitLRU) scoreNeverUsedPods(scoredPods map[types.Pod]float64, neverUsedPods []types.Pod, totalPods int) { + // Avoid possibility of dividing by zero. + if totalPods <= 1 { + return + } + for i, pod := range neverUsedPods { + score := 1.0 - float64(i)/float64(totalPods-1) + scoredPods[pod] = score + } +} + +// scoreUsedPods assigns scores to pods based on their LRU position. +// Pods that were least recently used for cold requests receive higher scores. +func (s *NoHitLRU) scoreUsedPods(scoredPods map[types.Pod]float64, usedPods []types.Pod, lruPosition map[string]int, neverUsedCount, totalPods int) { + // Avoid possibility of dividing by zero. 
+ if totalPods <= 1 { + return + } + for _, pod := range usedPods { + podName := pod.GetPod().NamespacedName.String() + lruPos := lruPosition[podName] + // LRU keys are oldest to newest so rank 0 = oldest + // The never used pod count is added to the rank so that + // a never-used pod will always have the highest score. + rank := neverUsedCount + lruPos + score := 1.0 - float64(rank)/float64(totalPods-1) + if score < 0 { + score = 0 + } + scoredPods[pod] = score + } +} + +// scoreColdRequestByLRU scores pods based on their LRU position for cold requests. +// Pods that have never received a cold request get the highest scores. +// Among previously used pods, least recently used ones get higher scores. +func (s *NoHitLRU) scoreColdRequestByLRU(pods []types.Pod) map[types.Pod]float64 { + scoredPods := make(map[types.Pod]float64, len(pods)) + totalPods := len(pods) + + // Avoid possibility of dividing by zero. + if totalPods == 1 { + scoredPods[pods[0]] = 1.0 + return scoredPods + } + + lruPosition := s.getLRUPositions() + usedPods, neverUsedPods := s.partitionPodsByUsage(pods, lruPosition) + + s.scoreNeverUsedPods(scoredPods, neverUsedPods, totalPods) + s.scoreUsedPods(scoredPods, usedPods, lruPosition, len(neverUsedPods), totalPods) + + return scoredPods +} + +// Score scores the given pods based on LRU for cold requests. +// For cache hits, returns neutral scores (0.5) for all pods. +// For cache misses, ranks pods by their LRU order. +// - LRU ordering is with respect to when a pod last received a cold request. 
+// - Least recently used (or never used) pods get highest score (1.0) +// - Most recently used pods get lowest score (approaching 0.0) +func (s *NoHitLRU) Score(ctx context.Context, cycleState *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 { + logger := log.FromContext(ctx).V(logutil.DEBUG) + + isCold := s.isColdRequest(ctx, cycleState) + + // Store the cold request state in plugin state for PreRequest to use + coldState := &coldRequestState{isCold: isCold} + s.pluginState.Write(request.RequestId, plugins.StateKey(s.typedName.String()), coldState) + + if !isCold { + logger.Info("Cache hit detected, returning neutral scores") + return s.scoreNeutral(pods) + } + + logger.Info("Cold request detected, scoring pods by LRU") + return s.scoreColdRequestByLRU(pods) +} + +// PreRequest is called before a request is sent to the target pod. +// For cold requests, it updates the LRU cache to track which pods have been used recently. +func (s *NoHitLRU) PreRequest(ctx context.Context, request *types.LLMRequest, schedulingResult *types.SchedulingResult, _ int) { + logger := log.FromContext(ctx).V(logutil.DEBUG) + + if schedulingResult == nil || len(schedulingResult.ProfileResults) == 0 { + logger.Info("No scheduling result available") + return + } + + // Read the cold request state we stored in Score + coldState, err := plugins.ReadPluginStateKey[*coldRequestState](s.pluginState, request.RequestId, plugins.StateKey(s.typedName.String())) + // After fetching the cold state, drop it from the plugin state immediately (otherwise it will hang around until it becomes stale). 
+ s.pluginState.Delete(request.RequestId) + + if err != nil { + logger.Info("No cold request state found, treating as non-cold request", "error", err) + return + } + + if !coldState.isCold { + logger.Info("Not a cold request, skipping LRU update") + return + } + + // Get the primary profile's target pod + primaryProfile := schedulingResult.ProfileResults[schedulingResult.PrimaryProfileName] + if primaryProfile == nil || len(primaryProfile.TargetPods) == 0 { + logger.Info("No target pod in primary profile") + return + } + + targetPod := primaryProfile.TargetPods[0] + podName := targetPod.GetPod().NamespacedName.String() + + // Move the pod to the front of the LRU. + var present struct{} // dummy value + s.lruCache.Add(podName, present) + + logger.Info("Updated LRU cache for cold request", "pod", podName, "requestId", request.RequestId) +} diff --git a/pkg/plugins/scorer/no_hit_lru_test.go b/pkg/plugins/scorer/no_hit_lru_test.go new file mode 100644 index 00000000..76f44aa2 --- /dev/null +++ b/pkg/plugins/scorer/no_hit_lru_test.go @@ -0,0 +1,449 @@ +package scorer_test + +import ( + "context" + "encoding/json" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + + k8stypes "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" + + "github.com/llm-d/llm-d-inference-scheduler/pkg/plugins/scorer" +) + +var _ plugins.Handle = &fakeHandle{} + +type fakeHandle struct { + ctx context.Context + plugins map[string]plugins.Plugin +} + +func newFakeHandle(ctx context.Context) *fakeHandle { + return &fakeHandle{ctx: ctx, plugins: 
map[string]plugins.Plugin{}} +} + +func (h *fakeHandle) Context() context.Context { + return h.ctx +} + +func (h *fakeHandle) Plugin(name string) plugins.Plugin { + return h.plugins[name] +} + +func (h *fakeHandle) AddPlugin(name string, plugin plugins.Plugin) { + h.plugins[name] = plugin +} + +func (h *fakeHandle) GetAllPlugins() []plugins.Plugin { + result := make([]plugins.Plugin, 0, len(h.plugins)) + for _, plugin := range h.plugins { + result = append(result, plugin) + } + return result +} + +func (h *fakeHandle) GetAllPluginsWithNames() map[string]plugins.Plugin { + return h.plugins +} + +func (h *fakeHandle) PodList(_ func(backendmetrics.PodMetrics) bool) []backendmetrics.PodMetrics { + return make([]backendmetrics.PodMetrics, 0) +} + +type stubPlugin struct { + name plugins.TypedName +} + +func (p *stubPlugin) TypedName() plugins.TypedName { + return p.name +} + +func TestNoHitLRUFactoryDependencyValidation(t *testing.T) { + tests := []struct { + name string + handle *fakeHandle + params map[string]any + expectError bool + errorMessage string + }{ + { + name: "missing prefix cache plugin - should work as optimization", + handle: newFakeHandle(context.Background()), + expectError: false, + }, + { + name: "prefix plugin present - should work", + handle: func() *fakeHandle { + h := newFakeHandle(context.Background()) + h.AddPlugin(prefix.PrefixCachePluginType, &stubPlugin{name: plugins.TypedName{Type: prefix.PrefixCachePluginType, Name: prefix.PrefixCachePluginType}}) + return h + }(), + expectError: false, + }, + } + + for _, tt := range tests { + // Marshal params if provided + var raw json.RawMessage + if tt.params != nil { + bytes, err := json.Marshal(tt.params) + if err != nil { + t.Fatalf("failed to marshal parameters: %v", err) + } + raw = bytes + } + + plugin, err := scorer.NoHitLRUFactory("test", raw, tt.handle) + if tt.expectError { + if err == nil { + t.Fatalf("expected error for case %q, got none", tt.name) + } + if tt.errorMessage != "" && 
!strings.Contains(err.Error(), tt.errorMessage) { + t.Fatalf("error message mismatch for case %q: %v", tt.name, err) + } + continue + } + + if err != nil { + t.Fatalf("unexpected error for case %q: %v", tt.name, err) + } + if plugin == nil { + t.Fatalf("expected plugin instance for case %q", tt.name) + } + } +} + +func TestNoHitLRUScorer(t *testing.T) { + podA := &types.PodMetrics{ + Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-a"}}, + MetricsState: &backendmetrics.MetricsState{}, + } + podB := &types.PodMetrics{ + Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-b"}}, + MetricsState: &backendmetrics.MetricsState{}, + } + podC := &types.PodMetrics{ + Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-c"}}, + MetricsState: &backendmetrics.MetricsState{}, + } + + tests := []struct { + name string + scorer framework.Scorer + req *types.LLMRequest + input []types.Pod + prefixState *prefix.SchedulingContextState + wantScores map[types.Pod]float64 + description string + }{ + { + name: "cold request - all pods never used", + scorer: scorer.NewNoHitLRU(context.Background(), nil), + req: &types.LLMRequest{ + TargetModel: "test-model", + }, + input: []types.Pod{podA, podB, podC}, + prefixState: &prefix.SchedulingContextState{ + PrefixCacheServers: make(map[prefix.ServerID]int), // empty = cold request + }, + wantScores: map[types.Pod]float64{ + podA: 1.0, // All never-used pods get high scores + podB: 0.5, + podC: 0.0, + }, + description: "Never-used pods should get high scores for cold requests", + }, + { + name: "cache hit - neutral scores", + scorer: scorer.NewNoHitLRU(context.Background(), nil), + req: &types.LLMRequest{ + TargetModel: "test-model", + }, + input: []types.Pod{podA, podB, podC}, + prefixState: &prefix.SchedulingContextState{ + PrefixCacheServers: map[prefix.ServerID]int{ + {Name: "server1", Namespace: "default"}: 5, // non-empty = cache hit + }, + }, + wantScores: map[types.Pod]float64{ + podA: 
0.5, // All pods get neutral scores for cache hits + podB: 0.5, + podC: 0.5, + }, + description: "Cache hits should return neutral scores", + }, + { + name: "single pod - max score", + scorer: scorer.NewNoHitLRU(context.Background(), nil), + req: &types.LLMRequest{ + TargetModel: "test-model", + }, + input: []types.Pod{podA}, + prefixState: &prefix.SchedulingContextState{ + PrefixCacheServers: make(map[prefix.ServerID]int), // empty = cold request + }, + wantScores: map[types.Pod]float64{ + podA: 1.0, // Single pod gets max score + }, + description: "Single pod should get maximum score", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Create cycle state and set prefix state + cycleState := &types.CycleState{} + if test.prefixState != nil { + cycleState.Write(plugins.StateKey(prefix.PrefixCachePluginType), test.prefixState) + } + + got := test.scorer.Score(context.Background(), cycleState, test.req, test.input) + + if diff := cmp.Diff(test.wantScores, got); diff != "" { + t.Errorf("%s: Unexpected output (-want +got): %v", test.description, diff) + } + }) + } +} + +func TestNoHitLRUBasicFunctionality(t *testing.T) { + ctx := context.Background() + scorer := scorer.NewNoHitLRU(ctx, nil) + + podA := &types.PodMetrics{ + Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-a"}}, + MetricsState: &backendmetrics.MetricsState{}, + } + podB := &types.PodMetrics{ + Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-b"}}, + MetricsState: &backendmetrics.MetricsState{}, + } + + pods := []types.Pod{podA, podB} + + // Test basic scoring for cold request (no crashes, returns valid scores) + coldPrefixState := &prefix.SchedulingContextState{ + PrefixCacheServers: make(map[prefix.ServerID]int), // empty = cold request + } + cycleState := &types.CycleState{} + cycleState.Write(plugins.StateKey(prefix.PrefixCachePluginType), coldPrefixState) + + scores := scorer.Score(ctx, cycleState, &types.LLMRequest{}, pods) 
+ + // Should return scores for all pods + if len(scores) != 2 { + t.Errorf("Expected 2 scores, got %d", len(scores)) + } + + // All scores should be valid (between 0 and 1) + for pod, score := range scores { + if score < 0 || score > 1 { + t.Errorf("Invalid score %f for pod %s", score, pod.GetPod().NamespacedName.String()) + } + } + + // For never-used pods, should have different scores (to provide ordering) + if scores[podA] == scores[podB] { + t.Errorf("Expected different scores for different pods, both got %f", scores[podA]) + } +} + +func TestNoPrefixCacheStateFound(t *testing.T) { + ctx := context.Background() + scorer := scorer.NewNoHitLRU(ctx, nil) + + podA := &types.PodMetrics{ + Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-a"}}, + MetricsState: &backendmetrics.MetricsState{}, + } + pods := []types.Pod{podA} + cycleState := &types.CycleState{} + + scores := scorer.Score(ctx, cycleState, &types.LLMRequest{}, pods) + + if scores[podA] != 1.0 { + t.Errorf("Failure to find a prefix cache should result in scoring as a cold request.") + } +} + +func TestNoHitLRUPreferLeastRecentlyUsedAfterColdRequests(t *testing.T) { + ctx := context.Background() + scorer := scorer.NewNoHitLRU(ctx, nil) + + podA := &types.PodMetrics{ + Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-a", Namespace: "default"}}, + MetricsState: &backendmetrics.MetricsState{}, + } + podB := &types.PodMetrics{ + Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-b", Namespace: "default"}}, + MetricsState: &backendmetrics.MetricsState{}, + } + podC := &types.PodMetrics{ + Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-c", Namespace: "default"}}, + MetricsState: &backendmetrics.MetricsState{}, + } + pods := []types.Pod{podA, podB, podC} + + primaryProfile := "primary-profile" + toPrefixState := func(entries map[prefix.ServerID]int) *types.CycleState { + cycle := &types.CycleState{} + 
cycle.Write(plugins.StateKey(prefix.PrefixCachePluginType), &prefix.SchedulingContextState{PrefixCacheServers: entries}) + return cycle + } + + requestToPod := func(target types.Pod) *types.SchedulingResult { + return &types.SchedulingResult{ + PrimaryProfileName: primaryProfile, + ProfileResults: map[string]*types.ProfileRunResult{ + primaryProfile: { + TargetPods: []types.Pod{target}, + }, + }, + } + } + + // Test LRU behavior indirectly through scoring rather than internal state + assertHighestScoredPod := func(expectedPod types.Pod, testName string) { + t.Helper() + coldReq := &types.LLMRequest{RequestId: testName + "-scoring-check"} + scores := scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReq, pods) + + highestScore := -1.0 + var highestPod types.Pod + for pod, score := range scores { + if score > highestScore { + highestScore = score + highestPod = pod + } + } + + if highestPod.GetPod().NamespacedName.String() != expectedPod.GetPod().NamespacedName.String() { + t.Fatalf("expected %s to have highest score for LRU behavior, but %s had highest score (%f). 
All scores: %+v", + expectedPod.GetPod().NamespacedName.String(), + highestPod.GetPod().NamespacedName.String(), + highestScore, + scores) + } + } + + t.Run("initial cold request seeds cache", func(_ *testing.T) { + coldReqA := &types.LLMRequest{RequestId: "cold-1"} + scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqA, pods) + scorer.PreRequest(ctx, coldReqA, requestToPod(podA), 0) + // After podA handles a cold request, other pods should score higher for new cold requests + assertHighestScoredPod(podB, "after-podA-used") + }) + + t.Run("unused pods rank above existing ones", func(t *testing.T) { + coldReqCheck := &types.LLMRequest{RequestId: "cold-check"} + coldScores := scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqCheck, pods) + if coldScores[podB] <= coldScores[podA] { + t.Fatalf("expected pod-b to outrank pod-a after pod-a handled previous cold request, scores=%+v", coldScores) + } + if coldScores[podB] != 1.0 { + t.Fatalf("expected pod-b to score 1.0, scores=%+v", coldScores) + } + if coldScores[podC] != 0.5 { + t.Fatalf("expected pod-c to score 0.5, scores=%+v", coldScores) + } + }) + + t.Run("warm request leaves LRU untouched", func(t *testing.T) { + warmReq := &types.LLMRequest{RequestId: "warm-1"} + warmState := map[prefix.ServerID]int{ + {Name: "server1", Namespace: "default"}: 1, + } + warmScores := scorer.Score(ctx, toPrefixState(warmState), warmReq, pods) + for _, score := range warmScores { + if score != 0.5 { + t.Fatalf("expected neutral score for warm request, got %f", score) + } + } + scorer.PreRequest(ctx, warmReq, requestToPod(podB), 0) + postWarmReq := &types.LLMRequest{RequestId: "cold-after-warm"} + postWarmScores := scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), postWarmReq, pods) + if postWarmScores[podB] <= postWarmScores[podA] { + t.Fatalf("expected warm request to leave ordering unchanged, scores=%+v", postWarmScores) + } + }) + + t.Run("second cold request rotates to podB", 
func(_ *testing.T) { + // Simulate podB handling a cold request + coldReqB := &types.LLMRequest{RequestId: "cold-2"} + scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqB, pods) + scorer.PreRequest(ctx, coldReqB, requestToPod(podB), 0) + // Now podC should score highest since both podA and podB have been used + assertHighestScoredPod(podC, "after-podB-used") + }) + + t.Run("third cold request rotates back to podA", func(_ *testing.T) { + // Simulate podC handling a cold request + coldReqC := &types.LLMRequest{RequestId: "cold-3"} + scorer.Score(ctx, toPrefixState(make(map[prefix.ServerID]int)), coldReqC, pods) + scorer.PreRequest(ctx, coldReqC, requestToPod(podC), 0) + // Now podA should score highest again (LRU rotation) + assertHighestScoredPod(podA, "after-podC-used") + }) +} + +func TestNoHitLRUEdgeCases(t *testing.T) { + ctx := context.Background() + scorer := scorer.NewNoHitLRU(ctx, nil) + + podA := &types.PodMetrics{ + Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod-a"}}, + MetricsState: &backendmetrics.MetricsState{}, + } + + t.Run("empty pods list", func(t *testing.T) { + emptyPods := []types.Pod{} + cycleState := &types.CycleState{} + cycleState.Write(plugins.StateKey(prefix.PrefixCachePluginType), &prefix.SchedulingContextState{ + PrefixCacheServers: make(map[prefix.ServerID]int), // cold request + }) + + scores := scorer.Score(ctx, cycleState, &types.LLMRequest{}, emptyPods) + + if len(scores) != 0 { + t.Errorf("Expected empty scores for empty pods list, got %d scores", len(scores)) + } + }) + + t.Run("nil pods list", func(t *testing.T) { + cycleState := &types.CycleState{} + cycleState.Write(plugins.StateKey(prefix.PrefixCachePluginType), &prefix.SchedulingContextState{ + PrefixCacheServers: make(map[prefix.ServerID]int), // cold request + }) + + scores := scorer.Score(ctx, cycleState, &types.LLMRequest{}, nil) + + if scores == nil { + t.Errorf("Expected non-nil scores map for nil pods list") + } + if 
len(scores) != 0 { + t.Errorf("Expected empty scores for nil pods list, got %d scores", len(scores)) + } + }) + + t.Run("single pod returns 1.0", func(t *testing.T) { + pods := []types.Pod{podA} + cycleState := &types.CycleState{} + cycleState.Write(plugins.StateKey(prefix.PrefixCachePluginType), &prefix.SchedulingContextState{ + PrefixCacheServers: make(map[prefix.ServerID]int), // cold request + }) + + scores := scorer.Score(ctx, cycleState, &types.LLMRequest{}, pods) + + if scores[podA] != 1.0 { + t.Errorf("Expected single pod to get score 1.0, got %f", scores[podA]) + } + }) +} diff --git a/pkg/plugins/scorer/precise_prefix_cache.go b/pkg/plugins/scorer/precise_prefix_cache.go index 636fb288..fe4c2d6d 100644 --- a/pkg/plugins/scorer/precise_prefix_cache.go +++ b/pkg/plugins/scorer/precise_prefix_cache.go @@ -121,7 +121,13 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat return nil } - scores, err := s.kvCacheIndexer.GetPodScores(ctx, request.Prompt, request.TargetModel, nil) + prompt, err := getUserInput(request) + if err != nil { + loggerDebug.Error(err, "Failed to get user input") + return nil + } + + scores, err := s.kvCacheIndexer.GetPodScores(ctx, prompt, request.TargetModel, nil) if err != nil { loggerDebug.Error(err, "Failed to get pod scores") return nil @@ -139,3 +145,16 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat return indexedScoresToNormalizedScoredPods(pods, podToKey, scores) } + +func getUserInput(request *types.LLMRequest) (string, error) { + if request.Body.Completions != nil { // assumed to be valid if not nil + return request.Body.Completions.Prompt, nil + } + + // must be chat-completions request at this point, return string of entire messages + data, err := json.Marshal(request.Body.ChatCompletions.Messages) + if err != nil { + return "", fmt.Errorf("failed to marshal chat-completions messages: %w", err) + } + return string(data), nil +} diff --git 
a/pkg/plugins/scorer/session_affinity.go b/pkg/plugins/scorer/session_affinity.go index a20de574..3ac9230c 100644 --- a/pkg/plugins/scorer/session_affinity.go +++ b/pkg/plugins/scorer/session_affinity.go @@ -23,7 +23,7 @@ const ( // compile-time type assertion var _ framework.Scorer = &SessionAffinity{} -var _ requestcontrol.PostResponse = &SessionAffinity{} +var _ requestcontrol.ResponseComplete = &SessionAffinity{} // SessionAffinityFactory defines the factory function for SessionAffinity scorer. func SessionAffinityFactory(name string, _ json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) { @@ -80,11 +80,11 @@ func (s *SessionAffinity) Score(ctx context.Context, _ *types.CycleState, reques return scoredPods } -// PostResponse sets the session header on the response sent to the client +// ResponseComplete sets the session header on the response sent to the client // TODO: this should be using a cookie and ensure not overriding any other // cookie values if present. // Tracked in https://github.com/llm-d/llm-d-inference-scheduler/issues/28 -func (s *SessionAffinity) PostResponse(ctx context.Context, _ *types.LLMRequest, response *requestcontrol.Response, targetPod *backend.Pod) { +func (s *SessionAffinity) ResponseComplete(ctx context.Context, _ *types.LLMRequest, response *requestcontrol.Response, targetPod *backend.Pod) { if response == nil || targetPod == nil { reqID := "undefined" if response != nil { diff --git a/pkg/plugins/scorer/session_affinity_test.go b/pkg/plugins/scorer/session_affinity_test.go index 8c1844e6..481b209a 100644 --- a/pkg/plugins/scorer/session_affinity_test.go +++ b/pkg/plugins/scorer/session_affinity_test.go @@ -94,7 +94,7 @@ func TestSessionAffinity_Score(t *testing.T) { } } -func TestSessionAffinity_PostResponse(t *testing.T) { +func TestSessionAffinity_ResponseComplete(t *testing.T) { targetPod := &backend.Pod{ NamespacedName: k8stypes.NamespacedName{Name: "pod1"}, @@ -135,7 +135,7 @@ func TestSessionAffinity_PostResponse(t 
*testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - s.PostResponse(ctx, nil, test.initialResponse, test.targetPod) + s.ResponseComplete(ctx, nil, test.initialResponse, test.targetPod) if diff := cmp.Diff(test.wantHeaders, test.initialResponse.Headers); diff != "" { t.Errorf("Unexpected output (-want +got): %v", diff) diff --git a/pkg/scheduling/pd/scheduler_test.go b/pkg/scheduling/pd/scheduler_test.go index 023708aa..74cf0215 100644 --- a/pkg/scheduling/pd/scheduler_test.go +++ b/pkg/scheduling/pd/scheduler_test.go @@ -102,7 +102,11 @@ func TestPDSchedule(t *testing.T) { req: &types.LLMRequest{ RequestId: uuid.NewString(), TargetModel: "any-model", - Prompt: "12345678901", + Body: &types.LLMRequestBody{ + Completions: &types.CompletionsRequest{ + Prompt: "12345678901", + }, + }, }, input: []types.Pod{}, err: true, @@ -112,7 +116,11 @@ func TestPDSchedule(t *testing.T) { req: &types.LLMRequest{ RequestId: uuid.NewString(), TargetModel: "critical", - Prompt: "12345678901", + Body: &types.LLMRequestBody{ + Completions: &types.CompletionsRequest{ + Prompt: "12345678901", + }, + }, }, // pod2 will be picked because it is the only pod with Decode role input: []types.Pod{pod2}, @@ -123,7 +131,11 @@ func TestPDSchedule(t *testing.T) { req: &types.LLMRequest{ RequestId: uuid.NewString(), TargetModel: "critical", - Prompt: "12345678901", + Body: &types.LLMRequestBody{ + Completions: &types.CompletionsRequest{ + Prompt: "12345678901", + }, + }, }, // no Decode pod input: []types.Pod{pod1}, @@ -134,7 +146,11 @@ func TestPDSchedule(t *testing.T) { req: &types.LLMRequest{ RequestId: uuid.NewString(), TargetModel: "critical", - Prompt: "12345678906", + Body: &types.LLMRequestBody{ + Completions: &types.CompletionsRequest{ + Prompt: "12345678906", + }, + }, }, // pod2 will be picked in the decode profile result, pod1 will be in the prefill profile result input: []types.Pod{pod1, pod2}, @@ -146,7 +162,11 @@ func TestPDSchedule(t *testing.T) { req: 
&types.LLMRequest{ RequestId: uuid.NewString(), TargetModel: "critical", - Prompt: "12345", + Body: &types.LLMRequestBody{ + Completions: &types.CompletionsRequest{ + Prompt: "12345", + }, + }, }, // pod2 will be picked because it is the decode pod, pod1 shouldn't be picked, // because the prompt is too short @@ -159,7 +179,11 @@ func TestPDSchedule(t *testing.T) { req: &types.LLMRequest{ RequestId: uuid.NewString(), TargetModel: "critical", - Prompt: "12345678901", + Body: &types.LLMRequestBody{ + Completions: &types.CompletionsRequest{ + Prompt: "12345678901", + }, + }, }, input: []types.Pod{pod1, noRolePod1}, wantRes: &types.SchedulingResult{ @@ -187,7 +211,11 @@ func TestPDSchedule(t *testing.T) { req: &types.LLMRequest{ RequestId: uuid.NewString(), TargetModel: "critical", - Prompt: "12345678906", + Body: &types.LLMRequestBody{ + Completions: &types.CompletionsRequest{ + Prompt: "12345678906", + }, + }, }, // pod2 will be picked in the decode profile result cause it has higher score than noRolePod1 // pod1 will be in the prefill profile result @@ -204,7 +232,7 @@ func TestPDSchedule(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { // initialize scheduler with config - prefixScorer := prefix.New(ctx, prefix.Config{HashBlockSize: 5, MaxPrefixBlocksToMatch: 256, LRUCapacityPerServer: 31250}) + prefixScorer := prefix.New(ctx, prefix.Config{DefaultBlockSize: 5, MaxPrefixBlocksToMatch: 256, LRUCapacityPerServer: 31250}) prefillSchedulerProfile := framework.NewSchedulerProfile(). WithFilters(filter.NewPrefillRole()). 
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index a69246b2..bfb5c835 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -2,8 +2,10 @@ package e2e import ( "context" + "fmt" "io" "os/exec" + "runtime" "strings" "testing" "time" @@ -12,7 +14,7 @@ import ( "github.com/onsi/gomega" "github.com/onsi/gomega/gexec" apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" - "k8s.io/apimachinery/pkg/runtime" + k8sruntime "k8s.io/apimachinery/pkg/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" @@ -58,7 +60,7 @@ var ( ctx = context.Background() k8sClient client.Client port string - scheme = runtime.NewScheme() + scheme = k8sruntime.NewScheme() eppTag = env.GetEnvString("EPP_TAG", "dev", ginkgo.GinkgoLogr) vllmSimTag = env.GetEnvString("VLLM_SIMULATOR_TAG", "dev", ginkgo.GinkgoLogr) @@ -118,20 +120,24 @@ func setupK8sCluster() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0)) - command = exec.Command("kind", "--name", "e2e-tests", "load", "docker-image", - "ghcr.io/llm-d/llm-d-inference-sim:"+vllmSimTag) - session, err = gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) - gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0)) + kindLoadImage("ghcr.io/llm-d/llm-d-inference-sim:" + vllmSimTag) + kindLoadImage("ghcr.io/llm-d/llm-d-inference-scheduler:" + eppTag) + kindLoadImage("ghcr.io/llm-d/llm-d-routing-sidecar:" + routingSideCarTag) +} - command = exec.Command("kind", "--name", "e2e-tests", "load", "docker-image", - "ghcr.io/llm-d/llm-d-inference-scheduler:"+eppTag) - session, err = gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) +func kindLoadImage(image string) { + tempDir := ginkgo.GinkgoT().TempDir() + target := 
tempDir + "/docker.tar" + + ginkgo.By(fmt.Sprintf("Loading %s into the cluster e2e-tests", image)) + + command := exec.Command("docker", "save", "--platform", "linux/"+runtime.GOARCH, + "--output", target, image) + session, err := gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0)) - command = exec.Command("kind", "--name", "e2e-tests", "load", "docker-image", - "ghcr.io/llm-d/llm-d-routing-sidecar:"+routingSideCarTag) + command = exec.Command("kind", "--name", "e2e-tests", "load", "image-archive", target) session, err = gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0)) diff --git a/test/e2e/utils_test.go b/test/e2e/utils_test.go index a291f926..19ea9425 100644 --- a/test/e2e/utils_test.go +++ b/test/e2e/utils_test.go @@ -62,19 +62,30 @@ func createObjsFromYaml(docs []string) []string { // Wait for the created object to exist. clientObj := getClientObject(kind) - testutils.EventuallyExists(ctx, func() error { + testutils.EventuallyExists(&testutils.TestConfig{ + ExistsTimeout: existsTimeout, + Interval: interval, + }, func() error { return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: name}, clientObj) - }, existsTimeout, interval) + }) switch kind { case "CustomResourceDefinition": // Wait for the CRD to be established. - testutils.CRDEstablished(ctx, k8sClient, clientObj.(*apiextv1.CustomResourceDefinition), - readyTimeout, interval) + testutils.CRDEstablished(&testutils.TestConfig{ + Context: ctx, + K8sClient: k8sClient, + ReadyTimeout: readyTimeout, + Interval: interval, + }, clientObj.(*apiextv1.CustomResourceDefinition)) case "Deployment": // Wait for the deployment to be available. 
- testutils.DeploymentAvailable(ctx, k8sClient, clientObj.(*appsv1.Deployment), - modelReadyTimeout, interval) + testutils.DeploymentAvailable(&testutils.TestConfig{ + Context: ctx, + K8sClient: k8sClient, + ModelReadyTimeout: modelReadyTimeout, + Interval: interval, + }, clientObj.(*appsv1.Deployment)) } } return objNames