From 5945cfb4e2872edef26d77ca3e420f6b1936132e Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Fri, 31 Oct 2025 16:18:55 -0700 Subject: [PATCH 01/34] partial draft --- cmd/epp/runner/runner.go | 156 ++++++++++++++---- .../inferenceobjective_reconciler_test.go | 2 +- .../controller/inferencepool_reconciler.go | 2 +- pkg/epp/controller/pod_reconciler_test.go | 2 +- pkg/epp/datalayer/endpointsPools.go | 46 ++++++ pkg/epp/datastore/datastore.go | 66 ++++---- pkg/epp/datastore/datastore_test.go | 8 +- .../metrics/collectors/inference_pool_test.go | 2 +- pkg/epp/requestcontrol/director_test.go | 4 +- pkg/epp/server/controller_manager.go | 51 +++--- pkg/epp/server/runserver.go | 44 ++--- test/utils/server.go | 2 +- 12 files changed, 260 insertions(+), 125 deletions(-) create mode 100644 pkg/epp/datalayer/endpointsPools.go diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index cbd3ea024..089929576 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -26,6 +26,8 @@ import ( "net/http/pprof" "os" "runtime" + "strconv" + "strings" "sync/atomic" "github.com/go-logr/logr" @@ -100,6 +102,8 @@ var ( poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.") poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.") poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.") + selector = flag.String("selector", "", "Selector to filter pods on. Format: a comma-separated list of key:value label pairs, e.g., 'app:vllm-llama3-8b-instruct,env:prod'.") + targetPorts = flag.String("target-ports", "", "Target ports of the model server pods. Format: a comma-separated list of ports, e.g., '3000,3001,3002'.") logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. 
Defaults to true.") healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking") @@ -194,6 +198,64 @@ func (r *Runner) Run(ctx context.Context) error { setupLog.Error(err, "Failed to get Kubernetes rest config") return err } + //Setup EndPointsPool + endPointsPool := datalayer.NewEndPointsPool() + if *poolName != "" { + // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default + resolvePoolNamespace := func() string { + if *poolNamespace != "" { + return *poolNamespace + } + if nsEnv := os.Getenv("NAMESPACE"); nsEnv != "" { + return nsEnv + } + return runserver.DefaultPoolNamespace + } + resolvedPoolNamespace := resolvePoolNamespace() + poolNamespacedName := types.NamespacedName{ + Name: *poolName, + Namespace: resolvedPoolNamespace, + } + poolGroupKind := schema.GroupKind{ + Group: *poolGroup, + Kind: "InferencePool", + } + poolGKNN := common.GKNN{ + NamespacedName: poolNamespacedName, + GroupKind: poolGroupKind, + } + endPointsPool.GKNN = poolGKNN + } + + if *selector != "" { + endPointsPool.EndPoints.Selector, err = strToMap(*selector) + if err != nil { + setupLog.Error(err, "Failed to parse flag %q with error: %w", "selector", err) + return err + } + endPointsPool.EndPoints.TargetPorts, err = strToUniqueIntSlice(*targetPorts) + if err != nil { + setupLog.Error(err, "Failed to parse flag %q with error: %w", "target-ports", err) + } + endPointsPool.StandaloneMode = true + + // Determine EPP namespace: NAMESPACE env var; else default + eppNsEnv := os.Getenv("EPP_NAMESPACE") + if eppNsEnv == "" { + setupLog.Error(err, "Failed to get environment variable EPP_NAMESPACE") + } + // Determine EPP name: EPP_NAME env var + eppNameEnv := os.Getenv("EPP_NAME") + if eppNameEnv == "" { + setupLog.Error(err, "Failed to get environment variable EPP_NAME") + + } + endPointsPool.GKNN = common.GKNN{ + NamespacedName: types.NamespacedName{Namespace: eppNsEnv, Name: eppNameEnv}, + GroupKind: schema.GroupKind{Kind: "apps", Group: "Deployment"}, + } + + } // --- Setup Datastore --- useDatalayerV2 := env.GetEnvBool(enableExperimentalDatalayerV2, false, setupLog) @@ -201,7 +263,7 @@ func (r *Runner) Run(ctx context.Context) error { if err != nil { return err } - datastore := datastore.NewDatastore(ctx, epf, int32(*modelServerMetricsPort)) + datastore := datastore.NewDatastore(ctx, epf, int32(*modelServerMetricsPort), endPointsPool.EndPoints, endPointsPool.StandaloneMode) // --- Setup Metrics Server --- customCollectors := []prometheus.Collector{collectors.NewInferencePoolMetricsCollector(datastore)} @@ -223,34 +285,10 @@ func (r *Runner) Run(ctx context.Context) error { }(), } - // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default - resolvePoolNamespace := func() string { - if *poolNamespace != "" { - return *poolNamespace - } - if nsEnv := os.Getenv("NAMESPACE"); nsEnv != "" { - return nsEnv - } - return runserver.DefaultPoolNamespace - } - resolvedPoolNamespace := resolvePoolNamespace() - poolNamespacedName := types.NamespacedName{ - Name: *poolName, - Namespace: resolvedPoolNamespace, - } - poolGroupKind := schema.GroupKind{ - Group: *poolGroup, - Kind: "InferencePool", - } - poolGKNN := common.GKNN{ - NamespacedName: poolNamespacedName, - GroupKind: poolGroupKind, - } - isLeader := &atomic.Bool{} isLeader.Store(false) - mgr, err := runserver.NewDefaultManager(poolGKNN, cfg, metricsServerOptions, *haEnableLeaderElection) + mgr, err := 
runserver.NewDefaultManager(endPointsPool, cfg, metricsServerOptions, *haEnableLeaderElection) if err != nil { setupLog.Error(err, "Failed to create controller manager") return err @@ -339,8 +377,7 @@ func (r *Runner) Run(ctx context.Context) error { // --- Setup ExtProc Server Runner --- serverRunner := &runserver.ExtProcServerRunner{ GrpcPort: *grpcPort, - PoolNamespacedName: poolNamespacedName, - PoolGKNN: poolGKNN, + EndPointsPool: endPointsPool, Datastore: datastore, SecureServing: *secureServing, HealthChecking: *healthChecking, @@ -547,9 +584,19 @@ func registerHealthServer(mgr manager.Manager, logger logr.Logger, ds datastore. } func validateFlags() error { - if *poolName == "" { - return fmt.Errorf("required %q flag not set", "poolName") + if (*poolName != "" && *selector != "") || (*poolName == "" && *selector == "") { + return fmt.Errorf("either poolName or selector must be set") } + if *selector != "" { + targetPortsList, err := strToUniqueIntSlice(*targetPorts) + if err != nil { + return fmt.Errorf("unexpected value for %q flag with error %w", "target-ports", err) + } + if len(targetPortsList) == 0 || len(targetPortsList) > 8 { + return fmt.Errorf("flag %q should have length from 1 to 8", "target-ports") + } + } + if *configText != "" && *configFile != "" { return fmt.Errorf("both the %q and %q flags can not be set at the same time", "configText", "configFile") } @@ -560,6 +607,55 @@ func validateFlags() error { return nil } +func strToUniqueIntSlice(s string) ([]int, error) { + seen := make(map[int]struct{}) + var intList []int + + if s == "" { + return intList, nil + } + + strList := strings.Split(s, ",") + + for _, str := range strList { + trimmedStr := strings.TrimSpace(str) + if trimmedStr == "" { + continue + } + portInt, err := strconv.Atoi(trimmedStr) + if err != nil { + return nil, fmt.Errorf("invalid number: '%s' is not an integer", trimmedStr) + } + + if _, ok := seen[portInt]; !ok { + seen[portInt] = struct{}{} + intList = append(intList, portInt) + } + } + return intList, nil +} + +func strToMap(s string) (map[string]string, error) { + m := make(map[string]string) + if s == "" { + return m, nil + } + + mPairs := strings.Split(s, ",") + for _, pair := range mPairs { + trimmedPair := strings.TrimSpace(pair) + if trimmedPair == "" { + continue + } + kv := strings.Split(trimmedPair, ":") + if len(kv) != 2 { + return nil, fmt.Errorf("invalid format, expected key:value paris") + } + m[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1]) + } + return m, nil +} + func verifyMetricMapping(mapping backendmetrics.MetricMapping, logger logr.Logger) { if mapping.TotalQueuedRequests == nil { logger.Info("Not scraping metric: TotalQueuedRequests") diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index 4ceff5d07..1a7ed4dd6 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -164,7 +164,7 @@ func TestInferenceObjectiveReconciler(t *testing.T) { for _, m := range test.objectivessInStore { ds.ObjectiveSet(m) } - _ = ds.PoolSet(context.Background(), fakeClient, pool) + _ = ds.EndPointsSet(context.Background(), fakeClient, pool) reconciler := &InferenceObjectiveReconciler{ Reader: fakeClient, Datastore: ds, diff --git a/pkg/epp/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go index 3b52de0ae..abe2aec86 100644 --- a/pkg/epp/controller/inferencepool_reconciler.go +++ 
b/pkg/epp/controller/inferencepool_reconciler.go @@ -93,7 +93,7 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{}, fmt.Errorf("unsupported API group: %s", c.PoolGKNN.Group) } - if err := c.Datastore.PoolSet(ctx, c.Reader, v1infPool); err != nil { + if err := c.Datastore.EndPointsSet(ctx, c.Reader, v1infPool); err != nil { return ctrl.Result{}, fmt.Errorf("failed to update datastore - %w", err) } diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index 28f817310..1e7f971be 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -197,7 +197,7 @@ func TestPodReconciler(t *testing.T) { // Configure the initial state of the datastore. store := datastore.NewDatastore(t.Context(), pmf, 0) - _ = store.PoolSet(t.Context(), fakeClient, test.pool) + _ = store.EndPointsSet(t.Context(), fakeClient, test.pool) for _, pod := range test.existingPods { store.PodUpdateOrAddIfNotExist(pod) } diff --git a/pkg/epp/datalayer/endpointsPools.go b/pkg/epp/datalayer/endpointsPools.go new file mode 100644 index 000000000..cd395ed15 --- /dev/null +++ b/pkg/epp/datalayer/endpointsPools.go @@ -0,0 +1,46 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package datalayer + +import "sigs.k8s.io/gateway-api-inference-extension/pkg/common" + +type EndPointsPool struct { + EndPoints *EndPoints + StandaloneMode bool + GKNN common.GKNN +} + +// NewEndPointsPool creates and returns a new empty instance of EndPointsPool. +func NewEndPointsPool() *EndPointsPool { + endPoints := NewEndPoints() + return &EndPointsPool{ + EndPoints: endPoints, + } +} + +type EndPoints struct { + Selector map[string]string + TargetPorts []int +} + +// NewEndPoints creates and returns a new empty instance of EndPointsPool. +func NewEndPoints() *EndPoints { + return &EndPoints{ + Selector: make(map[string]string), + TargetPorts: []int{}, + } +} diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index dade69469..29807166a 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -49,7 +49,7 @@ type Datastore interface { // PoolSet sets the given pool in datastore. If the given pool has different label selector than the previous pool // that was stored, the function triggers a resync of the pods to keep the datastore updated. If the given pool // is nil, this call triggers the datastore.Clear() function. 
- PoolSet(ctx context.Context, reader client.Reader, pool *v1.InferencePool) error + EndPointsSet(ctx context.Context, reader client.Reader, pool *v1.InferencePool) error PoolGet() (*v1.InferencePool, error) PoolHasSynced() bool PoolLabelsMatch(podLabels map[string]string) bool @@ -69,14 +69,16 @@ type Datastore interface { Clear() } -func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32) Datastore { +func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, endPoints *datalayer.EndPoints, standaloneMode bool) Datastore { store := &datastore{ - parentCtx: parentCtx, - poolAndObjectivesMu: sync.RWMutex{}, - objectives: make(map[string]*v1alpha2.InferenceObjective), - pods: &sync.Map{}, - modelServerMetricsPort: modelServerMetricsPort, - epf: epFactory, + parentCtx: parentCtx, + endPointsAndObjectivesMu: sync.RWMutex{}, + standaloneMode: standaloneMode, + endPoints: endPoints, + objectives: make(map[string]*v1alpha2.InferenceObjective), + pods: &sync.Map{}, + modelServerMetricsPort: modelServerMetricsPort, + epf: epFactory, } return store } @@ -84,9 +86,11 @@ func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory type datastore struct { // parentCtx controls the lifecycle of the background metrics goroutines that spawn up by the datastore. parentCtx context.Context - // poolAndObjectivesMu is used to synchronize access to pool and the objectives map. - poolAndObjectivesMu sync.RWMutex - pool *v1.InferencePool + // endPointsAndObjectivesMu is used to synchronize access to pool and the objectives map. + endPointsAndObjectivesMu sync.RWMutex + standaloneMode bool + // endPoints is used to filter the available model server endpoints + endPoints *datalayer.EndPoints // key: InferenceObjective.Spec.ModelName, value: *InferenceObjective objectives map[string]*v1alpha2.InferenceObjective // key: types.NamespacedName, value: backendmetrics.PodMetrics @@ -98,9 +102,9 @@ type datastore struct { } func (ds *datastore) Clear() { - ds.poolAndObjectivesMu.Lock() - defer ds.poolAndObjectivesMu.Unlock() - ds.pool = nil + ds.endPointsAndObjectivesMu.Lock() + defer ds.endPointsAndObjectivesMu.Unlock() + ds.endPoints = nil ds.objectives = make(map[string]*v1alpha2.InferenceObjective) // stop all pods go routines before clearing the pods map. 
ds.pods.Range(func(_, v any) bool { @@ -111,14 +115,14 @@ func (ds *datastore) Clear() { } // /// InferencePool APIs /// -func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, pool *v1.InferencePool) error { +func (ds *datastore) EndPointsSet(ctx context.Context, reader client.Reader, pool *v1.InferencePool) error { if pool == nil { ds.Clear() return nil } logger := log.FromContext(ctx) - ds.poolAndObjectivesMu.Lock() - defer ds.poolAndObjectivesMu.Unlock() + ds.endPointsAndObjectivesMu.Lock() + defer ds.endPointsAndObjectivesMu.Unlock() oldPool := ds.pool ds.pool = pool @@ -139,8 +143,8 @@ func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, pool *v1 } func (ds *datastore) PoolGet() (*v1.InferencePool, error) { - ds.poolAndObjectivesMu.RLock() - defer ds.poolAndObjectivesMu.RUnlock() + ds.endPointsAndObjectivesMu.RLock() + defer ds.endPointsAndObjectivesMu.RUnlock() if !ds.PoolHasSynced() { return nil, errPoolNotSynced } @@ -148,14 +152,14 @@ func (ds *datastore) PoolGet() (*v1.InferencePool, error) { } func (ds *datastore) PoolHasSynced() bool { - ds.poolAndObjectivesMu.RLock() - defer ds.poolAndObjectivesMu.RUnlock() + ds.endPointsAndObjectivesMu.RLock() + defer ds.endPointsAndObjectivesMu.RUnlock() return ds.pool != nil } func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool { - ds.poolAndObjectivesMu.RLock() - defer ds.poolAndObjectivesMu.RUnlock() + ds.endPointsAndObjectivesMu.RLock() + defer ds.endPointsAndObjectivesMu.RUnlock() if ds.pool == nil { return false } @@ -165,15 +169,15 @@ func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool { } func (ds *datastore) ObjectiveSet(infObjective *v1alpha2.InferenceObjective) { - ds.poolAndObjectivesMu.Lock() - defer ds.poolAndObjectivesMu.Unlock() + ds.endPointsAndObjectivesMu.Lock() + defer ds.endPointsAndObjectivesMu.Unlock() // Set the objective. 
ds.objectives[infObjective.Name] = infObjective } func (ds *datastore) ObjectiveGet(objectiveName string) *v1alpha2.InferenceObjective { - ds.poolAndObjectivesMu.RLock() - defer ds.poolAndObjectivesMu.RUnlock() + ds.endPointsAndObjectivesMu.RLock() + defer ds.endPointsAndObjectivesMu.RUnlock() iObj, ok := ds.objectives[objectiveName] if !ok { return nil @@ -182,14 +186,14 @@ func (ds *datastore) ObjectiveGet(objectiveName string) *v1alpha2.InferenceObjec } func (ds *datastore) ObjectiveDelete(namespacedName types.NamespacedName) { - ds.poolAndObjectivesMu.Lock() - defer ds.poolAndObjectivesMu.Unlock() + ds.endPointsAndObjectivesMu.Lock() + defer ds.endPointsAndObjectivesMu.Unlock() delete(ds.objectives, namespacedName.Name) } func (ds *datastore) ObjectiveGetAll() []*v1alpha2.InferenceObjective { - ds.poolAndObjectivesMu.RLock() - defer ds.poolAndObjectivesMu.RUnlock() + ds.endPointsAndObjectivesMu.RLock() + defer ds.endPointsAndObjectivesMu.RUnlock() res := []*v1alpha2.InferenceObjective{} for _, v := range ds.objectives { res = append(res, v) diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index ee59071e6..7acc87657 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -87,7 +87,7 @@ func TestPool(t *testing.T) { Build() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) ds := NewDatastore(context.Background(), pmf, 0) - _ = ds.PoolSet(context.Background(), fakeClient, tt.inferencePool) + _ = ds.EndPointsSet(context.Background(), fakeClient, tt.inferencePool) gotPool, gotErr := ds.PoolGet() if diff := cmp.Diff(tt.wantErr, gotErr, cmpopts.EquateErrors()); diff != "" { t.Errorf("Unexpected error diff (+got/-want): %s", diff) @@ -328,7 +328,7 @@ func TestMetrics(t *testing.T) { Build() pmf := backendmetrics.NewPodMetricsFactory(test.pmc, time.Millisecond) ds := NewDatastore(ctx, pmf, 0) - _ = ds.PoolSet(ctx, fakeClient, inferencePool) + _ = ds.EndPointsSet(ctx, fakeClient, inferencePool) for _, pod := range test.storePods { ds.PodUpdateOrAddIfNotExist(pod) } @@ -397,7 +397,7 @@ func TestPods(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) ds := NewDatastore(t.Context(), pmf, 0) fakeClient := fake.NewFakeClient() - if err := ds.PoolSet(ctx, fakeClient, inferencePool); err != nil { + if err := ds.EndPointsSet(ctx, fakeClient, inferencePool); err != nil { t.Error(err) } for _, pod := range test.existingPods { @@ -581,7 +581,7 @@ func TestPodInfo(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) ds := NewDatastore(t.Context(), pmf, 0) fakeClient := fake.NewFakeClient() - if err := ds.PoolSet(ctx, fakeClient, test.pool); err != nil { + if err := ds.EndPointsSet(ctx, fakeClient, test.pool); err != nil { t.Error(err) } for _, pod := range test.existingPods { diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index af2923e50..e6a9c574c 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -83,7 +83,7 @@ func TestMetricsCollected(t *testing.T) { TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, }, } - _ = ds.PoolSet(context.Background(), fakeClient, inferencePool) + _ = ds.EndPointsSet(context.Background(), fakeClient, inferencePool) _ = ds.PodUpdateOrAddIfNotExist(pod1) time.Sleep(1 * time.Second) diff --git 
a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index 8cb9c91a5..a8778f7da 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -140,7 +140,7 @@ func TestDirector_HandleRequest(t *testing.T) { scheme := runtime.NewScheme() _ = clientgoscheme.AddToScheme(scheme) fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() - if err := ds.PoolSet(ctx, fakeClient, pool); err != nil { + if err := ds.EndPointsSet(ctx, fakeClient, pool); err != nil { t.Fatalf("Error while setting inference pool: %v", err) } @@ -595,7 +595,7 @@ func TestGetRandomPod(t *testing.T) { t.Run(test.name, func(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Millisecond) ds := datastore.NewDatastore(t.Context(), pmf, 0) - err := ds.PoolSet(t.Context(), fakeClient, pool) + err := ds.EndPointsSet(t.Context(), fakeClient, pool) if err != nil { t.Errorf("unexpected error setting pool: %s", err) } diff --git a/pkg/epp/server/controller_manager.go b/pkg/epp/server/controller_manager.go index 47e4f12d4..e3ca2a15c 100644 --- a/pkg/epp/server/controller_manager.go +++ b/pkg/epp/server/controller_manager.go @@ -18,6 +18,7 @@ package server import ( "fmt" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/fields" @@ -33,7 +34,6 @@ import ( v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" - "sigs.k8s.io/gateway-api-inference-extension/pkg/common" ) var scheme = runtime.NewScheme() @@ -45,48 +45,47 @@ func init() { } // defaultManagerOptions returns the default options used to create the manager. -func defaultManagerOptions(gknn common.GKNN, metricsServerOptions metricsserver.Options) (ctrl.Options, error) { +func defaultManagerOptions(endPointsPool *datalayer.EndPointsPool, metricsServerOptions metricsserver.Options) (ctrl.Options, error) { opt := ctrl.Options{ Scheme: scheme, Cache: cache.Options{ ByObject: map[client.Object]cache.ByObject{ &corev1.Pod{}: { Namespaces: map[string]cache.Config{ - gknn.Namespace: {}, - }, - }, - &v1alpha2.InferenceObjective{}: { - Namespaces: map[string]cache.Config{ - gknn.Namespace: {}, + endPointsPool.GKNN.Namespace: {}, }, }, }, }, Metrics: metricsServerOptions, } - switch gknn.Group { - case v1alpha2.GroupName: - opt.Cache.ByObject[&v1alpha2.InferencePool{}] = cache.ByObject{ - Namespaces: map[string]cache.Config{gknn.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ - "metadata.name": gknn.Name, - })}}, + if !endPointsPool.StandaloneMode { + opt.Cache.ByObject[&v1alpha2.InferenceObjective{}] = cache.ByObject{Namespaces: map[string]cache.Config{ + endPointsPool.GKNN.Namespace: {}, + }} + switch endPointsPool.GKNN.Group { + case v1alpha2.GroupName: + opt.Cache.ByObject[&v1alpha2.InferencePool{}] = cache.ByObject{ + Namespaces: map[string]cache.Config{endPointsPool.GKNN.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ + "metadata.name": endPointsPool.GKNN.Name, + })}}, + } + case v1.GroupName: + opt.Cache.ByObject[&v1.InferencePool{}] = cache.ByObject{ + Namespaces: map[string]cache.Config{endPointsPool.GKNN.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ + "metadata.name": endPointsPool.GKNN.Name, + })}}, + } } - case v1.GroupName: - opt.Cache.ByObject[&v1.InferencePool{}] = cache.ByObject{ - Namespaces: map[string]cache.Config{gknn.Namespace: {FieldSelector: 
fields.SelectorFromSet(fields.Set{ - "metadata.name": gknn.Name, - })}}, - } - default: - return ctrl.Options{}, fmt.Errorf("unknown group: %s", gknn.Group) + } return opt, nil } // NewDefaultManager creates a new controller manager with default configuration. -func NewDefaultManager(gknn common.GKNN, restConfig *rest.Config, metricsServerOptions metricsserver.Options, leaderElectionEnabled bool) (ctrl.Manager, error) { - opt, err := defaultManagerOptions(gknn, metricsServerOptions) +func NewDefaultManager(endPointsPool *datalayer.EndPointsPool, restConfig *rest.Config, metricsServerOptions metricsserver.Options, leaderElectionEnabled bool) (ctrl.Manager, error) { + opt, err := defaultManagerOptions(endPointsPool, metricsServerOptions) if err != nil { return nil, fmt.Errorf("failed to create controller manager options: %v", err) } @@ -95,8 +94,8 @@ func NewDefaultManager(gknn common.GKNN, restConfig *rest.Config, metricsServerO opt.LeaderElection = true opt.LeaderElectionResourceLock = "leases" // The lease name needs to be unique per EPP deployment. - opt.LeaderElectionID = fmt.Sprintf("epp-%s-%s.gateway-api-inference-extension.sigs.k8s.io", gknn.Namespace, gknn.Name) - opt.LeaderElectionNamespace = gknn.Namespace + opt.LeaderElectionID = fmt.Sprintf("epp-%s-%s.gateway-api-inference-extension.sigs.k8s.io", endPointsPool.GKNN.Namespace, endPointsPool.GKNN.Name) + opt.LeaderElectionNamespace = endPointsPool.GKNN.Namespace opt.LeaderElectionReleaseOnCancel = true } diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go index c3037175e..c79054bd5 100644 --- a/pkg/epp/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "fmt" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "time" extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" @@ -28,14 +29,11 @@ import ( "google.golang.org/grpc/credentials" "google.golang.org/grpc/health" healthgrpc "google.golang.org/grpc/health/grpc_health_v1" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" tlsutil "sigs.k8s.io/gateway-api-inference-extension/internal/tls" - "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/controller" dlmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer/metrics" @@ -48,8 +46,7 @@ import ( // ExtProcServerRunner provides methods to manage an external process server. type ExtProcServerRunner struct { GrpcPort int - PoolNamespacedName types.NamespacedName - PoolGKNN common.GKNN + EndPointsPool *datalayer.EndPointsPool Datastore datastore.Datastore SecureServing bool HealthChecking bool @@ -91,17 +88,8 @@ const ( // NewDefaultExtProcServerRunner creates a runner with default values. // Note: Dependencies like Datastore, Scheduler, SD need to be set separately. 
func NewDefaultExtProcServerRunner() *ExtProcServerRunner { - poolGKNN := common.GKNN{ - NamespacedName: types.NamespacedName{Name: DefaultPoolName, Namespace: DefaultPoolNamespace}, - GroupKind: schema.GroupKind{ - Group: DefaultPoolGroup, - Kind: "InferencePool", - }, - } return &ExtProcServerRunner{ GrpcPort: DefaultGrpcPort, - PoolNamespacedName: types.NamespacedName{Name: DefaultPoolName, Namespace: DefaultPoolNamespace}, - PoolGKNN: poolGKNN, SecureServing: DefaultSecureServing, HealthChecking: DefaultHealthChecking, RefreshPrometheusMetricsInterval: DefaultRefreshPrometheusMetricsInterval, @@ -113,20 +101,22 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner { // SetupWithManager sets up the runner with the given manager. func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { // Create the controllers and register them with the manager - if err := (&controller.InferencePoolReconciler{ - Datastore: r.Datastore, - Reader: mgr.GetClient(), - PoolGKNN: r.PoolGKNN, - }).SetupWithManager(mgr); err != nil { - return fmt.Errorf("failed setting up InferencePoolReconciler: %w", err) - } + if !r.EndPointsPool.StandaloneMode { + if err := (&controller.InferencePoolReconciler{ + Datastore: r.Datastore, + Reader: mgr.GetClient(), + PoolGKNN: r.EndPointsPool.GKNN, + }).SetupWithManager(mgr); err != nil { + return fmt.Errorf("failed setting up InferencePoolReconciler: %w", err) + } - if err := (&controller.InferenceObjectiveReconciler{ - Datastore: r.Datastore, - Reader: mgr.GetClient(), - PoolGKNN: r.PoolGKNN, - }).SetupWithManager(ctx, mgr); err != nil { - return fmt.Errorf("failed setting up InferenceObjectiveReconciler: %w", err) + if err := (&controller.InferenceObjectiveReconciler{ + Datastore: r.Datastore, + Reader: mgr.GetClient(), + PoolGKNN: r.EndPointsPool.GKNN, + }).SetupWithManager(ctx, mgr); err != nil { + return fmt.Errorf("failed setting up InferenceObjectiveReconciler: %w", err) + } } if err := (&controller.PodReconciler{ diff --git a/test/utils/server.go b/test/utils/server.go index 9cf907d29..76060c105 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -72,7 +72,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po Build() pool := testutil.MakeInferencePool(poolName).Namespace(namespace).ObjRef() pool.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(poolPort)}} - _ = ds.PoolSet(context.Background(), fakeClient, pool) + _ = ds.EndPointsSet(context.Background(), fakeClient, pool) return ctx, cancel, ds, pmc } From 370f1a395254178678ac58272165467127a99491 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Sun, 2 Nov 2025 11:03:57 -0800 Subject: [PATCH 02/34] refactor Signed-off-by: Xiyue Yu --- cmd/epp/runner/health.go | 2 +- cmd/epp/runner/runner.go | 2 +- pkg/epp/backend/metrics/logger.go | 6 +- pkg/epp/backend/metrics/pod_metrics_test.go | 4 +- .../inferenceobjective_reconciler_test.go | 33 ++++---- .../controller/inferencepool_reconciler.go | 26 ++++++- .../inferencepool_reconciler_test.go | 8 +- pkg/epp/controller/pod_reconciler.go | 4 +- pkg/epp/controller/pod_reconciler_test.go | 4 +- pkg/epp/datalayer/factory.go | 4 +- pkg/epp/datastore/datastore.go | 52 ++++++------- pkg/epp/datastore/datastore_test.go | 2 +- pkg/epp/requestcontrol/director.go | 4 +- pkg/epp/requestcontrol/director_test.go | 64 +++++++++++++-- pkg/epp/util/pool/pool.go | 78 +++++++++++++++++++ test/integration/epp/hermetic_test.go | 2 +- 16 files changed, 223 insertions(+), 72 deletions(-) create mode 100644 
pkg/epp/util/pool/pool.go diff --git a/cmd/epp/runner/health.go b/cmd/epp/runner/health.go index 1edbcff8e..c80cc4d20 100644 --- a/cmd/epp/runner/health.go +++ b/cmd/epp/runner/health.go @@ -44,7 +44,7 @@ const ( ) func (s *healthServer) Check(ctx context.Context, in *healthPb.HealthCheckRequest) (*healthPb.HealthCheckResponse, error) { - isLive := s.datastore.PoolHasSynced() + isLive := s.datastore.EndPointsPoolHasSynced() // If leader election is disabled, use current logic: all checks are based on whether the pool has synced. if !s.leaderElectionEnabled { diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 089929576..8df79a891 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -263,7 +263,7 @@ func (r *Runner) Run(ctx context.Context) error { if err != nil { return err } - datastore := datastore.NewDatastore(ctx, epf, int32(*modelServerMetricsPort), endPointsPool.EndPoints, endPointsPool.StandaloneMode) + datastore := datastore.NewDatastore(ctx, epf, int32(*modelServerMetricsPort), endPointsPool) // --- Setup Metrics Server --- customCollectors := []prometheus.Collector{collectors.NewInferencePoolMetricsCollector(datastore)} diff --git a/pkg/epp/backend/metrics/logger.go b/pkg/epp/backend/metrics/logger.go index 69fc404e7..9f60ba76a 100644 --- a/pkg/epp/backend/metrics/logger.go +++ b/pkg/epp/backend/metrics/logger.go @@ -97,7 +97,7 @@ func refreshPrometheusMetrics(logger logr.Logger, datastore datalayer.PoolInfo, } podTotalCount := len(podMetrics) - metrics.RecordInferencePoolAvgKVCache(pool.Name, kvCacheTotal/float64(podTotalCount)) - metrics.RecordInferencePoolAvgQueueSize(pool.Name, float64(queueTotal/podTotalCount)) - metrics.RecordInferencePoolReadyPods(pool.Name, float64(podTotalCount)) + metrics.RecordInferencePoolAvgKVCache(pool.GKNN.Name, kvCacheTotal/float64(podTotalCount)) + metrics.RecordInferencePoolAvgQueueSize(pool.GKNN.Name, float64(queueTotal/podTotalCount)) + metrics.RecordInferencePoolReadyPods(pool.GKNN.Name, float64(podTotalCount)) } diff --git a/pkg/epp/backend/metrics/pod_metrics_test.go b/pkg/epp/backend/metrics/pod_metrics_test.go index b0297cd1e..9d32f338e 100644 --- a/pkg/epp/backend/metrics/pod_metrics_test.go +++ b/pkg/epp/backend/metrics/pod_metrics_test.go @@ -86,8 +86,8 @@ func TestMetricsRefresh(t *testing.T) { type fakeDataStore struct{} -func (f *fakeDataStore) PoolGet() (*v1.InferencePool, error) { - return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPorts: []v1.Port{{Number: 8000}}}}, nil +func (f *fakeDataStore) PoolGet() (*datalayer.EndPointsPool, error) { + return datalayer.NewEndPointsPool(), nil } func (f *fakeDataStore) PodList(func(PodMetrics) bool) []PodMetrics { diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index 1a7ed4dd6..7b65269ec 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -18,6 +18,7 @@ package controller import ( "context" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "testing" "time" @@ -36,16 +37,17 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" utiltest "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) var ( - pool = 
utiltest.MakeInferencePool("test-pool1").Namespace("ns1").ObjRef() + inferencePool = utiltest.MakeInferencePool("test-pool1").Namespace("ns1").ObjRef() infObjective1 = utiltest.MakeInferenceObjective("model1"). - Namespace(pool.Namespace). + Namespace(inferencePool.Namespace). Priority(1). CreationTimestamp(metav1.Unix(1000, 0)). - PoolName(pool.Name). + PoolName(inferencePool.Name). PoolGroup("inference.networking.k8s.io").ObjRef() infObjective1Pool2 = utiltest.MakeInferenceObjective(infObjective1.Name). Namespace(infObjective1.Namespace). @@ -57,24 +59,24 @@ var ( Namespace(infObjective1.Namespace). Priority(2). CreationTimestamp(metav1.Unix(1003, 0)). - PoolName(pool.Name). + PoolName(inferencePool.Name). PoolGroup("inference.networking.k8s.io").ObjRef() infObjective1Deleted = utiltest.MakeInferenceObjective(infObjective1.Name). Namespace(infObjective1.Namespace). CreationTimestamp(metav1.Unix(1004, 0)). DeletionTimestamp(). - PoolName(pool.Name). + PoolName(inferencePool.Name). PoolGroup("inference.networking.k8s.io").ObjRef() infObjective1DiffGroup = utiltest.MakeInferenceObjective(infObjective1.Name). - Namespace(pool.Namespace). + Namespace(inferencePool.Namespace). Priority(1). CreationTimestamp(metav1.Unix(1005, 0)). - PoolName(pool.Name). + PoolName(inferencePool.Name). PoolGroup("inference.networking.x-k8s.io").ObjRef() infObjective2 = utiltest.MakeInferenceObjective("model2"). - Namespace(pool.Namespace). + Namespace(inferencePool.Namespace). CreationTimestamp(metav1.Unix(1000, 0)). - PoolName(pool.Name). + PoolName(inferencePool.Name). PoolGroup("inference.networking.k8s.io").ObjRef() ) @@ -120,7 +122,7 @@ func TestInferenceObjectiveReconciler(t *testing.T) { { name: "Objective not found, no matching existing objective to delete", objectivessInStore: []*v1alpha2.InferenceObjective{infObjective1}, - incomingReq: &types.NamespacedName{Name: "non-existent-objective", Namespace: pool.Namespace}, + incomingReq: &types.NamespacedName{Name: "non-existent-objective", Namespace: inferencePool.Namespace}, wantObjectives: []*v1alpha2.InferenceObjective{infObjective1}, }, { @@ -160,17 +162,18 @@ func TestInferenceObjectiveReconciler(t *testing.T) { WithObjects(initObjs...). 
Build() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(t.Context(), pmf, 0) + ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool()) for _, m := range test.objectivessInStore { ds.ObjectiveSet(m) } - _ = ds.EndPointsSet(context.Background(), fakeClient, pool) + endPointsPool := pool.InferencePoolToEndPointsPool(inferencePool) + _ = ds.PoolSet(context.Background(), fakeClient, endPointsPool) reconciler := &InferenceObjectiveReconciler{ Reader: fakeClient, Datastore: ds, PoolGKNN: common.GKNN{ - NamespacedName: types.NamespacedName{Name: pool.Name, Namespace: pool.Namespace}, - GroupKind: schema.GroupKind{Group: pool.GroupVersionKind().Group, Kind: pool.GroupVersionKind().Kind}, + NamespacedName: types.NamespacedName{Name: inferencePool.Name, Namespace: inferencePool.Namespace}, + GroupKind: schema.GroupKind{Group: inferencePool.GroupVersionKind().Group, Kind: inferencePool.GroupVersionKind().Kind}, }, } if test.incomingReq == nil { @@ -191,7 +194,7 @@ func TestInferenceObjectiveReconciler(t *testing.T) { t.Errorf("Unexpected; want: %d, got:%d", len(test.wantObjectives), len(ds.ObjectiveGetAll())) } - if diff := diffStore(ds, diffStoreParams{wantPool: pool, wantObjectives: test.wantObjectives}); diff != "" { + if diff := diffStore(ds, diffStoreParams{wantPool: inferencePool, wantObjectives: test.wantObjectives}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } diff --git a/pkg/epp/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go index abe2aec86..156673914 100644 --- a/pkg/epp/controller/inferencepool_reconciler.go +++ b/pkg/epp/controller/inferencepool_reconciler.go @@ -19,11 +19,11 @@ package controller import ( "context" "fmt" - "k8s.io/apimachinery/pkg/api/errors" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" @@ -92,8 +92,30 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques default: return ctrl.Result{}, fmt.Errorf("unsupported API group: %s", c.PoolGKNN.Group) } + gknn := common.GKNN{ + NamespacedName: req.NamespacedName, + GroupKind: c.PoolGKNN.GroupKind, + } + targetPorts := make([]int, 0, len(v1infPool.Spec.TargetPorts)) + for _, p := range v1infPool.Spec.TargetPorts { + targetPorts = append(targetPorts, int(p.Number)) + + } + selector := make(map[string]string, len(v1infPool.Spec.Selector.MatchLabels)) + for k, v := range v1infPool.Spec.Selector.MatchLabels { + selector[string(k)] = string(v) + } + endPoints := &datalayer.EndPoints{ + Selector: selector, + TargetPorts: targetPorts, + } + endPointsPool := &datalayer.EndPointsPool{ + EndPoints: endPoints, + StandaloneMode: false, + GKNN: gknn, + } - if err := c.Datastore.EndPointsSet(ctx, c.Reader, v1infPool); err != nil { + if err := c.Datastore.PoolSet(ctx, c.Reader, endPointsPool); err != nil { return ctrl.Result{}, fmt.Errorf("failed to update datastore - %w", err) } diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index a2bce1256..1ec1ae9a8 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -18,6 +18,8 @@ package controller import ( "context" 
+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" "testing" "time" @@ -114,7 +116,7 @@ func TestInferencePoolReconciler(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(ctx, pmf, 0) + ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndPointsPool()) inferencePoolReconciler := &InferencePoolReconciler{Reader: fakeClient, Datastore: ds, PoolGKNN: gknn} // Step 1: Inception, only ready pods matching pool1 are added to the store. @@ -261,7 +263,7 @@ func TestXInferencePoolReconciler(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(ctx, pmf, 0) + ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndPointsPool()) inferencePoolReconciler := &InferencePoolReconciler{Reader: fakeClient, Datastore: ds, PoolGKNN: gknn} // Step 1: Inception, only ready pods matching pool1 are added to the store. @@ -332,7 +334,7 @@ func xDiffStore(t *testing.T, datastore datastore.Datastore, params xDiffStorePa gotXPool := &v1alpha2.InferencePool{} - err := gotXPool.ConvertFrom(gotPool) + err := gotXPool.ConvertFrom(pool.EndPointsPoolToInferencePool(gotPool)) if err != nil { t.Fatalf("failed to convert InferencePool to XInferencePool: %v", err) } diff --git a/pkg/epp/controller/pod_reconciler.go b/pkg/epp/controller/pod_reconciler.go index b3a78ef92..8add16ef3 100644 --- a/pkg/epp/controller/pod_reconciler.go +++ b/pkg/epp/controller/pod_reconciler.go @@ -41,8 +41,8 @@ type PodReconciler struct { func (c *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx) - if !c.Datastore.PoolHasSynced() { - logger.V(logutil.TRACE).Info("Skipping reconciling Pod because the InferencePool is not available yet") + if !c.Datastore.EndPointsPoolHasSynced() { + logger.V(logutil.TRACE).Info("Skipping reconciling Pod because the EndPointsPicker is not available yet") // When the inferencePool is initialized it lists the appropriate pods and populates the datastore, so no need to requeue. return ctrl.Result{}, nil } diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index 1e7f971be..f39b29852 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -18,6 +18,8 @@ package controller import ( "context" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "testing" "time" @@ -197,7 +199,7 @@ func TestPodReconciler(t *testing.T) { // Configure the initial state of the datastore. store := datastore.NewDatastore(t.Context(), pmf, 0) - _ = store.EndPointsSet(t.Context(), fakeClient, test.pool) + _ = store.PoolSet(t.Context(), fakeClient, test.pool) for _, pod := range test.existingPods { store.PodUpdateOrAddIfNotExist(pod) } diff --git a/pkg/epp/datalayer/factory.go b/pkg/epp/datalayer/factory.go index 989527c6c..4ed4d08ef 100644 --- a/pkg/epp/datalayer/factory.go +++ b/pkg/epp/datalayer/factory.go @@ -23,8 +23,6 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/log" - - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" ) // PoolInfo represents the DataStore information needed for endpoints. 
@@ -36,7 +34,7 @@ import ( // - Global metrics logging uses PoolGet solely for error return and PodList to enumerate // all endpoints for metrics summarization. type PoolInfo interface { - PoolGet() (*v1.InferencePool, error) + PoolGet() (*EndPointsPool, error) PodList(func(Endpoint) bool) []Endpoint } diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index 29807166a..f9f5360f4 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -49,8 +49,8 @@ type Datastore interface { // PoolSet sets the given pool in datastore. If the given pool has different label selector than the previous pool // that was stored, the function triggers a resync of the pods to keep the datastore updated. If the given pool // is nil, this call triggers the datastore.Clear() function. - EndPointsSet(ctx context.Context, reader client.Reader, pool *v1.InferencePool) error - PoolGet() (*v1.InferencePool, error) + PoolSet(ctx context.Context, reader client.Reader, endPointsPool *datalayer.EndPointsPool) error + PoolGet() (*datalayer.EndPointsPool, error) PoolHasSynced() bool PoolLabelsMatch(podLabels map[string]string) bool @@ -69,12 +69,11 @@ type Datastore interface { Clear() } -func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, endPoints *datalayer.EndPoints, standaloneMode bool) Datastore { +func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, endPointsPool *datalayer.EndPointsPool) Datastore { store := &datastore{ parentCtx: parentCtx, endPointsAndObjectivesMu: sync.RWMutex{}, - standaloneMode: standaloneMode, - endPoints: endPoints, + endPointsPool: endPointsPool, objectives: make(map[string]*v1alpha2.InferenceObjective), pods: &sync.Map{}, modelServerMetricsPort: modelServerMetricsPort, @@ -89,8 +88,7 @@ type datastore struct { // endPointsAndObjectivesMu is used to synchronize access to pool and the objectives map. endPointsAndObjectivesMu sync.RWMutex standaloneMode bool - // endPoints is used to filter the available model server endpoints - endPoints *datalayer.EndPoints + endPointsPool *datalayer.EndPointsPool // key: InferenceObjective.Spec.ModelName, value: *InferenceObjective objectives map[string]*v1alpha2.InferenceObjective // key: types.NamespacedName, value: backendmetrics.PodMetrics @@ -104,7 +102,7 @@ type datastore struct { func (ds *datastore) Clear() { ds.endPointsAndObjectivesMu.Lock() defer ds.endPointsAndObjectivesMu.Unlock() - ds.endPoints = nil + ds.endPointsPool = nil ds.objectives = make(map[string]*v1alpha2.InferenceObjective) // stop all pods go routines before clearing the pods map. 
ds.pods.Range(func(_, v any) bool { @@ -114,9 +112,9 @@ func (ds *datastore) Clear() { ds.pods.Clear() } -// /// InferencePool APIs /// -func (ds *datastore) EndPointsSet(ctx context.Context, reader client.Reader, pool *v1.InferencePool) error { - if pool == nil { +// /// EndPoints APIs /// +func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, endPointsPool *datalayer.EndPointsPool) error { + if endPointsPool == nil { ds.Clear() return nil } @@ -124,10 +122,10 @@ func (ds *datastore) EndPointsSet(ctx context.Context, reader client.Reader, poo ds.endPointsAndObjectivesMu.Lock() defer ds.endPointsAndObjectivesMu.Unlock() - oldPool := ds.pool - ds.pool = pool - if oldPool == nil || !reflect.DeepEqual(pool.Spec.Selector, oldPool.Spec.Selector) { - logger.V(logutil.DEFAULT).Info("Updating inference pool endpoints", "selector", pool.Spec.Selector) + oldEndPointsPool := ds.endPointsPool + ds.endPointsPool = endPointsPool + if oldEndPointsPool == nil || !reflect.DeepEqual(endPointsPool.EndPoints.Selector, endPointsPool.EndPoints.Selector) { + logger.V(logutil.DEFAULT).Info("Updating endpoints", "selector", endPointsPool.EndPoints.Selector) // A full resync is required to address two cases: // 1) At startup, the pod events may get processed before the pool is synced with the datastore, // and hence they will not be added to the store since pool selector is not known yet @@ -142,28 +140,28 @@ func (ds *datastore) EndPointsSet(ctx context.Context, reader client.Reader, poo return nil } -func (ds *datastore) PoolGet() (*v1.InferencePool, error) { +func (ds *datastore) PoolGet() (*datalayer.EndPointsPool, error) { ds.endPointsAndObjectivesMu.RLock() defer ds.endPointsAndObjectivesMu.RUnlock() if !ds.PoolHasSynced() { return nil, errPoolNotSynced } - return ds.pool, nil + return ds.endPointsPool, nil } func (ds *datastore) PoolHasSynced() bool { ds.endPointsAndObjectivesMu.RLock() defer ds.endPointsAndObjectivesMu.RUnlock() - return ds.pool != nil + return ds.endPointsPool != nil } func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool { ds.endPointsAndObjectivesMu.RLock() defer ds.endPointsAndObjectivesMu.RUnlock() - if ds.pool == nil { + if ds.endPointsPool == nil { return false } - poolSelector := selectorFromInferencePoolSelector(ds.pool.Spec.Selector.MatchLabels) + poolSelector := labels.SelectorFromSet(ds.endPointsPool.EndPoints.Selector) podSet := labels.Set(podLabels) return poolSelector.Matches(podSet) } @@ -219,7 +217,7 @@ func (ds *datastore) PodList(predicate func(backendmetrics.PodMetrics) bool) []b } func (ds *datastore) PodUpdateOrAddIfNotExist(pod *corev1.Pod) bool { - if ds.pool == nil { + if ds.endPointsPool == nil { return true } @@ -229,14 +227,14 @@ func (ds *datastore) PodUpdateOrAddIfNotExist(pod *corev1.Pod) bool { } modelServerMetricsPort := 0 - if len(ds.pool.Spec.TargetPorts) == 1 { + if len(ds.endPointsPool.EndPoints.TargetPorts) == 1 { modelServerMetricsPort = int(ds.modelServerMetricsPort) } pods := []*datalayer.PodInfo{} - for idx, port := range ds.pool.Spec.TargetPorts { + for idx, port := range ds.endPointsPool.EndPoints.TargetPorts { metricsPort := modelServerMetricsPort if metricsPort == 0 { - metricsPort = int(port.Number) + metricsPort = port } pods = append(pods, &datalayer.PodInfo{ @@ -246,7 +244,7 @@ func (ds *datastore) PodUpdateOrAddIfNotExist(pod *corev1.Pod) bool { }, PodName: pod.Name, Address: pod.Status.PodIP, - Port: strconv.Itoa(int(port.Number)), + Port: strconv.Itoa(port), MetricsHost: 
net.JoinHostPort(pod.Status.PodIP, strconv.Itoa(metricsPort)), Labels: labels, }) @@ -284,8 +282,8 @@ func (ds *datastore) podResyncAll(ctx context.Context, reader client.Reader) err logger := log.FromContext(ctx) podList := &corev1.PodList{} if err := reader.List(ctx, podList, &client.ListOptions{ - LabelSelector: selectorFromInferencePoolSelector(ds.pool.Spec.Selector.MatchLabels), - Namespace: ds.pool.Namespace, + LabelSelector: labels.SelectorFromSet(ds.endPointsPool.EndPoints.Selector), + Namespace: ds.endPointsPool.GKNN.Namespace, }); err != nil { return fmt.Errorf("failed to list pods - %w", err) } diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index 7acc87657..76c30c583 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -95,7 +95,7 @@ func TestPool(t *testing.T) { if diff := cmp.Diff(tt.wantPool, gotPool); diff != "" { t.Errorf("Unexpected pool diff (+got/-want): %s", diff) } - gotSynced := ds.PoolHasSynced() + gotSynced := ds.EndPointsPoolHasSynced() if diff := cmp.Diff(tt.wantSynced, gotSynced); diff != "" { t.Errorf("Unexpected synced diff (+got/-want): %s", diff) } diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index f6f7deebe..460be0d18 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -23,12 +23,12 @@ import ( "fmt" "math/rand" "net" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "strings" "time" "sigs.k8s.io/controller-runtime/pkg/log" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" @@ -43,7 +43,7 @@ import ( // Datastore defines the interface required by the Director. 
type Datastore interface { - PoolGet() (*v1.InferencePool, error) + PoolGet() (*datalayer.EndPointsPool, error) ObjectiveGet(modelName string) *v1alpha2.InferenceObjective PodList(predicate func(backendmetrics.PodMetrics) bool) []backendmetrics.PodMetrics } diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index a8778f7da..a312dca8a 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -20,6 +20,9 @@ import ( "context" "errors" "fmt" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "testing" "time" @@ -76,7 +79,7 @@ type mockDatastore struct { pods []backendmetrics.PodMetrics } -func (ds *mockDatastore) PoolGet() (*v1.InferencePool, error) { +func (ds *mockDatastore) PoolGet() (*datalayer.EndPointsPool, error) { return nil, nil } func (ds *mockDatastore) ObjectiveGet(_ string) *v1alpha2.InferenceObjective { @@ -120,7 +123,7 @@ func TestDirector_HandleRequest(t *testing.T) { // Datastore setup pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(t.Context(), pmf, 0) + ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool()) ds.ObjectiveSet(ioFoodReview) ds.ObjectiveSet(ioFoodReviewResolve) ds.ObjectiveSet(ioFoodReviewSheddable) @@ -140,7 +143,29 @@ func TestDirector_HandleRequest(t *testing.T) { scheme := runtime.NewScheme() _ = clientgoscheme.AddToScheme(scheme) fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() - if err := ds.EndPointsSet(ctx, fakeClient, pool); err != nil { + targetPorts := make([]int, 0, len(pool.Spec.TargetPorts)) + for _, p := range pool.Spec.TargetPorts { + targetPorts = append(targetPorts, int(p.Number)) + + } + selector := make(map[string]string, len(pool.Spec.Selector.MatchLabels)) + for k, v := range pool.Spec.Selector.MatchLabels { + selector[string(k)] = string(v) + } + gknn := common.GKNN{ + NamespacedName: types.NamespacedName{Namespace: pool.Namespace, Name: pool.Name}, + GroupKind: schema.GroupKind{Group: pool.GroupVersionKind().Group, Kind: pool.GroupVersionKind().Kind}, + } + endPoints := &datalayer.EndPoints{ + Selector: selector, + TargetPorts: targetPorts, + } + endPointsPool := &datalayer.EndPointsPool{ + EndPoints: endPoints, + StandaloneMode: false, + GKNN: gknn, + } + if err := ds.PoolSet(ctx, fakeClient, endPointsPool); err != nil { t.Fatalf("Error while setting inference pool: %v", err) } @@ -594,8 +619,31 @@ func TestGetRandomPod(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Millisecond) - ds := datastore.NewDatastore(t.Context(), pmf, 0) - err := ds.EndPointsSet(t.Context(), fakeClient, pool) + targetPorts := make([]int, 0, len(pool.Spec.TargetPorts)) + for _, p := range pool.Spec.TargetPorts { + targetPorts = append(targetPorts, int(p.Number)) + + } + selector := make(map[string]string, len(pool.Spec.Selector.MatchLabels)) + for k, v := range pool.Spec.Selector.MatchLabels { + selector[string(k)] = string(v) + } + gknn := common.GKNN{ + NamespacedName: types.NamespacedName{Namespace: pool.Namespace, Name: pool.Name}, + GroupKind: schema.GroupKind{Group: pool.GroupVersionKind().Group, Kind: pool.GroupVersionKind().Kind}, + } + endPoints := &datalayer.EndPoints{ + Selector: selector, + TargetPorts: 
targetPorts, + } + endPointsPool := &datalayer.EndPointsPool{ + EndPoints: endPoints, + StandaloneMode: false, + GKNN: gknn, + } + + ds := datastore.NewDatastore(t.Context(), pmf, 0, endPointsPool) + err := ds.PoolSet(t.Context(), fakeClient, endPointsPool) if err != nil { t.Errorf("unexpected error setting pool: %s", err) } @@ -619,7 +667,7 @@ func TestDirector_HandleResponseReceived(t *testing.T) { pr1 := newTestResponseReceived("pr1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0) + ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool()) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, &mockAdmissionController{}, NewConfig().WithResponseReceivedPlugins(pr1)) @@ -656,7 +704,7 @@ func TestDirector_HandleResponseStreaming(t *testing.T) { ps1 := newTestResponseStreaming("ps1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0) + ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool()) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, nil, NewConfig().WithResponseStreamingPlugins(ps1)) @@ -692,7 +740,7 @@ func TestDirector_HandleResponseComplete(t *testing.T) { pc1 := newTestResponseComplete("pc1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0) + ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool()) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, nil, NewConfig().WithResponseCompletePlugins(pc1)) diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go new file mode 100644 index 000000000..c903f5d20 --- /dev/null +++ b/pkg/epp/util/pool/pool.go @@ -0,0 +1,78 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +package pool + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" +) + +func InferencePoolToEndPointsPool(inferencePool *v1.InferencePool) *datalayer.EndPointsPool { + targetPorts := make([]int, 0, len(inferencePool.Spec.TargetPorts)) + for _, p := range inferencePool.Spec.TargetPorts { + targetPorts = append(targetPorts, int(p.Number)) + + } + selector := make(map[string]string, len(inferencePool.Spec.Selector.MatchLabels)) + for k, v := range inferencePool.Spec.Selector.MatchLabels { + selector[string(k)] = string(v) + } + gknn := common.GKNN{ + NamespacedName: types.NamespacedName{Namespace: inferencePool.Namespace, Name: inferencePool.Name}, + GroupKind: schema.GroupKind{Group: inferencePool.GroupVersionKind().Group, Kind: inferencePool.GroupVersionKind().Kind}, + } + endPoints := &datalayer.EndPoints{ + Selector: selector, + TargetPorts: targetPorts, + } + endPointsPool := &datalayer.EndPointsPool{ + EndPoints: endPoints, + StandaloneMode: false, + GKNN: gknn, + } + return endPointsPool +} + +func EndPointsPoolToInferencePool(endPointsPool *datalayer.EndPointsPool) *v1.InferencePool { + targetPorts := make([]v1.Port, 0, len(endPointsPool.EndPoints.TargetPorts)) + for _, p := range endPointsPool.EndPoints.TargetPorts { + targetPorts = append(targetPorts, v1.Port{Number: v1.PortNumber(p)}) + } + labels := make(map[v1.LabelKey]v1.LabelValue, len(endPointsPool.EndPoints.Selector)) + for k, v := range endPointsPool.EndPoints.Selector { + labels[v1.LabelKey(k)] = v1.LabelValue(v) + } + + inferencePool := &v1.InferencePool{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "inference.networking.k8s.io/v1", + Kind: "InferencePool", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: endPointsPool.GKNN.Name, + Namespace: endPointsPool.GKNN.Namespace, + }, + Spec: v1.InferencePoolSpec{ + Selector: v1.LabelSelector{MatchLabels: labels}, + TargetPorts: targetPorts, + }, + } + return inferencePool +} diff --git a/test/integration/epp/hermetic_test.go b/test/integration/epp/hermetic_test.go index 9ce4fec64..6c0cd39a2 100644 --- a/test/integration/epp/hermetic_test.go +++ b/test/integration/epp/hermetic_test.go @@ -1238,7 +1238,7 @@ func BeforeSuite() func() { assert.Eventually(nil, func() bool { modelExist := serverRunner.Datastore.ObjectiveGet(modelMyModel) - synced := serverRunner.Datastore.PoolHasSynced() && modelExist != nil + synced := serverRunner.Datastore.EndPointsPoolHasSynced() && modelExist != nil return synced }, 10*time.Second, 10*time.Millisecond) From 85e4622c47c3e61b186b52a43ecafbd1bcc40c17 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 6 Nov 2025 11:02:56 -0800 Subject: [PATCH 03/34] fixed some ut --- .../inferenceobjective_reconciler_test.go | 5 ++--- .../controller/inferencepool_reconciler_test.go | 15 +++++++++------ pkg/epp/controller/pod_reconciler.go | 2 +- pkg/epp/controller/pod_reconciler_test.go | 6 +++--- pkg/epp/datalayer/endpointsPools.go | 10 +++++++--- pkg/epp/util/pool/pool.go | 13 +++++++++++++ pkg/epp/util/testing/wrappers.go | 16 ++++++++++++++++ 7 files changed, 51 insertions(+), 16 deletions(-) diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index 7b65269ec..81364af4b 100644 --- 
a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -162,7 +162,7 @@ func TestInferenceObjectiveReconciler(t *testing.T) { WithObjects(initObjs...). Build() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool()) + ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, pool.ToGKNN(inferencePool))) for _, m := range test.objectivessInStore { ds.ObjectiveSet(m) } @@ -193,8 +193,7 @@ func TestInferenceObjectiveReconciler(t *testing.T) { if len(test.wantObjectives) != len(ds.ObjectiveGetAll()) { t.Errorf("Unexpected; want: %d, got:%d", len(test.wantObjectives), len(ds.ObjectiveGetAll())) } - - if diff := diffStore(ds, diffStoreParams{wantPool: inferencePool, wantObjectives: test.wantObjectives}); diff != "" { + if diff := diffStore(ds, diffStoreParams{wantPool: endPointsPool, wantObjectives: test.wantObjectives}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 1ec1ae9a8..06d12b01b 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -116,14 +116,15 @@ func TestInferencePoolReconciler(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndPointsPool()) + ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndPointsPool(false, gknn)) inferencePoolReconciler := &InferencePoolReconciler{Reader: fakeClient, Datastore: ds, PoolGKNN: gknn} // Step 1: Inception, only ready pods matching pool1 are added to the store. 
if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - if diff := diffStore(ds, diffStoreParams{wantPool: pool1, wantPods: []string{"pod1-rank-0", "pod2-rank-0"}}); diff != "" { + endPointsPool1 := pool.InferencePoolToEndPointsPool(pool1) + if diff := diffStore(ds, diffStoreParams{wantPool: endPointsPool1, wantPods: []string{"pod1-rank-0", "pod2-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -141,7 +142,8 @@ func TestInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - if diff := diffStore(ds, diffStoreParams{wantPool: newPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { + newEndPointsPool1 := pool.InferencePoolToEndPointsPool(newPool1) + if diff := diffStore(ds, diffStoreParams{wantPool: newEndPointsPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -156,7 +158,8 @@ func TestInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - if diff := diffStore(ds, diffStoreParams{wantPool: newPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { + newEndPointsPool1 = pool.InferencePoolToEndPointsPool(newPool1) + if diff := diffStore(ds, diffStoreParams{wantPool: newEndPointsPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -176,7 +179,7 @@ func TestInferencePoolReconciler(t *testing.T) { } type diffStoreParams struct { - wantPool *v1.InferencePool + wantPool *datalayer.EndPointsPool wantPods []string wantObjectives []*v1alpha2.InferenceObjective } @@ -263,7 +266,7 @@ func TestXInferencePoolReconciler(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndPointsPool()) + ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndPointsPool(false, gknn)) inferencePoolReconciler := &InferencePoolReconciler{Reader: fakeClient, Datastore: ds, PoolGKNN: gknn} // Step 1: Inception, only ready pods matching pool1 are added to the store. diff --git a/pkg/epp/controller/pod_reconciler.go b/pkg/epp/controller/pod_reconciler.go index 8add16ef3..b3a73f313 100644 --- a/pkg/epp/controller/pod_reconciler.go +++ b/pkg/epp/controller/pod_reconciler.go @@ -41,7 +41,7 @@ type PodReconciler struct { func (c *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx) - if !c.Datastore.EndPointsPoolHasSynced() { + if !c.Datastore.PoolHasSynced() { logger.V(logutil.TRACE).Info("Skipping reconciling Pod because the EndPointsPicker is not available yet") // When the inferencePool is initialized it lists the appropriate pods and populates the datastore, so no need to requeue. 
return ctrl.Result{}, nil diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index f39b29852..253a60b34 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -18,7 +18,6 @@ package controller import ( "context" - "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "testing" "time" @@ -37,6 +36,7 @@ import ( v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" utiltest "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) @@ -198,8 +198,8 @@ func TestPodReconciler(t *testing.T) { Build() // Configure the initial state of the datastore. - store := datastore.NewDatastore(t.Context(), pmf, 0) - _ = store.PoolSet(t.Context(), fakeClient, test.pool) + store := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, pool.ToGKNN(test.pool))) + _ = store.PoolSet(t.Context(), fakeClient, pool.InferencePoolToEndPointsPool(test.pool)) for _, pod := range test.existingPods { store.PodUpdateOrAddIfNotExist(pod) } diff --git a/pkg/epp/datalayer/endpointsPools.go b/pkg/epp/datalayer/endpointsPools.go index cd395ed15..5e8051ffb 100644 --- a/pkg/epp/datalayer/endpointsPools.go +++ b/pkg/epp/datalayer/endpointsPools.go @@ -16,7 +16,9 @@ limitations under the License. package datalayer -import "sigs.k8s.io/gateway-api-inference-extension/pkg/common" +import ( + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" +) type EndPointsPool struct { EndPoints *EndPoints @@ -25,10 +27,12 @@ type EndPointsPool struct { } // NewEndPointsPool creates and returns a new empty instance of EndPointsPool. -func NewEndPointsPool() *EndPointsPool { +func NewEndPointsPool(standAloneMode bool, gknn common.GKNN) *EndPointsPool { endPoints := NewEndPoints() return &EndPointsPool{ - EndPoints: endPoints, + GKNN: gknn, + StandaloneMode: standAloneMode, + EndPoints: endPoints, } } diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go index c903f5d20..6b5c8d043 100644 --- a/pkg/epp/util/pool/pool.go +++ b/pkg/epp/util/pool/pool.go @@ -76,3 +76,16 @@ func EndPointsPoolToInferencePool(endPointsPool *datalayer.EndPointsPool) *v1.In } return inferencePool } + +func ToGKNN(ip *v1.InferencePool) common.GKNN { + return common.GKNN{ + NamespacedName: types.NamespacedName{ + Name: ip.Name, + Namespace: ip.ObjectMeta.Namespace, + }, + GroupKind: schema.GroupKind{ + Group: ip.GroupVersionKind().Group, + Kind: ip.GroupVersionKind().Kind, + }, + } +} diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index 7621bff96..2ad02c55d 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -19,8 +19,11 @@ package testing import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" ) // PodWrapper wraps a Pod. 
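For reviewers: the hunks above change NewEndPointsPool to take an explicit standalone-mode flag plus a GKNN identity, and NewDatastore now receives the resulting pool. A minimal sketch of that wiring follows; the pool name, namespace, and helper function are placeholders for illustration and are not part of this series.

package example

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"

	"sigs.k8s.io/gateway-api-inference-extension/pkg/common"
	backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
)

// newExampleDatastore is illustrative only: it builds a pool identity,
// constructs the endpoint pool with the new two-argument constructor, and
// seeds a datastore with it, mirroring what the updated tests in this patch do.
func newExampleDatastore(ctx context.Context) datastore.Datastore {
	gknn := common.GKNN{
		// Placeholder identity; real values come from flags or the InferencePool object.
		NamespacedName: types.NamespacedName{Namespace: "default", Name: "my-pool"},
		GroupKind:      schema.GroupKind{Group: "inference.networking.k8s.io", Kind: "InferencePool"},
	}
	// false = not standalone: the pool identity is backed by an InferencePool resource.
	endpointPool := datalayer.NewEndPointsPool(false, gknn)
	pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second)
	return datastore.NewDatastore(ctx, pmf, 0, endpointPool)
}

The same shape appears throughout the test updates in this series, typically with pool.ToGKNN(...) supplying the GKNN.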
@@ -219,6 +222,19 @@ func (m *InferencePoolWrapper) ObjRef() *v1.InferencePool { return &m.InferencePool } +func (m *InferencePoolWrapper) ToGKNN() common.GKNN { + return common.GKNN{ + NamespacedName: types.NamespacedName{ + Name: m.Name, + Namespace: m.ObjectMeta.Namespace, + }, + GroupKind: schema.GroupKind{ + Group: "inference.networking.k8s.io", + Kind: "InferencePool", + }, + } +} + // AlphaInferencePoolWrapper wraps an group "inference.networking.x-k8s.io" InferencePool. type AlphaInferencePoolWrapper struct { v1alpha2.InferencePool From 579b17ef783f4034f1337913fc9dc86d54fface2 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 6 Nov 2025 14:10:40 -0800 Subject: [PATCH 04/34] make epp controller ut pass --- .../controller/inferencepool_reconciler.go | 38 +++---------------- .../inferencepool_reconciler_test.go | 33 ++++++---------- pkg/epp/datastore/datastore.go | 2 +- pkg/epp/util/pool/pool.go | 25 +++++++++++- 4 files changed, 42 insertions(+), 56 deletions(-) diff --git a/pkg/epp/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go index 156673914..06942316d 100644 --- a/pkg/epp/controller/inferencepool_reconciler.go +++ b/pkg/epp/controller/inferencepool_reconciler.go @@ -19,6 +19,7 @@ package controller import ( "context" "fmt" + "k8s.io/apimachinery/pkg/api/errors" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -30,6 +31,7 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" + pooltuil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" ) // InferencePoolReconciler utilizes the controller runtime to reconcile Instance Gateway resources @@ -75,45 +77,15 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques c.Datastore.Clear() return ctrl.Result{}, nil } - - // 4. Convert the fetched object to the canonical v1.InferencePool. - v1infPool := &v1.InferencePool{} - + endPointsPool := &datalayer.EndPointsPool{} switch pool := obj.(type) { case *v1.InferencePool: - // If it's already a v1 object, just use it. 
- v1infPool = pool + endPointsPool = pooltuil.InferencePoolToEndPointsPool(pool) case *v1alpha2.InferencePool: - var err error - err = pool.ConvertTo(v1infPool) - if err != nil { - return ctrl.Result{}, fmt.Errorf("failed to convert XInferencePool to InferencePool - %w", err) - } + endPointsPool = pooltuil.AlphaInferencePoolToEndPointsPool(pool) default: return ctrl.Result{}, fmt.Errorf("unsupported API group: %s", c.PoolGKNN.Group) } - gknn := common.GKNN{ - NamespacedName: req.NamespacedName, - GroupKind: c.PoolGKNN.GroupKind, - } - targetPorts := make([]int, 0, len(v1infPool.Spec.TargetPorts)) - for _, p := range v1infPool.Spec.TargetPorts { - targetPorts = append(targetPorts, int(p.Number)) - - } - selector := make(map[string]string, len(v1infPool.Spec.Selector.MatchLabels)) - for k, v := range v1infPool.Spec.Selector.MatchLabels { - selector[string(k)] = string(v) - } - endPoints := &datalayer.EndPoints{ - Selector: selector, - TargetPorts: targetPorts, - } - endPointsPool := &datalayer.EndPointsPool{ - EndPoints: endPoints, - StandaloneMode: false, - GKNN: gknn, - } if err := c.Datastore.PoolSet(ctx, c.Reader, endPointsPool); err != nil { return ctrl.Result{}, fmt.Errorf("failed to update datastore - %w", err) diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 06d12b01b..9b0808348 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -18,15 +18,15 @@ package controller import ( "context" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" "testing" "time" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" + "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" @@ -138,7 +138,6 @@ func TestInferencePoolReconciler(t *testing.T) { if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { t.Errorf("Unexpected pool update error: %v", err) } - if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } @@ -186,9 +185,7 @@ type diffStoreParams struct { func diffStore(datastore datastore.Datastore, params diffStoreParams) string { gotPool, _ := datastore.PoolGet() - // controller-runtime fake client may not populate TypeMeta (APIVersion/Kind). - // Ignore it when comparing pools. 
- if diff := cmp.Diff(params.wantPool, gotPool, cmpopts.IgnoreTypes(metav1.TypeMeta{})); diff != "" { + if diff := cmp.Diff(params.wantPool, gotPool); diff != "" { return "pool:" + diff } @@ -273,7 +270,8 @@ func TestXInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - if diff := xDiffStore(t, ds, xDiffStoreParams{wantPool: pool1, wantPods: []string{"pod1-rank-0", "pod2-rank-0"}}); diff != "" { + endPointsPool1 := pool.AlphaInferencePoolToEndPointsPool(pool1) + if diff := xDiffStore(t, ds, xDiffStoreParams{wantPool: endPointsPool1, wantPods: []string{"pod1-rank-0", "pod2-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -289,7 +287,8 @@ func TestXInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - if diff := xDiffStore(t, ds, xDiffStoreParams{wantPool: newPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { + newEndPointsPoll1 := pool.AlphaInferencePoolToEndPointsPool(newPool1) + if diff := xDiffStore(t, ds, xDiffStoreParams{wantPool: newEndPointsPoll1, wantPods: []string{"pod5-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -304,7 +303,8 @@ func TestXInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - if diff := xDiffStore(t, ds, xDiffStoreParams{wantPool: newPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { + newEndPointsPool1 := pool.AlphaInferencePoolToEndPointsPool(newPool1) + if diff := xDiffStore(t, ds, xDiffStoreParams{wantPool: newEndPointsPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -324,7 +324,7 @@ func TestXInferencePoolReconciler(t *testing.T) { } type xDiffStoreParams struct { - wantPool *v1alpha2.InferencePool + wantPool *datalayer.EndPointsPool wantPods []string wantObjectives []*v1alpha2.InferenceObjective } @@ -335,16 +335,7 @@ func xDiffStore(t *testing.T, datastore datastore.Datastore, params xDiffStorePa return "" } - gotXPool := &v1alpha2.InferencePool{} - - err := gotXPool.ConvertFrom(pool.EndPointsPoolToInferencePool(gotPool)) - if err != nil { - t.Fatalf("failed to convert InferencePool to XInferencePool: %v", err) - } - - // controller-runtime fake client may not populate TypeMeta (APIVersion/Kind). - // Ignore it when comparing pools. 
- if diff := cmp.Diff(params.wantPool, gotXPool, cmpopts.IgnoreTypes(metav1.TypeMeta{})); diff != "" { + if diff := cmp.Diff(params.wantPool, gotPool); diff != "" { return "pool:" + diff } diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index f9f5360f4..2384161eb 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -124,7 +124,7 @@ func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, endPoint oldEndPointsPool := ds.endPointsPool ds.endPointsPool = endPointsPool - if oldEndPointsPool == nil || !reflect.DeepEqual(endPointsPool.EndPoints.Selector, endPointsPool.EndPoints.Selector) { + if oldEndPointsPool == nil || !reflect.DeepEqual(oldEndPointsPool.EndPoints.Selector, endPointsPool.EndPoints.Selector) { logger.V(logutil.DEFAULT).Info("Updating endpoints", "selector", endPointsPool.EndPoints.Selector) // A full resync is required to address two cases: // 1) At startup, the pod events may get processed before the pool is synced with the datastore, diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go index 6b5c8d043..b0e443d7a 100644 --- a/pkg/epp/util/pool/pool.go +++ b/pkg/epp/util/pool/pool.go @@ -20,6 +20,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" + v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" ) @@ -36,7 +37,29 @@ func InferencePoolToEndPointsPool(inferencePool *v1.InferencePool) *datalayer.En } gknn := common.GKNN{ NamespacedName: types.NamespacedName{Namespace: inferencePool.Namespace, Name: inferencePool.Name}, - GroupKind: schema.GroupKind{Group: inferencePool.GroupVersionKind().Group, Kind: inferencePool.GroupVersionKind().Kind}, + GroupKind: schema.GroupKind{Group: "inference.networking.k8s.io", Kind: "InferencePool"}, + } + endPoints := &datalayer.EndPoints{ + Selector: selector, + TargetPorts: targetPorts, + } + endPointsPool := &datalayer.EndPointsPool{ + EndPoints: endPoints, + StandaloneMode: false, + GKNN: gknn, + } + return endPointsPool +} + +func AlphaInferencePoolToEndPointsPool(inferencePool *v1alpha2.InferencePool) *datalayer.EndPointsPool { + targetPorts := []int{int(inferencePool.Spec.TargetPortNumber)} + selector := make(map[string]string, len(inferencePool.Spec.Selector)) + for k, v := range inferencePool.Spec.Selector { + selector[string(k)] = string(v) + } + gknn := common.GKNN{ + NamespacedName: types.NamespacedName{Namespace: inferencePool.Namespace, Name: inferencePool.Name}, + GroupKind: schema.GroupKind{Group: "inference.networking.x-k8s.io", Kind: "InferencePool"}, } endPoints := &datalayer.EndPoints{ Selector: selector, From e9d704d40b52adb7041a8e2974c39745c98ec8b1 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 6 Nov 2025 14:54:11 -0800 Subject: [PATCH 05/34] make ut pass --- pkg/epp/backend/metrics/pod_metrics_test.go | 3 +- pkg/epp/datalayer/metrics/logger.go | 6 +-- pkg/epp/datastore/datastore.go | 2 +- pkg/epp/datastore/datastore_test.go | 30 ++++++++----- pkg/epp/handlers/server.go | 4 +- pkg/epp/metrics/collectors/inference_pool.go | 2 +- .../metrics/collectors/inference_pool_test.go | 21 +++++---- pkg/epp/requestcontrol/director_test.go | 44 +++++++------------ pkg/epp/util/pool/pool.go | 6 +++ test/utils/server.go | 13 +++++- 10 files changed, 71 insertions(+), 60 deletions(-) diff --git 
a/pkg/epp/backend/metrics/pod_metrics_test.go b/pkg/epp/backend/metrics/pod_metrics_test.go index 9d32f338e..eff9547ad 100644 --- a/pkg/epp/backend/metrics/pod_metrics_test.go +++ b/pkg/epp/backend/metrics/pod_metrics_test.go @@ -25,7 +25,6 @@ import ( "github.com/stretchr/testify/assert" "k8s.io/apimachinery/pkg/types" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" ) @@ -87,7 +86,7 @@ func TestMetricsRefresh(t *testing.T) { type fakeDataStore struct{} func (f *fakeDataStore) PoolGet() (*datalayer.EndPointsPool, error) { - return datalayer.NewEndPointsPool(), nil + return &datalayer.EndPointsPool{}, nil } func (f *fakeDataStore) PodList(func(PodMetrics) bool) []PodMetrics { diff --git a/pkg/epp/datalayer/metrics/logger.go b/pkg/epp/datalayer/metrics/logger.go index fac757dbe..75cbb8414 100644 --- a/pkg/epp/datalayer/metrics/logger.go +++ b/pkg/epp/datalayer/metrics/logger.go @@ -116,9 +116,9 @@ func refreshPrometheusMetrics(logger logr.Logger, datastore datalayer.PoolInfo, totals := calculateTotals(podMetrics) podCount := len(podMetrics) - metrics.RecordInferencePoolAvgKVCache(pool.Name, totals.kvCache/float64(podCount)) - metrics.RecordInferencePoolAvgQueueSize(pool.Name, float64(totals.queueSize/podCount)) - metrics.RecordInferencePoolReadyPods(pool.Name, float64(podCount)) + metrics.RecordInferencePoolAvgKVCache(pool.GKNN.Name, totals.kvCache/float64(podCount)) + metrics.RecordInferencePoolAvgQueueSize(pool.GKNN.Name, float64(totals.queueSize/podCount)) + metrics.RecordInferencePoolReadyPods(pool.GKNN.Name, float64(podCount)) } // totals holds aggregated metric values diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index 2384161eb..01b12dd54 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -217,7 +217,7 @@ func (ds *datastore) PodList(predicate func(backendmetrics.PodMetrics) bool) []b } func (ds *datastore) PodUpdateOrAddIfNotExist(pod *corev1.Pod) bool { - if ds.endPointsPool == nil { + if ds.endPointsPool == nil || ds.endPointsPool.EndPoints == nil { return true } diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index 76c30c583..df1907398 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -33,6 +33,7 @@ import ( "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client/fake" + pooltuil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" @@ -86,16 +87,18 @@ func TestPool(t *testing.T) { WithScheme(scheme). 
Build() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := NewDatastore(context.Background(), pmf, 0) - _ = ds.EndPointsSet(context.Background(), fakeClient, tt.inferencePool) + gknn := pooltuil.ToGKNN(tt.inferencePool) + endPointPool := datalayer.NewEndPointsPool(false, gknn) + ds := NewDatastore(context.Background(), pmf, 0, endPointPool) + _ = ds.PoolSet(context.Background(), fakeClient, pooltuil.InferencePoolToEndPointsPool(tt.inferencePool)) gotPool, gotErr := ds.PoolGet() if diff := cmp.Diff(tt.wantErr, gotErr, cmpopts.EquateErrors()); diff != "" { t.Errorf("Unexpected error diff (+got/-want): %s", diff) } - if diff := cmp.Diff(tt.wantPool, gotPool); diff != "" { + if diff := cmp.Diff(pooltuil.InferencePoolToEndPointsPool(tt.wantPool), gotPool); diff != "" { t.Errorf("Unexpected pool diff (+got/-want): %s", diff) } - gotSynced := ds.EndPointsPoolHasSynced() + gotSynced := ds.PoolHasSynced() if diff := cmp.Diff(tt.wantSynced, gotSynced); diff != "" { t.Errorf("Unexpected synced diff (+got/-want): %s", diff) } @@ -120,6 +123,10 @@ func TestObjective(t *testing.T) { Priority(2).ObjRef() // Same object name as model2ts, different model name. model2chat := testutil.MakeInferenceObjective(model2ts.Name).ObjRef() + pool1Selector := map[string]string{"app": "vllm_v1"} + pool1 := testutil.MakeInferencePool("pool1"). + Namespace("default"). + Selector(pool1Selector).ObjRef() tests := []struct { name string @@ -193,7 +200,7 @@ func TestObjective(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := NewDatastore(t.Context(), pmf, 0) + ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, pooltuil.ToGKNN(pool1))) for _, m := range test.existingModels { ds.ObjectiveSet(m) } @@ -327,8 +334,9 @@ func TestMetrics(t *testing.T) { WithScheme(scheme). 
Build() pmf := backendmetrics.NewPodMetricsFactory(test.pmc, time.Millisecond) - ds := NewDatastore(ctx, pmf, 0) - _ = ds.EndPointsSet(ctx, fakeClient, inferencePool) + gknn := pooltuil.ToGKNN(inferencePool) + ds := NewDatastore(ctx, pmf, 0, datalayer.NewEndPointsPool(false, gknn)) + _ = ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndPointsPool(inferencePool)) for _, pod := range test.storePods { ds.PodUpdateOrAddIfNotExist(pod) } @@ -395,9 +403,9 @@ func TestPods(t *testing.T) { t.Run(test.name, func(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := NewDatastore(t.Context(), pmf, 0) + ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, pooltuil.ToGKNN(inferencePool))) fakeClient := fake.NewFakeClient() - if err := ds.EndPointsSet(ctx, fakeClient, inferencePool); err != nil { + if err := ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndPointsPool(inferencePool)); err != nil { t.Error(err) } for _, pod := range test.existingPods { @@ -579,9 +587,9 @@ func TestPodInfo(t *testing.T) { t.Run(test.name, func(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := NewDatastore(t.Context(), pmf, 0) + ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, pooltuil.ToGKNN(test.pool))) fakeClient := fake.NewFakeClient() - if err := ds.EndPointsSet(ctx, fakeClient, test.pool); err != nil { + if err := ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndPointsPool(test.pool)); err != nil { t.Error(err) } for _, pod := range test.existingPods { diff --git a/pkg/epp/handlers/server.go b/pkg/epp/handlers/server.go index 0d5305574..861d2a040 100644 --- a/pkg/epp/handlers/server.go +++ b/pkg/epp/handlers/server.go @@ -30,8 +30,8 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" schedulingtypes "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" @@ -61,7 +61,7 @@ type Director interface { } type Datastore interface { - PoolGet() (*v1.InferencePool, error) + PoolGet() (*datalayer.EndPointsPool, error) } // Server implements the Envoy external processing server. 
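As a quick orientation for the interface change above: handlers.Datastore now exposes the endpoint pool abstraction instead of *v1.InferencePool, and callers read the pool identity from its GKNN. The sketch below restates the narrowed interface and shows the access pattern; the helper name and the nil guard are assumptions for illustration, not code from this patch.

package example

import "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"

// Datastore mirrors the narrowed interface in pkg/epp/handlers/server.go.
type Datastore interface {
	PoolGet() (*datalayer.EndPointsPool, error)
}

// poolLabel is illustrative only: it reads the pool name off the EndPointsPool
// GKNN, which is how gauges are now labeled by pool.
func poolLabel(ds Datastore) (string, error) {
	pool, err := ds.PoolGet()
	if err != nil || pool == nil {
		return "", err
	}
	return pool.GKNN.Name, nil
}

This matches the pool.GKNN.Name usage introduced in the metrics logger hunk above and in the inference_pool collector hunk that follows.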
diff --git a/pkg/epp/metrics/collectors/inference_pool.go b/pkg/epp/metrics/collectors/inference_pool.go index ec3def164..1bb6e206e 100644 --- a/pkg/epp/metrics/collectors/inference_pool.go +++ b/pkg/epp/metrics/collectors/inference_pool.go @@ -73,7 +73,7 @@ func (c *inferencePoolMetricsCollector) Collect(ch chan<- prometheus.Metric) { descInferencePoolPerPodQueueSize, prometheus.GaugeValue, float64(pod.GetMetrics().WaitingQueueSize), - pool.Name, + pool.GKNN.Name, pod.GetPod().NamespacedName.Name, ) } diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index e6a9c574c..267066185 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -28,6 +28,9 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/component-base/metrics/testutil" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + poolutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" @@ -50,7 +53,7 @@ var ( func TestNoMetricsCollected(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(context.Background(), pmf, 0) + ds := datastore.NewDatastore(context.Background(), pmf, 0, datalayer.NewEndPointsPool(false, common.GKNN{})) collector := &inferencePoolMetricsCollector{ ds: ds, @@ -68,13 +71,6 @@ func TestMetricsCollected(t *testing.T) { }, } pmf := backendmetrics.NewPodMetricsFactory(pmc, time.Millisecond) - ds := datastore.NewDatastore(context.Background(), pmf, 0) - - scheme := runtime.NewScheme() - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - Build() - inferencePool := &v1.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", @@ -83,7 +79,14 @@ func TestMetricsCollected(t *testing.T) { TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, }, } - _ = ds.EndPointsSet(context.Background(), fakeClient, inferencePool) + ds := datastore.NewDatastore(context.Background(), pmf, 0, datalayer.NewEndPointsPool(false, poolutil.ToGKNN(inferencePool))) + + scheme := runtime.NewScheme() + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + Build() + + _ = ds.PoolSet(context.Background(), fakeClient, poolutil.InferencePoolToEndPointsPool(inferencePool)) _ = ds.PodUpdateOrAddIfNotExist(pod1) time.Sleep(1 * time.Second) diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index a312dca8a..b2c955723 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -20,9 +20,12 @@ import ( "context" "errors" "fmt" + "k8s.io/apimachinery/pkg/runtime/schema" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + poolutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" + "testing" "time" @@ -120,14 +123,6 @@ func TestDirector_HandleRequest(t *testing.T) { CreationTimestamp(metav1.Unix(1000, 0)). Priority(1). 
ObjRef() - - // Datastore setup - pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool()) - ds.ObjectiveSet(ioFoodReview) - ds.ObjectiveSet(ioFoodReviewResolve) - ds.ObjectiveSet(ioFoodReviewSheddable) - pool := &v1.InferencePool{ ObjectMeta: metav1.ObjectMeta{Name: "test-pool", Namespace: "default"}, Spec: v1.InferencePoolSpec{ @@ -140,6 +135,13 @@ func TestDirector_HandleRequest(t *testing.T) { }, } + // Datastore setup + pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) + ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, poolutil.ToGKNN(pool))) + ds.ObjectiveSet(ioFoodReview) + ds.ObjectiveSet(ioFoodReviewResolve) + ds.ObjectiveSet(ioFoodReviewSheddable) + scheme := runtime.NewScheme() _ = clientgoscheme.AddToScheme(scheme) fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() @@ -148,24 +150,8 @@ func TestDirector_HandleRequest(t *testing.T) { targetPorts = append(targetPorts, int(p.Number)) } - selector := make(map[string]string, len(pool.Spec.Selector.MatchLabels)) - for k, v := range pool.Spec.Selector.MatchLabels { - selector[string(k)] = string(v) - } - gknn := common.GKNN{ - NamespacedName: types.NamespacedName{Namespace: pool.Namespace, Name: pool.Name}, - GroupKind: schema.GroupKind{Group: pool.GroupVersionKind().Group, Kind: pool.GroupVersionKind().Kind}, - } - endPoints := &datalayer.EndPoints{ - Selector: selector, - TargetPorts: targetPorts, - } - endPointsPool := &datalayer.EndPointsPool{ - EndPoints: endPoints, - StandaloneMode: false, - GKNN: gknn, - } - if err := ds.PoolSet(ctx, fakeClient, endPointsPool); err != nil { + + if err := ds.PoolSet(ctx, fakeClient, poolutil.InferencePoolToEndPointsPool(pool)); err != nil { t.Fatalf("Error while setting inference pool: %v", err) } @@ -667,7 +653,7 @@ func TestDirector_HandleResponseReceived(t *testing.T) { pr1 := newTestResponseReceived("pr1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool()) + ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool(false, common.GKNN{})) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, &mockAdmissionController{}, NewConfig().WithResponseReceivedPlugins(pr1)) @@ -704,7 +690,7 @@ func TestDirector_HandleResponseStreaming(t *testing.T) { ps1 := newTestResponseStreaming("ps1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool()) + ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool(false, common.GKNN{})) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, nil, NewConfig().WithResponseStreamingPlugins(ps1)) @@ -740,7 +726,7 @@ func TestDirector_HandleResponseComplete(t *testing.T) { pc1 := newTestResponseComplete("pc1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool()) + ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool(false, common.GKNN{})) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, nil, NewConfig().WithResponseCompletePlugins(pc1)) diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go index b0e443d7a..004627254 100644 --- 
a/pkg/epp/util/pool/pool.go +++ b/pkg/epp/util/pool/pool.go @@ -26,6 +26,9 @@ import ( ) func InferencePoolToEndPointsPool(inferencePool *v1.InferencePool) *datalayer.EndPointsPool { + if inferencePool == nil { + return nil + } targetPorts := make([]int, 0, len(inferencePool.Spec.TargetPorts)) for _, p := range inferencePool.Spec.TargetPorts { targetPorts = append(targetPorts, int(p.Number)) @@ -101,6 +104,9 @@ func EndPointsPoolToInferencePool(endPointsPool *datalayer.EndPointsPool) *v1.In } func ToGKNN(ip *v1.InferencePool) common.GKNN { + if ip == nil { + return common.GKNN{} + } return common.GKNN{ NamespacedName: types.NamespacedName{ Name: ip.Name, diff --git a/test/utils/server.go b/test/utils/server.go index 76060c105..5afc5ad05 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -29,9 +29,14 @@ import ( "google.golang.org/grpc/test/bufconn" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + pooltuil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" @@ -50,7 +55,11 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po pmc := &metrics.FakePodMetricsClient{} pmf := metrics.NewPodMetricsFactory(pmc, time.Second) - ds := datastore.NewDatastore(ctx, pmf, 0) + endPointsPool := datalayer.NewEndPointsPool(false, common.GKNN{ + NamespacedName: types.NamespacedName{Namespace: namespace, Name: poolName}, + GroupKind: schema.GroupKind{Group: "inference.networking.k8s.io", Kind: "InferencePool"}, + }) + ds := datastore.NewDatastore(ctx, pmf, 0, endPointsPool) initObjs := []client.Object{} for _, objective := range objectives { @@ -72,7 +81,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po Build() pool := testutil.MakeInferencePool(poolName).Namespace(namespace).ObjRef() pool.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(poolPort)}} - _ = ds.EndPointsSet(context.Background(), fakeClient, pool) + _ = ds.PoolSet(context.Background(), fakeClient, pooltuil.InferencePoolToEndPointsPool(pool)) return ctx, cancel, ds, pmc } From 42cb2189eff6f9503cf4cca321a841df1b4958e8 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Fri, 7 Nov 2025 10:44:02 -0800 Subject: [PATCH 06/34] fixed build --- cmd/epp/runner/health.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/epp/runner/health.go b/cmd/epp/runner/health.go index c80cc4d20..1edbcff8e 100644 --- a/cmd/epp/runner/health.go +++ b/cmd/epp/runner/health.go @@ -44,7 +44,7 @@ const ( ) func (s *healthServer) Check(ctx context.Context, in *healthPb.HealthCheckRequest) (*healthPb.HealthCheckResponse, error) { - isLive := s.datastore.EndPointsPoolHasSynced() + isLive := s.datastore.PoolHasSynced() // If leader election is disabled, use current logic: all checks are based on whether the pool has synced. 
if !s.leaderElectionEnabled { From 943c53debe29a3489b6cc886ebc270f2e96fcda9 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Fri, 7 Nov 2025 10:46:54 -0800 Subject: [PATCH 07/34] fixed build --- cmd/epp/runner/runner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 8df79a891..e38bca971 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -199,7 +199,7 @@ func (r *Runner) Run(ctx context.Context) error { return err } //Setup EndPointsPool - endPointsPool := datalayer.NewEndPointsPool() + endPointsPool := datalayer.NewEndPointsPool(false, common.GKNN{}) if *poolName != "" { // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default resolvePoolNamespace := func() string { From 1e388217ff27d5486a7a3e4009e29a99fbff7532 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 10 Nov 2025 12:54:00 -0800 Subject: [PATCH 08/34] fixed build failure --- test/integration/epp/hermetic_test.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/integration/epp/hermetic_test.go b/test/integration/epp/hermetic_test.go index 6c0cd39a2..020beb59b 100644 --- a/test/integration/epp/hermetic_test.go +++ b/test/integration/epp/hermetic_test.go @@ -56,6 +56,7 @@ import ( crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/metrics/filters" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/yaml" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" @@ -1170,11 +1171,14 @@ func BeforeSuite() func() { serverRunner.TestPodMetricsClient = &backendmetrics.FakePodMetricsClient{} pmf := backendmetrics.NewPodMetricsFactory(serverRunner.TestPodMetricsClient, 10*time.Millisecond) // Adjust from defaults - serverRunner.PoolGKNN = common.GKNN{ + poolGKNN := common.GKNN{ NamespacedName: types.NamespacedName{Namespace: testNamespace, Name: testPoolName}, GroupKind: schema.GroupKind{Group: v1.GroupVersion.Group, Kind: "InferencePool"}, } - serverRunner.Datastore = datastore.NewDatastore(context.Background(), pmf, 0) + endPointsPool := datalayer.NewEndPointsPool(false, poolGKNN) + serverRunner.EndPointsPool = datalayer.NewEndPointsPool(false, poolGKNN) + + serverRunner.Datastore = datastore.NewDatastore(context.Background(), pmf, 0, endPointsPool) kvCacheUtilizationScorer := scorer.NewKVCacheUtilizationScorer() queueingScorer := scorer.NewQueueScorer() @@ -1238,7 +1242,7 @@ func BeforeSuite() func() { assert.Eventually(nil, func() bool { modelExist := serverRunner.Datastore.ObjectiveGet(modelMyModel) - synced := serverRunner.Datastore.EndPointsPoolHasSynced() && modelExist != nil + synced := serverRunner.Datastore.PoolHasSynced() && modelExist != nil return synced }, 10*time.Second, 10*time.Millisecond) From 30fd66743409760e3aa9bb3ac786a6d9d171f22f Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 10 Nov 2025 13:16:57 -0800 Subject: [PATCH 09/34] fixed lint --- cmd/epp/runner/runner.go | 6 +++--- .../inferenceobjective_reconciler_test.go | 3 ++- pkg/epp/controller/inferencepool_reconciler.go | 2 +- .../controller/inferencepool_reconciler_test.go | 10 +++++----- pkg/epp/controller/pod_reconciler_test.go | 3 ++- pkg/epp/datastore/datastore.go | 14 -------------- pkg/epp/requestcontrol/director.go | 3 ++- pkg/epp/requestcontrol/director_test.go | 5 ----- pkg/epp/server/controller_manager.go | 5 +++-- pkg/epp/server/runserver.go | 3 
++- pkg/epp/util/pool/pool.go | 2 +- 11 files changed, 21 insertions(+), 35 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index e38bca971..88a3fed8a 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -198,7 +198,7 @@ func (r *Runner) Run(ctx context.Context) error { setupLog.Error(err, "Failed to get Kubernetes rest config") return err } - //Setup EndPointsPool + // Setup EndPointsPool endPointsPool := datalayer.NewEndPointsPool(false, common.GKNN{}) if *poolName != "" { // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default @@ -585,7 +585,7 @@ func registerHealthServer(mgr manager.Manager, logger logr.Logger, ds datastore. func validateFlags() error { if (*poolName != "" && *selector != "") || (*poolName == "" && *selector == "") { - return fmt.Errorf("either poolName or selector must be set") + return errors.New("either poolName or selector must be set") } if *selector != "" { targetPortsList, err := strToUniqueIntSlice(*targetPorts) @@ -649,7 +649,7 @@ func strToMap(s string) (map[string]string, error) { } kv := strings.Split(trimmedPair, ":") if len(kv) != 2 { - return nil, fmt.Errorf("invalid format, expected key:value paris") + return nil, errors.New("invalid format, expected key:value paris") } m[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1]) } diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index 81364af4b..66abc98ac 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -18,10 +18,11 @@ package controller import ( "context" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "testing" "time" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + "github.com/google/go-cmp/cmp" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" diff --git a/pkg/epp/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go index 06942316d..8ff8a86f1 100644 --- a/pkg/epp/controller/inferencepool_reconciler.go +++ b/pkg/epp/controller/inferencepool_reconciler.go @@ -77,7 +77,7 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques c.Datastore.Clear() return ctrl.Result{}, nil } - endPointsPool := &datalayer.EndPointsPool{} + var endPointsPool *datalayer.EndPointsPool switch pool := obj.(type) { case *v1.InferencePool: endPointsPool = pooltuil.InferencePoolToEndPointsPool(pool) diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 9b0808348..6b3a14f3e 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -271,7 +271,7 @@ func TestXInferencePoolReconciler(t *testing.T) { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } endPointsPool1 := pool.AlphaInferencePoolToEndPointsPool(pool1) - if diff := xDiffStore(t, ds, xDiffStoreParams{wantPool: endPointsPool1, wantPods: []string{"pod1-rank-0", "pod2-rank-0"}}); diff != "" { + if diff := xDiffStore(ds, xDiffStoreParams{wantPool: endPointsPool1, wantPods: []string{"pod1-rank-0", "pod2-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -288,7 +288,7 @@ func TestXInferencePoolReconciler(t *testing.T) { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } newEndPointsPoll1 := 
pool.AlphaInferencePoolToEndPointsPool(newPool1) - if diff := xDiffStore(t, ds, xDiffStoreParams{wantPool: newEndPointsPoll1, wantPods: []string{"pod5-rank-0"}}); diff != "" { + if diff := xDiffStore(ds, xDiffStoreParams{wantPool: newEndPointsPoll1, wantPods: []string{"pod5-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -304,7 +304,7 @@ func TestXInferencePoolReconciler(t *testing.T) { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } newEndPointsPool1 := pool.AlphaInferencePoolToEndPointsPool(newPool1) - if diff := xDiffStore(t, ds, xDiffStoreParams{wantPool: newEndPointsPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { + if diff := xDiffStore(ds, xDiffStoreParams{wantPool: newEndPointsPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -318,7 +318,7 @@ func TestXInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - if diff := xDiffStore(t, ds, xDiffStoreParams{wantPods: []string{}}); diff != "" { + if diff := xDiffStore(ds, xDiffStoreParams{wantPods: []string{}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } } @@ -329,7 +329,7 @@ type xDiffStoreParams struct { wantObjectives []*v1alpha2.InferenceObjective } -func xDiffStore(t *testing.T, datastore datastore.Datastore, params xDiffStoreParams) string { +func xDiffStore(datastore datastore.Datastore, params xDiffStoreParams) string { gotPool, _ := datastore.PoolGet() if gotPool == nil && params.wantPool == nil { return "" diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index 253a60b34..989b11069 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -18,10 +18,11 @@ package controller import ( "context" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "testing" "time" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" corev1 "k8s.io/api/core/v1" diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index 01b12dd54..6e662e220 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -31,7 +31,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" @@ -87,7 +86,6 @@ type datastore struct { parentCtx context.Context // endPointsAndObjectivesMu is used to synchronize access to pool and the objectives map. 
endPointsAndObjectivesMu sync.RWMutex - standaloneMode bool endPointsPool *datalayer.EndPointsPool // key: InferenceObjective.Spec.ModelName, value: *InferenceObjective objectives map[string]*v1alpha2.InferenceObjective @@ -314,15 +312,3 @@ func (ds *datastore) podResyncAll(ctx context.Context, reader client.Reader) err return nil } - -func selectorFromInferencePoolSelector(selector map[v1.LabelKey]v1.LabelValue) labels.Selector { - return labels.SelectorFromSet(stripLabelKeyAliasFromLabelMap(selector)) -} - -func stripLabelKeyAliasFromLabelMap(labels map[v1.LabelKey]v1.LabelValue) map[string]string { - outMap := make(map[string]string) - for k, v := range labels { - outMap[string(k)] = string(v) - } - return outMap -} diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index 460be0d18..7b9b23816 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -23,10 +23,11 @@ import ( "fmt" "math/rand" "net" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "strings" "time" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index b2c955723..faa092b6f 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -145,11 +145,6 @@ func TestDirector_HandleRequest(t *testing.T) { scheme := runtime.NewScheme() _ = clientgoscheme.AddToScheme(scheme) fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() - targetPorts := make([]int, 0, len(pool.Spec.TargetPorts)) - for _, p := range pool.Spec.TargetPorts { - targetPorts = append(targetPorts, int(p.Number)) - - } if err := ds.PoolSet(ctx, fakeClient, poolutil.InferencePoolToEndPointsPool(pool)); err != nil { t.Fatalf("Error while setting inference pool: %v", err) diff --git a/pkg/epp/server/controller_manager.go b/pkg/epp/server/controller_manager.go index e3ca2a15c..4b5102f45 100644 --- a/pkg/epp/server/controller_manager.go +++ b/pkg/epp/server/controller_manager.go @@ -18,7 +18,6 @@ package server import ( "fmt" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/fields" @@ -31,6 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/manager" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" @@ -76,8 +76,9 @@ func defaultManagerOptions(endPointsPool *datalayer.EndPointsPool, metricsServer "metadata.name": endPointsPool.GKNN.Name, })}}, } + default: + return ctrl.Options{}, fmt.Errorf("unknown group: %s", endPointsPool.GKNN.Group) } - } return opt, nil diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go index c79054bd5..86ee780be 100644 --- a/pkg/epp/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -20,9 +20,10 @@ import ( "context" "crypto/tls" "fmt" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "time" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "github.com/go-logr/logr" "google.golang.org/grpc" diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go 
index 004627254..c720b60cd 100644 --- a/pkg/epp/util/pool/pool.go +++ b/pkg/epp/util/pool/pool.go @@ -110,7 +110,7 @@ func ToGKNN(ip *v1.InferencePool) common.GKNN { return common.GKNN{ NamespacedName: types.NamespacedName{ Name: ip.Name, - Namespace: ip.ObjectMeta.Namespace, + Namespace: ip.Namespace, }, GroupKind: schema.GroupKind{ Group: ip.GroupVersionKind().Group, From 9e853776b462b55c8f05a99c0d9803dcdb3fa0a9 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 10 Nov 2025 13:35:21 -0800 Subject: [PATCH 10/34] fix format --- pkg/epp/util/pool/pool.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go index c720b60cd..f5126face 100644 --- a/pkg/epp/util/pool/pool.go +++ b/pkg/epp/util/pool/pool.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ + package pool import ( From 02675693e5dfb107d8701043312e2d6267a219f7 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Fri, 14 Nov 2025 15:44:39 -0800 Subject: [PATCH 11/34] fixed import format --- pkg/epp/controller/inferenceobjective_reconciler_test.go | 3 +-- pkg/epp/controller/inferencepool_reconciler_test.go | 5 ++--- pkg/epp/controller/pod_reconciler_test.go | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index 66abc98ac..33d5417d4 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -21,8 +21,6 @@ import ( "testing" "time" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - "github.com/google/go-cmp/cmp" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -32,6 +30,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 6b3a14f3e..9c2654a77 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -21,9 +21,6 @@ import ( "testing" "time" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" - "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" corev1 "k8s.io/api/core/v1" @@ -34,6 +31,8 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" 
"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index 989b11069..24c8ee61c 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -21,8 +21,6 @@ import ( "testing" "time" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" corev1 "k8s.io/api/core/v1" @@ -33,6 +31,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" From 200dbf4acc8461ed921c709ab39342b1fcfc1c52 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 17 Nov 2025 10:37:46 -0800 Subject: [PATCH 12/34] rename and refactor Signed-off-by: Xiyue Yu --- cmd/epp/runner/runner.go | 134 ++++++++++-------- pkg/epp/backend/metrics/pod_metrics_test.go | 4 +- .../inferenceobjective_reconciler_test.go | 2 +- .../controller/inferencepool_reconciler.go | 2 +- .../inferencepool_reconciler_test.go | 8 +- pkg/epp/controller/pod_reconciler.go | 2 +- pkg/epp/controller/pod_reconciler_test.go | 2 +- .../{endpointsPools.go => endpointPool.go} | 22 +-- pkg/epp/datalayer/factory.go | 2 +- pkg/epp/datastore/datastore.go | 90 ++++++------ pkg/epp/datastore/datastore_test.go | 10 +- pkg/epp/handlers/server.go | 2 +- .../metrics/collectors/inference_pool_test.go | 4 +- pkg/epp/requestcontrol/director.go | 2 +- pkg/epp/requestcontrol/director_test.go | 18 +-- pkg/epp/server/controller_manager.go | 6 +- pkg/epp/server/runserver.go | 4 +- pkg/epp/util/pool/pool.go | 22 +-- test/integration/epp/hermetic_test.go | 4 +- test/utils/server.go | 2 +- 20 files changed, 176 insertions(+), 166 deletions(-) rename pkg/epp/datalayer/{endpointsPools.go => endpointPool.go} (66%) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index fdb34d6d5..8059bbba3 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -102,8 +102,8 @@ var ( poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.") poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.") poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.") - selector = flag.String("selector", "", "selector to filter pods on. Format: a comma-separated list of labels, e.g., 'app: vllm-llama3-8b-instruct,env=prod'.") - targetPorts = flag.String("target-ports", "", "target ports of model server pods. Format: a comma-separated list of labels, e.g., '3000,3001,3002'") + selector = flag.String("selector", "", "selector to filter pods on, only key value paris is supported. Format: a comma-separated list of key value paris, e.g., 'app:vllm-llama3-8b-instruct,env=prod'.") + targetPorts = flag.String("target-ports", "", "target ports of model server pods. Format: a comma-separated list of numbers, e.g., '3000,3001,3002'") logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. 
Defaults to true.") healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking") @@ -213,63 +213,11 @@ func (r *Runner) Run(ctx context.Context) error { setupLog.Error(err, "Failed to get Kubernetes rest config") return err } - // Setup EndPointsPool - endPointsPool := datalayer.NewEndPointsPool(false, common.GKNN{}) - if *poolName != "" { - // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default - resolvePoolNamespace := func() string { - if *poolNamespace != "" { - return *poolNamespace - } - if nsEnv := os.Getenv("NAMESPACE"); nsEnv != "" { - return nsEnv - } - return runserver.DefaultPoolNamespace - } - resolvedPoolNamespace := resolvePoolNamespace() - poolNamespacedName := types.NamespacedName{ - Name: *poolName, - Namespace: resolvedPoolNamespace, - } - poolGroupKind := schema.GroupKind{ - Group: *poolGroup, - Kind: "InferencePool", - } - poolGKNN := common.GKNN{ - NamespacedName: poolNamespacedName, - GroupKind: poolGroupKind, - } - endPointsPool.GKNN = poolGKNN - } - - if *selector != "" { - endPointsPool.EndPoints.Selector, err = strToMap(*selector) - if err != nil { - setupLog.Error(err, "Failed to parse flag %q with error: %w", "selector", err) - return err - } - endPointsPool.EndPoints.TargetPorts, err = strToUniqueIntSlice(*targetPorts) - if err != nil { - setupLog.Error(err, "Failed to parse flag %q with error: %w", "target-ports", err) - } - endPointsPool.StandaloneMode = true - - // Determine EPP namespace: NAMESPACE env var; else default - eppNsEnv := os.Getenv("EPP_NAMESPACE") - if eppNsEnv == "" { - setupLog.Error(err, "Failed to get environment variable EPP_NAMESPACE") - } - // Determine EPP name: EPP_NAME env var - eppNameEnv := os.Getenv("EPP_NAME") - if eppNameEnv == "" { - setupLog.Error(err, "Failed to get environment variable EPP_NAME") - - } - endPointsPool.GKNN = common.GKNN{ - NamespacedName: types.NamespacedName{Namespace: eppNsEnv, Name: eppNameEnv}, - GroupKind: schema.GroupKind{Kind: "apps", Group: "Deployment"}, - } - + // Setup EndpointPool + endpointPool, err := setupEndpointPool(setupLog) + if err != nil { + setupLog.Error(err, "Failed to set up Endpoints Pool") + return err } // --- Setup Datastore --- @@ -278,7 +226,7 @@ func (r *Runner) Run(ctx context.Context) error { if err != nil { return err } - datastore := datastore.NewDatastore(ctx, epf, int32(*modelServerMetricsPort), endPointsPool) + datastore := datastore.NewDatastore(ctx, epf, int32(*modelServerMetricsPort), endpointPool) // --- Setup Metrics Server --- customCollectors := []prometheus.Collector{collectors.NewInferencePoolMetricsCollector(datastore)} @@ -306,7 +254,7 @@ func (r *Runner) Run(ctx context.Context) error { isLeader := &atomic.Bool{} isLeader.Store(false) - mgr, err := runserver.NewDefaultManager(endPointsPool, cfg, metricsServerOptions, *haEnableLeaderElection) + mgr, err := runserver.NewDefaultManager(endpointPool, cfg, metricsServerOptions, *haEnableLeaderElection) if err != nil { setupLog.Error(err, "Failed to create controller manager") return err @@ -395,7 +343,7 @@ func (r *Runner) Run(ctx context.Context) error { // --- Setup ExtProc Server Runner --- serverRunner := &runserver.ExtProcServerRunner{ GrpcPort: *grpcPort, - EndPointsPool: endPointsPool, + EndPointsPool: endpointPool, Datastore: datastore, SecureServing: *secureServing, HealthChecking: *healthChecking, @@ -433,6 +381,68 @@ func (r *Runner) Run(ctx context.Context) error { return nil } +func 
setupEndpointPool(setupLog logr.Logger) (*datalayer.EndpointPool, error) { + endpointPool := datalayer.NewEndpointPool(false, common.GKNN{}) + if *poolName != "" { + // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default + resolvePoolNamespace := func() string { + if *poolNamespace != "" { + return *poolNamespace + } + if nsEnv := os.Getenv("NAMESPACE"); nsEnv != "" { + return nsEnv + } + return runserver.DefaultPoolNamespace + } + resolvedPoolNamespace := resolvePoolNamespace() + poolNamespacedName := types.NamespacedName{ + Name: *poolName, + Namespace: resolvedPoolNamespace, + } + poolGroupKind := schema.GroupKind{ + Group: *poolGroup, + Kind: "InferencePool", + } + poolGKNN := common.GKNN{ + NamespacedName: poolNamespacedName, + GroupKind: poolGroupKind, + } + endpointPool.GKNN = poolGKNN + } + + if *selector != "" { + endPointPoolSelector, err := strToMap(*selector) + if err != nil { + setupLog.Error(err, "Failed to parse flag %q with error: %w", "selector", err) + return nil, err + } + endpointPool.EndPoints.Selector = endPointPoolSelector + endpointPool.EndPoints.TargetPorts, err = strToUniqueIntSlice(*targetPorts) + if err != nil { + setupLog.Error(err, "Failed to parse flag %q with error: %w", "target-ports", err) + } + endpointPool.DisableK8sCrd = true + + // Determine EPP namespace: NAMESPACE env var; else default + eppNsEnv := os.Getenv("NAMESPACE") + if eppNsEnv == "" { + setupLog.Error(err, "Failed to get environment variable EPP_NAMESPACE") + } + // Determine EPP name: EPP_NAME env var + eppNameEnv := os.Getenv("EPP_NAME") + if eppNameEnv == "" { + setupLog.Error(err, "Failed to get environment variable EPP_NAME") + + } + endpointPool.GKNN = common.GKNN{ + NamespacedName: types.NamespacedName{Namespace: eppNsEnv, Name: eppNameEnv}, + GroupKind: schema.GroupKind{Kind: "apps", Group: "Deployment"}, + } + + } + return endpointPool, nil +} + // registerInTreePlugins registers the factory functions of all known plugins func (r *Runner) registerInTreePlugins() { plugins.Register(prefix.PrefixCachePluginType, prefix.PrefixCachePluginFactory) diff --git a/pkg/epp/backend/metrics/pod_metrics_test.go b/pkg/epp/backend/metrics/pod_metrics_test.go index eff9547ad..8a5561c0e 100644 --- a/pkg/epp/backend/metrics/pod_metrics_test.go +++ b/pkg/epp/backend/metrics/pod_metrics_test.go @@ -85,8 +85,8 @@ func TestMetricsRefresh(t *testing.T) { type fakeDataStore struct{} -func (f *fakeDataStore) PoolGet() (*datalayer.EndPointsPool, error) { - return &datalayer.EndPointsPool{}, nil +func (f *fakeDataStore) PoolGet() (*datalayer.EndpointPool, error) { + return &datalayer.EndpointPool{}, nil } func (f *fakeDataStore) PodList(func(PodMetrics) bool) []PodMetrics { diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index 33d5417d4..2d79c6f45 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -162,7 +162,7 @@ func TestInferenceObjectiveReconciler(t *testing.T) { WithObjects(initObjs...). 
Build() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, pool.ToGKNN(inferencePool))) + ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pool.ToGKNN(inferencePool))) for _, m := range test.objectivessInStore { ds.ObjectiveSet(m) } diff --git a/pkg/epp/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go index 8ff8a86f1..07ac768b7 100644 --- a/pkg/epp/controller/inferencepool_reconciler.go +++ b/pkg/epp/controller/inferencepool_reconciler.go @@ -77,7 +77,7 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques c.Datastore.Clear() return ctrl.Result{}, nil } - var endPointsPool *datalayer.EndPointsPool + var endPointsPool *datalayer.EndpointPool switch pool := obj.(type) { case *v1.InferencePool: endPointsPool = pooltuil.InferencePoolToEndPointsPool(pool) diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 9c2654a77..5ab734002 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -115,7 +115,7 @@ func TestInferencePoolReconciler(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndPointsPool(false, gknn)) + ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndpointPool(false, gknn)) inferencePoolReconciler := &InferencePoolReconciler{Reader: fakeClient, Datastore: ds, PoolGKNN: gknn} // Step 1: Inception, only ready pods matching pool1 are added to the store. @@ -177,7 +177,7 @@ func TestInferencePoolReconciler(t *testing.T) { } type diffStoreParams struct { - wantPool *datalayer.EndPointsPool + wantPool *datalayer.EndpointPool wantPods []string wantObjectives []*v1alpha2.InferenceObjective } @@ -262,7 +262,7 @@ func TestXInferencePoolReconciler(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndPointsPool(false, gknn)) + ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndpointPool(false, gknn)) inferencePoolReconciler := &InferencePoolReconciler{Reader: fakeClient, Datastore: ds, PoolGKNN: gknn} // Step 1: Inception, only ready pods matching pool1 are added to the store. 
@@ -323,7 +323,7 @@ func TestXInferencePoolReconciler(t *testing.T) { } type xDiffStoreParams struct { - wantPool *datalayer.EndPointsPool + wantPool *datalayer.EndpointPool wantPods []string wantObjectives []*v1alpha2.InferenceObjective } diff --git a/pkg/epp/controller/pod_reconciler.go b/pkg/epp/controller/pod_reconciler.go index b3a73f313..b3a78ef92 100644 --- a/pkg/epp/controller/pod_reconciler.go +++ b/pkg/epp/controller/pod_reconciler.go @@ -42,7 +42,7 @@ type PodReconciler struct { func (c *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx) if !c.Datastore.PoolHasSynced() { - logger.V(logutil.TRACE).Info("Skipping reconciling Pod because the EndPointsPicker is not available yet") + logger.V(logutil.TRACE).Info("Skipping reconciling Pod because the InferencePool is not available yet") // When the inferencePool is initialized it lists the appropriate pods and populates the datastore, so no need to requeue. return ctrl.Result{}, nil } diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index 24c8ee61c..44d45bd7e 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -198,7 +198,7 @@ func TestPodReconciler(t *testing.T) { Build() // Configure the initial state of the datastore. - store := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, pool.ToGKNN(test.pool))) + store := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pool.ToGKNN(test.pool))) _ = store.PoolSet(t.Context(), fakeClient, pool.InferencePoolToEndPointsPool(test.pool)) for _, pod := range test.existingPods { store.PodUpdateOrAddIfNotExist(pod) diff --git a/pkg/epp/datalayer/endpointsPools.go b/pkg/epp/datalayer/endpointPool.go similarity index 66% rename from pkg/epp/datalayer/endpointsPools.go rename to pkg/epp/datalayer/endpointPool.go index 5e8051ffb..d9c508677 100644 --- a/pkg/epp/datalayer/endpointsPools.go +++ b/pkg/epp/datalayer/endpointPool.go @@ -20,19 +20,19 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/common" ) -type EndPointsPool struct { - EndPoints *EndPoints - StandaloneMode bool - GKNN common.GKNN +type EndpointPool struct { + EndPoints *EndPoints + DisableK8sCrd bool + GKNN common.GKNN } -// NewEndPointsPool creates and returns a new empty instance of EndPointsPool. -func NewEndPointsPool(standAloneMode bool, gknn common.GKNN) *EndPointsPool { +// NewEndpointPool creates and returns a new empty instance of EndpointPool. +func NewEndpointPool(standAloneMode bool, gknn common.GKNN) *EndpointPool { endPoints := NewEndPoints() - return &EndPointsPool{ - GKNN: gknn, - StandaloneMode: standAloneMode, - EndPoints: endPoints, + return &EndpointPool{ + GKNN: gknn, + DisableK8sCrd: standAloneMode, + EndPoints: endPoints, } } @@ -41,7 +41,7 @@ type EndPoints struct { TargetPorts []int } -// NewEndPoints creates and returns a new empty instance of EndPointsPool. +// NewEndPoints creates and returns a new empty instance of EndpointPool. func NewEndPoints() *EndPoints { return &EndPoints{ Selector: make(map[string]string), diff --git a/pkg/epp/datalayer/factory.go b/pkg/epp/datalayer/factory.go index 4ed4d08ef..018ab5311 100644 --- a/pkg/epp/datalayer/factory.go +++ b/pkg/epp/datalayer/factory.go @@ -34,7 +34,7 @@ import ( // - Global metrics logging uses PoolGet solely for error return and PodList to enumerate // all endpoints for metrics summarization. 
type PoolInfo interface { - PoolGet() (*EndPointsPool, error) + PoolGet() (*EndpointPool, error) PodList(func(Endpoint) bool) []Endpoint } diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index 6e662e220..710a66991 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -48,8 +48,8 @@ type Datastore interface { // PoolSet sets the given pool in datastore. If the given pool has different label selector than the previous pool // that was stored, the function triggers a resync of the pods to keep the datastore updated. If the given pool // is nil, this call triggers the datastore.Clear() function. - PoolSet(ctx context.Context, reader client.Reader, endPointsPool *datalayer.EndPointsPool) error - PoolGet() (*datalayer.EndPointsPool, error) + PoolSet(ctx context.Context, reader client.Reader, endPointsPool *datalayer.EndpointPool) error + PoolGet() (*datalayer.EndpointPool, error) PoolHasSynced() bool PoolLabelsMatch(podLabels map[string]string) bool @@ -68,15 +68,15 @@ type Datastore interface { Clear() } -func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, endPointsPool *datalayer.EndPointsPool) Datastore { +func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, endPointsPool *datalayer.EndpointPool) Datastore { store := &datastore{ - parentCtx: parentCtx, - endPointsAndObjectivesMu: sync.RWMutex{}, - endPointsPool: endPointsPool, - objectives: make(map[string]*v1alpha2.InferenceObjective), - pods: &sync.Map{}, - modelServerMetricsPort: modelServerMetricsPort, - epf: epFactory, + parentCtx: parentCtx, + poolAndObjectivesMu: sync.RWMutex{}, + pool: endPointsPool, + objectives: make(map[string]*v1alpha2.InferenceObjective), + pods: &sync.Map{}, + modelServerMetricsPort: modelServerMetricsPort, + epf: epFactory, } return store } @@ -84,9 +84,9 @@ func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory type datastore struct { // parentCtx controls the lifecycle of the background metrics goroutines that spawn up by the datastore. parentCtx context.Context - // endPointsAndObjectivesMu is used to synchronize access to pool and the objectives map. - endPointsAndObjectivesMu sync.RWMutex - endPointsPool *datalayer.EndPointsPool + // poolAndObjectivesMu is used to synchronize access to pool and the objectives map. + poolAndObjectivesMu sync.RWMutex + pool *datalayer.EndpointPool // key: InferenceObjective.Spec.ModelName, value: *InferenceObjective objectives map[string]*v1alpha2.InferenceObjective // key: types.NamespacedName, value: backendmetrics.PodMetrics @@ -98,9 +98,9 @@ type datastore struct { } func (ds *datastore) Clear() { - ds.endPointsAndObjectivesMu.Lock() - defer ds.endPointsAndObjectivesMu.Unlock() - ds.endPointsPool = nil + ds.poolAndObjectivesMu.Lock() + defer ds.poolAndObjectivesMu.Unlock() + ds.pool = nil ds.objectives = make(map[string]*v1alpha2.InferenceObjective) // stop all pods go routines before clearing the pods map. 
ds.pods.Range(func(_, v any) bool { @@ -111,17 +111,17 @@ func (ds *datastore) Clear() { } // /// EndPoints APIs /// -func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, endPointsPool *datalayer.EndPointsPool) error { +func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, endPointsPool *datalayer.EndpointPool) error { if endPointsPool == nil { ds.Clear() return nil } logger := log.FromContext(ctx) - ds.endPointsAndObjectivesMu.Lock() - defer ds.endPointsAndObjectivesMu.Unlock() + ds.poolAndObjectivesMu.Lock() + defer ds.poolAndObjectivesMu.Unlock() - oldEndPointsPool := ds.endPointsPool - ds.endPointsPool = endPointsPool + oldEndPointsPool := ds.pool + ds.pool = endPointsPool if oldEndPointsPool == nil || !reflect.DeepEqual(oldEndPointsPool.EndPoints.Selector, endPointsPool.EndPoints.Selector) { logger.V(logutil.DEFAULT).Info("Updating endpoints", "selector", endPointsPool.EndPoints.Selector) // A full resync is required to address two cases: @@ -138,42 +138,42 @@ func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, endPoint return nil } -func (ds *datastore) PoolGet() (*datalayer.EndPointsPool, error) { - ds.endPointsAndObjectivesMu.RLock() - defer ds.endPointsAndObjectivesMu.RUnlock() +func (ds *datastore) PoolGet() (*datalayer.EndpointPool, error) { + ds.poolAndObjectivesMu.RLock() + defer ds.poolAndObjectivesMu.RUnlock() if !ds.PoolHasSynced() { return nil, errPoolNotSynced } - return ds.endPointsPool, nil + return ds.pool, nil } func (ds *datastore) PoolHasSynced() bool { - ds.endPointsAndObjectivesMu.RLock() - defer ds.endPointsAndObjectivesMu.RUnlock() - return ds.endPointsPool != nil + ds.poolAndObjectivesMu.RLock() + defer ds.poolAndObjectivesMu.RUnlock() + return ds.pool != nil } func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool { - ds.endPointsAndObjectivesMu.RLock() - defer ds.endPointsAndObjectivesMu.RUnlock() - if ds.endPointsPool == nil { + ds.poolAndObjectivesMu.RLock() + defer ds.poolAndObjectivesMu.RUnlock() + if ds.pool == nil { return false } - poolSelector := labels.SelectorFromSet(ds.endPointsPool.EndPoints.Selector) + poolSelector := labels.SelectorFromSet(ds.pool.EndPoints.Selector) podSet := labels.Set(podLabels) return poolSelector.Matches(podSet) } func (ds *datastore) ObjectiveSet(infObjective *v1alpha2.InferenceObjective) { - ds.endPointsAndObjectivesMu.Lock() - defer ds.endPointsAndObjectivesMu.Unlock() + ds.poolAndObjectivesMu.Lock() + defer ds.poolAndObjectivesMu.Unlock() // Set the objective. 
ds.objectives[infObjective.Name] = infObjective } func (ds *datastore) ObjectiveGet(objectiveName string) *v1alpha2.InferenceObjective { - ds.endPointsAndObjectivesMu.RLock() - defer ds.endPointsAndObjectivesMu.RUnlock() + ds.poolAndObjectivesMu.RLock() + defer ds.poolAndObjectivesMu.RUnlock() iObj, ok := ds.objectives[objectiveName] if !ok { return nil @@ -182,14 +182,14 @@ func (ds *datastore) ObjectiveGet(objectiveName string) *v1alpha2.InferenceObjec } func (ds *datastore) ObjectiveDelete(namespacedName types.NamespacedName) { - ds.endPointsAndObjectivesMu.Lock() - defer ds.endPointsAndObjectivesMu.Unlock() + ds.poolAndObjectivesMu.Lock() + defer ds.poolAndObjectivesMu.Unlock() delete(ds.objectives, namespacedName.Name) } func (ds *datastore) ObjectiveGetAll() []*v1alpha2.InferenceObjective { - ds.endPointsAndObjectivesMu.RLock() - defer ds.endPointsAndObjectivesMu.RUnlock() + ds.poolAndObjectivesMu.RLock() + defer ds.poolAndObjectivesMu.RUnlock() res := []*v1alpha2.InferenceObjective{} for _, v := range ds.objectives { res = append(res, v) @@ -215,7 +215,7 @@ func (ds *datastore) PodList(predicate func(backendmetrics.PodMetrics) bool) []b } func (ds *datastore) PodUpdateOrAddIfNotExist(pod *corev1.Pod) bool { - if ds.endPointsPool == nil || ds.endPointsPool.EndPoints == nil { + if ds.pool == nil || ds.pool.EndPoints == nil { return true } @@ -225,11 +225,11 @@ func (ds *datastore) PodUpdateOrAddIfNotExist(pod *corev1.Pod) bool { } modelServerMetricsPort := 0 - if len(ds.endPointsPool.EndPoints.TargetPorts) == 1 { + if len(ds.pool.EndPoints.TargetPorts) == 1 { modelServerMetricsPort = int(ds.modelServerMetricsPort) } pods := []*datalayer.PodInfo{} - for idx, port := range ds.endPointsPool.EndPoints.TargetPorts { + for idx, port := range ds.pool.EndPoints.TargetPorts { metricsPort := modelServerMetricsPort if metricsPort == 0 { metricsPort = port @@ -280,8 +280,8 @@ func (ds *datastore) podResyncAll(ctx context.Context, reader client.Reader) err logger := log.FromContext(ctx) podList := &corev1.PodList{} if err := reader.List(ctx, podList, &client.ListOptions{ - LabelSelector: labels.SelectorFromSet(ds.endPointsPool.EndPoints.Selector), - Namespace: ds.endPointsPool.GKNN.Namespace, + LabelSelector: labels.SelectorFromSet(ds.pool.EndPoints.Selector), + Namespace: ds.pool.GKNN.Namespace, }); err != nil { return fmt.Errorf("failed to list pods - %w", err) } diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index df1907398..26de07713 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -88,7 +88,7 @@ func TestPool(t *testing.T) { Build() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) gknn := pooltuil.ToGKNN(tt.inferencePool) - endPointPool := datalayer.NewEndPointsPool(false, gknn) + endPointPool := datalayer.NewEndpointPool(false, gknn) ds := NewDatastore(context.Background(), pmf, 0, endPointPool) _ = ds.PoolSet(context.Background(), fakeClient, pooltuil.InferencePoolToEndPointsPool(tt.inferencePool)) gotPool, gotErr := ds.PoolGet() @@ -200,7 +200,7 @@ func TestObjective(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, pooltuil.ToGKNN(pool1))) + ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pooltuil.ToGKNN(pool1))) for _, m := range 
test.existingModels { ds.ObjectiveSet(m) } @@ -335,7 +335,7 @@ func TestMetrics(t *testing.T) { Build() pmf := backendmetrics.NewPodMetricsFactory(test.pmc, time.Millisecond) gknn := pooltuil.ToGKNN(inferencePool) - ds := NewDatastore(ctx, pmf, 0, datalayer.NewEndPointsPool(false, gknn)) + ds := NewDatastore(ctx, pmf, 0, datalayer.NewEndpointPool(false, gknn)) _ = ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndPointsPool(inferencePool)) for _, pod := range test.storePods { ds.PodUpdateOrAddIfNotExist(pod) @@ -403,7 +403,7 @@ func TestPods(t *testing.T) { t.Run(test.name, func(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, pooltuil.ToGKNN(inferencePool))) + ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pooltuil.ToGKNN(inferencePool))) fakeClient := fake.NewFakeClient() if err := ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndPointsPool(inferencePool)); err != nil { t.Error(err) @@ -587,7 +587,7 @@ func TestPodInfo(t *testing.T) { t.Run(test.name, func(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, pooltuil.ToGKNN(test.pool))) + ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pooltuil.ToGKNN(test.pool))) fakeClient := fake.NewFakeClient() if err := ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndPointsPool(test.pool)); err != nil { t.Error(err) diff --git a/pkg/epp/handlers/server.go b/pkg/epp/handlers/server.go index 7b71fd8c5..3e9f65691 100644 --- a/pkg/epp/handlers/server.go +++ b/pkg/epp/handlers/server.go @@ -61,7 +61,7 @@ type Director interface { } type Datastore interface { - PoolGet() (*datalayer.EndPointsPool, error) + PoolGet() (*datalayer.EndpointPool, error) } // Server implements the Envoy external processing server. diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index 267066185..5ae71a5b2 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -53,7 +53,7 @@ var ( func TestNoMetricsCollected(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(context.Background(), pmf, 0, datalayer.NewEndPointsPool(false, common.GKNN{})) + ds := datastore.NewDatastore(context.Background(), pmf, 0, datalayer.NewEndpointPool(false, common.GKNN{})) collector := &inferencePoolMetricsCollector{ ds: ds, @@ -79,7 +79,7 @@ func TestMetricsCollected(t *testing.T) { TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, }, } - ds := datastore.NewDatastore(context.Background(), pmf, 0, datalayer.NewEndPointsPool(false, poolutil.ToGKNN(inferencePool))) + ds := datastore.NewDatastore(context.Background(), pmf, 0, datalayer.NewEndpointPool(false, poolutil.ToGKNN(inferencePool))) scheme := runtime.NewScheme() fakeClient := fake.NewClientBuilder(). diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index 7b9b23816..83138aafa 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -44,7 +44,7 @@ import ( // Datastore defines the interface required by the Director. 
type Datastore interface { - PoolGet() (*datalayer.EndPointsPool, error) + PoolGet() (*datalayer.EndpointPool, error) ObjectiveGet(modelName string) *v1alpha2.InferenceObjective PodList(predicate func(backendmetrics.PodMetrics) bool) []backendmetrics.PodMetrics } diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index faa092b6f..8f8256551 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -82,7 +82,7 @@ type mockDatastore struct { pods []backendmetrics.PodMetrics } -func (ds *mockDatastore) PoolGet() (*datalayer.EndPointsPool, error) { +func (ds *mockDatastore) PoolGet() (*datalayer.EndpointPool, error) { return nil, nil } func (ds *mockDatastore) ObjectiveGet(_ string) *v1alpha2.InferenceObjective { @@ -137,7 +137,7 @@ func TestDirector_HandleRequest(t *testing.T) { // Datastore setup pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndPointsPool(false, poolutil.ToGKNN(pool))) + ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, poolutil.ToGKNN(pool))) ds.ObjectiveSet(ioFoodReview) ds.ObjectiveSet(ioFoodReviewResolve) ds.ObjectiveSet(ioFoodReviewSheddable) @@ -617,10 +617,10 @@ func TestGetRandomPod(t *testing.T) { Selector: selector, TargetPorts: targetPorts, } - endPointsPool := &datalayer.EndPointsPool{ - EndPoints: endPoints, - StandaloneMode: false, - GKNN: gknn, + endPointsPool := &datalayer.EndpointPool{ + EndPoints: endPoints, + DisableK8sCrd: false, + GKNN: gknn, } ds := datastore.NewDatastore(t.Context(), pmf, 0, endPointsPool) @@ -648,7 +648,7 @@ func TestDirector_HandleResponseReceived(t *testing.T) { pr1 := newTestResponseReceived("pr1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool(false, common.GKNN{})) + ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndpointPool(false, common.GKNN{})) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, &mockAdmissionController{}, NewConfig().WithResponseReceivedPlugins(pr1)) @@ -685,7 +685,7 @@ func TestDirector_HandleResponseStreaming(t *testing.T) { ps1 := newTestResponseStreaming("ps1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool(false, common.GKNN{})) + ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndpointPool(false, common.GKNN{})) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, nil, NewConfig().WithResponseStreamingPlugins(ps1)) @@ -721,7 +721,7 @@ func TestDirector_HandleResponseComplete(t *testing.T) { pc1 := newTestResponseComplete("pc1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndPointsPool(false, common.GKNN{})) + ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndpointPool(false, common.GKNN{})) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, nil, NewConfig().WithResponseCompletePlugins(pc1)) diff --git a/pkg/epp/server/controller_manager.go b/pkg/epp/server/controller_manager.go index 4b5102f45..657bb704b 100644 --- a/pkg/epp/server/controller_manager.go +++ b/pkg/epp/server/controller_manager.go @@ -45,7 +45,7 @@ func init() { } // defaultManagerOptions returns the default options 
used to create the manager. -func defaultManagerOptions(endPointsPool *datalayer.EndPointsPool, metricsServerOptions metricsserver.Options) (ctrl.Options, error) { +func defaultManagerOptions(endPointsPool *datalayer.EndpointPool, metricsServerOptions metricsserver.Options) (ctrl.Options, error) { opt := ctrl.Options{ Scheme: scheme, Cache: cache.Options{ @@ -59,7 +59,7 @@ func defaultManagerOptions(endPointsPool *datalayer.EndPointsPool, metricsServer }, Metrics: metricsServerOptions, } - if !endPointsPool.StandaloneMode { + if !endPointsPool.DisableK8sCrd { opt.Cache.ByObject[&v1alpha2.InferenceObjective{}] = cache.ByObject{Namespaces: map[string]cache.Config{ endPointsPool.GKNN.Namespace: {}, }} @@ -85,7 +85,7 @@ func defaultManagerOptions(endPointsPool *datalayer.EndPointsPool, metricsServer } // NewDefaultManager creates a new controller manager with default configuration. -func NewDefaultManager(endPointsPool *datalayer.EndPointsPool, restConfig *rest.Config, metricsServerOptions metricsserver.Options, leaderElectionEnabled bool) (ctrl.Manager, error) { +func NewDefaultManager(endPointsPool *datalayer.EndpointPool, restConfig *rest.Config, metricsServerOptions metricsserver.Options, leaderElectionEnabled bool) (ctrl.Manager, error) { opt, err := defaultManagerOptions(endPointsPool, metricsServerOptions) if err != nil { return nil, fmt.Errorf("failed to create controller manager options: %v", err) diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go index 86ee780be..da7b705f7 100644 --- a/pkg/epp/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -47,7 +47,7 @@ import ( // ExtProcServerRunner provides methods to manage an external process server. type ExtProcServerRunner struct { GrpcPort int - EndPointsPool *datalayer.EndPointsPool + EndPointsPool *datalayer.EndpointPool Datastore datastore.Datastore SecureServing bool HealthChecking bool @@ -102,7 +102,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner { // SetupWithManager sets up the runner with the given manager. 
func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { // Create the controllers and register them with the manager - if !r.EndPointsPool.StandaloneMode { + if !r.EndPointsPool.DisableK8sCrd { if err := (&controller.InferencePoolReconciler{ Datastore: r.Datastore, Reader: mgr.GetClient(), diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go index f5126face..12229fd9e 100644 --- a/pkg/epp/util/pool/pool.go +++ b/pkg/epp/util/pool/pool.go @@ -26,7 +26,7 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" ) -func InferencePoolToEndPointsPool(inferencePool *v1.InferencePool) *datalayer.EndPointsPool { +func InferencePoolToEndPointsPool(inferencePool *v1.InferencePool) *datalayer.EndpointPool { if inferencePool == nil { return nil } @@ -47,15 +47,15 @@ func InferencePoolToEndPointsPool(inferencePool *v1.InferencePool) *datalayer.En Selector: selector, TargetPorts: targetPorts, } - endPointsPool := &datalayer.EndPointsPool{ - EndPoints: endPoints, - StandaloneMode: false, - GKNN: gknn, + endPointsPool := &datalayer.EndpointPool{ + EndPoints: endPoints, + DisableK8sCrd: false, + GKNN: gknn, } return endPointsPool } -func AlphaInferencePoolToEndPointsPool(inferencePool *v1alpha2.InferencePool) *datalayer.EndPointsPool { +func AlphaInferencePoolToEndPointsPool(inferencePool *v1alpha2.InferencePool) *datalayer.EndpointPool { targetPorts := []int{int(inferencePool.Spec.TargetPortNumber)} selector := make(map[string]string, len(inferencePool.Spec.Selector)) for k, v := range inferencePool.Spec.Selector { @@ -69,15 +69,15 @@ func AlphaInferencePoolToEndPointsPool(inferencePool *v1alpha2.InferencePool) *d Selector: selector, TargetPorts: targetPorts, } - endPointsPool := &datalayer.EndPointsPool{ - EndPoints: endPoints, - StandaloneMode: false, - GKNN: gknn, + endPointsPool := &datalayer.EndpointPool{ + EndPoints: endPoints, + DisableK8sCrd: false, + GKNN: gknn, } return endPointsPool } -func EndPointsPoolToInferencePool(endPointsPool *datalayer.EndPointsPool) *v1.InferencePool { +func EndPointsPoolToInferencePool(endPointsPool *datalayer.EndpointPool) *v1.InferencePool { targetPorts := make([]v1.Port, 0, len(endPointsPool.EndPoints.TargetPorts)) for _, p := range endPointsPool.EndPoints.TargetPorts { targetPorts = append(targetPorts, v1.Port{Number: v1.PortNumber(p)}) diff --git a/test/integration/epp/hermetic_test.go b/test/integration/epp/hermetic_test.go index 020beb59b..cdb650892 100644 --- a/test/integration/epp/hermetic_test.go +++ b/test/integration/epp/hermetic_test.go @@ -1175,8 +1175,8 @@ func BeforeSuite() func() { NamespacedName: types.NamespacedName{Namespace: testNamespace, Name: testPoolName}, GroupKind: schema.GroupKind{Group: v1.GroupVersion.Group, Kind: "InferencePool"}, } - endPointsPool := datalayer.NewEndPointsPool(false, poolGKNN) - serverRunner.EndPointsPool = datalayer.NewEndPointsPool(false, poolGKNN) + endPointsPool := datalayer.NewEndpointPool(false, poolGKNN) + serverRunner.EndPointsPool = datalayer.NewEndpointPool(false, poolGKNN) serverRunner.Datastore = datastore.NewDatastore(context.Background(), pmf, 0, endPointsPool) diff --git a/test/utils/server.go b/test/utils/server.go index 5afc5ad05..70e012d51 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -55,7 +55,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po pmc := &metrics.FakePodMetricsClient{} pmf := metrics.NewPodMetricsFactory(pmc, time.Second) - endPointsPool := 
datalayer.NewEndPointsPool(false, common.GKNN{ + endPointsPool := datalayer.NewEndpointPool(false, common.GKNN{ NamespacedName: types.NamespacedName{Namespace: namespace, Name: poolName}, GroupKind: schema.GroupKind{Group: "inference.networking.k8s.io", Kind: "InferencePool"}, }) From 9d1514a3607eefbda6bcf8849aaf3654dba7f207 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 17 Nov 2025 10:43:22 -0800 Subject: [PATCH 13/34] added epp name in env --- config/charts/inferencepool/templates/epp-deployment.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml index 5b3634c2a..c253bbf92 100644 --- a/config/charts/inferencepool/templates/epp-deployment.yaml +++ b/config/charts/inferencepool/templates/epp-deployment.yaml @@ -147,6 +147,10 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace + - name: EPP_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name {{- if .Values.inferenceExtension.tracing.enabled }} - name: OTEL_SERVICE_NAME value: "gateway-api-inference-extension" From ba89d2433c8854ea70243a0049ad040fdcce576a Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 17 Nov 2025 16:37:33 -0800 Subject: [PATCH 14/34] rename to endpointPool --- cmd/epp/runner/runner.go | 50 ++++++------------- .../inferenceobjective_reconciler_test.go | 6 +-- .../controller/inferencepool_reconciler.go | 8 +-- .../inferencepool_reconciler_test.go | 24 ++++----- pkg/epp/controller/pod_reconciler_test.go | 2 +- .../{endpointPool.go => endpoint_pool.go} | 0 pkg/epp/datastore/datastore.go | 20 ++++---- pkg/epp/datastore/datastore_test.go | 10 ++-- .../metrics/collectors/inference_pool_test.go | 2 +- pkg/epp/requestcontrol/director_test.go | 8 +-- pkg/epp/server/controller_manager.go | 28 +++++------ pkg/epp/server/runserver.go | 8 +-- pkg/epp/util/pool/pool.go | 26 +++++----- test/integration/epp/hermetic_test.go | 6 +-- test/utils/server.go | 6 +-- 15 files changed, 92 insertions(+), 112 deletions(-) rename pkg/epp/datalayer/{endpointPool.go => endpoint_pool.go} (100%) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 8059bbba3..ea7eaf5af 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -46,6 +46,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/metrics/filters" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "k8s.io/apimachinery/pkg/labels" "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" @@ -102,8 +103,8 @@ var ( poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.") poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.") poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.") - selector = flag.String("selector", "", "selector to filter pods on, only key value paris is supported. Format: a comma-separated list of key value paris, e.g., 'app:vllm-llama3-8b-instruct,env=prod'.") - targetPorts = flag.String("target-ports", "", "target ports of model server pods. 
Format: a comma-separated list of numbers, e.g., '3000,3001,3002'") + endpointSelector = flag.String("endpoint-selector", "", "selector to filter model server pods on; only key=value pairs are supported. Format: a comma-separated list of key=value pairs, e.g., 'app=vllm-llama3-8b-instruct,env=prod'.") + endpointTargetPorts = flag.String("endpoint-target-ports", "", "target ports of model server pods. Format: a comma-separated list of numbers, e.g., '3000,3001,3002'") logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.") healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking") @@ -343,7 +344,7 @@ func (r *Runner) Run(ctx context.Context) error { // --- Setup ExtProc Server Runner --- serverRunner := &runserver.ExtProcServerRunner{ GrpcPort: *grpcPort, - EndPointsPool: endpointPool, + EndpointPool: endpointPool, Datastore: datastore, SecureServing: *secureServing, HealthChecking: *healthChecking, @@ -410,16 +411,16 @@ func setupEndpointPool(setupLog logr.Logger) (*datalayer.EndpointPool, error) { endpointPool.GKNN = poolGKNN } - if *selector != "" { - endPointPoolSelector, err := strToMap(*selector) + if *endpointSelector != "" { + labelsMap, err := labels.ConvertSelectorToLabelsMap(*endpointSelector) if err != nil { - setupLog.Error(err, "Failed to parse flag %q with error: %w", "selector", err) + setupLog.Error(err, "Failed to parse flag %q with error: %w", "endpoint-selector", err) return nil, err } - endpointPool.EndPoints.Selector = endPointPoolSelector - endpointPool.EndPoints.TargetPorts, err = strToUniqueIntSlice(*targetPorts) + endpointPool.EndPoints.Selector = labelsMap + endpointPool.EndPoints.TargetPorts, err = strToUniqueIntSlice(*endpointTargetPorts) if err != nil { - setupLog.Error(err, "Failed to parse flag %q with error: %w", "target-ports", err) + setupLog.Error(err, "Failed to parse flag %q with error: %w", "endpoint-target-ports", err) } endpointPool.DisableK8sCrd = true @@ -609,14 +610,14 @@ func registerHealthServer(mgr manager.Manager, logger logr.Logger, ds datastore.
return err } return nil } func validateFlags() error { - if (*poolName != "" && *selector != "") || (*poolName == "" && *selector == "") { - return errors.New("either poolName or selector must be set") + if (*poolName != "" && *endpointSelector != "") || (*poolName == "" && *endpointSelector == "") { + return errors.New("exactly one of the --pool-name or --endpoint-selector flags must be set") } - if *selector != "" { - targetPortsList, err := strToUniqueIntSlice(*targetPorts) + if *endpointSelector != "" { + targetPortsList, err := strToUniqueIntSlice(*endpointTargetPorts) if err != nil { return fmt.Errorf("unexpected value for %q flag with error %w", "target-ports", err) } @@ -663,27 +664,6 @@ func strToUniqueIntSlice(s string) ([]int, error) { return intList, nil } -func strToMap(s string) (map[string]string, error) { - m := make(map[string]string) - if s == "" { - return m, nil - } - - mPairs := strings.Split(s, ",") - for _, pair := range mPairs { - trimmedPair := strings.TrimSpace(pair) - if trimmedPair == "" { - continue - } - kv := strings.Split(trimmedPair, ":") - if len(kv) != 2 { - return nil, errors.New("invalid format, expected key:value paris") - } - m[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1]) - } - return m, nil -} - func verifyMetricMapping(mapping backendmetrics.MetricMapping, logger logr.Logger) { if mapping.TotalQueuedRequests == nil { logger.Info("Not scraping metric: TotalQueuedRequests") diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index 2d79c6f45..57bd3bc5d 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -166,8 +166,8 @@ func TestInferenceObjectiveReconciler(t *testing.T) { for _, m := range test.objectivessInStore { ds.ObjectiveSet(m) } - endPointsPool := pool.InferencePoolToEndPointsPool(inferencePool) - _ = ds.PoolSet(context.Background(), fakeClient, endPointsPool) + endpointPool := pool.InferencePoolToEndpointPool(inferencePool) + _ = ds.PoolSet(context.Background(), fakeClient, endpointPool) reconciler := &InferenceObjectiveReconciler{ Reader: fakeClient, Datastore: ds, @@ -193,7 +193,7 @@ func TestInferenceObjectiveReconciler(t *testing.T) { if len(test.wantObjectives) != len(ds.ObjectiveGetAll()) { t.Errorf("Unexpected; want: %d, got:%d", len(test.wantObjectives), len(ds.ObjectiveGetAll())) } - if diff := diffStore(ds, diffStoreParams{wantPool: endPointsPool, wantObjectives: test.wantObjectives}); diff != "" { + if diff := diffStore(ds, diffStoreParams{wantPool: endpointPool, wantObjectives: test.wantObjectives}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } diff --git a/pkg/epp/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go index 07ac768b7..76eac3317 100644 --- a/pkg/epp/controller/inferencepool_reconciler.go +++ b/pkg/epp/controller/inferencepool_reconciler.go @@ -77,17 +77,17 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques c.Datastore.Clear() return ctrl.Result{}, nil } - var endPointsPool *datalayer.EndpointPool + var endpointPool *datalayer.EndpointPool switch pool := obj.(type) { case *v1.InferencePool: - endPointsPool = pooltuil.InferencePoolToEndPointsPool(pool) + endpointPool = pooltuil.InferencePoolToEndpointPool(pool) case *v1alpha2.InferencePool: - endPointsPool = pooltuil.AlphaInferencePoolToEndPointsPool(pool) + endpointPool = pooltuil.AlphaInferencePoolToEndpointPool(pool) default:
return ctrl.Result{}, fmt.Errorf("unsupported API group: %s", c.PoolGKNN.Group) } - if err := c.Datastore.PoolSet(ctx, c.Reader, endPointsPool); err != nil { + if err := c.Datastore.PoolSet(ctx, c.Reader, endpointPool); err != nil { return ctrl.Result{}, fmt.Errorf("failed to update datastore - %w", err) } diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 5ab734002..27160a98b 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -122,8 +122,8 @@ func TestInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - endPointsPool1 := pool.InferencePoolToEndPointsPool(pool1) - if diff := diffStore(ds, diffStoreParams{wantPool: endPointsPool1, wantPods: []string{"pod1-rank-0", "pod2-rank-0"}}); diff != "" { + endpointPool1 := pool.InferencePoolToEndpointPool(pool1) + if diff := diffStore(ds, diffStoreParams{wantPool: endpointPool1, wantPods: []string{"pod1-rank-0", "pod2-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -140,8 +140,8 @@ func TestInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - newEndPointsPool1 := pool.InferencePoolToEndPointsPool(newPool1) - if diff := diffStore(ds, diffStoreParams{wantPool: newEndPointsPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { + newEndpointPool1 := pool.InferencePoolToEndpointPool(newPool1) + if diff := diffStore(ds, diffStoreParams{wantPool: newEndpointPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -156,8 +156,8 @@ func TestInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - newEndPointsPool1 = pool.InferencePoolToEndPointsPool(newPool1) - if diff := diffStore(ds, diffStoreParams{wantPool: newEndPointsPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { + newEndpointPool1 = pool.InferencePoolToEndpointPool(newPool1) + if diff := diffStore(ds, diffStoreParams{wantPool: newEndpointPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -269,8 +269,8 @@ func TestXInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - endPointsPool1 := pool.AlphaInferencePoolToEndPointsPool(pool1) - if diff := xDiffStore(ds, xDiffStoreParams{wantPool: endPointsPool1, wantPods: []string{"pod1-rank-0", "pod2-rank-0"}}); diff != "" { + endpointPool1 := pool.AlphaInferencePoolToEndpointPool(pool1) + if diff := xDiffStore(ds, xDiffStoreParams{wantPool: endpointPool1, wantPods: []string{"pod1-rank-0", "pod2-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -286,8 +286,8 @@ func TestXInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - newEndPointsPoll1 := pool.AlphaInferencePoolToEndPointsPool(newPool1) - if diff := xDiffStore(ds, xDiffStoreParams{wantPool: newEndPointsPoll1, wantPods: []string{"pod5-rank-0"}}); diff != "" { + 
newEndpointPool1 := pool.AlphaInferencePoolToEndpointPool(newPool1) + if diff := xDiffStore(ds, xDiffStoreParams{wantPool: newEndpointPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } @@ -302,8 +302,8 @@ func TestXInferencePoolReconciler(t *testing.T) { if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) } - newEndPointsPool1 := pool.AlphaInferencePoolToEndPointsPool(newPool1) - if diff := xDiffStore(ds, xDiffStoreParams{wantPool: newEndPointsPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { + newEndpointPool1 = pool.AlphaInferencePoolToEndpointPool(newPool1) + if diff := xDiffStore(ds, xDiffStoreParams{wantPool: newEndpointPool1, wantPods: []string{"pod5-rank-0"}}); diff != "" { t.Errorf("Unexpected diff (+got/-want): %s", diff) } diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index 44d45bd7e..aeb00499d 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -199,7 +199,7 @@ func TestPodReconciler(t *testing.T) { // Configure the initial state of the datastore. store := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pool.ToGKNN(test.pool))) - _ = store.PoolSet(t.Context(), fakeClient, pool.InferencePoolToEndPointsPool(test.pool)) + _ = store.PoolSet(t.Context(), fakeClient, pool.InferencePoolToEndpointPool(test.pool)) for _, pod := range test.existingPods { store.PodUpdateOrAddIfNotExist(pod) } diff --git a/pkg/epp/datalayer/endpointPool.go b/pkg/epp/datalayer/endpoint_pool.go similarity index 100% rename from pkg/epp/datalayer/endpointPool.go rename to pkg/epp/datalayer/endpoint_pool.go diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index 710a66991..70caa55c8 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -48,7 +48,7 @@ type Datastore interface { // PoolSet sets the given pool in datastore. If the given pool has different label selector than the previous pool // that was stored, the function triggers a resync of the pods to keep the datastore updated. If the given pool // is nil, this call triggers the datastore.Clear() function. 
- PoolSet(ctx context.Context, reader client.Reader, endPointsPool *datalayer.EndpointPool) error + PoolSet(ctx context.Context, reader client.Reader, endpointPool *datalayer.EndpointPool) error PoolGet() (*datalayer.EndpointPool, error) PoolHasSynced() bool PoolLabelsMatch(podLabels map[string]string) bool @@ -68,11 +68,11 @@ type Datastore interface { Clear() } -func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, endPointsPool *datalayer.EndpointPool) Datastore { +func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, endpointPool *datalayer.EndpointPool) Datastore { store := &datastore{ parentCtx: parentCtx, poolAndObjectivesMu: sync.RWMutex{}, - pool: endPointsPool, + pool: endpointPool, objectives: make(map[string]*v1alpha2.InferenceObjective), pods: &sync.Map{}, modelServerMetricsPort: modelServerMetricsPort, @@ -110,9 +110,9 @@ func (ds *datastore) Clear() { ds.pods.Clear() } -// /// EndPoints APIs /// -func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, endPointsPool *datalayer.EndpointPool) error { - if endPointsPool == nil { +// /// Pool APIs /// +func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, endpointPool *datalayer.EndpointPool) error { + if endpointPool == nil { ds.Clear() return nil } @@ -120,10 +120,10 @@ func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, endPoint ds.poolAndObjectivesMu.Lock() defer ds.poolAndObjectivesMu.Unlock() - oldEndPointsPool := ds.pool - ds.pool = endPointsPool - if oldEndPointsPool == nil || !reflect.DeepEqual(oldEndPointsPool.EndPoints.Selector, endPointsPool.EndPoints.Selector) { - logger.V(logutil.DEFAULT).Info("Updating endpoints", "selector", endPointsPool.EndPoints.Selector) + oldEndpointPool := ds.pool + ds.pool = endpointPool + if oldEndpointPool == nil || !reflect.DeepEqual(oldEndpointPool.EndPoints.Selector, endpointPool.EndPoints.Selector) { + logger.V(logutil.DEFAULT).Info("Updating endpoints", "selector", endpointPool.EndPoints.Selector) // A full resync is required to address two cases: // 1) At startup, the pod events may get processed before the pool is synced with the datastore, // and hence they will not be added to the store since pool selector is not known yet diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index 26de07713..c0a0a456d 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -90,12 +90,12 @@ func TestPool(t *testing.T) { gknn := pooltuil.ToGKNN(tt.inferencePool) endPointPool := datalayer.NewEndpointPool(false, gknn) ds := NewDatastore(context.Background(), pmf, 0, endPointPool) - _ = ds.PoolSet(context.Background(), fakeClient, pooltuil.InferencePoolToEndPointsPool(tt.inferencePool)) + _ = ds.PoolSet(context.Background(), fakeClient, pooltuil.InferencePoolToEndpointPool(tt.inferencePool)) gotPool, gotErr := ds.PoolGet() if diff := cmp.Diff(tt.wantErr, gotErr, cmpopts.EquateErrors()); diff != "" { t.Errorf("Unexpected error diff (+got/-want): %s", diff) } - if diff := cmp.Diff(pooltuil.InferencePoolToEndPointsPool(tt.wantPool), gotPool); diff != "" { + if diff := cmp.Diff(pooltuil.InferencePoolToEndpointPool(tt.wantPool), gotPool); diff != "" { t.Errorf("Unexpected pool diff (+got/-want): %s", diff) } gotSynced := ds.PoolHasSynced() @@ -336,7 +336,7 @@ func TestMetrics(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(test.pmc, time.Millisecond) gknn := 
pooltuil.ToGKNN(inferencePool) ds := NewDatastore(ctx, pmf, 0, datalayer.NewEndpointPool(false, gknn)) - _ = ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndPointsPool(inferencePool)) + _ = ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndpointPool(inferencePool)) for _, pod := range test.storePods { ds.PodUpdateOrAddIfNotExist(pod) } @@ -405,7 +405,7 @@ func TestPods(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pooltuil.ToGKNN(inferencePool))) fakeClient := fake.NewFakeClient() - if err := ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndPointsPool(inferencePool)); err != nil { + if err := ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndpointPool(inferencePool)); err != nil { t.Error(err) } for _, pod := range test.existingPods { @@ -589,7 +589,7 @@ func TestPodInfo(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pooltuil.ToGKNN(test.pool))) fakeClient := fake.NewFakeClient() - if err := ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndPointsPool(test.pool)); err != nil { + if err := ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndpointPool(test.pool)); err != nil { t.Error(err) } for _, pod := range test.existingPods { diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index 5ae71a5b2..7dd82c267 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -86,7 +86,7 @@ func TestMetricsCollected(t *testing.T) { WithScheme(scheme). 
Build() - _ = ds.PoolSet(context.Background(), fakeClient, poolutil.InferencePoolToEndPointsPool(inferencePool)) + _ = ds.PoolSet(context.Background(), fakeClient, poolutil.InferencePoolToEndpointPool(inferencePool)) _ = ds.PodUpdateOrAddIfNotExist(pod1) time.Sleep(1 * time.Second) diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index 8f8256551..1f57f894e 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -146,7 +146,7 @@ func TestDirector_HandleRequest(t *testing.T) { _ = clientgoscheme.AddToScheme(scheme) fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() - if err := ds.PoolSet(ctx, fakeClient, poolutil.InferencePoolToEndPointsPool(pool)); err != nil { + if err := ds.PoolSet(ctx, fakeClient, poolutil.InferencePoolToEndpointPool(pool)); err != nil { t.Fatalf("Error while setting inference pool: %v", err) } @@ -617,14 +617,14 @@ func TestGetRandomPod(t *testing.T) { Selector: selector, TargetPorts: targetPorts, } - endPointsPool := &datalayer.EndpointPool{ + endpointPool := &datalayer.EndpointPool{ EndPoints: endPoints, DisableK8sCrd: false, GKNN: gknn, } - ds := datastore.NewDatastore(t.Context(), pmf, 0, endPointsPool) - err := ds.PoolSet(t.Context(), fakeClient, endPointsPool) + ds := datastore.NewDatastore(t.Context(), pmf, 0, endpointPool) + err := ds.PoolSet(t.Context(), fakeClient, endpointPool) if err != nil { t.Errorf("unexpected error setting pool: %s", err) } diff --git a/pkg/epp/server/controller_manager.go b/pkg/epp/server/controller_manager.go index 657bb704b..f308814e2 100644 --- a/pkg/epp/server/controller_manager.go +++ b/pkg/epp/server/controller_manager.go @@ -45,39 +45,39 @@ func init() { } // defaultManagerOptions returns the default options used to create the manager. 
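For reference, the wiring pattern the director test above exercises can be sketched in isolation: build an EndpointPool by hand and seed the datastore with it. The sketch below is not part of this patch; it uses the type and function names as they stand at this point in the series (EndPoints and DisableK8sCrd are renamed in later commits), and the namespace, pool name, selector and port values are placeholders.

// Sketch only: seed a datastore from a hand-built EndpointPool, mirroring the
// test above. Literal values are illustrative.
import (
	"context"

	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"

	"sigs.k8s.io/gateway-api-inference-extension/pkg/common"
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
)

func seedPool(ctx context.Context, reader client.Reader, epf datalayer.EndpointFactory) (datastore.Datastore, error) {
	gknn := common.GKNN{
		NamespacedName: types.NamespacedName{Namespace: "default", Name: "vllm-llama3-8b-instruct"},
		GroupKind:      schema.GroupKind{Group: "inference.networking.k8s.io", Kind: "InferencePool"},
	}
	endpointPool := &datalayer.EndpointPool{
		EndPoints:     &datalayer.EndPoints{Selector: map[string]string{"app": "vllm-llama3-8b-instruct"}, TargetPorts: []int{8000}},
		DisableK8sCrd: false,
		GKNN:          gknn,
	}
	ds := datastore.NewDatastore(ctx, epf, 0, endpointPool)
	// PoolSet triggers a full pod resync against the new selector.
	if err := ds.PoolSet(ctx, reader, endpointPool); err != nil {
		return nil, err
	}
	return ds, nil
}
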
-func defaultManagerOptions(endPointsPool *datalayer.EndpointPool, metricsServerOptions metricsserver.Options) (ctrl.Options, error) { +func defaultManagerOptions(endpointPool *datalayer.EndpointPool, metricsServerOptions metricsserver.Options) (ctrl.Options, error) { opt := ctrl.Options{ Scheme: scheme, Cache: cache.Options{ ByObject: map[client.Object]cache.ByObject{ &corev1.Pod{}: { Namespaces: map[string]cache.Config{ - endPointsPool.GKNN.Namespace: {}, + endpointPool.GKNN.Namespace: {}, }, }, }, }, Metrics: metricsServerOptions, } - if !endPointsPool.DisableK8sCrd { + if !endpointPool.DisableK8sCrd { opt.Cache.ByObject[&v1alpha2.InferenceObjective{}] = cache.ByObject{Namespaces: map[string]cache.Config{ - endPointsPool.GKNN.Namespace: {}, + endpointPool.GKNN.Namespace: {}, }} - switch endPointsPool.GKNN.Group { + switch endpointPool.GKNN.Group { case v1alpha2.GroupName: opt.Cache.ByObject[&v1alpha2.InferencePool{}] = cache.ByObject{ - Namespaces: map[string]cache.Config{endPointsPool.GKNN.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ - "metadata.name": endPointsPool.GKNN.Name, + Namespaces: map[string]cache.Config{endpointPool.GKNN.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ + "metadata.name": endpointPool.GKNN.Name, })}}, } case v1.GroupName: opt.Cache.ByObject[&v1.InferencePool{}] = cache.ByObject{ - Namespaces: map[string]cache.Config{endPointsPool.GKNN.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ - "metadata.name": endPointsPool.GKNN.Name, + Namespaces: map[string]cache.Config{endpointPool.GKNN.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ + "metadata.name": endpointPool.GKNN.Name, })}}, } default: - return ctrl.Options{}, fmt.Errorf("unknown group: %s", endPointsPool.GKNN.Group) + return ctrl.Options{}, fmt.Errorf("unknown group: %s", endpointPool.GKNN.Group) } } @@ -85,8 +85,8 @@ func defaultManagerOptions(endPointsPool *datalayer.EndpointPool, metricsServerO } // NewDefaultManager creates a new controller manager with default configuration. -func NewDefaultManager(endPointsPool *datalayer.EndpointPool, restConfig *rest.Config, metricsServerOptions metricsserver.Options, leaderElectionEnabled bool) (ctrl.Manager, error) { - opt, err := defaultManagerOptions(endPointsPool, metricsServerOptions) +func NewDefaultManager(endpointPool *datalayer.EndpointPool, restConfig *rest.Config, metricsServerOptions metricsserver.Options, leaderElectionEnabled bool) (ctrl.Manager, error) { + opt, err := defaultManagerOptions(endpointPool, metricsServerOptions) if err != nil { return nil, fmt.Errorf("failed to create controller manager options: %v", err) } @@ -95,8 +95,8 @@ func NewDefaultManager(endPointsPool *datalayer.EndpointPool, restConfig *rest.C opt.LeaderElection = true opt.LeaderElectionResourceLock = "leases" // The lease name needs to be unique per EPP deployment. 
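A standalone sketch of the same leader-election wiring follows; newHAManager and the literal namespace/name are placeholders, not part of this patch (the real code derives them from the pool GKNN).

// Sketch: HA leader election via a coordination.k8s.io Lease, matching the
// settings configured below.
import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/rest"
	ctrl "sigs.k8s.io/controller-runtime"
)

func newHAManager(restConfig *rest.Config, scheme *runtime.Scheme, ns, name string) (ctrl.Manager, error) {
	opts := ctrl.Options{Scheme: scheme}
	opts.LeaderElection = true
	opts.LeaderElectionResourceLock = "leases"
	// One Lease per EPP deployment, so replicas of different pools never contend.
	opts.LeaderElectionID = fmt.Sprintf("epp-%s-%s.gateway-api-inference-extension.sigs.k8s.io", ns, name)
	opts.LeaderElectionNamespace = ns
	opts.LeaderElectionReleaseOnCancel = true
	return ctrl.NewManager(restConfig, opts)
}
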
- opt.LeaderElectionID = fmt.Sprintf("epp-%s-%s.gateway-api-inference-extension.sigs.k8s.io", endPointsPool.GKNN.Namespace, endPointsPool.GKNN.Name) - opt.LeaderElectionNamespace = endPointsPool.GKNN.Namespace + opt.LeaderElectionID = fmt.Sprintf("epp-%s-%s.gateway-api-inference-extension.sigs.k8s.io", endpointPool.GKNN.Namespace, endpointPool.GKNN.Name) + opt.LeaderElectionNamespace = endpointPool.GKNN.Namespace opt.LeaderElectionReleaseOnCancel = true } diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go index da7b705f7..a509e83cc 100644 --- a/pkg/epp/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -47,7 +47,7 @@ import ( // ExtProcServerRunner provides methods to manage an external process server. type ExtProcServerRunner struct { GrpcPort int - EndPointsPool *datalayer.EndpointPool + EndpointPool *datalayer.EndpointPool Datastore datastore.Datastore SecureServing bool HealthChecking bool @@ -102,11 +102,11 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner { // SetupWithManager sets up the runner with the given manager. func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { // Create the controllers and register them with the manager - if !r.EndPointsPool.DisableK8sCrd { + if !r.EndpointPool.DisableK8sCrd { if err := (&controller.InferencePoolReconciler{ Datastore: r.Datastore, Reader: mgr.GetClient(), - PoolGKNN: r.EndPointsPool.GKNN, + PoolGKNN: r.EndpointPool.GKNN, }).SetupWithManager(mgr); err != nil { return fmt.Errorf("failed setting up InferencePoolReconciler: %w", err) } @@ -114,7 +114,7 @@ func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Man if err := (&controller.InferenceObjectiveReconciler{ Datastore: r.Datastore, Reader: mgr.GetClient(), - PoolGKNN: r.EndPointsPool.GKNN, + PoolGKNN: r.EndpointPool.GKNN, }).SetupWithManager(ctx, mgr); err != nil { return fmt.Errorf("failed setting up InferenceObjectiveReconciler: %w", err) } diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go index 12229fd9e..84a020e74 100644 --- a/pkg/epp/util/pool/pool.go +++ b/pkg/epp/util/pool/pool.go @@ -26,7 +26,7 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" ) -func InferencePoolToEndPointsPool(inferencePool *v1.InferencePool) *datalayer.EndpointPool { +func InferencePoolToEndpointPool(inferencePool *v1.InferencePool) *datalayer.EndpointPool { if inferencePool == nil { return nil } @@ -47,15 +47,15 @@ func InferencePoolToEndPointsPool(inferencePool *v1.InferencePool) *datalayer.En Selector: selector, TargetPorts: targetPorts, } - endPointsPool := &datalayer.EndpointPool{ + endpointPool := &datalayer.EndpointPool{ EndPoints: endPoints, DisableK8sCrd: false, GKNN: gknn, } - return endPointsPool + return endpointPool } -func AlphaInferencePoolToEndPointsPool(inferencePool *v1alpha2.InferencePool) *datalayer.EndpointPool { +func AlphaInferencePoolToEndpointPool(inferencePool *v1alpha2.InferencePool) *datalayer.EndpointPool { targetPorts := []int{int(inferencePool.Spec.TargetPortNumber)} selector := make(map[string]string, len(inferencePool.Spec.Selector)) for k, v := range inferencePool.Spec.Selector { @@ -69,21 +69,21 @@ func AlphaInferencePoolToEndPointsPool(inferencePool *v1alpha2.InferencePool) *d Selector: selector, TargetPorts: targetPorts, } - endPointsPool := &datalayer.EndpointPool{ + endpointPool := &datalayer.EndpointPool{ EndPoints: endPoints, DisableK8sCrd: false, GKNN: gknn, } - return endPointsPool + return endpointPool } -func 
EndPointsPoolToInferencePool(endPointsPool *datalayer.EndpointPool) *v1.InferencePool { - targetPorts := make([]v1.Port, 0, len(endPointsPool.EndPoints.TargetPorts)) - for _, p := range endPointsPool.EndPoints.TargetPorts { +func EndpointPoolToInferencePool(endpointPool *datalayer.EndpointPool) *v1.InferencePool { + targetPorts := make([]v1.Port, 0, len(endpointPool.EndPoints.TargetPorts)) + for _, p := range endpointPool.EndPoints.TargetPorts { targetPorts = append(targetPorts, v1.Port{Number: v1.PortNumber(p)}) } - labels := make(map[v1.LabelKey]v1.LabelValue, len(endPointsPool.EndPoints.Selector)) - for k, v := range endPointsPool.EndPoints.Selector { + labels := make(map[v1.LabelKey]v1.LabelValue, len(endpointPool.EndPoints.Selector)) + for k, v := range endpointPool.EndPoints.Selector { labels[v1.LabelKey(k)] = v1.LabelValue(v) } @@ -93,8 +93,8 @@ func EndPointsPoolToInferencePool(endPointsPool *datalayer.EndpointPool) *v1.Inf Kind: "InferencePool", }, ObjectMeta: metav1.ObjectMeta{ - Name: endPointsPool.GKNN.Name, - Namespace: endPointsPool.GKNN.Namespace, + Name: endpointPool.GKNN.Name, + Namespace: endpointPool.GKNN.Namespace, }, Spec: v1.InferencePoolSpec{ Selector: v1.LabelSelector{MatchLabels: labels}, diff --git a/test/integration/epp/hermetic_test.go b/test/integration/epp/hermetic_test.go index cdb650892..84c1700dc 100644 --- a/test/integration/epp/hermetic_test.go +++ b/test/integration/epp/hermetic_test.go @@ -1175,10 +1175,10 @@ func BeforeSuite() func() { NamespacedName: types.NamespacedName{Namespace: testNamespace, Name: testPoolName}, GroupKind: schema.GroupKind{Group: v1.GroupVersion.Group, Kind: "InferencePool"}, } - endPointsPool := datalayer.NewEndpointPool(false, poolGKNN) - serverRunner.EndPointsPool = datalayer.NewEndpointPool(false, poolGKNN) + endpointPool := datalayer.NewEndpointPool(false, poolGKNN) + serverRunner.EndpointPool = datalayer.NewEndpointPool(false, poolGKNN) - serverRunner.Datastore = datastore.NewDatastore(context.Background(), pmf, 0, endPointsPool) + serverRunner.Datastore = datastore.NewDatastore(context.Background(), pmf, 0, endpointPool) kvCacheUtilizationScorer := scorer.NewKVCacheUtilizationScorer() queueingScorer := scorer.NewQueueScorer() diff --git a/test/utils/server.go b/test/utils/server.go index 70e012d51..c7a080f8d 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -55,11 +55,11 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po pmc := &metrics.FakePodMetricsClient{} pmf := metrics.NewPodMetricsFactory(pmc, time.Second) - endPointsPool := datalayer.NewEndpointPool(false, common.GKNN{ + endpointPool := datalayer.NewEndpointPool(false, common.GKNN{ NamespacedName: types.NamespacedName{Namespace: namespace, Name: poolName}, GroupKind: schema.GroupKind{Group: "inference.networking.k8s.io", Kind: "InferencePool"}, }) - ds := datastore.NewDatastore(ctx, pmf, 0, endPointsPool) + ds := datastore.NewDatastore(ctx, pmf, 0, endpointPool) initObjs := []client.Object{} for _, objective := range objectives { @@ -81,7 +81,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po Build() pool := testutil.MakeInferencePool(poolName).Namespace(namespace).ObjRef() pool.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(poolPort)}} - _ = ds.PoolSet(context.Background(), fakeClient, pooltuil.InferencePoolToEndPointsPool(pool)) + _ = ds.PoolSet(context.Background(), fakeClient, pooltuil.InferencePoolToEndpointPool(pool)) return ctx, cancel, ds, pmc } From 
ba90213ae849d5abe0d3f1e3c145c02b24ecaedf Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 17 Nov 2025 16:53:19 -0800 Subject: [PATCH 15/34] refactor in ut --- pkg/epp/requestcontrol/director.go | 3 --- pkg/epp/requestcontrol/director_test.go | 25 +------------------------ 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index a13a45547..e6af31b2c 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -26,10 +26,7 @@ import ( "strings" "time" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index 25144d343..eadba9c49 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -21,7 +21,6 @@ import ( "errors" "fmt" - "k8s.io/apimachinery/pkg/runtime/schema" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" poolutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" @@ -759,29 +758,7 @@ func TestGetRandomPod(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Millisecond) - targetPorts := make([]int, 0, len(pool.Spec.TargetPorts)) - for _, p := range pool.Spec.TargetPorts { - targetPorts = append(targetPorts, int(p.Number)) - - } - selector := make(map[string]string, len(pool.Spec.Selector.MatchLabels)) - for k, v := range pool.Spec.Selector.MatchLabels { - selector[string(k)] = string(v) - } - gknn := common.GKNN{ - NamespacedName: types.NamespacedName{Namespace: pool.Namespace, Name: pool.Name}, - GroupKind: schema.GroupKind{Group: pool.GroupVersionKind().Group, Kind: pool.GroupVersionKind().Kind}, - } - endPoints := &datalayer.EndPoints{ - Selector: selector, - TargetPorts: targetPorts, - } - endpointPool := &datalayer.EndpointPool{ - EndPoints: endPoints, - DisableK8sCrd: false, - GKNN: gknn, - } - + endpointPool := poolutil.InferencePoolToEndpointPool(pool) ds := datastore.NewDatastore(t.Context(), pmf, 0, endpointPool) err := ds.PoolSet(t.Context(), fakeClient, endpointPool) if err != nil { From bc6a4c6febf269bf77beae9384a84dd96c49bc22 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 17 Nov 2025 17:00:55 -0800 Subject: [PATCH 16/34] fixed format --- pkg/epp/requestcontrol/director_test.go | 8 +++----- pkg/epp/server/runserver.go | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index eadba9c49..cd64f9221 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -20,11 +20,6 @@ import ( "context" "errors" "fmt" - - "sigs.k8s.io/gateway-api-inference-extension/pkg/common" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - poolutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" - "maps" "testing" "time" @@ -38,6 +33,8 @@ import ( "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client/fake" + 
"sigs.k8s.io/gateway-api-inference-extension/pkg/common" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" @@ -50,6 +47,7 @@ import ( schedulingtypes "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" + poolutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" requtil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/request" testutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go index ef4427daf..20abe8b31 100644 --- a/pkg/epp/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -103,7 +103,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner { func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { // Create the controllers and register them with the manager if !r.EndpointPool.DisableK8sCrd { - + if err := (&controller.InferencePoolReconciler{ Datastore: r.Datastore, Reader: mgr.GetClient(), From 93ab79102b63abf05ad4f6dddea7d5e88d161091 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 17 Nov 2025 17:06:54 -0800 Subject: [PATCH 17/34] fixed format --- pkg/epp/server/runserver.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go index 20abe8b31..a509e83cc 100644 --- a/pkg/epp/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -103,7 +103,6 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner { func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { // Create the controllers and register them with the manager if !r.EndpointPool.DisableK8sCrd { - if err := (&controller.InferencePoolReconciler{ Datastore: r.Datastore, Reader: mgr.GetClient(), From c3ac6f2845f9f397023f7f79c260b8bc61782771 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 17 Nov 2025 17:23:16 -0800 Subject: [PATCH 18/34] changed error message Signed-off-by: Xiyue Yu --- cmd/epp/runner/runner.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 45827f62b..a71079357 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -629,10 +629,10 @@ func validateFlags() error { if *endpointSelector != "" { targetPortsList, err := strToUniqueIntSlice(*endpointTargetPorts) if err != nil { - return fmt.Errorf("unexpected value for %q flag with error %w", "target-ports", err) + return fmt.Errorf("unexpected value for %q flag with error %w", "endpoint-target-ports", err) } if len(targetPortsList) == 0 || len(targetPortsList) > 8 { - return fmt.Errorf("flag %q should have length from 1 to 8", "target-ports") + return fmt.Errorf("flag %q should have length from 1 to 8", "endpoint-target-ports") } } From c4b8c32b26688ee7b4b6ce31d8d81474c47265e9 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 17 Nov 2025 17:23:46 -0800 Subject: [PATCH 19/34] changed error message Signed-off-by: Xiyue Yu --- cmd/epp/runner/runner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index a71079357..cb706efa4 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -624,7 +624,7 @@ func registerHealthServer(mgr 
manager.Manager, logger logr.Logger, ds datastore. func validateFlags() error { if (*poolName != "" && *endpointSelector != "") || (*poolName == "" && *endpointSelector == "") { - return errors.New("either poolName or endpointSelector must be set") + return errors.New("either poolName or endpoint-selector must be set") } if *endpointSelector != "" { targetPortsList, err := strToUniqueIntSlice(*endpointTargetPorts) From 513590d685ab95f423ed3c9edeeb60292998d88c Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 17 Nov 2025 18:11:00 -0800 Subject: [PATCH 20/34] debug --- pkg/epp/datastore/datastore.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index 70caa55c8..e91a56f9e 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -161,6 +161,12 @@ func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool { } poolSelector := labels.SelectorFromSet(ds.pool.EndPoints.Selector) podSet := labels.Set(podLabels) + fmt.Println("debug1") + fmt.Println(ds.pool.EndPoints.Selector) + fmt.Println("debug2") + fmt.Println(podSet) + fmt.Println("debug3") + fmt.Println(poolSelector.Matches(podSet)) return poolSelector.Matches(podSet) } From 84b22756745a2eda1856ba1c1c69f8a0b863e83b Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 17 Nov 2025 18:25:29 -0800 Subject: [PATCH 21/34] remove debug logging --- pkg/epp/datastore/datastore.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index e91a56f9e..70caa55c8 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -161,12 +161,6 @@ func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool { } poolSelector := labels.SelectorFromSet(ds.pool.EndPoints.Selector) podSet := labels.Set(podLabels) - fmt.Println("debug1") - fmt.Println(ds.pool.EndPoints.Selector) - fmt.Println("debug2") - fmt.Println(podSet) - fmt.Println("debug3") - fmt.Println(poolSelector.Matches(podSet)) return poolSelector.Matches(podSet) } From 0bd692fee29c1bb8e6deeea3015b01b998912066 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 18 Nov 2025 13:41:35 -0800 Subject: [PATCH 22/34] fixed format --- cmd/epp/runner/runner.go | 11 +++++----- .../inferenceobjective_reconciler.go | 2 +- .../inferenceobjective_reconciler_test.go | 4 ++-- .../controller/inferencepool_reconciler.go | 4 ++-- .../inferencepool_reconciler_test.go | 6 +++--- pkg/epp/controller/pod_reconciler.go | 2 +- pkg/epp/controller/pod_reconciler_test.go | 4 ++-- pkg/epp/datalayer/endpoint_pool.go | 20 +++++++++---------- pkg/epp/datastore/datastore.go | 2 +- pkg/epp/datastore/datastore_test.go | 4 ++-- pkg/epp/handlers/server.go | 4 ++-- .../metrics/collectors/inference_pool_test.go | 8 ++++---- pkg/epp/metrics/metrics.go | 2 +- pkg/epp/requestcontrol/director_test.go | 6 +++--- pkg/epp/server/controller_manager.go | 6 +++--- pkg/epp/server/runserver.go | 4 ++-- pkg/epp/server/runserver_test.go | 1 - pkg/epp/util/pool/pool.go | 17 ++++++++-------- pkg/epp/util/testing/wrappers.go | 1 + test/integration/epp/hermetic_test.go | 6 +++--- test/utils/server.go | 8 ++++---- 21 files changed, 62 insertions(+), 60 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index cb706efa4..6c7b20cc6 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -36,17 +36,18 @@ import ( "go.uber.org/zap/zapcore" "google.golang.org/grpc" healthPb "google.golang.org/grpc/health/grpc_health_v1" + 
"k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/metrics/filters" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - - "k8s.io/apimachinery/pkg/labels" "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" @@ -103,7 +104,7 @@ var ( poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.") poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.") poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.") - endpointSelector = flag.String("endpoint-selector", "", "selector to filter model server pods on, only key value paris is supported. Format: a comma-separated list of key value paris, e.g., 'app:vllm-llama3-8b-instruct,env=prod'.") + endpointSelector = flag.String("endpoint-selector", "", "selector to filter model server pods on, only key=value paris is supported. Format: a comma-separated list of key value paris, e.g., 'app=vllm-llama3-8b-instruct,env=prod'.") endpointTargetPorts = flag.String("endpoint-target-ports", "", "target ports of model server pods. Format: a comma-separated list of numbers, e.g., '3000,3001,3002'") logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. 
Defaults to true.") @@ -422,7 +423,7 @@ func setupEndpointPool(setupLog logr.Logger) (*datalayer.EndpointPool, error) { if err != nil { setupLog.Error(err, "Failed to parse flag %q with error: %w", "endpoint-target-ports", err) } - endpointPool.DisableK8sCrd = true + endpointPool.DisableK8sCrdReconcile = true // Determine EPP namespace: NAMESPACE env var; else default eppNsEnv := os.Getenv("NAMESPACE") @@ -647,7 +648,7 @@ func validateFlags() error { } func strToUniqueIntSlice(s string) ([]int, error) { - seen := make(map[int]struct{}) + seen := sets.NewInt() var intList []int if s == "" { diff --git a/pkg/epp/controller/inferenceobjective_reconciler.go b/pkg/epp/controller/inferenceobjective_reconciler.go index c8ac5a6c3..2318f2414 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler.go +++ b/pkg/epp/controller/inferenceobjective_reconciler.go @@ -21,12 +21,12 @@ import ( "fmt" "k8s.io/apimachinery/pkg/api/errors" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index 57bd3bc5d..c381f083b 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -27,15 +27,15 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" utiltest "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" diff --git a/pkg/epp/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go index 76eac3317..90332b2f5 100644 --- a/pkg/epp/controller/inferencepool_reconciler.go +++ b/pkg/epp/controller/inferencepool_reconciler.go @@ -21,14 +21,14 @@ import ( "fmt" "k8s.io/apimachinery/pkg/api/errors" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" pooltuil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go 
b/pkg/epp/controller/inferencepool_reconciler_test.go index 27160a98b..51c12b031 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -28,17 +28,17 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" utiltest "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) diff --git a/pkg/epp/controller/pod_reconciler.go b/pkg/epp/controller/pod_reconciler.go index b3a78ef92..06be5d785 100644 --- a/pkg/epp/controller/pod_reconciler.go +++ b/pkg/epp/controller/pod_reconciler.go @@ -23,12 +23,12 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" podutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pod" diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index aeb00499d..0042a7c04 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -28,13 +28,13 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" utiltest "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" diff --git a/pkg/epp/datalayer/endpoint_pool.go b/pkg/epp/datalayer/endpoint_pool.go index d9c508677..f3a7e5eac 100644 --- a/pkg/epp/datalayer/endpoint_pool.go +++ b/pkg/epp/datalayer/endpoint_pool.go @@ -21,29 +21,29 @@ import ( ) type EndpointPool struct { - EndPoints *EndPoints - DisableK8sCrd bool - GKNN common.GKNN + EndPoints *Endpoints + DisableK8sCrdReconcile bool + GKNN common.GKNN } // NewEndpointPool creates and returns a new empty instance of EndpointPool. 
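A short usage sketch of the constructor and the renamed fields in standalone mode follows; it is not part of this patch, the GKNN points at the EPP's own Deployment, and the selector and port values are placeholders.

// Sketch: a standalone-mode pool keyed by the EPP Deployment rather than an
// InferencePool object.
import (
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"

	"sigs.k8s.io/gateway-api-inference-extension/pkg/common"
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
)

func standalonePool() *datalayer.EndpointPool {
	gknn := common.GKNN{
		NamespacedName: types.NamespacedName{Namespace: "default", Name: "my-epp"},
		GroupKind:      schema.GroupKind{Group: "apps", Kind: "Deployment"},
	}
	pool := datalayer.NewEndpointPool(true /* disableK8sCrdReconcile */, gknn)
	pool.EndPoints.Selector = map[string]string{"app": "vllm-llama3-8b-instruct"}
	pool.EndPoints.TargetPorts = []int{8000}
	return pool
}
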
-func NewEndpointPool(standAloneMode bool, gknn common.GKNN) *EndpointPool { +func NewEndpointPool(disableK8sCrdReconcile bool, gknn common.GKNN) *EndpointPool { endPoints := NewEndPoints() return &EndpointPool{ - GKNN: gknn, - DisableK8sCrd: standAloneMode, - EndPoints: endPoints, + GKNN: gknn, + DisableK8sCrdReconcile: disableK8sCrdReconcile, + EndPoints: endPoints, } } -type EndPoints struct { +type Endpoints struct { Selector map[string]string TargetPorts []int } // NewEndPoints creates and returns a new empty instance of EndpointPool. -func NewEndPoints() *EndPoints { - return &EndPoints{ +func NewEndPoints() *Endpoints { + return &Endpoints{ Selector: make(map[string]string), TargetPorts: []int{}, } diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index 70caa55c8..c0e351128 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -28,9 +28,9 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index c0a0a456d..94007cdd0 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -32,13 +32,13 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - pooltuil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" + "sigs.k8s.io/controller-runtime/pkg/client/fake" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + pooltuil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" testutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) diff --git a/pkg/epp/handlers/server.go b/pkg/epp/handlers/server.go index 3e9f65691..a67a1ea6f 100644 --- a/pkg/epp/handlers/server.go +++ b/pkg/epp/handlers/server.go @@ -29,10 +29,10 @@ import ( "github.com/google/uuid" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" - "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" schedulingtypes "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index 7dd82c267..ddafe4692 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -27,14 +27,14 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/component-base/metrics/testutil" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - 
"sigs.k8s.io/gateway-api-inference-extension/pkg/common" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - poolutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" + "sigs.k8s.io/controller-runtime/pkg/client/fake" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + poolutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" ) var ( diff --git a/pkg/epp/metrics/metrics.go b/pkg/epp/metrics/metrics.go index 59c8976cd..d49104700 100644 --- a/pkg/epp/metrics/metrics.go +++ b/pkg/epp/metrics/metrics.go @@ -23,9 +23,9 @@ import ( "github.com/prometheus/client_golang/prometheus" compbasemetrics "k8s.io/component-base/metrics" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/metrics" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" metricsutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/metrics" ) diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index cd64f9221..19bf14c56 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -32,14 +32,14 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - "sigs.k8s.io/gateway-api-inference-extension/pkg/common" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + "sigs.k8s.io/controller-runtime/pkg/client/fake" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/handlers" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata" diff --git a/pkg/epp/server/controller_manager.go b/pkg/epp/server/controller_manager.go index f308814e2..2eb863cda 100644 --- a/pkg/epp/server/controller_manager.go +++ b/pkg/epp/server/controller_manager.go @@ -25,15 +25,15 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/manager" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" ) var scheme = runtime.NewScheme() @@ -59,7 +59,7 @@ func defaultManagerOptions(endpointPool *datalayer.EndpointPool, metricsServerOp }, Metrics: metricsServerOptions, } - if !endpointPool.DisableK8sCrd { + if !endpointPool.DisableK8sCrdReconcile { opt.Cache.ByObject[&v1alpha2.InferenceObjective{}] = 
cache.ByObject{Namespaces: map[string]cache.Config{ endpointPool.GKNN.Namespace: {}, }} diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go index a509e83cc..e3e594e21 100644 --- a/pkg/epp/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -30,9 +30,9 @@ import ( "google.golang.org/grpc/credentials" "google.golang.org/grpc/health" healthgrpc "google.golang.org/grpc/health/grpc_health_v1" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" tlsutil "sigs.k8s.io/gateway-api-inference-extension/internal/tls" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" @@ -102,7 +102,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner { // SetupWithManager sets up the runner with the given manager. func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { // Create the controllers and register them with the manager - if !r.EndpointPool.DisableK8sCrd { + if !r.EndpointPool.DisableK8sCrdReconcile { if err := (&controller.InferencePoolReconciler{ Datastore: r.Datastore, Reader: mgr.GetClient(), diff --git a/pkg/epp/server/runserver_test.go b/pkg/epp/server/runserver_test.go index b02688c58..172928865 100644 --- a/pkg/epp/server/runserver_test.go +++ b/pkg/epp/server/runserver_test.go @@ -20,7 +20,6 @@ import ( "testing" "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go index 84a020e74..bd03cdbce 100644 --- a/pkg/epp/util/pool/pool.go +++ b/pkg/epp/util/pool/pool.go @@ -20,6 +20,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" @@ -43,14 +44,14 @@ func InferencePoolToEndpointPool(inferencePool *v1.InferencePool) *datalayer.End NamespacedName: types.NamespacedName{Namespace: inferencePool.Namespace, Name: inferencePool.Name}, GroupKind: schema.GroupKind{Group: "inference.networking.k8s.io", Kind: "InferencePool"}, } - endPoints := &datalayer.EndPoints{ + endPoints := &datalayer.Endpoints{ Selector: selector, TargetPorts: targetPorts, } endpointPool := &datalayer.EndpointPool{ - EndPoints: endPoints, - DisableK8sCrd: false, - GKNN: gknn, + EndPoints: endPoints, + DisableK8sCrdReconcile: false, + GKNN: gknn, } return endpointPool } @@ -65,14 +66,14 @@ func AlphaInferencePoolToEndpointPool(inferencePool *v1alpha2.InferencePool) *da NamespacedName: types.NamespacedName{Namespace: inferencePool.Namespace, Name: inferencePool.Name}, GroupKind: schema.GroupKind{Group: "inference.networking.x-k8s.io", Kind: "InferencePool"}, } - endPoints := &datalayer.EndPoints{ + endPoints := &datalayer.Endpoints{ Selector: selector, TargetPorts: targetPorts, } endpointPool := &datalayer.EndpointPool{ - EndPoints: endPoints, - DisableK8sCrd: false, - GKNN: gknn, + EndPoints: endPoints, + DisableK8sCrdReconcile: false, + GKNN: gknn, } return endpointPool } diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index 2ad02c55d..3e3380519 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -21,6 +21,7 @@ import ( 
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" diff --git a/test/integration/epp/hermetic_test.go b/test/integration/epp/hermetic_test.go index 84c1700dc..0853c229f 100644 --- a/test/integration/epp/hermetic_test.go +++ b/test/integration/epp/hermetic_test.go @@ -48,6 +48,7 @@ import ( k8syaml "k8s.io/apimachinery/pkg/util/yaml" clientgoscheme "k8s.io/client-go/kubernetes/scheme" metricsutils "k8s.io/component-base/metrics/testutil" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -56,14 +57,12 @@ import ( crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/metrics/filters" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - "sigs.k8s.io/yaml" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" @@ -80,6 +79,7 @@ import ( requtil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/request" epptestutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" integrationutils "sigs.k8s.io/gateway-api-inference-extension/test/integration" + "sigs.k8s.io/yaml" ) const ( diff --git a/test/utils/server.go b/test/utils/server.go index c7a080f8d..241b0bcef 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -32,16 +32,16 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "sigs.k8s.io/gateway-api-inference-extension/pkg/common" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - pooltuil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" - v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + pooltuil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" testutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) From 72bb590d32a985e87c0bef4084220772d1d6fe28 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 18 Nov 2025 13:47:43 -0800 Subject: [PATCH 23/34] fixed import --- cmd/epp/runner/runner.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 0612d34ec..1219ebdae 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -48,7 +48,6 @@ import ( 
"sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/metrics/filters" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - configapi "sigs.k8s.io/gateway-api-inference-extension/apix/config/v1alpha1" "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" From 71f9fe5b359e0965248746d94bdce26d5058b0f6 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 18 Nov 2025 14:01:18 -0800 Subject: [PATCH 24/34] updated to use epp name instead of pod name --- cmd/epp/runner/runner.go | 25 +++++++++++++++---- .../templates/epp-deployment.yaml | 2 +- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 1219ebdae..57a8576cf 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -25,6 +25,7 @@ import ( "net/http" "net/http/pprof" "os" + "regexp" "runtime" "strconv" "strings" @@ -440,14 +441,18 @@ func setupEndpointPool(setupLog logr.Logger) (*datalayer.EndpointPool, error) { if eppNsEnv == "" { setupLog.Error(err, "Failed to get environment variable EPP_NAMESPACE") } - // Determine EPP name: EPP_NAME env var - eppNameEnv := os.Getenv("EPP_NAME") - if eppNameEnv == "" { - setupLog.Error(err, "Failed to get environment variable EPP_NAME") + // Determine EPP name: POD_NAME env var + eppPodNameEnv := os.Getenv("POD_NAME") + if eppPodNameEnv == "" { + setupLog.Error(err, "Failed to get environment variable POD_NAME") } + eppName, err := extractDeploymentName(eppPodNameEnv) + if err != nil { + setupLog.Error(err, "Failed to extract deployment name from POD_NAME") + } endpointPool.GKNN = common.GKNN{ - NamespacedName: types.NamespacedName{Namespace: eppNsEnv, Name: eppNameEnv}, + NamespacedName: types.NamespacedName{Namespace: eppNsEnv, Name: eppName}, GroupKind: schema.GroupKind{Kind: "apps", Group: "Deployment"}, } @@ -770,3 +775,13 @@ func setupPprofHandlers(mgr ctrl.Manager) error { } return nil } + +func extractDeploymentName(podName string) (string, error) { + regex := regexp.MustCompile(`^(.+)-[a-z0-9]+-[a-z0-9]+$`) + + matches := regex.FindStringSubmatch(podName) + if len(matches) == 2 { + return matches[1], nil + } + return "", fmt.Errorf("failed to parse deployment name from pod name %s", podName) +} diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml index c253bbf92..be6a39ead 100644 --- a/config/charts/inferencepool/templates/epp-deployment.yaml +++ b/config/charts/inferencepool/templates/epp-deployment.yaml @@ -147,7 +147,7 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace - - name: EPP_NAME + - name: POD_NAME valueFrom: fieldRef: fieldPath: metadata.name From 770f96c034a4db55f9151feb06e1b1e35dd35437 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 18 Nov 2025 14:09:50 -0800 Subject: [PATCH 25/34] fixed compiler --- pkg/epp/datalayer/metrics/logger_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/epp/datalayer/metrics/logger_test.go b/pkg/epp/datalayer/metrics/logger_test.go index 3ba2e7e84..4bf68cf0a 100644 --- a/pkg/epp/datalayer/metrics/logger_test.go +++ b/pkg/epp/datalayer/metrics/logger_test.go @@ -31,6 +31,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + poolutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" ) // Buffer to write the 
logs to @@ -95,8 +96,9 @@ var pod2 = &datalayer.PodInfo{ type fakeDataStore struct{} -func (f *fakeDataStore) PoolGet() (*v1.InferencePool, error) { - return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPorts: []v1.Port{{Number: 8000}}}}, nil +func (f *fakeDataStore) PoolGet() (*datalayer.EndpointPool, error) { + pool := &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPorts: []v1.Port{{Number: 8000}}}} + return poolutil.InferencePoolToEndpointPool(pool), nil } func (f *fakeDataStore) PodList(predicate func(datalayer.Endpoint) bool) []datalayer.Endpoint { From a96edb03ef1c55d6750f2d896ca4d98ddf25be55 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 18 Nov 2025 14:35:28 -0800 Subject: [PATCH 26/34] verify --- pkg/epp/controller/inferencepool_reconciler_test.go | 2 +- pkg/epp/datastore/datastore.go | 2 +- pkg/epp/metrics/metrics.go | 2 +- pkg/epp/util/pool/pool.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 51c12b031..25e03cb59 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -28,7 +28,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" - + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index c0e351128..90479c3e3 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -28,7 +28,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" - + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" diff --git a/pkg/epp/metrics/metrics.go b/pkg/epp/metrics/metrics.go index d49104700..e005e1d4c 100644 --- a/pkg/epp/metrics/metrics.go +++ b/pkg/epp/metrics/metrics.go @@ -23,7 +23,7 @@ import ( "github.com/prometheus/client_golang/prometheus" compbasemetrics "k8s.io/component-base/metrics" - + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/metrics" logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go index bd03cdbce..968ed4c4d 100644 --- a/pkg/epp/util/pool/pool.go +++ b/pkg/epp/util/pool/pool.go @@ -20,7 +20,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" - + v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" From 1f15673771c6e9b62eeec95d31bc56dac11d4ee3 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 18 Nov 2025 15:29:40 -0800 Subject: [PATCH 27/34] don't set endpointpool in datastore for inferencepool at start --- cmd/epp/runner/runner.go | 141 ++++++++++-------- .../inferenceobjective_reconciler_test.go | 3 +- .../inferencepool_reconciler_test.go | 4 +- pkg/epp/controller/pod_reconciler_test.go | 3 +- pkg/epp/datastore/datastore.go | 19 ++- pkg/epp/datastore/datastore_test.go | 17 +-- .../metrics/collectors/inference_pool_test.go | 6 +- pkg/epp/requestcontrol/director_test.go | 11 +- pkg/epp/server/controller_manager.go | 30 ++-- 
pkg/epp/server/runserver.go | 11 +- test/integration/epp/hermetic_test.go | 6 +- test/utils/server.go | 10 +- 12 files changed, 132 insertions(+), 129 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 57a8576cf..b93f5b890 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -228,28 +228,29 @@ func (r *Runner) Run(ctx context.Context) error { return err } r.featureGates = featureGates - // Setup EndpointPool - endpointPool, err := setupEndpointPool(setupLog) - if err != nil { - setupLog.Error(err, "Failed to set up Endpoints Pool") - return err - } // --- Setup Datastore --- epf, err := r.setupMetricsCollection(setupLog, r.featureGates[datalayer.FeatureGate]) if err != nil { return err } - datastore := datastore.NewDatastore(ctx, epf, int32(*modelServerMetricsPort), endpointPool) - eppConfig, err := r.parseConfigurationPhaseTwo(ctx, rawConfig, datastore) + gknn, err := extractGKNN() + if err != nil { + return err + } + ds, err := setupDataStore(setupLog, ctx, epf, int32(*modelServerMetricsPort), *gknn) + if err != nil { + return err + } + eppConfig, err := r.parseConfigurationPhaseTwo(ctx, rawConfig, ds) if err != nil { setupLog.Error(err, "Failed to parse configuration") return err } // --- Setup Metrics Server --- - customCollectors := []prometheus.Collector{collectors.NewInferencePoolMetricsCollector(datastore)} + customCollectors := []prometheus.Collector{collectors.NewInferencePoolMetricsCollector(ds)} if r.customCollectors != nil { customCollectors = append(customCollectors, r.customCollectors...) } @@ -274,7 +275,7 @@ func (r *Runner) Run(ctx context.Context) error { isLeader := &atomic.Bool{} isLeader.Store(false) - mgr, err := runserver.NewDefaultManager(endpointPool, cfg, metricsServerOptions, *haEnableLeaderElection) + mgr, err := runserver.NewDefaultManager(*gknn, cfg, metricsServerOptions, *haEnableLeaderElection) if err != nil { setupLog.Error(err, "Failed to create controller manager") return err @@ -348,7 +349,7 @@ func (r *Runner) Run(ctx context.Context) error { } director := requestcontrol.NewDirectorWithConfig( - datastore, + ds, scheduler, admissionController, r.requestControlConfig) @@ -356,8 +357,8 @@ func (r *Runner) Run(ctx context.Context) error { // --- Setup ExtProc Server Runner --- serverRunner := &runserver.ExtProcServerRunner{ GrpcPort: *grpcPort, - EndpointPool: endpointPool, - Datastore: datastore, + GKNN: *gknn, + Datastore: ds, SecureServing: *secureServing, HealthChecking: *healthChecking, CertPath: *certPath, @@ -374,7 +375,7 @@ func (r *Runner) Run(ctx context.Context) error { // --- Add Runnables to Manager --- // Register health server. 
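extractGKNN, called above, resolves standalone mode by deriving the EPP's owning Deployment name from its own pod name, using the regular expression added earlier in this series. A self-contained sketch of that derivation follows; the function name and the example pod name are made up.

package main

import (
	"fmt"
	"regexp"
)

// deploymentNameFromPod strips the ReplicaSet hash and pod suffix from a
// Deployment-managed pod name, mirroring extractDeploymentName in this series.
func deploymentNameFromPod(podName string) (string, error) {
	re := regexp.MustCompile(`^(.+)-[a-z0-9]+-[a-z0-9]+$`)
	if m := re.FindStringSubmatch(podName); len(m) == 2 {
		return m[1], nil
	}
	return "", fmt.Errorf("failed to parse deployment name from pod name %s", podName)
}

func main() {
	name, _ := deploymentNameFromPod("vllm-llama3-8b-instruct-epp-7c9f8d6b5c-x2k4q")
	fmt.Println(name) // vllm-llama3-8b-instruct-epp
}
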
- if err := registerHealthServer(mgr, ctrl.Log.WithName("health"), datastore, *grpcHealthPort, isLeader, *haEnableLeaderElection); err != nil { + if err := registerHealthServer(mgr, ctrl.Log.WithName("health"), ds, *grpcHealthPort, isLeader, *haEnableLeaderElection); err != nil { return err } @@ -394,36 +395,12 @@ func (r *Runner) Run(ctx context.Context) error { return nil } -func setupEndpointPool(setupLog logr.Logger) (*datalayer.EndpointPool, error) { - endpointPool := datalayer.NewEndpointPool(false, common.GKNN{}) - if *poolName != "" { - // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default - resolvePoolNamespace := func() string { - if *poolNamespace != "" { - return *poolNamespace - } - if nsEnv := os.Getenv("NAMESPACE"); nsEnv != "" { - return nsEnv - } - return runserver.DefaultPoolNamespace - } - resolvedPoolNamespace := resolvePoolNamespace() - poolNamespacedName := types.NamespacedName{ - Name: *poolName, - Namespace: resolvedPoolNamespace, - } - poolGroupKind := schema.GroupKind{ - Group: *poolGroup, - Kind: "InferencePool", - } - poolGKNN := common.GKNN{ - NamespacedName: poolNamespacedName, - GroupKind: poolGroupKind, - } - endpointPool.GKNN = poolGKNN +func setupDataStore(setupLog logr.Logger, ctx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, gknn common.GKNN) (datastore.Datastore, error) { + if gknn.Kind == "InferencePool" { + return datastore.NewDatastore(ctx, epFactory, modelServerMetricsPort), nil } - - if *endpointSelector != "" { + if gknn.Kind == "Deployment" { + endpointPool := datalayer.NewEndpointPool(true, gknn) labelsMap, err := labels.ConvertSelectorToLabelsMap(*endpointSelector) if err != nil { setupLog.Error(err, "Failed to parse flag %q with error: %w", "endpoint-selector", err) @@ -433,31 +410,14 @@ func setupEndpointPool(setupLog logr.Logger) (*datalayer.EndpointPool, error) { endpointPool.EndPoints.TargetPorts, err = strToUniqueIntSlice(*endpointTargetPorts) if err != nil { setupLog.Error(err, "Failed to parse flag %q with error: %w", "endpoint-target-ports", err) - } - endpointPool.DisableK8sCrdReconcile = true - - // Determine EPP namespace: NAMESPACE env var; else default - eppNsEnv := os.Getenv("NAMESPACE") - if eppNsEnv == "" { - setupLog.Error(err, "Failed to get environment variable EPP_NAMESPACE") - } - // Determine EPP name: POD_NAME env var - eppPodNameEnv := os.Getenv("POD_NAME") - if eppPodNameEnv == "" { - setupLog.Error(err, "Failed to get environment variable POD_NAME") - - } - eppName, err := extractDeploymentName(eppPodNameEnv) - if err != nil { - setupLog.Error(err, "Failed to extract deployment name from POD_NAME") - } - endpointPool.GKNN = common.GKNN{ - NamespacedName: types.NamespacedName{Namespace: eppNsEnv, Name: eppName}, - GroupKind: schema.GroupKind{Kind: "apps", Group: "Deployment"}, + return nil, err } + endpointPoolOption := datastore.WithEndpointPool(endpointPool) + return datastore.NewDatastore(ctx, epFactory, modelServerMetricsPort, endpointPoolOption), nil } - return endpointPool, nil + return nil, fmt.Errorf("invalid gknn kind %s", gknn.Kind) + } // registerInTreePlugins registers the factory functions of all known plugins @@ -785,3 +745,54 @@ func extractDeploymentName(podName string) (string, error) { } return "", fmt.Errorf("failed to parse deployment name from pod name %s", podName) } + +func extractGKNN() (*common.GKNN, error) { + if *poolName != "" { + // Determine pool namespace: if --pool-namespace is non-empty, use it; 
else NAMESPACE env var; else default + resolvePoolNamespace := func() string { + if *poolNamespace != "" { + return *poolNamespace + } + if nsEnv := os.Getenv("NAMESPACE"); nsEnv != "" { + return nsEnv + } + return runserver.DefaultPoolNamespace + } + resolvedPoolNamespace := resolvePoolNamespace() + poolNamespacedName := types.NamespacedName{ + Name: *poolName, + Namespace: resolvedPoolNamespace, + } + poolGroupKind := schema.GroupKind{ + Group: *poolGroup, + Kind: "InferencePool", + } + return &common.GKNN{ + NamespacedName: poolNamespacedName, + GroupKind: poolGroupKind, + }, nil + } + + if *endpointSelector != "" { + // Determine EPP namespace: NAMESPACE env var; else default + eppNsEnv := os.Getenv("NAMESPACE") + if eppNsEnv == "" { + return nil, errors.New("failed to get environment variable NAMESPACE") + } + // Determine EPP name: POD_NAME env var + eppPodNameEnv := os.Getenv("POD_NAME") + if eppPodNameEnv == "" { + return nil, errors.New("failed to get environment variable POD_NAME") + + } + eppName, err := extractDeploymentName(eppPodNameEnv) + if err != nil { + return nil, err + } + return &common.GKNN{ + NamespacedName: types.NamespacedName{Namespace: eppNsEnv, Name: eppName}, + GroupKind: schema.GroupKind{Kind: "Deployment", Group: "apps"}, + }, nil + } + return nil, errors.New("can't construct gknn as both pool-name and endpoint-selector are missing") +} diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index c381f083b..c2593d64e 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -35,7 +35,6 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" utiltest "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" @@ -162,7 +161,7 @@ func TestInferenceObjectiveReconciler(t *testing.T) { WithObjects(initObjs...). Build() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pool.ToGKNN(inferencePool))) + ds := datastore.NewDatastore(t.Context(), pmf, 0) for _, m := range test.objectivessInStore { ds.ObjectiveSet(m) } diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 25e03cb59..4454929b8 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -115,7 +115,7 @@ func TestInferencePoolReconciler(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndpointPool(false, gknn)) + ds := datastore.NewDatastore(ctx, pmf, 0) inferencePoolReconciler := &InferencePoolReconciler{Reader: fakeClient, Datastore: ds, PoolGKNN: gknn} // Step 1: Inception, only ready pods matching pool1 are added to the store. 
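// Aside: a condensed, standalone sketch (not part of the patch) of the namespace
// resolution order that extractGKNN relies on above: the explicit flag value wins,
// then the NAMESPACE env var, then a package default. The literal "default" below
// is only a stand-in for runserver.DefaultPoolNamespace.
package main

import (
	"fmt"
	"os"
)

func resolveNamespace(flagValue string) string {
	if flagValue != "" {
		return flagValue // explicit --pool-namespace flag takes precedence
	}
	if env := os.Getenv("NAMESPACE"); env != "" {
		return env // fall back to the downward-API env var
	}
	return "default" // stand-in for runserver.DefaultPoolNamespace
}

func main() {
	os.Setenv("NAMESPACE", "inference-system")
	fmt.Println(resolveNamespace(""))        // inference-system (env var beats the default)
	fmt.Println(resolveNamespace("prod-ns")) // prod-ns (flag beats the env var)
}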
@@ -262,7 +262,7 @@ func TestXInferencePoolReconciler(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(ctx, pmf, 0, datalayer.NewEndpointPool(false, gknn)) + ds := datastore.NewDatastore(ctx, pmf, 0) inferencePoolReconciler := &InferencePoolReconciler{Reader: fakeClient, Datastore: ds, PoolGKNN: gknn} // Step 1: Inception, only ready pods matching pool1 are added to the store. diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index 0042a7c04..efdb36b25 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -34,7 +34,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" utiltest "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" @@ -198,7 +197,7 @@ func TestPodReconciler(t *testing.T) { Build() // Configure the initial state of the datastore. - store := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pool.ToGKNN(test.pool))) + store := datastore.NewDatastore(t.Context(), pmf, 0) _ = store.PoolSet(t.Context(), fakeClient, pool.InferencePoolToEndpointPool(test.pool)) for _, pod := range test.existingPods { store.PodUpdateOrAddIfNotExist(pod) diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index 90479c3e3..5f519ab74 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -68,16 +68,23 @@ type Datastore interface { Clear() } -func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, endpointPool *datalayer.EndpointPool) Datastore { +func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, opts ...DatastoreOption) Datastore { + // Initialize with defaults store := &datastore{ parentCtx: parentCtx, poolAndObjectivesMu: sync.RWMutex{}, - pool: endpointPool, + pool: nil, objectives: make(map[string]*v1alpha2.InferenceObjective), pods: &sync.Map{}, modelServerMetricsPort: modelServerMetricsPort, epf: epFactory, } + + // Apply options + for _, opt := range opts { + opt(store) + } + return store } @@ -312,3 +319,11 @@ func (ds *datastore) podResyncAll(ctx context.Context, reader client.Reader) err return nil } + +type DatastoreOption func(*datastore) + +func WithEndpointPool(pool *datalayer.EndpointPool) DatastoreOption { + return func(d *datastore) { + d.pool = pool + } +} diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index 94007cdd0..73beb1f24 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -87,9 +87,7 @@ func TestPool(t *testing.T) { WithScheme(scheme). 
Build() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - gknn := pooltuil.ToGKNN(tt.inferencePool) - endPointPool := datalayer.NewEndpointPool(false, gknn) - ds := NewDatastore(context.Background(), pmf, 0, endPointPool) + ds := NewDatastore(context.Background(), pmf, 0) _ = ds.PoolSet(context.Background(), fakeClient, pooltuil.InferencePoolToEndpointPool(tt.inferencePool)) gotPool, gotErr := ds.PoolGet() if diff := cmp.Diff(tt.wantErr, gotErr, cmpopts.EquateErrors()); diff != "" { @@ -123,10 +121,6 @@ func TestObjective(t *testing.T) { Priority(2).ObjRef() // Same object name as model2ts, different model name. model2chat := testutil.MakeInferenceObjective(model2ts.Name).ObjRef() - pool1Selector := map[string]string{"app": "vllm_v1"} - pool1 := testutil.MakeInferencePool("pool1"). - Namespace("default"). - Selector(pool1Selector).ObjRef() tests := []struct { name string @@ -200,7 +194,7 @@ func TestObjective(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pooltuil.ToGKNN(pool1))) + ds := NewDatastore(t.Context(), pmf, 0) for _, m := range test.existingModels { ds.ObjectiveSet(m) } @@ -334,8 +328,7 @@ func TestMetrics(t *testing.T) { WithScheme(scheme). Build() pmf := backendmetrics.NewPodMetricsFactory(test.pmc, time.Millisecond) - gknn := pooltuil.ToGKNN(inferencePool) - ds := NewDatastore(ctx, pmf, 0, datalayer.NewEndpointPool(false, gknn)) + ds := NewDatastore(ctx, pmf, 0) _ = ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndpointPool(inferencePool)) for _, pod := range test.storePods { ds.PodUpdateOrAddIfNotExist(pod) @@ -403,7 +396,7 @@ func TestPods(t *testing.T) { t.Run(test.name, func(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pooltuil.ToGKNN(inferencePool))) + ds := NewDatastore(t.Context(), pmf, 0) fakeClient := fake.NewFakeClient() if err := ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndpointPool(inferencePool)); err != nil { t.Error(err) @@ -587,7 +580,7 @@ func TestPodInfo(t *testing.T) { t.Run(test.name, func(t *testing.T) { ctx := context.Background() pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, pooltuil.ToGKNN(test.pool))) + ds := NewDatastore(t.Context(), pmf, 0) fakeClient := fake.NewFakeClient() if err := ds.PoolSet(ctx, fakeClient, pooltuil.InferencePoolToEndpointPool(test.pool)); err != nil { t.Error(err) diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index ddafe4692..20ab69fd7 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -30,9 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" poolutil 
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" ) @@ -53,7 +51,7 @@ var ( func TestNoMetricsCollected(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(context.Background(), pmf, 0, datalayer.NewEndpointPool(false, common.GKNN{})) + ds := datastore.NewDatastore(context.Background(), pmf, 0) collector := &inferencePoolMetricsCollector{ ds: ds, @@ -79,7 +77,7 @@ func TestMetricsCollected(t *testing.T) { TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, }, } - ds := datastore.NewDatastore(context.Background(), pmf, 0, datalayer.NewEndpointPool(false, poolutil.ToGKNN(inferencePool))) + ds := datastore.NewDatastore(context.Background(), pmf, 0) scheme := runtime.NewScheme() fakeClient := fake.NewClientBuilder(). diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index 19bf14c56..f361303c8 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -36,7 +36,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" - "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" @@ -206,7 +205,7 @@ func TestDirector_HandleRequest(t *testing.T) { // Datastore setup pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second) - ds := datastore.NewDatastore(t.Context(), pmf, 0, datalayer.NewEndpointPool(false, poolutil.ToGKNN(pool))) + ds := datastore.NewDatastore(t.Context(), pmf, 0) ds.ObjectiveSet(ioFoodReview) ds.ObjectiveSet(ioFoodReviewResolve) ds.ObjectiveSet(ioFoodReviewSheddable) @@ -757,7 +756,7 @@ func TestGetRandomPod(t *testing.T) { t.Run(test.name, func(t *testing.T) { pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Millisecond) endpointPool := poolutil.InferencePoolToEndpointPool(pool) - ds := datastore.NewDatastore(t.Context(), pmf, 0, endpointPool) + ds := datastore.NewDatastore(t.Context(), pmf, 0) err := ds.PoolSet(t.Context(), fakeClient, endpointPool) if err != nil { t.Errorf("unexpected error setting pool: %s", err) @@ -782,7 +781,7 @@ func TestDirector_HandleResponseReceived(t *testing.T) { pr1 := newTestResponseReceived("pr1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndpointPool(false, common.GKNN{})) + ds := datastore.NewDatastore(t.Context(), nil, 0) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, &mockAdmissionController{}, NewConfig().WithResponseReceivedPlugins(pr1)) @@ -819,7 +818,7 @@ func TestDirector_HandleResponseStreaming(t *testing.T) { ps1 := newTestResponseStreaming("ps1") ctx := logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndpointPool(false, common.GKNN{})) + ds := datastore.NewDatastore(t.Context(), nil, 0) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, nil, NewConfig().WithResponseStreamingPlugins(ps1)) @@ -855,7 +854,7 @@ func TestDirector_HandleResponseComplete(t *testing.T) { pc1 := newTestResponseComplete("pc1") ctx := 
logutil.NewTestLoggerIntoContext(context.Background()) - ds := datastore.NewDatastore(t.Context(), nil, 0, datalayer.NewEndpointPool(false, common.GKNN{})) + ds := datastore.NewDatastore(t.Context(), nil, 0) mockSched := &mockScheduler{} director := NewDirectorWithConfig(ds, mockSched, nil, NewConfig().WithResponseCompletePlugins(pc1)) diff --git a/pkg/epp/server/controller_manager.go b/pkg/epp/server/controller_manager.go index 2eb863cda..2b8e04932 100644 --- a/pkg/epp/server/controller_manager.go +++ b/pkg/epp/server/controller_manager.go @@ -33,7 +33,7 @@ import ( metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" ) var scheme = runtime.NewScheme() @@ -45,39 +45,39 @@ func init() { } // defaultManagerOptions returns the default options used to create the manager. -func defaultManagerOptions(endpointPool *datalayer.EndpointPool, metricsServerOptions metricsserver.Options) (ctrl.Options, error) { +func defaultManagerOptions(gknn common.GKNN, metricsServerOptions metricsserver.Options) (ctrl.Options, error) { opt := ctrl.Options{ Scheme: scheme, Cache: cache.Options{ ByObject: map[client.Object]cache.ByObject{ &corev1.Pod{}: { Namespaces: map[string]cache.Config{ - endpointPool.GKNN.Namespace: {}, + gknn.Namespace: {}, }, }, }, }, Metrics: metricsServerOptions, } - if !endpointPool.DisableK8sCrdReconcile { + if gknn.Kind == "InferencePool" { opt.Cache.ByObject[&v1alpha2.InferenceObjective{}] = cache.ByObject{Namespaces: map[string]cache.Config{ - endpointPool.GKNN.Namespace: {}, + gknn.Namespace: {}, }} - switch endpointPool.GKNN.Group { + switch gknn.Group { case v1alpha2.GroupName: opt.Cache.ByObject[&v1alpha2.InferencePool{}] = cache.ByObject{ - Namespaces: map[string]cache.Config{endpointPool.GKNN.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ - "metadata.name": endpointPool.GKNN.Name, + Namespaces: map[string]cache.Config{gknn.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ + "metadata.name": gknn.Name, })}}, } case v1.GroupName: opt.Cache.ByObject[&v1.InferencePool{}] = cache.ByObject{ - Namespaces: map[string]cache.Config{endpointPool.GKNN.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ - "metadata.name": endpointPool.GKNN.Name, + Namespaces: map[string]cache.Config{gknn.Namespace: {FieldSelector: fields.SelectorFromSet(fields.Set{ + "metadata.name": gknn.Name, })}}, } default: - return ctrl.Options{}, fmt.Errorf("unknown group: %s", endpointPool.GKNN.Group) + return ctrl.Options{}, fmt.Errorf("unknown group: %s", gknn.Group) } } @@ -85,8 +85,8 @@ func defaultManagerOptions(endpointPool *datalayer.EndpointPool, metricsServerOp } // NewDefaultManager creates a new controller manager with default configuration. 
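// Aside: a small sketch (not from the patch) of the field selector built in
// defaultManagerOptions above, which restricts the manager cache to the single
// InferencePool named by the GKNN; the pool name used here is just an example value.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/fields"
)

func main() {
	sel := fields.SelectorFromSet(fields.Set{"metadata.name": "vllm-llama3-8b-instruct"})
	fmt.Println(sel.String())                                      // metadata.name=vllm-llama3-8b-instruct
	fmt.Println(sel.Matches(fields.Set{"metadata.name": "other"})) // false: other pools are not cached
}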
-func NewDefaultManager(endpointPool *datalayer.EndpointPool, restConfig *rest.Config, metricsServerOptions metricsserver.Options, leaderElectionEnabled bool) (ctrl.Manager, error) { - opt, err := defaultManagerOptions(endpointPool, metricsServerOptions) +func NewDefaultManager(gknn common.GKNN, restConfig *rest.Config, metricsServerOptions metricsserver.Options, leaderElectionEnabled bool) (ctrl.Manager, error) { + opt, err := defaultManagerOptions(gknn, metricsServerOptions) if err != nil { return nil, fmt.Errorf("failed to create controller manager options: %v", err) } @@ -95,8 +95,8 @@ func NewDefaultManager(endpointPool *datalayer.EndpointPool, restConfig *rest.Co opt.LeaderElection = true opt.LeaderElectionResourceLock = "leases" // The lease name needs to be unique per EPP deployment. - opt.LeaderElectionID = fmt.Sprintf("epp-%s-%s.gateway-api-inference-extension.sigs.k8s.io", endpointPool.GKNN.Namespace, endpointPool.GKNN.Name) - opt.LeaderElectionNamespace = endpointPool.GKNN.Namespace + opt.LeaderElectionID = fmt.Sprintf("epp-%s-%s.gateway-api-inference-extension.sigs.k8s.io", gknn.Namespace, gknn.Name) + opt.LeaderElectionNamespace = gknn.Namespace opt.LeaderElectionReleaseOnCancel = true } diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go index e3e594e21..65f4c2944 100644 --- a/pkg/epp/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -22,8 +22,6 @@ import ( "fmt" "time" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" - extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "github.com/go-logr/logr" "google.golang.org/grpc" @@ -35,6 +33,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" tlsutil "sigs.k8s.io/gateway-api-inference-extension/internal/tls" + "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/controller" dlmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer/metrics" @@ -47,7 +46,7 @@ import ( // ExtProcServerRunner provides methods to manage an external process server. type ExtProcServerRunner struct { GrpcPort int - EndpointPool *datalayer.EndpointPool + GKNN common.GKNN Datastore datastore.Datastore SecureServing bool HealthChecking bool @@ -102,11 +101,11 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner { // SetupWithManager sets up the runner with the given manager. 
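// Aside: an illustration (not part of the patch) of the per-deployment lease name
// formatted above for leader election; namespace and name come from the resolved
// GKNN, so two EPP deployments never contend for the same lease. The values below
// are example inputs only.
package main

import "fmt"

func main() {
	leaseID := fmt.Sprintf("epp-%s-%s.gateway-api-inference-extension.sigs.k8s.io", "inference-system", "vllm-llama3-8b-instruct")
	fmt.Println(leaseID) // epp-inference-system-vllm-llama3-8b-instruct.gateway-api-inference-extension.sigs.k8s.io
}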
func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { // Create the controllers and register them with the manager - if !r.EndpointPool.DisableK8sCrdReconcile { + if r.GKNN.Kind == "InferencePool" { if err := (&controller.InferencePoolReconciler{ Datastore: r.Datastore, Reader: mgr.GetClient(), - PoolGKNN: r.EndpointPool.GKNN, + PoolGKNN: r.GKNN, }).SetupWithManager(mgr); err != nil { return fmt.Errorf("failed setting up InferencePoolReconciler: %w", err) } @@ -114,7 +113,7 @@ func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Man if err := (&controller.InferenceObjectiveReconciler{ Datastore: r.Datastore, Reader: mgr.GetClient(), - PoolGKNN: r.EndpointPool.GKNN, + PoolGKNN: r.GKNN, }).SetupWithManager(ctx, mgr); err != nil { return fmt.Errorf("failed setting up InferenceObjectiveReconciler: %w", err) } diff --git a/test/integration/epp/hermetic_test.go b/test/integration/epp/hermetic_test.go index 0853c229f..81bba5ee3 100644 --- a/test/integration/epp/hermetic_test.go +++ b/test/integration/epp/hermetic_test.go @@ -62,7 +62,6 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" @@ -1175,10 +1174,9 @@ func BeforeSuite() func() { NamespacedName: types.NamespacedName{Namespace: testNamespace, Name: testPoolName}, GroupKind: schema.GroupKind{Group: v1.GroupVersion.Group, Kind: "InferencePool"}, } - endpointPool := datalayer.NewEndpointPool(false, poolGKNN) - serverRunner.EndpointPool = datalayer.NewEndpointPool(false, poolGKNN) + serverRunner.GKNN = poolGKNN - serverRunner.Datastore = datastore.NewDatastore(context.Background(), pmf, 0, endpointPool) + serverRunner.Datastore = datastore.NewDatastore(context.Background(), pmf, 0) kvCacheUtilizationScorer := scorer.NewKVCacheUtilizationScorer() queueingScorer := scorer.NewQueueScorer() diff --git a/test/utils/server.go b/test/utils/server.go index 241b0bcef..f46ed6f79 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -29,17 +29,13 @@ import ( "google.golang.org/grpc/test/bufconn" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2" - "sigs.k8s.io/gateway-api-inference-extension/pkg/common" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" pooltuil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" testutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" @@ -55,11 +51,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po pmc := &metrics.FakePodMetricsClient{} pmf := metrics.NewPodMetricsFactory(pmc, time.Second) - endpointPool := datalayer.NewEndpointPool(false, common.GKNN{ - NamespacedName: 
types.NamespacedName{Namespace: namespace, Name: poolName}, - GroupKind: schema.GroupKind{Group: "inference.networking.k8s.io", Kind: "InferencePool"}, - }) - ds := datastore.NewDatastore(ctx, pmf, 0, endpointPool) + ds := datastore.NewDatastore(ctx, pmf, 0) initObjs := []client.Object{} for _, objective := range objectives { From d0794237452ecaac2d4f8ab12992129b5de7a38b Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 18 Nov 2025 15:39:29 -0800 Subject: [PATCH 28/34] rename endpoints to endpointsmeta --- cmd/epp/runner/runner.go | 30 ++++++++++++++---------------- pkg/epp/datalayer/endpoint_pool.go | 14 +++++++------- pkg/epp/datastore/datastore.go | 14 +++++++------- pkg/epp/util/pool/pool.go | 16 ++++++++-------- 4 files changed, 36 insertions(+), 38 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index b93f5b890..bf2025132 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -406,8 +406,8 @@ func setupDataStore(setupLog logr.Logger, ctx context.Context, epFactory datalay setupLog.Error(err, "Failed to parse flag %q with error: %w", "endpoint-selector", err) return nil, err } - endpointPool.EndPoints.Selector = labelsMap - endpointPool.EndPoints.TargetPorts, err = strToUniqueIntSlice(*endpointTargetPorts) + endpointPool.EndpointMeta.Selector = labelsMap + endpointPool.EndpointMeta.TargetPorts, err = strToUniqueIntSlice(*endpointTargetPorts) if err != nil { setupLog.Error(err, "Failed to parse flag %q with error: %w", "endpoint-target-ports", err) return nil, err @@ -749,15 +749,6 @@ func extractDeploymentName(podName string) (string, error) { func extractGKNN() (*common.GKNN, error) { if *poolName != "" { // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default - resolvePoolNamespace := func() string { - if *poolNamespace != "" { - return *poolNamespace - } - if nsEnv := os.Getenv("NAMESPACE"); nsEnv != "" { - return nsEnv - } - return runserver.DefaultPoolNamespace - } resolvedPoolNamespace := resolvePoolNamespace() poolNamespacedName := types.NamespacedName{ Name: *poolName, @@ -775,10 +766,7 @@ func extractGKNN() (*common.GKNN, error) { if *endpointSelector != "" { // Determine EPP namespace: NAMESPACE env var; else default - eppNsEnv := os.Getenv("NAMESPACE") - if eppNsEnv == "" { - return nil, errors.New("failed to get environment variable NAMESPACE") - } + resolvedPoolNamespace := resolvePoolNamespace() // Determine EPP name: POD_NAME env var eppPodNameEnv := os.Getenv("POD_NAME") if eppPodNameEnv == "" { @@ -790,9 +778,19 @@ func extractGKNN() (*common.GKNN, error) { return nil, err } return &common.GKNN{ - NamespacedName: types.NamespacedName{Namespace: eppNsEnv, Name: eppName}, + NamespacedName: types.NamespacedName{Namespace: resolvedPoolNamespace, Name: eppName}, GroupKind: schema.GroupKind{Kind: "Deployment", Group: "apps"}, }, nil } return nil, errors.New("can't construct gknn as both pool-name and endpoint-selector are missing") } + +func resolvePoolNamespace() string { + if *poolNamespace != "" { + return *poolNamespace + } + if nsEnv := os.Getenv("NAMESPACE"); nsEnv != "" { + return nsEnv + } + return runserver.DefaultPoolNamespace +} diff --git a/pkg/epp/datalayer/endpoint_pool.go b/pkg/epp/datalayer/endpoint_pool.go index f3a7e5eac..0c08da00b 100644 --- a/pkg/epp/datalayer/endpoint_pool.go +++ b/pkg/epp/datalayer/endpoint_pool.go @@ -21,29 +21,29 @@ import ( ) type EndpointPool struct { - EndPoints *Endpoints + EndpointMeta *EndpointsMeta DisableK8sCrdReconcile bool 
GKNN common.GKNN } // NewEndpointPool creates and returns a new empty instance of EndpointPool. func NewEndpointPool(disableK8sCrdReconcile bool, gknn common.GKNN) *EndpointPool { - endPoints := NewEndPoints() + endpointsMeta := NewEndpointMeta() return &EndpointPool{ GKNN: gknn, DisableK8sCrdReconcile: disableK8sCrdReconcile, - EndPoints: endPoints, + EndpointMeta: endpointsMeta, } } -type Endpoints struct { +type EndpointsMeta struct { Selector map[string]string TargetPorts []int } -// NewEndPoints creates and returns a new empty instance of EndpointPool. -func NewEndPoints() *Endpoints { - return &Endpoints{ +// NewEndpointMeta creates and returns a new empty instance of EndpointPool. +func NewEndpointMeta() *EndpointsMeta { + return &EndpointsMeta{ Selector: make(map[string]string), TargetPorts: []int{}, } diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go index 5f519ab74..79c985b27 100644 --- a/pkg/epp/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -129,8 +129,8 @@ func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, endpoint oldEndpointPool := ds.pool ds.pool = endpointPool - if oldEndpointPool == nil || !reflect.DeepEqual(oldEndpointPool.EndPoints.Selector, endpointPool.EndPoints.Selector) { - logger.V(logutil.DEFAULT).Info("Updating endpoints", "selector", endpointPool.EndPoints.Selector) + if oldEndpointPool == nil || !reflect.DeepEqual(oldEndpointPool.EndpointMeta.Selector, endpointPool.EndpointMeta.Selector) { + logger.V(logutil.DEFAULT).Info("Updating endpoints", "selector", endpointPool.EndpointMeta.Selector) // A full resync is required to address two cases: // 1) At startup, the pod events may get processed before the pool is synced with the datastore, // and hence they will not be added to the store since pool selector is not known yet @@ -166,7 +166,7 @@ func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool { if ds.pool == nil { return false } - poolSelector := labels.SelectorFromSet(ds.pool.EndPoints.Selector) + poolSelector := labels.SelectorFromSet(ds.pool.EndpointMeta.Selector) podSet := labels.Set(podLabels) return poolSelector.Matches(podSet) } @@ -222,7 +222,7 @@ func (ds *datastore) PodList(predicate func(backendmetrics.PodMetrics) bool) []b } func (ds *datastore) PodUpdateOrAddIfNotExist(pod *corev1.Pod) bool { - if ds.pool == nil || ds.pool.EndPoints == nil { + if ds.pool == nil || ds.pool.EndpointMeta == nil { return true } @@ -232,11 +232,11 @@ func (ds *datastore) PodUpdateOrAddIfNotExist(pod *corev1.Pod) bool { } modelServerMetricsPort := 0 - if len(ds.pool.EndPoints.TargetPorts) == 1 { + if len(ds.pool.EndpointMeta.TargetPorts) == 1 { modelServerMetricsPort = int(ds.modelServerMetricsPort) } pods := []*datalayer.PodInfo{} - for idx, port := range ds.pool.EndPoints.TargetPorts { + for idx, port := range ds.pool.EndpointMeta.TargetPorts { metricsPort := modelServerMetricsPort if metricsPort == 0 { metricsPort = port @@ -287,7 +287,7 @@ func (ds *datastore) podResyncAll(ctx context.Context, reader client.Reader) err logger := log.FromContext(ctx) podList := &corev1.PodList{} if err := reader.List(ctx, podList, &client.ListOptions{ - LabelSelector: labels.SelectorFromSet(ds.pool.EndPoints.Selector), + LabelSelector: labels.SelectorFromSet(ds.pool.EndpointMeta.Selector), Namespace: ds.pool.GKNN.Namespace, }); err != nil { return fmt.Errorf("failed to list pods - %w", err) diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go index 968ed4c4d..43c764e01 100644 --- 
a/pkg/epp/util/pool/pool.go +++ b/pkg/epp/util/pool/pool.go @@ -44,12 +44,12 @@ func InferencePoolToEndpointPool(inferencePool *v1.InferencePool) *datalayer.End NamespacedName: types.NamespacedName{Namespace: inferencePool.Namespace, Name: inferencePool.Name}, GroupKind: schema.GroupKind{Group: "inference.networking.k8s.io", Kind: "InferencePool"}, } - endPoints := &datalayer.Endpoints{ + endPoints := &datalayer.EndpointsMeta{ Selector: selector, TargetPorts: targetPorts, } endpointPool := &datalayer.EndpointPool{ - EndPoints: endPoints, + EndpointMeta: endPoints, DisableK8sCrdReconcile: false, GKNN: gknn, } @@ -66,12 +66,12 @@ func AlphaInferencePoolToEndpointPool(inferencePool *v1alpha2.InferencePool) *da NamespacedName: types.NamespacedName{Namespace: inferencePool.Namespace, Name: inferencePool.Name}, GroupKind: schema.GroupKind{Group: "inference.networking.x-k8s.io", Kind: "InferencePool"}, } - endPoints := &datalayer.Endpoints{ + endPoints := &datalayer.EndpointsMeta{ Selector: selector, TargetPorts: targetPorts, } endpointPool := &datalayer.EndpointPool{ - EndPoints: endPoints, + EndpointMeta: endPoints, DisableK8sCrdReconcile: false, GKNN: gknn, } @@ -79,12 +79,12 @@ func AlphaInferencePoolToEndpointPool(inferencePool *v1alpha2.InferencePool) *da } func EndpointPoolToInferencePool(endpointPool *datalayer.EndpointPool) *v1.InferencePool { - targetPorts := make([]v1.Port, 0, len(endpointPool.EndPoints.TargetPorts)) - for _, p := range endpointPool.EndPoints.TargetPorts { + targetPorts := make([]v1.Port, 0, len(endpointPool.EndpointMeta.TargetPorts)) + for _, p := range endpointPool.EndpointMeta.TargetPorts { targetPorts = append(targetPorts, v1.Port{Number: v1.PortNumber(p)}) } - labels := make(map[v1.LabelKey]v1.LabelValue, len(endpointPool.EndPoints.Selector)) - for k, v := range endpointPool.EndPoints.Selector { + labels := make(map[v1.LabelKey]v1.LabelValue, len(endpointPool.EndpointMeta.Selector)) + for k, v := range endpointPool.EndpointMeta.Selector { labels[v1.LabelKey(k)] = v1.LabelValue(v) } From 76b4eaa94c2b7ee932471045b983f25d53d92fb3 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 18 Nov 2025 15:45:44 -0800 Subject: [PATCH 29/34] rename import package --- .../inferenceobjective_reconciler_test.go | 28 +++++++++---------- pkg/epp/controller/pod_reconciler.go | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index c2593d64e..df2873e52 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -36,17 +36,17 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/common" backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" + poolutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pool" utiltest "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) var ( - inferencePool = utiltest.MakeInferencePool("test-pool1").Namespace("ns1").ObjRef() + pool = utiltest.MakeInferencePool("test-pool1").Namespace("ns1").ObjRef() infObjective1 = utiltest.MakeInferenceObjective("model1"). - Namespace(inferencePool.Namespace). + Namespace(pool.Namespace). Priority(1). CreationTimestamp(metav1.Unix(1000, 0)). - PoolName(inferencePool.Name). + PoolName(pool.Name). 
PoolGroup("inference.networking.k8s.io").ObjRef() infObjective1Pool2 = utiltest.MakeInferenceObjective(infObjective1.Name). Namespace(infObjective1.Namespace). @@ -58,24 +58,24 @@ var ( Namespace(infObjective1.Namespace). Priority(2). CreationTimestamp(metav1.Unix(1003, 0)). - PoolName(inferencePool.Name). + PoolName(pool.Name). PoolGroup("inference.networking.k8s.io").ObjRef() infObjective1Deleted = utiltest.MakeInferenceObjective(infObjective1.Name). Namespace(infObjective1.Namespace). CreationTimestamp(metav1.Unix(1004, 0)). DeletionTimestamp(). - PoolName(inferencePool.Name). + PoolName(pool.Name). PoolGroup("inference.networking.k8s.io").ObjRef() infObjective1DiffGroup = utiltest.MakeInferenceObjective(infObjective1.Name). - Namespace(inferencePool.Namespace). + Namespace(pool.Namespace). Priority(1). CreationTimestamp(metav1.Unix(1005, 0)). - PoolName(inferencePool.Name). + PoolName(pool.Name). PoolGroup("inference.networking.x-k8s.io").ObjRef() infObjective2 = utiltest.MakeInferenceObjective("model2"). - Namespace(inferencePool.Namespace). + Namespace(pool.Namespace). CreationTimestamp(metav1.Unix(1000, 0)). - PoolName(inferencePool.Name). + PoolName(pool.Name). PoolGroup("inference.networking.k8s.io").ObjRef() ) @@ -121,7 +121,7 @@ func TestInferenceObjectiveReconciler(t *testing.T) { { name: "Objective not found, no matching existing objective to delete", objectivessInStore: []*v1alpha2.InferenceObjective{infObjective1}, - incomingReq: &types.NamespacedName{Name: "non-existent-objective", Namespace: inferencePool.Namespace}, + incomingReq: &types.NamespacedName{Name: "non-existent-objective", Namespace: pool.Namespace}, wantObjectives: []*v1alpha2.InferenceObjective{infObjective1}, }, { @@ -165,14 +165,14 @@ func TestInferenceObjectiveReconciler(t *testing.T) { for _, m := range test.objectivessInStore { ds.ObjectiveSet(m) } - endpointPool := pool.InferencePoolToEndpointPool(inferencePool) + endpointPool := poolutil.InferencePoolToEndpointPool(pool) _ = ds.PoolSet(context.Background(), fakeClient, endpointPool) reconciler := &InferenceObjectiveReconciler{ Reader: fakeClient, Datastore: ds, PoolGKNN: common.GKNN{ - NamespacedName: types.NamespacedName{Name: inferencePool.Name, Namespace: inferencePool.Namespace}, - GroupKind: schema.GroupKind{Group: inferencePool.GroupVersionKind().Group, Kind: inferencePool.GroupVersionKind().Kind}, + NamespacedName: types.NamespacedName{Name: pool.Name, Namespace: pool.Namespace}, + GroupKind: schema.GroupKind{Group: pool.GroupVersionKind().Group, Kind: pool.GroupVersionKind().Kind}, }, } if test.incomingReq == nil { diff --git a/pkg/epp/controller/pod_reconciler.go b/pkg/epp/controller/pod_reconciler.go index 06be5d785..26472d2c2 100644 --- a/pkg/epp/controller/pod_reconciler.go +++ b/pkg/epp/controller/pod_reconciler.go @@ -43,7 +43,7 @@ func (c *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R logger := log.FromContext(ctx) if !c.Datastore.PoolHasSynced() { logger.V(logutil.TRACE).Info("Skipping reconciling Pod because the InferencePool is not available yet") - // When the inferencePool is initialized it lists the appropriate pods and populates the datastore, so no need to requeue. + // When the pool is initialized it lists the appropriate pods and populates the datastore, so no need to requeue. 
return ctrl.Result{}, nil } From 2c67bf85bfa74d060d00535ce2a3fa93a4449c9a Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 18 Nov 2025 15:54:18 -0800 Subject: [PATCH 30/34] rename test utility --- .../inferenceobjective_reconciler.go | 2 +- .../inferenceobjective_reconciler_test.go | 30 ++++++++-------- .../controller/inferencepool_reconciler.go | 2 +- .../inferencepool_reconciler_test.go | 36 +++++++++---------- pkg/epp/controller/pod_reconciler.go | 2 +- 5 files changed, 36 insertions(+), 36 deletions(-) diff --git a/pkg/epp/controller/inferenceobjective_reconciler.go b/pkg/epp/controller/inferenceobjective_reconciler.go index 2318f2414..49ddf70b0 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler.go +++ b/pkg/epp/controller/inferenceobjective_reconciler.go @@ -55,7 +55,7 @@ func (c *InferenceObjectiveReconciler) Reconcile(ctx context.Context, req ctrl.R } if notFound || !infObjective.DeletionTimestamp.IsZero() || infObjective.Spec.PoolRef.Name != v1alpha2.ObjectName(c.PoolGKNN.Name) || infObjective.Spec.PoolRef.Group != v1alpha2.Group(c.PoolGKNN.Group) { - // InferenceObjective object got deleted or changed the referenced pool. + // InferenceObjective object got deleted or changed the referenced inferencePool. c.Datastore.ObjectiveDelete(req.NamespacedName) return ctrl.Result{}, nil } diff --git a/pkg/epp/controller/inferenceobjective_reconciler_test.go b/pkg/epp/controller/inferenceobjective_reconciler_test.go index df2873e52..f8d48eca9 100644 --- a/pkg/epp/controller/inferenceobjective_reconciler_test.go +++ b/pkg/epp/controller/inferenceobjective_reconciler_test.go @@ -41,12 +41,12 @@ import ( ) var ( - pool = utiltest.MakeInferencePool("test-pool1").Namespace("ns1").ObjRef() + inferencePool = utiltest.MakeInferencePool("test-pool1").Namespace("ns1").ObjRef() infObjective1 = utiltest.MakeInferenceObjective("model1"). - Namespace(pool.Namespace). + Namespace(inferencePool.Namespace). Priority(1). CreationTimestamp(metav1.Unix(1000, 0)). - PoolName(pool.Name). + PoolName(inferencePool.Name). PoolGroup("inference.networking.k8s.io").ObjRef() infObjective1Pool2 = utiltest.MakeInferenceObjective(infObjective1.Name). Namespace(infObjective1.Namespace). @@ -58,24 +58,24 @@ var ( Namespace(infObjective1.Namespace). Priority(2). CreationTimestamp(metav1.Unix(1003, 0)). - PoolName(pool.Name). + PoolName(inferencePool.Name). PoolGroup("inference.networking.k8s.io").ObjRef() infObjective1Deleted = utiltest.MakeInferenceObjective(infObjective1.Name). Namespace(infObjective1.Namespace). CreationTimestamp(metav1.Unix(1004, 0)). DeletionTimestamp(). - PoolName(pool.Name). + PoolName(inferencePool.Name). PoolGroup("inference.networking.k8s.io").ObjRef() infObjective1DiffGroup = utiltest.MakeInferenceObjective(infObjective1.Name). - Namespace(pool.Namespace). + Namespace(inferencePool.Namespace). Priority(1). CreationTimestamp(metav1.Unix(1005, 0)). - PoolName(pool.Name). + PoolName(inferencePool.Name). PoolGroup("inference.networking.x-k8s.io").ObjRef() infObjective2 = utiltest.MakeInferenceObjective("model2"). - Namespace(pool.Namespace). + Namespace(inferencePool.Namespace). CreationTimestamp(metav1.Unix(1000, 0)). - PoolName(pool.Name). + PoolName(inferencePool.Name). 
PoolGroup("inference.networking.k8s.io").ObjRef() ) @@ -121,7 +121,7 @@ func TestInferenceObjectiveReconciler(t *testing.T) { { name: "Objective not found, no matching existing objective to delete", objectivessInStore: []*v1alpha2.InferenceObjective{infObjective1}, - incomingReq: &types.NamespacedName{Name: "non-existent-objective", Namespace: pool.Namespace}, + incomingReq: &types.NamespacedName{Name: "non-existent-objective", Namespace: inferencePool.Namespace}, wantObjectives: []*v1alpha2.InferenceObjective{infObjective1}, }, { @@ -131,13 +131,13 @@ func TestInferenceObjectiveReconciler(t *testing.T) { wantObjectives: []*v1alpha2.InferenceObjective{infObjective1, infObjective2}, }, { - name: "Objective deleted due to group mismatch for the inference pool", + name: "Objective deleted due to group mismatch for the inference inferencePool", objectivessInStore: []*v1alpha2.InferenceObjective{infObjective1}, objective: infObjective1DiffGroup, wantObjectives: []*v1alpha2.InferenceObjective{}, }, { - name: "Objective ignored due to group mismatch for the inference pool", + name: "Objective ignored due to group mismatch for the inference inferencePool", objective: infObjective1DiffGroup, wantObjectives: []*v1alpha2.InferenceObjective{}, }, @@ -165,14 +165,14 @@ func TestInferenceObjectiveReconciler(t *testing.T) { for _, m := range test.objectivessInStore { ds.ObjectiveSet(m) } - endpointPool := poolutil.InferencePoolToEndpointPool(pool) + endpointPool := poolutil.InferencePoolToEndpointPool(inferencePool) _ = ds.PoolSet(context.Background(), fakeClient, endpointPool) reconciler := &InferenceObjectiveReconciler{ Reader: fakeClient, Datastore: ds, PoolGKNN: common.GKNN{ - NamespacedName: types.NamespacedName{Name: pool.Name, Namespace: pool.Namespace}, - GroupKind: schema.GroupKind{Group: pool.GroupVersionKind().Group, Kind: pool.GroupVersionKind().Kind}, + NamespacedName: types.NamespacedName{Name: inferencePool.Name, Namespace: inferencePool.Namespace}, + GroupKind: schema.GroupKind{Group: inferencePool.GroupVersionKind().Group, Kind: inferencePool.GroupVersionKind().Kind}, }, } if test.incomingReq == nil { diff --git a/pkg/epp/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go index 90332b2f5..400ce4392 100644 --- a/pkg/epp/controller/inferencepool_reconciler.go +++ b/pkg/epp/controller/inferencepool_reconciler.go @@ -36,7 +36,7 @@ import ( // InferencePoolReconciler utilizes the controller runtime to reconcile Instance Gateway resources // This implementation is just used for reading & maintaining data sync. The Gateway implementation -// will have the proper controller that will create/manage objects on behalf of the server pool. +// will have the proper controller that will create/manage objects on behalf of the server inferencePool. 
type InferencePoolReconciler struct { client.Reader Datastore datastore.Datastore diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 4454929b8..0fdecb674 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -129,13 +129,13 @@ func TestInferencePoolReconciler(t *testing.T) { newPool1 := &v1.InferencePool{} if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { - t.Errorf("Unexpected pool get error: %v", err) + t.Errorf("Unexpected inferencePool get error: %v", err) } newPool1.Spec.Selector = v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{"app": "vllm_v2"}, } if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { - t.Errorf("Unexpected pool update error: %v", err) + t.Errorf("Unexpected inferencePool update error: %v", err) } if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) @@ -145,13 +145,13 @@ func TestInferencePoolReconciler(t *testing.T) { t.Errorf("Unexpected diff (+got/-want): %s", diff) } - // Step 3: update the pool port + // Step 3: update the inferencePool port if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { - t.Errorf("Unexpected pool get error: %v", err) + t.Errorf("Unexpected inferencePool get error: %v", err) } newPool1.Spec.TargetPorts = []v1.Port{{Number: 9090}} if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { - t.Errorf("Unexpected pool update error: %v", err) + t.Errorf("Unexpected inferencePool update error: %v", err) } if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) @@ -161,12 +161,12 @@ func TestInferencePoolReconciler(t *testing.T) { t.Errorf("Unexpected diff (+got/-want): %s", diff) } - // Step 4: delete the pool to trigger a datastore clear + // Step 4: delete the inferencePool to trigger a datastore clear if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { - t.Errorf("Unexpected pool get error: %v", err) + t.Errorf("Unexpected inferencePool get error: %v", err) } if err := fakeClient.Delete(ctx, newPool1, &client.DeleteOptions{}); err != nil { - t.Errorf("Unexpected pool delete error: %v", err) + t.Errorf("Unexpected inferencePool delete error: %v", err) } if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) @@ -185,7 +185,7 @@ type diffStoreParams struct { func diffStore(datastore datastore.Datastore, params diffStoreParams) string { gotPool, _ := datastore.PoolGet() if diff := cmp.Diff(params.wantPool, gotPool); diff != "" { - return "pool:" + diff + return "inferencePool:" + diff } // Default wantPods if not set because PodGetAll returns an empty slice when empty. 
@@ -276,11 +276,11 @@ func TestXInferencePoolReconciler(t *testing.T) { newPool1 := &v1alpha2.InferencePool{} if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { - t.Errorf("Unexpected pool get error: %v", err) + t.Errorf("Unexpected inferencePool get error: %v", err) } newPool1.Spec.Selector = map[v1alpha2.LabelKey]v1alpha2.LabelValue{"app": "vllm_v2"} if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { - t.Errorf("Unexpected pool update error: %v", err) + t.Errorf("Unexpected inferencePool update error: %v", err) } if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { @@ -291,13 +291,13 @@ func TestXInferencePoolReconciler(t *testing.T) { t.Errorf("Unexpected diff (+got/-want): %s", diff) } - // Step 3: update the pool port + // Step 3: update the inferencePool port if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { - t.Errorf("Unexpected pool get error: %v", err) + t.Errorf("Unexpected inferencePool get error: %v", err) } newPool1.Spec.TargetPortNumber = 9090 if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { - t.Errorf("Unexpected pool update error: %v", err) + t.Errorf("Unexpected inferencePool update error: %v", err) } if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) @@ -307,12 +307,12 @@ func TestXInferencePoolReconciler(t *testing.T) { t.Errorf("Unexpected diff (+got/-want): %s", diff) } - // Step 4: delete the pool to trigger a datastore clear + // Step 4: delete the inferencePool to trigger a datastore clear if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { - t.Errorf("Unexpected pool get error: %v", err) + t.Errorf("Unexpected inferencePool get error: %v", err) } if err := fakeClient.Delete(ctx, newPool1, &client.DeleteOptions{}); err != nil { - t.Errorf("Unexpected pool delete error: %v", err) + t.Errorf("Unexpected inferencePool delete error: %v", err) } if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil { t.Errorf("Unexpected InferencePool reconcile error: %v", err) @@ -335,7 +335,7 @@ func xDiffStore(datastore datastore.Datastore, params xDiffStoreParams) string { } if diff := cmp.Diff(params.wantPool, gotPool); diff != "" { - return "pool:" + diff + return "inferencePool:" + diff } // Default wantPods if not set because PodGetAll returns an empty slice when empty. diff --git a/pkg/epp/controller/pod_reconciler.go b/pkg/epp/controller/pod_reconciler.go index 26472d2c2..06be5d785 100644 --- a/pkg/epp/controller/pod_reconciler.go +++ b/pkg/epp/controller/pod_reconciler.go @@ -43,7 +43,7 @@ func (c *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R logger := log.FromContext(ctx) if !c.Datastore.PoolHasSynced() { logger.V(logutil.TRACE).Info("Skipping reconciling Pod because the InferencePool is not available yet") - // When the pool is initialized it lists the appropriate pods and populates the datastore, so no need to requeue. + // When the inferencePool is initialized it lists the appropriate pods and populates the datastore, so no need to requeue. 
return ctrl.Result{}, nil } From 7798ebdb52343950df88c1213b118888f7af8743 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 18 Nov 2025 16:35:09 -0800 Subject: [PATCH 31/34] added logging info --- cmd/epp/runner/runner.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index bf2025132..b2d4d41df 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -237,10 +237,12 @@ func (r *Runner) Run(ctx context.Context) error { gknn, err := extractGKNN() if err != nil { + setupLog.Error(err, "Failed to extract GKNN") return err } ds, err := setupDataStore(setupLog, ctx, epf, int32(*modelServerMetricsPort), *gknn) if err != nil { + setupLog.Error(err, "Failed to setup datastore") return err } eppConfig, err := r.parseConfigurationPhaseTwo(ctx, rawConfig, ds) From e33233b3fae41482498d36d0ca9bd7fe17a44244 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 20 Nov 2025 11:16:13 -0800 Subject: [PATCH 32/34] change endpointpool struct --- cmd/epp/runner/runner.go | 50 ++++++++-------- pkg/epp/backend/metrics/logger.go | 6 +- pkg/epp/datalayer/endpoint_pool.go | 30 +++------- pkg/epp/datalayer/metrics/logger.go | 6 +- pkg/epp/datastore/datastore.go | 16 ++--- pkg/epp/metrics/collectors/inference_pool.go | 2 +- pkg/epp/requestcontrol/director.go | 2 +- pkg/epp/server/controller_manager.go | 8 +-- pkg/epp/server/runserver.go | 14 ++++- pkg/epp/util/pool/pool.go | 61 +++++--------------- test/integration/epp/hermetic_test.go | 3 +- 11 files changed, 81 insertions(+), 117 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index bd667fce0..8498211ef 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -239,12 +239,16 @@ func (r *Runner) Run(ctx context.Context) error { return err } - gknn, err := extractGKNN() + gknn, err := extractGKNN(*poolName, *poolGroup, *poolNamespace, *endpointSelector) if err != nil { setupLog.Error(err, "Failed to extract GKNN") return err } - ds, err := setupDataStore(setupLog, ctx, epf, int32(*modelServerMetricsPort), *gknn) + disableK8sCrdReconciler := false + if *endpointSelector != "" { + disableK8sCrdReconciler = true + } + ds, err := setupDatastore(setupLog, ctx, epf, int32(*modelServerMetricsPort), disableK8sCrdReconciler, *poolName, *poolNamespace, *endpointSelector, *endpointTargetPorts) if err != nil { setupLog.Error(err, "Failed to setup datastore") return err @@ -278,7 +282,7 @@ func (r *Runner) Run(ctx context.Context) error { isLeader := &atomic.Bool{} isLeader.Store(false) - mgr, err := runserver.NewDefaultManager(*gknn, cfg, metricsServerOptions, *haEnableLeaderElection) + mgr, err := runserver.NewDefaultManager(disableK8sCrdReconciler, *gknn, cfg, metricsServerOptions, *haEnableLeaderElection) if err != nil { setupLog.Error(err, "Failed to create controller manager") return err @@ -356,6 +360,7 @@ func (r *Runner) Run(ctx context.Context) error { GrpcPort: *grpcPort, GKNN: *gknn, Datastore: ds, + DisableK8sCrdReconcile: disableK8sCrdReconciler, SecureServing: *secureServing, HealthChecking: *healthChecking, CertPath: *certPath, @@ -392,19 +397,18 @@ func (r *Runner) Run(ctx context.Context) error { return nil } -func setupDataStore(setupLog logr.Logger, ctx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, gknn common.GKNN) (datastore.Datastore, error) { - if gknn.Kind == "InferencePool" { +func setupDatastore(setupLog logr.Logger, ctx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, 
disableK8sCrdReconciler bool, namespace, name, endpointSelector, endpointTargetPorts string) (datastore.Datastore, error) { + if !disableK8sCrdReconciler { return datastore.NewDatastore(ctx, epFactory, modelServerMetricsPort), nil - } - if gknn.Kind == "Deployment" { - endpointPool := datalayer.NewEndpointPool(true, gknn) - labelsMap, err := labels.ConvertSelectorToLabelsMap(*endpointSelector) + } else { + endpointPool := datalayer.NewEndpointPool(namespace, name) + labelsMap, err := labels.ConvertSelectorToLabelsMap(endpointSelector) if err != nil { setupLog.Error(err, "Failed to parse flag %q with error: %w", "endpoint-selector", err) return nil, err } - endpointPool.EndpointMeta.Selector = labelsMap - endpointPool.EndpointMeta.TargetPorts, err = strToUniqueIntSlice(*endpointTargetPorts) + endpointPool.Selector = labelsMap + endpointPool.TargetPorts, err = strToUniqueIntSlice(endpointTargetPorts) if err != nil { setupLog.Error(err, "Failed to parse flag %q with error: %w", "endpoint-target-ports", err) return nil, err @@ -413,8 +417,6 @@ func setupDataStore(setupLog logr.Logger, ctx context.Context, epFactory datalay endpointPoolOption := datastore.WithEndpointPool(endpointPool) return datastore.NewDatastore(ctx, epFactory, modelServerMetricsPort, endpointPoolOption), nil } - return nil, fmt.Errorf("invalid gknn kind %s", gknn.Kind) - } // registerInTreePlugins registers the factory functions of all known plugins @@ -656,7 +658,7 @@ func registerHealthServer(mgr manager.Manager, logger logr.Logger, ds datastore. func validateFlags() error { if (*poolName != "" && *endpointSelector != "") || (*poolName == "" && *endpointSelector == "") { - return errors.New("either poolName or endpoint-selector must be set") + return errors.New("either pool-name or endpoint-selector must be set") } if *endpointSelector != "" { targetPortsList, err := strToUniqueIntSlice(*endpointTargetPorts) @@ -752,16 +754,16 @@ func extractDeploymentName(podName string) (string, error) { return "", fmt.Errorf("failed to parse deployment name from pod name %s", podName) } -func extractGKNN() (*common.GKNN, error) { - if *poolName != "" { +func extractGKNN(poolName, poolGroup, poolNamespace, endpointSelector string) (*common.GKNN, error) { + if poolName != "" { // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default - resolvedPoolNamespace := resolvePoolNamespace() + resolvedPoolNamespace := resolvePoolNamespace(poolNamespace) poolNamespacedName := types.NamespacedName{ - Name: *poolName, + Name: poolName, Namespace: resolvedPoolNamespace, } poolGroupKind := schema.GroupKind{ - Group: *poolGroup, + Group: poolGroup, Kind: "InferencePool", } return &common.GKNN{ @@ -770,9 +772,9 @@ func extractGKNN() (*common.GKNN, error) { }, nil } - if *endpointSelector != "" { + if endpointSelector != "" { // Determine EPP namespace: NAMESPACE env var; else default - resolvedPoolNamespace := resolvePoolNamespace() + resolvedPoolNamespace := resolvePoolNamespace(poolNamespace) // Determine EPP name: POD_NAME env var eppPodNameEnv := os.Getenv("POD_NAME") if eppPodNameEnv == "" { @@ -791,9 +793,9 @@ func extractGKNN() (*common.GKNN, error) { return nil, errors.New("can't construct gknn as both pool-name and endpoint-selector are missing") } -func resolvePoolNamespace() string { - if *poolNamespace != "" { - return *poolNamespace +func resolvePoolNamespace(poolNamespace string) string { + if poolNamespace != "" { + return poolNamespace } if nsEnv := os.Getenv("NAMESPACE"); nsEnv != "" { 
return nsEnv diff --git a/pkg/epp/backend/metrics/logger.go b/pkg/epp/backend/metrics/logger.go index 9f60ba76a..69fc404e7 100644 --- a/pkg/epp/backend/metrics/logger.go +++ b/pkg/epp/backend/metrics/logger.go @@ -97,7 +97,7 @@ func refreshPrometheusMetrics(logger logr.Logger, datastore datalayer.PoolInfo, } podTotalCount := len(podMetrics) - metrics.RecordInferencePoolAvgKVCache(pool.GKNN.Name, kvCacheTotal/float64(podTotalCount)) - metrics.RecordInferencePoolAvgQueueSize(pool.GKNN.Name, float64(queueTotal/podTotalCount)) - metrics.RecordInferencePoolReadyPods(pool.GKNN.Name, float64(podTotalCount)) + metrics.RecordInferencePoolAvgKVCache(pool.Name, kvCacheTotal/float64(podTotalCount)) + metrics.RecordInferencePoolAvgQueueSize(pool.Name, float64(queueTotal/podTotalCount)) + metrics.RecordInferencePoolReadyPods(pool.Name, float64(podTotalCount)) } diff --git a/pkg/epp/datalayer/endpoint_pool.go b/pkg/epp/datalayer/endpoint_pool.go index 0c08da00b..05d91d451 100644 --- a/pkg/epp/datalayer/endpoint_pool.go +++ b/pkg/epp/datalayer/endpoint_pool.go @@ -16,35 +16,19 @@ limitations under the License. package datalayer -import ( - "sigs.k8s.io/gateway-api-inference-extension/pkg/common" -) - type EndpointPool struct { - EndpointMeta *EndpointsMeta - DisableK8sCrdReconcile bool - GKNN common.GKNN -} - -// NewEndpointPool creates and returns a new empty instance of EndpointPool. -func NewEndpointPool(disableK8sCrdReconcile bool, gknn common.GKNN) *EndpointPool { - endpointsMeta := NewEndpointMeta() - return &EndpointPool{ - GKNN: gknn, - DisableK8sCrdReconcile: disableK8sCrdReconcile, - EndpointMeta: endpointsMeta, - } -} - -type EndpointsMeta struct { Selector map[string]string TargetPorts []int + Namespace string + Name string } -// NewEndpointMeta creates and returns a new empty instance of EndpointPool. -func NewEndpointMeta() *EndpointsMeta { - return &EndpointsMeta{ +// NewEndpointPool creates and returns a new empty instance of EndpointPool. 
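// Aside: a hypothetical, self-contained sketch (not in the patch) of how the
// standalone-mode flags could populate the flattened EndpointPool shown above:
// the selector string is parsed with labels.ConvertSelectorToLabelsMap and the
// port list is split on commas. parsePorts is only a stand-in for the patch's
// strToUniqueIntSlice (whose real body is not shown here), and EndpointPool is
// mirrored locally so the sketch compiles on its own.
package main

import (
	"fmt"
	"strconv"
	"strings"

	"k8s.io/apimachinery/pkg/labels"
)

type EndpointPool struct {
	Selector    map[string]string
	TargetPorts []int
	Namespace   string
	Name        string
}

// parsePorts de-duplicates a comma-separated port list; illustrative stand-in only.
func parsePorts(s string) ([]int, error) {
	seen := map[int]bool{}
	ports := []int{}
	for _, part := range strings.Split(s, ",") {
		p, err := strconv.Atoi(strings.TrimSpace(part))
		if err != nil {
			return nil, fmt.Errorf("invalid port %q: %w", part, err)
		}
		if !seen[p] {
			seen[p] = true
			ports = append(ports, p)
		}
	}
	return ports, nil
}

func main() {
	selector, err := labels.ConvertSelectorToLabelsMap("app=vllm-llama3-8b-instruct,env=prod")
	if err != nil {
		panic(err)
	}
	ports, err := parsePorts("3000,3001,3001")
	if err != nil {
		panic(err)
	}
	pool := &EndpointPool{
		Selector:    selector,
		TargetPorts: ports, // duplicates dropped: [3000 3001]
		Namespace:   "inference-system",
		Name:        "my-epp", // stand-in for the deployment name derived from POD_NAME
	}
	fmt.Printf("%+v\n", pool)
}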
diff --git a/pkg/epp/datalayer/metrics/logger.go b/pkg/epp/datalayer/metrics/logger.go
index f759ff078..130c62f2b 100644
--- a/pkg/epp/datalayer/metrics/logger.go
+++ b/pkg/epp/datalayer/metrics/logger.go
@@ -117,9 +117,9 @@ func refreshPrometheusMetrics(logger logr.Logger, datastore datalayer.PoolInfo,
 
 	totals := calculateTotals(podMetrics)
 	podCount := len(podMetrics)
-	metrics.RecordInferencePoolAvgKVCache(pool.GKNN.Name, totals.kvCache/float64(podCount))
-	metrics.RecordInferencePoolAvgQueueSize(pool.GKNN.Name, float64(totals.queueSize/podCount))
-	metrics.RecordInferencePoolReadyPods(pool.GKNN.Name, float64(podCount))
+	metrics.RecordInferencePoolAvgKVCache(pool.Name, totals.kvCache/float64(podCount))
+	metrics.RecordInferencePoolAvgQueueSize(pool.Name, float64(totals.queueSize/podCount))
+	metrics.RecordInferencePoolReadyPods(pool.Name, float64(podCount))
 }
 
 // totals holds aggregated metric values
diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go
index 79c985b27..2ab2e98cb 100644
--- a/pkg/epp/datastore/datastore.go
+++ b/pkg/epp/datastore/datastore.go
@@ -129,8 +129,8 @@ func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, endpoint
 	oldEndpointPool := ds.pool
 	ds.pool = endpointPool
 
-	if oldEndpointPool == nil || !reflect.DeepEqual(oldEndpointPool.EndpointMeta.Selector, endpointPool.EndpointMeta.Selector) {
-		logger.V(logutil.DEFAULT).Info("Updating endpoints", "selector", endpointPool.EndpointMeta.Selector)
+	if oldEndpointPool == nil || !reflect.DeepEqual(oldEndpointPool.Selector, endpointPool.Selector) {
+		logger.V(logutil.DEFAULT).Info("Updating endpoints", "selector", endpointPool.Selector)
 		// A full resync is required to address two cases:
 		// 1) At startup, the pod events may get processed before the pool is synced with the datastore,
 		//    and hence they will not be added to the store since pool selector is not known yet
@@ -166,7 +166,7 @@ func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool {
 	if ds.pool == nil {
 		return false
 	}
-	poolSelector := labels.SelectorFromSet(ds.pool.EndpointMeta.Selector)
+	poolSelector := labels.SelectorFromSet(ds.pool.Selector)
 	podSet := labels.Set(podLabels)
 	return poolSelector.Matches(podSet)
 }
@@ -222,7 +222,7 @@ func (ds *datastore) PodList(predicate func(backendmetrics.PodMetrics) bool) []b
 }
 
 func (ds *datastore) PodUpdateOrAddIfNotExist(pod *corev1.Pod) bool {
-	if ds.pool == nil || ds.pool.EndpointMeta == nil {
+	if ds.pool == nil {
 		return true
 	}
 
@@ -232,11 +232,11 @@ func (ds *datastore) PodUpdateOrAddIfNotExist(pod *corev1.Pod) bool {
 	}
 
 	modelServerMetricsPort := 0
-	if len(ds.pool.EndpointMeta.TargetPorts) == 1 {
+	if len(ds.pool.TargetPorts) == 1 {
 		modelServerMetricsPort = int(ds.modelServerMetricsPort)
 	}
 	pods := []*datalayer.PodInfo{}
-	for idx, port := range ds.pool.EndpointMeta.TargetPorts {
+	for idx, port := range ds.pool.TargetPorts {
 		metricsPort := modelServerMetricsPort
 		if metricsPort == 0 {
 			metricsPort = port
@@ -287,8 +287,8 @@ func (ds *datastore) podResyncAll(ctx context.Context, reader client.Reader) err
 	logger := log.FromContext(ctx)
 	podList := &corev1.PodList{}
 	if err := reader.List(ctx, podList, &client.ListOptions{
-		LabelSelector: labels.SelectorFromSet(ds.pool.EndpointMeta.Selector),
-		Namespace:     ds.pool.GKNN.Namespace,
+		LabelSelector: labels.SelectorFromSet(ds.pool.Selector),
+		Namespace:     ds.pool.Namespace,
 	}); err != nil {
 		return fmt.Errorf("failed to list pods - %w", err)
 	}
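The standalone branch of setupDatastore hands the pre-built pool to NewDatastore via datastore.WithEndpointPool, whose definition is not part of this excerpt. Assuming the usual functional-option pattern (an option type that mutates the datastore before use), it would reduce to something like the sketch below; the Option type and the direct field access are assumptions, not code from this series.

// Assumed shape only; the real option lives in pkg/epp/datastore and may differ.
type Option func(*datastore)

// WithEndpointPool seeds the datastore with a pool built from flags instead of
// waiting for an InferencePool reconcile event.
func WithEndpointPool(pool *datalayer.EndpointPool) Option {
	return func(ds *datastore) {
		ds.pool = pool
	}
}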
diff --git a/pkg/epp/metrics/collectors/inference_pool.go b/pkg/epp/metrics/collectors/inference_pool.go
index 1bb6e206e..ec3def164 100644
--- a/pkg/epp/metrics/collectors/inference_pool.go
+++ b/pkg/epp/metrics/collectors/inference_pool.go
@@ -73,7 +73,7 @@ func (c *inferencePoolMetricsCollector) Collect(ch chan<- prometheus.Metric) {
 			descInferencePoolPerPodQueueSize,
 			prometheus.GaugeValue,
 			float64(pod.GetMetrics().WaitingQueueSize),
-			pool.GKNN.Name,
+			pool.Name,
 			pod.GetPod().NamespacedName.Name,
 		)
 	}
diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go
index 69d0a9ecd..c4f4f1c1b 100644
--- a/pkg/epp/requestcontrol/director.go
+++ b/pkg/epp/requestcontrol/director.go
@@ -48,7 +48,7 @@ const (
 
 // Datastore defines the interface required by the Director.
 type Datastore interface {
 	PoolGet() (*datalayer.EndpointPool, error)
-	ObjectiveGet(modelName string) *v1alpha2.InferenceObjective
+	ObjectiveGet(objectiveName string) *v1alpha2.InferenceObjective
 	PodList(predicate func(backendmetrics.PodMetrics) bool) []backendmetrics.PodMetrics
 }
diff --git a/pkg/epp/server/controller_manager.go b/pkg/epp/server/controller_manager.go
index 2b8e04932..c82b0bcb9 100644
--- a/pkg/epp/server/controller_manager.go
+++ b/pkg/epp/server/controller_manager.go
@@ -45,7 +45,7 @@ func init() {
 }
 
 // defaultManagerOptions returns the default options used to create the manager.
-func defaultManagerOptions(gknn common.GKNN, metricsServerOptions metricsserver.Options) (ctrl.Options, error) {
+func defaultManagerOptions(disableK8sCrdReconcile bool, gknn common.GKNN, metricsServerOptions metricsserver.Options) (ctrl.Options, error) {
 	opt := ctrl.Options{
 		Scheme: scheme,
 		Cache: cache.Options{
@@ -59,7 +59,7 @@ func defaultManagerOptions(gknn common.GKNN, metricsServerOptions metricsserver.
 		},
 		Metrics: metricsServerOptions,
 	}
-	if gknn.Kind == "InferencePool" {
+	if !disableK8sCrdReconcile {
 		opt.Cache.ByObject[&v1alpha2.InferenceObjective{}] = cache.ByObject{Namespaces: map[string]cache.Config{
 			gknn.Namespace: {},
 		}}
@@ -85,8 +85,8 @@ func defaultManagerOptions(gknn common.GKNN, metricsServerOptions metricsserver.
 }
 
 // NewDefaultManager creates a new controller manager with default configuration.
-func NewDefaultManager(gknn common.GKNN, restConfig *rest.Config, metricsServerOptions metricsserver.Options, leaderElectionEnabled bool) (ctrl.Manager, error) {
-	opt, err := defaultManagerOptions(gknn, metricsServerOptions)
+func NewDefaultManager(disableK8sCrdReconcile bool, gknn common.GKNN, restConfig *rest.Config, metricsServerOptions metricsserver.Options, leaderElectionEnabled bool) (ctrl.Manager, error) {
+	opt, err := defaultManagerOptions(disableK8sCrdReconcile, gknn, metricsServerOptions)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create controller manager options: %v", err)
 	}
diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go
index 65f4c2944..e43d84923 100644
--- a/pkg/epp/server/runserver.go
+++ b/pkg/epp/server/runserver.go
@@ -28,6 +28,8 @@ import (
 	"google.golang.org/grpc/credentials"
 	"google.golang.org/grpc/health"
 	healthgrpc "google.golang.org/grpc/health/grpc_health_v1"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/manager"
 
@@ -47,6 +49,7 @@ import (
 type ExtProcServerRunner struct {
 	GrpcPort                         int
 	GKNN                             common.GKNN
+	DisableK8sCrdReconcile           bool
 	Datastore                        datastore.Datastore
 	SecureServing                    bool
 	HealthChecking                   bool
@@ -88,8 +91,17 @@ const (
 // NewDefaultExtProcServerRunner creates a runner with default values.
 // Note: Dependencies like Datastore, Scheduler, SD need to be set separately.
 func NewDefaultExtProcServerRunner() *ExtProcServerRunner {
+	gknn := common.GKNN{
+		NamespacedName: types.NamespacedName{Name: DefaultPoolName, Namespace: DefaultPoolNamespace},
+		GroupKind: schema.GroupKind{
+			Group: DefaultPoolGroup,
+			Kind:  "InferencePool",
+		},
+	}
 	return &ExtProcServerRunner{
 		GrpcPort:                         DefaultGrpcPort,
+		GKNN:                             gknn,
+		DisableK8sCrdReconcile:           false,
 		SecureServing:                    DefaultSecureServing,
 		HealthChecking:                   DefaultHealthChecking,
 		RefreshPrometheusMetricsInterval: DefaultRefreshPrometheusMetricsInterval,
@@ -101,7 +113,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner {
 // SetupWithManager sets up the runner with the given manager.
 func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error {
 	// Create the controllers and register them with the manager
-	if r.GKNN.Kind == "InferencePool" {
+	if !r.DisableK8sCrdReconcile {
 		if err := (&controller.InferencePoolReconciler{
 			Datastore: r.Datastore,
 			Reader:    mgr.GetClient(),
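Taken together with the datastore changes, the new DisableK8sCrdReconcile field is what switches the whole server into standalone mode: when it is true, SetupWithManager skips registering the InferencePool reconciler. A sketch of the wiring a caller such as cmd/epp/runner performs (ds and mgr stand for the datastore and manager built earlier in runner.go; values are illustrative):

// Sketch only, assuming the runserver import alias used by runner.go.
runner := runserver.NewDefaultExtProcServerRunner()
runner.DisableK8sCrdReconcile = true // standalone: no InferencePool CRD reconciliation
runner.Datastore = ds
if err := runner.SetupWithManager(ctx, mgr); err != nil {
	setupLog.Error(err, "Failed to setup ext-proc server runner")
	return err
}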
diff --git a/pkg/epp/util/pool/pool.go b/pkg/epp/util/pool/pool.go
index 43c764e01..67fd9fef8 100644
--- a/pkg/epp/util/pool/pool.go
+++ b/pkg/epp/util/pool/pool.go
@@ -18,12 +18,8 @@ package pool
 
 import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/runtime/schema"
-	"k8s.io/apimachinery/pkg/types"
-
 	v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
 	v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
-	"sigs.k8s.io/gateway-api-inference-extension/pkg/common"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
 )
 
@@ -40,18 +36,11 @@ func InferencePoolToEndpointPool(inferencePool *v1.InferencePool) *datalayer.End
 	for k, v := range inferencePool.Spec.Selector.MatchLabels {
 		selector[string(k)] = string(v)
 	}
-	gknn := common.GKNN{
-		NamespacedName: types.NamespacedName{Namespace: inferencePool.Namespace, Name: inferencePool.Name},
-		GroupKind:      schema.GroupKind{Group: "inference.networking.k8s.io", Kind: "InferencePool"},
-	}
-	endPoints := &datalayer.EndpointsMeta{
+	endpointPool := &datalayer.EndpointPool{
 		Selector:    selector,
 		TargetPorts: targetPorts,
-	}
-	endpointPool := &datalayer.EndpointPool{
-		EndpointMeta:           endPoints,
-		DisableK8sCrdReconcile: false,
-		GKNN:                   gknn,
+		Namespace:   inferencePool.Namespace,
+		Name:        inferencePool.Name,
 	}
 	return endpointPool
 }
@@ -62,29 +51,23 @@ func AlphaInferencePoolToEndpointPool(inferencePool *v1alpha2.InferencePool) *da
 	for k, v := range inferencePool.Spec.Selector {
 		selector[string(k)] = string(v)
 	}
-	gknn := common.GKNN{
-		NamespacedName: types.NamespacedName{Namespace: inferencePool.Namespace, Name: inferencePool.Name},
-		GroupKind:      schema.GroupKind{Group: "inference.networking.x-k8s.io", Kind: "InferencePool"},
-	}
-	endPoints := &datalayer.EndpointsMeta{
-		Selector:    selector,
-		TargetPorts: targetPorts,
-	}
+
 	endpointPool := &datalayer.EndpointPool{
-		EndpointMeta:           endPoints,
-		DisableK8sCrdReconcile: false,
-		GKNN:                   gknn,
+		TargetPorts: targetPorts,
+		Selector:    selector,
+		Namespace:   inferencePool.Namespace,
+		Name:        inferencePool.Name,
 	}
 	return endpointPool
 }
 
 func EndpointPoolToInferencePool(endpointPool *datalayer.EndpointPool) *v1.InferencePool {
-	targetPorts := make([]v1.Port, 0, len(endpointPool.EndpointMeta.TargetPorts))
-	for _, p := range endpointPool.EndpointMeta.TargetPorts {
+	targetPorts := make([]v1.Port, 0, len(endpointPool.TargetPorts))
+	for _, p := range endpointPool.TargetPorts {
 		targetPorts = append(targetPorts, v1.Port{Number: v1.PortNumber(p)})
 	}
-	labels := make(map[v1.LabelKey]v1.LabelValue, len(endpointPool.EndpointMeta.Selector))
-	for k, v := range endpointPool.EndpointMeta.Selector {
+	labels := make(map[v1.LabelKey]v1.LabelValue, len(endpointPool.Selector))
+	for k, v := range endpointPool.Selector {
 		labels[v1.LabelKey(k)] = v1.LabelValue(v)
 	}
 
@@ -94,8 +77,8 @@ func EndpointPoolToInferencePool(endpointPool *datalayer.EndpointPool) *v1.Infer
 			Kind:       "InferencePool",
 		},
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      endpointPool.GKNN.Name,
-			Namespace: endpointPool.GKNN.Namespace,
+			Name:      endpointPool.Name,
+			Namespace: endpointPool.Namespace,
 		},
 		Spec: v1.InferencePoolSpec{
 			Selector: v1.LabelSelector{MatchLabels: labels},
@@ -104,19 +87,3 @@ func EndpointPoolToInferencePool(endpointPool *datalayer.EndpointPool) *v1.Infer
 	}
 	return inferencePool
 }
-
-func ToGKNN(ip *v1.InferencePool) common.GKNN {
-	if ip == nil {
-		return common.GKNN{}
-	}
-	return common.GKNN{
-		NamespacedName: types.NamespacedName{
-			Name:      ip.Name,
-			Namespace: ip.Namespace,
-		},
-		GroupKind: schema.GroupKind{
-			Group: ip.GroupVersionKind().Group,
-			Kind:  ip.GroupVersionKind().Kind,
-		},
-	}
-}
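After this change the two conversion helpers are symmetric over the flattened EndpointPool, so a pool defined purely from flags can still be projected into a v1.InferencePool for the pool-info code paths. A short sketch of the round trip (values are illustrative; pool refers to this util package and datalayer to pkg/epp/datalayer):

// Illustrative round trip, not code from this series.
ep := datalayer.NewEndpointPool("default", "vllm-pool")
ep.Selector = map[string]string{"app": "vllm"}
ep.TargetPorts = []int{8000}

ip := pool.EndpointPoolToInferencePool(ep)  // v1.InferencePool named "vllm-pool" in "default"
ep2 := pool.InferencePoolToEndpointPool(ip) // recovers the same selector, ports, namespace, and name
_ = ep2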
diff --git a/test/integration/epp/hermetic_test.go b/test/integration/epp/hermetic_test.go
index 81bba5ee3..f1f32d58b 100644
--- a/test/integration/epp/hermetic_test.go
+++ b/test/integration/epp/hermetic_test.go
@@ -1170,11 +1170,10 @@ func BeforeSuite() func() {
 		serverRunner.TestPodMetricsClient = &backendmetrics.FakePodMetricsClient{}
 		pmf := backendmetrics.NewPodMetricsFactory(serverRunner.TestPodMetricsClient, 10*time.Millisecond)
 		// Adjust from defaults
-		poolGKNN := common.GKNN{
+		serverRunner.GKNN = common.GKNN{
 			NamespacedName: types.NamespacedName{Namespace: testNamespace, Name: testPoolName},
 			GroupKind:      schema.GroupKind{Group: v1.GroupVersion.Group, Kind: "InferencePool"},
 		}
-		serverRunner.GKNN = poolGKNN
 		serverRunner.Datastore = datastore.NewDatastore(context.Background(), pmf, 0)

From 86ac1f8a7d608bbe08dd1853b9389243028bcc58 Mon Sep 17 00:00:00 2001
From: Xiyue Yu
Date: Thu, 20 Nov 2025 11:22:02 -0800
Subject: [PATCH 33/34] fixed variable naming

---
 cmd/epp/runner/runner.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go
index 8498211ef..cce3d8789 100644
--- a/cmd/epp/runner/runner.go
+++ b/cmd/epp/runner/runner.go
@@ -244,11 +244,11 @@ func (r *Runner) Run(ctx context.Context) error {
 		setupLog.Error(err, "Failed to extract GKNN")
 		return err
 	}
-	disableK8sCrdReconciler := false
+	disableK8sCrdReconcile := false
 	if *endpointSelector != "" {
-		disableK8sCrdReconciler = true
+		disableK8sCrdReconcile = true
 	}
-	ds, err := setupDatastore(setupLog, ctx, epf, int32(*modelServerMetricsPort), disableK8sCrdReconciler, *poolName, *poolNamespace, *endpointSelector, *endpointTargetPorts)
+	ds, err := setupDatastore(setupLog, ctx, epf, int32(*modelServerMetricsPort), disableK8sCrdReconcile, *poolName, *poolNamespace, *endpointSelector, *endpointTargetPorts)
 	if err != nil {
 		setupLog.Error(err, "Failed to setup datastore")
 		return err
@@ -282,7 +282,7 @@ func (r *Runner) Run(ctx context.Context) error {
 
 	isLeader := &atomic.Bool{}
 	isLeader.Store(false)
-	mgr, err := runserver.NewDefaultManager(disableK8sCrdReconciler, *gknn, cfg, metricsServerOptions, *haEnableLeaderElection)
+	mgr, err := runserver.NewDefaultManager(disableK8sCrdReconcile, *gknn, cfg, metricsServerOptions, *haEnableLeaderElection)
 	if err != nil {
 		setupLog.Error(err, "Failed to create controller manager")
 		return err
@@ -360,7 +360,7 @@ func (r *Runner) Run(ctx context.Context) error {
 		GrpcPort:               *grpcPort,
 		GKNN:                   *gknn,
 		Datastore:              ds,
-		DisableK8sCrdReconcile: disableK8sCrdReconciler,
+		DisableK8sCrdReconcile: disableK8sCrdReconcile,
 		SecureServing:          *secureServing,
 		HealthChecking:         *healthChecking,
 		CertPath:               *certPath,
@@ -397,8 +397,8 @@ func (r *Runner) Run(ctx context.Context) error {
 	return nil
 }
 
-func setupDatastore(setupLog logr.Logger, ctx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, disableK8sCrdReconciler bool, namespace, name, endpointSelector, endpointTargetPorts string) (datastore.Datastore, error) {
-	if !disableK8sCrdReconciler {
+func setupDatastore(setupLog logr.Logger, ctx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32, disableK8sCrdReconcile bool, namespace, name, endpointSelector, endpointTargetPorts string) (datastore.Datastore, error) {
+	if !disableK8sCrdReconcile {
 		return datastore.NewDatastore(ctx, epFactory, modelServerMetricsPort), nil
 	} else {
 		endpointPool := datalayer.NewEndpointPool(namespace, name)

From 11a8a68916c63008f8856c3a68436fce053d1e72 Mon Sep 17 00:00:00 2001
From: Xiyue Yu
Date: Thu, 20 Nov 2025 11:34:33 -0800
Subject: [PATCH 34/34] fixed linter

---
 cmd/epp/runner/runner.go | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go
index cce3d8789..24a59ebf7 100644
--- a/cmd/epp/runner/runner.go
+++ b/cmd/epp/runner/runner.go
@@ -244,10 +244,7 @@ func (r *Runner) Run(ctx context.Context) error {
 		setupLog.Error(err, "Failed to extract GKNN")
 		return err
 	}
-	disableK8sCrdReconcile := false
-	if *endpointSelector != "" {
-		disableK8sCrdReconcile = true
-	}
+	disableK8sCrdReconcile := *endpointSelector != ""
 	ds, err := setupDatastore(setupLog, ctx, epf, int32(*modelServerMetricsPort), disableK8sCrdReconcile, *poolName, *poolNamespace, *endpointSelector, *endpointTargetPorts)
 	if err != nil {
 		setupLog.Error(err, "Failed to setup datastore")