From e782b75fe16cffd8745da99a49e383cd1961fccc Mon Sep 17 00:00:00 2001 From: Siyi Wang Date: Thu, 16 Oct 2025 12:51:23 -0700 Subject: [PATCH 1/3] Remove validation for epp to have multi-port --- pkg/epp/requestcontrol/director.go | 19 ++++++++++--------- pkg/epp/requestcontrol/director_test.go | 12 ++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index d7de39d4a..682e4d3c8 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -226,17 +226,18 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC return reqCtx, err } targetPods := []*backend.Pod{} - if len(pool.Spec.TargetPorts) != 1 { - return reqCtx, errutil.Error{Code: errutil.BadRequest, Msg: "targetPorts should have length 1"} + if len(pool.Spec.TargetPorts) > 8 { + return reqCtx, errutil.Error{Code: errutil.BadRequest, Msg: "targetPorts should not have length more than 8"} } - targetPort := int(pool.Spec.TargetPorts[0].Number) targetEndpoints := []string{} for _, pod := range result.ProfileResults[result.PrimaryProfileName].TargetPods { curPod := pod.GetPod() - curEndpoint := net.JoinHostPort(curPod.Address, strconv.Itoa(targetPort)) - targetPods = append(targetPods, curPod) - targetEndpoints = append(targetEndpoints, curEndpoint) + for _, tp := range pool.Spec.TargetPorts { + curEndpoint := net.JoinHostPort(curPod.Address, strconv.Itoa(int(tp.Number))) + targetPods = append(targetPods, curPod) + targetEndpoints = append(targetEndpoints, curEndpoint) + } } multiEndpointString := strings.Join(targetEndpoints, ",") @@ -245,7 +246,7 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC reqCtx.TargetPod = targetPods[0] reqCtx.TargetEndpoint = multiEndpointString - d.runPreRequestPlugins(ctx, reqCtx.SchedulingRequest, result, targetPort) + d.runPreRequestPlugins(ctx, reqCtx.SchedulingRequest, result) return reqCtx, nil } @@ -313,12 +314,12 @@ func (d *Director) GetRandomPod() *backend.Pod { } func (d *Director) runPreRequestPlugins(ctx context.Context, request *schedulingtypes.LLMRequest, - schedulingResult *schedulingtypes.SchedulingResult, targetPort int) { + schedulingResult *schedulingtypes.SchedulingResult) { loggerDebug := log.FromContext(ctx).V(logutil.DEBUG) for _, plugin := range d.requestControlPlugins.preRequestPlugins { loggerDebug.Info("Running PreRequest plugin", "plugin", plugin.TypedName()) before := time.Now() - plugin.PreRequest(ctx, request, schedulingResult, targetPort) + plugin.PreRequest(ctx, request, schedulingResult, 54321) // passing in dummy targetPort number since it's not being used underneath metrics.RecordPluginProcessingLatency(PreRequestExtensionPoint, plugin.TypedName().Type, plugin.TypedName().Name, time.Since(before)) loggerDebug.Info("Completed running PreRequest plugin successfully", "plugin", plugin.TypedName()) } diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index ffd62da36..bda690065 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -128,7 +128,7 @@ func TestDirector_HandleRequest(t *testing.T) { pool := &v1.InferencePool{ ObjectMeta: metav1.ObjectMeta{Name: "test-pool", Namespace: "default"}, Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}, {Number: v1.PortNumber(int32(8001))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "app": "inference", @@ -223,7 +223,7 @@ func TestDirector_HandleRequest(t *testing.T) { NamespacedName: types.NamespacedName{Namespace: "default", Name: "pod1"}, Address: "192.168.1.100", }, - TargetEndpoint: "192.168.1.100:8000,192.168.2.100:8000,192.168.4.100:8000", + TargetEndpoint: "192.168.1.100:8000,192.168.1.100:8001,192.168.2.100:8000,192.168.2.100:8001,192.168.4.100:8000,192.168.4.100:8001", }, wantMutatedBodyModel: model, inferenceObjectiveName: objectiveName, @@ -250,7 +250,7 @@ func TestDirector_HandleRequest(t *testing.T) { NamespacedName: types.NamespacedName{Namespace: "default", Name: "pod1"}, Address: "192.168.1.100", }, - TargetEndpoint: "192.168.1.100:8000,192.168.2.100:8000,192.168.4.100:8000", + TargetEndpoint: "192.168.1.100:8000,192.168.1.100:8001,192.168.2.100:8000,192.168.2.100:8001,192.168.4.100:8000,192.168.4.100:8001", }, wantMutatedBodyModel: model, targetModelName: model, @@ -281,7 +281,7 @@ func TestDirector_HandleRequest(t *testing.T) { NamespacedName: types.NamespacedName{Namespace: "default", Name: "pod1"}, Address: "192.168.1.100", }, - TargetEndpoint: "192.168.1.100:8000,192.168.2.100:8000,192.168.4.100:8000", + TargetEndpoint: "192.168.1.100:8000,192.168.1.100:8001,192.168.2.100:8000,192.168.2.100:8001,192.168.4.100:8000,192.168.4.100:8001", }, wantMutatedBodyModel: model, inferenceObjectiveName: objectiveName, @@ -304,7 +304,7 @@ func TestDirector_HandleRequest(t *testing.T) { NamespacedName: types.NamespacedName{Namespace: "default", Name: "pod1"}, Address: "192.168.1.100", }, - TargetEndpoint: "192.168.1.100:8000,192.168.2.100:8000,192.168.4.100:8000", + TargetEndpoint: "192.168.1.100:8000,192.168.1.100:8001,192.168.2.100:8000,192.168.2.100:8001,192.168.4.100:8000,192.168.4.100:8001", }, wantMutatedBodyModel: "resolved-target-model-A", inferenceObjectiveName: objectiveNameResolve, @@ -322,7 +322,7 @@ func TestDirector_HandleRequest(t *testing.T) { NamespacedName: types.NamespacedName{Namespace: "default", Name: "pod1"}, Address: "192.168.1.100", }, - TargetEndpoint: "192.168.1.100:8000,192.168.2.100:8000,192.168.4.100:8000", + TargetEndpoint: "192.168.1.100:8000,192.168.1.100:8001,192.168.2.100:8000,192.168.2.100:8001,192.168.4.100:8000,192.168.4.100:8001", }, wantMutatedBodyModel: "food-review-1", reqBodyMap: map[string]any{ From 8f34a0478cd8b67a30fb8186ac718c190ccc498d Mon Sep 17 00:00:00 2001 From: Siyi Wang Date: Thu, 16 Oct 2025 13:30:24 -0700 Subject: [PATCH 2/3] optimize wording --- pkg/epp/requestcontrol/director.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index 682e4d3c8..b9d886367 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -227,7 +227,7 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC } targetPods := []*backend.Pod{} if len(pool.Spec.TargetPorts) > 8 { - return reqCtx, errutil.Error{Code: errutil.BadRequest, Msg: "targetPorts should not have length more than 8"} + return reqCtx, errutil.Error{Code: errutil.BadRequest, Msg: "targetPorts should have length less than 8"} } targetEndpoints := []string{} From 3c06f0fd7fe703ba0ca3b237163e4d39f70df891 Mon Sep 17 00:00:00 2001 From: Siyi Wang Date: Thu, 16 Oct 2025 14:48:10 -0700 Subject: [PATCH 3/3] Update the placeholder port number to 0 after checking the test cases --- pkg/epp/requestcontrol/director.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index b9d886367..32fb10b24 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -319,7 +319,7 @@ func (d *Director) runPreRequestPlugins(ctx context.Context, request *scheduling for _, plugin := range d.requestControlPlugins.preRequestPlugins { loggerDebug.Info("Running PreRequest plugin", "plugin", plugin.TypedName()) before := time.Now() - plugin.PreRequest(ctx, request, schedulingResult, 54321) // passing in dummy targetPort number since it's not being used underneath + plugin.PreRequest(ctx, request, schedulingResult, 0) // Using 0 as a placeholder targetPort. The actual value is ignored by the underlying implementation. metrics.RecordPluginProcessingLatency(PreRequestExtensionPoint, plugin.TypedName().Type, plugin.TypedName().Name, time.Since(before)) loggerDebug.Info("Completed running PreRequest plugin successfully", "plugin", plugin.TypedName()) }