openshift-service-mesh
diff --git a/‎pilot/pkg/config/kube/gateway/conversion.go‎
Lines changed: 10 additions & 1 deletion b/‎pilot/pkg/config/kube/gateway/conversion.go‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎pilot/pkg/config/kube/gateway/conversion_test.go‎
Lines changed: 10 additions & 2 deletions b/‎pilot/pkg/config/kube/gateway/conversion_test.go‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎pilot/pkg/config/kube/gateway/inferencepool_collection.go‎
Lines changed: 10 additions & 6 deletions b/‎pilot/pkg/config/kube/gateway/inferencepool_collection.go‎
Lines changed: 10 additions & 6 deletions
diff --git a/‎pilot/pkg/config/kube/gateway/inferencepool_test.go‎
Lines changed: 51 additions & 6 deletions b/‎pilot/pkg/config/kube/gateway/inferencepool_test.go‎
Lines changed: 51 additions & 6 deletions
diff --git a/‎pilot/pkg/config/kube/gateway/testdata/http.yaml‎
Lines changed: 2 additions & 0 deletions b/‎pilot/pkg/config/kube/gateway/testdata/http.yaml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pilot/pkg/config/kube/gateway/testdata/http.yaml.golden‎
Lines changed: 4 additions & 0 deletions b/‎pilot/pkg/config/kube/gateway/testdata/http.yaml.golden‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎pilot/pkg/model/push_context.go‎
Lines changed: 19 additions & 0 deletions b/‎pilot/pkg/model/push_context.go‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎pilot/pkg/networking/core/cluster.go‎
Lines changed: 6 additions & 1 deletion b/‎pilot/pkg/networking/core/cluster.go‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎pilot/pkg/xds/endpoints/endpoint_builder.go‎
Lines changed: 8 additions & 3 deletions b/‎pilot/pkg/xds/endpoints/endpoint_builder.go‎
Lines changed: 8 additions & 3 deletions
diff --git a/‎pkg/test/echo/cmd/server/main.go‎
Lines changed: 42 additions & 23 deletions b/‎pkg/test/echo/cmd/server/main.go‎
Lines changed: 42 additions & 23 deletions
@@ -1114,9 +1114,18 @@ func buildDestination(ctx RouteContext, to k8s.BackendRef, ns string,
 		if ipCfg.endpointPickerDst == "" || ipCfg.endpointPickerPort == "" || ipCfg.endpointPickerFailureMode == "" {
 			invalidBackendErr = &ConfigError{Reason: InvalidDestination, Message: "InferencePool service invalid, extensionRef labels not found"}
 		}
+
+		// For InferencePool, always use the first service port (54321).
+		// The cluster for that service port will include all endpoints for all
+		// target ports, allowing the EPP to load-balance across them.
+		var destPort uint32
+		if len(svc.Ports) > 0 {
+			destPort = uint32(svc.Ports[0].Port)
+		}
+
 		return &istio.Destination{
 			Host: hostname,
-			// Port: &istio.PortSelector{Number: uint32(*to.Port)},
+			Port: &istio.PortSelector{Number: destPort},
 		}, ipCfg, invalidBackendErr
 	default:
 		return &istio.Destination{}, nil, &ConfigError{
 
@@ -77,6 +77,14 @@ var ports = []*model.Port{
 	},
 }
 
+var inferencePoolPorts = []*model.Port{
+	{
+		Name:     "http",
+		Port:     54321,
+		Protocol: "HTTP",
+	},
+}
+
 var services = []*model.Service{
 	{
 		Attributes: model.ServiceAttributes{
@@ -135,7 +143,7 @@ var services = []*model.Service{
 				InferencePoolExtensionRefFailureMode: "FailClose",
 			},
 		},
-		Ports:    ports,
+		Ports:    inferencePoolPorts,
 		Hostname: host.Name(fmt.Sprintf("%s.default.svc.domain.suffix", firstValue(InferencePoolServiceName("infpool-gen")))),
 	},
 	{
@@ -147,7 +155,7 @@ var services = []*model.Service{
 				InferencePoolExtensionRefFailureMode: "FailClose",
 			},
 		},
-		Ports:    ports,
+		Ports:    inferencePoolPorts,
 		Hostname: host.Name(fmt.Sprintf("%s.default.svc.domain.suffix", firstValue(InferencePoolServiceName("infpool-gen2")))),
 	},
 
 
@@ -506,15 +506,19 @@ func InferencePoolServiceName(poolName string) (string, error) {
 }
 
 func translateShadowServiceToService(shadow shadowServiceInfo, extRef extRefInfo) *corev1.Service {
-	// Create the ports used by the shadow service
+	// Create multiple ports for the shadow service - one for each InferencePool targetPort.
+	// This allows Istio to discover endpoints for all targetPorts.
+	// We use dummy service ports (54321, 54322, etc.) that map to the actual targetPorts.
+	baseDummyPort := int32(54321)
 	ports := make([]corev1.ServicePort, 0, len(shadow.targetPorts))
-	dummyPort := int32(54321) // Dummy port, not used for anything
-	for i, port := range shadow.targetPorts {
+
+	for i, tp := range shadow.targetPorts {
+		portName := fmt.Sprintf("http-%d", i)
 		ports = append(ports, corev1.ServicePort{
-			Name:       "port" + strconv.Itoa(i),
+			Name:       portName,
 			Protocol:   corev1.ProtocolTCP,
-			Port:       dummyPort + int32(i),
-			TargetPort: intstr.FromInt(int(port.port)),
+			Port:       baseDummyPort + int32(i),
+			TargetPort: intstr.FromInt(int(tp.port)),
 		})
 	}
 
 
@@ -40,7 +40,7 @@ func TestReconcileInferencePool(t *testing.T) {
 		expectedAnnotations map[string]string
 		expectedLabels      map[string]string
 		expectedServiceName string
-		expectedTargetPort  int32
+		expectedTargetPorts []int32
 	}{
 		{
 			name: "basic shadow service creation",
@@ -72,7 +72,7 @@ func TestReconcileInferencePool(t *testing.T) {
 				constants.InternalServiceSemantics: constants.ServiceSemanticsInferencePool,
 				InferencePoolRefLabel:              "test-pool",
 			},
-			expectedTargetPort: 8080,
+			expectedTargetPorts: []int32{8080},
 		},
 		{
 			name: "user label and annotation preservation",
@@ -136,7 +136,7 @@ func TestReconcileInferencePool(t *testing.T) {
 				"user.example.com/my-label":        "user-value",
 				"another.domain.com/label":         "another-value",
 			},
-			expectedTargetPort: 8080,
+			expectedTargetPorts: []int32{8080},
 		},
 		{
 			name: "very long inferencepool name",
@@ -169,7 +169,45 @@ func TestReconcileInferencePool(t *testing.T) {
 				InferencePoolRefLabel:              "very-long-inference-pool-name-that-should-be-truncated-properly",
 			},
 			expectedServiceName: "very-long-inference-pool-name-that-should-be-trunca-ip-6d24df6a",
-			expectedTargetPort:  9090,
+			expectedTargetPorts: []int32{9090},
+		},
+		{
+			name: "multiple target ports creates single service port",
+			inferencePool: &inferencev1.InferencePool{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "multi-port-pool",
+					Namespace: "default",
+				},
+				Spec: inferencev1.InferencePoolSpec{
+					TargetPorts: []inferencev1.Port{
+						{
+							Number: inferencev1.PortNumber(8000),
+						},
+						{
+							Number: inferencev1.PortNumber(8001),
+						},
+						{
+							Number: inferencev1.PortNumber(8002),
+						},
+					},
+					Selector: inferencev1.LabelSelector{
+						MatchLabels: map[inferencev1.LabelKey]inferencev1.LabelValue{
+							"app": "multiport",
+						},
+					},
+					EndpointPickerRef: inferencev1.EndpointPickerRef{
+						Name: "dummy",
+						Port: &inferencev1.Port{
+							Number: inferencev1.PortNumber(5421),
+						},
+					},
+				},
+			},
+			expectedLabels: map[string]string{
+				constants.InternalServiceSemantics: constants.ServiceSemanticsInferencePool,
+				InferencePoolRefLabel:              "multi-port-pool",
+			},
+			expectedTargetPorts: []int32{8000, 8001, 8002},
 		},
 	}
 
@@ -217,8 +255,15 @@ func TestReconcileInferencePool(t *testing.T) {
 			for key, expectedValue := range tc.expectedAnnotations {
 				assert.Equal(t, service.Annotations[key], expectedValue, fmt.Sprintf("Annotation '%s' should have value '%s'", key, expectedValue))
 			}
-			assert.Equal(t, service.Spec.Ports[0].Port, int32(54321)) // dummyPort + i
-			assert.Equal(t, service.Spec.Ports[0].TargetPort.IntVal, tc.expectedTargetPort)
+			expectedPortCount := len(tc.inferencePool.Spec.TargetPorts)
+			assert.Equal(t, len(service.Spec.Ports), expectedPortCount, fmt.Sprintf("Shadow service should have %d ports", expectedPortCount))
+
+			for i := 1; i < len(service.Spec.Ports); i++ {
+				assert.Equal(t, service.Spec.Ports[i].Port, int32(54321+i))
+				assert.Equal(t, service.Spec.Ports[i].TargetPort.IntVal, tc.expectedTargetPorts[i])
+				assert.Equal(t, service.Spec.Ports[i].Name, fmt.Sprintf("http-%d", i))
+			}
+
 			assert.Equal(t, service.OwnerReferences[0].Name, tc.inferencePool.Name)
 		})
 	}
 
@@ -398,6 +398,8 @@ metadata:
 spec:
   targetPorts:
   - number: 8000
+  - number: 8001
+  - number: 8002
   selector:
     matchLabels:
       app: vllm-llama3-8b-instruct
 
@@ -235,6 +235,8 @@ spec:
     route:
     - destination:
         host: infpool-gen-ip-6580eb2c.default.svc.domain.suffix
+        port:
+          number: 54321
   - match:
     - headers:
         my-header:
@@ -245,6 +247,8 @@ spec:
     route:
     - destination:
         host: infpool-gen2-ip-97b729d1.default.svc.domain.suffix
+        port:
+          number: 54321
 ---
 apiVersion: networking.istio.io/v1
 kind: VirtualService
 
@@ -2526,6 +2526,25 @@ func (ps *PushContext) BestEffortInferServiceMTLSMode(tp *networking.TrafficPoli
 // ServiceEndpointsByPort returns the cached instances by port if it exists.
 func (ps *PushContext) ServiceEndpointsByPort(svc *Service, port int, labels labels.Instance) []*IstioEndpoint {
 	var out []*IstioEndpoint
+
+	// For InferencePool services, return ALL endpoints regardless of port
+	// because they may have different target ports but belong to the same cluster
+	if svc.UseInferenceSemantics() {
+		allPorts := ps.ServiceIndex.instancesByPort[svc.Key()]
+		for _, instances := range allPorts {
+			if len(labels) == 0 {
+				out = append(out, instances...)
+				continue
+			}
+			for _, instance := range instances {
+				if labels.SubsetOf(instance.Labels) {
+					out = append(out, instance)
+				}
+			}
+		}
+		return out
+	}
+
 	if instances, exists := ps.ServiceIndex.instancesByPort[svc.Key()][port]; exists {
 		// Use cached version of instances by port when labels are empty.
 		if len(labels) == 0 {
 
@@ -315,10 +315,15 @@ func (configgen *ConfigGeneratorImpl) buildOutboundClusters(cb *ClusterBuilder,
 		if service.Resolution == model.Alias {
 			continue
 		}
-		for _, port := range service.Ports {
+		for i, port := range service.Ports {
 			if port.Protocol == protocol.UDP {
 				continue
 			}
+			// For InferencePool services, only build cluster for the first port
+			// All endpoints from all ports are merged into this single cluster
+			if service.UseInferenceSemantics() && i > 0 {
+				continue
+			}
 			clusterKey := buildClusterKey(service, port, cb, proxy, efKeys)
 			cached, allFound := cb.getAllCachedSubsetClusters(clusterKey)
 			if allFound && !features.EnableUnsafeAssertions {
 
@@ -362,9 +362,14 @@ func (b *EndpointBuilder) BuildClusterLoadAssignment(endpointIndex *model.Endpoi
 
 	svcEps := b.snapshotShards(endpointIndex)
 	svcEps = slices.FilterInPlace(svcEps, func(ep *model.IstioEndpoint) bool {
-		// filter out endpoints that don't match the service port
-		if svcPort.Name != ep.ServicePortName {
-			return false
+		// For InferencePool services, include endpoints from all service ports
+		// They use multiple service ports (54321+i) mapped to different targetPorts
+		// but we want all endpoints in a single cluster so the EPP can load-balance across them
+		if !b.service.UseInferenceSemantics() {
+			// filter out endpoints that don't match the service port
+			if svcPort.Name != ep.ServicePortName {
+				return false
+			}
 		}
 		// filter out endpoint that has invalid ip address, mostly domain name. Because this is generated from ServiceEntry.
 		// There are other two cases that should not be filtered out:
 
@@ -33,28 +33,29 @@ import (
 )
 
 var (
-	httpPorts          []int
-	grpcPorts          []int
-	tcpPorts           []int
-	udpPorts           []int
-	tlsPorts           []int
-	mtlsPorts          []int
-	hbonePorts         []int
-	doubleHbonePorts   []int
-	instanceIPPorts    []int
-	localhostIPPorts   []int
-	serverFirstPorts   []int
-	proxyProtocolPorts []int
-	xdsGRPCServers     []int
-	metricsPort        int
-	uds                string
-	version            string
-	cluster            string
-	crt                string
-	key                string
-	ca                 string
-	istioVersion       string
-	disableALPN        bool
+	httpPorts           []int
+	grpcPorts           []int
+	tcpPorts            []int
+	udpPorts            []int
+	tlsPorts            []int
+	mtlsPorts           []int
+	hbonePorts          []int
+	doubleHbonePorts    []int
+	instanceIPPorts     []int
+	localhostIPPorts    []int
+	serverFirstPorts    []int
+	proxyProtocolPorts  []int
+	xdsGRPCServers      []int
+	endpointPickerPorts []int
+	metricsPort         int
+	uds                 string
+	version             string
+	cluster             string
+	crt                 string
+	key                 string
+	ca                  string
+	istioVersion        string
+	disableALPN         bool
 
 	loggingOptions = log.DefaultOptions()
 
@@ -66,7 +67,7 @@ var (
 		PersistentPreRunE: configureLogging,
 		Run: func(cmd *cobra.Command, args []string) {
 			shutdown := NewShutdown()
-			ports := make(common.PortList, len(httpPorts)+len(grpcPorts)+len(tcpPorts)+len(udpPorts)+len(hbonePorts)+len(doubleHbonePorts))
+			ports := make(common.PortList, len(httpPorts)+len(grpcPorts)+len(tcpPorts)+len(udpPorts)+len(hbonePorts)+len(doubleHbonePorts)+len(endpointPickerPorts))
 			tlsByPort := map[int]bool{}
 			mtlsByPort := map[int]bool{}
 			for _, p := range tlsPorts {
@@ -89,6 +90,10 @@ var (
 			for _, p := range xdsGRPCServers {
 				xdsGRPCByPort[p] = true
 			}
+			endpointPickerByPort := map[int]bool{}
+			for _, p := range endpointPickerPorts {
+				endpointPickerByPort[p] = true
+			}
 			portIndex := 0
 			for i, p := range httpPorts {
 				ports[portIndex] = &common.Port{
@@ -151,6 +156,18 @@ var (
 				}
 				portIndex++
 			}
+			for i, p := range endpointPickerPorts {
+				ports[portIndex] = &common.Port{
+					Name:           "endpoint-picker-" + strconv.Itoa(i),
+					Protocol:       protocol.GRPC,
+					Port:           p,
+					TLS:            tlsByPort[p],
+					ServerFirst:    serverFirstByPort[p],
+					ProxyProtocol:  proxyProtocolByPort[p],
+					EndpointPicker: true,
+				}
+				portIndex++
+			}
 
 			instanceIPByPort := map[int]struct{}{}
 			for _, p := range instanceIPPorts {
@@ -250,6 +267,8 @@ func init() {
 	rootCmd.PersistentFlags().IntSliceVar(&serverFirstPorts, "server-first", []int{}, "Ports that are server first. These must be defined as tcp.")
 	rootCmd.PersistentFlags().IntSliceVar(&proxyProtocolPorts, "proxy-protocol", []int{}, "Ports that are wrapped in HA-PROXY protocol.")
 	rootCmd.PersistentFlags().IntSliceVar(&xdsGRPCServers, "xds-grpc-server", []int{}, "Ports that should rely on XDS configuration to serve.")
+	rootCmd.PersistentFlags().IntSliceVar(&endpointPickerPorts, "endpoint-picker", []int{},
+		"Endpoint picker (ext_proc) ports. These are GRPC ports that implement the Envoy external processor protocol.")
 	rootCmd.PersistentFlags().IntVar(&metricsPort, "metrics", 0, "Metrics port")
 	rootCmd.PersistentFlags().StringVar(&uds, "uds", "", "HTTP server on unix domain socket")
 	rootCmd.PersistentFlags().StringVar(&version, "version", "", "Version string")