diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go
index af945827e..2c7b52705 100644
--- a/api/v1/inferencepool_types.go
+++ b/api/v1/inferencepool_types.go
@@ -96,6 +96,7 @@ type Port struct {
// EndpointPickerRef specifies a reference to an Endpoint Picker extension and its
// associated configuration.
+// +kubebuilder:validation:XValidation:rule="self.kind != 'Service' || has(self.port)",message="port is required when kind is 'Service' or unspecified (defaults to 'Service')"
type EndpointPickerRef struct {
// Group is the group of the referent API object. When unspecified, the default value
// is "", representing the Core API group.
@@ -125,13 +126,15 @@ type EndpointPickerRef struct {
// +required
Name ObjectName `json:"name,omitempty"`
- // PortNumber is the port number of the Endpoint Picker extension service. When unspecified,
- // implementations SHOULD infer a default value of 9002 when the kind field is "Service" or
- // unspecified (defaults to "Service").
+ // Port is the port of the Endpoint Picker extension service.
+ //
+ // Port is required when the referent is a Kubernetes Service. In this
+ // case, the port number is the service port number, not the target port.
+ // For other resources, destination port might be derived from the referent
+ // resource or this field.
//
// +optional
- //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer as zero means all ports in convention, we don't make to use 0 to indicate not set.
- PortNumber *PortNumber `json:"portNumber,omitempty"`
+ Port *Port `json:"port,omitempty"`
// FailureMode configures how the parent handles the case when the Endpoint Picker extension
// is non-responsive. When unspecified, defaults to "FailClose".
diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go
index b42573ce2..3b003632a 100644
--- a/api/v1/zz_generated.deepcopy.go
+++ b/api/v1/zz_generated.deepcopy.go
@@ -33,9 +33,9 @@ func (in *EndpointPickerRef) DeepCopyInto(out *EndpointPickerRef) {
*out = new(Group)
**out = **in
}
- if in.PortNumber != nil {
- in, out := &in.PortNumber, &out.PortNumber
- *out = new(PortNumber)
+ if in.Port != nil {
+ in, out := &in.Port, &out.Port
+ *out = new(Port)
**out = **in
}
}
diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go
index 31ea997bf..01520dc4b 100644
--- a/apix/v1alpha2/inferencepool_conversion.go
+++ b/apix/v1alpha2/inferencepool_conversion.go
@@ -254,7 +254,7 @@ func convertExtensionRefToV1(src *Extension) (v1.EndpointPickerRef, error) {
}
endpointPickerRef.Name = v1.ObjectName(src.Name)
if src.PortNumber != nil {
- endpointPickerRef.PortNumber = ptr.To(v1.PortNumber(*src.PortNumber))
+ endpointPickerRef.Port = ptr.To(v1.Port{Number: v1.PortNumber(*src.PortNumber)})
}
if src.FailureMode != nil {
endpointPickerRef.FailureMode = v1.EndpointPickerFailureMode(*src.FailureMode)
@@ -275,8 +275,8 @@ func convertEndpointPickerRefFromV1(src *v1.EndpointPickerRef) (Extension, error
extension.Kind = ptr.To(Kind(src.Kind))
}
extension.Name = ObjectName(src.Name)
- if src.PortNumber != nil {
- extension.PortNumber = ptr.To(PortNumber(*src.PortNumber))
+ if src.Port != nil {
+ extension.PortNumber = ptr.To(PortNumber(src.Port.Number))
}
if src.FailureMode != "" {
extension.FailureMode = ptr.To(ExtensionFailureMode(src.FailureMode))
diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go
index a4147f2df..64b222fed 100644
--- a/apix/v1alpha2/inferencepool_conversion_test.go
+++ b/apix/v1alpha2/inferencepool_conversion_test.go
@@ -21,6 +21,7 @@ import (
"github.com/google/go-cmp/cmp"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
)
@@ -34,7 +35,7 @@ var (
v1Group = v1.Group("my-group")
v1Kind = v1.Kind("MyKind")
v1FailureMode = v1.EndpointPickerFailureMode("Deny")
- v1PortNumber = v1.PortNumber(9000)
+ v1Port = v1.Port{Number: 9000}
)
func TestInferencePoolConvertTo(t *testing.T) {
@@ -110,7 +111,7 @@ func TestInferencePoolConvertTo(t *testing.T) {
Group: &v1Group,
Kind: v1Kind,
Name: "my-epp-service",
- PortNumber: &v1PortNumber,
+ Port: &v1Port,
FailureMode: v1FailureMode,
},
},
@@ -433,7 +434,7 @@ func TestInferencePoolConvertFrom(t *testing.T) {
Group: &v1Group,
Kind: v1Kind,
Name: "my-epp-service",
- PortNumber: &v1PortNumber,
+ Port: &v1Port,
FailureMode: v1FailureMode,
},
},
diff --git a/client-go/applyconfiguration/api/v1/endpointpickerref.go b/client-go/applyconfiguration/api/v1/endpointpickerref.go
index 8a9991bc5..0d7886239 100644
--- a/client-go/applyconfiguration/api/v1/endpointpickerref.go
+++ b/client-go/applyconfiguration/api/v1/endpointpickerref.go
@@ -28,7 +28,7 @@ type EndpointPickerRefApplyConfiguration struct {
Group *apiv1.Group `json:"group,omitempty"`
Kind *apiv1.Kind `json:"kind,omitempty"`
Name *apiv1.ObjectName `json:"name,omitempty"`
- PortNumber *apiv1.PortNumber `json:"portNumber,omitempty"`
+ Port *PortApplyConfiguration `json:"port,omitempty"`
FailureMode *apiv1.EndpointPickerFailureMode `json:"failureMode,omitempty"`
}
@@ -62,11 +62,11 @@ func (b *EndpointPickerRefApplyConfiguration) WithName(value apiv1.ObjectName) *
return b
}
-// WithPortNumber sets the PortNumber field in the declarative configuration to the given value
+// WithPort sets the Port field in the declarative configuration to the given value
// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the PortNumber field is set to the value of the last call.
-func (b *EndpointPickerRefApplyConfiguration) WithPortNumber(value apiv1.PortNumber) *EndpointPickerRefApplyConfiguration {
- b.PortNumber = &value
+// If called multiple times, the Port field is set to the value of the last call.
+func (b *EndpointPickerRefApplyConfiguration) WithPort(value *PortApplyConfiguration) *EndpointPickerRefApplyConfiguration {
+ b.Port = value
return b
}
diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml
index 66cbcec6f..8f60199f2 100644
--- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml
+++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml
@@ -89,18 +89,33 @@ spec:
maxLength: 253
minLength: 1
type: string
- portNumber:
+ port:
description: |-
- PortNumber is the port number of the Endpoint Picker extension service. When unspecified,
- implementations SHOULD infer a default value of 9002 when the kind field is "Service" or
- unspecified (defaults to "Service").
- format: int32
- maximum: 65535
- minimum: 1
- type: integer
+ Port is the port of the Endpoint Picker extension service.
+
+ Port is required when the referent is a Kubernetes Service. In this
+ case, the port number is the service port number, not the target port.
+ For other resources, destination port might be derived from the referent
+ resource or this field.
+ properties:
+ number:
+ description: |-
+ Number defines the port number to access the selected model server Pods.
+ The number must be in the range 1 to 65535.
+ format: int32
+ maximum: 65535
+ minimum: 1
+ type: integer
+ required:
+ - number
+ type: object
required:
- name
type: object
+ x-kubernetes-validations:
+ - message: port is required when kind is 'Service' or unspecified
+ (defaults to 'Service')
+ rule: self.kind != 'Service' || has(self.port)
selector:
description: |-
Selector determines which Pods are members of this inference pool.
diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml
index 7c31da97e..ffe19654b 100644
--- a/config/manifests/inferencepool-resources.yaml
+++ b/config/manifests/inferencepool-resources.yaml
@@ -15,6 +15,9 @@ spec:
app: vllm-llama3-8b-instruct
endpointPickerRef:
name: vllm-llama3-8b-instruct-epp
+ kind: Service
+ port:
+ number: 9002
---
apiVersion: v1
kind: Service
diff --git a/conformance/tests/inferencepool_invalid_epp_service.yaml b/conformance/tests/inferencepool_invalid_epp_service.yaml
index cdc2048f7..b3dc70e19 100644
--- a/conformance/tests/inferencepool_invalid_epp_service.yaml
+++ b/conformance/tests/inferencepool_invalid_epp_service.yaml
@@ -11,6 +11,9 @@ spec:
- number: 3000
endpointPickerRef:
name: non-existent-epp-svc
+ kind: Service
+ port:
+ number: 9002
---
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
diff --git a/site-src/reference/spec.md b/site-src/reference/spec.md
index 12ce3d7ab..11080fe24 100644
--- a/site-src/reference/spec.md
+++ b/site-src/reference/spec.md
@@ -51,7 +51,7 @@ _Appears in:_
| `group` _[Group](#group)_ | Group is the group of the referent API object. When unspecified, the default value
is "", representing the Core API group. | | MaxLength: 253
MinLength: 0
Pattern: `^$\|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`
|
| `kind` _[Kind](#kind)_ | Kind is the Kubernetes resource kind of the referent.
Required if the referent is ambiguous, e.g. service with multiple ports.
Defaults to "Service" when not specified.
ExternalName services can refer to CNAME DNS records that may live
outside of the cluster and as such are difficult to reason about in
terms of conformance. They also may not be safe to forward to (see
CVE-2021-25740 for more information). Implementations MUST NOT
support ExternalName Services. | Service | MaxLength: 63
MinLength: 1
Pattern: `^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
|
| `name` _[ObjectName](#objectname)_ | Name is the name of the referent API object. | | MaxLength: 253
MinLength: 1
|
-| `portNumber` _[PortNumber](#portnumber)_ | PortNumber is the port number of the Endpoint Picker extension service. When unspecified,
implementations SHOULD infer a default value of 9002 when the kind field is "Service" or
unspecified (defaults to "Service"). | | Maximum: 65535
Minimum: 1
|
+| `port` _[Port](#port)_ | Port is the port of the Endpoint Picker extension service.
Port is required when the referent is a Kubernetes Service. In this
case, the port number is the service port number, not the target port.
For other resources, destination port might be derived from the referent
resource or this field. | | |
| `failureMode` _[EndpointPickerFailureMode](#endpointpickerfailuremode)_ | FailureMode configures how the parent handles the case when the Endpoint Picker extension
is non-responsive. When unspecified, defaults to "FailClose". | FailClose | Enum: [FailOpen FailClose]
|
@@ -340,6 +340,7 @@ Port defines the network port that will be exposed by this InferencePool.
_Appears in:_
+- [EndpointPickerRef](#endpointpickerref)
- [InferencePoolSpec](#inferencepoolspec)
| Field | Description | Default | Validation |
@@ -358,7 +359,6 @@ _Validation:_
- Minimum: 1
_Appears in:_
-- [EndpointPickerRef](#endpointpickerref)
- [Port](#port)
diff --git a/test/cel/inferencepool_test.go b/test/cel/inferencepool_test.go
new file mode 100644
index 000000000..8b3ba3ea5
--- /dev/null
+++ b/test/cel/inferencepool_test.go
@@ -0,0 +1,114 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+ "context"
+ "fmt"
+ "testing"
+ "time"
+
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
+)
+
+// TestValidateInferencePool creates InferencePool objects through k8sClient and
+// checks that each create call succeeds or fails as expected. Every case starts
+// from a copy of a known-good pool, applies a mutation, and lists the substrings
+// that must appear in the returned error (none when the object should be
+// accepted).
+func TestValidateInferencePool(t *testing.T) {
+	ctx := context.Background()
+
+	// baseInferencePool is a valid InferencePool resource.
+	baseInferencePool := v1.InferencePool{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "base-pool",
+			Namespace: metav1.NamespaceDefault,
+		},
+		Spec: v1.InferencePoolSpec{
+			TargetPorts: []v1.Port{
+				{Number: 8000},
+			},
+			Selector: v1.LabelSelector{
+				MatchLabels: map[v1.LabelKey]v1.LabelValue{
+					"app": "model-server",
+				},
+			},
+			EndpointPickerRef: v1.EndpointPickerRef{
+				Name: "epp",
+				Kind: "Service",
+				Port: ptrTo(v1.Port{Number: 9002}),
+			},
+		},
+	}
+
+	testCases := []struct {
+		// desc names the subtest.
+		desc string
+		// mutate edits a copy of baseInferencePool before it is created.
+		mutate func(ip *v1.InferencePool)
+		// wantErrors lists substrings expected in the create error; empty means
+		// the create call must succeed.
+		wantErrors []string
+	}{
+		{
+			desc: "passes validation with a valid configuration",
+			mutate: func(ip *v1.InferencePool) {
+			},
+			wantErrors: nil,
+		},
+		{
+			desc: "fails validation when kind is unset (defaults to Service) and port is missing",
+			mutate: func(ip *v1.InferencePool) {
+				// By setting Kind to an empty string, we rely on the API server's default value of "Service".
+				ip.Spec.EndpointPickerRef.Kind = ""
+				ip.Spec.EndpointPickerRef.Port = nil
+			},
+			wantErrors: []string{"port is required when kind is 'Service' or unspecified (defaults to 'Service')"},
+		},
+		{
+			desc: "fails validation when kind is explicitly 'Service' and port is missing",
+			mutate: func(ip *v1.InferencePool) {
+				ip.Spec.EndpointPickerRef.Kind = "Service"
+				ip.Spec.EndpointPickerRef.Port = nil
+			},
+			wantErrors: []string{"port is required when kind is 'Service' or unspecified (defaults to 'Service')"},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			ip := baseInferencePool.DeepCopy()
+			// Use a unique name for each test case to avoid conflicts.
+			ip.Name = fmt.Sprintf("test-pool-%v", time.Now().UnixNano())
+
+			if tc.mutate != nil {
+				tc.mutate(ip)
+			}
+			err := k8sClient.Create(ctx, ip)
+			if err == nil {
+				// Delete pools that were accepted so repeated runs against a
+				// long-lived cluster do not accumulate test objects. Deletion is
+				// best effort: a failure is logged, not treated as a test failure.
+				t.Cleanup(func() {
+					if delErr := k8sClient.Delete(ctx, ip); delErr != nil {
+						t.Logf("Failed to delete InferencePool %s/%s: %v", ip.Namespace, ip.Name, delErr)
+					}
+				})
+			}
+
+			// An error must be returned if and only if the case expects one. The
+			// same predicate drives both the check and the reported expectation.
+			wantErr := len(tc.wantErrors) != 0
+			if wantErr != (err != nil) {
+				t.Fatalf("Unexpected response while creating InferencePool; got err=\n%v\n; want error=%v", err, wantErr)
+			}
+
+			// If we got an error, check that it contains the expected substrings.
+			// err is non-nil whenever this loop runs, because the guard above
+			// already failed the test otherwise.
+			var missingErrorStrings []string
+			for _, wantError := range tc.wantErrors {
+				if !celErrorStringMatches(err.Error(), wantError) {
+					missingErrorStrings = append(missingErrorStrings, wantError)
+				}
+			}
+			if len(missingErrorStrings) != 0 {
+				t.Errorf("Unexpected response while creating InferencePool; got err=\n%v\n; missing strings within error=%q", err, missingErrorStrings)
+			}
+		})
+	}
+}
diff --git a/test/cel/main_test.go b/test/cel/main_test.go
new file mode 100644
index 000000000..2758a5632
--- /dev/null
+++ b/test/cel/main_test.go
@@ -0,0 +1,130 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/client-go/rest"
+ "k8s.io/client-go/tools/clientcmd"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/envtest"
+
+ inferencev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
+ inferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
+)
+
+var k8sClient client.Client
+
+// TestMain prepares the shared k8sClient used by the tests in this package and
+// then runs them.
+//
+// When the KUBECONFIG environment variable is set, the client is built from that
+// kubeconfig. Otherwise an envtest environment is started with the CRDs found
+// under config/crd/bases, and it is stopped again once the tests have run. The
+// process exits with the code returned by m.Run.
+func TestMain(m *testing.M) {
+	scheme := runtime.NewScheme()
+	var restConfig *rest.Config
+	var testEnv *envtest.Environment
+	var err error
+
+	// Registration errors are discarded here.
+	_ = inferencev1.Install(scheme)
+	_ = inferencev1alpha2.Install(scheme)
+
+	// Add core APIs in case we refer secrets, services and configmaps
+	_ = corev1.AddToScheme(scheme)
+
+	// If one wants to use a local cluster, a KUBECONFIG envvar should be passed,
+	// otherwise testenv will be used
+	kubeconfig := os.Getenv("KUBECONFIG")
+	if kubeconfig != "" {
+		restConfig, err = clientcmd.BuildConfigFromFlags("", kubeconfig)
+		if err != nil {
+			panic(fmt.Sprintf("Failed to get restConfig from BuildConfigFromFlags: %v", err))
+		}
+	} else {
+		// The version used here MUST reflect the available versions at
+		// controller-runtime repo: https://raw.githubusercontent.com/kubernetes-sigs/controller-tools/HEAD/envtest-releases.yaml
+		// If the envvar is not passed, the latest GA will be used
+		k8sVersion := os.Getenv("K8S_VERSION")
+		testEnv = &envtest.Environment{
+			Scheme:                      scheme,
+			ErrorIfCRDPathMissing:       true,
+			DownloadBinaryAssets:        true,
+			DownloadBinaryAssetsVersion: k8sVersion,
+			CRDInstallOptions: envtest.CRDInstallOptions{
+				Paths: []string{
+					filepath.Join("..", "..", "config", "crd", "bases"),
+				},
+				CleanUpAfterUse: true,
+			},
+		}
+
+		restConfig, err = testEnv.Start()
+		if err != nil {
+			panic(fmt.Sprintf("Error initializing test environment: %v", err))
+		}
+	}
+
+	k8sClient, err = client.New(restConfig, client.Options{
+		Scheme: scheme,
+	})
+	if err != nil {
+		// The envtest environment (if any) is already running at this point;
+		// stop it before panicking so it is not left behind. The stop error is
+		// dropped because the client error is the one worth reporting.
+		if testEnv != nil {
+			_ = testEnv.Stop()
+		}
+		panic(fmt.Sprintf("Error initializing Kubernetes client: %v", err))
+	}
+
+	rc := m.Run()
+	// testEnv is nil when an external cluster was used, so there is nothing to stop.
+	if testEnv != nil {
+		if err := testEnv.Stop(); err != nil {
+			panic(fmt.Sprintf("error stopping test environment: %v", err))
+		}
+	}
+
+	os.Exit(rc)
+}
+
+// ptrTo stores a in a newly allocated T and returns the address of that copy,
+// so the result never aliases the caller's variable.
+func ptrTo[T any](a T) *T {
+	p := new(T)
+	*p = a
+	return p
+}
+
+// celErrorStringMatches reports whether got contains want, compared
+// case-insensitively. Besides the literal text of want, two rewritten forms are
+// accepted to absorb wording differences between Kubernetes versions.
+func celErrorStringMatches(got, want string) bool {
+	haystack := strings.ToLower(got)
+	needle := strings.ToLower(want)
+
+	// Starting in k8s v1.28, CEL error messages stopped adding spec and status prefixes to path names.
+	// The two removals run one after the other, in this order.
+	dropPathPrefixes := func(s string) string {
+		return strings.ReplaceAll(strings.ReplaceAll(s, "spec.", ""), "status.", "")
+	}
+	unprefixed := dropPathPrefixes(needle)
+
+	// Starting in k8s v1.32, some CEL error messages changed to use "more" instead of "longer".
+	reworded := dropPathPrefixes(strings.ReplaceAll(needle, "longer", "more"))
+
+	// Enum validation messages changed in k8s v1.28:
+	// Before: must be one of ['Exact', 'PathPrefix', 'RegularExpression']
+	// After: supported values: "Exact", "PathPrefix", "RegularExpression"
+	// Only the prefix-stripped form is rewritten this way.
+	if strings.Contains(unprefixed, "must be one of") {
+		unprefixed = strings.NewReplacer(
+			"must be one of", "supported values:",
+			"[", "",
+			"]", "",
+			"'", "\"",
+		).Replace(unprefixed)
+	}
+
+	for _, candidate := range []string{needle, unprefixed, reworded} {
+		if strings.Contains(haystack, candidate) {
+			return true
+		}
+	}
+	return false
+}
diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml
index 9fa50593a..b8d7fb697 100644
--- a/test/testdata/inferencepool-e2e.yaml
+++ b/test/testdata/inferencepool-e2e.yaml
@@ -12,6 +12,9 @@ spec:
endpointPickerRef:
name: vllm-llama3-8b-instruct-epp
namespace: $E2E_NS
+ kind: Service
+ port:
+ number: 9002
---
apiVersion: v1
kind: Service
diff --git a/test/testdata/inferencepool-leader-election-e2e.yaml b/test/testdata/inferencepool-leader-election-e2e.yaml
index 8c243470f..9ba5dcb4a 100644
--- a/test/testdata/inferencepool-leader-election-e2e.yaml
+++ b/test/testdata/inferencepool-leader-election-e2e.yaml
@@ -10,6 +10,9 @@ spec:
endpointPickerRef:
name: vllm-llama3-8b-instruct-epp
namespace: $E2E_NS
+ kind: Service
+ port:
+ number: 9002
---
apiVersion: v1
kind: Service
diff --git a/test/testdata/inferencepool-with-model-hermetic.yaml b/test/testdata/inferencepool-with-model-hermetic.yaml
index 794b7fcd4..df6ae30db 100644
--- a/test/testdata/inferencepool-with-model-hermetic.yaml
+++ b/test/testdata/inferencepool-with-model-hermetic.yaml
@@ -11,6 +11,9 @@ spec:
app: vllm-llama3-8b-instruct-pool
endpointPickerRef:
name: epp
+ kind: Service
+ port:
+ number: 9002
---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceObjective