diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index d8d738c55..eb12c7660 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -62,18 +62,31 @@ type InferencePoolSpec struct { // +kubebuilder:validation:Required Selector LabelSelector `json:"selector"` - // TargetPortNumber defines the port number to access the selected model server Pods. - // The number must be in the range 1 to 65535. + // TargetPorts defines the ports to access the selected model server Pods. // - // +kubebuilder:validation:Minimum=1 - // +kubebuilder:validation:Maximum=65535 // +kubebuilder:validation:Required - TargetPortNumber int32 `json:"targetPortNumber"` + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=1 + // +listType=map + // +listMapKey=number + TargetPorts []Port `json:"targetPorts"` // Extension configures an endpoint picker as an extension service. + // + // +kubebuilder:validation:Required ExtensionRef *Extension `json:"extensionRef,omitempty"` } +type Port struct { + // Number defines the port number to access the selected model server Pods. + // The number must be in the range 1 to 65535. + // + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=65535 + // +kubebuilder:validation:Required + Number int32 `json:"number"` +} + // Extension specifies how to configure an extension that runs the endpoint picker. type Extension struct { // Group is the group of the referent. @@ -234,7 +247,7 @@ const ( InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs" // This reason is used with the "ResolvedRefs" condition when the - // Extension is invalid in some way. This can include an unsupported kind + // ExtensionRef is invalid in some way. This can include an unsupported kind // or API group, or a reference to a resource that can not be found. InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef" ) diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 8a03dda3b..977cac82a 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -123,6 +123,11 @@ func (in *InferencePoolList) DeepCopyObject() runtime.Object { func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { *out = *in in.Selector.DeepCopyInto(&out.Selector) + if in.TargetPorts != nil { + in, out := &in.TargetPorts, &out.TargetPorts + *out = make([]Port, len(*in)) + copy(*out, *in) + } if in.ExtensionRef != nil { in, out := &in.ExtensionRef, &out.ExtensionRef *out = new(Extension) @@ -236,3 +241,18 @@ func (in *PoolStatus) DeepCopy() *PoolStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Port) DeepCopyInto(out *Port) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Port. +func (in *Port) DeepCopy() *Port { + if in == nil { + return nil + } + out := new(Port) + in.DeepCopyInto(out) + return out +} diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 1733c37d9..3fd5802f5 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -17,9 +17,9 @@ limitations under the License. package v1alpha2 import ( + "errors" "fmt" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" runtime "k8s.io/apimachinery/pkg/runtime" @@ -27,101 +27,85 @@ import ( ) // ConvertTo converts this InferencePool (v1alpha2) to the v1 version. -func (src *InferencePool) ConvertTo() (*v1.InferencePool, error) { - if src == nil { - return nil, nil +func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { + if dst == nil { + return errors.New("dst cannot be nil") } - - v1Extension, err := convertEndpointPickerConfToV1(&src.Spec.EndpointPickerConfig) + v1Extension, err := convertExtensionRefToV1(src.Spec.ExtensionRef) if err != nil { - return nil, err + return err } - v1Status, err := converStatusToV1(src.Status) + v1Status, err := convertStatusToV1(&src.Status) if err != nil { - return nil, err - } - dst := &v1.InferencePool{ - TypeMeta: src.TypeMeta, - ObjectMeta: src.ObjectMeta, - Spec: v1.InferencePoolSpec{ - TargetPortNumber: src.Spec.TargetPortNumber, - ExtensionRef: v1Extension, - }, - Status: *v1Status, + return err } + dst.TypeMeta = src.TypeMeta + dst.ObjectMeta = src.ObjectMeta + dst.Spec.TargetPorts = []v1.Port{{Number: src.Spec.TargetPortNumber}} + dst.Spec.ExtensionRef = v1Extension + dst.Status = *v1Status if src.Spec.Selector != nil { dst.Spec.Selector.MatchLabels = make(map[v1.LabelKey]v1.LabelValue, len(src.Spec.Selector)) for k, v := range src.Spec.Selector { dst.Spec.Selector.MatchLabels[v1.LabelKey(k)] = v1.LabelValue(v) } } - return dst, nil + return nil } // ConvertFrom converts from the v1 version to this version (v1alpha2). -func ConvertFrom(src *v1.InferencePool) (*InferencePool, error) { +func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { if src == nil { - return nil, nil + return errors.New("src cannot be nil") } - - endPointPickerConfig, err := convertEndpointPickerConfigFromV1(src.Spec.ExtensionRef) + extensionRef, err := convertExtensionRefFromV1(src.Spec.ExtensionRef) if err != nil { - return nil, err + return err } - status, err := converStatusFromV1(src.Status) + status, err := convertStatusFromV1(&src.Status) if err != nil { - return nil, err - } - dst := &InferencePool{ - TypeMeta: metav1.TypeMeta{ - Kind: "InferencePool", - APIVersion: "inference.networking.x-k8s.io/v1alpha2", - }, - ObjectMeta: src.ObjectMeta, - Spec: InferencePoolSpec{ - TargetPortNumber: src.Spec.TargetPortNumber, - EndpointPickerConfig: *endPointPickerConfig, - }, - Status: *status, + return err } - + dst.TypeMeta = src.TypeMeta + dst.ObjectMeta = src.ObjectMeta + dst.Spec.TargetPortNumber = src.Spec.TargetPorts[0].Number + dst.Spec.ExtensionRef = extensionRef + dst.Status = *status if src.Spec.Selector.MatchLabels != nil { dst.Spec.Selector = make(map[LabelKey]LabelValue, len(src.Spec.Selector.MatchLabels)) for k, v := range src.Spec.Selector.MatchLabels { dst.Spec.Selector[LabelKey(k)] = LabelValue(v) } } - - return dst, nil + return nil } -func converStatusToV1(src InferencePoolStatus) (*v1.InferencePoolStatus, error) { - u, err := toUnstructured(&src) +func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error) { + u, err := toUnstructured(src) if err != nil { return nil, err } return convert[v1.InferencePoolStatus](u) } -func converStatusFromV1(src v1.InferencePoolStatus) (*InferencePoolStatus, error) { - u, err := toUnstructured(&src) +func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, error) { + u, err := toUnstructured(src) if err != nil { return nil, err } return convert[InferencePoolStatus](u) } -func convertEndpointPickerConfToV1(src *EndpointPickerConfig) (*v1.Extension, error) { - extension := src.ExtensionRef - u, err := toUnstructured(&extension) +func convertExtensionRefToV1(src *Extension) (*v1.Extension, error) { + u, err := toUnstructured(src) if err != nil { return nil, err } return convert[v1.Extension](u) } -func convertEndpointPickerConfigFromV1(src *v1.Extension) (*EndpointPickerConfig, error) { - u, err := toUnstructured(&src) +func convertExtensionRefFromV1(src *v1.Extension) (*Extension, error) { + u, err := toUnstructured(src) if err != nil { return nil, err } @@ -129,9 +113,7 @@ func convertEndpointPickerConfigFromV1(src *v1.Extension) (*EndpointPickerConfig if err != nil { return nil, err } - return &EndpointPickerConfig{ - ExtensionRef: extension, - }, nil + return extension, nil } func toUnstructured(obj any) (*unstructured.Unstructured, error) { diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 73469925b..cc005d87e 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -47,6 +47,10 @@ func TestInferencePoolConvertTo(t *testing.T) { { name: "full conversion from v1alpha2 to v1 including status", src: &InferencePool{ + TypeMeta: metav1.TypeMeta{ + Kind: "InferencePool", + APIVersion: "inference.networking.x-k8s.io/v1alpha2", + }, ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -56,14 +60,12 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - EndpointPickerConfig: EndpointPickerConfig{ - ExtensionRef: &Extension{ - Group: &group, - Kind: &kind, - Name: "my-epp-service", - PortNumber: &portNumber, - FailureMode: &failureMode, - }, + ExtensionRef: &Extension{ + Group: &group, + Kind: &kind, + Name: "my-epp-service", + PortNumber: &portNumber, + FailureMode: &failureMode, }, }, Status: InferencePoolStatus{ @@ -83,6 +85,10 @@ func TestInferencePoolConvertTo(t *testing.T) { }, }, want: &v1.InferencePool{ + TypeMeta: metav1.TypeMeta{ + Kind: "InferencePool", + APIVersion: "inference.networking.x-k8s.io/v1alpha2", + }, ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -93,7 +99,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: int32(8080)}}, ExtensionRef: &v1.Extension{ Group: &v1Group, Kind: &v1Kind, @@ -120,17 +126,12 @@ func TestInferencePoolConvertTo(t *testing.T) { }, wantErr: false, }, - { - name: "nil source should return nil and no error", - src: nil, - want: nil, - wantErr: false, - }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := tt.src.ConvertTo() + got := &v1.InferencePool{} + err := tt.src.ConvertTo(got) if (err != nil) != tt.wantErr { t.Fatalf("ConvertTo() error = %v, wantErr %v", err, tt.wantErr) } @@ -151,6 +152,10 @@ func TestInferencePoolConvertFrom(t *testing.T) { { name: "full conversion from v1 to v1alpha2 including status", src: &v1.InferencePool{ + TypeMeta: metav1.TypeMeta{ + Kind: "InferencePool", + APIVersion: "inference.networking.k8s.io/v1", + }, ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", Namespace: "test-ns", @@ -161,7 +166,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: int32(8080)}}, ExtensionRef: &v1.Extension{ Group: &v1Group, Kind: &v1Kind, @@ -189,7 +194,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { want: &InferencePool{ TypeMeta: metav1.TypeMeta{ Kind: "InferencePool", - APIVersion: "inference.networking.x-k8s.io/v1alpha2", + APIVersion: "inference.networking.k8s.io/v1", }, ObjectMeta: metav1.ObjectMeta{ Name: "test-pool", @@ -200,14 +205,12 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - EndpointPickerConfig: EndpointPickerConfig{ - ExtensionRef: &Extension{ - Group: &group, - Kind: &kind, - Name: "my-epp-service", - PortNumber: &portNumber, - FailureMode: &failureMode, - }, + ExtensionRef: &Extension{ + Group: &group, + Kind: &kind, + Name: "my-epp-service", + PortNumber: &portNumber, + FailureMode: &failureMode, }, }, Status: InferencePoolStatus{ @@ -229,16 +232,17 @@ func TestInferencePoolConvertFrom(t *testing.T) { wantErr: false, }, { - name: "nil source should return nil and no error", + name: "nil source", src: nil, - want: nil, - wantErr: false, + want: &InferencePool{}, + wantErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := ConvertFrom(tt.src) + got := &InferencePool{} + err := got.ConvertFrom(tt.src) if (err != nil) != tt.wantErr { t.Fatalf("ConvertFrom() error = %v, wantErr %v", err, tt.wantErr) } diff --git a/apix/v1alpha2/inferencepool_types.go b/apix/v1alpha2/inferencepool_types.go index 98490c651..515aa0080 100644 --- a/apix/v1alpha2/inferencepool_types.go +++ b/apix/v1alpha2/inferencepool_types.go @@ -69,14 +69,6 @@ type InferencePoolSpec struct { // +kubebuilder:validation:Required TargetPortNumber int32 `json:"targetPortNumber"` - // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint - // picker service that picks endpoints for the requests routed to this pool. - EndpointPickerConfig `json:",inline"` -} - -// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension. -// This type is intended to be a union of mutually exclusive configuration options that we may add in the future. -type EndpointPickerConfig struct { // Extension configures an endpoint picker as an extension service. ExtensionRef *Extension `json:"extensionRef,omitempty"` } diff --git a/apix/v1alpha2/zz_generated.deepcopy.go b/apix/v1alpha2/zz_generated.deepcopy.go index f7f717fd5..f8eb08185 100644 --- a/apix/v1alpha2/zz_generated.deepcopy.go +++ b/apix/v1alpha2/zz_generated.deepcopy.go @@ -25,26 +25,6 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *EndpointPickerConfig) DeepCopyInto(out *EndpointPickerConfig) { - *out = *in - if in.ExtensionRef != nil { - in, out := &in.ExtensionRef, &out.ExtensionRef - *out = new(Extension) - (*in).DeepCopyInto(*out) - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointPickerConfig. -func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig { - if in == nil { - return nil - } - out := new(EndpointPickerConfig) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Extension) DeepCopyInto(out *Extension) { *out = *in @@ -251,7 +231,11 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { (*out)[key] = val } } - in.EndpointPickerConfig.DeepCopyInto(&out.EndpointPickerConfig) + if in.ExtensionRef != nil { + in, out := &in.ExtensionRef, &out.ExtensionRef + *out = new(Extension) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/client-go/applyconfiguration/api/v1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1/inferencepoolspec.go index 8d14a3969..364d059dd 100644 --- a/client-go/applyconfiguration/api/v1/inferencepoolspec.go +++ b/client-go/applyconfiguration/api/v1/inferencepoolspec.go @@ -21,9 +21,9 @@ package v1 // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use // with apply. type InferencePoolSpecApplyConfiguration struct { - Selector *LabelSelectorApplyConfiguration `json:"selector,omitempty"` - TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` - ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` + Selector *LabelSelectorApplyConfiguration `json:"selector,omitempty"` + TargetPorts []PortApplyConfiguration `json:"targetPorts,omitempty"` + ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` } // InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with @@ -40,11 +40,16 @@ func (b *InferencePoolSpecApplyConfiguration) WithSelector(value *LabelSelectorA return b } -// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the TargetPortNumber field is set to the value of the last call. -func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) *InferencePoolSpecApplyConfiguration { - b.TargetPortNumber = &value +// WithTargetPorts adds the given value to the TargetPorts field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the TargetPorts field. +func (b *InferencePoolSpecApplyConfiguration) WithTargetPorts(values ...*PortApplyConfiguration) *InferencePoolSpecApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithTargetPorts") + } + b.TargetPorts = append(b.TargetPorts, *values[i]) + } return b } diff --git a/client-go/applyconfiguration/api/v1/port.go b/client-go/applyconfiguration/api/v1/port.go new file mode 100644 index 000000000..266244d0c --- /dev/null +++ b/client-go/applyconfiguration/api/v1/port.go @@ -0,0 +1,39 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +// PortApplyConfiguration represents a declarative configuration of the Port type for use +// with apply. +type PortApplyConfiguration struct { + Number *int32 `json:"number,omitempty"` +} + +// PortApplyConfiguration constructs a declarative configuration of the Port type for use with +// apply. +func Port() *PortApplyConfiguration { + return &PortApplyConfiguration{} +} + +// WithNumber sets the Number field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Number field is set to the value of the last call. +func (b *PortApplyConfiguration) WithNumber(value int32) *PortApplyConfiguration { + b.Number = &value + return b +} diff --git a/client-go/applyconfiguration/apix/v1alpha2/endpointpickerconfig.go b/client-go/applyconfiguration/apix/v1alpha2/endpointpickerconfig.go deleted file mode 100644 index 39d4d8de6..000000000 --- a/client-go/applyconfiguration/apix/v1alpha2/endpointpickerconfig.go +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1alpha2 - -// EndpointPickerConfigApplyConfiguration represents a declarative configuration of the EndpointPickerConfig type for use -// with apply. -type EndpointPickerConfigApplyConfiguration struct { - ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` -} - -// EndpointPickerConfigApplyConfiguration constructs a declarative configuration of the EndpointPickerConfig type for use with -// apply. -func EndpointPickerConfig() *EndpointPickerConfigApplyConfiguration { - return &EndpointPickerConfigApplyConfiguration{} -} - -// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ExtensionRef field is set to the value of the last call. -func (b *EndpointPickerConfigApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *EndpointPickerConfigApplyConfiguration { - b.ExtensionRef = value - return b -} diff --git a/client-go/applyconfiguration/apix/v1alpha2/inferencepoolspec.go b/client-go/applyconfiguration/apix/v1alpha2/inferencepoolspec.go index 8773d9c03..1f59e887b 100644 --- a/client-go/applyconfiguration/apix/v1alpha2/inferencepoolspec.go +++ b/client-go/applyconfiguration/apix/v1alpha2/inferencepoolspec.go @@ -25,9 +25,9 @@ import ( // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use // with apply. type InferencePoolSpecApplyConfiguration struct { - Selector map[apixv1alpha2.LabelKey]apixv1alpha2.LabelValue `json:"selector,omitempty"` - TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` - EndpointPickerConfigApplyConfiguration `json:",inline"` + Selector map[apixv1alpha2.LabelKey]apixv1alpha2.LabelValue `json:"selector,omitempty"` + TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` + ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` } // InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with @@ -62,6 +62,6 @@ func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ExtensionRef field is set to the value of the last call. func (b *InferencePoolSpecApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *InferencePoolSpecApplyConfiguration { - b.EndpointPickerConfigApplyConfiguration.ExtensionRef = value + b.ExtensionRef = value return b } diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go index 67564f1cf..c5d4f575e 100644 --- a/client-go/applyconfiguration/utils.go +++ b/client-go/applyconfiguration/utils.go @@ -48,10 +48,10 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &apiv1.ParentGatewayReferenceApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("PoolStatus"): return &apiv1.PoolStatusApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("Port"): + return &apiv1.PortApplyConfiguration{} // Group=inference.networking.x-k8s.io, Version=v1alpha2 - case v1alpha2.SchemeGroupVersion.WithKind("EndpointPickerConfig"): - return &apixv1alpha2.EndpointPickerConfigApplyConfiguration{} case v1alpha2.SchemeGroupVersion.WithKind("Extension"): return &apixv1alpha2.ExtensionApplyConfiguration{} case v1alpha2.SchemeGroupVersion.WithKind("InferenceObjective"): diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index c614b2146..29be0bc80 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -148,7 +148,7 @@ var ( "The configuration specified as text, in lieu of a file") modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+ - "Default value will be set to InferencePool.Spec.TargetPortNumber if not set.") + "Default value will be set to InferencePool.Spec.TargetPorts if not set.") modelServerMetricsPath = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods") modelServerMetricsScheme = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods") modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)") diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml index ecddbfaa7..fbce19cf0 100644 --- a/config/charts/inferencepool/templates/inferencepool.yaml +++ b/config/charts/inferencepool/templates/inferencepool.yaml @@ -7,7 +7,8 @@ metadata: labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} spec: - targetPortNumber: {{ .Values.inferencePool.targetPortNumber }} + targetPorts: + - number: {{ .Values.inferencePool.targetPortNumber }} selector: matchLabels: {{- if .Values.inferencePool.modelServers.matchLabels }} diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 8bfc9b0b4..7d061d1aa 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -131,17 +131,31 @@ spec: required: - matchLabels type: object - targetPortNumber: - description: |- - TargetPortNumber defines the port number to access the selected model server Pods. - The number must be in the range 1 to 65535. - format: int32 - maximum: 65535 - minimum: 1 - type: integer + targetPorts: + description: TargetPorts defines the ports to access the selected + model server Pods. + items: + properties: + number: + description: |- + Number defines the port number to access the selected model server Pods. + The number must be in the range 1 to 65535. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - number + type: object + maxItems: 1 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - number + x-kubernetes-list-type: map required: - selector - - targetPortNumber + - targetPorts type: object status: default: diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml index 33a7b8e74..a3eba5582 100644 --- a/config/manifests/inferencepool-resources.yaml +++ b/config/manifests/inferencepool-resources.yaml @@ -8,7 +8,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct diff --git a/conformance/resources/base.yaml b/conformance/resources/base.yaml index e3d7561bb..015e89557 100644 --- a/conformance/resources/base.yaml +++ b/conformance/resources/base.yaml @@ -153,9 +153,13 @@ metadata: namespace: gateway-conformance-app-backend spec: selector: + app: primary-inference-model-server + targetPorts: + - portNumber: 3000 matchLabels: app: primary-inference-model-server targetPortNumber: 3000 + - number: 3000 extensionRef: name: primary-endpoint-picker-svc --- @@ -247,9 +251,9 @@ metadata: namespace: gateway-conformance-app-backend spec: selector: - matchLabels: - app: secondary-inference-model-server - targetPortNumber: 3000 + app: secondary-inference-model-server + targetPorts: + - portNumber: 3000 extensionRef: name: secondary-endpoint-picker-svc failureMode: FailOpen diff --git a/pkg/epp/backend/metrics/pod_metrics.go b/pkg/epp/backend/metrics/pod_metrics.go index 3471ddf3d..0edad43e3 100644 --- a/pkg/epp/backend/metrics/pod_metrics.go +++ b/pkg/epp/backend/metrics/pod_metrics.go @@ -116,7 +116,10 @@ func (pm *podMetrics) refreshMetrics() error { } ctx, cancel := context.WithTimeout(context.Background(), fetchMetricsTimeout) defer cancel() - updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPortNumber) + if len(pool.Spec.TargetPorts) != 1 { + return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts)) + } + updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPorts[0].Number) if err != nil { pm.logger.V(logutil.TRACE).Info("Failed to refreshed metrics:", "err", err) } diff --git a/pkg/epp/backend/metrics/pod_metrics_test.go b/pkg/epp/backend/metrics/pod_metrics_test.go index 952b0ee16..54a36719a 100644 --- a/pkg/epp/backend/metrics/pod_metrics_test.go +++ b/pkg/epp/backend/metrics/pod_metrics_test.go @@ -88,7 +88,7 @@ func TestMetricsRefresh(t *testing.T) { type fakeDataStore struct{} func (f *fakeDataStore) PoolGet() (*v1.InferencePool, error) { - return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPortNumber: 8000}}, nil + return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPorts: []v1.Port{{Number: 8000}}}}, nil } func (f *fakeDataStore) PodList(func(PodMetrics) bool) []PodMetrics { diff --git a/pkg/epp/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go index 986abe371..886da782b 100644 --- a/pkg/epp/controller/inferencepool_reconciler.go +++ b/pkg/epp/controller/inferencepool_reconciler.go @@ -80,7 +80,7 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques } // 4. Convert the fetched object to the canonical v1.InferencePool. - var v1infPool *v1.InferencePool + v1infPool := &v1.InferencePool{} switch pool := obj.(type) { case *v1.InferencePool: @@ -88,9 +88,9 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques v1infPool = pool case *v1alpha2.InferencePool: var err error - v1infPool, err = pool.ConvertTo() + err = pool.ConvertTo(v1infPool) if err != nil { - logger.Error(err, "Failed to convert unstructured to inferencePool") + logger.Error(err, "Failed to convert XInferencePool to InferencePool") return ctrl.Result{}, err } default: diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index eacad468b..811e4d59e 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -80,8 +80,8 @@ func TestInferencePoolReconciler(t *testing.T) { pool1 := utiltest.MakeInferencePool("pool1"). Namespace("pool1-ns"). Selector(selector_v1). - ExtensionRef("epp-service"). - TargetPortNumber(8080).ObjRef() + TargetPorts(8080). + ExtensionRef("epp-service").ObjRef() pool1.SetGroupVersionKind(gvk) pool2 := utiltest.MakeInferencePool("pool2").Namespace("pool2-ns").ExtensionRef("epp-service").ObjRef() pool2.SetGroupVersionKind(gvk) @@ -146,7 +146,7 @@ func TestInferencePoolReconciler(t *testing.T) { if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { t.Errorf("Unexpected pool get error: %v", err) } - newPool1.Spec.TargetPortNumber = 9090 + newPool1.Spec.TargetPorts = []v1.Port{{Number: 9090}} if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { t.Errorf("Unexpected pool update error: %v", err) } @@ -224,7 +224,10 @@ func TestXInferencePoolReconciler(t *testing.T) { Selector(selector_v1). ExtensionRef("epp-service"). TargetPortNumber(8080).ObjRef() - pool2 := utiltest.MakeXInferencePool("pool2").Namespace("pool2-ns").ExtensionRef("epp-service").ObjRef() + pool2 := utiltest.MakeXInferencePool("pool2"). + Namespace("pool2-ns"). + ExtensionRef("epp-service"). + TargetPortNumber(8080).ObjRef() pool1.SetGroupVersionKind(gvk) pool2.SetGroupVersionKind(gvk) @@ -324,7 +327,9 @@ func xDiffStore(t *testing.T, datastore datastore.Datastore, params xDiffStorePa return "" } - gotXPool, err := v1alpha2.ConvertFrom(gotPool) + gotXPool := &v1alpha2.InferencePool{} + + err := gotXPool.ConvertFrom(gotPool) if err != nil { t.Fatalf("failed to convert unstructured to InferencePool: %v", err) } diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index d1dae0f05..518dae135 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -61,7 +61,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -79,7 +79,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -97,7 +97,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -116,7 +116,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -132,7 +132,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -149,7 +149,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -167,7 +167,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index 77fe7c7c0..dc60458b6 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -245,7 +245,7 @@ var ( pod2NamespacedName = types.NamespacedName{Name: pod2.Name, Namespace: pod2.Namespace} inferencePool = &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: 8000, + TargetPorts: []v1.Port{{Number: int32(8000)}}, }, } ) diff --git a/pkg/epp/handlers/request.go b/pkg/epp/handlers/request.go index d051e163d..7f8122195 100644 --- a/pkg/epp/handlers/request.go +++ b/pkg/epp/handlers/request.go @@ -17,6 +17,7 @@ limitations under the License. package handlers import ( + "fmt" "strconv" "time" @@ -45,7 +46,10 @@ func (s *StreamingServer) HandleRequestHeaders(reqCtx *RequestContext, req *extP if err != nil { return err } - reqCtx.TargetEndpoint = pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPortNumber)) + if len(pool.Spec.TargetPorts) != 1 { + return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts)) + } + reqCtx.TargetEndpoint = pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPorts[0].Number)) reqCtx.RequestSize = 0 reqCtx.reqHeaderResp = s.generateRequestHeaderResponse(reqCtx) return nil diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index e855c55c1..6d4291b6d 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -80,7 +80,7 @@ func TestMetricsCollected(t *testing.T) { Name: "test-pool", }, Spec: v1.InferencePoolSpec{ - TargetPortNumber: 8000, + TargetPorts: []v1.Port{{Number: int32(8000)}}, }, } _ = ds.PoolSet(context.Background(), fakeClient, inferencePool) diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index c40b5864d..1435c0154 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -240,7 +240,10 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC return reqCtx, err } targetPods := []*backend.Pod{} - targetPort := int(pool.Spec.TargetPortNumber) + if len(pool.Spec.TargetPorts) != 1 { + return reqCtx, errutil.Error{Code: errutil.BadRequest, Msg: "targetPorts should have length 1"} + } + targetPort := int(pool.Spec.TargetPorts[0].Number) targetEndpoints := []string{} for _, pod := range result.ProfileResults[result.PrimaryProfileName].TargetPods { diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index c25b9640d..6919ae45c 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -101,7 +101,7 @@ func TestDirector_HandleRequest(t *testing.T) { pool := &v1.InferencePool{ ObjectMeta: metav1.ObjectMeta{Name: "test-pool", Namespace: "default"}, Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "app": "inference", diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index 5e6c4c18f..96d10147b 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -200,8 +200,8 @@ func (m *InferencePoolWrapper) Selector(selector map[string]string) *InferencePo return m } -func (m *InferencePoolWrapper) TargetPortNumber(p int32) *InferencePoolWrapper { - m.Spec.TargetPortNumber = p +func (m *InferencePoolWrapper) TargetPorts(p int32) *InferencePoolWrapper { + m.Spec.TargetPorts = []v1.Port{{Number: p}} return m } @@ -227,11 +227,7 @@ func MakeXInferencePool(name string) *XInferencePoolWrapper { ObjectMeta: metav1.ObjectMeta{ Name: name, }, - Spec: v1alpha2.InferencePoolSpec{ - EndpointPickerConfig: v1alpha2.EndpointPickerConfig{ - ExtensionRef: &v1alpha2.Extension{}, - }, - }, + Spec: v1alpha2.InferencePoolSpec{}, }, } } diff --git a/site-src/api-types/inferencepool.md b/site-src/api-types/inferencepool.md index d2794478b..c4481b1ad 100644 --- a/site-src/api-types/inferencepool.md +++ b/site-src/api-types/inferencepool.md @@ -33,7 +33,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: app: vllm-llama3-8b-instruct extensionRef: diff --git a/site-src/guides/implementers.md b/site-src/guides/implementers.md index 747e934a2..6fce01657 100644 --- a/site-src/guides/implementers.md +++ b/site-src/guides/implementers.md @@ -47,7 +47,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: app: vllm-llama3-8b-instruct extensionRef: diff --git a/site-src/guides/inferencepool-rollout.md b/site-src/guides/inferencepool-rollout.md index 80a6e3bf6..98b3cc4cc 100644 --- a/site-src/guides/inferencepool-rollout.md +++ b/site-src/guides/inferencepool-rollout.md @@ -208,7 +208,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct-new spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: app: vllm-llama3-8b-instruct-new extensionRef: diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml index aec574a39..7c546ba37 100644 --- a/test/testdata/inferencepool-e2e.yaml +++ b/test/testdata/inferencepool-e2e.yaml @@ -4,7 +4,8 @@ metadata: labels: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct diff --git a/test/testdata/inferencepool-with-model-hermetic.yaml b/test/testdata/inferencepool-with-model-hermetic.yaml index 707a76eed..20fea8d1a 100644 --- a/test/testdata/inferencepool-with-model-hermetic.yaml +++ b/test/testdata/inferencepool-with-model-hermetic.yaml @@ -4,7 +4,8 @@ metadata: name: vllm-llama3-8b-instruct-pool namespace: default spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct-pool diff --git a/test/utils/server.go b/test/utils/server.go index f76e147af..bcec8bca0 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -71,7 +71,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po WithObjects(initObjs...). Build() pool := testutil.MakeInferencePool(poolName).Namespace(namespace).ObjRef() - pool.Spec.TargetPortNumber = poolPort + pool.Spec.TargetPorts = []v1.Port{{Number: poolPort}} _ = ds.PoolSet(context.Background(), fakeClient, pool) return ctx, cancel, ds, pmc