Skip to content

Commit 1ea9959

Browse files
feat: move x-k8s to apix and add v1 InferencePool to api/v1 (#1116)
* rename x-k8s to apix and add v1 InferencePool to api/v1 * updated docker file * fixed naming collision * added fake approved annotation * try to fix unit tests * fixed typo * added annotation * fixed annotation * Update api/v1/inferencepool_types.go Co-authored-by: Rob Scott <[email protected]> * Apply suggestions from code review Co-authored-by: Rob Scott <[email protected]> * use v1 type in v1alpha2 * clean unused depdendency * fixed code generator error * revert back * merge bob's change * upadted code-gen * remove deep copy needs * use codegen * revert to workable version * re-run generate * revert to workable version * change to use v1alpha2 back * resolve merge conflicts * fixed typo * fixed typo * fixed pipeline * fixed boilerplate Signed-off-by: Xiyue Yu <[email protected]> * updated missing dependency * fixed change * fixed format * fixed import issue * fixed format --------- Signed-off-by: Xiyue Yu <[email protected]> Co-authored-by: Rob Scott <[email protected]>
1 parent 32ad5bb commit 1ea9959

File tree

110 files changed

+2724
-258
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

110 files changed

+2724
-258
lines changed

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ RUN go mod download
2020
COPY cmd/epp ./cmd/epp
2121
COPY pkg/epp ./pkg/epp
2222
COPY internal ./internal
23+
COPY apix ./apix
2324
COPY api ./api
2425
COPY version ./version
2526
WORKDIR /src/cmd/epp

api/v1/doc.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
// Package v1 contains API Schema definitions for the
18+
// inference.networking.k8s.io API group.
19+
//
20+
// +k8s:openapi-gen=true
21+
// +kubebuilder:object:generate=true
22+
// +groupName=inference.networking.k8s.io
23+
// +groupGoName=Inference
24+
package v1

api/v1/inferencepool_types.go

Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
)
22+
23+
// InferencePool is the Schema for the InferencePools API.
24+
//
25+
// +kubebuilder:object:root=true
26+
// TODO: change the annotation once it gets officially approved
27+
// +kubebuilder:metadata:annotations="api-approved.kubernetes.io=unapproved, experimental-only"
28+
// +kubebuilder:subresource:status
29+
// +kubebuilder:storageversion
30+
// +genclient
31+
type InferencePool struct {
32+
metav1.TypeMeta `json:",inline"`
33+
metav1.ObjectMeta `json:"metadata,omitempty"`
34+
35+
// Spec defines the desired state of InferencePool.
Spec InferencePoolSpec `json:"spec,omitempty"`
36+
37+
// Status defines the observed state of InferencePool.
38+
//
39+
// +kubebuilder:default={parent: {{parentRef: {kind: "Status", name: "default"}, conditions: {{type: "Accepted", status: "Unknown", reason: "Pending", message: "Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}}}}
40+
Status InferencePoolStatus `json:"status,omitempty"`
41+
}
42+
43+
// InferencePoolList contains a list of InferencePool.
44+
//
45+
// +kubebuilder:object:root=true
46+
type InferencePoolList struct {
47+
metav1.TypeMeta `json:",inline"`
48+
metav1.ListMeta `json:"metadata,omitempty"`
49+
// Items is the list of InferencePool objects.
Items []InferencePool `json:"items"`
50+
}
51+
52+
// InferencePoolSpec defines the desired state of InferencePool
53+
type InferencePoolSpec struct {
54+
// Selector defines a map of labels to watch model server Pods
55+
// that should be included in the InferencePool.
56+
// In some cases, implementations may translate this field to a Service selector, so this matches the simple
57+
// map used for Service selectors instead of the full Kubernetes LabelSelector type.
58+
// The selector is applied to match the model server Pods in the same namespace as the InferencePool.
59+
// Cross-namespace selectors are not supported.
60+
//
61+
// +kubebuilder:validation:Required
62+
Selector map[LabelKey]LabelValue `json:"selector"`
63+
64+
// TargetPortNumber defines the port number to access the selected model server Pods.
65+
// The number must be in the range 1 to 65535.
66+
//
67+
// +kubebuilder:validation:Minimum=1
68+
// +kubebuilder:validation:Maximum=65535
69+
// +kubebuilder:validation:Required
70+
TargetPortNumber int32 `json:"targetPortNumber"`
71+
72+
// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint
73+
// picker service that picks endpoints for the requests routed to this pool.
74+
EndpointPickerConfig `json:",inline"`
75+
}
76+
77+
// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension.
78+
// This type is intended to be a union of mutually exclusive configuration options that we may add in the future.
79+
type EndpointPickerConfig struct {
80+
// ExtensionRef configures an endpoint picker as an extension service.
81+
//
82+
// +kubebuilder:validation:Required
83+
ExtensionRef *Extension `json:"extensionRef,omitempty"`
84+
}
85+
86+
// Extension specifies how to configure an extension that runs the endpoint picker.
87+
type Extension struct {
88+
// ExtensionReference is a reference to a service extension. When the reference is invalid,
89+
// a 5XX status code MUST be returned for the request that would have otherwise been routed
90+
// to the invalid backend.
91+
ExtensionReference `json:",inline"`
92+
93+
// ExtensionConnection configures the connection between the Gateway and the extension.
94+
ExtensionConnection `json:",inline"`
95+
}
96+
97+
// ExtensionReference is a reference to the extension.
98+
//
99+
// If a reference is invalid, the implementation MUST update the `ResolvedRefs`
100+
// Condition on the InferencePool's status to `status: False`. A 5XX status code MUST be returned
101+
// for the request that would have otherwise been routed to the invalid backend.
102+
type ExtensionReference struct {
103+
// Group is the group of the referent.
104+
// The default value is "", representing the Core API group.
105+
//
106+
// +optional
107+
// +kubebuilder:default=""
108+
Group *Group `json:"group,omitempty"`
109+
110+
// Kind is the Kubernetes resource kind of the referent.
111+
//
112+
// Defaults to "Service" when not specified.
113+
//
114+
// ExternalName services can refer to CNAME DNS records that may live
115+
// outside of the cluster and as such are difficult to reason about in
116+
// terms of conformance. They also may not be safe to forward to (see
117+
// CVE-2021-25740 for more information). Implementations MUST NOT
118+
// support ExternalName Services.
119+
//
120+
// +optional
121+
// +kubebuilder:default=Service
122+
Kind *Kind `json:"kind,omitempty"`
123+
124+
// Name is the name of the referent.
125+
//
126+
// +kubebuilder:validation:Required
127+
Name ObjectName `json:"name"`
128+
129+
// PortNumber is the port number on the service running the extension. When unspecified,
130+
// implementations SHOULD infer a default value of 9002 when the Kind is
131+
// Service.
132+
//
133+
// +optional
134+
PortNumber *PortNumber `json:"portNumber,omitempty"`
135+
}
136+
137+
// ExtensionConnection encapsulates options that configure the connection to the extension.
138+
type ExtensionConnection struct {
139+
// FailureMode configures how the gateway handles the case when the extension is not responsive.
140+
// Defaults to FailClose.
141+
//
142+
// +optional
143+
// +kubebuilder:default="FailClose"
144+
FailureMode *ExtensionFailureMode `json:"failureMode,omitempty"`
145+
}
146+
147+
// ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not
148+
// responsive.
149+
//
// +kubebuilder:validation:Enum=FailOpen;FailClose
150+
type ExtensionFailureMode string
151+
152+
const (
153+
// FailOpen specifies that the proxy should forward the request to an endpoint of its picking when the Endpoint Picker fails.
154+
FailOpen ExtensionFailureMode = "FailOpen"
155+
// FailClose specifies that the proxy should drop the request when the Endpoint Picker fails.
// This is the value named by the kubebuilder default marker on ExtensionConnection.FailureMode.
156+
FailClose ExtensionFailureMode = "FailClose"
157+
)
158+
159+
// InferencePoolStatus defines the observed state of InferencePool.
160+
type InferencePoolStatus struct {
161+
// Parents is a list of parent resources (usually Gateways) that are
162+
// associated with the InferencePool, and the status of the InferencePool with respect to
163+
// each parent.
164+
//
165+
// A maximum of 32 Gateways will be represented in this list. When the list contains
166+
// `kind: Status, name: default`, it indicates that the InferencePool is not
167+
// associated with any Gateway and a controller must perform the following:
168+
//
169+
// - Remove the parent when setting the "Accepted" condition.
170+
// - Add the parent when the controller will no longer manage the InferencePool
171+
// and no other parents exist.
172+
//
173+
// +kubebuilder:validation:MaxItems=32
174+
Parents []PoolStatus `json:"parent,omitempty"` // NOTE(review): JSON key is singular ("parent") for a list field; renaming it would change the serialized API, so confirm before touching.
175+
}
176+
177+
// PoolStatus defines the observed state of InferencePool from a Gateway.
178+
type PoolStatus struct {
179+
// GatewayRef indicates the gateway that observed the state of the InferencePool.
180+
GatewayRef ParentGatewayReference `json:"parentRef"`
181+
182+
// Conditions track the state of the InferencePool.
183+
//
184+
// Known condition types are:
185+
//
186+
// * "Accepted"
187+
// * "ResolvedRefs"
188+
//
189+
// +optional
190+
// +listType=map
191+
// +listMapKey=type
192+
// +kubebuilder:validation:MaxItems=8
193+
// +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
194+
Conditions []metav1.Condition `json:"conditions,omitempty"`
195+
}
196+
197+
// InferencePoolConditionType is a type of condition for the InferencePool.
198+
type InferencePoolConditionType string
199+
200+
// InferencePoolReason is the reason for a given InferencePoolConditionType.
201+
type InferencePoolReason string
202+
203+
const (
204+
// InferencePoolConditionAccepted indicates whether the InferencePool has been accepted or rejected
205+
// by a Gateway, and why.
206+
//
207+
// Possible reasons for this condition to be True are:
208+
//
209+
// * "Accepted"
210+
//
211+
// Possible reasons for this condition to be False are:
212+
//
213+
// * "NotSupportedByGateway"
214+
// * "HTTPRouteNotAccepted"
215+
//
216+
// Possible reasons for this condition to be Unknown are:
217+
//
218+
// * "Pending"
219+
//
220+
// Controllers MAY raise this condition with other reasons, but should
221+
// prefer to use the reasons listed above to improve interoperability.
222+
InferencePoolConditionAccepted InferencePoolConditionType = "Accepted"
223+
224+
// InferencePoolReasonAccepted is used with the "Accepted" condition when the InferencePool has been
225+
// accepted by the Gateway.
226+
InferencePoolReasonAccepted InferencePoolReason = "Accepted"
227+
228+
// InferencePoolReasonNotSupportedByGateway is used with the "Accepted" condition when the InferencePool
229+
// has not been accepted by a Gateway because the Gateway does not support
230+
// InferencePool as a backend.
231+
InferencePoolReasonNotSupportedByGateway InferencePoolReason = "NotSupportedByGateway"
232+
233+
// InferencePoolReasonHTTPRouteNotAccepted is used with the "Accepted" condition when the InferencePool is
234+
// referenced by an HTTPRoute that has been rejected by the Gateway. The user
235+
// should inspect the status of the referring HTTPRoute for the specific reason.
236+
InferencePoolReasonHTTPRouteNotAccepted InferencePoolReason = "HTTPRouteNotAccepted"
237+
238+
// InferencePoolReasonPending is used with the "Accepted" condition when a controller has not yet
239+
// reconciled the InferencePool.
240+
InferencePoolReasonPending InferencePoolReason = "Pending"
241+
)
242+
243+
const (
244+
// InferencePoolConditionResolvedRefs indicates whether the controller was able to resolve all
245+
// the object references for the InferencePool.
246+
//
247+
// Possible reasons for this condition to be True are:
248+
//
249+
// * "ResolvedRefs"
250+
//
251+
// Possible reasons for this condition to be False are:
252+
//
253+
// * "InvalidExtensionRef"
254+
//
255+
// Controllers MAY raise this condition with other reasons, but should
256+
// prefer to use the reasons listed above to improve interoperability.
257+
InferencePoolConditionResolvedRefs InferencePoolConditionType = "ResolvedRefs"
258+
259+
// InferencePoolReasonResolvedRefs is used with the "ResolvedRefs" condition when the condition
260+
// is true.
261+
InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs"
262+
263+
// InferencePoolReasonInvalidExtensionRef is used with the "ResolvedRefs" condition when the
264+
// ExtensionRef is invalid in some way. This can include an unsupported kind
265+
// or API group, or a reference to a resource that can not be found.
266+
InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef"
267+
)
268+
269+
// ParentGatewayReference identifies an API object including its namespace,
270+
// defaulting to Gateway.
271+
type ParentGatewayReference struct {
272+
// Group is the group of the referent.
273+
//
274+
// +optional
275+
// +kubebuilder:default="gateway.networking.k8s.io"
276+
Group *Group `json:"group,omitempty"`
277+
278+
// Kind is the kind of the referent. For example "Gateway".
279+
//
280+
// +optional
281+
// +kubebuilder:default=Gateway
282+
Kind *Kind `json:"kind,omitempty"`
283+
284+
// Name is the name of the referent.
285+
Name ObjectName `json:"name"`
286+
287+
// Namespace is the namespace of the referent. If not present,
288+
// the namespace of the referent is assumed to be the same as
289+
// the namespace of the referring object.
290+
//
291+
// +optional
292+
Namespace *Namespace `json:"namespace,omitempty"`
293+
}

0 commit comments

Comments
 (0)