@@ -18,6 +18,7 @@ package v1alpha2
18
18
19
19
import (
20
20
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21
+ "sigs.k8s.io/gateway-api-inference-extension/api/v1"
21
22
)
22
23
23
24
// InferencePool is the Schema for the InferencePools API.
@@ -28,17 +29,7 @@ import (
28
29
// +kubebuilder:subresource:status
29
30
// +kubebuilder:storageversion
30
31
// +genclient
31
- type InferencePool struct {
32
- metav1.TypeMeta `json:",inline"`
33
- metav1.ObjectMeta `json:"metadata,omitempty"`
34
-
35
- Spec InferencePoolSpec `json:"spec,omitempty"`
36
-
37
- // Status defines the observed state of InferencePool.
38
- //
39
- // +kubebuilder:default={parent: {{parentRef: {kind: "Status", name: "default"}, conditions: {{type: "Accepted", status: "Unknown", reason: "Pending", message: "Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}}}}
40
- Status InferencePoolStatus `json:"status,omitempty"`
41
- }
32
+ type InferencePool v1.InferencePool
42
33
43
34
// InferencePoolList contains a list of InferencePool.
44
35
//
@@ -48,251 +39,3 @@ type InferencePoolList struct {
48
39
metav1.ListMeta `json:"metadata,omitempty"`
49
40
Items []InferencePool `json:"items"`
50
41
}
51
-
52
- // InferencePoolSpec defines the desired state of InferencePool
53
- type InferencePoolSpec struct {
54
- // Selector defines a map of labels to watch model server Pods
55
- // that should be included in the InferencePool.
56
- // In some cases, implementations may translate this field to a Service selector, so this matches the simple
57
- // map used for Service selectors instead of the full Kubernetes LabelSelector type.
58
- // If specified, it will be applied to match the model server pods in the same namespace as the InferencePool.
59
- // Cross-namespace selectors are not supported.
60
- //
61
- // +kubebuilder:validation:Required
62
- Selector map [LabelKey ]LabelValue `json:"selector"`
63
-
64
- // TargetPortNumber defines the port number to access the selected model server Pods.
65
- // The number must be in the range 1 to 65535.
66
- //
67
- // +kubebuilder:validation:Minimum=1
68
- // +kubebuilder:validation:Maximum=65535
69
- // +kubebuilder:validation:Required
70
- TargetPortNumber int32 `json:"targetPortNumber"`
71
-
72
- // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint
73
- // picker service that picks endpoints for the requests routed to this pool.
74
- EndpointPickerConfig `json:",inline"`
75
- }
76
-
77
- // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension.
78
- // This type is intended to be a union of mutually exclusive configuration options that we may add in the future.
79
- type EndpointPickerConfig struct {
80
- // ExtensionRef configures an endpoint picker as an extension service.
81
- //
82
- // +kubebuilder:validation:Required
83
- ExtensionRef * Extension `json:"extensionRef,omitempty"`
84
- }
85
-
86
- // Extension specifies how to configure an extension that runs the endpoint picker.
87
- type Extension struct {
88
- // Reference is a reference to a service extension. When ExtensionReference is invalid,
89
- // a 5XX status code MUST be returned for the request that would have otherwise been routed
90
- // to the invalid backend.
91
- ExtensionReference `json:",inline"`
92
-
93
- // ExtensionConnection configures the connection between the Gateway and the extension.
94
- ExtensionConnection `json:",inline"`
95
- }
96
-
97
- // ExtensionReference is a reference to the extension.
98
- //
99
- // Connections to this extension MUST use TLS by default. Implementations MAY
100
- // provide a way to customize this connection to use cleartext, a different
101
- // protocol, or custom TLS configuration.
102
- //
103
- // If a reference is invalid, the implementation MUST update the `ResolvedRefs`
104
- // Condition on the InferencePool's status to `status: False`. A 5XX status code
105
- // MUST be returned for the request that would have otherwise been routed to the
106
- // invalid backend.
107
- type ExtensionReference struct {
108
- // Group is the group of the referent.
109
- // The default value is "", representing the Core API group.
110
- //
111
- // +optional
112
- // +kubebuilder:default=""
113
- Group * Group `json:"group,omitempty"`
114
-
115
- // Kind is the Kubernetes resource kind of the referent.
116
- //
117
- // Defaults to "Service" when not specified.
118
- //
119
- // ExternalName services can refer to CNAME DNS records that may live
120
- // outside of the cluster and as such are difficult to reason about in
121
- // terms of conformance. They also may not be safe to forward to (see
122
- // CVE-2021-25740 for more information). Implementations MUST NOT
123
- // support ExternalName Services.
124
- //
125
- // +optional
126
- // +kubebuilder:default=Service
127
- Kind * Kind `json:"kind,omitempty"`
128
-
129
- // Name is the name of the referent.
130
- //
131
- // +kubebuilder:validation:Required
132
- Name ObjectName `json:"name"`
133
-
134
- // The port number on the service running the extension. When unspecified,
135
- // implementations SHOULD infer a default value of 9002 when the Kind is
136
- // Service.
137
- //
138
- // +optional
139
- PortNumber * PortNumber `json:"portNumber,omitempty"`
140
- }
141
-
142
- // ExtensionConnection encapsulates options that configure the connection to the extension.
143
- type ExtensionConnection struct {
144
- // Configures how the gateway handles the case when the extension is not responsive.
145
- // Defaults to FailClose.
146
- //
147
- // +optional
148
- // +kubebuilder:default="FailClose"
149
- FailureMode * ExtensionFailureMode `json:"failureMode"`
150
- }
151
-
152
- // ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not
153
- // responsive.
154
- // +kubebuilder:validation:Enum=FailOpen;FailClose
155
- type ExtensionFailureMode string
156
-
157
- const (
158
- // FailOpen specifies that the proxy should forward the request to an endpoint of its picking when the Endpoint Picker fails.
159
- FailOpen ExtensionFailureMode = "FailOpen"
160
- // FailClose specifies that the proxy should drop the request when the Endpoint Picker fails.
161
- FailClose ExtensionFailureMode = "FailClose"
162
- )
163
-
164
- // InferencePoolStatus defines the observed state of InferencePool.
165
- type InferencePoolStatus struct {
166
- // Parents is a list of parent resources (usually Gateways) that are
167
- // associated with the InferencePool, and the status of the InferencePool with respect to
168
- // each parent.
169
- //
170
- // A maximum of 32 Gateways will be represented in this list. When the list contains
171
- // `kind: Status, name: default`, it indicates that the InferencePool is not
172
- // associated with any Gateway and a controller must perform the following:
173
- //
174
- // - Remove the parent when setting the "Accepted" condition.
175
- // - Add the parent when the controller will no longer manage the InferencePool
176
- // and no other parents exist.
177
- //
178
- // +kubebuilder:validation:MaxItems=32
179
- Parents []PoolStatus `json:"parent,omitempty"`
180
- }
181
-
182
- // PoolStatus defines the observed state of InferencePool from a Gateway.
183
- type PoolStatus struct {
184
- // GatewayRef indicates the gateway that observed the state of the InferencePool.
185
- GatewayRef ParentGatewayReference `json:"parentRef"`
186
-
187
- // Conditions track the state of the InferencePool.
188
- //
189
- // Known condition types are:
190
- //
191
- // * "Accepted"
192
- // * "ResolvedRefs"
193
- //
194
- // +optional
195
- // +listType=map
196
- // +listMapKey=type
197
- // +kubebuilder:validation:MaxItems=8
198
- // +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
199
- Conditions []metav1.Condition `json:"conditions,omitempty"`
200
- }
201
-
202
- // InferencePoolConditionType is a type of condition for the InferencePool
203
- type InferencePoolConditionType string
204
-
205
- // InferencePoolReason is the reason for a given InferencePoolConditionType
206
- type InferencePoolReason string
207
-
208
- const (
209
- // This condition indicates whether the InferencePool has been accepted or rejected
210
- // by a Gateway, and why.
211
- //
212
- // Possible reasons for this condition to be True are:
213
- //
214
- // * "Accepted"
215
- //
216
- // Possible reasons for this condition to be False are:
217
- //
218
- // * "NotSupportedByGateway"
219
- // * "HTTPRouteNotAccepted"
220
- //
221
- // Possible reasons for this condition to be Unknown are:
222
- //
223
- // * "Pending"
224
- //
225
- // Controllers MAY raise this condition with other reasons, but should
226
- // prefer to use the reasons listed above to improve interoperability.
227
- InferencePoolConditionAccepted InferencePoolConditionType = "Accepted"
228
-
229
- // This reason is used with the "Accepted" condition when the InferencePool has been
230
- // accepted by the Gateway.
231
- InferencePoolReasonAccepted InferencePoolReason = "Accepted"
232
-
233
- // This reason is used with the "Accepted" condition when the InferencePool
234
- // has not been accepted by a Gateway because the Gateway does not support
235
- // InferencePool as a backend.
236
- InferencePoolReasonNotSupportedByGateway InferencePoolReason = "NotSupportedByGateway"
237
-
238
- // This reason is used with the "Accepted" condition when the InferencePool is
239
- // referenced by an HTTPRoute that has been rejected by the Gateway. The user
240
- // should inspect the status of the referring HTTPRoute for the specific reason.
241
- InferencePoolReasonHTTPRouteNotAccepted InferencePoolReason = "HTTPRouteNotAccepted"
242
-
243
- // This reason is used with the "Accepted" condition when a controller has not yet
244
- // reconciled the InferencePool.
245
- InferencePoolReasonPending InferencePoolReason = "Pending"
246
- )
247
-
248
- const (
249
- // This condition indicates whether the controller was able to resolve all
250
- // the object references for the InferencePool.
251
- //
252
- // Possible reasons for this condition to be True are:
253
- //
254
- // * "ResolvedRefs"
255
- //
256
- // Possible reasons for this condition to be False are:
257
- //
258
- // * "InvalidExtensionRef"
259
- //
260
- // Controllers MAY raise this condition with other reasons, but should
261
- // prefer to use the reasons listed above to improve interoperability.
262
- InferencePoolConditionResolvedRefs InferencePoolConditionType = "ResolvedRefs"
263
-
264
- // This reason is used with the "ResolvedRefs" condition when the condition
265
- // is true.
266
- InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs"
267
-
268
- // This reason is used with the "ResolvedRefs" condition when the
269
- // ExtensionRef is invalid in some way. This can include an unsupported kind
270
- // or API group, or a reference to a resource that can not be found.
271
- InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef"
272
- )
273
-
274
- // ParentGatewayReference identifies an API object including its namespace,
275
- // defaulting to Gateway.
276
- type ParentGatewayReference struct {
277
- // Group is the group of the referent.
278
- //
279
- // +optional
280
- // +kubebuilder:default="gateway.networking.k8s.io"
281
- Group * Group `json:"group"`
282
-
283
- // Kind is the kind of the referent. For example "Gateway".
284
- //
285
- // +optional
286
- // +kubebuilder:default=Gateway
287
- Kind * Kind `json:"kind"`
288
-
289
- // Name is the name of the referent.
290
- Name ObjectName `json:"name"`
291
-
292
- // Namespace is the namespace of the referent. If not present,
293
- // the namespace of the referent is assumed to be the same as
294
- // the namespace of the referring object.
295
- //
296
- // +optional
297
- Namespace * Namespace `json:"namespace,omitempty"`
298
- }
0 commit comments