|  | 
|  | 1 | +/* | 
|  | 2 | +Copyright 2025 The Kubernetes Authors. | 
|  | 3 | +
 | 
|  | 4 | +Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | +you may not use this file except in compliance with the License. | 
|  | 6 | +You may obtain a copy of the License at | 
|  | 7 | +
 | 
|  | 8 | +    http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | +
 | 
|  | 10 | +Unless required by applicable law or agreed to in writing, software | 
|  | 11 | +distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | +See the License for the specific language governing permissions and | 
|  | 14 | +limitations under the License. | 
|  | 15 | +*/ | 
|  | 16 | + | 
|  | 17 | +package v1 | 
|  | 18 | + | 
|  | 19 | +import ( | 
|  | 20 | +	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | 
|  | 21 | +) | 
|  | 22 | + | 
|  | 23 | +// InferencePool is the Schema for the InferencePools API. | 
|  | 24 | +// | 
|  | 25 | +// +kubebuilder:object:root=true | 
|  | 26 | +// TODO: change the annotation once it gets officially approved | 
|  | 27 | +// +kubebuilder:metadata:annotations="api-approved.kubernetes.io=unapproved, experimental-only" | 
|  | 28 | +// +kubebuilder:subresource:status | 
|  | 29 | +// +kubebuilder:storageversion | 
|  | 30 | +// +genclient | 
|  | 31 | +type InferencePool struct { | 
|  | 32 | +	metav1.TypeMeta   `json:",inline"` | 
|  | 33 | +	metav1.ObjectMeta `json:"metadata,omitempty"` | 
|  | 34 | + | 
|  | 35 | +	Spec InferencePoolSpec `json:"spec,omitempty"` | 
|  | 36 | + | 
|  | 37 | +	// Status defines the observed state of InferencePool. | 
|  | 38 | +	// | 
|  | 39 | +	// +kubebuilder:default={parent: {{parentRef: {kind: "Status", name: "default"}, conditions: {{type: "Accepted", status: "Unknown", reason: "Pending", message: "Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}}}} | 
|  | 40 | +	Status InferencePoolStatus `json:"status,omitempty"` | 
|  | 41 | +} | 
|  | 42 | + | 
|  | 43 | +// InferencePoolList contains a list of InferencePool. | 
|  | 44 | +// | 
|  | 45 | +// +kubebuilder:object:root=true | 
|  | 46 | +type InferencePoolList struct { | 
|  | 47 | +	metav1.TypeMeta `json:",inline"` | 
|  | 48 | +	metav1.ListMeta `json:"metadata,omitempty"` | 
|  | 49 | +	Items           []InferencePool `json:"items"` | 
|  | 50 | +} | 
|  | 51 | + | 
|  | 52 | +// InferencePoolSpec defines the desired state of InferencePool | 
|  | 53 | +type InferencePoolSpec struct { | 
|  | 54 | +	// Selector defines a map of labels to watch model server Pods | 
|  | 55 | +	// that should be included in the InferencePool. | 
|  | 56 | +	// In some cases, implementations may translate this field to a Service selector, so this matches the simple | 
|  | 57 | +	// map used for Service selectors instead of the full Kubernetes LabelSelector type. | 
|  | 58 | +	// If specified, it will be applied to match the model server pods in the same namespace as the InferencePool. | 
|  | 59 | +	// Cross namesoace selector is not supported. | 
|  | 60 | +	// | 
|  | 61 | +	// +kubebuilder:validation:Required | 
|  | 62 | +	Selector map[LabelKey]LabelValue `json:"selector"` | 
|  | 63 | + | 
|  | 64 | +	// TargetPortNumber defines the port number to access the selected model server Pods. | 
|  | 65 | +	// The number must be in the range 1 to 65535. | 
|  | 66 | +	// | 
|  | 67 | +	// +kubebuilder:validation:Minimum=1 | 
|  | 68 | +	// +kubebuilder:validation:Maximum=65535 | 
|  | 69 | +	// +kubebuilder:validation:Required | 
|  | 70 | +	TargetPortNumber int32 `json:"targetPortNumber"` | 
|  | 71 | + | 
|  | 72 | +	// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint | 
|  | 73 | +	// picker service that picks endpoints for the requests routed to this pool. | 
|  | 74 | +	EndpointPickerConfig `json:",inline"` | 
|  | 75 | +} | 
|  | 76 | + | 
|  | 77 | +// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension. | 
|  | 78 | +// This type is intended to be a union of mutually exclusive configuration options that we may add in the future. | 
|  | 79 | +type EndpointPickerConfig struct { | 
|  | 80 | +	// Extension configures an endpoint picker as an extension service. | 
|  | 81 | +	// | 
|  | 82 | +	// +kubebuilder:validation:Required | 
|  | 83 | +	ExtensionRef *Extension `json:"extensionRef,omitempty"` | 
|  | 84 | +} | 
|  | 85 | + | 
|  | 86 | +// Extension specifies how to configure an extension that runs the endpoint picker. | 
|  | 87 | +type Extension struct { | 
|  | 88 | +	// Reference is a reference to a service extension. When ExtensionReference is invalid, | 
|  | 89 | +	// a 5XX status code MUST be returned for the request that would have otherwise been routed | 
|  | 90 | +	// to the invalid backend. | 
|  | 91 | +	ExtensionReference `json:",inline"` | 
|  | 92 | + | 
|  | 93 | +	// ExtensionConnection configures the connection between the Gateway and the extension. | 
|  | 94 | +	ExtensionConnection `json:",inline"` | 
|  | 95 | +} | 
|  | 96 | + | 
|  | 97 | +// ExtensionReference is a reference to the extension. | 
|  | 98 | +// | 
|  | 99 | +// If a reference is invalid, the implementation MUST update the `ResolvedRefs` | 
|  | 100 | +// Condition on the InferencePool's status to `status: False`. A 5XX status code MUST be returned | 
|  | 101 | +// for the request that would have otherwise been routed to the invalid backend. | 
|  | 102 | +type ExtensionReference struct { | 
|  | 103 | +	// Group is the group of the referent. | 
|  | 104 | +	// The default value is "", representing the Core API group. | 
|  | 105 | +	// | 
|  | 106 | +	// +optional | 
|  | 107 | +	// +kubebuilder:default="" | 
|  | 108 | +	Group *Group `json:"group,omitempty"` | 
|  | 109 | + | 
|  | 110 | +	// Kind is the Kubernetes resource kind of the referent. | 
|  | 111 | +	// | 
|  | 112 | +	// Defaults to "Service" when not specified. | 
|  | 113 | +	// | 
|  | 114 | +	// ExternalName services can refer to CNAME DNS records that may live | 
|  | 115 | +	// outside of the cluster and as such are difficult to reason about in | 
|  | 116 | +	// terms of conformance. They also may not be safe to forward to (see | 
|  | 117 | +	// CVE-2021-25740 for more information). Implementations MUST NOT | 
|  | 118 | +	// support ExternalName Services. | 
|  | 119 | +	// | 
|  | 120 | +	// +optional | 
|  | 121 | +	// +kubebuilder:default=Service | 
|  | 122 | +	Kind *Kind `json:"kind,omitempty"` | 
|  | 123 | + | 
|  | 124 | +	// Name is the name of the referent. | 
|  | 125 | +	// | 
|  | 126 | +	// +kubebuilder:validation:Required | 
|  | 127 | +	Name ObjectName `json:"name"` | 
|  | 128 | + | 
|  | 129 | +	// The port number on the service running the extension. When unspecified, | 
|  | 130 | +	// implementations SHOULD infer a default value of 9002 when the Kind is | 
|  | 131 | +	// Service. | 
|  | 132 | +	// | 
|  | 133 | +	// +optional | 
|  | 134 | +	PortNumber *PortNumber `json:"portNumber,omitempty"` | 
|  | 135 | +} | 
|  | 136 | + | 
// ExtensionConnection encapsulates the options that configure the connection
// to the extension.
type ExtensionConnection struct {
	// FailureMode configures how the gateway handles the case when the
	// extension is not responsive.
	// Defaults to FailClose.
	//
	// The field is optional, so it carries `omitempty`: an unset value is left
	// out of the serialized object instead of being emitted as an explicit null.
	//
	// +optional
	// +kubebuilder:default="FailClose"
	FailureMode *ExtensionFailureMode `json:"failureMode,omitempty"`
}

// ExtensionFailureMode defines the options for how the gateway handles the
// case when the extension is not responsive.
// +kubebuilder:validation:Enum=FailOpen;FailClose
type ExtensionFailureMode string

const (
	// FailOpen specifies that the proxy should forward the request to an endpoint of its picking when the Endpoint Picker fails.
	FailOpen ExtensionFailureMode = "FailOpen"
	// FailClose specifies that the proxy should drop the request when the Endpoint Picker fails.
	FailClose ExtensionFailureMode = "FailClose"
)
|  | 158 | + | 
|  | 159 | +// InferencePoolStatus defines the observed state of InferencePool. | 
|  | 160 | +type InferencePoolStatus struct { | 
|  | 161 | +	// Parents is a list of parent resources (usually Gateways) that are | 
|  | 162 | +	// associated with the InferencePool, and the status of the InferencePool with respect to | 
|  | 163 | +	// each parent. | 
|  | 164 | +	// | 
|  | 165 | +	// A maximum of 32 Gateways will be represented in this list. When the list contains | 
|  | 166 | +	// `kind: Status, name: default`, it indicates that the InferencePool is not | 
|  | 167 | +	// associated with any Gateway and a controller must perform the following: | 
|  | 168 | +	// | 
|  | 169 | +	//  - Remove the parent when setting the "Accepted" condition. | 
|  | 170 | +	//  - Add the parent when the controller will no longer manage the InferencePool | 
|  | 171 | +	//    and no other parents exist. | 
|  | 172 | +	// | 
|  | 173 | +	// +kubebuilder:validation:MaxItems=32 | 
|  | 174 | +	Parents []PoolStatus `json:"parent,omitempty"` | 
|  | 175 | +} | 
|  | 176 | + | 
|  | 177 | +// PoolStatus defines the observed state of InferencePool from a Gateway. | 
|  | 178 | +type PoolStatus struct { | 
|  | 179 | +	// GatewayRef indicates the gateway that observed state of InferencePool. | 
|  | 180 | +	GatewayRef ParentGatewayReference `json:"parentRef"` | 
|  | 181 | + | 
|  | 182 | +	// Conditions track the state of the InferencePool. | 
|  | 183 | +	// | 
|  | 184 | +	// Known condition types are: | 
|  | 185 | +	// | 
|  | 186 | +	// * "Accepted" | 
|  | 187 | +	// * "ResolvedRefs" | 
|  | 188 | +	// | 
|  | 189 | +	// +optional | 
|  | 190 | +	// +listType=map | 
|  | 191 | +	// +listMapKey=type | 
|  | 192 | +	// +kubebuilder:validation:MaxItems=8 | 
|  | 193 | +	// +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}} | 
|  | 194 | +	Conditions []metav1.Condition `json:"conditions,omitempty"` | 
|  | 195 | +} | 
|  | 196 | + | 
type (
	// InferencePoolConditionType is a type of condition for the InferencePool.
	InferencePoolConditionType string

	// InferencePoolReason is the reason for a given InferencePoolConditionType.
	InferencePoolReason string
)
|  | 202 | + | 
|  | 203 | +const ( | 
|  | 204 | +	// This condition indicates whether the InferencePool has been accepted or rejected | 
|  | 205 | +	// by a Gateway, and why. | 
|  | 206 | +	// | 
|  | 207 | +	// Possible reasons for this condition to be True are: | 
|  | 208 | +	// | 
|  | 209 | +	// * "Accepted" | 
|  | 210 | +	// | 
|  | 211 | +	// Possible reasons for this condition to be False are: | 
|  | 212 | +	// | 
|  | 213 | +	// * "NotSupportedByGateway" | 
|  | 214 | +	// * "HTTPRouteNotAccepted" | 
|  | 215 | +	// | 
|  | 216 | +	// Possible reasons for this condition to be Unknown are: | 
|  | 217 | +	// | 
|  | 218 | +	// * "Pending" | 
|  | 219 | +	// | 
|  | 220 | +	// Controllers MAY raise this condition with other reasons, but should | 
|  | 221 | +	// prefer to use the reasons listed above to improve interoperability. | 
|  | 222 | +	InferencePoolConditionAccepted InferencePoolConditionType = "Accepted" | 
|  | 223 | + | 
|  | 224 | +	// This reason is used with the "Accepted" condition when the InferencePool has been | 
|  | 225 | +	// accepted by the Gateway. | 
|  | 226 | +	InferencePoolReasonAccepted InferencePoolReason = "Accepted" | 
|  | 227 | + | 
|  | 228 | +	// This reason is used with the "Accepted" condition when the InferencePool | 
|  | 229 | +	// has not been accepted by a Gateway because the Gateway does not support | 
|  | 230 | +	// InferencePool as a backend. | 
|  | 231 | +	InferencePoolReasonNotSupportedByGateway InferencePoolReason = "NotSupportedByGateway" | 
|  | 232 | + | 
|  | 233 | +	// This reason is used with the "Accepted" condition when the InferencePool is | 
|  | 234 | +	// referenced by an HTTPRoute that has been rejected by the Gateway. The user | 
|  | 235 | +	// should inspect the status of the referring HTTPRoute for the specific reason. | 
|  | 236 | +	InferencePoolReasonHTTPRouteNotAccepted InferencePoolReason = "HTTPRouteNotAccepted" | 
|  | 237 | + | 
|  | 238 | +	// This reason is used with the "Accepted" when a controller has not yet | 
|  | 239 | +	// reconciled the InferencePool. | 
|  | 240 | +	InferencePoolReasonPending InferencePoolReason = "Pending" | 
|  | 241 | +) | 
|  | 242 | + | 
|  | 243 | +const ( | 
|  | 244 | +	// This condition indicates whether the controller was able to resolve all | 
|  | 245 | +	// the object references for the InferencePool. | 
|  | 246 | +	// | 
|  | 247 | +	// Possible reasons for this condition to be True are: | 
|  | 248 | +	// | 
|  | 249 | +	// * "ResolvedRefs" | 
|  | 250 | +	// | 
|  | 251 | +	// Possible reasons for this condition to be False are: | 
|  | 252 | +	// | 
|  | 253 | +	// * "InvalidExtensionRef" | 
|  | 254 | +	// | 
|  | 255 | +	// Controllers MAY raise this condition with other reasons, but should | 
|  | 256 | +	// prefer to use the reasons listed above to improve interoperability. | 
|  | 257 | +	InferencePoolConditionResolvedRefs InferencePoolConditionType = "ResolvedRefs" | 
|  | 258 | + | 
|  | 259 | +	// This reason is used with the "ResolvedRefs" condition when the condition | 
|  | 260 | +	// is true. | 
|  | 261 | +	InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs" | 
|  | 262 | + | 
|  | 263 | +	// This reason is used with the "ResolvedRefs" condition when the | 
|  | 264 | +	// ExtensionRef is invalid in some way. This can include an unsupported kind | 
|  | 265 | +	// or API group, or a reference to a resource that can not be found. | 
|  | 266 | +	InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef" | 
|  | 267 | +) | 
|  | 268 | + | 
|  | 269 | +// ParentGatewayReference identifies an API object including its namespace, | 
|  | 270 | +// defaulting to Gateway. | 
|  | 271 | +type ParentGatewayReference struct { | 
|  | 272 | +	// Group is the group of the referent. | 
|  | 273 | +	// | 
|  | 274 | +	// +optional | 
|  | 275 | +	// +kubebuilder:default="gateway.networking.k8s.io" | 
|  | 276 | +	Group *Group `json:"group"` | 
|  | 277 | + | 
|  | 278 | +	// Kind is kind of the referent. For example "Gateway". | 
|  | 279 | +	// | 
|  | 280 | +	// +optional | 
|  | 281 | +	// +kubebuilder:default=Gateway | 
|  | 282 | +	Kind *Kind `json:"kind"` | 
|  | 283 | + | 
|  | 284 | +	// Name is the name of the referent. | 
|  | 285 | +	Name ObjectName `json:"name"` | 
|  | 286 | + | 
|  | 287 | +	// Namespace is the namespace of the referent.  If not present, | 
|  | 288 | +	// the namespace of the referent is assumed to be the same as | 
|  | 289 | +	// the namespace of the referring object. | 
|  | 290 | +	// | 
|  | 291 | +	// +optional | 
|  | 292 | +	Namespace *Namespace `json:"namespace,omitempty"` | 
|  | 293 | +} | 
0 commit comments