Skip to content

feat: avoid duplicating InferencePool Go types #1201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
261 changes: 2 additions & 259 deletions apix/v1alpha2/inferencepool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package v1alpha2

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
)

// InferencePool is the Schema for the InferencePools API.
Expand All @@ -28,17 +29,7 @@ import (
// +kubebuilder:subresource:status
// +kubebuilder:storageversion
// +genclient
type InferencePool struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

// Spec defines the desired state of InferencePool.
Spec InferencePoolSpec `json:"spec,omitempty"`

// Status defines the observed state of InferencePool.
//
// +kubebuilder:default={parent: {{parentRef: {kind: "Status", name: "default"}, conditions: {{type: "Accepted", status: "Unknown", reason: "Pending", message: "Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}}}}
Status InferencePoolStatus `json:"status,omitempty"`
}
type InferencePool v1.InferencePool

// NOTE(review): the declaration above is a defined type, not an alias
// (`type X = Y`). Per the Go spec a defined type does not inherit the methods
// of the type it is defined from, so any methods declared on v1.InferencePool
// are not available on this type — confirm the methods this package needs are
// still generated or declared for it.

// InferencePoolList contains a list of InferencePool.
//
Expand All @@ -48,251 +39,3 @@ type InferencePoolList struct {
metav1.ListMeta `json:"metadata,omitempty"`
Items []InferencePool `json:"items"`
}

// InferencePoolSpec defines the desired state of InferencePool.
type InferencePoolSpec struct {
// Selector defines a map of labels to watch model server Pods
// that should be included in the InferencePool.
// In some cases, implementations may translate this field to a Service selector, so this matches the simple
// map used for Service selectors instead of the full Kubernetes LabelSelector type.
// If specified, it will be applied to match the model server pods in the same namespace as the InferencePool.
// Cross-namespace selectors are not supported.
//
// +kubebuilder:validation:Required
Selector map[LabelKey]LabelValue `json:"selector"`

// TargetPortNumber defines the port number to access the selected model server Pods.
// The number must be in the range 1 to 65535.
//
// +kubebuilder:validation:Minimum=1
// +kubebuilder:validation:Maximum=65535
// +kubebuilder:validation:Required
TargetPortNumber int32 `json:"targetPortNumber"`

// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint
// picker service that picks endpoints for the requests routed to this pool.
EndpointPickerConfig `json:",inline"`
}

// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension.
// This type is intended to be a union of mutually exclusive configuration options that we may add in the future.
type EndpointPickerConfig struct {
// NOTE(review): ExtensionRef is marked Required below, yet it is a pointer
// serialized with omitempty — confirm this combination is intentional.

// ExtensionRef configures an endpoint picker as an extension service.
//
// +kubebuilder:validation:Required
ExtensionRef *Extension `json:"extensionRef,omitempty"`
}

// Extension specifies how to configure an extension that runs the endpoint picker.
type Extension struct {
// ExtensionReference is a reference to a service extension. When ExtensionReference is invalid,
// a 5XX status code MUST be returned for the request that would have otherwise been routed
// to the invalid backend.
ExtensionReference `json:",inline"`

// ExtensionConnection configures the connection between the Gateway and the extension.
ExtensionConnection `json:",inline"`
}

// ExtensionReference is a reference to the extension.
//
// Connections to this extension MUST use TLS by default. Implementations MAY
// provide a way to customize this connection to use cleartext, a different
// protocol, or custom TLS configuration.
//
// If a reference is invalid, the implementation MUST update the `ResolvedRefs`
// Condition on the InferencePool's status to `status: False`. A 5XX status code
// MUST be returned for the request that would have otherwise been routed to the
// invalid backend.
type ExtensionReference struct {
// Group is the group of the referent.
// The default value is "", representing the Core API group.
//
// +optional
// +kubebuilder:default=""
Group *Group `json:"group,omitempty"`

// Kind is the Kubernetes resource kind of the referent.
//
// Defaults to "Service" when not specified.
//
// ExternalName services can refer to CNAME DNS records that may live
// outside of the cluster and as such are difficult to reason about in
// terms of conformance. They also may not be safe to forward to (see
// CVE-2021-25740 for more information). Implementations MUST NOT
// support ExternalName Services.
//
// +optional
// +kubebuilder:default=Service
Kind *Kind `json:"kind,omitempty"`

// Name is the name of the referent.
//
// +kubebuilder:validation:Required
Name ObjectName `json:"name"`

// PortNumber is the port number on the service running the extension.
// When unspecified, implementations SHOULD infer a default value of 9002
// when the Kind is Service.
//
// +optional
PortNumber *PortNumber `json:"portNumber,omitempty"`
}

// ExtensionConnection encapsulates options that configure the connection to the extension.
type ExtensionConnection struct {
// NOTE(review): FailureMode is marked +optional but its JSON tag has no
// omitempty, unlike the optional pointer fields of ExtensionReference —
// confirm whether a nil value is meant to be serialized as null.

// FailureMode configures how the gateway handles the case when the extension is not responsive.
// Defaults to FailClose.
//
// +optional
// +kubebuilder:default="FailClose"
FailureMode *ExtensionFailureMode `json:"failureMode"`
}

// ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not
// responsive. The permitted values are FailOpen and FailClose.
//
// +kubebuilder:validation:Enum=FailOpen;FailClose
type ExtensionFailureMode string

// Supported ExtensionFailureMode values.
const (
// FailOpen specifies that the proxy should forward the request to an endpoint of its picking when the Endpoint Picker fails.
FailOpen ExtensionFailureMode = "FailOpen"
// FailClose specifies that the proxy should drop the request when the Endpoint Picker fails.
FailClose ExtensionFailureMode = "FailClose"
)

// InferencePoolStatus defines the observed state of InferencePool.
type InferencePoolStatus struct {
// NOTE(review): the Parents slice is serialized under the singular JSON key
// "parent" — confirm the key name is intentional before relying on it.

// Parents is a list of parent resources (usually Gateways) that are
// associated with the InferencePool, and the status of the InferencePool with respect to
// each parent.
//
// A maximum of 32 Gateways will be represented in this list. When the list contains
// `kind: Status, name: default`, it indicates that the InferencePool is not
// associated with any Gateway and a controller must perform the following:
//
// - Remove the parent when setting the "Accepted" condition.
// - Add the parent when the controller will no longer manage the InferencePool
// and no other parents exist.
//
// +kubebuilder:validation:MaxItems=32
Parents []PoolStatus `json:"parent,omitempty"`
}

// PoolStatus defines the observed state of InferencePool from a Gateway.
type PoolStatus struct {
// GatewayRef identifies the gateway that observed the state of the InferencePool.
GatewayRef ParentGatewayReference `json:"parentRef"`

// Conditions track the state of the InferencePool.
//
// Known condition types are:
//
// * "Accepted"
// * "ResolvedRefs"
//
// +optional
// +listType=map
// +listMapKey=type
// +kubebuilder:validation:MaxItems=8
// +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
Conditions []metav1.Condition `json:"conditions,omitempty"`
}

// InferencePoolConditionType is a type of condition for the InferencePool.
type InferencePoolConditionType string

// InferencePoolReason is the reason for a given InferencePoolConditionType.
type InferencePoolReason string

// Condition type and reasons for the "Accepted" condition.
const (
// InferencePoolConditionAccepted indicates whether the InferencePool has
// been accepted or rejected by a Gateway, and why.
//
// Possible reasons for this condition to be True are:
//
// * "Accepted"
//
// Possible reasons for this condition to be False are:
//
// * "NotSupportedByGateway"
// * "HTTPRouteNotAccepted"
//
// Possible reasons for this condition to be Unknown are:
//
// * "Pending"
//
// Controllers MAY raise this condition with other reasons, but should
// prefer to use the reasons listed above to improve interoperability.
InferencePoolConditionAccepted InferencePoolConditionType = "Accepted"

// InferencePoolReasonAccepted is used with the "Accepted" condition when
// the InferencePool has been accepted by the Gateway.
InferencePoolReasonAccepted InferencePoolReason = "Accepted"

// InferencePoolReasonNotSupportedByGateway is used with the "Accepted"
// condition when the InferencePool has not been accepted by a Gateway
// because the Gateway does not support InferencePool as a backend.
InferencePoolReasonNotSupportedByGateway InferencePoolReason = "NotSupportedByGateway"

// InferencePoolReasonHTTPRouteNotAccepted is used with the "Accepted"
// condition when the InferencePool is referenced by an HTTPRoute that has
// been rejected by the Gateway. The user should inspect the status of the
// referring HTTPRoute for the specific reason.
InferencePoolReasonHTTPRouteNotAccepted InferencePoolReason = "HTTPRouteNotAccepted"

// InferencePoolReasonPending is used with the "Accepted" condition when a
// controller has not yet reconciled the InferencePool.
InferencePoolReasonPending InferencePoolReason = "Pending"
)

// Condition type and reasons for the "ResolvedRefs" condition.
const (
// InferencePoolConditionResolvedRefs indicates whether the controller was
// able to resolve all the object references for the InferencePool.
//
// Possible reasons for this condition to be True are:
//
// * "ResolvedRefs"
//
// Possible reasons for this condition to be False are:
//
// * "InvalidExtensionRef"
//
// Controllers MAY raise this condition with other reasons, but should
// prefer to use the reasons listed above to improve interoperability.
InferencePoolConditionResolvedRefs InferencePoolConditionType = "ResolvedRefs"

// InferencePoolReasonResolvedRefs is used with the "ResolvedRefs"
// condition when the condition is true.
InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs"

// InferencePoolReasonInvalidExtensionRef is used with the "ResolvedRefs"
// condition when the ExtensionRef is invalid in some way. This can include
// an unsupported kind or API group, or a reference to a resource that can
// not be found.
InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef"
)

// ParentGatewayReference identifies an API object including its namespace,
// defaulting to Gateway.
type ParentGatewayReference struct {
// NOTE(review): Group and Kind are marked +optional but their JSON tags have
// no omitempty, unlike Namespace below — confirm whether nil values are
// meant to be serialized as null.

// Group is the group of the referent.
//
// +optional
// +kubebuilder:default="gateway.networking.k8s.io"
Group *Group `json:"group"`

// Kind is the kind of the referent. For example "Gateway".
//
// +optional
// +kubebuilder:default=Gateway
Kind *Kind `json:"kind"`

// Name is the name of the referent.
Name ObjectName `json:"name"`

// Namespace is the namespace of the referent. If not present,
// the namespace of the referent is assumed to be the same as
// the namespace of the referring object.
//
// +optional
Namespace *Namespace `json:"namespace,omitempty"`
}
Loading