kubernetes-sigs
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎api/v1/inferencepool_types.go‎
Lines changed: 9 additions & 36 deletions b/‎api/v1/inferencepool_types.go‎
Lines changed: 9 additions & 36 deletions
diff --git a/‎api/v1/shared_types.go‎
Lines changed: 12 additions & 0 deletions b/‎api/v1/shared_types.go‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎api/v1/zz_generated.deepcopy.go‎
Lines changed: 35 additions & 67 deletions b/‎api/v1/zz_generated.deepcopy.go‎
Lines changed: 35 additions & 67 deletions
diff --git a/‎apix/config/v1alpha1/defaults.go‎
Lines changed: 0 additions & 47 deletions b/‎apix/config/v1alpha1/defaults.go‎
Lines changed: 0 additions & 47 deletions
diff --git a/‎apix/config/v1alpha1/endpointpickerconfig_types.go‎
Lines changed: 29 additions & 1 deletion b/‎apix/config/v1alpha1/endpointpickerconfig_types.go‎
Lines changed: 29 additions & 1 deletion
diff --git a/‎apix/config/v1alpha1/zz_generated.defaults.go‎
Lines changed: 0 additions & 38 deletions b/‎apix/config/v1alpha1/zz_generated.defaults.go‎
Lines changed: 0 additions & 38 deletions
@@ -60,7 +60,7 @@ For deeper insights and more advanced concepts, refer to our [proposals](/docs/p
 
 ## Technical Overview
 
-This extension upgrades an [ext-proc](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/ext_proc_filter) capable proxy or gateway - such as Envoy Gateway, kGateway, or the GKE Gateway - to become an **[inference gateway]** - supporting inference platform teams self-hosting Generative Models (with a current focus on large language models) on Kubernetes. This integration makes it easy to expose and control access to your local [OpenAI-compatible chat completion endpoints](https://platform.openai.com/docs/api-reference/chat) to other workloads on or off cluster, or to integrate your self-hosted models alongside model-as-a-service providers in a higher level **AI Gateway** like LiteLLM, Solo AI Gateway, or Apigee.
+This extension upgrades an [ext-proc](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/ext_proc_filter) capable proxy or gateway - such as Envoy Gateway, kgateway, or the GKE Gateway - to become an **[inference gateway]** - supporting inference platform teams self-hosting Generative Models (with a current focus on large language models) on Kubernetes. This integration makes it easy to expose and control access to your local [OpenAI-compatible chat completion endpoints](https://platform.openai.com/docs/api-reference/chat) to other workloads on or off cluster, or to integrate your self-hosted models alongside model-as-a-service providers in a higher level **AI Gateway** like LiteLLM, Solo AI Gateway, or Apigee.
 
 The Inference Gateway:
 
 
@@ -51,15 +51,16 @@ type InferencePoolList struct {
 
 // InferencePoolSpec defines the desired state of InferencePool
 type InferencePoolSpec struct {
-	// Selector defines a map of labels to watch model server Pods
-	// that should be included in the InferencePool.
-	// In some cases, implementations may translate this field to a Service selector, so this matches the simple
-	// map used for Service selectors instead of the full Kubernetes LabelSelector type.
-	// If specified, it will be applied to match the model server pods in the same namespace as the InferencePool.
-	// Cross namesoace selector is not supported.
+	// Selector determines which Pods are members of this inference pool.
+	// It matches Pods by their labels only within the same namespace; cross-namespace
+	// selection is not supported.
+	//
+	// The structure of this LabelSelector is intentionally simple to be compatible
+	// with Kubernetes Service selectors, as some implementations may translate
+	// this configuration into a Service resource.
 	//
 	// +kubebuilder:validation:Required
-	Selector map[LabelKey]LabelValue `json:"selector"`
+	Selector LabelSelector `json:"selector"`
 
 	// TargetPortNumber defines the port number to access the selected model server Pods.
 	// The number must be in the range 1 to 65535.
@@ -69,37 +70,12 @@ type InferencePoolSpec struct {
 	// +kubebuilder:validation:Required
 	TargetPortNumber int32 `json:"targetPortNumber"`
 
-	// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint
-	// picker service that picks endpoints for the requests routed to this pool.
-	EndpointPickerConfig `json:",inline"`
-}
-
-// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension.
-// This type is intended to be a union of mutually exclusive configuration options that we may add in the future.
-type EndpointPickerConfig struct {
 	// Extension configures an endpoint picker as an extension service.
-	//
-	// +kubebuilder:validation:Required
 	ExtensionRef *Extension `json:"extensionRef,omitempty"`
 }
 
 // Extension specifies how to configure an extension that runs the endpoint picker.
 type Extension struct {
-	// Reference is a reference to a service extension. When ExtensionReference is invalid,
-	// a 5XX status code MUST be returned for the request that would have otherwise been routed
-	// to the invalid backend.
-	ExtensionReference `json:",inline"`
-
-	// ExtensionConnection configures the connection between the Gateway and the extension.
-	ExtensionConnection `json:",inline"`
-}
-
-// ExtensionReference is a reference to the extension.
-//
-// If a reference is invalid, the implementation MUST update the `ResolvedRefs`
-// Condition on the InferencePool's status to `status: False`. A 5XX status code MUST be returned
-// for the request that would have otherwise been routed to the invalid backend.
-type ExtensionReference struct {
 	// Group is the group of the referent.
 	// The default value is "", representing the Core API group.
 	//
@@ -132,10 +108,7 @@ type ExtensionReference struct {
 	//
 	// +optional
 	PortNumber *PortNumber `json:"portNumber,omitempty"`
-}
 
-// ExtensionConnection encapsulates options that configures the connection to the extension.
-type ExtensionConnection struct {
 	// Configures how the gateway handles the case when the extension is not responsive.
 	// Defaults to failClose.
 	//
@@ -261,7 +234,7 @@ const (
 	InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs"
 
 	// This reason is used with the "ResolvedRefs" condition when the
-	// ExtensionRef is invalid in some way. This can include an unsupported kind
+	// Extension is invalid in some way. This can include an unsupported kind
 	// or API group, or a reference to a resource that can not be found.
 	InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef"
 )
 
@@ -127,3 +127,15 @@ type LabelKey string
 // +kubebuilder:validation:MaxLength=63
 // +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`
 type LabelValue string
+
+// LabelSelector defines a query for resources based on their labels.
+// This simplified version uses only the matchLabels field.
+type LabelSelector struct {
+	// matchLabels contains a set of required {key,value} pairs.
+	// An object must match every label in this map to be selected.
+	// The matching logic is an AND operation on all entries.
+	//
+	// +kubebuilder:validation:Required
+	// +kubebuilder:validation:MaxItems=64
+	MatchLabels map[LabelKey]LabelValue `json:"matchLabels,omitempty" protobuf:"bytes,1,rep,name=matchLabels"`
+}
@@ -18,11 +18,11 @@ package v1alpha1
 
 import (
 	"encoding/json"
+	"fmt"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
-// +k8s:defaulter-gen=true
 // +kubebuilder:object:root=true
 
 // EndpointPickerConfig is the Schema for the endpointpickerconfigs API
@@ -41,6 +41,14 @@ type EndpointPickerConfig struct {
 	SchedulingProfiles []SchedulingProfile `json:"schedulingProfiles"`
 }
 
+func (cfg EndpointPickerConfig) String() string {
+	return fmt.Sprintf(
+		"{Plugins: %v, SchedulingProfiles: %v}",
+		cfg.Plugins,
+		cfg.SchedulingProfiles,
+	)
+}
+
 // PluginSpec contains the information that describes a plugin that
 // will be instantiated.
 type PluginSpec struct {
@@ -61,6 +69,14 @@ type PluginSpec struct {
 	Parameters json.RawMessage `json:"parameters"`
 }
 
+func (ps PluginSpec) String() string {
+	var parameters string
+	if ps.Parameters != nil {
+		parameters = fmt.Sprintf(", Parameters: %s", ps.Parameters)
+	}
+	return fmt.Sprintf("{%s/%s%s}", ps.Name, ps.Type, parameters)
+}
+
 // SchedulingProfile contains the information to create a SchedulingProfile
 // entry to be used by the scheduler.
 type SchedulingProfile struct {
@@ -75,6 +91,10 @@ type SchedulingProfile struct {
 	Plugins []SchedulingPlugin `json:"plugins"`
 }
 
+func (sp SchedulingProfile) String() string {
+	return fmt.Sprintf("{Name: %s, Plugins: %v}", sp.Name, sp.Plugins)
+}
+
 // SchedulingPlugin describes a plugin that will be associated with a
 // SchedulingProfile entry.
 type SchedulingPlugin struct {
@@ -90,3 +110,11 @@ type SchedulingPlugin struct {
 	// Weight is the weight to be used if this plugin is a Scorer.
 	Weight *int `json:"weight"`
 }
+
+func (sp SchedulingPlugin) String() string {
+	var weight string
+	if sp.Weight != nil {
+		weight = fmt.Sprintf(", Weight: %d", *sp.Weight)
+	}
+	return fmt.Sprintf("{PluginRef: %s%s}", sp.PluginRef, weight)
+}