Skip to content

Commit db14992

Browse files
committed
Merge branch 'main' into adding_kubvernor_implementor_v2
2 parents 96f2ceb + 3ab409d commit db14992

File tree

156 files changed

+5327
-2868
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

156 files changed

+5327
-2868
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ For deeper insights and more advanced concepts, refer to our [proposals](/docs/p
6060

6161
## Technical Overview
6262

63-
This extension upgrades an [ext-proc](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/ext_proc_filter) capable proxy or gateway - such as Envoy Gateway, kGateway, or the GKE Gateway - to become an **[inference gateway]** - supporting inference platform teams self-hosting Generative Models (with a current focus on large language models) on Kubernetes. This integration makes it easy to expose and control access to your local [OpenAI-compatible chat completion endpoints](https://platform.openai.com/docs/api-reference/chat) to other workloads on or off cluster, or to integrate your self-hosted models alongside model-as-a-service providers in a higher level **AI Gateway** like LiteLLM, Solo AI Gateway, or Apigee.
63+
This extension upgrades an [ext-proc](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/ext_proc_filter) capable proxy or gateway - such as Envoy Gateway, kgateway, or the GKE Gateway - to become an **[inference gateway]** - supporting inference platform teams self-hosting Generative Models (with a current focus on large language models) on Kubernetes. This integration makes it easy to expose and control access to your local [OpenAI-compatible chat completion endpoints](https://platform.openai.com/docs/api-reference/chat) to other workloads on or off cluster, or to integrate your self-hosted models alongside model-as-a-service providers in a higher level **AI Gateway** like LiteLLM, Solo AI Gateway, or Apigee.
6464

6565
The Inference Gateway:
6666

api/v1/inferencepool_types.go

Lines changed: 9 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,16 @@ type InferencePoolList struct {
5151

5252
// InferencePoolSpec defines the desired state of InferencePool
5353
type InferencePoolSpec struct {
54-
// Selector defines a map of labels to watch model server Pods
55-
// that should be included in the InferencePool.
56-
// In some cases, implementations may translate this field to a Service selector, so this matches the simple
57-
// map used for Service selectors instead of the full Kubernetes LabelSelector type.
58-
// If specified, it will be applied to match the model server pods in the same namespace as the InferencePool.
59-
// Cross namesoace selector is not supported.
54+
// Selector determines which Pods are members of this inference pool.
55+
// It matches Pods by their labels only within the same namespace; cross-namespace
56+
// selection is not supported.
57+
//
58+
// The structure of this LabelSelector is intentionally simple to be compatible
59+
// with Kubernetes Service selectors, as some implementations may translate
60+
// this configuration into a Service resource.
6061
//
6162
// +kubebuilder:validation:Required
62-
Selector map[LabelKey]LabelValue `json:"selector"`
63+
Selector LabelSelector `json:"selector"`
6364

6465
// TargetPortNumber defines the port number to access the selected model server Pods.
6566
// The number must be in the range 1 to 65535.
@@ -69,37 +70,12 @@ type InferencePoolSpec struct {
6970
// +kubebuilder:validation:Required
7071
TargetPortNumber int32 `json:"targetPortNumber"`
7172

72-
// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint
73-
// picker service that picks endpoints for the requests routed to this pool.
74-
EndpointPickerConfig `json:",inline"`
75-
}
76-
77-
// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension.
78-
// This type is intended to be a union of mutually exclusive configuration options that we may add in the future.
79-
type EndpointPickerConfig struct {
8073
// Extension configures an endpoint picker as an extension service.
81-
//
82-
// +kubebuilder:validation:Required
8374
ExtensionRef *Extension `json:"extensionRef,omitempty"`
8475
}
8576

8677
// Extension specifies how to configure an extension that runs the endpoint picker.
8778
type Extension struct {
88-
// Reference is a reference to a service extension. When ExtensionReference is invalid,
89-
// a 5XX status code MUST be returned for the request that would have otherwise been routed
90-
// to the invalid backend.
91-
ExtensionReference `json:",inline"`
92-
93-
// ExtensionConnection configures the connection between the Gateway and the extension.
94-
ExtensionConnection `json:",inline"`
95-
}
96-
97-
// ExtensionReference is a reference to the extension.
98-
//
99-
// If a reference is invalid, the implementation MUST update the `ResolvedRefs`
100-
// Condition on the InferencePool's status to `status: False`. A 5XX status code MUST be returned
101-
// for the request that would have otherwise been routed to the invalid backend.
102-
type ExtensionReference struct {
10379
// Group is the group of the referent.
10480
// The default value is "", representing the Core API group.
10581
//
@@ -132,10 +108,7 @@ type ExtensionReference struct {
132108
//
133109
// +optional
134110
PortNumber *PortNumber `json:"portNumber,omitempty"`
135-
}
136111

137-
// ExtensionConnection encapsulates options that configures the connection to the extension.
138-
type ExtensionConnection struct {
139112
// Configures how the gateway handles the case when the extension is not responsive.
140113
// Defaults to failClose.
141114
//
@@ -261,7 +234,7 @@ const (
261234
InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs"
262235

263236
// This reason is used with the "ResolvedRefs" condition when the
264-
// ExtensionRef is invalid in some way. This can include an unsupported kind
237+
// Extension is invalid in some way. This can include an unsupported kind
265238
// or API group, or a reference to a resource that can not be found.
266239
InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef"
267240
)

api/v1/shared_types.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,15 @@ type LabelKey string
127127
// +kubebuilder:validation:MaxLength=63
128128
// +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`
129129
type LabelValue string
130+
131+
// LabelSelector defines a query for resources based on their labels.
132+
// This simplified version uses only the matchLabels field.
133+
type LabelSelector struct {
134+
// matchLabels contains a set of required {key,value} pairs.
135+
// An object must match every label in this map to be selected.
136+
// The matching logic is an AND operation on all entries.
137+
//
138+
// +kubebuilder:validation:Required
139+
// +kubebuilder:validation:MaxItems=64
140+
MatchLabels map[LabelKey]LabelValue `json:"matchLabels,omitempty" protobuf:"bytes,1,rep,name=matchLabels"`
141+
}

api/v1/zz_generated.deepcopy.go

Lines changed: 35 additions & 67 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apix/config/v1alpha1/defaults.go

Lines changed: 0 additions & 47 deletions
This file was deleted.

apix/config/v1alpha1/endpointpickerconfig_types.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ package v1alpha1
1818

1919
import (
2020
"encoding/json"
21+
"fmt"
2122

2223
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2324
)
2425

25-
// +k8s:defaulter-gen=true
2626
// +kubebuilder:object:root=true
2727

2828
// EndpointPickerConfig is the Schema for the endpointpickerconfigs API
@@ -41,6 +41,14 @@ type EndpointPickerConfig struct {
4141
SchedulingProfiles []SchedulingProfile `json:"schedulingProfiles"`
4242
}
4343

44+
func (cfg EndpointPickerConfig) String() string {
45+
return fmt.Sprintf(
46+
"{Plugins: %v, SchedulingProfiles: %v}",
47+
cfg.Plugins,
48+
cfg.SchedulingProfiles,
49+
)
50+
}
51+
4452
// PluginSpec contains the information that describes a plugin that
4553
// will be instantiated.
4654
type PluginSpec struct {
@@ -61,6 +69,14 @@ type PluginSpec struct {
6169
Parameters json.RawMessage `json:"parameters"`
6270
}
6371

72+
func (ps PluginSpec) String() string {
73+
var parameters string
74+
if ps.Parameters != nil {
75+
parameters = fmt.Sprintf(", Parameters: %s", ps.Parameters)
76+
}
77+
return fmt.Sprintf("{%s/%s%s}", ps.Name, ps.Type, parameters)
78+
}
79+
6480
// SchedulingProfile contains the information to create a SchedulingProfile
6581
// entry to be used by the scheduler.
6682
type SchedulingProfile struct {
@@ -75,6 +91,10 @@ type SchedulingProfile struct {
7591
Plugins []SchedulingPlugin `json:"plugins"`
7692
}
7793

94+
func (sp SchedulingProfile) String() string {
95+
return fmt.Sprintf("{Name: %s, Plugins: %v}", sp.Name, sp.Plugins)
96+
}
97+
7898
// SchedulingPlugin describes a plugin that will be associated with a
7999
// SchedulingProfile entry.
80100
type SchedulingPlugin struct {
@@ -90,3 +110,11 @@ type SchedulingPlugin struct {
90110
// Weight is the weight to be used if this plugin is a Scorer.
91111
Weight *int `json:"weight"`
92112
}
113+
114+
func (sp SchedulingPlugin) String() string {
115+
var weight string
116+
if sp.Weight != nil {
117+
weight = fmt.Sprintf(", Weight: %d", *sp.Weight)
118+
}
119+
return fmt.Sprintf("{PluginRef: %s%s}", sp.PluginRef, weight)
120+
}

apix/config/v1alpha1/zz_generated.defaults.go

Lines changed: 0 additions & 38 deletions
This file was deleted.

0 commit comments

Comments
 (0)