Skip to content

Commit e12197b

Browse files
authored
feat(api): Introduce InferenceModelRewrite API (#1816)
* Add InferenceModelRewrite API. * poolRef pointer, modelRewrite required, remove default status. * Add back the default value on condition.
1 parent 72df012 commit e12197b

25 files changed

+1759
-21
lines changed
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1alpha2
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
)
22+
23+
// InferenceModelRewrite is the Schema for the InferenceModelRewrite API.
// It is declared as a root object with a status subresource; the printer
// columns below surface the referenced pool name (.spec.poolRef.name) and
// the object's creation timestamp.
24+
// +kubebuilder:object:root=true
25+
// +kubebuilder:subresource:status
26+
// +kubebuilder:printcolumn:name="Inference Pool",type=string,JSONPath=`.spec.poolRef.name`
27+
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
28+
// +genclient
29+
type InferenceModelRewrite struct {
30+
metav1.TypeMeta `json:",inline"`
31+
metav1.ObjectMeta `json:"metadata,omitempty"`
32+
33+
// Spec is the desired state of the rewrite.
Spec InferenceModelRewriteSpec `json:"spec,omitempty"`
34+
// Status is the observed state of the rewrite.
Status InferenceModelRewriteStatus `json:"status,omitempty"`
35+
}
36+
37+
// InferenceModelRewriteList contains a list of InferenceModelRewrite.
38+
//
39+
// +kubebuilder:object:root=true
40+
type InferenceModelRewriteList struct {
41+
metav1.TypeMeta `json:",inline"`
42+
metav1.ListMeta `json:"metadata,omitempty"`
43+
// Items holds the InferenceModelRewrite objects in this list. The JSON tag
// has no omitempty, so the "items" key is always present when serialized.
Items []InferenceModelRewrite `json:"items"`
44+
}
45+
46+
// InferenceModelRewriteSpec defines the desired state of InferenceModelRewrite.
47+
type InferenceModelRewriteSpec struct {
48+
// PoolRef is a reference to the inference pool.
49+
// +kubebuilder:validation:Required
50+
PoolRef *PoolObjectReference `json:"poolRef"`
51+
52+
// Rules are the ordered set of rules for rewriting inference requests.
53+
// The first rule to match a request will be used.
54+
55+
//
56+
// --- Precedence and Conflict Resolution ---
57+
// If multiple InferenceModelRewrite resources target the same
58+
// InferencePool, the controller will merge them based on precedence.
59+
//
60+
// **Timestamp Wins:** If two rules from different rewrites all matches,
61+
// the rule from the *oldest*
62+
// InferenceModelRewrite resource (determined by
63+
// metadata.creationTimestamp) will be used.
64+
// +required
65+
Rules []InferenceModelRewriteRule `json:"rules"`
66+
}
67+
68+
// InferenceModelRewriteRule defines the match criteria and corresponding action.
69+
//
70+
// A specific model name can only be matched by one rule across all
71+
// rules attached to the same InferencePool. If multiple rules attempt
72+
// to match the same model name, the oldest rule (by creationTimestamp)
73+
// will be the only one considered valid.
74+
type InferenceModelRewriteRule struct {
75+
// Matches defines the criteria for matching a request.
76+
// If multiple match criteria are specified, a request matches if
77+
// ANY of the criteria are satisfied (logical OR).
78+
// If empty, the rule matches all requests.
79+
80+
// +optional
81+
Matches []Match `json:"matches,omitempty"`
82+
83+
// --- Actions ---
84+
// Targets defines how to distribute traffic across a set of
85+
// weighted model targets. This is used for traffic splitting, A/B tests,
86+
// or canary rollouts.
87+
// +optional
88+
// +kubebuilder:validation:MinItems=1
89+
//
90+
Targets []TargetModel `json:"split,omitempty"`
91+
}
92+
93+
// TargetModel defines a weighted model destination for traffic distribution.
type TargetModel struct {
	// Weight is used to determine the proportion of traffic that should be
	// sent to this model when multiple target models are specified.
	//
	// Weight defines the proportion of requests forwarded to the specified
	// model. This is computed as weight/(sum of all weights in this
	// TargetModels list). For non-zero values, there may be some epsilon from
	// the exact proportion defined here depending on the precision an
	// implementation supports. Weight is not a percentage and the sum of
	// weights does not need to equal 100.
	//
	// If a weight is set for any targetModel, it must be set for all targetModels.
	// Conversely weights are optional, so long as ALL targetModels do not specify a weight.
	//
	// A zero Weight means "unset": it is omitted from the serialized object
	// instead of being emitted as 0, which would fail the Minimum=1 validation.
	//
	// +optional
	// +kubebuilder:validation:Minimum=1
	// +kubebuilder:validation:Maximum=1000000
	Weight int32 `json:"weight,omitempty"`

	// --- Destination Types ---

	// ModelRewrite specifies a static model name destination.
	// +required
	ModelRewrite string `json:"modelRewrite"`
}
119+
120+
// Match defines the criteria for matching LLM requests.
// Model is currently its only criterion.
121+
type Match struct {
122+
// Model specifies the criteria for matching the 'model' field
123+
// within the JSON request body.
// NOTE(review): the field is marked +required but is a pointer tagged with
// omitempty, so a nil Model is dropped on serialization; confirm that the
// schema validation is what rejects a missing model.
124+
// +required
125+
Model *ModelMatch `json:"model,omitempty"`
126+
}
127+
128+
// ModelMatch defines how to match against the model name in the request body.
129+
type ModelMatch struct {
130+
// Type specifies the kind of string matching to use.
131+
// The only supported value is "Exact", which is also the default applied
// by the kubebuilder default marker below when the field is omitted.
132+
// +optional
133+
// +kubebuilder:default=Exact
134+
Type *MatchValidationType `json:"type,omitempty"`
135+
136+
// Value is the model name string to match against.
// It must be at least one character long (MinLength=1).
137+
// +required
138+
// +kubebuilder:validation:MinLength=1
139+
Value string `json:"value"`
140+
}
141+
142+
// MatchValidationType specifies the type of string matching to use.
// The enum marker below restricts it to the single value "Exact".
143+
// +kubebuilder:validation:Enum=Exact
144+
type MatchValidationType string
145+
146+
const (
147+
// MatchExact indicates that the model name must match exactly.
// It is the only MatchValidationType value declared here.
148+
MatchExact MatchValidationType = "Exact"
149+
)
150+
151+
// InferenceModelRewriteStatus defines the observed state of InferenceModelRewrite.
152+
type InferenceModelRewriteStatus struct {
153+
// Conditions track the state of the InferenceModelRewrite.
154+
//
155+
// Known condition types are:
156+
//
157+
// * "Accepted"
//
// The list is keyed by condition type, capped at 8 entries, and defaults to
// a single "Accepted" condition with status "Unknown" and reason "Pending"
// (see the markers below).
158+
//
159+
// +optional
160+
// +listType=map
161+
// +listMapKey=type
162+
// +kubebuilder:validation:MaxItems=8
163+
// +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
164+
Conditions []metav1.Condition `json:"conditions,omitempty"`
165+
}
166+
167+
// InferenceModelRewriteConditionType is a type of condition for the InferenceModelRewrite.
// It is a named string type, used to give condition-type constants a distinct type.
168+
type InferenceModelRewriteConditionType string
169+
170+
// InferenceModelRewriteConditionReason is the reason for a given InferenceModelRewriteConditionType.
// It is a named string type, used to give condition-reason constants a distinct type.
171+
type InferenceModelRewriteConditionReason string
172+
173+
// Condition type and reason values declared for InferenceModelRewrite.
const (
174+
// RewriteConditionAccepted indicates if the rewrite is accepted, and if not, why.
175+
// This is the primary condition for this resource.
176+
//
177+
// Possible reasons for this condition to be True are:
178+
//
179+
// * "Accepted"
180+
//
181+
// Possible reasons for this condition to be Unknown are:
182+
//
183+
// * "Pending"
184+
//
185+
RewriteConditionAccepted InferenceModelRewriteConditionType = "Accepted"
186+
187+
// RewriteReasonAccepted indicates the rewrite is valid, non-conflicting,
188+
// and has been successfully applied to the inference pool.
// It is the reason listed above for the Accepted condition being True.
189+
RewriteReasonAccepted InferenceModelRewriteConditionReason = "Accepted"
190+
191+
// RewriteReasonPending is the initial state, and indicates that the
192+
// controller has not yet reconciled the InferenceModelRewrite.
// It is the reason listed above for the Accepted condition being Unknown.
193+
RewriteReasonPending InferenceModelRewriteConditionReason = "Pending"
194+
)

apix/v1alpha2/inferenceobjective_types.go

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -78,27 +78,6 @@ type InferenceObjectiveSpec struct {
7878
PoolRef PoolObjectReference `json:"poolRef"`
7979
}
8080

81-
// PoolObjectReference identifies an API object within the namespace of the
82-
// referrer.
83-
type PoolObjectReference struct {
84-
// Group is the group of the referent.
85-
//
86-
// +optional
87-
// +kubebuilder:default="inference.networking.k8s.io"
88-
Group Group `json:"group,omitempty"`
89-
90-
// Kind is kind of the referent. For example "InferencePool".
91-
//
92-
// +optional
93-
// +kubebuilder:default="InferencePool"
94-
Kind Kind `json:"kind,omitempty"`
95-
96-
// Name is the name of the referent.
97-
//
98-
// +kubebuilder:validation:Required
99-
Name ObjectName `json:"name"`
100-
}
101-
10281
// InferenceObjectiveStatus defines the observed state of InferenceObjective
10382
type InferenceObjectiveStatus struct {
10483
// Conditions track the state of the InferenceObjective.

apix/v1alpha2/shared_types.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,24 @@ type LabelKey string
127127
// +kubebuilder:validation:MaxLength=63
128128
// +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`
129129
type LabelValue string
130+
131+
// PoolObjectReference identifies an API object within the namespace of the
132+
// referrer.
// Only Name is required; Group and Kind fall back to the defaults declared
// on their markers below.
133+
type PoolObjectReference struct {
134+
// Group is the group of the referent.
// Defaults to "inference.networking.k8s.io" when omitted.
135+
//
136+
// +optional
137+
// +kubebuilder:default="inference.networking.k8s.io"
138+
Group Group `json:"group,omitempty"`
139+
140+
// Kind is the kind of the referent. For example "InferencePool".
// Defaults to "InferencePool" when omitted.
141+
//
142+
// +optional
143+
// +kubebuilder:default="InferencePool"
144+
Kind Kind `json:"kind,omitempty"`
145+
146+
// Name is the name of the referent.
147+
//
148+
// +kubebuilder:validation:Required
149+
Name ObjectName `json:"name"`
150+
}

0 commit comments

Comments
 (0)