
Commit fe2dcef

Initial commit (#1307)
1 parent 6193b60 commit fe2dcef

29 files changed (+158, -387 lines)

apix/v1alpha2/inferenceobjective_types.go

Lines changed: 0 additions & 14 deletions
@@ -63,20 +63,6 @@ type InferenceObjectiveList struct {
 // creation timestamp, will be selected to remain valid. In the event of a race
 // condition, one will be selected at random.
 type InferenceObjectiveSpec struct {
-    // ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
-    // ModelNames must be unique for a referencing InferencePool
-    // (names can be reused for a different pool in the same cluster).
-    // The modelName with the oldest creation timestamp is retained, and the incoming
-    // InferenceObjective's Ready status is set to false with a corresponding reason.
-    // In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
-    // Names can be reserved without an underlying model configured in the pool.
-    // This can be done by specifying a target model and setting the weight to zero,
-    // an error will be returned specifying that no valid target model is found.
-    //
-    // +kubebuilder:validation:MaxLength=256
-    // +kubebuilder:validation:Required
-    // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="modelName is immutable"
-    ModelName string `json:"modelName"`
 
     // Criticality defines how important it is to serve the model compared to other models referencing the same pool.
     // Criticality impacts how traffic is handled in resource constrained situations. It handles this by
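For orientation, here is a minimal sketch of what InferenceObjectiveSpec reads like once ModelName is removed. Only Criticality and PoolRef are visible in this diff and in the CRD change below; the placeholder types (Criticality, ObjectName, PoolObjectReference) and the exact field markers are assumptions, not copied from the repository.

package v1alpha2sketch

// Placeholder types standing in for the real API definitions.
type Criticality string
type ObjectName string

// PoolObjectReference is a stand-in for the real poolRef type.
type PoolObjectReference struct {
    Name ObjectName `json:"name"`
}

// InferenceObjectiveSpec as it reads after this commit: ModelName is gone,
// leaving the criticality and the pool reference.
type InferenceObjectiveSpec struct {
    // Criticality defines how important it is to serve the model compared to
    // other models referencing the same pool.
    Criticality *Criticality `json:"criticality,omitempty"`

    // PoolRef is a reference to the inference pool; the pool must exist in
    // the same namespace.
    PoolRef PoolObjectReference `json:"poolRef"`
}

With the model name no longer part of the spec, an objective is identified purely by its object name and namespace, which is what the simplified reconciler further down relies on.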

client-go/applyconfiguration/apix/v1alpha2/inferenceobjectivespec.go

Lines changed: 0 additions & 9 deletions
Some generated files are not rendered by default.

config/crd/bases/inference.networking.x-k8s.io_inferenceobjectives.yaml

Lines changed: 0 additions & 17 deletions
@@ -83,22 +83,6 @@ spec:
             - Standard
             - Sheddable
             type: string
-          modelName:
-            description: |-
-              ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
-              ModelNames must be unique for a referencing InferencePool
-              (names can be reused for a different pool in the same cluster).
-              The modelName with the oldest creation timestamp is retained, and the incoming
-              InferenceObjective's Ready status is set to false with a corresponding reason.
-              In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
-              Names can be reserved without an underlying model configured in the pool.
-              This can be done by specifying a target model and setting the weight to zero,
-              an error will be returned specifying that no valid target model is found.
-            maxLength: 256
-            type: string
-            x-kubernetes-validations:
-            - message: modelName is immutable
-              rule: self == oldSelf
           poolRef:
             description: PoolRef is a reference to the inference pool, the pool
               must exist in the same namespace.
@@ -171,7 +155,6 @@ spec:
            - message: Weights should be set for all models, or none of the models.
              rule: self.all(model, has(model.weight)) || self.all(model, !has(model.weight))
          required:
-          - modelName
          - poolRef
          type: object
        status:

config/manifests/inferenceobjective.yaml

Lines changed: 0 additions & 3 deletions
@@ -3,7 +3,6 @@ kind: InferenceObjective
 metadata:
   name: food-review
 spec:
-  modelName: food-review
   criticality: Standard
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -16,7 +15,6 @@ kind: InferenceObjective
 metadata:
   name: base-model
 spec:
-  modelName: meta-llama/Llama-3.1-8B-Instruct
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -26,7 +24,6 @@ kind: InferenceObjective
 metadata:
   name: base-model-cpu
 spec:
-  modelName: Qwen/Qwen2.5-1.5B-Instruct
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct

config/manifests/regression-testing/inferenceobjective.yaml

Lines changed: 0 additions & 16 deletions
@@ -3,7 +3,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-0
 spec:
-  modelName: adapter-0
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -18,7 +17,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-1
 spec:
-  modelName: adapter-1
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -33,7 +31,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-2
 spec:
-  modelName: adapter-2
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -48,7 +45,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-3
 spec:
-  modelName: adapter-3
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -63,7 +59,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-4
 spec:
-  modelName: adapter-4
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -78,7 +73,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-5
 spec:
-  modelName: adapter-5
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -93,7 +87,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-6
 spec:
-  modelName: adapter-6
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -108,7 +101,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-7
 spec:
-  modelName: adapter-7
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -123,7 +115,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-8
 spec:
-  modelName: adapter-8
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -138,7 +129,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-9
 spec:
-  modelName: adapter-9
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -153,7 +143,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-10
 spec:
-  modelName: adapter-10
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -168,7 +157,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-11
 spec:
-  modelName: adapter-11
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -183,7 +171,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-12
 spec:
-  modelName: adapter-12
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -199,7 +186,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-13
 spec:
-  modelName: adapter-13
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -215,7 +201,6 @@ kind: InferenceObjective
 metadata:
   name: adapter-14
 spec:
-  modelName: adapter-14
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct
@@ -231,7 +216,6 @@ kind: InferenceObjective
 metadata:
   name: base-model
 spec:
-  modelName: meta-llama/Llama-3.1-8B-Instruct
   criticality: Critical
   poolRef:
     name: vllm-llama3-8b-instruct

conformance/tests/epp_unavailable_fail_open.yaml

Lines changed: 0 additions & 1 deletion
@@ -6,7 +6,6 @@ metadata:
   name: conformance-fake-model-server
   namespace: gateway-conformance-app-backend
 spec:
-  modelName: conformance-fake-model
   criticality: Critical # Mark it as critical to bypass the saturation check since the model server is fake and don't have such metrics.
   poolRef:
     name: secondary-inference-pool

conformance/tests/gateway_following_epp_routing.yaml

Lines changed: 0 additions & 1 deletion
@@ -6,7 +6,6 @@ metadata:
   name: conformance-fake-model-server
   namespace: gateway-conformance-app-backend
 spec:
-  modelName: conformance-fake-model
   criticality: Critical # Mark it as critical to bypass the saturation check since the model server is fake and don't have such metrics.
   poolRef:
     name: primary-inference-pool

docs/proposals/002-api-proposal/README.md

Lines changed: 0 additions & 2 deletions
@@ -341,7 +341,6 @@ kind: InferenceModel
 metadata:
   name: sql-code-assist
 spec:
-  modelName: sql-code-assist
   poolRef:
     name: base-model-pool
 ---
@@ -350,7 +349,6 @@ kind: InferenceModel
 metadata:
   name: npc-bot
 spec:
-  modelName: npc-bot
   criticality: Critical
   targetModels:
   - name: npc-bot-v1

pkg/epp/controller/inferenceobjective_reconciler.go

Lines changed: 5 additions & 48 deletions
@@ -18,10 +18,8 @@ package controller
 
 import (
     "context"
-    "fmt"
 
     "k8s.io/apimachinery/pkg/api/errors"
-    "k8s.io/apimachinery/pkg/types"
     ctrl "sigs.k8s.io/controller-runtime"
     "sigs.k8s.io/controller-runtime/pkg/client"
     "sigs.k8s.io/controller-runtime/pkg/event"
@@ -58,60 +56,19 @@ func (c *InferenceObjectiveReconciler) Reconcile(ctx context.Context, req ctrl.R
 
     if notFound || !infObjective.DeletionTimestamp.IsZero() || infObjective.Spec.PoolRef.Name != v1alpha2.ObjectName(c.PoolGKNN.Name) {
         // InferenceObjective object got deleted or changed the referenced pool.
-        err := c.handleObjectiveDeleted(ctx, req.NamespacedName)
-        return ctrl.Result{}, err
+        c.Datastore.ObjectiveDelete(req.NamespacedName)
+        return ctrl.Result{}, nil
     }
 
     // Add or update if the InferenceObjective instance has a creation timestamp older than the existing entry of the model.
-    logger = logger.WithValues("poolRef", infObjective.Spec.PoolRef).WithValues("modelName", infObjective.Spec.ModelName)
-    if !c.Datastore.ObjectiveSetIfOlder(infObjective) {
-        logger.Info("Skipping InferenceObjective, existing instance has older creation timestamp")
-    } else {
-        logger.Info("Added/Updated InferenceObjective")
-    }
+    logger = logger.WithValues("poolRef", infObjective.Spec.PoolRef)
+    c.Datastore.ObjectiveSet(infObjective)
+    logger.Info("Added/Updated InferenceObjective")
 
     return ctrl.Result{}, nil
 }
 
-func (c *InferenceObjectiveReconciler) handleObjectiveDeleted(ctx context.Context, req types.NamespacedName) error {
-    logger := log.FromContext(ctx)
-
-    // We will lookup and delete the modelName associated with this object, and search for
-    // other instances referencing the same modelName if exist, and store the oldest in
-    // its place. This ensures that the InferenceObjective with the oldest creation
-    // timestamp is active.
-    existing := c.Datastore.ObjectiveDelete(req)
-    if existing == nil {
-        // No entry exists in the first place, nothing to do.
-        return nil
-    }
-    logger.Info("InferenceObjective removed from datastore", "poolRef", existing.Spec.PoolRef, "modelName", existing.Spec.ModelName)
-
-    // TODO(#409): replace this backfill logic with one that is based on InferenceObjective Ready conditions once those are set by an external controller.
-    updated, err := c.Datastore.ObjectiveResync(ctx, c.Reader, existing.Spec.ModelName)
-    if err != nil {
-        return err
-    }
-    if updated {
-        logger.Info("Model replaced.", "modelName", existing.Spec.ModelName)
-    }
-    return nil
-}
-
-func indexInferenceObjectivesByModelName(obj client.Object) []string {
-    m, ok := obj.(*v1alpha2.InferenceObjective)
-    if !ok {
-        return nil
-    }
-    return []string{m.Spec.ModelName}
-}
-
 func (c *InferenceObjectiveReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error {
-    // Create an index on ModelName for InferenceObjective objects.
-    indexer := mgr.GetFieldIndexer()
-    if err := indexer.IndexField(ctx, &v1alpha2.InferenceObjective{}, datastore.ModelNameIndexKey, indexInferenceObjectivesByModelName); err != nil {
-        return fmt.Errorf("setting index on ModelName for InferenceObjective: %w", err)
-    }
     return ctrl.NewControllerManagedBy(mgr).
         For(&v1alpha2.InferenceObjective{}).
         WithEventFilter(predicate.Funcs{
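The simplified reconciler reduces the datastore to two calls: ObjectiveSet on add/update and ObjectiveDelete on deletion or pool change. Below is an illustrative in-memory store with exactly those two methods, keyed by types.NamespacedName as in the new Reconcile code; the struct name, field layout, and the stand-in InferenceObjective type are assumptions, not the project's actual datastore implementation.

package datastoresketch

import (
    "sync"

    "k8s.io/apimachinery/pkg/types"
)

// InferenceObjective is a stand-in for v1alpha2.InferenceObjective.
type InferenceObjective struct {
    Namespace string
    Name      string
}

// objectiveStore is an illustrative in-memory store exposing only the two
// operations the simplified reconciler uses.
type objectiveStore struct {
    mu         sync.RWMutex
    objectives map[types.NamespacedName]*InferenceObjective
}

func newObjectiveStore() *objectiveStore {
    return &objectiveStore{objectives: map[types.NamespacedName]*InferenceObjective{}}
}

// ObjectiveSet stores (or overwrites) the objective under its namespaced name.
// No creation-timestamp comparison is needed: the object name is the key.
func (s *objectiveStore) ObjectiveSet(obj *InferenceObjective) {
    s.mu.Lock()
    defer s.mu.Unlock()
    s.objectives[types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}] = obj
}

// ObjectiveDelete removes the entry, if any, for the given namespaced name.
func (s *objectiveStore) ObjectiveDelete(key types.NamespacedName) {
    s.mu.Lock()
    defer s.mu.Unlock()
    delete(s.objectives, key)
}

Because entries are keyed by object name rather than by modelName, the old creation-timestamp tie-breaking (ObjectiveSetIfOlder), the ModelName backfill (ObjectiveResync), and the ModelName field index have no equivalent here.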

0 commit comments