@@ -20,7 +20,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
-// InferenceModel is the Schema for the InferenceModels API.
+// InferenceObjective is the Schema for the InferenceObjectives API.
 //
 // +kubebuilder:object:root=true
 // +kubebuilder:subresource:status
@@ -30,44 +30,44 @@ import (
 // +kubebuilder:printcolumn:name="Criticality",type=string,JSONPath=`.spec.criticality`
 // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
 // +genclient
-type InferenceModel struct {
+type InferenceObjective struct {
 	metav1.TypeMeta   `json:",inline"`
 	metav1.ObjectMeta `json:"metadata,omitempty"`
 
-	Spec   InferenceModelSpec   `json:"spec,omitempty"`
-	Status InferenceModelStatus `json:"status,omitempty"`
+	Spec   InferenceObjectiveSpec   `json:"spec,omitempty"`
+	Status InferenceObjectiveStatus `json:"status,omitempty"`
 }
 
-// InferenceModelList contains a list of InferenceModel.
+// InferenceObjectiveList contains a list of InferenceObjective.
 //
 // +kubebuilder:object:root=true
-type InferenceModelList struct {
+type InferenceObjectiveList struct {
 	metav1.TypeMeta `json:",inline"`
 	metav1.ListMeta `json:"metadata,omitempty"`
-	Items           []InferenceModel `json:"items"`
+	Items           []InferenceObjective `json:"items"`
 }
 
-// InferenceModelSpec represents the desired state of a specific model use case. This resource is
+// InferenceObjectiveSpec represents the desired state of a specific model use case. This resource is
 // managed by the "Inference Workload Owner" persona.
 //
 // The Inference Workload Owner persona is someone that trains, verifies, and
 // leverages a large language model from a model frontend, drives the lifecycle
 // and rollout of new versions of those models, and defines the specific
 // performance and latency goals for the model. These workloads are
 // expected to operate within an InferencePool sharing compute capacity with other
-// InferenceModels, defined by the Inference Platform Admin.
+// InferenceObjectives, defined by the Inference Platform Admin.
 //
-// InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool,
+// InferenceObjective's modelName (not the ObjectMeta name) is unique for a given InferencePool,
 // if the name is reused, an error will be shown on the status of a
-// InferenceModel that attempted to reuse. The oldest InferenceModel, based on
+// InferenceObjective that attempted to reuse. The oldest InferenceObjective, based on
 // creation timestamp, will be selected to remain valid. In the event of a race
 // condition, one will be selected at random.
-type InferenceModelSpec struct {
+type InferenceObjectiveSpec struct {
 	// ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
 	// ModelNames must be unique for a referencing InferencePool
 	// (names can be reused for a different pool in the same cluster).
 	// The modelName with the oldest creation timestamp is retained, and the incoming
-	// InferenceModel's Ready status is set to false with a corresponding reason.
+	// InferenceObjective's Ready status is set to false with a corresponding reason.
 	// In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
 	// Names can be reserved without an underlying model configured in the pool.
 	// This can be done by specifying a target model and setting the weight to zero,
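The comments in the hunk above describe how spec.modelName, rather than metadata.name, is the unit of uniqueness within an InferencePool. The following is a minimal sketch of that distinction in Go, assuming it compiles alongside these types; the package name, the string type of the ModelName field (whose declaration sits below the hunk boundary), and the omission of the pool-reference field are assumptions, not part of this diff.

// Sketch only: two InferenceObjective objects with distinct Kubernetes names
// but the same spec.modelName. Per the rules above, the objective with the
// oldest creationTimestamp keeps the modelName; the newer one is rejected on
// its status with a "ModelNameInUse" reason. Package name is assumed.
package v1alpha2

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func exampleObjectives() []InferenceObjective {
	older := InferenceObjective{
		ObjectMeta: metav1.ObjectMeta{Name: "chat-prod"},
		Spec:       InferenceObjectiveSpec{ModelName: "llama-chat"},
	}
	newer := InferenceObjective{
		ObjectMeta: metav1.ObjectMeta{Name: "chat-canary"},
		// Reuses the modelName already claimed by "chat-prod" in the same pool,
		// so this objective would be flagged on status rather than admitted.
		Spec: InferenceObjectiveSpec{ModelName: "llama-chat"},
	}
	return []InferenceObjective{older, newer}
}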
@@ -80,7 +80,7 @@ type InferenceModelSpec struct {
 
 	// Criticality defines how important it is to serve the model compared to other models referencing the same pool.
 	// Criticality impacts how traffic is handled in resource constrained situations. It handles this by
-	// queuing or rejecting requests of lower criticality. InferenceModels of an equivalent Criticality will
+	// queuing or rejecting requests of lower criticality. InferenceObjectives of an equivalent Criticality will
 	// fairly share resources over throughput of tokens. In the future, the metric used to calculate fairness,
 	// and the proportionality of fairness will be configurable.
 	//
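The Criticality comment above explains prioritization under resource pressure. A hedged sketch of setting it on a spec follows; the Criticality field declaration and its allowed values are not in this hunk, so the field name, its pointer type, and the Critical constant are assumptions.

// Sketch only: the Criticality field and the Critical constant used here are
// assumed; only the doc comment, not the declaration, appears in this diff.
package v1alpha2

func exampleCriticalObjective() InferenceObjectiveSpec {
	crit := Critical // assumed constant of an assumed Criticality type
	return InferenceObjectiveSpec{
		ModelName:   "llama-chat",
		Criticality: &crit, // under contention, lower-criticality requests queue or are rejected first
	}
}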
@@ -151,7 +151,7 @@ const (
 // Gateway assumes that the model exists on the model server and it's the
 // responsibility of the user to validate a correct match. Should a model fail
 // to exist at request time, the error is processed by the Inference Gateway
-// and emitted on the appropriate InferenceModel object.
+// and emitted on the appropriate InferenceObjective object.
 type TargetModel struct {
 	// Name is the name of the adapter or base model, as expected by the ModelServer.
 	//
@@ -178,9 +178,9 @@ type TargetModel struct {
 	Weight *int32 `json:"weight,omitempty"`
 }
 
-// InferenceModelStatus defines the observed state of InferenceModel
-type InferenceModelStatus struct {
-	// Conditions track the state of the InferenceModel.
+// InferenceObjectiveStatus defines the observed state of InferenceObjective
+type InferenceObjectiveStatus struct {
+	// Conditions track the state of the InferenceObjective.
 	//
 	// Known condition types are:
 	//
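TargetModel's Name maps to whatever the model server expects, and Weight is an optional pointer. Below is a brief sketch, in the same assumed package, of a slice of TargetModels splitting traffic and reserving a name with weight zero as the earlier comment describes; the spec field that holds this slice and the string type of Name are not shown in this diff and are assumed.

// Sketch only: TargetModel entries with optional weights. The int32 pointer
// helper is local; model names are illustrative.
package v1alpha2

func int32Ptr(v int32) *int32 { return &v }

func exampleTargets() []TargetModel {
	return []TargetModel{
		{Name: "llama-chat", Weight: int32Ptr(90)},             // base model receives most traffic
		{Name: "llama-chat-lora-canary", Weight: int32Ptr(10)}, // canary adapter
		{Name: "llama-chat-next", Weight: int32Ptr(0)},         // reserved name, serves no traffic yet
	}
}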
@@ -194,14 +194,14 @@ type InferenceModelStatus struct {
 	Conditions []metav1.Condition `json:"conditions,omitempty"`
 }
 
-// InferenceModelConditionType is a type of condition for the InferenceModel.
-type InferenceModelConditionType string
+// InferenceObjectiveConditionType is a type of condition for the InferenceObjective.
+type InferenceObjectiveConditionType string
 
-// InferenceModelConditionReason is the reason for a given InferenceModelConditionType.
-type InferenceModelConditionReason string
+// InferenceObjectiveConditionReason is the reason for a given InferenceObjectiveConditionType.
+type InferenceObjectiveConditionReason string
 
 const (
-	// ModelConditionAccepted indicates if the model config is accepted, and if not, why.
+	// ObjectiveConditionAccepted indicates if the objective config is accepted, and if not, why.
 	//
 	// Possible reasons for this condition to be True are:
 	//
@@ -215,15 +215,15 @@ const (
 	//
 	// * "Pending"
 	//
-	ModelConditionAccepted InferenceModelConditionType = "Accepted"
+	ObjectiveConditionAccepted InferenceObjectiveConditionType = "Accepted"
 
-	// ModelReasonAccepted is the desired state. Model conforms to the state of the pool.
-	ModelReasonAccepted InferenceModelConditionReason = "Accepted"
+	// ObjectiveReasonAccepted is the desired state. Model conforms to the state of the pool.
+	ObjectiveReasonAccepted InferenceObjectiveConditionReason = "Accepted"
 
-	// ModelReasonNameInUse is used when a given ModelName already exists within the pool.
+	// ObjectiveReasonNameInUse is used when a given ModelName already exists within the pool.
 	// Details about naming conflict resolution are on the ModelName field itself.
-	ModelReasonNameInUse InferenceModelConditionReason = "ModelNameInUse"
+	ObjectiveReasonNameInUse InferenceObjectiveConditionReason = "ModelNameInUse"
 
-	// ModelReasonPending is the initial state, and indicates that the controller has not yet reconciled the InferenceModel.
-	ModelReasonPending InferenceModelConditionReason = "Pending"
+	// ObjectiveReasonPending is the initial state, and indicates that the controller has not yet reconciled the InferenceObjective.
+	ObjectiveReasonPending InferenceObjectiveConditionReason = "Pending"
 )
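The condition type and reason values above are plain string constants intended for use with metav1.Condition. The sketch below, in the same assumed package, shows one way a controller might record them on an objective's status; the reconcile flow itself is hypothetical and not part of this diff, and it uses only the constants above plus the standard apimachinery condition helper.

// Sketch only: recording the Accepted condition using the constants above.
package v1alpha2

import (
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// markAccepted moves an objective out of its initial Pending state: it becomes
// Accepted when its modelName is unique in the pool, or is rejected with
// reason "ModelNameInUse" when an older objective already claims the name.
func markAccepted(obj *InferenceObjective, nameInUse bool) {
	cond := metav1.Condition{
		Type:               string(ObjectiveConditionAccepted),
		Status:             metav1.ConditionTrue,
		Reason:             string(ObjectiveReasonAccepted),
		ObservedGeneration: obj.Generation,
	}
	if nameInUse {
		cond.Status = metav1.ConditionFalse
		cond.Reason = string(ObjectiveReasonNameInUse)
		cond.Message = "modelName already claimed by an older InferenceObjective in this pool"
	}
	meta.SetStatusCondition(&obj.Status.Conditions, cond)
}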