
Commit f4f803b

Initial commit changing InferenceModel to InferenceObjectives (#1255)
Parent: 62b5e60 · Commit: f4f803b

58 files changed: 926 additions & 925 deletions


PROJECT

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ resources:
     namespaced: true
   domain: x-k8s.io
   group: inference
-  kind: InferenceModel
+  kind: InferenceObjective
   path: sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1
   version: v1alpha1
 - api:
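The PROJECT entry above only updates kubebuilder's scaffolding metadata for the renamed kind; at runtime the same rename surfaces in the GroupVersionKind that clients address. Below is a minimal, illustrative Go sketch (not code from this commit) that assembles a GVK from the group, domain, and version fields shown above; the exact group string used by the repository's generated registration code may differ, so treat it as an assumption.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

func main() {
	// Assumed group string, assembled from the PROJECT fields above
	// (group "inference" + domain "x-k8s.io"); the generated register
	// code in the repository is authoritative.
	gvk := schema.GroupVersionKind{
		Group:   "inference.x-k8s.io",
		Version: "v1alpha1",
		Kind:    "InferenceObjective", // renamed from "InferenceModel" in this commit
	}

	// Using unstructured avoids depending on the typed API package and
	// its generated DeepCopy methods in this standalone sketch.
	obj := &unstructured.Unstructured{}
	obj.SetGroupVersionKind(gvk)
	obj.SetNamespace("default")
	obj.SetName("my-objective") // hypothetical object name

	fmt.Println(obj.GroupVersionKind(), obj.GetName())
}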

apix/v1alpha2/inferencemodel_types.go renamed to apix/v1alpha2/inferenceobjective_types.go

Lines changed: 30 additions & 30 deletions
@@ -20,7 +20,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
-// InferenceModel is the Schema for the InferenceModels API.
+// InferenceObjective is the Schema for the InferenceObjectives API.
 //
 // +kubebuilder:object:root=true
 // +kubebuilder:subresource:status
@@ -30,44 +30,44 @@ import (
 // +kubebuilder:printcolumn:name="Criticality",type=string,JSONPath=`.spec.criticality`
 // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
 // +genclient
-type InferenceModel struct {
+type InferenceObjective struct {
 	metav1.TypeMeta   `json:",inline"`
 	metav1.ObjectMeta `json:"metadata,omitempty"`
 
-	Spec   InferenceModelSpec   `json:"spec,omitempty"`
-	Status InferenceModelStatus `json:"status,omitempty"`
+	Spec   InferenceObjectiveSpec   `json:"spec,omitempty"`
+	Status InferenceObjectiveStatus `json:"status,omitempty"`
 }
 
-// InferenceModelList contains a list of InferenceModel.
+// InferenceObjectiveList contains a list of InferenceObjective.
 //
 // +kubebuilder:object:root=true
-type InferenceModelList struct {
+type InferenceObjectiveList struct {
 	metav1.TypeMeta `json:",inline"`
 	metav1.ListMeta `json:"metadata,omitempty"`
-	Items           []InferenceModel `json:"items"`
+	Items           []InferenceObjective `json:"items"`
 }
 
-// InferenceModelSpec represents the desired state of a specific model use case. This resource is
+// InferenceObjectiveSpec represents the desired state of a specific model use case. This resource is
 // managed by the "Inference Workload Owner" persona.
 //
 // The Inference Workload Owner persona is someone that trains, verifies, and
 // leverages a large language model from a model frontend, drives the lifecycle
 // and rollout of new versions of those models, and defines the specific
 // performance and latency goals for the model. These workloads are
 // expected to operate within an InferencePool sharing compute capacity with other
-// InferenceModels, defined by the Inference Platform Admin.
+// InferenceObjectives, defined by the Inference Platform Admin.
 //
-// InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool,
+// InferenceObjective's modelName (not the ObjectMeta name) is unique for a given InferencePool,
 // if the name is reused, an error will be shown on the status of a
-// InferenceModel that attempted to reuse. The oldest InferenceModel, based on
+// InferenceObjective that attempted to reuse. The oldest InferenceObjective, based on
 // creation timestamp, will be selected to remain valid. In the event of a race
 // condition, one will be selected at random.
-type InferenceModelSpec struct {
+type InferenceObjectiveSpec struct {
 	// ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
 	// ModelNames must be unique for a referencing InferencePool
 	// (names can be reused for a different pool in the same cluster).
 	// The modelName with the oldest creation timestamp is retained, and the incoming
-	// InferenceModel's Ready status is set to false with a corresponding reason.
+	// InferenceObjective's Ready status is set to false with a corresponding reason.
 	// In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
 	// Names can be reserved without an underlying model configured in the pool.
 	// This can be done by specifying a target model and setting the weight to zero,
@@ -80,7 +80,7 @@ type InferenceModelSpec struct {
 
 	// Criticality defines how important it is to serve the model compared to other models referencing the same pool.
 	// Criticality impacts how traffic is handled in resource constrained situations. It handles this by
-	// queuing or rejecting requests of lower criticality. InferenceModels of an equivalent Criticality will
+	// queuing or rejecting requests of lower criticality. InferenceObjectives of an equivalent Criticality will
 	// fairly share resources over throughput of tokens. In the future, the metric used to calculate fairness,
 	// and the proportionality of fairness will be configurable.
 	//
@@ -151,7 +151,7 @@ const (
 // Gateway assumes that the model exists on the model server and it's the
 // responsibility of the user to validate a correct match. Should a model fail
 // to exist at request time, the error is processed by the Inference Gateway
-// and emitted on the appropriate InferenceModel object.
+// and emitted on the appropriate InferenceObjective object.
 type TargetModel struct {
 	// Name is the name of the adapter or base model, as expected by the ModelServer.
 	//
@@ -178,9 +178,9 @@ type TargetModel struct {
 	Weight *int32 `json:"weight,omitempty"`
 }
 
-// InferenceModelStatus defines the observed state of InferenceModel
-type InferenceModelStatus struct {
-	// Conditions track the state of the InferenceModel.
+// InferenceObjectiveStatus defines the observed state of InferenceObjective
+type InferenceObjectiveStatus struct {
+	// Conditions track the state of the InferenceObjective.
 	//
 	// Known condition types are:
 	//
@@ -194,14 +194,14 @@ type InferenceModelStatus struct {
 	Conditions []metav1.Condition `json:"conditions,omitempty"`
 }
 
-// InferenceModelConditionType is a type of condition for the InferenceModel.
-type InferenceModelConditionType string
+// InferenceObjectiveConditionType is a type of condition for the InferenceObjective.
+type InferenceObjectiveConditionType string
 
-// InferenceModelConditionReason is the reason for a given InferenceModelConditionType.
-type InferenceModelConditionReason string
+// InferenceObjectiveConditionReason is the reason for a given InferenceObjectiveConditionType.
+type InferenceObjectiveConditionReason string
 
 const (
-	// ModelConditionAccepted indicates if the model config is accepted, and if not, why.
+	// ObjectiveConditionAccepted indicates if the objective config is accepted, and if not, why.
 	//
 	// Possible reasons for this condition to be True are:
 	//
@@ -215,15 +215,15 @@ const (
 	//
 	// * "Pending"
 	//
-	ModelConditionAccepted InferenceModelConditionType = "Accepted"
+	ObjectiveConditionAccepted InferenceObjectiveConditionType = "Accepted"
 
-	// ModelReasonAccepted is the desired state. Model conforms to the state of the pool.
-	ModelReasonAccepted InferenceModelConditionReason = "Accepted"
+	// ObjectiveReasonAccepted is the desired state. Model conforms to the state of the pool.
+	ObjectiveReasonAccepted InferenceObjectiveConditionReason = "Accepted"
 
-	// ModelReasonNameInUse is used when a given ModelName already exists within the pool.
+	// ObjectiveReasonNameInUse is used when a given ModelName already exists within the pool.
 	// Details about naming conflict resolution are on the ModelName field itself.
-	ModelReasonNameInUse InferenceModelConditionReason = "ModelNameInUse"
+	ObjectiveReasonNameInUse InferenceObjectiveConditionReason = "ModelNameInUse"
 
-	// ModelReasonPending is the initial state, and indicates that the controller has not yet reconciled the InferenceModel.
-	ModelReasonPending InferenceModelConditionReason = "Pending"
+	// ObjectiveReasonPending is the initial state, and indicates that the controller has not yet reconciled the InferenceObjective.
+	ObjectiveReasonPending InferenceObjectiveConditionReason = "Pending"
 )
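For orientation only: a self-contained sketch of how a controller might record the renamed Accepted condition using the constants introduced above. The condition type and reason names are copied from the diff; the stand-in struct and the use of apimachinery's meta.SetStatusCondition helper are assumptions for illustration, not this project's actual controller code.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Local stand-ins mirroring the renamed API above, so the sketch compiles
// without importing apix/v1alpha2.
type InferenceObjectiveConditionType string
type InferenceObjectiveConditionReason string

const (
	ObjectiveConditionAccepted InferenceObjectiveConditionType   = "Accepted"
	ObjectiveReasonAccepted    InferenceObjectiveConditionReason = "Accepted"
)

type InferenceObjectiveStatus struct {
	Conditions []metav1.Condition `json:"conditions,omitempty"`
}

func main() {
	status := InferenceObjectiveStatus{}

	// meta.SetStatusCondition upserts by Type and only updates
	// LastTransitionTime when the condition's status value changes.
	meta.SetStatusCondition(&status.Conditions, metav1.Condition{
		Type:    string(ObjectiveConditionAccepted),
		Status:  metav1.ConditionTrue,
		Reason:  string(ObjectiveReasonAccepted),
		Message: "objective accepted by the referenced InferencePool", // illustrative message
	})

	fmt.Printf("%s=%s (%s)\n", status.Conditions[0].Type, status.Conditions[0].Status, status.Conditions[0].Reason)
}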

apix/v1alpha2/zz_generated.deepcopy.go

Lines changed: 19 additions & 19 deletions
Generated file; diff not rendered by default.

apix/v1alpha2/zz_generated.register.go

Lines changed: 2 additions & 2 deletions
Generated file; diff not rendered by default.
