feat(inference): add modelId and quantization to updateDeploymentRequest (scaleway#2473)

scaleway-bot · web-flow · commit 0bc5eb4643f9 · 2025-03-18T07:05:13.000Z
diff --git a/api/inference/v1/inference_sdk.go b/api/inference/v1/inference_sdk.go
@@ -773,6 +773,12 @@ type UpdateDeploymentRequest struct {
 
 	// MaxSize: defines the new maximum size of the pool.
 	MaxSize *uint32 `json:"max_size,omitempty"`
+
+	// ModelID: ID of the model to set on the deployment.
+	ModelID *string `json:"model_id,omitempty"`
+
+	// Quantization: quantization to use for the deployment.
+	Quantization *DeploymentQuantization `json:"quantization,omitempty"`
 }
 
 // UpdateEndpointRequest: update endpoint request.