diff --git a/scaleway-async/scaleway_async/inference/v1/api.py b/scaleway-async/scaleway_async/inference/v1/api.py
index 4a25975a8..e6d86e580 100644
--- a/scaleway-async/scaleway_async/inference/v1/api.py
+++ b/scaleway-async/scaleway_async/inference/v1/api.py
@@ -311,6 +311,8 @@ async def update_deployment(
         tags: Optional[List[str]] = None,
         min_size: Optional[int] = None,
         max_size: Optional[int] = None,
+        model_id: Optional[str] = None,
+        quantization: Optional[DeploymentQuantization] = None,
     ) -> Deployment:
         """
         Update a deployment.
@@ -321,6 +323,8 @@ async def update_deployment(
         :param tags: List of tags to apply to the deployment.
         :param min_size: Defines the new minimum size of the pool.
         :param max_size: Defines the new maximum size of the pool.
+        :param model_id: ID of the model to set for the deployment.
+        :param quantization: Quantization to use for the deployment.
         :return: :class:`Deployment <Deployment>`
 
         Usage:
@@ -347,6 +351,8 @@ async def update_deployment(
                     tags=tags,
                     min_size=min_size,
                     max_size=max_size,
+                    model_id=model_id,
+                    quantization=quantization,
                 ),
                 self.client,
             ),
diff --git a/scaleway-async/scaleway_async/inference/v1/marshalling.py b/scaleway-async/scaleway_async/inference/v1/marshalling.py
index 9f10b88e1..d314f97c0 100644
--- a/scaleway-async/scaleway_async/inference/v1/marshalling.py
+++ b/scaleway-async/scaleway_async/inference/v1/marshalling.py
@@ -639,6 +639,14 @@ def marshal_UpdateDeploymentRequest(
     if request.max_size is not None:
         output["max_size"] = request.max_size
 
+    if request.model_id is not None:
+        output["model_id"] = request.model_id
+
+    if request.quantization is not None:
+        output["quantization"] = marshal_DeploymentQuantization(
+            request.quantization, defaults
+        )
+
     return output
 
 
diff --git a/scaleway-async/scaleway_async/inference/v1/types.py b/scaleway-async/scaleway_async/inference/v1/types.py
index 9dd6fce4b..7278dc37c 100644
--- a/scaleway-async/scaleway_async/inference/v1/types.py
+++ b/scaleway-async/scaleway_async/inference/v1/types.py
@@ -750,6 +750,16 @@ class UpdateDeploymentRequest:
     Defines the new maximum size of the pool.
     """
 
+    model_id: Optional[str]
+    """
+    ID of the model to set for the deployment.
+    """
+
+    quantization: Optional[DeploymentQuantization]
+    """
+    Quantization to use for the deployment.
+    """
+
 
 @dataclass
 class UpdateEndpointRequest:
diff --git a/scaleway/scaleway/inference/v1/api.py b/scaleway/scaleway/inference/v1/api.py
index 4cecd31cd..9088a7835 100644
--- a/scaleway/scaleway/inference/v1/api.py
+++ b/scaleway/scaleway/inference/v1/api.py
@@ -309,6 +309,8 @@ def update_deployment(
         tags: Optional[List[str]] = None,
         min_size: Optional[int] = None,
         max_size: Optional[int] = None,
+        model_id: Optional[str] = None,
+        quantization: Optional[DeploymentQuantization] = None,
     ) -> Deployment:
         """
         Update a deployment.
@@ -319,6 +321,8 @@ def update_deployment(
         :param tags: List of tags to apply to the deployment.
         :param min_size: Defines the new minimum size of the pool.
         :param max_size: Defines the new maximum size of the pool.
+        :param model_id: ID of the model to set for the deployment.
+        :param quantization: Quantization to use for the deployment.
         :return: :class:`Deployment <Deployment>`
 
         Usage:
@@ -345,6 +349,8 @@ def update_deployment(
                     tags=tags,
                     min_size=min_size,
                     max_size=max_size,
+                    model_id=model_id,
+                    quantization=quantization,
                 ),
                 self.client,
             ),
diff --git a/scaleway/scaleway/inference/v1/marshalling.py b/scaleway/scaleway/inference/v1/marshalling.py
index 9f10b88e1..d314f97c0 100644
--- a/scaleway/scaleway/inference/v1/marshalling.py
+++ b/scaleway/scaleway/inference/v1/marshalling.py
@@ -639,6 +639,14 @@ def marshal_UpdateDeploymentRequest(
     if request.max_size is not None:
         output["max_size"] = request.max_size
 
+    if request.model_id is not None:
+        output["model_id"] = request.model_id
+
+    if request.quantization is not None:
+        output["quantization"] = marshal_DeploymentQuantization(
+            request.quantization, defaults
+        )
+
     return output
 
 
diff --git a/scaleway/scaleway/inference/v1/types.py b/scaleway/scaleway/inference/v1/types.py
index 9dd6fce4b..7278dc37c 100644
--- a/scaleway/scaleway/inference/v1/types.py
+++ b/scaleway/scaleway/inference/v1/types.py
@@ -750,6 +750,16 @@ class UpdateDeploymentRequest:
     Defines the new maximum size of the pool.
     """
 
+    model_id: Optional[str]
+    """
+    ID of the model to set for the deployment.
+    """
+
+    quantization: Optional[DeploymentQuantization]
+    """
+    Quantization to use for the deployment.
+    """
+
 
 @dataclass
 class UpdateEndpointRequest:
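For context, a minimal sketch of how the new update_deployment parameters might be called from the synchronous client once this change lands. The client bootstrap (Client.from_config_file_and_env), the placeholder UUIDs, and the DeploymentQuantization field shown (bits) are assumptions for illustration only; they are not defined by this diff.

# Hedged usage sketch for the new model_id / quantization parameters.
# The UUIDs are placeholders and the DeploymentQuantization `bits` field is an
# assumption; adjust to the real dataclass fields in your SDK version.
from scaleway import Client
from scaleway.inference.v1 import DeploymentQuantization, InferenceV1API

client = Client.from_config_file_and_env()  # standard SDK client bootstrap
api = InferenceV1API(client)

deployment = api.update_deployment(
    deployment_id="11111111-1111-1111-1111-111111111111",  # placeholder deployment UUID
    model_id="22222222-2222-2222-2222-222222222222",  # placeholder model UUID
    quantization=DeploymentQuantization(bits=8),  # assumed field
)
print(deployment.status)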