diff --git a/src/sagemaker/huggingface/model.py b/src/sagemaker/huggingface/model.py
index 05b981d21b..3ca25fb3ce 100644
--- a/src/sagemaker/huggingface/model.py
+++ b/src/sagemaker/huggingface/model.py
@@ -218,6 +218,7 @@ def deploy(
         container_startup_health_check_timeout=None,
         inference_recommendation_id=None,
         explainer_config=None,
+        update_endpoint: Optional[bool] = False,
         **kwargs,
     ):
         """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -296,6 +297,11 @@ def deploy(
                 would like to deploy the model and endpoint with recommended parameters.
             explainer_config (sagemaker.explainer.ExplainerConfig): Specifies online explainability
                 configuration for use with Amazon SageMaker Clarify. (default: None)
+            update_endpoint (Optional[bool]): Flag to update the model in an existing
+                Amazon SageMaker endpoint. If True, this will deploy a new EndpointConfig to an
+                already existing endpoint and delete resources corresponding to the previous
+                EndpointConfig. Currently this is only supported for single model endpoints.
+                (default: False).
         Raises:
              ValueError: If arguments combination check failed in these circumstances:
                 - If no role is specified or
@@ -335,6 +341,7 @@ def deploy(
             container_startup_health_check_timeout=container_startup_health_check_timeout,
             inference_recommendation_id=inference_recommendation_id,
             explainer_config=explainer_config,
+            update_endpoint=update_endpoint,
             **kwargs,
         )
 
diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py
index e5ea1ea314..b281d9f489 100644
--- a/src/sagemaker/model.py
+++ b/src/sagemaker/model.py
@@ -53,7 +53,6 @@
 from sagemaker.model_card.schema_constraints import ModelApprovalStatusEnum
 from sagemaker.session import Session
 from sagemaker.model_metrics import ModelMetrics
-from sagemaker.deprecations import removed_kwargs
 from sagemaker.drift_check_baselines import DriftCheckBaselines
 from sagemaker.explainer import ExplainerConfig
 from sagemaker.metadata_properties import MetadataProperties
@@ -1386,6 +1385,7 @@ def deploy(
         routing_config: Optional[Dict[str, Any]] = None,
         model_reference_arn: Optional[str] = None,
         inference_ami_version: Optional[str] = None,
+        update_endpoint: Optional[bool] = False,
         **kwargs,
     ):
         """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -1497,6 +1497,11 @@ def deploy(
             inference_ami_version (Optional [str]): Specifies an option from a collection of preconfigured
              Amazon Machine Image (AMI) images. For a full list of options, see:
              https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_ProductionVariant.html
+            update_endpoint (Optional[bool]): Flag to update the model in an existing
+                Amazon SageMaker endpoint. If True, this will deploy a new EndpointConfig to an
+                already existing endpoint and delete resources corresponding to the previous
+                EndpointConfig. Only supported for single model endpoints; raises ``ValueError``
+                for inference-component-based endpoints. (default: False).
         Raises:
              ValueError: If arguments combination check failed in these circumstances:
                 - If no role is specified or
@@ -1512,8 +1517,6 @@ def deploy(
         """
         self.accept_eula = accept_eula
 
-        removed_kwargs("update_endpoint", kwargs)
-
         self._init_sagemaker_session_if_does_not_exist(instance_type)
         # Depending on the instance type, a local session (or) a session is initialized.
         self.role = resolve_value_from_config(
@@ -1628,6 +1631,10 @@ def deploy(
 
         # Support multiple models on same endpoint
         if endpoint_type == EndpointType.INFERENCE_COMPONENT_BASED:
+            if update_endpoint:
+                raise ValueError(
+                    "Currently update_endpoint is supported for single model endpoints"
+                )
             if endpoint_name:
                 self.endpoint_name = endpoint_name
             else:
@@ -1783,17 +1790,38 @@ def deploy(
             if is_explainer_enabled:
                 explainer_config_dict = explainer_config._to_request_dict()
 
-            self.sagemaker_session.endpoint_from_production_variants(
-                name=self.endpoint_name,
-                production_variants=[production_variant],
-                tags=tags,
-                kms_key=kms_key,
-                wait=wait,
-                data_capture_config_dict=data_capture_config_dict,
-                explainer_config_dict=explainer_config_dict,
-                async_inference_config_dict=async_inference_config_dict,
-                live_logging=endpoint_logging,
-            )
+            if update_endpoint:
+                endpoint_config_name = self.sagemaker_session.create_endpoint_config(
+                    name=self.name,
+                    model_name=self.name,
+                    initial_instance_count=initial_instance_count,
+                    instance_type=instance_type,
+                    accelerator_type=accelerator_type,
+                    tags=tags,
+                    kms_key=kms_key,
+                    data_capture_config_dict=data_capture_config_dict,
+                    volume_size=volume_size,
+                    model_data_download_timeout=model_data_download_timeout,
+                    container_startup_health_check_timeout=container_startup_health_check_timeout,
+                    explainer_config_dict=explainer_config_dict,
+                    async_inference_config_dict=async_inference_config_dict,
+                    serverless_inference_config=serverless_inference_config_dict,
+                    routing_config=routing_config,
+                    inference_ami_version=inference_ami_version,
+                )
+                self.sagemaker_session.update_endpoint(self.endpoint_name, endpoint_config_name)
+            else:
+                self.sagemaker_session.endpoint_from_production_variants(
+                    name=self.endpoint_name,
+                    production_variants=[production_variant],
+                    tags=tags,
+                    kms_key=kms_key,
+                    wait=wait,
+                    data_capture_config_dict=data_capture_config_dict,
+                    explainer_config_dict=explainer_config_dict,
+                    async_inference_config_dict=async_inference_config_dict,
+                    live_logging=endpoint_logging,
+                )
 
             if self.predictor_cls:
                 predictor = self.predictor_cls(self.endpoint_name, self.sagemaker_session)
diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py
index a7a518105c..9122f22e44 100644
--- a/src/sagemaker/serve/builder/model_builder.py
+++ b/src/sagemaker/serve/builder/model_builder.py
@@ -1602,6 +1602,7 @@ def deploy(
                 ResourceRequirements,
             ]
         ] = None,
+        update_endpoint: Optional[bool] = False,
     ) -> Union[Predictor, Transformer]:
         """Deploys the built Model.
 
@@ -1615,24 +1616,33 @@ def deploy(
                AsyncInferenceConfig, BatchTransformInferenceConfig, ResourceRequirements]]) :
                 Additional Config for different deployment types such as
                 serverless, async, batch and multi-model/container
+            update_endpoint (Optional[bool]): Flag to update the model in an existing
+                Amazon SageMaker endpoint. If True, ``endpoint_name`` is used as given
+                (``unique_name_from_base`` is skipped) and a new EndpointConfig is deployed to
+                it, deleting resources corresponding to the previous EndpointConfig. Raises
+                ``ValueError`` with ``ResourceRequirements``. Default: False
         Returns:
             Transformer for Batch Deployments
             Predictors for all others
         """
         if not hasattr(self, "built_model"):
             raise ValueError("Model Needs to be built before deploying")
-        endpoint_name = unique_name_from_base(endpoint_name)
+        if not update_endpoint:
+            endpoint_name = unique_name_from_base(endpoint_name)
+
         if not inference_config:  # Real-time Deployment
             return self.built_model.deploy(
                 instance_type=self.instance_type,
                 initial_instance_count=initial_instance_count,
                 endpoint_name=endpoint_name,
+                update_endpoint=update_endpoint,
             )
 
         if isinstance(inference_config, ServerlessInferenceConfig):
             return self.built_model.deploy(
                 serverless_inference_config=inference_config,
                 endpoint_name=endpoint_name,
+                update_endpoint=update_endpoint,
             )
 
         if isinstance(inference_config, AsyncInferenceConfig):
@@ -1641,6 +1651,7 @@ def deploy(
                 initial_instance_count=initial_instance_count,
                 async_inference_config=inference_config,
                 endpoint_name=endpoint_name,
+                update_endpoint=update_endpoint,
             )
 
         if isinstance(inference_config, BatchTransformInferenceConfig):
@@ -1652,6 +1663,10 @@ def deploy(
             return transformer
 
         if isinstance(inference_config, ResourceRequirements):
+            if update_endpoint:
+                raise ValueError(
+                    "Currently update_endpoint is supported for single model endpoints"
+                )
             # Multi Model and MultiContainer endpoints with Inference Component
             return self.built_model.deploy(
                 instance_type=self.instance_type,
@@ -1660,6 +1675,7 @@ def deploy(
                 resources=inference_config,
                 initial_instance_count=initial_instance_count,
                 role=self.role_arn,
+                update_endpoint=update_endpoint,
             )
 
         raise ValueError("Deployment Options not supported")
diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py
index b2398e03d1..38fa7f8c26 100644
--- a/src/sagemaker/session.py
+++ b/src/sagemaker/session.py
@@ -4488,6 +4488,10 @@ def create_endpoint_config(
         model_data_download_timeout=None,
         container_startup_health_check_timeout=None,
         explainer_config_dict=None,
+        async_inference_config_dict=None,
+        serverless_inference_config=None,
+        routing_config: Optional[Dict[str, Any]] = None,
+        inference_ami_version: Optional[str] = None,
     ):
         """Create an Amazon SageMaker endpoint configuration.
 
@@ -4525,6 +4529,30 @@ def create_endpoint_config(
                 -inference-algo-ping-requests
             explainer_config_dict (dict): Specifies configuration to enable explainers.
                 Default: None.
+            async_inference_config_dict (dict): Specifies
+                configuration related to async endpoint. Use this configuration when trying
+                to create async endpoint and make async inference. If empty config object
+                passed through, will use default config to deploy async endpoint. Deploy a
+                real-time endpoint if it's None. (default: None).
+            serverless_inference_config (dict):
+                Specifies configuration related to serverless endpoint. Use this configuration
+                when trying to create serverless endpoint and make serverless inference. If
+                empty object passed through, will use pre-defined values in
+                ``ServerlessInferenceConfig`` class to deploy serverless endpoint. Deploy an
+                instance based endpoint if it's None. (default: None).
+            routing_config (Optional[Dict[str, Any]]): Settings that control how the endpoint
+                routes incoming traffic to the instances that the endpoint hosts.
+                Currently, support dictionary key ``RoutingStrategy``.
+
+                .. code:: python
+
+                    {
+                        "RoutingStrategy":  sagemaker.enums.RoutingStrategy.RANDOM
+                    }
+            inference_ami_version (Optional [str]):
+             Specifies an option from a collection of preconfigured
+             Amazon Machine Image (AMI) images. For a full list of options, see:
+             https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_ProductionVariant.html
 
         Example:
             >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
@@ -4544,9 +4572,12 @@ def create_endpoint_config(
             instance_type,
             initial_instance_count,
             accelerator_type=accelerator_type,
+            serverless_inference_config=serverless_inference_config,
             volume_size=volume_size,
             model_data_download_timeout=model_data_download_timeout,
             container_startup_health_check_timeout=container_startup_health_check_timeout,
+            routing_config=routing_config,
+            inference_ami_version=inference_ami_version,
         )
         production_variants = [provided_production_variant]
         # Currently we just inject CoreDumpConfig.KmsKeyId from the config for production variant.
         production_variants = [provided_production_variant]
         # Currently we just inject CoreDumpConfig.KmsKeyId from the config for production variant.
@@ -4586,6 +4617,14 @@ def create_endpoint_config(
             )
             request["DataCaptureConfig"] = inferred_data_capture_config_dict
 
+        if async_inference_config_dict is not None:
+            inferred_async_inference_config_dict = update_nested_dictionary_with_values_from_config(
+                async_inference_config_dict,
+                ENDPOINT_CONFIG_ASYNC_INFERENCE_PATH,
+                sagemaker_session=self,
+            )
+            request["AsyncInferenceConfig"] = inferred_async_inference_config_dict
+
         if explainer_config_dict is not None:
             request["ExplainerConfig"] = explainer_config_dict
 
diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py
index c7f624114f..b384cbbbb5 100644
--- a/src/sagemaker/tensorflow/model.py
+++ b/src/sagemaker/tensorflow/model.py
@@ -358,6 +358,7 @@ def deploy(
         container_startup_health_check_timeout=None,
         inference_recommendation_id=None,
         explainer_config=None,
+        update_endpoint: Optional[bool] = False,
         **kwargs,
     ):
         """Deploy a Tensorflow ``Model`` to a SageMaker ``Endpoint``."""
@@ -383,6 +384,7 @@ def deploy(
             container_startup_health_check_timeout=container_startup_health_check_timeout,
             inference_recommendation_id=inference_recommendation_id,
             explainer_config=explainer_config,
+            update_endpoint=update_endpoint,
             **kwargs,
         )
 
diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py
index be961828f4..d9b126f651 100644
--- a/tests/unit/sagemaker/jumpstart/model/test_model.py
+++ b/tests/unit/sagemaker/jumpstart/model/test_model.py
@@ -794,7 +794,7 @@ def test_jumpstart_model_kwargs_match_parent_class(self):
         and reach out to JumpStart team."""
 
         init_args_to_skip: Set[str] = set(["model_reference_arn"])
-        deploy_args_to_skip: Set[str] = set(["kwargs", "model_reference_arn"])
+        deploy_args_to_skip: Set[str] = set(["kwargs", "model_reference_arn", "update_endpoint"])
         deploy_args_removed_at_deploy_time: Set[str] = set(["model_access_configs"])
 
         parent_class_init = Model.__init__
diff --git a/tests/unit/sagemaker/model/test_deploy.py b/tests/unit/sagemaker/model/test_deploy.py
index 7b99281b96..4167ca62c3 100644
--- a/tests/unit/sagemaker/model/test_deploy.py
+++ b/tests/unit/sagemaker/model/test_deploy.py
@@ -23,6 +23,7 @@
 from sagemaker.serverless import ServerlessInferenceConfig
 from sagemaker.explainer import ExplainerConfig
 from sagemaker.compute_resource_requirements.resource_requirements import ResourceRequirements
+from sagemaker.enums import EndpointType
 from tests.unit.sagemaker.inference_recommender.constants import (
     DESCRIBE_COMPILATION_JOB_RESPONSE,
     DESCRIBE_MODEL_PACKAGE_RESPONSE,
@@ -1051,3 +1052,143 @@ def test_deploy_with_name_and_resources(sagemaker_session):
         async_inference_config_dict=None,
         live_logging=False,
     )
+
+
+@patch("sagemaker.model.Model._create_sagemaker_model", Mock())
+@patch("sagemaker.utils.name_from_base", return_value=ENDPOINT_NAME)
+@patch("sagemaker.production_variant", return_value=BASE_PRODUCTION_VARIANT)
+def test_deploy_with_update_endpoint(production_variant, name_from_base, sagemaker_session):
+    model = Model(
+        MODEL_IMAGE, MODEL_DATA, role=ROLE, name=MODEL_NAME, sagemaker_session=sagemaker_session
+    )
+
+    # Stub create_endpoint_config so the returned config name can be traced into update_endpoint
+    endpoint_config_name = "test-config-name"
+    sagemaker_session.create_endpoint_config.return_value = endpoint_config_name
+
+    # Case 1: real-time (instance-based) deployment onto an already existing endpoint
+    endpoint_name = "existing-endpoint"
+    model.deploy(
+        instance_type=INSTANCE_TYPE,
+        initial_instance_count=INSTANCE_COUNT,
+        endpoint_name=endpoint_name,
+        update_endpoint=True,
+    )
+
+    # The new EndpointConfig is named after the model and built from the deploy() arguments
+    sagemaker_session.create_endpoint_config.assert_called_with(
+        name=MODEL_NAME,
+        model_name=MODEL_NAME,
+        initial_instance_count=INSTANCE_COUNT,
+        instance_type=INSTANCE_TYPE,
+        accelerator_type=None,
+        tags=None,
+        kms_key=None,
+        data_capture_config_dict=None,
+        volume_size=None,
+        model_data_download_timeout=None,
+        container_startup_health_check_timeout=None,
+        explainer_config_dict=None,
+        async_inference_config_dict=None,
+        serverless_inference_config=None,
+        routing_config=None,
+        inference_ami_version=None,
+    )
+
+    # The caller-supplied endpoint name is passed through together with the stubbed config name
+    sagemaker_session.update_endpoint.assert_called_with(endpoint_name, endpoint_config_name)
+
+    # Case 2: serverless; a default ServerlessInferenceConfig is expected to yield this dict
+    serverless_inference_config = ServerlessInferenceConfig()
+    serverless_inference_config_dict = {
+        "MemorySizeInMB": 2048,
+        "MaxConcurrency": 5,
+    }
+    model.deploy(
+        endpoint_name=endpoint_name,
+        update_endpoint=True,
+        serverless_inference_config=serverless_inference_config,
+    )
+
+    sagemaker_session.create_endpoint_config.assert_called_with(
+        name=MODEL_NAME,
+        model_name=MODEL_NAME,
+        initial_instance_count=None,
+        instance_type=None,
+        accelerator_type=None,
+        tags=None,
+        kms_key=None,
+        data_capture_config_dict=None,
+        volume_size=None,
+        model_data_download_timeout=None,
+        container_startup_health_check_timeout=None,
+        explainer_config_dict=None,
+        async_inference_config_dict=None,
+        serverless_inference_config=serverless_inference_config_dict,
+        routing_config=None,
+        inference_ami_version=None,
+    )
+
+    # assert_called_with checks the most recent call, i.e. the serverless update
+    sagemaker_session.update_endpoint.assert_called_with(endpoint_name, endpoint_config_name)
+
+    # Case 3: async inference; the dict below is the expected request form of the config
+    async_inference_config = AsyncInferenceConfig(
+        output_path="s3://bucket/output", failure_path="s3://bucket/failure"
+    )
+    async_inference_config_dict = {
+        "OutputConfig": {
+            "S3OutputPath": "s3://bucket/output",
+            "S3FailurePath": "s3://bucket/failure",
+        },
+    }
+    model.deploy(
+        endpoint_name=endpoint_name,
+        instance_type=INSTANCE_TYPE,
+        initial_instance_count=INSTANCE_COUNT,
+        update_endpoint=True,
+        async_inference_config=async_inference_config,
+    )
+
+    sagemaker_session.create_endpoint_config.assert_called_with(
+        name=MODEL_NAME,
+        model_name=MODEL_NAME,
+        initial_instance_count=INSTANCE_COUNT,
+        instance_type=INSTANCE_TYPE,
+        accelerator_type=None,
+        tags=None,
+        kms_key=None,
+        data_capture_config_dict=None,
+        volume_size=None,
+        model_data_download_timeout=None,
+        container_startup_health_check_timeout=None,
+        explainer_config_dict=None,
+        async_inference_config_dict=async_inference_config_dict,
+        serverless_inference_config=None,
+        routing_config=None,
+        inference_ami_version=None,
+    )
+
+    # assert_called_with checks the most recent call, i.e. the async update
+    sagemaker_session.update_endpoint.assert_called_with(endpoint_name, endpoint_config_name)
+
+
+@patch("sagemaker.model.Model._create_sagemaker_model", Mock())
+@patch("sagemaker.production_variant", return_value=BASE_PRODUCTION_VARIANT)
+def test_deploy_with_update_endpoint_inference_component(production_variant, sagemaker_session):
+    # update_endpoint=True must be rejected for inference-component-based endpoints.
+    expected_message = "Currently update_endpoint is supported for single model endpoints"
+    deploy_kwargs = dict(
+        endpoint_name="test-endpoint",
+        instance_type=INSTANCE_TYPE,
+        initial_instance_count=INSTANCE_COUNT,
+        update_endpoint=True,
+        resources=RESOURCES,
+        endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
+    )
+    model = Model(
+        MODEL_IMAGE, MODEL_DATA, role=ROLE, name=MODEL_NAME, sagemaker_session=sagemaker_session
+    )
+
+    with pytest.raises(ValueError, match=expected_message):
+        model.deploy(**deploy_kwargs)
diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py
index 107d65c301..6661c6e2bf 100644
--- a/tests/unit/sagemaker/serve/builder/test_model_builder.py
+++ b/tests/unit/sagemaker/serve/builder/test_model_builder.py
@@ -4041,14 +4041,30 @@ def test_neuron_configurations_rule_set(self):
 @pytest.mark.parametrize(
     "test_case",
     [
+        # Real-time deployment without update
         {
             "input_args": {"endpoint_name": "test"},
             "call_params": {
                 "instance_type": "ml.g5.2xlarge",
                 "initial_instance_count": 1,
                 "endpoint_name": "test",
+                "update_endpoint": False,
             },
         },
+        # Real-time deployment with update
+        {
+            "input_args": {
+                "endpoint_name": "existing-endpoint",
+                "update_endpoint": True,
+            },
+            "call_params": {
+                "instance_type": "ml.g5.2xlarge",
+                "initial_instance_count": 1,
+                "endpoint_name": "existing-endpoint",
+                "update_endpoint": True,
+            },
+        },
+        # Serverless deployment without update
         {
             "input_args": {
                 "endpoint_name": "test",
@@ -4057,8 +4073,23 @@ def test_neuron_configurations_rule_set(self):
             "call_params": {
                 "serverless_inference_config": ServerlessInferenceConfig(),
                 "endpoint_name": "test",
+                "update_endpoint": False,
             },
         },
+        # Serverless deployment with update
+        {
+            "input_args": {
+                "endpoint_name": "existing-endpoint",
+                "inference_config": ServerlessInferenceConfig(),
+                "update_endpoint": True,
+            },
+            "call_params": {
+                "serverless_inference_config": ServerlessInferenceConfig(),
+                "endpoint_name": "existing-endpoint",
+                "update_endpoint": True,
+            },
+        },
+        # Async deployment without update
         {
             "input_args": {
                 "endpoint_name": "test",
@@ -4069,10 +4100,30 @@ def test_neuron_configurations_rule_set(self):
                 "instance_type": "ml.g5.2xlarge",
                 "initial_instance_count": 1,
                 "endpoint_name": "test",
+                "update_endpoint": False,
             },
         },
+        # Async deployment with update
         {
-            "input_args": {"endpoint_name": "test", "inference_config": RESOURCE_REQUIREMENTS},
+            "input_args": {
+                "endpoint_name": "existing-endpoint",
+                "inference_config": AsyncInferenceConfig(output_path="op-path"),
+                "update_endpoint": True,
+            },
+            "call_params": {
+                "async_inference_config": AsyncInferenceConfig(output_path="op-path"),
+                "instance_type": "ml.g5.2xlarge",
+                "initial_instance_count": 1,
+                "endpoint_name": "existing-endpoint",
+                "update_endpoint": True,
+            },
+        },
+        # Multi-Model deployment (update_endpoint not supported)
+        {
+            "input_args": {
+                "endpoint_name": "test",
+                "inference_config": RESOURCE_REQUIREMENTS,
+            },
             "call_params": {
                 "resources": RESOURCE_REQUIREMENTS,
                 "role": "role-arn",
@@ -4080,8 +4131,10 @@ def test_neuron_configurations_rule_set(self):
                 "instance_type": "ml.g5.2xlarge",
                 "mode": Mode.SAGEMAKER_ENDPOINT,
                 "endpoint_type": EndpointType.INFERENCE_COMPONENT_BASED,
+                "update_endpoint": False,
             },
         },
+        # Batch transform
         {
             "input_args": {
                 "inference_config": BatchTransformInferenceConfig(
@@ -4096,7 +4149,16 @@ def test_neuron_configurations_rule_set(self):
             "id": "Batch",
         },
     ],
-    ids=["Real Time", "Serverless", "Async", "Multi-Model", "Batch"],
+    ids=[
+        "Real Time",
+        "Real Time Update",
+        "Serverless",
+        "Serverless Update",
+        "Async",
+        "Async Update",
+        "Multi-Model",
+        "Batch",
+    ],
 )
 @patch("sagemaker.serve.builder.model_builder.unique_name_from_base")
 def test_deploy(mock_unique_name_from_base, test_case):
@@ -4119,3 +4181,20 @@ def test_deploy(mock_unique_name_from_base, test_case):
 
     diff = deepdiff.DeepDiff(kwargs, test_case["call_params"])
     assert diff == {}
+
+
+def test_deploy_multi_model_update_error():
+    # A ResourceRequirements inference_config combined with update_endpoint=True must raise.
+    expected_message = "Currently update_endpoint is supported for single model endpoints"
+    builder = ModelBuilder(
+        model="meta-llama/Meta-Llama-3-8B-Instruct",
+        env_vars={"HUGGING_FACE_HUB_TOKEN": "token"},
+        role_arn="role-arn",
+        instance_type="ml.g5.2xlarge",
+    )
+    builder.built_model = MagicMock()  # deploy() first checks that this attribute exists
+
+    with pytest.raises(ValueError, match=expected_message):
+        builder.deploy(
+            endpoint_name="test", inference_config=RESOURCE_REQUIREMENTS, update_endpoint=True
+        )