diff --git a/src/sagemaker/serve/builder/djl_builder.py b/src/sagemaker/serve/builder/djl_builder.py index 75acd0d1fe..608e1c604f 100644 --- a/src/sagemaker/serve/builder/djl_builder.py +++ b/src/sagemaker/serve/builder/djl_builder.py @@ -92,6 +92,7 @@ def __init__(self): self.nb_instance_type = None self.ram_usage_model_load = None self.role_arn = None + self.name = None @abstractmethod def _prepare_for_mode(self): @@ -130,6 +131,7 @@ def _create_djl_model(self) -> Type[Model]: huggingface_hub_token=self.env_vars.get("HF_TOKEN"), image_config=self.image_config, vpc_config=self.vpc_config, + name=self.name, ) if not self.image_uri: diff --git a/src/sagemaker/serve/builder/jumpstart_builder.py b/src/sagemaker/serve/builder/jumpstart_builder.py index eb57dec1fa..e689f41839 100644 --- a/src/sagemaker/serve/builder/jumpstart_builder.py +++ b/src/sagemaker/serve/builder/jumpstart_builder.py @@ -121,6 +121,7 @@ def __init__(self): self.is_compiled = False self.is_quantized = False self.speculative_decoding_draft_model_source = None + self.name = None @abstractmethod def _prepare_for_mode(self, **kwargs): @@ -147,7 +148,10 @@ def _is_jumpstart_model_id(self) -> bool: def _create_pre_trained_js_model(self) -> Type[Model]: """Placeholder docstring""" pysdk_model = JumpStartModel( - self.model, vpc_config=self.vpc_config, sagemaker_session=self.sagemaker_session + self.model, + vpc_config=self.vpc_config, + sagemaker_session=self.sagemaker_session, + name=self.name, ) self._original_deploy = pysdk_model.deploy diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index a919aa7342..d1f1ab6ba2 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -492,6 +492,7 @@ def _create_model(self): env=self.env_vars, sagemaker_session=self.sagemaker_session, predictor_cls=self._get_predictor, + name=self.name, ) # store the modes in the model so that we may diff --git 
a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index b64985cd65..c77a57f1a7 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -65,6 +65,7 @@ def __init__(self): self.ram_usage_model_load = None self.secret_key = None self.role_arn = None + self.name = None @abstractmethod def _prepare_for_mode(self, *args, **kwargs): @@ -105,6 +106,7 @@ def _create_tei_model(self, **kwargs) -> Type[Model]: env=self.env_vars, role=self.role_arn, sagemaker_session=self.sagemaker_session, + name=self.name, ) logger.info("Detected %s. Proceeding with the the deployment.", self.image_uri) diff --git a/src/sagemaker/serve/builder/tf_serving_builder.py b/src/sagemaker/serve/builder/tf_serving_builder.py index 9b171b1d98..044e0460bc 100644 --- a/src/sagemaker/serve/builder/tf_serving_builder.py +++ b/src/sagemaker/serve/builder/tf_serving_builder.py @@ -51,6 +51,7 @@ def __init__(self): self.pysdk_model = None self.schema_builder = None self.env_vars = None + self.name = None @abstractmethod def _prepare_for_mode(self): @@ -97,6 +98,7 @@ def _create_tensorflow_model(self): env=self.env_vars, sagemaker_session=self.sagemaker_session, predictor_cls=self._get_tensorflow_predictor, + name=self.name, ) self.pysdk_model.mode = self.mode diff --git a/src/sagemaker/serve/builder/tgi_builder.py b/src/sagemaker/serve/builder/tgi_builder.py index 9bde777af2..3614e90914 100644 --- a/src/sagemaker/serve/builder/tgi_builder.py +++ b/src/sagemaker/serve/builder/tgi_builder.py @@ -92,6 +92,7 @@ def __init__(self): self.ram_usage_model_load = None self.secret_key = None self.role_arn = None + self.name = None @abstractmethod def _prepare_for_mode(self, *args, **kwargs): @@ -142,6 +143,7 @@ def _create_tgi_model(self) -> Type[Model]: env=self.env_vars, role=self.role_arn, sagemaker_session=self.sagemaker_session, + name=self.name, ) self._original_deploy = pysdk_model.deploy diff --git 
a/src/sagemaker/serve/builder/transformers_builder.py b/src/sagemaker/serve/builder/transformers_builder.py index b380dc8455..b7baf6b513 100644 --- a/src/sagemaker/serve/builder/transformers_builder.py +++ b/src/sagemaker/serve/builder/transformers_builder.py @@ -89,6 +89,7 @@ def __init__(self): self.schema_builder = None self.inference_spec = None self.shared_libs = None + self.name = None @abstractmethod def _prepare_for_mode(self, *args, **kwargs): @@ -105,6 +106,7 @@ def _create_transformers_model(self) -> Type[Model]: env=self.env_vars, role=self.role_arn, sagemaker_session=self.sagemaker_session, + name=self.name, ) logger.info("Detected %s. Proceeding with the the deployment.", self.image_uri) diff --git a/tests/unit/sagemaker/serve/builder/test_djl_builder.py b/tests/unit/sagemaker/serve/builder/test_djl_builder.py index 3ecd55e301..69f8c7a8d5 100644 --- a/tests/unit/sagemaker/serve/builder/test_djl_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_djl_builder.py @@ -78,6 +78,7 @@ def test_build_deploy_for_djl_local_container( ): builder = ModelBuilder( model=mock_model_id, + name="mock_model_name", schema_builder=mock_schema_builder, mode=Mode.LOCAL_CONTAINER, model_server=ModelServer.DJL_SERVING, @@ -89,6 +90,8 @@ def test_build_deploy_for_djl_local_container( builder._prepare_for_mode.side_effect = None model = builder.build() + assert model.name == "mock_model_name" + builder.serve_settings.telemetry_opt_out = True assert isinstance(model, DJLModel) diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py index 2752e991ff..b50aa17c34 100644 --- a/tests/unit/sagemaker/serve/builder/test_model_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_model_builder.py @@ -317,7 +317,7 @@ def test_build_happy_path_with_sagemaker_endpoint_mode_and_byoc( ) mock_model_obj = Mock() - mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, 
sagemaker_session, predictor_cls: ( # noqa E501 + mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls, name: ( # noqa E501 mock_model_obj if image_uri == mock_image_uri and image_config == MOCK_IMAGE_CONFIG @@ -326,6 +326,7 @@ def test_build_happy_path_with_sagemaker_endpoint_mode_and_byoc( and role == mock_role_arn and env == ENV_VARS and sagemaker_session == mock_session + and "model-name-" in name else None ) @@ -425,13 +426,14 @@ def test_build_happy_path_with_sagemaker_endpoint_mode_and_1p_dlc_as_byoc( ) mock_model_obj = Mock() - mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls: ( # noqa E501 + mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls, name: ( # noqa E501 mock_model_obj if image_uri == mock_1p_dlc_image_uri and model_data == model_data and role == mock_role_arn and env == ENV_VARS and sagemaker_session == mock_session + and "model-name-" in name else None ) @@ -532,13 +534,14 @@ def test_build_happy_path_with_sagemaker_endpoint_mode_and_inference_spec( ) mock_model_obj = Mock() - mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls: ( # noqa E501 + mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls, name: ( # noqa E501 mock_model_obj if image_uri == mock_image_uri and model_data == model_data and role == mock_role_arn and env == ENV_VARS_INF_SPEC and sagemaker_session == mock_session + and "model-name-" in name else None ) @@ -633,13 +636,14 @@ def test_build_happy_path_with_sagemakerEndpoint_mode_and_model( ) mock_model_obj = Mock() - mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls: ( # 
noqa E501 + mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls, name: ( # noqa E501 mock_model_obj if image_uri == mock_image_uri and model_data == model_data and role == mock_role_arn and env == ENV_VARS and sagemaker_session == mock_session + and "model-name-" in name else None ) @@ -742,13 +746,14 @@ def test_build_happy_path_with_sagemakerEndpoint_mode_and_xgboost_model( ) mock_model_obj = Mock() - mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls: ( # noqa E501 + mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls, name: ( # noqa E501 mock_model_obj if image_uri == mock_image_uri and model_data == model_data and role == mock_role_arn and env == ENV_VARS and sagemaker_session == mock_session + and "model-name-" in name else None ) @@ -847,13 +852,14 @@ def test_build_happy_path_with_local_container_mode( mock_mode.prepare.side_effect = lambda: None mock_model_obj = Mock() - mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls: ( # noqa E501 + mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls, name: ( # noqa E501 mock_model_obj if image_uri == mock_image_uri and model_data is None and role == mock_role_arn and env == {} and sagemaker_session == mock_session + and "model-name-" in name else None ) @@ -968,13 +974,14 @@ def test_build_happy_path_with_localContainer_mode_overwritten_with_sagemaker_mo ) mock_model_obj = Mock() - mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls: ( # noqa E501 + mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, 
predictor_cls, name: ( # noqa E501 mock_model_obj if image_uri == mock_image_uri and model_data is None and role == mock_role_arn and env == {} and sagemaker_session == mock_session + and "model-name-" in name else None ) @@ -1119,13 +1126,14 @@ def test_build_happy_path_with_sagemaker_endpoint_mode_overwritten_with_local_co ) mock_model_obj = Mock() - mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls: ( # noqa E501 + mock_sdk_model.side_effect = lambda image_uri, image_config, vpc_config, model_data, role, env, sagemaker_session, predictor_cls, name: ( # noqa E501 mock_model_obj if image_uri == mock_image_uri and model_data == model_data and role == mock_role_arn and env == ENV_VARS and sagemaker_session == mock_session + and "model-name-" in name else None ) diff --git a/tests/unit/sagemaker/serve/builder/test_tei_builder.py b/tests/unit/sagemaker/serve/builder/test_tei_builder.py index 2ede60290b..74e49e345f 100644 --- a/tests/unit/sagemaker/serve/builder/test_tei_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_tei_builder.py @@ -79,6 +79,7 @@ def test_tei_builder_sagemaker_endpoint_mode_no_s3_upload_success( # verify SAGEMAKER_ENDPOINT deploy builder = ModelBuilder( model=MOCK_MODEL_ID, + name="mock_model_name", schema_builder=MOCK_SCHEMA_BUILDER, mode=Mode.SAGEMAKER_ENDPOINT, model_metadata={ @@ -88,7 +89,10 @@ def test_tei_builder_sagemaker_endpoint_mode_no_s3_upload_success( builder._prepare_for_mode = MagicMock() builder._prepare_for_mode.return_value = (None, {}) + model = builder.build() + assert model.name == "mock_model_name" + builder.serve_settings.telemetry_opt_out = True builder._original_deploy = MagicMock() diff --git a/tests/unit/sagemaker/serve/builder/test_tensorflow_serving_builder.py b/tests/unit/sagemaker/serve/builder/test_tensorflow_serving_builder.py index 9d51b04e08..e8ae892b45 100644 --- 
a/tests/unit/sagemaker/serve/builder/test_tensorflow_serving_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_tensorflow_serving_builder.py @@ -33,6 +33,7 @@ def setUp(self): self.instance.image_config = {} self.instance.vpc_config = {} self.instance.modes = {} + self.instance.name = "model-name-mock-uuid-hex" @patch("os.makedirs") @patch("os.path.exists") @@ -71,5 +72,6 @@ def test_create_tensorflow_model(self, mock_model): env=self.instance.env_vars, sagemaker_session=self.instance.sagemaker_session, predictor_cls=self.instance._get_tensorflow_predictor, + name="model-name-mock-uuid-hex", ) self.assertEqual(model, mock_model.return_value) diff --git a/tests/unit/sagemaker/serve/builder/test_tgi_builder.py b/tests/unit/sagemaker/serve/builder/test_tgi_builder.py index c77dbfffd6..0fa227f5d4 100644 --- a/tests/unit/sagemaker/serve/builder/test_tgi_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_tgi_builder.py @@ -61,13 +61,17 @@ def test_tgi_builder_sagemaker_endpoint_mode_no_s3_upload_success( # verify SAGEMAKER_ENDPOINT deploy builder = ModelBuilder( model=MOCK_MODEL_ID, + name="mock_model_name", schema_builder=MOCK_SCHEMA_BUILDER, mode=Mode.SAGEMAKER_ENDPOINT, ) builder._prepare_for_mode = MagicMock() builder._prepare_for_mode.return_value = (None, {}) + model = builder.build() + assert model.name == "mock_model_name" + builder.serve_settings.telemetry_opt_out = True builder._original_deploy = MagicMock() @@ -187,3 +191,75 @@ def test_tgi_builder_optimized_sagemaker_endpoint_mode_no_s3_upload_success( # verify that if optimized, no s3 upload occurs builder._prepare_for_mode.assert_called_with() + + @patch( + "sagemaker.serve.builder.tgi_builder._get_nb_instance", + return_value="ml.g5.24xlarge", + ) + @patch("sagemaker.serve.builder.tgi_builder._capture_telemetry", side_effect=None) + @patch( + "sagemaker.serve.builder.model_builder.get_huggingface_model_metadata", + return_value={"pipeline_tag": "text-generation"}, + ) + @patch( + 
"sagemaker.serve.builder.tgi_builder._get_model_config_properties_from_hf", + return_value=({}, None), + ) + @patch( + "sagemaker.serve.builder.tgi_builder._get_default_tgi_configurations", + return_value=({}, None), + ) + @patch( + "sagemaker.serve.builder.tgi_builder._get_admissible_tensor_parallel_degrees", + return_value=[4, 8], + ) + @patch("sagemaker.serve.builder.tgi_builder._get_admissible_dtypes", return_value=["fp16"]) + @patch("sagemaker.serve.builder.tgi_builder.datetime") + @patch("sagemaker.serve.builder.tgi_builder.timedelta", return_value=1800) + @patch("sagemaker.serve.builder.tgi_builder._serial_benchmark") + @patch("sagemaker.serve.builder.tgi_builder._concurrent_benchmark") + def test_tgi_builder_tune_success( + self, + mock_concurrent_benchmark, + mock_serial_benchmark, + mock_timedelta, + mock_datetime, + mock_get_admissible_dtypes, + mock_get_admissible_tensor_parallel_degrees, + mock_default_tgi_configurations, + mock_hf_model_config, + mock_hf_model_md, + mock_telemetry, + mock_get_nb_instance, + ): + # WHERE + mock_datetime.now.side_effect = [0, 100, 200] + mock_serial_benchmark.side_effect = [(1000, 10000, 10), (500, 5000, 50)] + mock_concurrent_benchmark.side_effect = [(10, 10), (50, 5)] + + builder = ModelBuilder( + model=MOCK_MODEL_ID, + schema_builder=MOCK_SCHEMA_BUILDER, + mode=Mode.LOCAL_CONTAINER, + model_path=MOCK_MODEL_PATH, + ) + builder._prepare_for_mode = MagicMock() + builder._prepare_for_mode.side_effect = None + + model = builder.build() + + builder.serve_settings.telemetry_opt_out = True + builder.modes[str(Mode.LOCAL_CONTAINER)] = MagicMock() + builder.pysdk_model = MagicMock() + + # WHEN + ret_new_model = model.tune(max_tuning_duration=1800) + + # THEN + assert ret_new_model != model + assert len(mock_datetime.now.call_args_list) == 3 + assert len(mock_serial_benchmark.call_args_list) == 2 + assert len(mock_concurrent_benchmark.call_args_list) == 2 + assert ret_new_model.env["NUM_SHARD"] == "8" + assert 
ret_new_model.env["DTYPE"] == "fp16" + assert ret_new_model.env["SHARDED"] == "true"