@@ -1198,6 +1198,65 @@ def test_optimize_quantize_for_jumpstart(
11981198
11991199 self .assertIsNotNone (out_put )
12001200
1201+ @patch ("sagemaker.serve.builder.jumpstart_builder._capture_telemetry" , side_effect = None )
1202+ @patch .object (ModelBuilder , "_get_serve_setting" , autospec = True )
1203+ def test_optimize_quantize_and_compile_for_jumpstart (
1204+ self ,
1205+ mock_serve_settings ,
1206+ mock_telemetry ,
1207+ ):
1208+ mock_sagemaker_session = Mock ()
1209+ mock_metadata_config = Mock ()
1210+ mock_metadata_config .resolved_config = {
1211+ "supported_inference_instance_types" : ["ml.inf2.48xlarge" ],
1212+ "hosting_neuron_model_id" : "huggingface-llmneuron-mistral-7b" ,
1213+ }
1214+
1215+ mock_pysdk_model = Mock ()
1216+ mock_pysdk_model .env = {"SAGEMAKER_ENV" : "1" }
1217+ mock_pysdk_model .model_data = mock_model_data
1218+ mock_pysdk_model .image_uri = mock_tgi_image_uri
1219+ mock_pysdk_model .list_deployment_configs .return_value = DEPLOYMENT_CONFIGS
1220+ mock_pysdk_model .deployment_config = DEPLOYMENT_CONFIGS [0 ]
1221+ mock_pysdk_model .config_name = "config_name"
1222+ mock_pysdk_model ._metadata_configs = {"config_name" : mock_metadata_config }
1223+
1224+ sample_input = {
1225+ "inputs" : "The diamondback terrapin or simply terrapin is a species "
1226+ "of turtle native to the brackish coastal tidal marshes of the" ,
1227+ "parameters" : {"max_new_tokens" : 1024 },
1228+ }
1229+ sample_output = [
1230+ {
1231+ "generated_text" : "The diamondback terrapin or simply terrapin is a "
1232+ "species of turtle native to the brackish coastal "
1233+ "tidal marshes of the east coast."
1234+ }
1235+ ]
1236+
1237+ model_builder = ModelBuilder (
1238+ model = "meta-textgeneration-llama-3-70b" ,
1239+ schema_builder = SchemaBuilder (sample_input , sample_output ),
1240+ sagemaker_session = mock_sagemaker_session ,
1241+ )
1242+
1243+ model_builder .pysdk_model = mock_pysdk_model
1244+
1245+ out_put = model_builder ._optimize_for_jumpstart (
1246+ accept_eula = True ,
1247+ quantization_config = {
1248+ "OverrideEnvironment" : {"OPTION_QUANTIZE" : "awq" },
1249+ },
1250+ compilation_config = {"OverrideEnvironment" : {"OPTION_TENSOR_PARALLEL_DEGREE" : "2" }},
1251+ env_vars = {
1252+ "OPTION_TENSOR_PARALLEL_DEGREE" : "1" ,
1253+ "OPTION_MAX_ROLLING_BATCH_SIZE" : "2" ,
1254+ },
1255+ output_path = "s3://bucket/code/" ,
1256+ )
1257+
1258+ self .assertIsNotNone (out_put )
1259+
12011260 @patch ("sagemaker.serve.builder.jumpstart_builder._capture_telemetry" , side_effect = None )
12021261 @patch .object (ModelBuilder , "_get_serve_setting" , autospec = True )
12031262 @patch (
@@ -1383,3 +1442,103 @@ def test_optimize_compile_for_jumpstart_with_neuron_env(
13831442 self .assertEqual (optimized_model .env ["OPTION_ROLLING_BATCH" ], "auto" )
13841443 self .assertEqual (optimized_model .env ["OPTION_MAX_ROLLING_BATCH_SIZE" ], "4" )
13851444 self .assertEqual (optimized_model .env ["OPTION_NEURON_OPTIMIZE_LEVEL" ], "2" )
1445+
1446+ @patch ("sagemaker.serve.builder.jumpstart_builder._capture_telemetry" , side_effect = None )
1447+ @patch .object (ModelBuilder , "_get_serve_setting" , autospec = True )
1448+ @patch (
1449+ "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_gated_model" ,
1450+ return_value = True ,
1451+ )
1452+ @patch ("sagemaker.serve.builder.jumpstart_builder.JumpStartModel" )
1453+ @patch (
1454+ "sagemaker.serve.builder.jumpstart_builder.JumpStart._is_jumpstart_model_id" ,
1455+ return_value = True ,
1456+ )
1457+ @patch ("sagemaker.serve.builder.jumpstart_builder.JumpStart._create_pre_trained_js_model" )
1458+ @patch (
1459+ "sagemaker.serve.builder.jumpstart_builder.prepare_tgi_js_resources" ,
1460+ return_value = ({"model_type" : "t5" , "n_head" : 71 }, True ),
1461+ )
1462+ def test_optimize_compile_for_jumpstart_without_compilation_config (
1463+ self ,
1464+ mock_prepare_for_tgi ,
1465+ mock_pre_trained_model ,
1466+ mock_is_jumpstart_model ,
1467+ mock_js_model ,
1468+ mock_is_gated_model ,
1469+ mock_serve_settings ,
1470+ mock_telemetry ,
1471+ ):
1472+ mock_sagemaker_session = Mock ()
1473+ mock_metadata_config = Mock ()
1474+ mock_sagemaker_session .wait_for_optimization_job .side_effect = (
1475+ lambda * args : mock_optimization_job_response
1476+ )
1477+
1478+ mock_metadata_config .resolved_config = {
1479+ "supported_inference_instance_types" : ["ml.inf2.48xlarge" ],
1480+ "hosting_neuron_model_id" : "huggingface-llmneuron-mistral-7b" ,
1481+ }
1482+
1483+ mock_js_model .return_value = MagicMock ()
1484+ mock_js_model .return_value .env = {
1485+ "SAGEMAKER_PROGRAM" : "inference.py" ,
1486+ "ENDPOINT_SERVER_TIMEOUT" : "3600" ,
1487+ "MODEL_CACHE_ROOT" : "/opt/ml/model" ,
1488+ "SAGEMAKER_ENV" : "1" ,
1489+ "HF_MODEL_ID" : "/opt/ml/model" ,
1490+ "SAGEMAKER_MODEL_SERVER_WORKERS" : "1" ,
1491+ }
1492+
1493+ mock_pre_trained_model .return_value = MagicMock ()
1494+ mock_pre_trained_model .return_value .env = dict ()
1495+ mock_pre_trained_model .return_value .config_name = "config_name"
1496+ mock_pre_trained_model .return_value .model_data = mock_model_data
1497+ mock_pre_trained_model .return_value .image_uri = mock_tgi_image_uri
1498+ mock_pre_trained_model .return_value .list_deployment_configs .return_value = (
1499+ DEPLOYMENT_CONFIGS
1500+ )
1501+ mock_pre_trained_model .return_value .deployment_config = DEPLOYMENT_CONFIGS [0 ]
1502+ mock_pre_trained_model .return_value ._metadata_configs = {
1503+ "config_name" : mock_metadata_config
1504+ }
1505+
1506+ sample_input = {
1507+ "inputs" : "The diamondback terrapin or simply terrapin is a species "
1508+ "of turtle native to the brackish coastal tidal marshes of the" ,
1509+ "parameters" : {"max_new_tokens" : 1024 },
1510+ }
1511+ sample_output = [
1512+ {
1513+ "generated_text" : "The diamondback terrapin or simply terrapin is a "
1514+ "species of turtle native to the brackish coastal "
1515+ "tidal marshes of the east coast."
1516+ }
1517+ ]
1518+
1519+ model_builder = ModelBuilder (
1520+ model = "meta-textgeneration-llama-3-70b" ,
1521+ schema_builder = SchemaBuilder (sample_input , sample_output ),
1522+ sagemaker_session = mock_sagemaker_session ,
1523+ )
1524+
1525+ optimized_model = model_builder .optimize (
1526+ accept_eula = True ,
1527+ instance_type = "ml.inf2.24xlarge" ,
1528+ output_path = "s3://bucket/code/" ,
1529+ )
1530+
1531+ self .assertEqual (
1532+ optimized_model .image_uri ,
1533+ mock_optimization_job_response ["OptimizationOutput" ]["RecommendedInferenceImage" ],
1534+ )
1535+ self .assertEqual (
1536+ optimized_model .model_data ["S3DataSource" ]["S3Uri" ],
1537+ mock_optimization_job_response ["OutputConfig" ]["S3OutputLocation" ],
1538+ )
1539+ self .assertEqual (optimized_model .env ["SAGEMAKER_PROGRAM" ], "inference.py" )
1540+ self .assertEqual (optimized_model .env ["ENDPOINT_SERVER_TIMEOUT" ], "3600" )
1541+ self .assertEqual (optimized_model .env ["MODEL_CACHE_ROOT" ], "/opt/ml/model" )
1542+ self .assertEqual (optimized_model .env ["SAGEMAKER_ENV" ], "1" )
1543+ self .assertEqual (optimized_model .env ["HF_MODEL_ID" ], "/opt/ml/model" )
1544+ self .assertEqual (optimized_model .env ["SAGEMAKER_MODEL_SERVER_WORKERS" ], "1" )
0 commit comments