@@ -2891,3 +2891,58 @@ def test_optimize_with_gpu_instance_and_llama_3_1_and_compilation(
28912891 output_path = "s3://bucket/code/" ,
28922892 ),
28932893 )
2894+
@patch.object(ModelBuilder, "_prepare_for_mode")
@patch.object(ModelBuilder, "_get_serve_setting", autospec=True)
def test_optimize_with_gpu_instance_and_compilation_with_speculative_decoding(
    self,
    mock_get_serve_setting,
    mock_prepare_for_mode,
):
    """Verify optimize() rejects compilation combined with speculative decoding.

    A ModelBuilder configured for ``ml.g5.2xlarge`` is asked to optimize with
    both a ``speculative_decoding_config`` and a ``compilation_config``; the
    call is expected to raise ``ValueError`` with the message checked below.
    """

    def _stub_prepare_for_mode(*args, **kwargs):
        # Hand back a fresh (model data, environment) pair on every call.
        s3_model_data = {
            "S3DataSource": {
                "CompressionType": "None",
                "S3DataType": "S3Prefix",
                "S3Uri": "s3://bucket/code/code/",
            }
        }
        return s3_model_data, {"DTYPE": "bfloat16"}

    mock_prepare_for_mode.side_effect = _stub_prepare_for_mode

    # Stand-in for the underlying SDK model attached to the builder.
    fake_pysdk_model = Mock()
    fake_pysdk_model.model_data = None
    fake_pysdk_model.env = {"HF_MODEL_ID": "modelid"}

    schema = SchemaBuilder(
        {"inputs": "dummy prompt", "parameters": {}},
        [{"generated_text": "dummy response"}],
    )

    builder = ModelBuilder(
        model="modelid",
        schema_builder=schema,
        env_vars={"HF_TOKEN": "token"},
        model_metadata={"CUSTOM_MODEL_PATH": "s3://bucket/path/"},
        role_arn="role-arn",
        instance_type="ml.g5.2xlarge",
    )
    builder.pysdk_model = fake_pysdk_model

    draft_model_config = {
        "ModelProvider": "custom",
        "ModelSource": "s3://data-source",
    }
    compile_config = {"OverrideEnvironment": {"OPTION_TENSOR_PARALLEL_DEGREE": "2"}}

    # Requesting both optimizations at once must be refused.
    with self.assertRaisesRegex(
        ValueError,
        "Compilation is not supported with speculative decoding with a GPU instance.",
    ):
        builder.optimize(
            job_name="job_name-123",
            speculative_decoding_config=draft_model_config,
            compilation_config=compile_config,
            output_path="s3://bucket/code/",
        )
0 commit comments