@@ -2840,3 +2840,54 @@ def test_optimize_for_hf_without_custom_s3_path(
28402840 "OutputConfig" : {"S3OutputLocation" : "s3://bucket/code/" },
28412841 },
28422842 )
2843+
@patch.object(ModelBuilder, "_prepare_for_mode")
@patch.object(ModelBuilder, "_get_serve_setting", autospec=True)
def test_optimize_with_gpu_instance_and_llama_3_1_and_compilation(
    self,
    mock_get_serve_setting,
    mock_prepare_for_mode,
):
    """Check that ``optimize`` rejects compilation for Llama-3.1 on ``ml.g5.2xlarge``.

    A ``ModelBuilder`` is created for the ``meta-llama/Meta-Llama-3-1-8B-Instruct``
    model id with instance type ``ml.g5.2xlarge``; calling ``optimize`` with a
    ``compilation_config`` is expected to raise ``ValueError`` carrying the
    "Compilation is not supported ..." message.

    Args:
        mock_get_serve_setting: Patched ``ModelBuilder._get_serve_setting``.
            Not referenced in the body; it is only patched out.
        mock_prepare_for_mode: Patched ``ModelBuilder._prepare_for_mode``,
            stubbed below to return a canned pair of dicts.
    """
    # Every call to _prepare_for_mode yields a fresh (S3 data source dict, env dict) pair.
    mock_prepare_for_mode.side_effect = lambda *args, **kwargs: (
        {
            "S3DataSource": {
                "CompressionType": "None",
                "S3DataType": "S3Prefix",
                "S3Uri": "s3://bucket/code/code/",
            }
        },
        {"DTYPE": "bfloat16"},
    )

    mock_pysdk_model = Mock()
    mock_pysdk_model.model_data = None
    mock_pysdk_model.env = {"HF_MODEL_ID": "meta-llama/Meta-Llama-3-1-8B-Instruct"}

    sample_input = {"inputs": "dummy prompt", "parameters": {}}
    sample_output = [{"generated_text": "dummy response"}]
    dummy_schema_builder = SchemaBuilder(sample_input, sample_output)

    model_builder = ModelBuilder(
        model="meta-llama/Meta-Llama-3-1-8B-Instruct",
        schema_builder=dummy_schema_builder,
        env_vars={"HF_TOKEN": "token"},
        model_metadata={
            "CUSTOM_MODEL_PATH": "s3://bucket/path/",
        },
        role_arn="role-arn",
        instance_type="ml.g5.2xlarge",
    )

    # Inject the mocked model directly onto the builder before calling optimize().
    model_builder.pysdk_model = mock_pysdk_model

    # The expected message is a regular expression: the dots are escaped so that
    # "3.1" and the trailing full stop match literally rather than any character.
    with self.assertRaisesRegex(
        ValueError,
        r"Compilation is not supported for Llama-3\.1 with a GPU instance\.",
    ):
        model_builder.optimize(
            job_name="job_name-123",
            compilation_config={
                "OverrideEnvironment": {"OPTION_TENSOR_PARALLEL_DEGREE": "2"}
            },
            output_path="s3://bucket/code/",
        )
0 commit comments