@@ -2927,6 +2927,7 @@ def test_optimize_with_gpu_instance_and_llama_3_1_and_compilation(
29272927 "Compilation is not supported for Llama-3.1 with a GPU instance." ,
29282928 lambda : model_builder .optimize (
29292929 job_name = "job_name-123" ,
2930+ instance_type = "ml.g5.24xlarge" ,
29302931 compilation_config = {"OverrideEnvironment" : {"OPTION_TENSOR_PARALLEL_DEGREE" : "2" }},
29312932 output_path = "s3://bucket/code/" ,
29322933 ),
@@ -2975,9 +2976,10 @@ def test_optimize_with_gpu_instance_and_compilation_with_speculative_decoding(
29752976
29762977 self .assertRaisesRegex (
29772978 ValueError ,
2978- "Compilation is not supported with speculative decoding with a GPU instance ." ,
2979+ "Optimizations that use Compilation and Speculative Decoding are not supported for GPU instances ." ,
29792980 lambda : model_builder .optimize (
29802981 job_name = "job_name-123" ,
2982+ instance_type = "ml.g5.24xlarge" ,
29812983 speculative_decoding_config = {
29822984 "ModelProvider" : "custom" ,
29832985 "ModelSource" : "s3://data-source" ,
@@ -3481,6 +3483,7 @@ def test_corner_cases_throw_errors(self):
34813483 ValueError ,
34823484 "Optimizations that uses None instance type are not currently supported" ,
34833485 lambda : _validate_optimization_configuration (
3486+ is_jumpstart = False ,
34843487 sharding_config = {"key" : "value" },
34853488 instance_type = None ,
34863489 quantization_config = None ,
@@ -3496,6 +3499,7 @@ def test_corner_cases_throw_errors(self):
34963499 "are currently not support on both GPU and Neuron instances."
34973500 ),
34983501 lambda : _validate_optimization_configuration (
3502+ is_jumpstart = False ,
34993503 instance_type = "ml.g5.24xlarge" ,
35003504 quantization_config = None ,
35013505 speculative_decoding_config = None ,
@@ -3504,12 +3508,22 @@ def test_corner_cases_throw_errors(self):
35043508 ),
35053509 )
35063510
3511+ _validate_optimization_configuration (
3512+ is_jumpstart = True ,
3513+ instance_type = "ml.inf2.xlarge" ,
3514+ quantization_config = None ,
3515+ speculative_decoding_config = None ,
3516+ compilation_config = None ,
3517+ sharding_config = None ,
3518+ )
3519+
35073520 def test_trt_and_vllm_configurations_throw_errors_for_rule_set (self ):
35083521 # Quantization:smoothquant without compilation
35093522 self .assertRaisesRegex (
35103523 ValueError ,
35113524 "Optimizations that use Quantization:smoothquant must be provided with Compilation for GPU instances." ,
35123525 lambda : _validate_optimization_configuration (
3526+ is_jumpstart = False ,
35133527 instance_type = "ml.g5.24xlarge" ,
35143528 quantization_config = {
35153529 "OverrideEnvironment" : {"OPTION_QUANTIZE" : "smoothquant" },
@@ -3525,6 +3539,7 @@ def test_trt_and_vllm_configurations_throw_errors_for_rule_set(self):
35253539 ValueError ,
35263540 "Optimizations that use Quantization:test are not supported for GPU instances." ,
35273541 lambda : _validate_optimization_configuration (
3542+ is_jumpstart = False ,
35283543 instance_type = "ml.g5.24xlarge" ,
35293544 quantization_config = {
35303545 "OverrideEnvironment" : {"OPTION_QUANTIZE" : "test" },
@@ -3540,6 +3555,7 @@ def test_neuron_configurations_throw_errors_for_rule_set(self):
35403555 ValueError ,
35413556 "Optimizations that use Speculative Decoding are not supported on Neuron instances." ,
35423557 lambda : _validate_optimization_configuration (
3558+ is_jumpstart = False ,
35433559 instance_type = "ml.inf2.xlarge" ,
35443560 quantization_config = None ,
35453561 speculative_decoding_config = {"key" : "value" },
@@ -3552,6 +3568,7 @@ def test_neuron_configurations_throw_errors_for_rule_set(self):
35523568 ValueError ,
35533569 "Optimizations that use Sharding are not supported on Neuron instances." ,
35543570 lambda : _validate_optimization_configuration (
3571+ is_jumpstart = False ,
35553572 instance_type = "ml.inf2.xlarge" ,
35563573 quantization_config = None ,
35573574 speculative_decoding_config = None ,
@@ -3563,6 +3580,7 @@ def test_neuron_configurations_throw_errors_for_rule_set(self):
35633580 def test_trt_configurations_rule_set (self ):
35643581 # Can be compiled with quantization
35653582 _validate_optimization_configuration (
3583+ is_jumpstart = False ,
35663584 instance_type = "ml.g5.24xlarge" ,
35673585 quantization_config = {
35683586 "OverrideEnvironment" : {"OPTION_QUANTIZE" : "smoothquant" },
@@ -3574,6 +3592,7 @@ def test_trt_configurations_rule_set(self):
35743592
35753593 # Can be just compiled
35763594 _validate_optimization_configuration (
3595+ is_jumpstart = False ,
35773596 instance_type = "ml.g5.24xlarge" ,
35783597 quantization_config = None ,
35793598 sharding_config = None ,
@@ -3583,6 +3602,7 @@ def test_trt_configurations_rule_set(self):
35833602
35843603 # Can be just compiled with empty dict
35853604 _validate_optimization_configuration (
3605+ is_jumpstart = False ,
35863606 instance_type = "ml.g5.24xlarge" ,
35873607 quantization_config = None ,
35883608 sharding_config = None ,
@@ -3593,6 +3613,7 @@ def test_trt_configurations_rule_set(self):
35933613 def test_vllm_configurations_rule_set (self ):
35943614 # Can use speculative decoding
35953615 _validate_optimization_configuration (
3616+ is_jumpstart = False ,
35963617 instance_type = "ml.g5.24xlarge" ,
35973618 quantization_config = None ,
35983619 sharding_config = None ,
@@ -3602,6 +3623,7 @@ def test_vllm_configurations_rule_set(self):
36023623
36033624 # Can be quantized
36043625 _validate_optimization_configuration (
3626+ is_jumpstart = False ,
36053627 instance_type = "ml.g5.24xlarge" ,
36063628 quantization_config = {
36073629 "OverrideEnvironment" : {"OPTION_QUANTIZE" : "awq" },
@@ -3613,6 +3635,7 @@ def test_vllm_configurations_rule_set(self):
36133635
36143636 # Can be sharded
36153637 _validate_optimization_configuration (
3638+ is_jumpstart = False ,
36163639 instance_type = "ml.g5.24xlarge" ,
36173640 quantization_config = None ,
36183641 sharding_config = {"key" : "value" },
@@ -3623,6 +3646,7 @@ def test_vllm_configurations_rule_set(self):
36233646 def test_neuron_configurations_rule_set (self ):
36243647 # Can be compiled
36253648 _validate_optimization_configuration (
3649+ is_jumpstart = False ,
36263650 instance_type = "ml.inf2.xlarge" ,
36273651 quantization_config = None ,
36283652 sharding_config = None ,
@@ -3632,6 +3656,7 @@ def test_neuron_configurations_rule_set(self):
36323656
36333657 # Can be compiled with empty dict
36343658 _validate_optimization_configuration (
3659+ is_jumpstart = False ,
36353660 instance_type = "ml.inf2.xlarge" ,
36363661 quantization_config = None ,
36373662 sharding_config = None ,
0 commit comments