@@ -2927,6 +2927,7 @@ def test_optimize_with_gpu_instance_and_llama_3_1_and_compilation(
             "Compilation is not supported for Llama-3.1 with a GPU instance.",
             lambda: model_builder.optimize(
                 job_name="job_name-123",
+                instance_type="ml.g5.24xlarge",
                 compilation_config={"OverrideEnvironment": {"OPTION_TENSOR_PARALLEL_DEGREE": "2"}},
                 output_path="s3://bucket/code/",
             ),
@@ -2975,9 +2976,10 @@ def test_optimize_with_gpu_instance_and_compilation_with_speculative_decoding(

         self.assertRaisesRegex(
             ValueError,
-            "Compilation is not supported with speculative decoding with a GPU instance.",
+            "Optimizations that use Compilation and Speculative Decoding are not supported for GPU instances.",
             lambda: model_builder.optimize(
                 job_name="job_name-123",
+                instance_type="ml.g5.24xlarge",
                 speculative_decoding_config={
                     "ModelProvider": "custom",
                     "ModelSource": "s3://data-source",
@@ -3481,6 +3483,7 @@ def test_corner_cases_throw_errors(self):
             ValueError,
             "Optimizations that uses None instance type are not currently supported",
             lambda: _validate_optimization_configuration(
+                is_jumpstart=False,
                 sharding_config={"key": "value"},
                 instance_type=None,
                 quantization_config=None,
@@ -3496,6 +3499,7 @@ def test_corner_cases_throw_errors(self):
                 "are currently not support on both GPU and Neuron instances."
             ),
             lambda: _validate_optimization_configuration(
+                is_jumpstart=False,
                 instance_type="ml.g5.24xlarge",
                 quantization_config=None,
                 speculative_decoding_config=None,
@@ -3504,12 +3508,22 @@ def test_corner_cases_throw_errors(self):
             ),
         )

+        _validate_optimization_configuration(
+            is_jumpstart=True,
+            instance_type="ml.inf2.xlarge",
+            quantization_config=None,
+            speculative_decoding_config=None,
+            compilation_config=None,
+            sharding_config=None,
+        )
+
     def test_trt_and_vllm_configurations_throw_errors_for_rule_set(self):
         # Quantization:smoothquant without compilation
         self.assertRaisesRegex(
             ValueError,
             "Optimizations that use Quantization:smoothquant must be provided with Compilation for GPU instances.",
             lambda: _validate_optimization_configuration(
+                is_jumpstart=False,
                 instance_type="ml.g5.24xlarge",
                 quantization_config={
                     "OverrideEnvironment": {"OPTION_QUANTIZE": "smoothquant"},
@@ -3525,6 +3539,7 @@ def test_trt_and_vllm_configurations_throw_errors_for_rule_set(self):
             ValueError,
             "Optimizations that use Quantization:test are not supported for GPU instances.",
             lambda: _validate_optimization_configuration(
+                is_jumpstart=False,
                 instance_type="ml.g5.24xlarge",
                 quantization_config={
                     "OverrideEnvironment": {"OPTION_QUANTIZE": "test"},
@@ -3540,6 +3555,7 @@ def test_neuron_configurations_throw_errors_for_rule_set(self):
             ValueError,
             "Optimizations that use Speculative Decoding are not supported on Neuron instances.",
             lambda: _validate_optimization_configuration(
+                is_jumpstart=False,
                 instance_type="ml.inf2.xlarge",
                 quantization_config=None,
                 speculative_decoding_config={"key": "value"},
@@ -3552,6 +3568,7 @@ def test_neuron_configurations_throw_errors_for_rule_set(self):
             ValueError,
             "Optimizations that use Sharding are not supported on Neuron instances.",
             lambda: _validate_optimization_configuration(
+                is_jumpstart=False,
                 instance_type="ml.inf2.xlarge",
                 quantization_config=None,
                 speculative_decoding_config=None,
@@ -3563,6 +3580,7 @@ def test_neuron_configurations_throw_errors_for_rule_set(self):
     def test_trt_configurations_rule_set(self):
         # Can be compiled with quantization
         _validate_optimization_configuration(
+            is_jumpstart=False,
             instance_type="ml.g5.24xlarge",
             quantization_config={
                 "OverrideEnvironment": {"OPTION_QUANTIZE": "smoothquant"},
@@ -3574,6 +3592,7 @@ def test_trt_configurations_rule_set(self):

         # Can be just compiled
         _validate_optimization_configuration(
+            is_jumpstart=False,
             instance_type="ml.g5.24xlarge",
             quantization_config=None,
             sharding_config=None,
@@ -3583,6 +3602,7 @@ def test_trt_configurations_rule_set(self):

         # Can be just compiled with empty dict
         _validate_optimization_configuration(
+            is_jumpstart=False,
             instance_type="ml.g5.24xlarge",
             quantization_config=None,
             sharding_config=None,
@@ -3593,6 +3613,7 @@ def test_vllm_configurations_rule_set(self):
     def test_vllm_configurations_rule_set(self):
         # Can use speculative decoding
         _validate_optimization_configuration(
+            is_jumpstart=False,
             instance_type="ml.g5.24xlarge",
             quantization_config=None,
             sharding_config=None,
@@ -3602,6 +3623,7 @@ def test_vllm_configurations_rule_set(self):

         # Can be quantized
         _validate_optimization_configuration(
+            is_jumpstart=False,
             instance_type="ml.g5.24xlarge",
             quantization_config={
                 "OverrideEnvironment": {"OPTION_QUANTIZE": "awq"},
@@ -3613,6 +3635,7 @@ def test_vllm_configurations_rule_set(self):

         # Can be sharded
         _validate_optimization_configuration(
+            is_jumpstart=False,
             instance_type="ml.g5.24xlarge",
             quantization_config=None,
             sharding_config={"key": "value"},
@@ -3623,6 +3646,7 @@ def test_vllm_configurations_rule_set(self):
     def test_neuron_configurations_rule_set(self):
         # Can be compiled
         _validate_optimization_configuration(
+            is_jumpstart=False,
             instance_type="ml.inf2.xlarge",
             quantization_config=None,
             sharding_config=None,
@@ -3632,6 +3656,7 @@ def test_neuron_configurations_rule_set(self):

         # Can be compiled with empty dict
         _validate_optimization_configuration(
+            is_jumpstart=False,
             instance_type="ml.inf2.xlarge",
             quantization_config=None,
             sharding_config=None,
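
For context, a minimal sketch of how the updated validator can be exercised directly, outside the assertRaisesRegex lambdas above. The import path is an assumption about where the private helper lives in the SDK, and the trailing None kwargs in the second call are assumed rather than shown in the truncated hunks; the keyword arguments otherwise mirror the calls in the diff.

# Sketch only: the module path below is an assumption, not confirmed by this diff.
from sagemaker.serve.validations.optimization import _validate_optimization_configuration

# The new passing call in the diff suggests a JumpStart model on a Neuron instance
# validates even with every optimization config left as None.
_validate_optimization_configuration(
    is_jumpstart=True,
    instance_type="ml.inf2.xlarge",
    quantization_config=None,
    speculative_decoding_config=None,
    compilation_config=None,
    sharding_config=None,
)

# Non-JumpStart callers now pass is_jumpstart=False explicitly; unsupported
# combinations still raise ValueError, as the rule-set tests assert.
try:
    _validate_optimization_configuration(
        is_jumpstart=False,
        instance_type="ml.inf2.xlarge",
        quantization_config=None,
        speculative_decoding_config={"key": "value"},
        compilation_config=None,
        sharding_config=None,
    )
except ValueError as err:
    print(err)  # e.g. "Optimizations that use Speculative Decoding are not supported on Neuron instances."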