@@ -2860,14 +2860,10 @@ def test_corner_cases_throw_errors(self):
2860
2860
)
2861
2861
2862
2862
def test_trt_and_vllm_configurations_throw_errors_for_rule_set (self ):
2863
- expected_compilation_quantization_error_message = """
2864
- Optimization cannot be performed for the following reasons:
2865
- - Optimizations that use Quantization must be provided with Compilation for GPU instances.
2866
- - Optimizations that use Quantization:smoothquant are not supported for GPU instances.
2867
- """
2863
+ # Quantization:smoothquant without compilation
2868
2864
self .assertRaisesRegex (
2869
2865
ValueError ,
2870
- textwrap . dedent ( expected_compilation_quantization_error_message ) ,
2866
+ "Optimizations that use Quantization:smoothquant must be provided with Compilation for GPU instances." ,
2871
2867
lambda : _validate_optimization_configuration (
2872
2868
instance_type = "ml.g5.24xlarge" ,
2873
2869
quantization_config = {
@@ -2879,6 +2875,7 @@ def test_trt_and_vllm_configurations_throw_errors_for_rule_set(self):
2879
2875
),
2880
2876
)
2881
2877
2878
+ # Invalid quantization technique
2882
2879
expected_quantization_error_message = """
2883
2880
Optimization cannot be performed for the following reasons:
2884
2881
- Optimizations that use Quantization:test are not supported for GPU instances.
@@ -2943,6 +2940,15 @@ def test_trt_configurations_rule_set(self):
2943
2940
compilation_config = {"key" : "value" },
2944
2941
)
2945
2942
2943
+ # Can be just compiled with empty dict
2944
+ _validate_optimization_configuration (
2945
+ instance_type = "ml.g5.24xlarge" ,
2946
+ quantization_config = None ,
2947
+ sharding_config = None ,
2948
+ speculative_decoding_config = None ,
2949
+ compilation_config = {},
2950
+ )
2951
+
2946
2952
def test_vllm_configurations_rule_set (self ):
2947
2953
# Can use speculative decoding
2948
2954
_validate_optimization_configuration (
@@ -2982,3 +2988,12 @@ def test_neuron_configurations_rule_set(self):
2982
2988
speculative_decoding_config = None ,
2983
2989
compilation_config = {"key" : "value" },
2984
2990
)
2991
+
2992
+ # Can be compiled with empty dict
2993
+ _validate_optimization_configuration (
2994
+ instance_type = "ml.inf2.xlarge" ,
2995
+ quantization_config = None ,
2996
+ sharding_config = None ,
2997
+ speculative_decoding_config = None ,
2998
+ compilation_config = {},
2999
+ )
0 commit comments