@@ -2860,14 +2860,10 @@ def test_corner_cases_throw_errors(self):
28602860 )
28612861
28622862 def test_trt_and_vllm_configurations_throw_errors_for_rule_set (self ):
2863- expected_compilation_quantization_error_message = """
2864- Optimization cannot be performed for the following reasons:
2865- - Optimizations that use Quantization must be provided with Compilation for GPU instances.
2866- - Optimizations that use Quantization:smoothquant are not supported for GPU instances.
2867- """
2863+ # Quantization:smoothquant without compilation
28682864 self .assertRaisesRegex (
28692865 ValueError ,
2870- textwrap . dedent ( expected_compilation_quantization_error_message ) ,
2866+ "Optimizations that use Quantization:smoothquant must be provided with Compilation for GPU instances." ,
28712867 lambda : _validate_optimization_configuration (
28722868 instance_type = "ml.g5.24xlarge" ,
28732869 quantization_config = {
@@ -2879,6 +2875,7 @@ def test_trt_and_vllm_configurations_throw_errors_for_rule_set(self):
28792875 ),
28802876 )
28812877
2878+ # Invalid quantization technique
28822879 expected_quantization_error_message = """
28832880 Optimization cannot be performed for the following reasons:
28842881 - Optimizations that use Quantization:test are not supported for GPU instances.
@@ -2943,6 +2940,15 @@ def test_trt_configurations_rule_set(self):
29432940 compilation_config = {"key" : "value" },
29442941 )
29452942
2943+ # Compilation alone is valid, even when the compilation config is an empty dict
2944+ _validate_optimization_configuration (
2945+ instance_type = "ml.g5.24xlarge" ,
2946+ quantization_config = None ,
2947+ sharding_config = None ,
2948+ speculative_decoding_config = None ,
2949+ compilation_config = {},
2950+ )
2951+
29462952 def test_vllm_configurations_rule_set (self ):
29472953 # Can use speculative decoding
29482954 _validate_optimization_configuration (
@@ -2982,3 +2988,12 @@ def test_neuron_configurations_rule_set(self):
29822988 speculative_decoding_config = None ,
29832989 compilation_config = {"key" : "value" },
29842990 )
2991+
2992+ # Can be compiled with an empty dict
2993+ _validate_optimization_configuration (
2994+ instance_type = "ml.inf2.xlarge" ,
2995+ quantization_config = None ,
2996+ sharding_config = None ,
2997+ speculative_decoding_config = None ,
2998+ compilation_config = {},
2999+ )
0 commit comments