@@ -2910,8 +2910,7 @@ def test_neuron_configurations_throw_errors_for_rule_set(self):
29102910 ),
29112911 )
29122912
2913- def test_trt_configurations_rule_set (self ):
2914- # Can be quantized
2913+ def test_trt_configurations_throw_errors_for_rule_se (self ):
29152914 expected_compilation_quantization_error_message = """
29162915 Optimization cannot be performed for the following reasons:
29172916 - Optimizations that use Compilation must be provided with Quantization and vice-versa for GPU instances.
@@ -2931,7 +2930,6 @@ def test_trt_configurations_rule_set(self):
29312930 ),
29322931 )
29332932
2934- # Can be compiled
29352933 expected_compilation_quantization_error_message = """
29362934 Optimization cannot be performed for the following reasons:
29372935 - Optimizations that use Compilation must be provided with Quantization and vice-versa for GPU instances.
@@ -2949,6 +2947,18 @@ def test_trt_configurations_rule_set(self):
29492947 ),
29502948 )
29512949
2950+ def test_trt_configurations_rule_set (self ):
2951+ # Can be compiled with quantization
2952+ _validate_optimization_configuration (
2953+ instance_type = "ml.g5.24xlarge" ,
2954+ quantization_config = {
2955+ "OverrideEnvironment" : {"OPTION_QUANTIZE" : "awq" },
2956+ },
2957+ sharding_config = None ,
2958+ speculative_decoding_config = None ,
2959+ compilation_config = {"key" : "value" },
2960+ ),
2961+
29522962 def test_vllm_configurations_rule_set (self ):
29532963 # Can use speculative decoding
29542964 _validate_optimization_configuration (
0 commit comments