Add more unit tests (UTs)

gwang111 · gwang111 · commit 8d97eb1e1a14 · 2024-11-18T21:20:40.000Z
diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py
@@ -2910,8 +2910,7 @@ def test_neuron_configurations_throw_errors_for_rule_set(self):
             ),
         )
 
-    def test_trt_configurations_rule_set(self):
-        # Can be quantized
+    def test_trt_configurations_throw_errors_for_rule_set(self):
         expected_compilation_quantization_error_message = """
         Optimization cannot be performed for the following reasons:
         - Optimizations that use Compilation must be provided with Quantization and vice-versa for GPU instances.
@@ -2931,7 +2930,6 @@ def test_trt_configurations_rule_set(self):
             ),
         )
 
-        # Can be compiled
         expected_compilation_quantization_error_message = """
         Optimization cannot be performed for the following reasons:
         - Optimizations that use Compilation must be provided with Quantization and vice-versa for GPU instances.
@@ -2949,6 +2947,18 @@ def test_trt_configurations_rule_set(self):
             ),
         )
 
+    def test_trt_configurations_rule_set(self):
+        # Compilation supplied together with quantization: the test passes only if this call does not raise
+        _validate_optimization_configuration(
+            instance_type="ml.g5.24xlarge",
+            quantization_config={
+                "OverrideEnvironment": {"OPTION_QUANTIZE": "awq"},
+            },
+            sharding_config=None,
+            speculative_decoding_config=None,
+            compilation_config={"key": "value"},
+        )
+
     def test_vllm_configurations_rule_set(self):
         # Can use speculative decoding
         _validate_optimization_configuration(