Fix quantization-without-compilation error reporting in optimization validation; add empty compilation_config tests

gwang111 · gwang111 · commit 3e977083ee3e · 2024-11-18T21:20:40.000Z
diff --git a/src/sagemaker/serve/validations/optimization.py b/src/sagemaker/serve/validations/optimization.py
@@ -66,7 +66,7 @@ def validate_against(self, optimization_combination, rule_set: _OptimizationCont
             is_compiled = optimization_combination.compilation.copy().pop()
             is_quantized = optimization_combination.quantization_technique.copy().pop()
             if is_quantized and not is_compiled:
-                raise ValueError("Quantization must be provided with Compilation")
+                raise ValueError(f"Quantization:{is_quantized} must be provided with Compilation")
 
 
 TRUTHY_SET = {None, True}
@@ -203,20 +203,20 @@ def _validate_optimization_configuration(
                         )
                     )
                 except ValueError as trt_compare_error:
-                    if "Quantization must be provided with Compilation" in str(trt_compare_error):
+                    if (
+                        str(trt_compare_error)
+                        == "Quantization:smoothquant must be provided with Compilation"
+                    ):
+                        raise ValueError(
+                            f"Optimizations that use {trt_compare_error} for GPU instances."
+                        )
+                    if str(trt_compare_error) == str(vllm_compare_error):
                         joint_error_msg = f"""
-                    Optimization cannot be performed for the following reasons:
-                    - Optimizations that use {trt_compare_error} for GPU instances.
-                    - Optimizations that use {vllm_compare_error} are not supported for GPU instances.
-                    """
-                    else:
-                        if str(trt_compare_error) == str(vllm_compare_error):
-                            joint_error_msg = f"""
                         Optimization cannot be performed for the following reasons:
                         - Optimizations that use {trt_compare_error} are not supported for GPU instances.
                         """
-                        else:
-                            joint_error_msg = f"""
+                    else:
+                        joint_error_msg = f"""
                         Optimization cannot be performed for the following reasons:
                         - Optimizations that use {trt_compare_error} are not supported for GPU instances.
                         - Optimizations that use {vllm_compare_error} are not supported for GPU instances.
diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py
@@ -2860,14 +2860,10 @@ def test_corner_cases_throw_errors(self):
         )
 
     def test_trt_and_vllm_configurations_throw_errors_for_rule_set(self):
-        expected_compilation_quantization_error_message = """
-        Optimization cannot be performed for the following reasons:
-        - Optimizations that use Quantization must be provided with Compilation for GPU instances.
-        - Optimizations that use Quantization:smoothquant are not supported for GPU instances.
-        """
+        # Quantization:smoothquant without compilation
         self.assertRaisesRegex(
             ValueError,
-            textwrap.dedent(expected_compilation_quantization_error_message),
+            "Optimizations that use Quantization:smoothquant must be provided with Compilation for GPU instances.",
             lambda: _validate_optimization_configuration(
                 instance_type="ml.g5.24xlarge",
                 quantization_config={
@@ -2879,6 +2875,7 @@ def test_trt_and_vllm_configurations_throw_errors_for_rule_set(self):
             ),
         )
 
+        # Invalid quantization technique
         expected_quantization_error_message = """
         Optimization cannot be performed for the following reasons:
         - Optimizations that use Quantization:test are not supported for GPU instances.
@@ -2943,6 +2940,15 @@ def test_trt_configurations_rule_set(self):
             compilation_config={"key": "value"},
         )
 
+        # Can be compiled with empty dict
+        _validate_optimization_configuration(
+            instance_type="ml.g5.24xlarge",
+            quantization_config=None,
+            sharding_config=None,
+            speculative_decoding_config=None,
+            compilation_config={},
+        )
+
     def test_vllm_configurations_rule_set(self):
         # Can use speculative decoding
         _validate_optimization_configuration(
@@ -2982,3 +2988,12 @@ def test_neuron_configurations_rule_set(self):
             speculative_decoding_config=None,
             compilation_config={"key": "value"},
         )
+
+        # Can be compiled with empty dict
+        _validate_optimization_configuration(
+            instance_type="ml.inf2.xlarge",
+            quantization_config=None,
+            sharding_config=None,
+            speculative_decoding_config=None,
+            compilation_config={},
+        )