Skip to content

Commit 3e97708

Browse files
committed
update bug fixes
1 parent abd3e92 commit 3e97708

File tree

2 files changed

+32
-17
lines changed

2 files changed

+32
-17
lines changed

src/sagemaker/serve/validations/optimization.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ def validate_against(self, optimization_combination, rule_set: _OptimizationCont
6666
is_compiled = optimization_combination.compilation.copy().pop()
6767
is_quantized = optimization_combination.quantization_technique.copy().pop()
6868
if is_quantized and not is_compiled:
69-
raise ValueError("Quantization must be provided with Compilation")
69+
raise ValueError(f"Quantization:{is_quantized} must be provided with Compilation")
7070

7171

7272
TRUTHY_SET = {None, True}
@@ -203,20 +203,20 @@ def _validate_optimization_configuration(
203203
)
204204
)
205205
except ValueError as trt_compare_error:
206-
if "Quantization must be provided with Compilation" in str(trt_compare_error):
206+
if (
207+
str(trt_compare_error)
208+
== "Quantization:smoothquant must be provided with Compilation"
209+
):
210+
raise ValueError(
211+
f"Optimizations that use {trt_compare_error} for GPU instances."
212+
)
213+
if str(trt_compare_error) == str(vllm_compare_error):
207214
joint_error_msg = f"""
208-
Optimization cannot be performed for the following reasons:
209-
- Optimizations that use {trt_compare_error} for GPU instances.
210-
- Optimizations that use {vllm_compare_error} are not supported for GPU instances.
211-
"""
212-
else:
213-
if str(trt_compare_error) == str(vllm_compare_error):
214-
joint_error_msg = f"""
215215
Optimization cannot be performed for the following reasons:
216216
- Optimizations that use {trt_compare_error} are not supported for GPU instances.
217217
"""
218-
else:
219-
joint_error_msg = f"""
218+
else:
219+
joint_error_msg = f"""
220220
Optimization cannot be performed for the following reasons:
221221
- Optimizations that use {trt_compare_error} are not supported for GPU instances.
222222
- Optimizations that use {vllm_compare_error} are not supported for GPU instances.

tests/unit/sagemaker/serve/builder/test_model_builder.py

Lines changed: 21 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2860,14 +2860,10 @@ def test_corner_cases_throw_errors(self):
28602860
)
28612861

28622862
def test_trt_and_vllm_configurations_throw_errors_for_rule_set(self):
2863-
expected_compilation_quantization_error_message = """
2864-
Optimization cannot be performed for the following reasons:
2865-
- Optimizations that use Quantization must be provided with Compilation for GPU instances.
2866-
- Optimizations that use Quantization:smoothquant are not supported for GPU instances.
2867-
"""
2863+
# Quantization:smoothquant without compilation
28682864
self.assertRaisesRegex(
28692865
ValueError,
2870-
textwrap.dedent(expected_compilation_quantization_error_message),
2866+
"Optimizations that use Quantization:smoothquant must be provided with Compilation for GPU instances.",
28712867
lambda: _validate_optimization_configuration(
28722868
instance_type="ml.g5.24xlarge",
28732869
quantization_config={
@@ -2879,6 +2875,7 @@ def test_trt_and_vllm_configurations_throw_errors_for_rule_set(self):
28792875
),
28802876
)
28812877

2878+
# Invalid quantization technique
28822879
expected_quantization_error_message = """
28832880
Optimization cannot be performed for the following reasons:
28842881
- Optimizations that use Quantization:test are not supported for GPU instances.
@@ -2943,6 +2940,15 @@ def test_trt_configurations_rule_set(self):
29432940
compilation_config={"key": "value"},
29442941
)
29452942

2943+
# Can be just compiled with empty dict
2944+
_validate_optimization_configuration(
2945+
instance_type="ml.g5.24xlarge",
2946+
quantization_config=None,
2947+
sharding_config=None,
2948+
speculative_decoding_config=None,
2949+
compilation_config={},
2950+
)
2951+
29462952
def test_vllm_configurations_rule_set(self):
29472953
# Can use speculative decoding
29482954
_validate_optimization_configuration(
@@ -2982,3 +2988,12 @@ def test_neuron_configurations_rule_set(self):
29822988
speculative_decoding_config=None,
29832989
compilation_config={"key": "value"},
29842990
)
2991+
2992+
# Can be compiled with empty dict
2993+
_validate_optimization_configuration(
2994+
instance_type="ml.inf2.xlarge",
2995+
quantization_config=None,
2996+
sharding_config=None,
2997+
speculative_decoding_config=None,
2998+
compilation_config={},
2999+
)

0 commit comments

Comments
 (0)