15
15
16
16
import textwrap
17
17
import logging
18
- from typing import Any , Dict , Set
18
+ from typing import Any , Dict , Set , Optional
19
19
from enum import Enum
20
20
from pydantic import BaseModel
21
21
@@ -34,10 +34,10 @@ class _OptimizationCombination(BaseModel):
34
34
"""Optimization ruleset data structure for comparing input to ruleset"""
35
35
36
36
optimization_container : _OptimizationContainer = None
37
- compilation : Set [bool | None ]
38
- speculative_decoding : Set [bool | None ]
39
- sharding : Set [bool | None ]
40
- quantization_technique : Set [str | None ]
37
+ compilation : Set [Optional [ bool ] ]
38
+ speculative_decoding : Set [Optional [ bool ] ]
39
+ sharding : Set [Optional [ bool ] ]
40
+ quantization_technique : Set [Optional [ str ] ]
41
41
42
42
def validate_against (self , optimization_combination , rule_set : _OptimizationContainer ):
43
43
"""Validator for optimization containers"""
@@ -66,16 +66,7 @@ def validate_against(self, optimization_combination, rule_set: _OptimizationCont
66
66
is_compiled = optimization_combination .compilation .copy ().pop ()
67
67
is_quantized = optimization_combination .quantization_technique .copy ().pop ()
68
68
if is_compiled and not is_quantized or is_quantized and not is_compiled :
69
- raise ValueError ("Compilation must be provided with Quantization" )
70
- else :
71
- is_compiled = optimization_combination .compilation .copy ().pop ()
72
- is_quantization_technique = optimization_combination .quantization_technique .copy ().pop ()
73
- if (
74
- is_compiled and is_quantization_technique
75
- ): # Check that the 2 techniques are not None
76
- raise ValueError (
77
- f"Compilation and Quantization:{ optimization_combination .quantization_technique .pop ()} "
78
- )
69
+ raise ValueError (f"Compilation must be provided with Quantization" )
79
70
80
71
81
72
TRUTHY_SET = {None , True }
@@ -95,7 +86,7 @@ def validate_against(self, optimization_combination, rule_set: _OptimizationCont
95
86
"optimization_combination" : _OptimizationCombination (
96
87
optimization_container = _OptimizationContainer .VLLM ,
97
88
compilation = FALSY_SET ,
98
- quantization_technique = {None },
89
+ quantization_technique = {None , "awq" , "fp8" },
99
90
speculative_decoding = TRUTHY_SET ,
100
91
sharding = TRUTHY_SET ,
101
92
),
@@ -200,7 +191,7 @@ def _validate_optimization_configuration(
200
191
)
201
192
print ("fsdafas" )
202
193
except ValueError as vllm_compare_error :
203
- if str ( trt_compare_error ) == "Compilation must be provided with Quantization" :
194
+ if "Compilation must be provided with Quantization" in str ( trt_compare_error ) :
204
195
joint_error_msg = f"""
205
196
Optimization cannot be performed for the following reasons:
206
197
- Optimizations that use { trt_compare_error } and vice-versa for GPU instances.
0 commit comments