1 file changed: +20 -1 lines changed
src/compressed_tensors/quantization: 1 file changed, +20 -1 lines changed
Columns: original file line number | diff line number | diff line change
@@ -165,7 +165,7 @@ def is_preset_scheme(name: str) -> bool:
165
165
input_activations = QuantizationArgs (
166
166
num_bits = 8 ,
167
167
type = QuantizationType .INT ,
168
- strategy = QuantizationStrategy .TENSOR ,
168
+ strategy = QuantizationStrategy .TOKEN ,
169
169
symmetric = True ,
170
170
dynamic = True ,
171
171
),
@@ -189,6 +189,24 @@ def is_preset_scheme(name: str) -> bool:
189
189
),
190
190
)
191
191
192
+ # FP8 weights and FP8 dynamic activations quantization
193
+ FP8_DYNAMIC = dict (
194
+ weights = QuantizationArgs (
195
+ num_bits = 8 ,
196
+ type = QuantizationType .FLOAT ,
197
+ strategy = QuantizationStrategy .CHANNEL ,
198
+ symmetric = True ,
199
+ dynamic = False ,
200
+ ),
201
+ input_activations = QuantizationArgs (
202
+ num_bits = 8 ,
203
+ type = QuantizationType .FLOAT ,
204
+ strategy = QuantizationStrategy .TOKEN ,
205
+ symmetric = True ,
206
+ dynamic = True ,
207
+ ),
208
+ )
209
+
192
210
PRESET_SCHEMES = {
193
211
# Integer weight only schemes
194
212
"W8A16" : W8A16 ,
@@ -198,4 +216,5 @@ def is_preset_scheme(name: str) -> bool:
198
216
"W4A8" : W4A8 ,
199
217
# Float weight and activation schemes
200
218
"FP8" : FP8 ,
219
+ "FP8_DYNAMIC" : FP8_DYNAMIC ,
201
220
}
You can’t perform that action at this time.
0 commit comments