
Commit c214cbc

Add an FP8 dynamic scheme for the latest Meta Llama 3.1 models and fix the W4A8 representation so that activations use dynamic per-token quantization (#114)
1 parent 1d4a39f commit c214cbc
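
For context, the W4A8 fix switches activation quantization from a single static per-tensor scale to scales computed per token at runtime, which is also how the new FP8_DYNAMIC preset handles activations. A minimal sketch of that distinction in plain PyTorch (illustrative only, not code from this repo; the only constants assumed are the symmetric int8 maximum 127 and the float8 e4m3 maximum 448):

import torch

def per_token_dynamic_scales(x: torch.Tensor, qmax: float) -> torch.Tensor:
    # One scale per token (row), derived from the live activations at runtime,
    # instead of a single precomputed scale for the whole activation tensor.
    return x.abs().amax(dim=-1, keepdim=True) / qmax

acts = torch.randn(4, 16)                                  # 4 tokens, hidden size 16
int8_scales = per_token_dynamic_scales(acts, qmax=127.0)   # W4A8 activations (int8)
fp8_scales = per_token_dynamic_scales(acts, qmax=448.0)    # FP8_DYNAMIC activations (float8 e4m3)
q = torch.round(acts / int8_scales).clamp(-127, 127).to(torch.int8)

Because the scales depend on the current batch, nothing is calibrated ahead of time for these activations; that is what dynamic=True encodes in the presets below.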

File tree

1 file changed (+20 −1 lines)

src/compressed_tensors/quantization/quant_scheme.py

Lines changed: 20 additions & 1 deletion
@@ -165,7 +165,7 @@ def is_preset_scheme(name: str) -> bool:
     input_activations=QuantizationArgs(
         num_bits=8,
         type=QuantizationType.INT,
-        strategy=QuantizationStrategy.TENSOR,
+        strategy=QuantizationStrategy.TOKEN,
         symmetric=True,
         dynamic=True,
     ),
@@ -189,6 +189,24 @@ def is_preset_scheme(name: str) -> bool:
     ),
 )
 
+# FP8 weights and FP8 dynamic activations quantization
+FP8_DYNAMIC = dict(
+    weights=QuantizationArgs(
+        num_bits=8,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.CHANNEL,
+        symmetric=True,
+        dynamic=False,
+    ),
+    input_activations=QuantizationArgs(
+        num_bits=8,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.TOKEN,
+        symmetric=True,
+        dynamic=True,
+    ),
+)
+
 PRESET_SCHEMES = {
     # Integer weight only schemes
     "W8A16": W8A16,
@@ -198,4 +216,5 @@ def is_preset_scheme(name: str) -> bool:
     "W4A8": W4A8,
     # Float weight and activation schemes
     "FP8": FP8,
+    "FP8_DYNAMIC": FP8_DYNAMIC,
 }
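
A short usage sketch of the new preset (assuming the module imports under the file path shown above; is_preset_scheme appears in the hunk headers and the dict layout mirrors the diff, but the exact attribute access is an assumption):

from compressed_tensors.quantization.quant_scheme import (
    PRESET_SCHEMES,
    is_preset_scheme,
)

assert is_preset_scheme("FP8_DYNAMIC")

scheme = PRESET_SCHEMES["FP8_DYNAMIC"]
weights = scheme["weights"]                   # static per-channel FP8 (dynamic=False)
activations = scheme["input_activations"]     # dynamic per-token FP8 (dynamic=True)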
