File tree: 4 files changed, +9 −9 lines changed
Changed paths include test/quantization/quantize_/workflows/int4

@@ -24,7 +24,7 @@ Inference APIs for quantize_
     :nosignatures:
 
     Int4WeightOnlyConfig
-    Float8ActivationInt4WeightConfig
+    Float8DynamicActivationInt4WeightConfig
     Float8DynamicActivationFloat8WeightConfig
     Float8WeightOnlyConfig
     Float8StaticActivationFloat8WeightConfig
@@ -16,7 +16,7 @@
 )
 
 from torchao.quantization import (
-    Float8ActivationInt4WeightConfig,
+    Float8DynamicActivationInt4WeightConfig,
     Int4WeightOnlyConfig,
     quantize_,
 )
@@ -33,7 +33,7 @@
     VERSION=2,
 )
 
-FP8_ACT_CONFIG = Float8ActivationInt4WeightConfig(
+FP8_ACT_CONFIG = Float8DynamicActivationInt4WeightConfig(
     group_size=128,
     packing_format="preshuffled",
 )
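For reference, a minimal usage sketch of the renamed config outside the test harness. It assumes a CUDA device and the fbgemm kernels that back the "preshuffled" packing format; the model and tensor shapes are illustrative, not taken from the test.

import torch

from torchao.quantization import (
    Float8DynamicActivationInt4WeightConfig,
    quantize_,
)

# Illustrative model; any module with nn.Linear children works with quantize_.
model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).to(torch.bfloat16).cuda()

# Same arguments the test uses for FP8_ACT_CONFIG.
config = Float8DynamicActivationInt4WeightConfig(
    group_size=128,
    packing_format="preshuffled",
)
quantize_(model, config)  # float8 dynamic per-row activations, int4 per-group weights

x = torch.randn(2, 1024, dtype=torch.bfloat16, device="cuda")
y = model(x)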
@@ -44,9 +44,9 @@
 from .quant_api import (
     CutlassInt4PackedLayout,
     FbgemmConfig,
-    Float8ActivationInt4WeightConfig,
     Float8DynamicActivationFloat8SemiSparseWeightConfig,
     Float8DynamicActivationFloat8WeightConfig,
+    Float8DynamicActivationInt4WeightConfig,
     Float8MMConfig,
     Float8StaticActivationFloat8WeightConfig,
     Float8WeightOnlyConfig,
@@ -143,7 +143,7 @@
     "Int8DynamicActivationInt8WeightConfig",
     "Int8DynamicActivationIntxWeightConfig",
     "Int4WeightOnlyConfig",
-    "Float8ActivationInt4WeightConfig",
+    "Float8DynamicActivationInt4WeightConfig",
     "Int8WeightOnlyConfig",
     "Float8WeightOnlyConfig",
     "Float8DynamicActivationFloat8WeightConfig",
@@ -1252,7 +1252,7 @@ def _int4_weight_only_transform(
 
 
 @dataclass
-class Float8ActivationInt4WeightConfig(AOBaseConfig):
+class Float8DynamicActivationInt4WeightConfig(AOBaseConfig):
     """Configuration for apply float8 dynamic per row quantization and int4
     per group weight quantization to linear
 
@@ -1265,9 +1265,9 @@ class Float8ActivationInt4WeightConfig(AOBaseConfig):
     packing_format: PackingFormat = "preshuffled"
 
 
-@register_quantize_module_handler(Float8ActivationInt4WeightConfig)
-def _float8_activation_int4_weight_transform(
-    module: torch.nn.Module, config: Float8ActivationInt4WeightConfig
+@register_quantize_module_handler(Float8DynamicActivationInt4WeightConfig)
+def _float8_dynamic_activation_int4_weight_transform(
+    module: torch.nn.Module, config: Float8DynamicActivationInt4WeightConfig
 ) -> torch.nn.Module:
     assert hasattr(module, "weight"), (
         "applying int8 weight only quant requires module to have weight attribute"