@@ -111,55 +111,91 @@ def is_preset_scheme(name: str) -> bool:
     return name.upper() in PRESET_SCHEMES
 
 
+# 8 bit integer weights and 8 bit activations quantization
 W8A8 = dict(
     weights=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.CHANNEL,
+        symmetric=True,
+        dynamic=False,
     ),
     input_activations=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.TOKEN,
+        symmetric=True,
         dynamic=True,
     ),
 )
 
+# 8 bit integer weights only quantization
 W8A16 = dict(
     weights=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.CHANNEL,
-    )
+        symmetric=True,
+        dynamic=False,
+    ),
 )
 
+# 4 bit integer weights only quantization
 W4A16 = dict(
     weights=QuantizationArgs(
         num_bits=4,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.GROUP,
         group_size=128,
-    )
+        symmetric=True,
+        dynamic=False,
+    ),
 )
 
-FP8 = dict(
+# 4 bit integer weights and 8 bit activations quantization
+W4A8 = dict(
     weights=QuantizationArgs(
+        num_bits=4,
+        type=QuantizationType.INT,
+        group_size=128,
+        strategy=QuantizationStrategy.GROUP,
+        symmetric=True,
+        dynamic=False,
+    ),
+    input_activations=QuantizationArgs(
         num_bits=8,
+        type=QuantizationType.INT,
+        strategy=QuantizationStrategy.TENSOR,
         symmetric=True,
+        dynamic=True,
+    ),
+)
+
+# FP8 weights and FP8 activations quantization
+FP8 = dict(
+    weights=QuantizationArgs(
+        num_bits=8,
         type=QuantizationType.FLOAT,
         strategy=QuantizationStrategy.TENSOR,
+        symmetric=True,
+        dynamic=False,
     ),
     input_activations=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.FLOAT,
         strategy=QuantizationStrategy.TENSOR,
+        symmetric=True,
         dynamic=False,
     ),
 )
 
-PRESET_SCHEMES = {"W8A8": W8A8, "W8A16": W8A16, "W4A16": W4A16, "FP8": FP8}
+PRESET_SCHEMES = {
+    # Integer weight only schemes
+    "W8A16": W8A16,
+    "W4A16": W4A16,
+    # Integer weight and activation schemes
+    "W8A8": W8A8,
+    "W4A8": W4A8,
+    # Float weight and activation schemes
+    "FP8": FP8,
+}
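For reference, a minimal usage sketch of the presets added in this diff. The import path and the `resolve_preset` helper are assumptions made for illustration; only `is_preset_scheme`, `PRESET_SCHEMES`, and the preset name lookup via `name.upper()` come from the code above.

```python
# Minimal sketch of consuming the preset schemes; the import path below is an
# assumption, since this diff does not show where the module lives.
from compressed_tensors.quantization import PRESET_SCHEMES, is_preset_scheme


def resolve_preset(name: str) -> dict:
    """Hypothetical helper: map a case-insensitive preset name to its args."""
    if not is_preset_scheme(name):
        raise ValueError(f"unknown preset scheme: {name}")
    # Preset names are stored upper-case, matching is_preset_scheme's lookup.
    return PRESET_SCHEMES[name.upper()]


if __name__ == "__main__":
    preset = resolve_preset("w4a8")
    # Each preset maps "weights" / "input_activations" to QuantizationArgs;
    # attribute names are assumed to mirror the constructor kwargs in the diff.
    print(preset["weights"].num_bits)           # 4-bit, grouped, symmetric weights
    print(preset["input_activations"].dynamic)  # True: dynamic per-tensor activations
```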