@@ -171,26 +171,28 @@ def _verify_quantization(self) -> None:
171
171
self .quantization = self .quantization .lower ()
172
172
173
173
# Parse quantization method from the HF model config, if available.
174
- hf_quant_config = getattr (self .hf_config , "quantization_config" , None )
175
- if hf_quant_config is not None :
176
- hf_quant_method = str (hf_quant_config ["quant_method" ]).lower ()
177
-
178
- # If the GPTQ model is serialized in marlin format, use marlin.
179
- if (hf_quant_method == "gptq"
180
- and "is_marlin_format" in hf_quant_config
181
- and hf_quant_config ["is_marlin_format" ]):
174
+ quant_cfg = getattr (self .hf_config , "quantization_config" , None )
175
+ if quant_cfg is not None :
176
+ quant_method = quant_cfg .get ("quant_method" , "" ).lower ()
177
+ # compat: autogptq >=0.8.0 uses checkpoint_format: str
178
+ # compat: autogptq <=0.7.1 uses is_marlin_format: bool
179
+ is_format_marlin = (quant_cfg .get ("checkpoint_format" ) == "marlin"
180
+ or quant_cfg .get ("is_marlin_format" , False ))
181
+
182
+ # Use marlin if the GPTQ model is serialized in marlin format.
183
+ if quant_method == "gptq" and is_format_marlin :
182
184
logger .info ("The model is serialized in Marlin format. "
183
185
"Using Marlin kernel." )
184
- hf_quant_method = "marlin"
186
+ quant_method = "marlin"
185
187
if self .quantization == "gptq" :
186
- self .quantization = hf_quant_method
188
+ self .quantization = quant_method
187
189
188
190
if self .quantization is None :
189
- self .quantization = hf_quant_method
190
- elif self .quantization != hf_quant_method :
191
+ self .quantization = quant_method
192
+ elif self .quantization != quant_method :
191
193
raise ValueError (
192
194
"Quantization method specified in the model config "
193
- f"({ hf_quant_method } ) does not match the quantization "
195
+ f"({ quant_method } ) does not match the quantization "
194
196
f"method specified in the `quantization` argument "
195
197
f"({ self .quantization } )." )
196
198
0 commit comments