From 338770d7c7e63b5e9551005a021bd3d54999b2ea Mon Sep 17 00:00:00 2001
From: Rohan Joshi
Date: Wed, 2 Jul 2025 10:32:47 -0700
Subject: [PATCH] Fixed bug for 16a4w ptq

Summary: Currently, running executorch/examples/models/llama/export_llama.py with the flag --ptq 16a4w performs 16a16w quantization instead of the requested 16a4w; this diff fixes that. This may be related to some open GitHub issues.

Differential Revision: D77671468
---
 extension/llm/export/quantizer_lib.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extension/llm/export/quantizer_lib.py b/extension/llm/export/quantizer_lib.py
index 99499e34bb2..b94feb5a1ae 100644
--- a/extension/llm/export/quantizer_lib.py
+++ b/extension/llm/export/quantizer_lib.py
@@ -192,7 +192,7 @@ def get_qnn_quantizer(
             act_observer=MinMaxObserver,
         )
     elif quant_config == "16a4w":
-        quant_dtype = QuantDtype.use_16a16w  # pyre-fixme[16]
+        quant_dtype = QuantDtype.use_16a4w  # pyre-fixme[16]
         qnn_quantizer.set_default_quant_config(
             quant_dtype,
             is_qat=is_qat,
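
Note for reviewers: a minimal, self-contained Python sketch of the selection logic this patch corrects. The QuantDtype enum values and the select_quant_dtype helper below are illustrative stand-ins for the QNN quantizer's presets and the branch inside get_qnn_quantizer, not the real API.

from enum import Enum

class QuantDtype(Enum):
    # Illustrative stand-ins for the QNN quantizer's dtype presets:
    # 16-bit activations with 16-bit or 4-bit weights.
    use_16a16w = "16a16w"
    use_16a4w = "16a4w"

def select_quant_dtype(quant_config: str) -> QuantDtype:
    # Mirrors the branch fixed by this patch: each config string must
    # map to its matching preset.
    if quant_config == "16a16w":
        return QuantDtype.use_16a16w
    elif quant_config == "16a4w":
        # Pre-patch, this branch returned use_16a16w, so a 16a4w
        # request silently produced 16-bit weights.
        return QuantDtype.use_16a4w
    raise ValueError(f"unsupported quant_config: {quant_config}")

assert select_quant_dtype("16a4w") is QuantDtype.use_16a4w

The fix is one token in quantizer_lib.py, but the symptom is easy to miss: the export succeeds either way, and only the resulting weight bit-width reveals that the wrong preset was applied.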