From 338770d7c7e63b5e9551005a021bd3d54999b2ea Mon Sep 17 00:00:00 2001
From: Rohan Joshi
Date: Wed, 2 Jul 2025 10:32:47 -0700
Subject: [PATCH] Fixed bug for 16a4w ptq

Summary: Currently, running executorch/examples/models/llama/export_llama.py with the flag --ptq 16a4w performs 16a16w quantization instead of the requested 16a4w; this diff fixes that. This may be related to some open GitHub issues.

Differential Revision: D77671468
---
 extension/llm/export/quantizer_lib.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extension/llm/export/quantizer_lib.py b/extension/llm/export/quantizer_lib.py
index 99499e34bb2..b94feb5a1ae 100644
--- a/extension/llm/export/quantizer_lib.py
+++ b/extension/llm/export/quantizer_lib.py
@@ -192,7 +192,7 @@ def get_qnn_quantizer(
             act_observer=MinMaxObserver,
         )
     elif quant_config == "16a4w":
-        quant_dtype = QuantDtype.use_16a16w  # pyre-fixme[16]
+        quant_dtype = QuantDtype.use_16a4w  # pyre-fixme[16]
         qnn_quantizer.set_default_quant_config(
             quant_dtype,
             is_qat=is_qat,
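
Note for reviewers: a minimal, self-contained Python sketch of the selection logic this patch corrects. The QuantDtype enum values and the select_quant_dtype helper below are illustrative stand-ins for the QNN quantizer's presets and the branch inside get_qnn_quantizer, not the real API.

from enum import Enum

class QuantDtype(Enum):
    # Illustrative stand-ins for the QNN quantizer's dtype presets:
    # 16-bit activations with 16-bit or 4-bit weights.
    use_16a16w = "16a16w"
    use_16a4w = "16a4w"

def select_quant_dtype(quant_config: str) -> QuantDtype:
    # Mirrors the branch fixed by this patch: each config string must
    # map to its matching preset.
    if quant_config == "16a16w":
        return QuantDtype.use_16a16w
    elif quant_config == "16a4w":
        # Pre-patch, this branch returned use_16a16w, so a 16a4w
        # request silently produced 16-bit weights.
        return QuantDtype.use_16a4w
    raise ValueError(f"unsupported quant_config: {quant_config}")

assert select_quant_dtype("16a4w") is QuantDtype.use_16a4w

The fix is one token in quantizer_lib.py, but the symptom is easy to miss: the export succeeds either way, and only the resulting weight bit-width reveals that the wrong preset was applied.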