From 8b203e2a96e5ca4b9f009f2a84e6c67dbd1c7cbc Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Fri, 26 Sep 2025 15:22:10 +0000
Subject: [PATCH 1/2] run fused layer update on all modules

Signed-off-by: Brian Dellabetta
---
 .../modifiers/quantization/quantization/base.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/llmcompressor/modifiers/quantization/quantization/base.py b/src/llmcompressor/modifiers/quantization/quantization/base.py
index aa6208da4..eb0357c54 100644
--- a/src/llmcompressor/modifiers/quantization/quantization/base.py
+++ b/src/llmcompressor/modifiers/quantization/quantization/base.py
@@ -79,8 +79,15 @@ def on_start(self, state: State, event: Event, **kwargs):
         for _, module in tqdm.tqdm(named_modules):
             update_weight_global_scale(module)
 
-        for _, module in tqdm.tqdm(named_modules, desc="Calibrating weights"):
+        # NOTE: update_fused_layer_weight_global_scales operates on Attention
+        # and MLP layers, not quantizable Linear layers. Rather than running
+        # on targeted modules, we need to run on all modules.
+        # Because this call is idempotent, setting all global_scales to the
+        # min value, it is ok to run potentially multiple times for all modules
+        for module in state.model.modules():
             update_fused_layer_weight_global_scales(module)
+
+        for _, module in tqdm.tqdm(named_modules, desc="Calibrating weights"):
             update_weight_zp_scale(module)
 
     def on_event(self, state: State, event: Event, **kwargs):

From a40d3bfdd735a0a79c9ff957ed62925faaafa8b0 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Fri, 26 Sep 2025 15:37:24 +0000
Subject: [PATCH 2/2] tqdm labels

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/modifiers/quantization/quantization/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llmcompressor/modifiers/quantization/quantization/base.py b/src/llmcompressor/modifiers/quantization/quantization/base.py
index eb0357c54..424cabcf6 100644
--- a/src/llmcompressor/modifiers/quantization/quantization/base.py
+++ b/src/llmcompressor/modifiers/quantization/quantization/base.py
@@ -76,7 +76,7 @@ def on_start(self, state: State, event: Event, **kwargs):
         # TODO: this step can be combined with update_weight_zp_scale
         # once update_fused_layer_weight_global_scales is removed
         # and not required by vLLM
-        for _, module in tqdm.tqdm(named_modules):
+        for _, module in tqdm.tqdm(named_modules, desc="Updating global scales"):
             update_weight_global_scale(module)
 
         # NOTE: update_fused_layer_weight_global_scales operates on Attention
@@ -84,7 +84,7 @@ def on_start(self, state: State, event: Event, **kwargs):
         # on targeted modules, we need to run on all modules.
         # Because this call is idempotent, setting all global_scales to the
         # min value, it is ok to run potentially multiple times for all modules
-        for module in state.model.modules():
+        for module in tqdm.tqdm(state.model.modules(), desc="Fusing global scales"):
             update_fused_layer_weight_global_scales(module)
 
         for _, module in tqdm.tqdm(named_modules, desc="Calibrating weights"):
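
Reviewer note: the idempotency claim in the NOTE comment comes down to fusing
being a min-reduction over the global scales of sibling projections, and a
min-reduction applied twice gives the same result as applying it once, so
iterating over every module in state.model (and touching a fused layer's
submodules more than once) is safe. The sketch below is a toy illustration
under assumed names only: ToyAttention, the q_proj/k_proj/v_proj layout, the
weight_global_scale buffer, and fuse_global_scales are hypothetical and are
not the compressed-tensors implementation of
update_fused_layer_weight_global_scales.

    import torch
    import torch.nn as nn


    class ToyAttention(nn.Module):
        """Minimal stand-in for an attention block with q/k/v projections."""

        def __init__(self) -> None:
            super().__init__()
            self.q_proj = nn.Linear(8, 8)
            self.k_proj = nn.Linear(8, 8)
            self.v_proj = nn.Linear(8, 8)
            # per-projection global scales, e.g. produced by weight calibration
            for i, proj in enumerate((self.q_proj, self.k_proj, self.v_proj)):
                proj.register_buffer("weight_global_scale", torch.tensor(float(i + 1)))


    def fuse_global_scales(module: nn.Module) -> None:
        """Set every sibling projection's global scale to the group minimum."""
        if not isinstance(module, ToyAttention):
            return  # no-op on other modules, mirroring the run-on-all-modules loop
        projs = (module.q_proj, module.k_proj, module.v_proj)
        fused = torch.min(torch.stack([p.weight_global_scale for p in projs]))
        for p in projs:
            p.weight_global_scale.copy_(fused)


    model = nn.Sequential(ToyAttention())

    # first pass fuses the scales down to the group minimum ...
    for m in model.modules():
        fuse_global_scales(m)
    first = [p.weight_global_scale.clone() for p in model[0].children()]

    # ... and a second pass leaves them unchanged: min is idempotent
    for m in model.modules():
        fuse_global_scales(m)
    second = [p.weight_global_scale.clone() for p in model[0].children()]

    assert all(torch.equal(a, b) for a, b in zip(first, second))
    print([s.item() for s in second])  # -> [1.0, 1.0, 1.0]

The real helper handles the actual attention/MLP module types; the point here
is only that the min-reduction makes repeated application over all modules a
safe no-op after the first pass.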