Skip to content

Commit 72da75f

Browse files
committed
Fix bf16 dtype mismatch in AllGatherHandle quantization path and add regression test
- Update AllGatherHandle.wait() to restore original_dtype after dequantization
- Pass original_dtype when instantiating AllGatherHandle for quantized parameters
- Add a regression test for the bf16 + zero_quantized_weights configuration

Fixes the non-coalesced version of issue deepspeedai#7775

Signed-off-by: juyterman1000 <fastrunner10090@gmail.com>
1 parent 49edc46 commit 72da75f

File tree

2 files changed

+65
-5
lines changed

2 files changed

+65
-5
lines changed

deepspeed/runtime/zero/partition_parameters.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -696,8 +696,11 @@ def wait(self, handle_dependency=True) -> None:
696696
self.__original_dtype).to(self.__param.device)
697697
elif self.__quantization:
698698
instrument_w_nvtx(self.__quantization.quant_handle.wait)()
699-
self.__param.data = self.__quantization.backend.dequantize(
700-
self.__quantization.quantized_param, self.__quantization.scale_buffer).to(self.__param.device)
699+
dequantized = self.__quantization.backend.dequantize(self.__quantization.quantized_param,
700+
self.__quantization.scale_buffer)
701+
if self.__original_dtype is not None:
702+
dequantized = dequantized.to(self.__original_dtype)
703+
self.__param.data = dequantized.to(self.__param.device)
701704
self.__param.ds_status = ZeroParamStatus.AVAILABLE
702705

703706

@@ -739,8 +742,8 @@ def wait(self, handle_dependency=True) -> None:
739742
instrument_w_nvtx(self.quantization.quant_handle.wait)()
740743
# Fix for issue #7775: convert dequantized tensor back to original dtype (e.g., bf16)
741744
# to prevent dtype mismatch when zero_quantized_weights is used with bf16
742-
dequantized = self.quantization.backend.dequantize(
743-
self.quantization.quantized_param, self.quantization.scale_buffer)
745+
dequantized = self.quantization.backend.dequantize(self.quantization.quantized_param,
746+
self.quantization.scale_buffer)
744747
if self.original_dtype is not None:
745748
dequantized = dequantized.to(self.original_dtype)
746749
flat_tensor = dequantized.to(self.params[0].device)
@@ -1385,7 +1388,7 @@ def all_gather_coalesced(params: Iterable[Parameter],
13851388
quant_info.backend = self.quantizer_module
13861389
quant_info.quant_handle = quant_handle
13871390
quant_info.scale_buffer = quant_scale_buffer
1388-
return AllGatherHandle(handle, param, quantization=quant_info)
1391+
return AllGatherHandle(handle, param, quantization=quant_info, original_dtype=original_dtype)
13891392

13901393
else:
13911394
if self.use_all_reduce_for_fetch_params and not quantize and not use_secondary_tensor:
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import pytest
2+
import torch
3+
import deepspeed
4+
from unit.common import DistributedTest
5+
from unit.simple_model import SimpleModel, random_dataloader
6+
7+
8+
class TestZeroQuantBF16(DistributedTest):
    """Regression test for bf16 dtype preservation with ZeRO-3 quantized weights.

    Exercises the all-gather + dequantization path with ``zero_quantized_weights``
    enabled under bf16 training and asserts that parameter data keeps the
    bfloat16 dtype afterwards (regression for deepspeedai#7775, where the
    dequantized tensor came back in the backend's output dtype).
    """
    world_size = 2

    @pytest.mark.parametrize("zero_quantized_weights", [True])
    def test_bf16_quantized_weights(self, zero_quantized_weights):
        if not deepspeed.get_accelerator().is_bf16_supported():
            pytest.skip("bf16 is not supported by this accelerator")

        config_dict = {
            "train_micro_batch_size_per_gpu": 1,
            "zero_optimization": {
                "stage": 3,
                "zero_quantized_weights": zero_quantized_weights,
            },
            "bf16": {
                "enabled": True
            },
            "optimizer": {
                "type": "Adam",
                "params": {
                    "lr": 1e-3
                }
            }
        }

        hidden_dim = 128
        model = SimpleModel(hidden_dim=hidden_dim)
        # Pass model_parameters explicitly so the engine can construct the
        # config-declared Adam optimizer from them.
        model, _, _, _ = deepspeed.initialize(model=model,
                                              model_parameters=model.parameters(),
                                              config=config_dict)

        # The engine should have cast all parameters to bf16.
        for param in model.parameters():
            assert param.dtype == torch.bfloat16

        data_loader = random_dataloader(model=model,
                                        total_samples=2,
                                        hidden_dim=hidden_dim,
                                        device=model.device,
                                        dtype=torch.bfloat16)

        # One step is enough to trigger all_gather + dequantization of the
        # ZeRO-3 partitioned parameters.
        for batch in data_loader:
            loss = model(batch[0], batch[1])

            # After all_gather, param.data must still be bfloat16; without the
            # fix the dequantize backend's output dtype leaks through.
            for name, param in model.named_parameters():
                assert param.data.dtype == torch.bfloat16, f"Parameter {name} data dtype is {param.data.dtype}, expected torch.bfloat16"

            model.backward(loss)
            model.step()
            break

0 commit comments

Comments
 (0)