
Commit c0079d4

Isotr0py authored and fhl2000 committed
[Bugfix] Fix bnb 8bit model weights loading (vllm-project#19917)
Signed-off-by: Isotr0py <[email protected]>
Signed-off-by: fhl <[email protected]>
1 parent: cfbaaff · commit: c0079d4

File tree

1 file changed (+2, −2 lines)


vllm/model_executor/model_loader/bitsandbytes_loader.py

Lines changed: 2 additions & 2 deletions
@@ -577,10 +577,10 @@ def dequantize_dq(quant_states: dict) -> None:
     thereby avoiding this computational overhead during inference. This comes
     at the cost of increased memory usage.
     """
-    from bitsandbytes.functional import dequantize_blockwise
+    from bitsandbytes.functional import QuantState, dequantize_blockwise
     for _, quant_state in quant_states.items():
         # Copied from: https://github.com/bitsandbytes-foundation/bitsandbytes/blob/0.45.3/bitsandbytes/functional.py#L1352-#L1356
-        if quant_state.nested:
+        if isinstance(quant_state, QuantState) and quant_state.nested:
             absmax = dequantize_blockwise(quant_state.absmax,
                                           quant_state.state2)
             absmax += quant_state.offset
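
For context, below is a minimal standalone sketch (not the vLLM loader itself) of the failure mode this patch guards against: when loading bitsandbytes 8-bit checkpoints, a value in the quant_states dict may be a plain tensor rather than a bitsandbytes.functional.QuantState, so the old unconditional quant_state.nested access could raise AttributeError. The helper name, the in-place absmax write-back, and the example dict key are illustrative assumptions, not vLLM's actual code.

import torch
from bitsandbytes.functional import QuantState, dequantize_blockwise

def dequantize_dq_sketch(quant_states: dict) -> None:
    # Unpack doubly-quantized ("nested") absmax values in place.
    # Entries that are not QuantState objects (e.g. plain 8-bit scale
    # tensors) are skipped instead of crashing on `.nested`.
    for _, quant_state in quant_states.items():
        if isinstance(quant_state, QuantState) and quant_state.nested:
            # Recover the first-level absmax from its blockwise-quantized form.
            absmax = dequantize_blockwise(quant_state.absmax,
                                          quant_state.state2)
            absmax += quant_state.offset
            # Assumed write-back, mirroring the "avoid overhead at
            # inference" intent described in the docstring above.
            quant_state.absmax = absmax
            quant_state.nested = False

# An 8-bit entry that is a bare tensor no longer raises AttributeError:
states = {"model.layers.0.mlp.weight": torch.ones(4)}  # hypothetical key
dequantize_dq_sketch(states)  # safely skips the non-QuantState entry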
