File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed
vllm/model_executor/model_loader Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -577,10 +577,10 @@ def dequantize_dq(quant_states: dict) -> None:
577
577
thereby avoiding this computational overhead during inference. This comes
578
578
at the cost of increased memory usage.
579
579
"""
580
- from bitsandbytes .functional import dequantize_blockwise
580
+ from bitsandbytes .functional import QuantState , dequantize_blockwise
581
581
for _ , quant_state in quant_states .items ():
582
582
# Copied from: https://github.com/bitsandbytes-foundation/bitsandbytes/blob/0.45.3/bitsandbytes/functional.py#L1352-L1356
583
- if quant_state .nested :
583
+ if isinstance ( quant_state , QuantState ) and quant_state .nested :
584
584
absmax = dequantize_blockwise (quant_state .absmax ,
585
585
quant_state .state2 )
586
586
absmax += quant_state .offset
You can’t perform that action at this time.
0 commit comments