
Commit 42c1949

[Bugfix][Quantization] Support BF16 tensors on GGUF (vllm-project#29948)
Signed-off-by: Tsukasa OI <[email protected]>
Parent: cc4e296

File tree: 2 files changed (+18, -1 lines)

tests/models/quantization/test_gguf.py

Lines changed: 7 additions & 0 deletions
@@ -47,6 +47,12 @@ def gguf_model(self):
     gguf_filename="qwen2.5-1.5b-instruct-q6_k.gguf",
 )
 
+QWEN3_CONFIG = GGUFTestConfig(
+    original_model="Qwen/Qwen3-0.6B",
+    gguf_repo="unsloth/Qwen3-0.6B-GGUF",
+    gguf_filename="Qwen3-0.6B-BF16.gguf",
+)
+
 PHI3_CONFIG = GGUFTestConfig(
     original_model="microsoft/Phi-3.5-mini-instruct",
     gguf_repo="bartowski/Phi-3.5-mini-instruct-GGUF",
@@ -87,6 +93,7 @@ def gguf_model(self):
 MODELS = [
     # LLAMA_CONFIG, # broken: https://github.com/vllm-project/vllm/issues/19458
     QWEN2_CONFIG,
+    QWEN3_CONFIG,
     PHI3_CONFIG,
     GPT2_CONFIG,
     STABLELM_CONFIG,
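
For reference, a minimal sketch (not part of the commit) of exercising the same BF16 checkpoint that QWEN3_CONFIG points at, assuming vLLM's documented GGUF usage where the tokenizer is taken from the original model; the prompt and variable names are illustrative:

from huggingface_hub import hf_hub_download
from vllm import LLM, SamplingParams

# Fetch the exact file the new test config references.
gguf_path = hf_hub_download(
    repo_id="unsloth/Qwen3-0.6B-GGUF",
    filename="Qwen3-0.6B-BF16.gguf",
)

# GGUF checkpoints ship no HF tokenizer, so point `tokenizer` at the
# original model, mirroring GGUFTestConfig's original_model field.
llm = LLM(model=gguf_path, tokenizer="Qwen/Qwen3-0.6B")
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)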

vllm/model_executor/model_loader/weight_utils.py

Lines changed: 11 additions & 1 deletion
@@ -921,7 +921,17 @@ def gguf_quant_weights_iterator(
             name = gguf_to_hf_name_map[tensor.name]
             if weight_type.name not in ("F32", "BF16", "F16"):
                 name = name.replace("weight", "qweight")
-            param = torch.tensor(weight)
+            if weight_type.name == "BF16" and tensor.data.dtype == np.uint8:
+                # BF16 is currently the only "quantization" type that isn't
+                # actually quantized but is read as a raw byte tensor.
+                # Reinterpret as `torch.bfloat16` tensor.
+                weight = weight.view(np.uint16)
+                if reader.byte_order == "S":
+                    # GGUF endianness != system endianness
+                    weight = weight.byteswap()
+                param = torch.tensor(weight).view(torch.bfloat16)
+            else:
+                param = torch.tensor(weight)
             yield name, param
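The added branch is a pure bit-reinterpretation: gguf's reader returns BF16 tensor data as raw bytes (np.uint8), and NumPy has no bfloat16 dtype, so the bytes are paired into uint16 words (byte-swapped first if the file's endianness differs from the host's) and re-viewed as torch.bfloat16 on the torch side. A standalone sketch of that conversion, with an illustrative function name and sample values that are not vLLM code:

import numpy as np
import torch

def bytes_to_bf16(raw: np.ndarray, byteswap: bool = False) -> torch.Tensor:
    """Reinterpret a uint8 byte buffer as a torch.bfloat16 tensor."""
    # Zero-copy view pairing the bytes into 16-bit words, as in the commit.
    words = raw.view(np.uint16)
    if byteswap:
        # Mirrors the `reader.byte_order == "S"` branch: the GGUF file's
        # endianness differs from the host's.
        words = words.byteswap()
    # torch.tensor copies the words; .view() reinterprets the same 16-bit
    # payload as bfloat16 (needs a torch build with uint16 support, as the
    # commit itself relies on).
    return torch.tensor(words).view(torch.bfloat16)

# bfloat16 is the top half of float32: 1.0 -> 0x3F80, -2.0 -> 0xC000.
raw = np.array([0x80, 0x3F, 0x00, 0xC0], dtype=np.uint8)  # little-endian host
print(bytes_to_bf16(raw))  # tensor([ 1., -2.], dtype=torch.bfloat16)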