Skip to content

Commit b1cbcdd

Browse files
committed
convert : support non-mxfp4 HF model
1 parent 7ad67ba commit b1cbcdd

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

convert_hf_to_gguf.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8029,7 +8029,12 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if "down_proj" in name:
             if name.endswith("_bias"):
                 name = name.replace("down_proj_bias", "down_proj.bias")
+            elif "_blocks" not in name and "_scales" not in name:
+                logger.warning(f"{name} is not in MXFP4, performance may be degraded")
+                name = name.replace("down_proj", "down_proj.weight")
+                data_torch = data_torch.transpose(-1, -2)
             else:
+                # otherwise, it should already be repacked to ggml MXFP4 format
                 return []

80358040
# split the gate_up into gate and up
@@ -8042,7 +8047,18 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
                 (self.map_tensor_name(name_gate), gate_proj_bias),
                 (self.map_tensor_name(name_up), up_proj_bias)
             ]
+            elif "_blocks" not in name and "_scales" not in name:
+                logger.warning(f"{name} is not in MXFP4, performance may be degraded")
+                name_up = name.replace("gate_up_proj", "up_proj.weight")
+                name_gate = name.replace("gate_up_proj", "gate_proj.weight")
+                data_torch = data_torch.transpose(-1, -2)
+                gate_proj_weight, up_proj_weight = data_torch[:, ::2, :], data_torch[:, 1::2, :]
+                return [
+                    (self.map_tensor_name(name_gate), gate_proj_weight),
+                    (self.map_tensor_name(name_up), up_proj_weight)
+                ]
             else:
+                # otherwise, it should already be repacked to ggml MXFP4 format
                 return []

         return [(self.map_tensor_name(name), data_torch)]

0 commit comments

Comments
 (0)