Commit 48c95d3

Updated surgery.
1 parent f2e4eea commit 48c95d3

File tree: 1 file changed (+38, -5)

examples/llava/qwen2_5_vl_surgery.py

Lines changed: 38 additions & 5 deletions
@@ -39,11 +39,26 @@ def to_gguf_name(name: str) -> str:
     if "weight_g" in name:
         name = name.replace("weight_g", "weight")

+    # Special handling for merger tensors to match clip.cpp expectations
+    if "merger.mlp" in name:
+        # Extract the layer number
+        parts = name.split(".")
+        for i, part in enumerate(parts):
+            if part == "mlp" and i + 1 < len(parts):
+                layer_num = parts[i + 1]
+                # Map the merger layers to the expected GGUF tensor names
+                # Note: clip.cpp looks for mm.0.* and mm.2.* (not mm.1.*)
+                if layer_num == "0":
+                    name = name.replace(f"merger.mlp.{layer_num}", "mm.0")
+                elif layer_num == "1":
+                    name = name.replace(f"merger.mlp.{layer_num}", "mm.2")
+                break
+
     print(f"[to_gguf_name] {og} --> {name}")
     return name


-def find_vision_tensors(model, dtype) -> Dict[str, np.ndarray]:
+def find_vision_tensors(model, dtype, hidden_size) -> Dict[str, np.ndarray]:
     visual = model.visual
     tensor_map = {}
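
For reference, a minimal sketch (not part of this commit) of the rename the hunk above performs. The input names are assumed Hugging Face checkpoint names, and the final output of to_gguf_name also depends on its other replacements:

    # Illustrative only: how the merger MLP layers are expected to be renamed.
    # clip.cpp looks up mm.0.* and mm.2.*; there is intentionally no mm.1.*.
    expected = {
        "merger.mlp.0.weight": "mm.0.weight",
        "merger.mlp.0.bias": "mm.0.bias",
        "merger.mlp.1.weight": "mm.2.weight",  # second layer maps to mm.2
        "merger.mlp.1.bias": "mm.2.bias",
    }
    for src, dst in expected.items():
        print(f"{src} --> {dst}")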

@@ -68,8 +83,23 @@ def find_vision_tensors(model, dtype) -> Dict[str, np.ndarray]:
             elif name.endswith("ln_q.bias") and 'weight_g' not in name:
                 tensor_map['v.post_ln.bias'] = ten
             else:
-                # "merger.mlp.%d.weight/bias" --> "mm.%d.weight/bias"
-                tensor_map[to_gguf_name(name)] = ten
+                # Handle merger tensors with special attention to naming
+                # First, determine if this is a layer 0 or layer 1 tensor
+                if "merger.mlp.0" in name:
+                    # First layer gets mapped to mm.0.*
+                    if "weight" in name:
+                        tensor_map["mm.0.weight"] = ten
+                    elif "bias" in name:
+                        tensor_map["mm.0.bias"] = ten
+                elif "merger.mlp.1" in name:
+                    # Second layer gets mapped to mm.2.* (not mm.1.*)
+                    if "weight" in name:
+                        tensor_map["mm.2.weight"] = ten
+                    elif "bias" in name:
+                        tensor_map["mm.2.bias"] = ten
+                else:
+                    # For any other tensors, use the standard naming conversion
+                    tensor_map[to_gguf_name(name)] = ten
         elif 'patch_embed.proj.weight' in name:
             # NOTE: split Conv3D into Conv2Ds
             c1, c2, kt, kh, kw = ten.shape
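
A quick sanity check, illustrative only and not part of the diff: after this branch runs, the projector entries in tensor_map should be exactly the four names below, with no mm.1.* key:

    # Hand-written stand-in for the merger entries produced above (tensors omitted).
    merger_keys = {"mm.0.weight", "mm.0.bias", "mm.2.weight", "mm.2.bias"}
    assert not any(k.startswith("mm.1.") for k in merger_keys)
    print(sorted(merger_keys))
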
@@ -84,7 +114,10 @@ def find_vision_tensors(model, dtype) -> Dict[str, np.ndarray]:
             tensor_map[new_name] = ten.astype(np.float32)
         else:
             tensor_map[new_name] = ten.astype(dtype)
-    tensor_map["v.position_embd.weight"] = np.zeros([10, 10], dtype=np.float32)  # dummy tensor, just here as a placeholder
+    # For Qwen2.5, create a properly sized position embedding tensor
+    # Size it based on the model's hidden dimension and expected sequence length
+    seq_length = 40 * 40  # Approximate max sequence length
+    tensor_map["v.position_embd.weight"] = np.zeros([seq_length, hidden_size], dtype=np.float32)  # Properly sized placeholder
     return tensor_map
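
For scale (not in the diff): the placeholder now grows with the vision hidden size. The value 1280 below is only an example; the script passes vcfg.hidden_size at the call site:

    import numpy as np

    seq_length = 40 * 40   # 1600 positions, matching the added code
    hidden_size = 1280     # example value only; the script uses vcfg.hidden_size
    placeholder = np.zeros([seq_length, hidden_size], dtype=np.float32)
    print(placeholder.shape, f"{placeholder.nbytes / 2**20:.1f} MiB")  # (1600, 1280) ~7.8 MiB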

@@ -153,7 +186,7 @@ def main(args):
     image_size = 14 * 40  # same as used below
     fout.add_uint32("clip.vision.image_crop_resolution", image_size)

-    tensor_map = find_vision_tensors(model, np_dtype)
+    tensor_map = find_vision_tensors(model, np_dtype, vcfg.hidden_size)
     for name, data in tensor_map.items():
         fout.add_tensor(name, data)
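
For context, a hedged sketch of where vcfg.hidden_size could come from; the config attribute names assume the Hugging Face Qwen2.5-VL layout and are not shown in this diff:

    from transformers import AutoConfig

    # Assumed config layout (model id is an example; requires a transformers
    # version with Qwen2.5-VL support):
    cfg = AutoConfig.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
    vcfg = cfg.vision_config           # vision tower settings
    print(vcfg.hidden_size)            # the value passed into find_vision_tensors above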
