Skip to content

Commit 1e08157

Browse files
committed
clip-vit: model convert qkv_proj split
1 parent 8b3d319 commit 1e08157

File tree

1 file changed

+22
-1
lines changed

1 file changed

+22
-1
lines changed

convert_hf_to_gguf.py

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5856,6 +5856,27 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
58565856
if ".attn.rel_pos_h" in name or ".attn.rel_pos_w" in name:
58575857
return [(self.map_tensor_name(name, try_suffixes=("",)), data_torch)]
58585858

5859+
if name.startswith("model.vision_model.transformer.layers."):
5860+
# process visual tensors
5861+
# split QKV tensors if needed
5862+
if ".qkv_proj." in name:
5863+
if data_torch.ndim == 2: # weight
5864+
c3, _ = data_torch.shape
5865+
else: # bias
5866+
c3 = data_torch.shape[0]
5867+
assert c3 % 3 == 0
5868+
c = c3 // 3
5869+
wq = data_torch[:c]
5870+
wk = data_torch[c: c * 2]
5871+
wv = data_torch[c * 2:]
5872+
return [
5873+
(self.map_tensor_name(name.replace("qkv", "q")), wq),
5874+
(self.map_tensor_name(name.replace("qkv", "k")), wk),
5875+
(self.map_tensor_name(name.replace("qkv", "v")), wv),
5876+
]
5877+
else:
5878+
return [(self.map_tensor_name(name), data_torch)]
5879+
58595880
return [(self.map_tensor_name(name), data_torch)]
58605881

58615882

@@ -7100,7 +7121,7 @@ def set_gguf_parameters(self):
71007121
else:
71017122
# note: deepseek2 using MLA converts into MQA (ie: GQA with 1 group)
71027123
self.hparams["num_key_value_heads"] = 1
7103-
7124+
71047125
super().set_gguf_parameters()
71057126
hparams = self.hparams
71067127
kv_lora_rank = hparams["q_lora_rank"] if hparams["q_lora_rank"] is not None else 512

0 commit comments

Comments
 (0)