Commit 2be9279

use fused qkv in clip

1 parent 0b37fff

File tree

convert_hf_to_gguf.py
gguf-py/gguf/constants.py
gguf-py/gguf/tensor_mapping.py
tools/mtmd/clip-impl.h
tools/mtmd/clip.cpp

5 files changed: +23, -30 lines

convert_hf_to_gguf.py

Lines changed: 0 additions & 18 deletions
@@ -4147,24 +4147,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
             return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".bias", data_torch)]
 
         if name.startswith("visual."):
-            if ".qkv." in name:
-                if data_torch.ndim == 2:
-                    c3, _ = data_torch.shape
-                else:
-                    c3 = data_torch.shape[0]
-                if c3 % 3 != 0:
-                    raise ValueError(f"Unexpected QKV shape for {name}: {data_torch.shape}")
-                c = c3 // 3
-                wq = data_torch[:c]
-                wk = data_torch[c: c * 2]
-                wv = data_torch[c * 2:]
-                base = name.replace("qkv", "{placeholder}")
-                return [
-                    (self.map_tensor_name(base.format(placeholder="q")), wq),
-                    (self.map_tensor_name(base.format(placeholder="k")), wk),
-                    (self.map_tensor_name(base.format(placeholder="v")), wv),
-                ]
-
             return [(self.map_tensor_name(name), data_torch)]
 
         # Fall back to parent class for other tensors
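
Note on the removal: a fused QKV weight stores the Q, K and V projections as consecutive row blocks, so slicing the output of a single fused matmul is equivalent to running three separate projections. That is why the converter can now keep the tensor fused and let the runtime split the activations instead. A minimal numpy sketch of the equivalence (sizes are illustrative, not taken from any model):

    import numpy as np

    n_embd = 8
    x = np.random.rand(n_embd).astype(np.float32)
    w_qkv = np.random.rand(3 * n_embd, n_embd).astype(np.float32)  # fused [3*n_embd, n_embd]

    # row-wise thirds: what the deleted converter code used to emit as separate tensors
    wq, wk, wv = w_qkv[:n_embd], w_qkv[n_embd:2 * n_embd], w_qkv[2 * n_embd:]

    # one fused matmul followed by slicing yields the same Q/K/V activations
    fused = w_qkv @ x
    assert np.allclose(fused[:n_embd], wq @ x)
    assert np.allclose(fused[n_embd:2 * n_embd], wk @ x)
    assert np.allclose(fused[2 * n_embd:], wv @ x)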

gguf-py/gguf/constants.py

Lines changed: 3 additions & 0 deletions
@@ -614,6 +614,7 @@ class MODEL_TENSOR(IntEnum):
     V_ENC_EMBD_PATCH = auto()
     V_ENC_EMBD_POS = auto()
     V_ENC_INPUT_NORM = auto()
+    V_ENC_ATTN_QKV = auto()
     V_ENC_ATTN_Q = auto()
     V_ENC_ATTN_Q_NORM = auto()
     V_ENC_ATTN_K = auto()
@@ -964,6 +965,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.V_ENC_EMBD_CLS: "v.class_embd",
     MODEL_TENSOR.V_ENC_EMBD_PATCH: "v.patch_embd",
     MODEL_TENSOR.V_ENC_EMBD_POS: "v.position_embd",
+    MODEL_TENSOR.V_ENC_ATTN_QKV: "v.blk.{bid}.attn_qkv",
     MODEL_TENSOR.V_ENC_ATTN_Q: "v.blk.{bid}.attn_q",
     MODEL_TENSOR.V_ENC_ATTN_Q_NORM: "v.blk.{bid}.attn_q_norm",
     MODEL_TENSOR.V_ENC_ATTN_K: "v.blk.{bid}.attn_k",
@@ -1036,6 +1038,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.V_ENC_EMBD_PATCH,
         MODEL_TENSOR.V_ENC_EMBD_POS,
         MODEL_TENSOR.V_ENC_INPUT_NORM,
+        MODEL_TENSOR.V_ENC_ATTN_QKV,
         MODEL_TENSOR.V_ENC_ATTN_Q,
         MODEL_TENSOR.V_ENC_ATTN_Q_NORM,
         MODEL_TENSOR.V_ENC_ATTN_K,

gguf-py/gguf/tensor_mapping.py

Lines changed: 4 additions & 0 deletions
@@ -1188,6 +1188,10 @@ class TensorNameMap:
             "visual.pos_embed", # qwen3vl
         ),
 
+        MODEL_TENSOR.V_ENC_ATTN_QKV: (
+            "visual.blocks.{bid}.attn.qkv", # qwen3vl
+        ),
+
         MODEL_TENSOR.V_ENC_ATTN_Q: (
             "vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_proj",
             "model.vision_tower.encoder.layer.{bid}.attention.q_proj", # Intern-S1

tools/mtmd/clip-impl.h

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@
 #define TN_PATCH_EMBD    "v.patch_embd.weight"  // do not rename tensor with ".0" postfix for backward compat
 #define TN_PATCH_EMBD_1  "v.patch_embd.weight.1"
 #define TN_PATCH_BIAS    "v.patch_embd.bias"
+#define TN_ATTN_QKV      "%s.blk.%d.attn_qkv.%s"
 #define TN_ATTN_K        "%s.blk.%d.attn_k.%s"
 #define TN_ATTN_Q        "%s.blk.%d.attn_q.%s"
 #define TN_ATTN_V        "%s.blk.%d.attn_v.%s"
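
clip.cpp expands this printf-style pattern once per layer via string_format. A small sketch of the expansion, assuming the usual "v" vision prefix and layer 0:

    TN_ATTN_QKV = "%s.blk.%d.attn_qkv.%s"   # same pattern as the macro above
    for suffix in ("weight", "bias"):
        print(TN_ATTN_QKV % ("v", 0, suffix))
    # v.blk.0.attn_qkv.weight
    # v.blk.0.attn_qkv.bias

These are the names the converter now writes, so the loader below can find the fused tensors.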

tools/mtmd/clip.cpp

Lines changed: 15 additions & 12 deletions
@@ -216,6 +216,8 @@ struct clip_layer {
     ggml_tensor * q_b = nullptr;
     ggml_tensor * v_w = nullptr;
     ggml_tensor * v_b = nullptr;
+    ggml_tensor * qkv_w = nullptr;
+    ggml_tensor * qkv_b = nullptr;
 
     ggml_tensor * o_w = nullptr;
     ggml_tensor * o_b = nullptr;
@@ -927,16 +929,15 @@ struct clip_graph {
 
         // self-attention
         {
-            ggml_tensor * Qcur = ggml_add(ctx0,
-                ggml_mul_mat(ctx0, layer.q_w, cur), layer.q_b);
-            ggml_tensor * Kcur = ggml_add(ctx0,
-                ggml_mul_mat(ctx0, layer.k_w, cur), layer.k_b);
-            ggml_tensor * Vcur = ggml_add(ctx0,
-                ggml_mul_mat(ctx0, layer.v_w, cur), layer.v_b);
+            cur = ggml_mul_mat(ctx0, layer.qkv_w, cur);
+            cur = ggml_add(ctx0, cur, layer.qkv_b);
 
-            Qcur = ggml_reshape_3d(ctx0, Qcur, d_head, n_head, n_patches);
-            Kcur = ggml_reshape_3d(ctx0, Kcur, d_head, n_head, n_patches);
-            Vcur = ggml_reshape_3d(ctx0, Vcur, d_head, n_head, n_patches);
+            ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos,
+                d_head * sizeof(float), cur->nb[1], 0);
+            ggml_tensor * Kcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos,
+                d_head * sizeof(float), cur->nb[1], n_embd * sizeof(float));
+            ggml_tensor * Vcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos,
+                d_head * sizeof(float), cur->nb[1], 2 * n_embd * sizeof(float));
 
             cb(Qcur, "Qcur", il);
             cb(Kcur, "Kcur", il);
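
The three ggml_view_3d calls carve Q, K and V out of the fused activation without copying: each row of cur holds 3*n_embd floats, the row stride is cur->nb[1], and the views start at byte offsets 0, n_embd*sizeof(float) and 2*n_embd*sizeof(float). A numpy sketch of the same strides (sizes are illustrative, and numpy's shape order is the reverse of ggml's ne order):

    import numpy as np

    n_embd, n_head, n_pos = 8, 2, 3
    d_head = n_embd // n_head
    fsize = 4  # sizeof(float)

    # fused projection output: one row of 3*n_embd floats per position
    cur = np.arange(n_pos * 3 * n_embd, dtype=np.float32).reshape(n_pos, 3 * n_embd)

    def view_3d(offset_bytes):
        # mirrors ggml_view_3d(ctx0, cur, d_head, n_head, n_pos,
        #                      d_head*sizeof(float), cur->nb[1], offset)
        return np.lib.stride_tricks.as_strided(
            cur[:, offset_bytes // fsize:],
            shape=(n_pos, n_head, d_head),
            strides=(3 * n_embd * fsize, d_head * fsize, fsize))

    Qcur = view_3d(0)
    Kcur = view_3d(n_embd * fsize)
    Vcur = view_3d(2 * n_embd * fsize)

    assert np.shares_memory(Qcur, cur)  # the views alias the fused buffer, no copy
    assert np.array_equal(Kcur.reshape(n_pos, n_embd), cur[:, n_embd:2 * n_embd])

Compared with the previous three matmuls plus reshapes, the graph now does one larger matmul and three zero-copy views.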
@@ -2758,10 +2759,11 @@ struct clip_model_loader {
         model.layers.resize(hparams.n_layer);
         for (int il = 0; il < hparams.n_layer; ++il) {
             auto & layer = model.layers[il];
-            layer.k_w = get_tensor(string_format(TN_ATTN_K, prefix, il, "weight"));
-            layer.q_w = get_tensor(string_format(TN_ATTN_Q, prefix, il, "weight"));
-            layer.v_w = get_tensor(string_format(TN_ATTN_V, prefix, il, "weight"));
+            layer.k_w = get_tensor(string_format(TN_ATTN_K, prefix, il, "weight"), false);
+            layer.q_w = get_tensor(string_format(TN_ATTN_Q, prefix, il, "weight"), false);
+            layer.v_w = get_tensor(string_format(TN_ATTN_V, prefix, il, "weight"), false);
             layer.o_w = get_tensor(string_format(TN_ATTN_OUTPUT, prefix, il, "weight"));
+            layer.qkv_w = get_tensor(string_format(TN_ATTN_QKV, prefix, il, "weight"), false);
             layer.k_norm = get_tensor(string_format(TN_ATTN_K_NORM, prefix, il, "weight"), false);
             layer.q_norm = get_tensor(string_format(TN_ATTN_Q_NORM, prefix, il, "weight"), false);
             layer.ln_1_w = get_tensor(string_format(TN_LN_1, prefix, il, "weight"), false);
@@ -2773,6 +2775,7 @@
             layer.q_b = get_tensor(string_format(TN_ATTN_Q, prefix, il, "bias"), false);
             layer.v_b = get_tensor(string_format(TN_ATTN_V, prefix, il, "bias"), false);
             layer.o_b = get_tensor(string_format(TN_ATTN_OUTPUT, prefix, il, "bias"), false);
+            layer.qkv_b = get_tensor(string_format(TN_ATTN_QKV, prefix, il, "bias"), false);
             layer.ln_1_b = get_tensor(string_format(TN_LN_1, prefix, il, "bias"), false);
             layer.ln_2_b = get_tensor(string_format(TN_LN_2, prefix, il, "bias"), false);
 
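
Note that k_w, q_w and v_w are now requested with required=false, just like the new qkv_w and qkv_b, so a GGUF file may carry either split projections or a fused one. A hedged Python sketch of the optional-tensor behaviour (the real get_tensor is a C++ member of clip_model_loader; names here are illustrative):

    def get_tensor(tensors: dict, name: str, required: bool = True):
        # a missing optional tensor becomes None (nullptr in C++) instead of an error
        t = tensors.get(name)
        if t is None and required:
            raise KeyError(f"missing tensor: {name}")
        return t

    weights = {"v.blk.0.attn_qkv.weight": object()}
    assert get_tensor(weights, "v.blk.0.attn_q.weight", required=False) is None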
