@@ -216,6 +216,8 @@ struct clip_layer {
216216 ggml_tensor * q_b = nullptr ;
217217 ggml_tensor * v_w = nullptr ;
218218 ggml_tensor * v_b = nullptr ;
219+ ggml_tensor * qkv_w = nullptr ;
220+ ggml_tensor * qkv_b = nullptr ;
219221
220222 ggml_tensor * o_w = nullptr ;
221223 ggml_tensor * o_b = nullptr ;
@@ -927,16 +929,15 @@ struct clip_graph {
927929
928930 // self-attention
929931 {
930- ggml_tensor * Qcur = ggml_add (ctx0,
931- ggml_mul_mat (ctx0, layer.q_w , cur), layer.q_b );
932- ggml_tensor * Kcur = ggml_add (ctx0,
933- ggml_mul_mat (ctx0, layer.k_w , cur), layer.k_b );
934- ggml_tensor * Vcur = ggml_add (ctx0,
935- ggml_mul_mat (ctx0, layer.v_w , cur), layer.v_b );
932+ cur = ggml_mul_mat (ctx0, layer.qkv_w , cur);
933+ cur = ggml_add (ctx0, cur, layer.qkv_b );
936934
937- Qcur = ggml_reshape_3d (ctx0, Qcur, d_head, n_head, n_patches);
938- Kcur = ggml_reshape_3d (ctx0, Kcur, d_head, n_head, n_patches);
939- Vcur = ggml_reshape_3d (ctx0, Vcur, d_head, n_head, n_patches);
935+ ggml_tensor * Qcur = ggml_view_3d (ctx0, cur, d_head, n_head, n_pos, d_head*sizeof (float ),
936+ cur->nb [1 ], 0 );
937+ ggml_tensor * Kcur = ggml_view_3d (ctx0, cur, d_head, n_head, n_pos, d_head*sizeof (float ),
938+ cur->nb [1 ], n_embd * sizeof (float ));
939+ ggml_tensor * Vcur = ggml_view_3d (ctx0, cur, d_head, n_head, n_pos, d_head*sizeof (float ),
940+ cur->nb [1 ], 2 * n_embd * sizeof (float ));
940941
941942 cb (Qcur, " Qcur" , il);
942943 cb (Kcur, " Kcur" , il);
@@ -2758,10 +2759,11 @@ struct clip_model_loader {
27582759 model.layers .resize (hparams.n_layer );
27592760 for (int il = 0 ; il < hparams.n_layer ; ++il) {
27602761 auto & layer = model.layers [il];
2761- layer.k_w = get_tensor (string_format (TN_ATTN_K, prefix, il, " weight" ));
2762- layer.q_w = get_tensor (string_format (TN_ATTN_Q, prefix, il, " weight" ));
2763- layer.v_w = get_tensor (string_format (TN_ATTN_V, prefix, il, " weight" ));
2762+ layer.k_w = get_tensor (string_format (TN_ATTN_K, prefix, il, " weight" ), false );
2763+ layer.q_w = get_tensor (string_format (TN_ATTN_Q, prefix, il, " weight" ), false );
2764+ layer.v_w = get_tensor (string_format (TN_ATTN_V, prefix, il, " weight" ), false );
27642765 layer.o_w = get_tensor (string_format (TN_ATTN_OUTPUT, prefix, il, " weight" ));
2766+ layer.qkv_w = get_tensor (string_format (TN_ATTN_QKV, prefix, il, " weight" ), false );
27652767 layer.k_norm = get_tensor (string_format (TN_ATTN_K_NORM, prefix, il, " weight" ), false );
27662768 layer.q_norm = get_tensor (string_format (TN_ATTN_Q_NORM, prefix, il, " weight" ), false );
27672769 layer.ln_1_w = get_tensor (string_format (TN_LN_1, prefix, il, " weight" ), false );
@@ -2773,6 +2775,7 @@ struct clip_model_loader {
27732775 layer.q_b = get_tensor (string_format (TN_ATTN_Q, prefix, il, " bias" ), false );
27742776 layer.v_b = get_tensor (string_format (TN_ATTN_V, prefix, il, " bias" ), false );
27752777 layer.o_b = get_tensor (string_format (TN_ATTN_OUTPUT, prefix, il, " bias" ), false );
2778+ layer.qkv_b = get_tensor (string_format (TN_ATTN_QKV, prefix, il, " bias" ), false );
27762779 layer.ln_1_b = get_tensor (string_format (TN_LN_1, prefix, il, " bias" ), false );
27772780 layer.ln_2_b = get_tensor (string_format (TN_LN_2, prefix, il, " bias" ), false );
27782781
0 commit comments