@@ -216,6 +216,8 @@ struct clip_layer {
216216    ggml_tensor * q_b = nullptr ;
217217    ggml_tensor * v_w = nullptr ;
218218    ggml_tensor * v_b = nullptr ;
219+     ggml_tensor * qkv_w = nullptr ;
220+     ggml_tensor * qkv_b = nullptr ;
219221
220222    ggml_tensor * o_w = nullptr ;
221223    ggml_tensor * o_b = nullptr ;
@@ -927,16 +929,15 @@ struct clip_graph {
927929
928930            //  self-attention
929931            {
930-                 ggml_tensor * Qcur = ggml_add (ctx0,
931-                     ggml_mul_mat (ctx0, layer.q_w , cur), layer.q_b );
932-                 ggml_tensor * Kcur = ggml_add (ctx0,
933-                     ggml_mul_mat (ctx0, layer.k_w , cur), layer.k_b );
934-                 ggml_tensor * Vcur = ggml_add (ctx0,
935-                     ggml_mul_mat (ctx0, layer.v_w , cur), layer.v_b );
932+                 cur = ggml_mul_mat (ctx0, layer.qkv_w , cur);
933+                 cur = ggml_add (ctx0, cur, layer.qkv_b );
936934
937-                 Qcur = ggml_reshape_3d (ctx0, Qcur, d_head, n_head, n_patches);
938-                 Kcur = ggml_reshape_3d (ctx0, Kcur, d_head, n_head, n_patches);
939-                 Vcur = ggml_reshape_3d (ctx0, Vcur, d_head, n_head, n_patches);
935+                 ggml_tensor * Qcur = ggml_view_3d (ctx0, cur, d_head, n_head, n_pos, d_head*sizeof (float ),
936+                     cur->nb [1 ], 0 );
937+                 ggml_tensor * Kcur = ggml_view_3d (ctx0, cur, d_head, n_head, n_pos, d_head*sizeof (float ),
938+                     cur->nb [1 ], n_embd * sizeof (float ));
939+                 ggml_tensor * Vcur = ggml_view_3d (ctx0, cur, d_head, n_head, n_pos, d_head*sizeof (float ),
940+                     cur->nb [1 ], 2  * n_embd * sizeof (float ));
940941
941942                cb (Qcur, " Qcur"  , il);
942943                cb (Kcur, " Kcur"  , il);
@@ -2758,10 +2759,11 @@ struct clip_model_loader {
27582759        model.layers .resize (hparams.n_layer );
27592760        for  (int  il = 0 ; il < hparams.n_layer ; ++il) {
27602761            auto  & layer = model.layers [il];
2761-             layer.k_w     = get_tensor (string_format (TN_ATTN_K,      prefix, il, " weight"  ));
2762-             layer.q_w     = get_tensor (string_format (TN_ATTN_Q,      prefix, il, " weight"  ));
2763-             layer.v_w     = get_tensor (string_format (TN_ATTN_V,      prefix, il, " weight"  ));
2762+             layer.k_w     = get_tensor (string_format (TN_ATTN_K,      prefix, il, " weight"  ),  false );
2763+             layer.q_w     = get_tensor (string_format (TN_ATTN_Q,      prefix, il, " weight"  ),  false );
2764+             layer.v_w     = get_tensor (string_format (TN_ATTN_V,      prefix, il, " weight"  ),  false );
27642765            layer.o_w     = get_tensor (string_format (TN_ATTN_OUTPUT, prefix, il, " weight"  ));
2766+             layer.qkv_w   = get_tensor (string_format (TN_ATTN_QKV,    prefix, il, " weight"  ), false );
27652767            layer.k_norm  = get_tensor (string_format (TN_ATTN_K_NORM, prefix, il, " weight"  ), false );
27662768            layer.q_norm  = get_tensor (string_format (TN_ATTN_Q_NORM, prefix, il, " weight"  ), false );
27672769            layer.ln_1_w  = get_tensor (string_format (TN_LN_1,        prefix, il, " weight"  ), false );
@@ -2773,6 +2775,7 @@ struct clip_model_loader {
27732775            layer.q_b     = get_tensor (string_format (TN_ATTN_Q,      prefix, il, " bias"  ), false );
27742776            layer.v_b     = get_tensor (string_format (TN_ATTN_V,      prefix, il, " bias"  ), false );
27752777            layer.o_b     = get_tensor (string_format (TN_ATTN_OUTPUT, prefix, il, " bias"  ), false );
2778+             layer.qkv_b   = get_tensor (string_format (TN_ATTN_QKV,    prefix, il, " bias"  ), false );
27762779            layer.ln_1_b  = get_tensor (string_format (TN_LN_1,        prefix, il, " bias"  ), false );
27772780            layer.ln_2_b  = get_tensor (string_format (TN_LN_2,        prefix, il, " bias"  ), false );
27782781
0 commit comments