@@ -443,8 +443,8 @@ struct ggml_tensor * forward(
443443 // wk shape [n_embd, n_embd, 1, 1]
444444 // Qcur shape [n_embd/n_head, n_head, N, 1]
445445 // Kcur shape [n_embd/n_head, n_head, N, 1]
446- struct ggml_tensor * Qcur = ggml_rope_inplace (ctx0, ggml_reshape_3d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wq , cur), n_embd/n_head, n_head, N), n_past, n_rot, 0 );
447- struct ggml_tensor * Kcur = ggml_rope_inplace (ctx0, ggml_reshape_3d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wk , cur), n_embd/n_head, n_head, N), n_past, n_rot, 0 );
446+ struct ggml_tensor * Qcur = ggml_rope_inplace (ctx0, ggml_reshape_3d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wq , cur), n_embd/n_head, n_head, N), n_past, n_rot, 0 , 0 );
447+ struct ggml_tensor * Kcur = ggml_rope_inplace (ctx0, ggml_reshape_3d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wk , cur), n_embd/n_head, n_head, N), n_past, n_rot, 0 , 0 );
448448
449449 // store key and value to memory
450450 {
@@ -700,8 +700,8 @@ struct ggml_tensor * forward_batch(
700700 // wk shape [n_embd, n_embd, 1, 1]
701701 // Qcur shape [n_embd/n_head, n_head, N, n_batch]
702702 // Kcur shape [n_embd/n_head, n_head, N, n_batch]
703- struct ggml_tensor * Qcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wq , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 );
704- struct ggml_tensor * Kcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wk , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 );
703+ struct ggml_tensor * Qcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wq , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 , 0 );
704+ struct ggml_tensor * Kcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wk , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 , 0 );
705705 assert_shape_4d (Qcur, n_embd/n_head, n_head, N, n_batch);
706706 assert_shape_4d (Kcur, n_embd/n_head, n_head, N, n_batch);
707707
@@ -985,8 +985,8 @@ struct ggml_tensor * forward_batch_wo_cache(
985985 // wk shape [n_embd, n_embd, 1, 1]
986986 // Qcur shape [n_embd/n_head, n_head, N, n_batch]
987987 // Kcur shape [n_embd/n_head, n_head, N, n_batch]
988- struct ggml_tensor * Qcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wq , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 );
989- struct ggml_tensor * Kcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wk , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 );
988+ struct ggml_tensor * Qcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wq , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 , 0 );
989+ struct ggml_tensor * Kcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wk , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 , 0 );
990990 assert_shape_4d (Qcur, n_embd/n_head, n_head, N, n_batch);
991991 assert_shape_4d (Kcur, n_embd/n_head, n_head, N, n_batch);
992992
@@ -1207,8 +1207,8 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn(
12071207 // compute Q and K and RoPE them
12081208 // wq shape [n_embd, n_embd, 1, 1]
12091209 // wk shape [n_embd, n_embd, 1, 1]
1210- struct ggml_tensor * Qcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wq , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 );
1211- struct ggml_tensor * Kcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wk , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 );
1210+ struct ggml_tensor * Qcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wq , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 , 0 );
1211+ struct ggml_tensor * Kcur = ggml_rope_inplace (ctx0, ggml_reshape_4d (ctx0, ggml_mul_mat (ctx0, model->layers [il].wk , cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0 , 0 );
12121212 assert_shape_4d (Qcur, n_embd/n_head, n_head, N, n_batch);
12131213 assert_shape_4d (Kcur, n_embd/n_head, n_head, N, n_batch);
12141214
@@ -1607,10 +1607,10 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn_train(
16071607 use_buf (-1 ); struct ggml_tensor * t04 = expand (gf, ggml_mul (ctx0, t02, t03)); assert_shape_2d (t04, n_embd, N*n_batch);
16081608 use_buf (-1 ); struct ggml_tensor * t05 = expand (gf, ggml_mul_mat (ctx0, layer.wq , t04)); assert_shape_2d (t05, n_embd, N*n_batch);
16091609 use_buf (-1 ); struct ggml_tensor * t06 = expand (gf, ggml_reshape_4d (ctx0, t05, n_embd/n_head, n_head, N, n_batch)); assert_shape_4d (t06, n_embd/n_head, n_head, N, n_batch);
1610- use_buf (-1 ); struct ggml_tensor * t07 = expand (gf, ggml_rope_inplace (ctx0, t06, n_past, n_rot, rope_mode)); assert_shape_4d (t07, n_embd/n_head, n_head, N, n_batch);
1610+ use_buf (-1 ); struct ggml_tensor * t07 = expand (gf, ggml_rope_inplace (ctx0, t06, n_past, n_rot, rope_mode, 0 )); assert_shape_4d (t07, n_embd/n_head, n_head, N, n_batch);
16111611 use_buf (-1 ); struct ggml_tensor * t08 = expand (gf, ggml_mul_mat (ctx0, layer.wk , t04)); assert_shape_2d (t08, n_embd, N*n_batch);
16121612 use_buf (-1 ); struct ggml_tensor * t09 = expand (gf, ggml_reshape_4d (ctx0, t08, n_embd/n_head, n_head, N, n_batch)); assert_shape_4d (t09, n_embd/n_head, n_head, N, n_batch);
1613- use_buf (-1 ); struct ggml_tensor * t10 = expand (gf, ggml_rope_inplace (ctx0, t09, n_past, n_rot, rope_mode)); assert_shape_4d (t10, n_embd/n_head, n_head, N, n_batch);
1613+ use_buf (-1 ); struct ggml_tensor * t10 = expand (gf, ggml_rope_inplace (ctx0, t09, n_past, n_rot, rope_mode, 0 )); assert_shape_4d (t10, n_embd/n_head, n_head, N, n_batch);
16141614 use_buf (-1 ); struct ggml_tensor * t11 = expand (gf, ggml_mul_mat (ctx0, t04, layer.wv )); assert_shape_2d (t11, N*n_batch, n_embd);
16151615 use_buf (-1 ); struct ggml_tensor * t12 = expand (gf, ggml_reshape_4d (ctx0, t11, N, n_batch, n_embd/n_head, n_head)); assert_shape_4d (t12, N, n_batch, n_embd/n_head, n_head);
16161616 use_buf (-1 ); struct ggml_tensor * t13 = expand (gf, ggml_permute (ctx0, t07, 0 , 2 , 1 , 3 )); assert_shape_4d (t13, n_embd/n_head, N, n_head, n_batch);
0 commit comments