Commit 5f5f9cd

fix

1 parent 3395a34

2 files changed: +1 −4 lines changed

ggml/src/ggml-cuda/pad.cu

Lines changed: 0 additions & 2 deletions

@@ -74,8 +74,6 @@ void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
     GGML_ASSERT(dst->type == src0->type);
     GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors
-    GGML_LOG_INFO("ggml_cuda_op_pad: type=%ld, ne0=%d, ne1=%d, ne2=%d, ne3=%d, ne0=%d, ne1=%d, ne2=%d, ne3=%d\n",
-        src0->type, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3]);
 
     if (src0->type == GGML_TYPE_F32) {
         const float * src0_d = (const float *)src0->data;
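
An aside on the deleted trace: it was itself ill-formed. In ggml, tensor dimensions (the ne array) are int64_t, so printing them with %d is a format-specifier mismatch, and the format string labels both tensors' dimensions as ne0..ne3. If a similar trace is ever wanted again, a sketch of a corrected version (not part of this commit; it assumes <inttypes.h> is included) might look like:

    // Hypothetical debug trace, not in the commit: PRId64 matches the
    // int64_t ne values, and ggml_type_name() renders the type enum.
    GGML_LOG_INFO("ggml_cuda_op_pad: type=%s,"
        " src0->ne=[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "],"
        " dst->ne=[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "]\n",
        ggml_type_name(src0->type),
        src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
        dst->ne[0],  dst->ne[1],  dst->ne[2],  dst->ne[3]);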

src/llama.cpp

Lines changed: 1 addition & 2 deletions

@@ -1338,9 +1338,8 @@ struct llm_build_context {
     }
 
     struct ggml_tensor * build_inp_KQ_mask(bool causal = true) {
-        const auto i = std::max(n_embd_head_k, n_embd_head_v);
         lctx.inp_KQ_mask = causal
-            ? ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, cparams.flash_attn ? i * i : n_kv, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD))
+            ? ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_kv, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD))
             : ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_tokens, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD));
         cb(lctx.inp_KQ_mask, "KQ_mask", -1);
         ggml_set_input(lctx.inp_KQ_mask);
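
The fix itself: the causal KQ mask goes back to its usual shape of [n_kv, padded n_tokens], dropping the flash_attn special case that sized the first dimension as max(n_embd_head_k, n_embd_head_v) squared. The second dimension rounds n_tokens up to a multiple of GGML_KQ_MASK_PAD via GGML_PAD, presumably so kernels can process the mask in fixed-size row blocks. A minimal, self-contained sketch of that rounding, assuming ggml.h's bitmask definition of GGML_PAD (the pad constant below is illustrative, not necessarily llama.cpp's actual value):

    #include <stdio.h>

    // assumed to match ggml.h's definition; n must be a power of two
    #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))

    int main(void) {
        const int pad = 32; // stand-in for GGML_KQ_MASK_PAD (value assumed)
        const int n_tokens[] = {1, 32, 33, 100};
        for (int i = 0; i < 4; i++) {
            printf("n_tokens=%3d -> padded=%3d\n",
                   n_tokens[i], GGML_PAD(n_tokens[i], pad));
        }
        return 0; // prints 1->32, 32->32, 33->64, 100->128
    }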
