@@ -2946,6 +2946,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 buft = ggml_backend_dev_buffer_type(cpu_dev);
             }
 
+            LLAMA_LOG_INFO("wkv_b shape: [%d, %d]\n", (int) wkv_b->ne[0], (int) wkv_b->ne[1]);
             LLAMA_LOG_INFO("n_head_kv: %d, kv_lora_rank: %d, n_embd_head_qk_nope: %d\n", n_head_kv, kv_lora_rank, n_embd_head_qk_nope);
             ggml_context * ctx = ctx_for_buft(buft);
             layer.wk_b = ggml_new_tensor_2d(ctx,
@@ -2965,6 +2966,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 for (int col = 0; col < n_embd_head_qk_nope; ++col) {
                     LLAMA_LOG_INFO("wk_b row: %d, col: %d\n", row, col);
                     int src_idx = row * src_stride + k_start + col;
+                    LLAMA_LOG_INFO("src_idx: %d\n", src_idx);
                     GGML_ASSERT(src_idx < ggml_nelements(wkv_b));
 
                     int dst_row = h * kv_lora_rank + row;
@@ -2995,6 +2997,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     LLAMA_LOG_INFO("wv_b row: %d, col: %d\n", row, col);
                     // source index calculation
                     int src_idx = row * src_stride + v_start + col;
+                    LLAMA_LOG_INFO("src_idx: %d\n", src_idx);
                     GGML_ASSERT(src_idx < ggml_nelements(wkv_b));
 
                     // destination index calculation
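
For readers following the index math that these logs trace, below is a minimal standalone sketch of splitting a fused wkv_b buffer into per-head wk_b and wv_b parts. It is not the llama.cpp implementation: the row-major layout, the per-head derivation of k_start/v_start, and every size constant are assumptions made purely for illustration; in the diff above, src_stride, k_start and v_start are defined elsewhere in load_tensors.

// Standalone sketch (assumed layout, illustrative sizes only).
// Assumption: wkv_b is row-major with kv_lora_rank rows, and each row packs,
// per head, n_embd_head_qk_nope K values followed by n_embd_head_v V values.
#include <cassert>
#include <cstdio>
#include <vector>

int main() {
    const int n_head_kv           = 2; // hypothetical
    const int kv_lora_rank        = 4; // hypothetical
    const int n_embd_head_qk_nope = 3; // hypothetical
    const int n_embd_head_v       = 5; // hypothetical

    // One row of the fused tensor holds every head's K part and V part back to back.
    const int src_stride = n_head_kv * (n_embd_head_qk_nope + n_embd_head_v);

    std::vector<float> wkv_b((size_t) kv_lora_rank * src_stride);
    for (size_t i = 0; i < wkv_b.size(); ++i) {
        wkv_b[i] = (float) i;
    }

    // Split destinations: one row per (head, latent-rank) pair.
    std::vector<float> wk_b((size_t) n_head_kv * kv_lora_rank * n_embd_head_qk_nope);
    std::vector<float> wv_b((size_t) n_head_kv * kv_lora_rank * n_embd_head_v);

    for (int h = 0; h < n_head_kv; ++h) {
        // Per-head offsets into a wkv_b row: K block first, then V block.
        const int k_start = h * (n_embd_head_qk_nope + n_embd_head_v);
        const int v_start = k_start + n_embd_head_qk_nope;

        for (int row = 0; row < kv_lora_rank; ++row) {
            const int dst_row = h * kv_lora_rank + row; // same formula as in the diff

            for (int col = 0; col < n_embd_head_qk_nope; ++col) {
                const int src_idx = row * src_stride + k_start + col; // wk_b source
                assert(src_idx < (int) wkv_b.size());
                wk_b[(size_t) dst_row * n_embd_head_qk_nope + col] = wkv_b[src_idx];
            }
            for (int col = 0; col < n_embd_head_v; ++col) {
                const int src_idx = row * src_stride + v_start + col; // wv_b source
                assert(src_idx < (int) wkv_b.size());
                wv_b[(size_t) dst_row * n_embd_head_v + col] = wkv_b[src_idx];
            }
        }
    }

    printf("split %zu fused values into %zu K values and %zu V values\n",
           wkv_b.size(), wk_b.size(), wv_b.size());
    return 0;
}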