Add tensor-shape log to ggml_cuda_op_pad; remove flash-attention MMA dispatch logs

orca-zhang · orca-zhang · commit fad3960d3594 · 2025-02-26T19:11:52.000+08:00
diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu
@@ -7,11 +7,9 @@
 #include "fattn-vec-f32.cuh"
 #include "fattn-wmma-f16.cuh"
 #include "fattn.cuh"
-#include "ggml-impl.h"
 
 template <int D, int ncols2>
 static void ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    GGML_LOG_INFO("ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1: D=%d, ncols2=%d", D, ncols2);
     const ggml_tensor * Q = dst->src[0];
 
     if (Q->ne[1] <= 8/ncols2) {
@@ -34,7 +32,6 @@ static void ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ggml_backend_cuda_con
 
 template <int ncols2>
 static void ggml_cuda_flash_attn_ext_mma_f16_switch_hs(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    GGML_LOG_INFO("ggml_cuda_flash_attn_ext_mma_f16_switch_hs: ncols2=%d", ncols2);
     const ggml_tensor * Q = dst->src[0];
 
     switch (Q->ne[0]) {
diff --git a/ggml/src/ggml-cuda/pad.cu b/ggml/src/ggml-cuda/pad.cu
@@ -57,6 +57,7 @@ static void pad_f32_cuda(const float * x, float * dst,
     dim3 gridDim(num_blocks, ne1, ne2*ne3);
     pad_f32<<<gridDim, CUDA_PAD_BLOCK_SIZE, 0, stream>>>(x, dst, ne0, ne00, ne01, ne02, ne03);
 }
+#include "ggml-impl.h"
 
 static void pad_f16_cuda(const half * x, half * dst,
     const int ne00, const int ne01, const int ne02, const int ne03,
@@ -73,6 +74,8 @@ void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
     GGML_ASSERT(dst->type == src0->type);
     GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors
+    GGML_LOG_INFO("ggml_cuda_op_pad: type=%d, ne0=%d, ne1=%d, ne2=%d, ne3=%d, ne0=%d, ne1=%d, ne2=%d, ne3=%d\n",
+        src0->type, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3]);
 
     if (src0->type == GGML_TYPE_F32) {
         const float * src0_d = (const float *)src0->data;