@@ -57,6 +57,7 @@ static void pad_f32_cuda(const float * x, float * dst,
5757 dim3 gridDim (num_blocks, ne1, ne2*ne3);
5858 pad_f32<<<gridDim , CUDA_PAD_BLOCK_SIZE, 0 , stream>>> (x, dst, ne0, ne00, ne01, ne02, ne03);
5959}
60+ #include " ggml-impl.h"
6061
6162static void pad_f16_cuda (const half * x, half * dst,
6263 const int ne00, const int ne01, const int ne02, const int ne03,
@@ -73,6 +74,8 @@ void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
7374 GGML_ASSERT (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
7475 GGML_ASSERT (dst->type == src0->type );
7576 GGML_ASSERT (src0->ne [3 ] == 1 && dst->ne [3 ] == 1 ); // just 3D tensors
77+ GGML_LOG_INFO (" ggml_cuda_op_pad: type=%d, ne0=%d, ne1=%d, ne2=%d, ne3=%d, ne0=%d, ne1=%d, ne2=%d, ne3=%d\n " ,
78+ src0->type , src0->ne [0 ], src0->ne [1 ], src0->ne [2 ], src0->ne [3 ], dst->ne [0 ], dst->ne [1 ], dst->ne [2 ], dst->ne [3 ]);
7679
7780 if (src0->type == GGML_TYPE_F32) {
7881 const float * src0_d = (const float *)src0->data ;
0 commit comments