Skip to content

Commit a22c09a

Browse files
committed
Address PR feedback
1 parent b76c058 commit a22c09a

File tree

2 files changed

+3
-7
lines changed

2 files changed

+3
-7
lines changed

ggml/src/ggml-cuda/ssm-conv.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
111111
ssm_conv_f32<threads, 3><<<blocks, threads, 0, stream>>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1,
112112
dst, dst_nb0, dst_nb1, dst_nb2, n_t);
113113
} else {
114-
GGML_ABORT("Only support kernel size = 4 now.");
114+
GGML_ABORT("Only support kernel size = 3 or size = 4 right now.");
115115
}
116116
} else {
117117
if (nc == 4) {
@@ -125,7 +125,7 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int
125125
ssm_conv_long_token_f32<threads, 3, split_n_t><<<blocks, threads, 0, stream>>>(
126126
src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t);
127127
} else {
128-
GGML_ABORT("Only support kernel size = 4 right now.");
128+
GGML_ABORT("Only support kernel size = 3 or size = 4 right now.");
129129
}
130130
}
131131
}

src/llama-model.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15671,11 +15671,7 @@ struct llm_build_lfm2 : public llm_graph_context {
1567115671
GGML_ASSERT(hparams.n_shortconv_l_cache > 0);
1567215672

1567315673
// construct ssm_conv op
15674-
struct ggml_tensor * conv_out = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, bx->ne[1], bx->ne[0] - conv->ne[0], bx->ne[2]);
15675-
conv_out->op = GGML_OP_SSM_CONV;
15676-
conv_out->src[0] = bx;
15677-
conv_out->src[1] = conv_kernel;
15678-
15674+
ggml_tensor * conv_out = ggml_ssm_conv(ctx0, bx, conv_kernel);
1567915675
cb(conv_out, "model.layers.{}.conv.conv", il);
1568015676

1568115677
auto *y = ggml_mul(ctx0, c, conv_out);

0 commit comments

Comments (0)