@@ -293,10 +293,13 @@ static constexpr __device__ int mmq_get_granularity_device(ggml_type type, const
 }
 #elif defined(NEW_MMA_AVAILABLE)
 static constexpr __device__ int mmq_get_granularity_device(ggml_type type, const int mmq_x) {
+    GGML_UNUSED(type);
     return mmq_x >= 48 ? 16 : 8;
 }
 #else
 static constexpr __device__ int mmq_get_granularity_device(ggml_type type, const int mmq_x) {
+    GGML_UNUSED(type);
+    GGML_UNUSED(mmq_x);
     return 8;
 }
 #endif // AMD_MMA_AVAILABLE
@@ -367,6 +370,7 @@ static constexpr __device__ int get_mmq_nwarps_device(ggml_type type) {
 }
 #else
 static constexpr __device__ int get_mmq_nwarps_device(ggml_type type) {
+    GGML_UNUSED(type);
     return 8;
 }
 #endif // AMD_MMA_AVAILABLE
@@ -3564,8 +3568,8 @@ static void launch_mul_mat_q(ggml_backend_cuda_context & ctx, const mmq_args & a
 
     const int nbytes_shared = mmq_get_nbytes_shared<type>(mmq_x, mmq_y, cc, warp_size, nwarps);
 
-    CUDA_SET_SHARED_MEMORY_LIMIT((mul_mat_q<type, mmq_x, MMQ_NWARPS, false>), nbytes_shared);
-    CUDA_SET_SHARED_MEMORY_LIMIT((mul_mat_q<type, mmq_x, MMQ_NWARPS, true>), nbytes_shared);
+    CUDA_SET_SHARED_MEMORY_LIMIT((mul_mat_q<type, mmq_x, false>), nbytes_shared);
+    CUDA_SET_SHARED_MEMORY_LIMIT((mul_mat_q<type, mmq_x, true>), nbytes_shared);
 
     const int nty = (args.nrows_x + mmq_y - 1) / mmq_y;
     const int ntx = (args.ncols_dst + mmq_x - 1) / mmq_x;
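
Note on the GGML_UNUSED additions: on the fallback paths the function bodies return constants and no longer read one or both parameters, which would trip -Wunused-parameter on warnings-as-errors builds. A minimal, self-contained sketch of the pattern follows; the ggml_type enum stub is hypothetical, and GGML_UNUSED is assumed to be the plain void-cast macro from ggml.h.

    // Stub standing in for the real ggml_type enum (hypothetical values).
    enum ggml_type { GGML_TYPE_Q4_0, GGML_TYPE_Q8_0 };

    // Assumed definition, matching the void-cast macro in ggml.h.
    #define GGML_UNUSED(x) (void)(x)

    // Fallback path: the granularity is a constant, so neither parameter is
    // read; the void casts silence -Wunused-parameter without changing codegen.
    static constexpr __device__ int mmq_get_granularity_device(ggml_type type, const int mmq_x) {
        GGML_UNUSED(type);
        GGML_UNUSED(mmq_x);
        return 8;
    }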
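Note on the launch-side hunk: it tracks a signature change in which mul_mat_q apparently drops MMQ_NWARPS from its template parameter list (the warp count is the runtime nwarps value passed to mmq_get_nbytes_shared above), so each remaining <type, mmq_x, need_check> instantiation still has to be opted in to large dynamic shared memory individually. The sketch below shows what such a macro must boil down to, using the real cudaFuncSetAttribute API but a hypothetical kernel and helper name; the actual CUDA_SET_SHARED_MEMORY_LIMIT in ggml-cuda additionally caches the result per device.

    #include <cuda_runtime.h>

    // Hypothetical stand-in for the templated MMQ kernel.
    template <int mmq_x, bool need_check>
    __global__ void mul_mat_q_sketch(const char * x, const char * y, float * dst) {
        extern __shared__ char data_mul_mat_q[];  // dynamic shared memory, sized at launch
        // ... tile loading and mma work would go here ...
    }

    // Hypothetical helper: kernels requesting more than the default 48 KiB of
    // dynamic shared memory must opt in explicitly, once per instantiation.
    static void raise_smem_limit(const int nbytes_shared) {
        cudaFuncSetAttribute(mul_mat_q_sketch<64, false>,
                             cudaFuncAttributeMaxDynamicSharedMemorySize, nbytes_shared);
        cudaFuncSetAttribute(mul_mat_q_sketch<64, true>,
                             cudaFuncAttributeMaxDynamicSharedMemorySize, nbytes_shared);
    }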