vulkan: fix noncontig check for mat_mul_id splitting

jeffbolznv · jeffbolznv · commit 69b7db84cd69 · 2025-07-14T16:34:29.000-05:00
Remove supports_op check for > 4096 (splitting fixes this)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -4916,7 +4916,7 @@ static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) {
     return
         tensor->nb[0] == ggml_type_size(tensor->type) &&
         tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+        (tensor->ne[3] == 1 || tensor->nb[3] == tensor->nb[2]*tensor->ne[2]);
 }
 
 static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const ggml_tensor * src, const ggml_tensor * dst, ggml_type to) {
@@ -10350,10 +10350,6 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
                         // If there's not enough shared memory for row_ids and the result tile, fallback to CPU
                         return false;
                     }
-                    // Check against size of shared memory variable
-                    if (op->src[2]->ne[0] > 4096) {
-                        return false;
-                    }
                 }
                 switch (src0_type) {
                     case GGML_TYPE_F32:

Original file line number	Diff line number	Diff line change
`@@ -4916,7 +4916,7 @@ static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) {`
`4916`	`4916`	`return`
`4917`	`4917`	`tensor->nb[0] == ggml_type_size(tensor->type) &&`
`4918`	`4918`	`tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) &&`
`4919`		`- tensor->nb[3] == tensor->nb[2]*tensor->ne[2];`
	`4919`	`+ (tensor->ne[3] == 1 \|\| tensor->nb[3] == tensor->nb[2]*tensor->ne[2]);`
`4920`	`4920`	`}`
`4921`	`4921`
`4922`	`4922`	`static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const ggml_tensor * src, const ggml_tensor * dst, ggml_type to) {`
`@@ -10350,10 +10350,6 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm`
`10350`	`10350`	`// If there's not enough shared memory for row_ids and the result tile, fallback to CPU`
`10351`	`10351`	`return false;`
`10352`	`10352`	`}`
`10353`		`- // Check against size of shared memory variable`
`10354`		`- if (op->src[2]->ne[0] > 4096) {`
`10355`		`- return false;`
`10356`		`- }`
`10357`	`10353`	`}`
`10358`	`10354`	`switch (src0_type) {`
`10359`	`10355`	`case GGML_TYPE_F32:`