Skip to content

Commit c0c1a0e

Browse files
committed
Fix CUDA grid launch condition for large block_nums.y
1 parent d0660f2 commit c0c1a0e

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/binbcast.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,7 @@ static void launch_bin_bcast_pack(const ggml_tensor * src0, const ggml_tensor *
272 272       const uint3 ne12 = init_fastdiv_values((uint32_t) cne1[2]);
273 273       const uint3 ne13 = init_fastdiv_values((uint32_t) cne1[3]);
274 274
275     -     if (block_nums.z > 65535) {
    275 +     if (block_nums.z > 65535 || block_nums.y > 65535) {
276 276           int block_num = (ne0 * ne1 * ne2 * ne3 + block_size - 1) / block_size;
277 277           const uint3 prod_012 = init_fastdiv_values((uint32_t) (ne0 * ne1 * ne2));
278 278           const uint3 prod_01 = init_fastdiv_values((uint32_t) (ne0 * ne1));

0 commit comments

Comments (0)