Skip to content

Commit bd86c4c

Browse files
committed
apply more optimization
- original IM2COL kernel + _ext with MIN()

Signed-off-by: Junhee Yoo <[email protected]>
1 parent 0084847 commit bd86c4c

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-metal.m

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2625,7 +2625,7 @@ static void ggml_metal_encode_node(
2625 2625   [encoder setBytes:&KW length:sizeof(int32_t) atIndex:15];
2626 2626
2627 2627   const int64_t D = N / M + (N % M > 0 ? 1 : 0);
2628      - [encoder dispatchThreadgroups:MTLSizeMake(D * CHW, OH, OW) threadsPerThreadgroup:MTLSizeMake(M, 1, 1)];
     2628 + [encoder dispatchThreadgroups:MTLSizeMake(D * CHW, OH, OW) threadsPerThreadgroup:MTLSizeMake(MIN((uint64_t)N, M), 1, 1)];
2629 2629   } else {
2630 2630   [encoder dispatchThreadgroups:MTLSizeMake(IC, OH, OW) threadsPerThreadgroup:MTLSizeMake(N, KH, KW)];
2631 2631   }

0 commit comments

Comments
 (0)