Skip to content

Commit 9ec98cd

Browse files
committed
fix 2d memcpy
1 parent cbe4ada commit 9ec98cd

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2437,11 +2437,16 @@ static ggml_2d_cpy ggml_backend_cuda_2d_pitch(const ggml_tensor *tensor) {
24372437

24382438
next_nb = tensor->nb[i];
24392439
}
2440+
else if (pitch) {
2441+
// when collapsing a 2d tensor, multiply height by the number of elements in the dimension
2442+
height *= tensor->ne[i];
2443+
}
24402444

24412445
next_nb *= tensor->ne[i];
24422446
}
24432447
// 1d contiguous
24442448
if (!pitch) {
2449+
// a 1d tensor can be described simply by its total number of bytes.
24452450
return { tensor->nb[3], 1, tensor->nb[3] };
24462451
}
24472452
return { width, height, pitch };
@@ -2516,7 +2521,7 @@ static bool ggml_backend_cuda_cpy_tensor2d_async_common(ggml_backend_t backend_s
25162521
// attempt a 1d copy if possible
25172522
bool src_is_1d = src_pitch.width == src_pitch.pitch;
25182523
bool dst_is_1d = dst_pitch.width == dst_pitch.pitch;
2519-
bool is_1d = src_is_1d && dst_is_1d;
2524+
is_1d = src_is_1d && dst_is_1d;
25202525

25212526
if (!is_1d) {
25222527
// in case one is 1d and the other is not, collapse the dimensions to match

0 commit comments

Comments
 (0)