Skip to content

Commit 18818a2

Browse files
author
bssrdf
committed
more strict check to make sure src0 is a transpose
1 parent d3bdcf8 commit 18818a2

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

ggml/src/ggml-cuda/cpy.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
372372
CUDA_CHECK(cudaMemcpyAsync(src1_ddc, src0_ddc, ggml_nbytes(src0), cudaMemcpyDeviceToDevice, main_stream));
373373
}
374374
} else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) {
375-
if(src0->op == GGML_OP_TRANSPOSE){
375+
if(src0->op == GGML_OP_TRANSPOSE && !ggml_is_contiguous(src0)){
376376
ggml_cpy_flt_cuda<float, float, true> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
377377
} else {
378378
ggml_cpy_flt_cuda<float, float> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
@@ -415,7 +415,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
415415
} else if (src0->type == GGML_TYPE_Q5_1 && src1->type == GGML_TYPE_F32) {
416416
ggml_cpy_q5_1_f32_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
417417
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16) {
418-
if(src0->op == GGML_OP_TRANSPOSE){
418+
if(src0->op == GGML_OP_TRANSPOSE && !ggml_is_contiguous(src0)){
419419
ggml_cpy_flt_cuda<half, half, true> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
420420
} else {
421421
ggml_cpy_flt_cuda<half, half> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
@@ -433,7 +433,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
433433
ggml_cpy_flt_cuda<half, float> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
434434
}
435435
} else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_BF16) {
436-
if(src0->op == GGML_OP_TRANSPOSE){
436+
if(src0->op == GGML_OP_TRANSPOSE && !ggml_is_contiguous(src0)){
437437
ggml_cpy_flt_cuda<nv_bfloat16, nv_bfloat16, true> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
438438
} else {
439439
ggml_cpy_flt_cuda<nv_bfloat16, nv_bfloat16> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);

0 commit comments

Comments
 (0)