1 file changed: +3 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -1479,11 +1479,12 @@ static void ggml_cuda_op_mul_mat(
14791479 if (src0_is_contiguous) {
14801480 dev[id].src0_dd = split ? (char *) src0_extra->data_device [id] : (char *) src0->data ;
14811481 } else {
1482- // If src0 is not contiguous it will be copied to a temporary buffer, it may then be necessary to clear padding.
1482+ // If src0 is not contiguous it will be copied to a temporary buffer.
1483+ // This buffer needs to be cleared entirely because multiple regions will function as padding.
14831484 const size_t nbytes_data = ggml_nbytes (src0);
14841485 const size_t nbytes_padding = ggml_row_size (src0->type , MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
14851486 dev[id].src0_dd = dev[id].src0_dd_alloc .alloc (ctx.pool (id), nbytes_data + nbytes_padding);
1486- CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd + nbytes_data , 0 , nbytes_padding, stream));
1487+ CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd , 0 , nbytes_data + nbytes_padding, stream));
14871488 }
14881489
14891490 // If src0 is on a temporary compute buffer (partial offloading) there may be some padding that needs to be cleared:
You can’t perform that action at this time.
0 commit comments