File tree Expand file tree Collapse file tree 1 file changed +6
-1
lines changed Expand file tree Collapse file tree 1 file changed +6
-1
lines changed Original file line number Diff line number Diff line change @@ -1484,14 +1484,19 @@ static void ggml_cuda_op_mul_mat(
14841484 const size_t nbytes_data = ggml_nbytes (src0);
14851485 const size_t nbytes_padding = ggml_row_size (src0->type , MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
14861486 dev[id].src0_dd = dev[id].src0_dd_alloc .alloc (ctx.pool (id), nbytes_data + nbytes_padding);
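1487+ // TODO: on MUSA only the trailing row padding is zeroed (not the whole buffer) as a workaround for the Guilty Lockup issue; drop this #ifndef and use the full-buffer memset on MUSA again once that issue is resolved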
1488+ #ifndef GGML_USE_MUSA
14871489 CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd , 0 , nbytes_data + nbytes_padding, stream));
1490+ #else // GGML_USE_MUSA
1491+ CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd + nbytes_data, 0 , nbytes_padding, stream));
1492+ #endif // !GGML_USE_MUSA
14881493 }
14891494
14901495 // If src0 is on a temporary compute buffer (partial offloading) there may be some padding that needs to be cleared:
14911496 if (ne00 % MATRIX_ROW_PADDING != 0 && ggml_is_quantized (src0->type ) && ggml_backend_buffer_get_usage (src0->buffer ) == GGML_BACKEND_BUFFER_USAGE_COMPUTE && src0->view_src == nullptr ) {
14921497 const size_t nbytes_data = ggml_row_size (src0->type , (dev[id].row_high - dev[id].row_low )*ne00);
14931498 const size_t nbytes_padding = ggml_row_size (src0->type , MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
1494- CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd + nbytes_data , 0 , nbytes_padding, stream));
1499+ CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd + nbytes_data, 0 , nbytes_padding, stream));
14951500 }
14961501
14971502 if (src1_on_device && src1_is_contiguous) {
You can’t perform that action at this time.
0 commit comments