@@ -1538,20 +1538,13 @@ static void ggml_cuda_op_mul_mat(
15381538 const size_t nbytes_data = ggml_nbytes (src0);
15391539 const size_t nbytes_padding = ggml_row_size (src0->type , MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
15401540 dev[id].src0_dd = dev[id].src0_dd_alloc .alloc (ctx.pool (id), nbytes_data + nbytes_padding);
1541- CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd , 0 , nbytes_data + nbytes_padding, stream));
1542- }
1543-
1544- // If src0 is on a temporary compute buffer (partial offloading) there may be some padding that needs to be cleared:
1545- if (ne00 % MATRIX_ROW_PADDING != 0 && ggml_is_quantized (src0->type ) && ggml_backend_buffer_get_usage (src0->buffer ) == GGML_BACKEND_BUFFER_USAGE_COMPUTE && src0->view_src == nullptr ) {
1546- const int64_t nbytes_data = ggml_row_size (src0->type , (dev[id].row_high - dev[id].row_low )*ne00);
1547- const int64_t nbytes_padding = ggml_row_size (src0->type , MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
15481541 CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd + nbytes_data , 0 , nbytes_padding, stream));
15491542 }
15501543
1551- // If src0 is on a temporary compute buffers (partial offloading) there may be some padding that needs to be cleared:
1544+ // If src0 is on a temporary compute buffer (partial offloading) there may be some padding that needs to be cleared:
15521545 if (ne00 % MATRIX_ROW_PADDING != 0 && ggml_is_quantized (src0->type ) && ggml_backend_buffer_get_usage (src0->buffer ) == GGML_BACKEND_BUFFER_USAGE_COMPUTE && src0->view_src == nullptr ) {
1553- const int64_t nbytes_data = ggml_row_size (src0->type , (dev[id].row_high - dev[id].row_low )*ne00);
1554- const int64_t nbytes_padding = ggml_row_size (src0->type , MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
1546+ const size_t nbytes_data = ggml_row_size (src0->type , (dev[id].row_high - dev[id].row_low )*ne00);
1547+ const size_t nbytes_padding = ggml_row_size (src0->type , MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
15551548 CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd + nbytes_data , 0 , nbytes_padding, stream));
15561549 }
15571550
0 commit comments