@@ -3760,7 +3760,7 @@ void log_ggml_var_device(const char*name, float *src, size_t total_elements, boo
         local_buf = (float *) ggml_sycl_host_malloc(total_size);
         ggml_sycl_set_device(g_main_device);
         dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0];
-        main_stream->memcpy(local_buf, src, total_size);
+        main_stream->memcpy(local_buf, src, total_size).wait();
     }
     else {
         local_buf = (float *)src;
@@ -14585,7 +14585,7 @@ static void ggml_sycl_op_flatten(const ggml_tensor *src0,
     // copy dst to host if necessary
     if (!dst_on_device) {
         SYCL_CHECK(CHECK_TRY_ERROR(
-            main_stream->memcpy(dst->data, dst_ddf, ggml_nbytes(dst))));
+            main_stream->memcpy(dst->data, dst_ddf, ggml_nbytes(dst)).wait()));
     }

     if (dst->backend == GGML_BACKEND_TYPE_CPU) {
@@ -14862,7 +14862,7 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
                 SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(
                     src1_ddq_i, src1_ddq_i_source,
                     src1_ncols * src1_padded_col_size * q8_1_ts /
-                        q8_1_bs)));
+                        q8_1_bs).wait()));
             } else {

                 float * src1_ddf_i_source = (float *) src1_extra->data_device[g_main_device];
@@ -14956,7 +14956,7 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
                 dhf_dst_i += src1_col_0*ne0;
                 SYCL_CHECK(CHECK_TRY_ERROR(
                     stream->memcpy(dhf_dst_i, dst_dd_i,
-                        src1_ncols * ne0 * sizeof(float))));
+                        src1_ncols * ne0 * sizeof(float)).wait()));
             }
         }

@@ -15686,8 +15686,8 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
     if (ids->backend == GGML_BACKEND_TYPE_GPU) {
         const char * ids_dev = (const char *)((const ggml_tensor_extra_gpu *)ids->extra)->data_device[g_main_device];
         SYCL_CHECK(CHECK_TRY_ERROR(
-            stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
-        SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
+            stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids)).wait()));
+        // SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
     } else {
         memcpy(ids_host.data(), ids->data, ggml_nbytes(ids));
     }
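Editor's note on the hunk above (not part of the commit): `queue::memcpy()` returns a `sycl::event`, so chaining `.wait()` blocks only until that one copy finishes, whereas the removed `stream->wait()` blocked on every operation queued on the stream. A minimal sketch of the difference, assuming a queue `q` and pointers `dst`/`src`:

    sycl::event e = q.memcpy(dst, src, bytes); // asynchronous copy
    e.wait();                                  // blocks on this copy only
    q.wait();                                  // by contrast, blocks on ALL work submitted to q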
@@ -15757,7 +15757,7 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,

                 SYCL_CHECK(CHECK_TRY_ERROR(
                     stream->memcpy(src1_contiguous.get() + num_src1_rows * nb11,
-                        src1_original + i01 * nb11, nb11)));
+                        src1_original + i01 * nb11, nb11).wait()));
                 num_src1_rows++;
             }

@@ -15790,7 +15790,7 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,

                 SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(
                     dst_original + i01 * nb1,
-                    dst_contiguous.get() + num_src1_rows * nb1, nb1)));
+                    dst_contiguous.get() + num_src1_rows * nb1, nb1).wait()));
                 num_src1_rows++;
             }
         }
@@ -17184,7 +17184,7 @@ GGML_CALL static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend,
     GGML_ASSERT(tensor->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
     GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
     SYCL_CHECK(CHECK_TRY_ERROR(g_syclStreams[sycl_ctx->device][0]->memcpy(
-        (char *)tensor->data + offset, data, size)));
+        (char *)tensor->data + offset, data, size).wait()));
 }
 catch (sycl::exception const &exc) {
     std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -17200,7 +17200,7 @@ GGML_CALL static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
     GGML_ASSERT(tensor->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
     GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
     SYCL_CHECK(CHECK_TRY_ERROR(g_syclStreams[sycl_ctx->device][0]->memcpy(
-        data, (const char *)tensor->data + offset, size)));
+        data, (const char *)tensor->data + offset, size).wait()));
 }
 catch (sycl::exception const &exc) {
     std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -17219,7 +17219,7 @@ GGML_CALL static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
        was inserted. You need to rewrite this code.
     */
     SYCL_CHECK(CHECK_TRY_ERROR(g_syclStreams[sycl_ctx->device][0]->memcpy(
-        dst->data, src->data, ggml_nbytes(dst))));
+        dst->data, src->data, ggml_nbytes(dst)).wait()));
     return true;
 }

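Editor's sketch of the pattern this diff applies throughout (the names `q` and `dev` are assumptions for illustration, not from the commit): SYCL's `queue::memcpy()` is asynchronous and only enqueues the copy, so a host buffer must not be read until the returned `sycl::event` completes; chaining `.wait()` makes each copy synchronous at the call site.

    #include <sycl/sycl.hpp>
    #include <iostream>
    #include <vector>

    int main() {
        sycl::queue q;                                   // default device
        std::vector<float> src(16, 1.0f), dst(16, 0.0f);
        float *dev = sycl::malloc_device<float>(src.size(), q);

        // Host -> device: .wait() blocks until the copy is done, mirroring the fix.
        q.memcpy(dev, src.data(), src.size() * sizeof(float)).wait();

        // Device -> host: without .wait(), dst could still hold stale zeros here.
        q.memcpy(dst.data(), dev, dst.size() * sizeof(float)).wait();

        std::cout << dst[0] << "\n";                     // prints 1
        sycl::free(dev, q);
        return 0;
    }

The same correctness issue motivates every hunk above: the destination of the copy (`local_buf`, `ids_host`, `dst->data`, ...) was being consumed immediately after the asynchronous copy was enqueued, rather than after it had completed.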