@@ -333,10 +333,11 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
333333 assert (tensor->view_src ->buffer ->buft == buffer->buft );
334334 return GGML_STATUS_SUCCESS;
335335 }
336-
337- ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};
338- tensor->extra = extra;
339- ctx->tensor_extras .push_back (extra); // used to release it when destroy ctx.
336+ if (tensor->type == GGML_TYPE_Q4_0) {
337+ ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};
338+ tensor->extra = extra;
339+ ctx->tensor_extras .push_back (extra); // used to release it when destroy ctx.
340+ }
340341
341342 if (ggml_is_quantized (tensor->type )) {
342343 // initialize padding to 0 to avoid possible NaN values
@@ -486,6 +487,22 @@ catch (sycl::exception const &exc) {
486487 std::exit (1 );
487488}
488489
490+ static void ggml_backend_sycl_buffer_reset (ggml_backend_buffer_t buffer) { // Releases every tensor extra recorded in the buffer context's tensor_extras list, then empties the list.
491+ GGML_SYCL_DEBUG (" [SYCL] call %s\n " , __func__); // debug trace carrying this function's name
492+ if (buffer == nullptr ) { // no buffer: nothing to reset
493+ return ;
494+ }
495+
496+ ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context ; // view the buffer's context pointer as the SYCL buffer context
497+
498+ if (ctx != nullptr ) { // a buffer without a context is silently ignored
499+ for (ggml_tensor_extra_gpu * extra : ctx->tensor_extras ) {
500+ release_extra_gpu (extra); // defined outside this view; presumably frees the extra and what it owns -- TODO confirm
501+ }
502+ ctx->tensor_extras .clear (); // reset the tensor_extras vector: drop the pointers that were just passed to release_extra_gpu
503+ }
504+ }
505+
489506static const ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
490507 /* .free_buffer = */ ggml_backend_sycl_buffer_free_buffer,
491508 /* .get_base = */ ggml_backend_sycl_buffer_get_base,
@@ -495,7 +512,7 @@ static const ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
495512 /* .get_tensor = */ ggml_backend_sycl_buffer_get_tensor,
496513 /* .cpy_tensor = */ ggml_backend_sycl_buffer_cpy_tensor,
497514 /* .clear = */ ggml_backend_sycl_buffer_clear,
498- /* .reset = */ NULL ,
515+ /* .reset = */ ggml_backend_sycl_buffer_reset ,
499516};
500517
501518// sycl buffer type
@@ -576,7 +593,6 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
576593 static std::mutex mutex;
577594 std::lock_guard<std::mutex> lock (mutex);
578595
579- GGML_SYCL_DEBUG (" [SYCL] call ggml_backend_sycl_buffer_type\n " );
580596
581597 auto dev_count = ggml_backend_sycl_get_device_count ();
582598
@@ -3761,7 +3777,6 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) {
37613777}
37623778
37633779int ggml_backend_sycl_get_device_count () { // Returns the device_count field of the object returned by ggml_sycl_info().
3764- GGML_SYCL_DEBUG (" [SYCL] call ggml_backend_sycl_get_device_count\n " );
37653780 return ggml_sycl_info ().device_count ;
37663781}
37673782
0 commit comments