@@ -333,10 +333,11 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
333333        assert (tensor->view_src ->buffer ->buft  == buffer->buft );
334334        return  GGML_STATUS_SUCCESS;
335335    }
336- 
337-     ggml_tensor_extra_gpu * extra = new  ggml_tensor_extra_gpu{};
338-     tensor->extra  = extra;
339-     ctx->tensor_extras .push_back (extra); // used to release it when destroy ctx.
336+     if  (tensor->type  == GGML_TYPE_Q4_0) {
337+         ggml_tensor_extra_gpu * extra = new  ggml_tensor_extra_gpu{};
338+         tensor->extra                  = extra;
339+         ctx->tensor_extras .push_back (extra);  // used to release it when destroy ctx.
340+     }
340341
341342    if  (ggml_is_quantized (tensor->type )) {
342343        //  initialize padding to 0 to avoid possible NaN values
@@ -486,6 +487,22 @@ catch (sycl::exception const &exc) {
486487  std::exit (1 );
487488}
488489
490+ static  void  ggml_backend_sycl_buffer_reset (ggml_backend_buffer_t  buffer) {
491+     GGML_SYCL_DEBUG (" [SYCL] call %s\n " 
492+     if  (buffer == nullptr ) {
493+         return ;
494+     }
495+ 
496+     ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context ;
497+ 
498+     if  (ctx != nullptr ) {
499+         for  (ggml_tensor_extra_gpu * extra : ctx->tensor_extras ) {
500+             release_extra_gpu (extra);
501+         }
502+         ctx->tensor_extras .clear ();  //  reset the tensor_extras vector
503+     }
504+ }
505+ 
489506static  const  ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
490507    /*  .free_buffer     = */ 
491508    /*  .get_base        = */ 
@@ -495,7 +512,7 @@ static const ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
495512    /*  .get_tensor      = */ 
496513    /*  .cpy_tensor      = */ 
497514    /*  .clear           = */ 
498-     /*  .reset           = */ NULL ,
515+     /*  .reset           = */ ggml_backend_sycl_buffer_reset ,
499516};
500517
501518//  sycl buffer type
@@ -576,7 +593,6 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
576593    static  std::mutex mutex;
577594    std::lock_guard<std::mutex> lock (mutex);
578595
579-     GGML_SYCL_DEBUG (" [SYCL] call ggml_backend_sycl_buffer_type\n " 
580596
581597    auto  dev_count = ggml_backend_sycl_get_device_count ();
582598
@@ -3761,7 +3777,6 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) {
37613777}
37623778
37633779int  ggml_backend_sycl_get_device_count () {
3764-     GGML_SYCL_DEBUG (" [SYCL] call ggml_backend_sycl_get_device_count\n " 
37653780    return  ggml_sycl_info ().device_count ;
37663781}
37673782
0 commit comments