@@ -1064,32 +1064,72 @@ struct ggml_backend_opencl_buffer_context {
10641064            CL_CHECK (clReleaseMemObject (im));
10651065        }
10661066
1067+         //  Delete all extras to trigger their destructors
10671068        for  (ggml_tensor_extra_cl * e : temp_tensor_extras) {
10681069            delete  e;
10691070        }
1071+         for  (ggml_tensor_extra_cl * e : temp_tensor_extras_in_use) {
1072+             delete  e;
1073+         }
10701074        for  (ggml_tensor_extra_cl_q4_0 * e : temp_tensor_extras_q4_0) {
10711075            delete  e;
10721076        }
1077+         for  (ggml_tensor_extra_cl_q4_0 * e : temp_tensor_extras_q4_0_in_use) {
1078+             delete  e;
1079+         }
10731080    }
10741081
10751082    ggml_tensor_extra_cl * ggml_opencl_alloc_temp_tensor_extra () {
        // Hands out a `ggml_tensor_extra_cl` that is tracked by this context.
        //
        // Reading note: this is a diff view. Lines whose gutter ends in `-` are
        // the removed version, lines whose gutter ends in `+` are the replacement.
        //
        // Removed version: always allocated a fresh extra with `new`, reset it,
        // and recorded it in `temp_tensor_extras`.
        //
        // Added version: takes the last entry of `temp_tensor_extras` when that
        // vector is non-empty (removing it from the vector), and only allocates
        // with `new` when it is empty. The chosen extra is recorded in
        // `temp_tensor_extras_in_use`, then `extra->reset ()` is called on it
        // before it is returned.
        //
        // The returned pointer stays tracked by this context: the cleanup loops
        // earlier in this struct `delete` every entry of both
        // `temp_tensor_extras` and `temp_tensor_extras_in_use`.
        // NOTE(review): `ggml_tensor_extra_cl::reset` is defined elsewhere;
        // presumably it clears state left by a previous user -- confirm.
1076-         ggml_tensor_extra_cl * extra = new  ggml_tensor_extra_cl ();
1077-         extra->reset ();
1078-         temp_tensor_extras.push_back (extra);
1083+         ggml_tensor_extra_cl * extra;
1084+         if  (temp_tensor_extras.empty ()) {
1085+             extra = new  ggml_tensor_extra_cl ();
1086+         } else  {
1087+             extra = temp_tensor_extras.back ();
1088+             temp_tensor_extras.pop_back ();
1089+         }
1090+ 
1091+         temp_tensor_extras_in_use.push_back (extra);
10791092
1093+         extra->reset ();
10801094        return  extra;
10811095    }
10821096
10831097    ggml_tensor_extra_cl_q4_0 * ggml_opencl_alloc_temp_tensor_extra_q4_0 () {
        // Hands out a `ggml_tensor_extra_cl_q4_0` that is tracked by this context.
        //
        // Reading note: this is a diff view. Lines whose gutter ends in `-` are
        // the removed version, lines whose gutter ends in `+` are the replacement.
        //
        // Removed version: always allocated a fresh extra with `new`, reset it,
        // and recorded it in `temp_tensor_extras_q4_0`.
        //
        // Added version: takes the last entry of `temp_tensor_extras_q4_0` when
        // that vector is non-empty (removing it from the vector), and only
        // allocates with `new` when it is empty. The chosen extra is recorded in
        // `temp_tensor_extras_q4_0_in_use`, then `extra->reset ()` is called on
        // it before it is returned.
        //
        // The returned pointer stays tracked by this context: the cleanup loops
        // earlier in this struct `delete` every entry of both
        // `temp_tensor_extras_q4_0` and `temp_tensor_extras_q4_0_in_use`.
        // NOTE(review): `ggml_tensor_extra_cl_q4_0::reset` is defined elsewhere;
        // presumably it clears state left by a previous user -- confirm.
1084-         ggml_tensor_extra_cl_q4_0 * extra = new  ggml_tensor_extra_cl_q4_0 ();
1085-         extra->reset ();
1086-         temp_tensor_extras_q4_0.push_back (extra);
1098+         ggml_tensor_extra_cl_q4_0 * extra;
1099+         if  (temp_tensor_extras_q4_0.empty ()) {
1100+             extra = new  ggml_tensor_extra_cl_q4_0 ();
1101+         } else  {
1102+             extra = temp_tensor_extras_q4_0.back ();
1103+             temp_tensor_extras_q4_0.pop_back ();
1104+         }
10871105
1106+         temp_tensor_extras_q4_0_in_use.push_back (extra);
1107+ 
1108+         extra->reset ();
10881109        return  extra;
10891110    }
10901111
1112+     void  reset () {
        // Returns every extra that is currently handed out to its pool.
        //
        // Each pointer in `temp_tensor_extras_in_use` is appended to
        // `temp_tensor_extras`, and each pointer in
        // `temp_tensor_extras_q4_0_in_use` is appended to
        // `temp_tensor_extras_q4_0`; both `in_use` vectors are then cleared.
        // Nothing is deleted here, so the objects stay alive and can be handed
        // out again by the `ggml_opencl_alloc_temp_tensor_extra*` functions.
        //
        // Called by `ggml_backend_opencl_buffer_reset`.
        // NOTE(review): pointers returned by the alloc functions before this
        // call may be returned again afterwards, so earlier holders presumably
        // must not keep relying on them -- confirm against callers.
1113+         for  (ggml_tensor_extra_cl * e : temp_tensor_extras_in_use) {
1114+             temp_tensor_extras.push_back (e);
1115+         }
1116+         temp_tensor_extras_in_use.clear ();
1117+ 
1118+         for  (ggml_tensor_extra_cl_q4_0 * e : temp_tensor_extras_q4_0_in_use) {
1119+             temp_tensor_extras_q4_0.push_back (e);
1120+         }
1121+         temp_tensor_extras_q4_0_in_use.clear ();
1122+     }
1123+ 
1124+     //  Pools for extras. Available extras are in `temp_tensor_extras`. Extras
1125+     //  being used are in `temp_tensor_extras_in_use`. At the first run, new
1126+     //  extras get created and put in `in_use`. When the buffer is reset via
1127+     //  the `reset` callback, all extras in `in_use` get moved to available extras
1128+     //  for reuse.
    // The `_q4_0` pair below follows the same available / in-use scheme for
    // `ggml_tensor_extra_cl_q4_0` objects. All four vectors hold raw owning
    // pointers: the cleanup loops earlier in this struct `delete` every entry
    // of each vector.
10911129    std::vector<ggml_tensor_extra_cl *> temp_tensor_extras;
1130+     std::vector<ggml_tensor_extra_cl *> temp_tensor_extras_in_use;
10921131    std::vector<ggml_tensor_extra_cl_q4_0 *> temp_tensor_extras_q4_0;
1132+     std::vector<ggml_tensor_extra_cl_q4_0 *> temp_tensor_extras_q4_0_in_use;
10931133
10941134    //  The buffer_context is initially created by ggml_backend_buft_alloc_buffer
10951135    //  before any tensor is initialized (at the beginning of alloc_tensor_range).
@@ -1492,6 +1532,11 @@ static void ggml_backend_opencl_buffer_clear(ggml_backend_buffer_t buffer, uint8
14921532    CL_CHECK (clFinish (queue));
14931533}
14941534
1535+ static  void  ggml_backend_opencl_buffer_reset (ggml_backend_buffer_t  buffer) {
    // Buffer `reset` callback for the OpenCL backend: casts `buffer->context`
    // to `ggml_backend_opencl_buffer_context` and calls its `reset ()`, which
    // moves the in-use tensor extras back to the available pools.
    // Installed as the `.reset` entry of `ggml_backend_opencl_buffer_interface`.
    // NOTE(review): neither `buffer` nor `buffer->context` is checked for null;
    // presumably the caller guarantees both are valid -- confirm.
1536+     ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context ;
1537+     ctx->reset ();
1538+ }
1539+ 
14951540static  ggml_backend_buffer_i ggml_backend_opencl_buffer_interface = {
14961541    /*  .free_buffer     = */   ggml_backend_opencl_buffer_free_buffer,
14971542    /*  .get_base        = */   ggml_backend_opencl_buffer_get_base,
@@ -1501,7 +1546,7 @@ static ggml_backend_buffer_i ggml_backend_opencl_buffer_interface = {
15011546    /*  .get_tensor      = */   ggml_backend_opencl_buffer_get_tensor,
15021547    /*  .cpy_tensor      = */   NULL ,
15031548    /*  .clear           = */   ggml_backend_opencl_buffer_clear,
1504-     /*  .reset           = */   NULL ,
1549+     /*  .reset           = */   ggml_backend_opencl_buffer_reset ,
15051550};
15061551
15071552// 
0 commit comments