@@ -805,21 +805,46 @@ struct ggml_tensor_extra_cl {
805805// `offset`, which indicate their locations in the scratch buffer.
806806struct ggml_tensor_extra_cl_q4_0 {
807807 // Quantized values.
808- cl_mem q;
808+ cl_mem q = nullptr ;
809809 // Quantized values in image1d_buffer_t.
810- cl_mem q_img;
810+ cl_mem q_img = nullptr ;
811811 // Scales.
812- cl_mem d;
812+ cl_mem d = nullptr ;
813813 // Scales in image1d_buffer_t.
814- cl_mem d_img;
814+ cl_mem d_img = nullptr ;
815815 // Size of quantized values.
816- size_t size_q;
816+ size_t size_q = 0 ;
817817 // Size of scales.
818- size_t size_d;
818+ size_t size_d = 0 ;
819+
820+ ~ggml_tensor_extra_cl_q4_0 () {
821+ reset ();
822+ }
819823
820824 void reset () {
821- q = nullptr ;
822- d = nullptr ;
825+ // When SMALL_ALLOC is not enabled, q and d are subbuffers into
826+ // the bigger buffer allocated in ggml_backend_buffer.
827+ // They must be released here so that the original buffer can itself be
828+ // released later; otherwise the original buffer is leaked.
829+ // When SMALL_ALLOC is enabled, q and d point to the buffers in
830+ // ggml_backend_opencl2_buffer_context. These buffers get released when
831+ // the context is deleted, so there is no need to release them here.
832+ if (q != nullptr ) {
833+ #ifndef GGML_OPENCL_SMALL_ALLOC
834+ CL_CHECK (clReleaseMemObject (q));
835+ #endif
836+ q = nullptr ;
837+ }
838+ if (d != nullptr ) {
839+ #ifndef GGML_OPENCL_SMALL_ALLOC
840+ CL_CHECK (clReleaseMemObject (d));
841+ #endif
842+ d = nullptr ;
843+ }
844+ // Currently, q_img and d_img are only initialized when SMALL_ALLOC is
845+ // enabled. They point to the images in ggml_backend_opencl2_buffer_context.
846+ // So, there is no need to release them here.
847+ // TODO: initialize them for the non-SMALL_ALLOC path, or remove them.
823848 q_img = nullptr ;
824849 d_img = nullptr ;
825850 size_q = 0 ;
@@ -1428,7 +1453,8 @@ static void ggml_backend_opencl2_buffer_set_tensor(ggml_backend_buffer_t buffer,
14281453 GGML_ASSERT (extra_orig && " Tesnors in OpenCL backend should have been allocated and initialized" );
14291454
14301455 // Allocate the new extra and create aliases from the original.
1431- ggml_tensor_extra_cl_q4_0 * extra = new ggml_tensor_extra_cl_q4_0 ();
1456+ ggml_backend_opencl2_buffer_context * ctx = (ggml_backend_opencl2_buffer_context *) buffer->context ;
1457+ ggml_tensor_extra_cl_q4_0 * extra = ctx->ggml_opencl2_alloc_temp_tensor_extra_q4_0 ();
14321458
14331459 size_t size_d = ggml_nelements (tensor)/ggml_blck_size (tensor->type )*sizeof (ggml_fp16_t );
14341460 size_t size_q = ggml_nelements (tensor)/ggml_blck_size (tensor->type )*ggml_blck_size (tensor->type )/2 ;
0 commit comments