@@ -805,21 +805,46 @@ struct ggml_tensor_extra_cl {
805
805
// `offset`, which indicate their locations in the scratch buffer.
806
806
struct ggml_tensor_extra_cl_q4_0 {
807
807
// Quantized values.
808
- cl_mem q;
808
+ cl_mem q = nullptr ;
809
809
// Quantized values in image1d_buffer_t.
810
- cl_mem q_img;
810
+ cl_mem q_img = nullptr ;
811
811
// Scales.
812
- cl_mem d;
812
+ cl_mem d = nullptr ;
813
813
// Scales in image1d_buffer_t.
814
- cl_mem d_img;
814
+ cl_mem d_img = nullptr ;
815
815
// Size of quantized values.
816
- size_t size_q;
816
+ size_t size_q = 0 ;
817
817
// Size of scales.
818
- size_t size_d;
818
+ size_t size_d = 0 ;
819
+
820
+ ~ggml_tensor_extra_cl_q4_0 () { // Destructor: all cleanup is delegated to reset().
821
+ reset (); // reset() releases q and d with clReleaseMemObject unless GGML_OPENCL_SMALL_ALLOC is defined, and nulls q, d, q_img and d_img.
822
+ }
819
823
820
824
void reset () {
821
- q = nullptr ;
822
- d = nullptr ;
825
+ // When SMALL_ALLOC is not enabled, q and d are subbuffers into
826
+ // the bigger buffer allocated in ggml_backend_buffer.
827
+ // They must be properly released so that the original buffer can be
828
+ // properly released to avoid memory leak.
829
+ // When SMALL_ALLOC is enabled, q and d point to the buffers in
830
+ // ggml_backend_opencl2_buffer_context. These buffers get released when
831
+ // the context is deleted, so there is no need to release them here.
832
+ if (q != nullptr ) {
833
+ #ifndef GGML_OPENCL_SMALL_ALLOC
834
+ CL_CHECK (clReleaseMemObject (q));
835
+ #endif
836
+ q = nullptr ;
837
+ }
838
+ if (d != nullptr ) {
839
+ #ifndef GGML_OPENCL_SMALL_ALLOC
840
+ CL_CHECK (clReleaseMemObject (d));
841
+ #endif
842
+ d = nullptr ;
843
+ }
844
+ // Currently, q_img and d_img are only initialized when SMALL_ALLOC is
845
+ // enabled. They point to the images in ggml_backend_opencl2_buffer_context.
846
+ // So, there is no need to release them here.
847
+ // TODO: initialize them for the non-SMALL_ALLOC path, or remove them.
823
848
q_img = nullptr ;
824
849
d_img = nullptr ;
825
850
size_q = 0 ;
@@ -1428,7 +1453,8 @@ static void ggml_backend_opencl2_buffer_set_tensor(ggml_backend_buffer_t buffer,
1428
1453
GGML_ASSERT (extra_orig && " Tesnors in OpenCL backend should have been allocated and initialized" );
1429
1454
1430
1455
// Allocate the new extra and create aliases from the original.
1431
- ggml_tensor_extra_cl_q4_0 * extra = new ggml_tensor_extra_cl_q4_0 ();
1456
+ ggml_backend_opencl2_buffer_context * ctx = (ggml_backend_opencl2_buffer_context *) buffer->context ;
1457
+ ggml_tensor_extra_cl_q4_0 * extra = ctx->ggml_opencl2_alloc_temp_tensor_extra_q4_0 ();
1432
1458
1433
1459
size_t size_d = ggml_nelements (tensor)/ggml_blck_size (tensor->type )*sizeof (ggml_fp16_t );
1434
1460
size_t size_q = ggml_nelements (tensor)/ggml_blck_size (tensor->type )*ggml_blck_size (tensor->type )/2 ;
0 commit comments