@@ -1064,32 +1064,72 @@ struct ggml_backend_opencl_buffer_context {
1064
1064
CL_CHECK (clReleaseMemObject (im));
1065
1065
}
1066
1066
1067
+ // Delete all extras to trigger their destructors
1067
1068
for (ggml_tensor_extra_cl * e : temp_tensor_extras) {
1068
1069
delete e;
1069
1070
}
1071
+ for (ggml_tensor_extra_cl * e : temp_tensor_extras_in_use) {
1072
+ delete e;
1073
+ }
1070
1074
for (ggml_tensor_extra_cl_q4_0 * e : temp_tensor_extras_q4_0) {
1071
1075
delete e;
1072
1076
}
1077
+ for (ggml_tensor_extra_cl_q4_0 * e : temp_tensor_extras_q4_0_in_use) {
1078
+ delete e;
1079
+ }
1073
1080
}
1074
1081
1075
1082
// Hands out a temporary `ggml_tensor_extra_cl`.
//
// An extra is taken from the back of the available pool
// (`temp_tensor_extras`) when that pool is non-empty; otherwise a new one is
// allocated. In both cases the pointer is recorded in
// `temp_tensor_extras_in_use` and `reset()` is called on the extra before it
// is returned, so a recycled extra does not carry over its previous state.
//
// Returns: pointer to the extra; the pointer stays tracked by this context.
ggml_tensor_extra_cl * ggml_opencl_alloc_temp_tensor_extra() {
    ggml_tensor_extra_cl * extra;
    if (temp_tensor_extras.empty()) {
        // Nothing available for reuse: grow the pool by one object.
        extra = new ggml_tensor_extra_cl();
    } else {
        // Recycle the most recently returned extra.
        extra = temp_tensor_extras.back();
        temp_tensor_extras.pop_back();
    }

    temp_tensor_extras_in_use.push_back(extra);

    extra->reset();
    return extra;
}
1082
1096
1083
1097
// Hands out a temporary `ggml_tensor_extra_cl_q4_0`.
//
// An extra is taken from the back of the available pool
// (`temp_tensor_extras_q4_0`) when that pool is non-empty; otherwise a new
// one is allocated. In both cases the pointer is recorded in
// `temp_tensor_extras_q4_0_in_use` and `reset()` is called on the extra
// before it is returned, so a recycled extra does not carry over its
// previous state.
//
// Returns: pointer to the extra; the pointer stays tracked by this context.
ggml_tensor_extra_cl_q4_0 * ggml_opencl_alloc_temp_tensor_extra_q4_0() {
    ggml_tensor_extra_cl_q4_0 * extra;
    if (temp_tensor_extras_q4_0.empty()) {
        // Nothing available for reuse: grow the pool by one object.
        extra = new ggml_tensor_extra_cl_q4_0();
    } else {
        // Recycle the most recently returned extra.
        extra = temp_tensor_extras_q4_0.back();
        temp_tensor_extras_q4_0.pop_back();
    }

    temp_tensor_extras_q4_0_in_use.push_back(extra);

    extra->reset();
    return extra;
}
1090
1111
1112
+ void reset () {
1113
+ for (ggml_tensor_extra_cl * e : temp_tensor_extras_in_use) {
1114
+ temp_tensor_extras.push_back (e);
1115
+ }
1116
+ temp_tensor_extras_in_use.clear ();
1117
+
1118
+ for (ggml_tensor_extra_cl_q4_0 * e : temp_tensor_extras_q4_0_in_use) {
1119
+ temp_tensor_extras_q4_0.push_back (e);
1120
+ }
1121
+ temp_tensor_extras_q4_0_in_use.clear ();
1122
+ }
1123
+
1124
+ // Pools for extras. Available extras are in `temp_tensor_extras`. Extras
1125
+ // being used are in `temp_tensor_extras_in_use`. At the first run, new
1126
+ // extras get created and put in `in_use`. When the buffer is reset via
1127
+ // the `reset` callback, all extras in `in_use` get moved to available extras
1128
+ // for reuse.
1091
1129
std::vector<ggml_tensor_extra_cl *> temp_tensor_extras;
1130
+ std::vector<ggml_tensor_extra_cl *> temp_tensor_extras_in_use;
1092
1131
std::vector<ggml_tensor_extra_cl_q4_0 *> temp_tensor_extras_q4_0;
1132
+ std::vector<ggml_tensor_extra_cl_q4_0 *> temp_tensor_extras_q4_0_in_use;
1093
1133
1094
1134
// The buffer_context is initially created by ggml_backend_buft_alloc_buffer
1095
1135
// before any tensor is initialized (at the beginning of alloc_tensor_range).
@@ -1492,6 +1532,11 @@ static void ggml_backend_opencl_buffer_clear(ggml_backend_buffer_t buffer, uint8
1492
1532
CL_CHECK (clFinish (queue));
1493
1533
}
1494
1534
1535
+ static void ggml_backend_opencl_buffer_reset (ggml_backend_buffer_t buffer) {
1536
+ ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context ;
1537
+ ctx->reset ();
1538
+ }
1539
+
1495
1540
static ggml_backend_buffer_i ggml_backend_opencl_buffer_interface = {
1496
1541
/* .free_buffer = */ ggml_backend_opencl_buffer_free_buffer,
1497
1542
/* .get_base = */ ggml_backend_opencl_buffer_get_base,
@@ -1501,7 +1546,7 @@ static ggml_backend_buffer_i ggml_backend_opencl_buffer_interface = {
1501
1546
/* .get_tensor = */ ggml_backend_opencl_buffer_get_tensor,
1502
1547
/* .cpy_tensor = */ NULL ,
1503
1548
/* .clear = */ ggml_backend_opencl_buffer_clear,
1504
- /* .reset = */ NULL ,
1549
+ /* .reset = */ ggml_backend_opencl_buffer_reset ,
1505
1550
};
1506
1551
1507
1552
//
0 commit comments