Skip to content

Commit 1c4e4d5

Browse files
committed
opencl: use pools for tensor_extra
1 parent 8201823 commit 1c4e4d5

File tree

1 file changed

+52
-7
lines changed

1 file changed

+52
-7
lines changed

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 52 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1064,32 +1064,72 @@ struct ggml_backend_opencl_buffer_context {
10641064
CL_CHECK(clReleaseMemObject(im));
10651065
}
10661066

1067+
// Delete all extras to trigger their destructors
10671068
for (ggml_tensor_extra_cl * e : temp_tensor_extras) {
10681069
delete e;
10691070
}
1071+
for (ggml_tensor_extra_cl * e : temp_tensor_extras_in_use) {
1072+
delete e;
1073+
}
10701074
for (ggml_tensor_extra_cl_q4_0 * e : temp_tensor_extras_q4_0) {
10711075
delete e;
10721076
}
1077+
for (ggml_tensor_extra_cl_q4_0 * e : temp_tensor_extras_q4_0_in_use) {
1078+
delete e;
1079+
}
10731080
}
10741081

10751082
// Hands out a `ggml_tensor_extra_cl` from this context's pool.
// When `temp_tensor_extras` holds an available extra, the one at the back is
// taken; otherwise a new extra is allocated. The extra is then recorded in
// `temp_tensor_extras_in_use`, cleared with `extra->reset()`, and returned.
// The returned pointer stays tracked by this context's vectors after the call.
ggml_tensor_extra_cl * ggml_opencl_alloc_temp_tensor_extra() {
// Replaced version (diff lines tagged `-`): allocated a new extra on every
// call and stored it directly in `temp_tensor_extras`.
1076-
ggml_tensor_extra_cl * extra = new ggml_tensor_extra_cl();
1077-
extra->reset();
1078-
temp_tensor_extras.push_back(extra);
// Current version (diff lines tagged `+`): reuse a pooled extra when one is
// available, allocate only when the pool is empty.
1083+
ggml_tensor_extra_cl * extra;
1084+
if (temp_tensor_extras.empty()) {
1085+
extra = new ggml_tensor_extra_cl();
1086+
} else {
1087+
extra = temp_tensor_extras.back();
1088+
temp_tensor_extras.pop_back();
1089+
}
1090+
1091+
temp_tensor_extras_in_use.push_back(extra);
10791092

// Reset happens on every hand-out, so a recycled extra is cleared before use.
1093+
extra->reset();
10801094
return extra;
10811095
}
10821096

10831097
// Hands out a `ggml_tensor_extra_cl_q4_0` from this context's q4_0 pool.
// When `temp_tensor_extras_q4_0` holds an available extra, the one at the back
// is taken; otherwise a new extra is allocated. The extra is then recorded in
// `temp_tensor_extras_q4_0_in_use`, cleared with `extra->reset()`, and returned.
// The returned pointer stays tracked by this context's vectors after the call.
ggml_tensor_extra_cl_q4_0 * ggml_opencl_alloc_temp_tensor_extra_q4_0() {
// Replaced version (diff lines tagged `-`): allocated a new extra on every
// call and stored it directly in `temp_tensor_extras_q4_0`.
1084-
ggml_tensor_extra_cl_q4_0 * extra = new ggml_tensor_extra_cl_q4_0();
1085-
extra->reset();
1086-
temp_tensor_extras_q4_0.push_back(extra);
// Current version (diff lines tagged `+`): reuse a pooled extra when one is
// available, allocate only when the pool is empty.
1098+
ggml_tensor_extra_cl_q4_0 * extra;
1099+
if (temp_tensor_extras_q4_0.empty()) {
1100+
extra = new ggml_tensor_extra_cl_q4_0();
1101+
} else {
1102+
extra = temp_tensor_extras_q4_0.back();
1103+
temp_tensor_extras_q4_0.pop_back();
1104+
}
10871105

1106+
temp_tensor_extras_q4_0_in_use.push_back(extra);
1107+
// Reset happens on every hand-out, so a recycled extra is cleared before use.
1108+
extra->reset();
10881109
return extra;
10891110
}
10901111

1112+
// Returns every handed-out extra to its pool.
// Each pointer in `temp_tensor_extras_in_use` is appended to
// `temp_tensor_extras` and the in-use list is cleared; the same is then done
// for the q4_0 pair. Nothing is deleted here and the extras themselves are not
// modified; only the bookkeeping vectors change.
void reset() {
1113+
for (ggml_tensor_extra_cl * e : temp_tensor_extras_in_use) {
1114+
temp_tensor_extras.push_back(e);
1115+
}
1116+
temp_tensor_extras_in_use.clear();
1117+
1118+
for (ggml_tensor_extra_cl_q4_0 * e : temp_tensor_extras_q4_0_in_use) {
1119+
temp_tensor_extras_q4_0.push_back(e);
1120+
}
1121+
temp_tensor_extras_q4_0_in_use.clear();
1122+
}
1123+
1124+
// Pools for extras. Available extras are in `temp_tensor_extras`; extras
1124+
// currently handed out are in `temp_tensor_extras_in_use`. On the first run,
1125+
// new extras are created and placed in `in_use`. When the buffer is reset via
1126+
// the `reset` callback, all extras in `in_use` are moved back to the available
1127+
// list for reuse. The `_q4_0` vectors form a second pool managed the same way.
10911129
std::vector<ggml_tensor_extra_cl *> temp_tensor_extras;
1130+
std::vector<ggml_tensor_extra_cl *> temp_tensor_extras_in_use;
10921131
std::vector<ggml_tensor_extra_cl_q4_0 *> temp_tensor_extras_q4_0;
1132+
std::vector<ggml_tensor_extra_cl_q4_0 *> temp_tensor_extras_q4_0_in_use;
10931133

10941134
// The buffer_context is initially created by ggml_backend_buft_alloc_buffer
10951135
// before any tensor is initialized (at the beginning of alloc_tensor_range).
@@ -1492,6 +1532,11 @@ static void ggml_backend_opencl_buffer_clear(ggml_backend_buffer_t buffer, uint8
14921532
CL_CHECK(clFinish(queue));
14931533
}
14941534

1535+
// `reset` entry of `ggml_backend_opencl_buffer_interface`.
// Casts `buffer->context` to `ggml_backend_opencl_buffer_context` and calls
// its `reset()` method. No other work is done here.
static void ggml_backend_opencl_buffer_reset(ggml_backend_buffer_t buffer) {
1536+
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
1537+
ctx->reset();
1538+
}
1539+
14951540
static ggml_backend_buffer_i ggml_backend_opencl_buffer_interface = {
14961541
/* .free_buffer = */ ggml_backend_opencl_buffer_free_buffer,
14971542
/* .get_base = */ ggml_backend_opencl_buffer_get_base,
@@ -1501,7 +1546,7 @@ static ggml_backend_buffer_i ggml_backend_opencl_buffer_interface = {
15011546
/* .get_tensor = */ ggml_backend_opencl_buffer_get_tensor,
15021547
/* .cpy_tensor = */ NULL,
15031548
/* .clear = */ ggml_backend_opencl_buffer_clear,
1504-
/* .reset = */ NULL,
1549+
/* .reset = */ ggml_backend_opencl_buffer_reset,
15051550
};
15061551

15071552
//

0 commit comments

Comments
 (0)