Skip to content

Commit 104d8bd

Browse files
qnixsynapse authored and arthw committed
SYCL: set extras only on GGML_TYPE_Q4_0 (ggml-org#12366)
* SYCL: set extras only on GGML_TYPE_Q4_0
* release tensor_extras in reset buffer interface
1 parent 7e6f894 commit 104d8bd

File tree

1 file changed

+22
-6
lines changed

1 file changed

+22
-6
lines changed

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 22 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -149,10 +149,11 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
149149
assert(tensor->view_src->buffer->buft == buffer->buft);
150150
return GGML_STATUS_SUCCESS;
151151
}
152-
153-
ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};
154-
tensor->extra = extra;
155-
ctx->tensor_extras.push_back(extra); //used to release it when destroy ctx.
152+
if (tensor->type == GGML_TYPE_Q4_0) {
153+
ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};
154+
tensor->extra = extra;
155+
ctx->tensor_extras.push_back(extra); //used to release it when destroy ctx.
156+
}
156157

157158
if (ggml_is_quantized(tensor->type)) {
158159
// initialize padding to 0 to avoid possible NaN values
@@ -302,6 +303,22 @@ catch (sycl::exception const &exc) {
302303
std::exit(1);
303304
}
304305

306+
// `reset` callback of ggml_backend_sycl_buffer_interface.
//
// Passes every ggml_tensor_extra_gpu recorded in the buffer context's
// `tensor_extras` list to release_extra_gpu() and then empties the list.
//
// buffer: the buffer whose context is reset. A null buffer, or a buffer whose
//         context is null, is a no-op (the debug line is still emitted).
//
// NOTE(review): init_tensor stores the same pointers in tensor->extra; if
// release_extra_gpu() frees them, tensors keep stale pointers until they are
// re-initialized — confirm callers re-run init_tensor after reset.
static void ggml_backend_sycl_buffer_reset(ggml_backend_buffer_t buffer) {
307+
GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
308+
if (buffer == nullptr) {
309+
return;
310+
}
311+
312+
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
313+
314+
if (ctx != nullptr) {
// hand each tracked extra to release_extra_gpu() before dropping the list
315+
for (ggml_tensor_extra_gpu * extra : ctx->tensor_extras) {
316+
release_extra_gpu(extra);
317+
}
318+
ctx->tensor_extras.clear(); // reset the tensor_extras vector
319+
}
320+
}
321+
305322
static const ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
306323
/* .free_buffer = */ ggml_backend_sycl_buffer_free_buffer,
307324
/* .get_base = */ ggml_backend_sycl_buffer_get_base,
@@ -311,7 +328,7 @@ static const ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
311328
/* .get_tensor = */ ggml_backend_sycl_buffer_get_tensor,
312329
/* .cpy_tensor = */ ggml_backend_sycl_buffer_cpy_tensor,
313330
/* .clear = */ ggml_backend_sycl_buffer_clear,
314-
/* .reset = */ NULL,
331+
/* .reset = */ ggml_backend_sycl_buffer_reset,
315332
};
316333

317334
// sycl buffer type
@@ -3630,7 +3647,6 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) {
36303647
}
36313648

36323649
// Returns the `device_count` field of the object returned by ggml_sycl_info().
int ggml_backend_sycl_get_device_count() {
3633-
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n");
36343650
return ggml_sycl_info().device_count;
36353651
}
36363652

0 commit comments

Comments
 (0)