Commit d12a712

Upgrade init_tensor API to return a ggml_status
To prepare for an 'abort-free' ggml (ggml should no longer abort on OOM but instead return an OOM status), as agreed with Diego in the ggml repo, upgrade the backends' init_tensor APIs to return a ggml_status.
1 parent a7b8ce2 commit d12a712
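As a sketch of the direction this opens up (hypothetical code, not part of this commit): once init_tensor returns a status, a backend can report an out-of-memory condition to its caller instead of aborting. The function below is illustrative only; malloc stands in for a real device allocator, while ggml_nbytes and the GGML_STATUS_* values are existing ggml API.

```c
#include <stdlib.h>
#include "ggml-backend-impl.h"

// Sketch only: an init_tensor hook that reports OOM through the new
// return value instead of asserting. malloc stands in for a device allocator.
static enum ggml_status my_backend_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                                      struct ggml_tensor * tensor) {
    void * extra = malloc(ggml_nbytes(tensor)); // hypothetical per-tensor allocation
    if (extra == NULL) {
        return GGML_STATUS_ALLOC_FAILED; // previously the only option was to abort
    }
    tensor->extra = extra;
    GGML_UNUSED(buffer);
    return GGML_STATUS_SUCCESS;
}
```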

13 files changed: 40 additions, 16 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
```diff
@@ -45,6 +45,8 @@ lcov-report/
 tags
 .build/
 build*
+release
+debug
 !build-info.cmake
 !build-info.cpp.in
 !build-info.sh
```

ggml/src/ggml-backend-impl.h

Lines changed: 1 addition & 1 deletion
```diff
@@ -44,7 +44,7 @@ extern "C" {
         // base address of the buffer
         void * (*get_base) (ggml_backend_buffer_t buffer);
         // (optional) initialize a tensor in the buffer (eg. add tensor extras)
-        void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+        enum ggml_status (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
         // tensor data access
         void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
         void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
```
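For reference, these are the status codes an init_tensor implementation can now return, as declared in ggml.h:

```c
// From ggml.h: the possible return values of the upgraded hook.
enum ggml_status {
    GGML_STATUS_ALLOC_FAILED = -2, // e.g. OOM, the motivating case for this change
    GGML_STATUS_FAILED       = -1,
    GGML_STATUS_SUCCESS      =  0,
    GGML_STATUS_ABORTED      =  1,
};
```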

ggml/src/ggml-backend.cpp

Lines changed: 1 addition & 0 deletions
```diff
@@ -126,6 +126,7 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
     return base;
 }
 
+// Check with reviewers: any cons for that method to return a ggml_status?
 void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     // init_tensor is optional
     if (buffer->iface.init_tensor) {
```
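On the "check with reviewers" note above: a natural follow-up, sketched here but not part of this commit, would be for the wrapper to forward the backend's status to its caller instead of discarding it:

```c
// Possible follow-up sketch: propagate the backend status upward.
enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                                 struct ggml_tensor * tensor) {
    // init_tensor is optional
    if (buffer->iface.init_tensor) {
        return buffer->iface.init_tensor(buffer, tensor);
    }
    return GGML_STATUS_SUCCESS;
}
```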

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 3 additions & 2 deletions
```diff
@@ -796,11 +796,11 @@ static bool need_transform(ggml_type type) {
  * @param buffer The CANN buffer from which to initialize the tensor.
  * @param tensor Pointer to the tensor to be initialized.
  */
-static void ggml_backend_cann_buffer_init_tensor(
+static enum ggml_status ggml_backend_cann_buffer_init_tensor(
     ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
     if (tensor->view_src != NULL && tensor->view_offs == 0) {
         GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
-        return;
+        return GGML_STATUS_SUCCESS;
     }
 
     // TODO: can backend doesn't support quantized yet. Just leave the code
@@ -817,6 +817,7 @@ static void ggml_backend_cann_buffer_init_tensor(
                                 memset_size, 0, memset_size));
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 // TODO: need handle tensor which has paddings.
```

ggml/src/ggml-cpu/amx/amx.cpp

Lines changed: 2 additions & 1 deletion
```diff
@@ -50,10 +50,11 @@ static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
     return (void *) (buffer->context);
 }
 
-static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(buffer, tensor);
 
     GGML_UNUSED(buffer);
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
```

ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

Lines changed: 2 additions & 1 deletion
```diff
@@ -4135,10 +4135,11 @@ static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(con
     return nullptr;
 }
 
-static void ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     tensor->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_aarch64_get_optimal_repack_type(tensor));
 
     GGML_UNUSED(buffer);
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cpu_aarch64_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
```

ggml/src/ggml-cuda/CMakeLists.txt

Lines changed: 8 additions & 1 deletion
```diff
@@ -1,9 +1,13 @@
 cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
 
+unset(${CMAKE_CUDA_HOST_COMPILER} CACHE)
+message(DEBUG "CUDAHOSTCXX= $ENV{CUDAHOSTCXX}")
+message(DEBUG "CMAKE_CUDA_HOST_COMPILER= ${CMAKE_CUDA_HOST_COMPILER}") # reminder: must be a C++ compiler, not just C
+
 find_package(CUDAToolkit)
 
 if (CUDAToolkit_FOUND)
-    message(STATUS "CUDA Toolkit found")
+    message(STATUS "CUDA Toolkit found: ${CUDAToolkit_VERSION}")
 
     if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
         # native == GPUs available at build time
@@ -24,6 +28,9 @@ if (CUDAToolkit_FOUND)
 
     enable_language(CUDA)
 
+    message(DEBUG "CMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}")
+    message(DEBUG "CMAKE_CUDA_HOST_COMPILER= ${CMAKE_CUDA_HOST_COMPILER}")
+
     file(GLOB   GGML_HEADERS_CUDA "*.cuh")
     list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h")
```
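Note that message(DEBUG ...) output is suppressed at CMake's default log level; pass --log-level=DEBUG when configuring to see these diagnostics.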

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 5 additions & 3 deletions
```diff
@@ -534,12 +534,12 @@ static void * ggml_backend_cuda_buffer_get_base(ggml_backend_buffer_t buffer) {
     return ctx->dev_ptr;
 }
 
-static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
 
     if (tensor->view_src != NULL) {
         assert(tensor->view_src->buffer->buft == buffer->buft);
-        return;
+        return GGML_STATUS_SUCCESS; // check with reviewers
     }
 
     if (ggml_is_quantized(tensor->type) && tensor->view_src == nullptr && ggml_backend_buffer_get_usage(buffer) != GGML_BACKEND_BUFFER_USAGE_COMPUTE) {
@@ -552,6 +552,7 @@ static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, g
             CUDA_CHECK(cudaMemset((char *)tensor->data + original_size, 0, padded_size - original_size));
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cuda_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
@@ -786,7 +787,7 @@ static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buff
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
 
     ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
@@ -832,6 +833,7 @@ static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buf
         }
     }
     tensor->extra = extra;
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
```
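The padding memset above still goes through CUDA_CHECK, which aborts on failure; fully realizing the abort-free goal would eventually mean surfacing such errors through the new return value as well. A hypothetical sketch of that step inside ggml_backend_cuda_buffer_init_tensor (not part of this commit):

```c
// Hypothetical follow-up: turn a cudaMemset failure into a status
// instead of letting CUDA_CHECK abort the process.
cudaError_t err = cudaMemset((char *) tensor->data + original_size, 0,
                             padded_size - original_size);
if (err != cudaSuccess) {
    return GGML_STATUS_FAILED;
}
```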

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 2 additions & 1 deletion
```diff
@@ -1211,7 +1211,7 @@ static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer)
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
 
     ggml_cl2_init(buffer->buft->device);
@@ -1251,6 +1251,7 @@ static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer,
             tensor->extra = extra;
         }
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 // The optimized gemm and gemv kernels are used for large matrices without batch.
```

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 2 additions & 1 deletion
```diff
@@ -464,7 +464,7 @@ static rpc_tensor serialize_tensor(const ggml_tensor * tensor) {
     return result;
 }
 
-static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
 
     // CUDA backend on the server pads everything to 512 due to CUDA limitations.
@@ -478,6 +478,7 @@ static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, gg
         bool status = send_rpc_cmd(ctx->sock, RPC_CMD_INIT_TENSOR, &request, sizeof(request), nullptr, 0);
         GGML_ASSERT(status);
     }
+    return GGML_STATUS_SUCCESS;
 }
 
 static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
```
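Similarly, the RPC path still aborts via GGML_ASSERT(status) when the RPC_CMD_INIT_TENSOR command cannot be sent; under the abort-free direction, a hypothetical follow-up inside ggml_backend_rpc_buffer_init_tensor could be:

```c
// Hypothetical follow-up: report a transport failure instead of asserting.
bool ok = send_rpc_cmd(ctx->sock, RPC_CMD_INIT_TENSOR, &request, sizeof(request), nullptr, 0);
if (!ok) {
    return GGML_STATUS_FAILED;
}
```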
