Commit 4bdc193

opencl: remove small-alloc support and fix build errors for non-opencl platforms

1 parent 8ba2463 · commit 4bdc193

8 files changed: +4 additions, −382 deletions

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions

@@ -757,7 +757,7 @@ jobs:
           - build: 'msvc-arm64'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'llvm-arm64-opencl-adreno'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH=${{github.workspace}}/opencl-x64-release -DGGML_OPENCL=ON -DGGML_OPENCL_SMALL_ALLOC=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON -DGGML_OPENCL_EMBED_KERNELS=ON'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH=${{github.workspace}}/opencl-arm64-release -DGGML_OPENCL=ON -DGGML_OPENCL_SMALL_ALLOC=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON -DGGML_OPENCL_EMBED_KERNELS=ON'

     steps:
     - name: Clone

@@ -810,7 +810,7 @@ jobs:
             -DBUILD_TESTING=OFF `
             -DOPENCL_HEADERS_BUILD_TESTING=OFF `
             -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
-            -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/opencl-x64-release
+            -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/opencl-arm64-release
          cmake --build . --target install
          git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
          cd OpenCL-ICD-Loader

ggml/CMakeLists.txt

Lines changed: 0 additions & 1 deletion

@@ -170,7 +170,6 @@ set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
     "ggml: sycl device architecture")

 option(GGML_OPENCL "ggml: use OpenCL" OFF)
-option(GGML_OPENCL_SMALL_ALLOC "ggml: use small allocation for tensors" ON)
 option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
 option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
 option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)

ggml/include/ggml-alloc.h

Lines changed: 0 additions & 1 deletion

@@ -69,7 +69,6 @@ GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_i
 // Utils
 // Create a buffer and allocate all the tensors in a ggml_context
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
-GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft_for_weights(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);

 #ifdef __cplusplus
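
With the _for_weights variant gone from the public header, call sites move to the remaining allocator; on builds without GGML_OPENCL_SMALL_ALLOC the removed function already just forwarded to it (see the ggml-alloc.c hunk below). A minimal migration sketch for a call site (the alloc_weights wrapper is a hypothetical example, not code from this commit):

    #include "ggml.h"
    #include "ggml-alloc.h"
    #include "ggml-backend.h"

    // Hypothetical call site: ctx holds weight tensors created with
    // no_alloc = true; buft is the target backend's buffer type.
    static ggml_backend_buffer_t alloc_weights(struct ggml_context * ctx,
                                               ggml_backend_buffer_type_t buft) {
        // Before this commit (OpenCL small-alloc builds):
        //     return ggml_backend_alloc_ctx_tensors_from_buft_for_weights(ctx, buft);
        // After: a single pooled buffer for all tensors in the context.
        return ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
    }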

ggml/src/CMakeLists.txt

Lines changed: 0 additions & 6 deletions

@@ -274,15 +274,9 @@ function(ggml_add_backend backend)
     endif()
 endfunction()

-# TODO: This is intrusive. We intend to remove the SMALL_ALLOC path once we fully
-# migrate to the non-SMALL_ALLOC path. We also need to converge on the backend name
-# so we don't need this name conversion.
 if (GGML_OPENCL)
     set(GGML_OPENCL2 ON)
     add_compile_definitions(GGML_USE_OPENCL)
-    if (GGML_OPENCL_SMALL_ALLOC)
-        add_compile_definitions(GGML_OPENCL_SMALL_ALLOC)
-    endif ()
 else ()
     set(GGML_OPENCL2 OFF)
 endif ()

ggml/src/ggml-alloc.c

Lines changed: 0 additions & 86 deletions

@@ -1033,92 +1033,6 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
     return buffer;
 }

-ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_for_weights(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
-#ifndef GGML_OPENCL_SMALL_ALLOC
-    return ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
-#else
-    // Small allocation allocates a separate buffer for each tensor. Instead of
-    // collecting multiple tensors into one large buffer, each tensor is
-    // allocated a buffer immediately. This is only supposed to be used for
-    // weights tensors (note that weights can be f32).
-    GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
-
-    size_t alignment = ggml_backend_buft_get_alignment(buft);
-
-    ggml_backend_buffer_t * buffers = NULL;
-    size_t n_buffers = 0;
-
-    struct ggml_tensor * first_view = NULL;
-    struct ggml_tensor * first = ggml_get_first_tensor(ctx);
-    for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) {
-        size_t this_size = 0;
-        if (t->data == NULL && t->view_src == NULL) {
-            // Tensor size must be properly padded.
-            this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
-        }
-
-        // The allocation logic here has gone beyond the original intention in
-        // order to make `test-backend-ops` work. The initial intention was to
-        // allocate memory for weights - each weight tensor gets its own buffer
-        // object. The original function should be used to allocate for
-        // intermediate tensors. There are usually no view tensors for weights;
-        // this is not true for intermediate tensors. However, in
-        // `test-backend-ops` there is no differentiation between weight tensors
-        // and intermediate tensors. This function is used for general allocation
-        // when small allocation is enabled in the test. This requires the
-        // function to also handle view tensors, which do not require actual
-        // allocation. In the original function, view tensors are allocated with
-        // other non-view tensors since view tensor sizes are 0.
-        // Here, we try to identify view tensors and allocate them with the next
-        // non-view tensor. View tensors cannot be allocated (alone) but must be
-        // initialized (together with non-view tensors).
-
-        // This is a view tensor if its size is 0. Record its location if it is
-        // the first one after a non-view tensor. If the next tensor is still a
-        // view, simply go to the next. We want to allocate all consecutive view
-        // tensors together with the next non-view tensor.
-        if (this_size == 0 && first_view == NULL) {
-            first_view = t;
-            continue;
-        }
-
-        if (first_view) {
-            // This is a non-view tensor. If there are any view tensors before
-            // this non-view tensor, we want to allocate those view tensors and
-            // this non-view tensor together.
-            // The first tensor to allocate is the first view tensor.
-            first = first_view;
-        } else {
-            // Otherwise, allocate this non-view tensor immediately.
-            first = t;
-        }
-
-        if (!alloc_tensor_range(ctx, first, ggml_get_next_tensor(ctx, t), buft, this_size, &buffers, &n_buffers)) {
-            return NULL;
-        }
-
-        // Always reset first_view after a non-view tensor.
-        first_view = NULL;
-    }
-
-    if (n_buffers == 0) {
-#ifndef NDEBUG
-        fprintf(stderr, "%s: all tensors in the context are already allocated\n", __func__);
-#endif
-        return NULL;
-    }
-
-    ggml_backend_buffer_t buffer;
-    if (n_buffers == 1) {
-        buffer = buffers[0];
-    } else {
-        buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
-    }
-    free(buffers);
-    return buffer;
-#endif
-}
-
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
     return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
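
For context, the deleted small-alloc path boiled down to one dedicated buffer per non-view tensor instead of a single pooled buffer for the whole context. A condensed sketch of that idea, using public ggml-backend helpers rather than the internal alloc_tensor_range() and omitting the view-tensor grouping and error handling of the removed code:

    // Sketch only: per-tensor allocation in the spirit of the removed path.
    // Assumes ctx was created with no_alloc = true.
    static void small_alloc_sketch(struct ggml_context * ctx,
                                   ggml_backend_buffer_type_t buft) {
        const size_t alignment = ggml_backend_buft_get_alignment(buft);
        for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL;
             t = ggml_get_next_tensor(ctx, t)) {
            if (t->data != NULL || t->view_src != NULL) {
                continue; // already allocated, or a view into another tensor
            }
            // Pad the tensor size, as the removed code did.
            size_t size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
            // One dedicated buffer per tensor...
            ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, size);
            // ...and bind the tensor to the start of it.
            ggml_backend_tensor_alloc(buf, t, ggml_backend_buffer_get_base(buf));
        }
    }

With this gone, ggml_backend_alloc_ctx_tensors_from_buft is the single allocation path on every platform.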
