Skip to content

Commit 223c5f0

Browse files
committed
clblast survived
2 parents 3072db6 + c5b0f4b commit 223c5f0

18 files changed

+1910
-1941
lines changed

ggml-opencl.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2095,7 +2095,6 @@ static void ggml_backend_opencl_buffer_reset(ggml_backend_buffer_t buffer) {
20952095
}
20962096

20972097
static ggml_backend_buffer_i ggml_backend_opencl_buffer_interface = {
2098-
/* .get_name = */ ggml_backend_opencl_buffer_get_name,
20992098
/* .free_buffer = */ ggml_backend_opencl_buffer_free_buffer,
21002099
/* .get_base = */ ggml_backend_opencl_buffer_get_base,
21012100
/* .init_tensor = */ ggml_backend_opencl_buffer_init_tensor,

ggml/include/ggml-backend.h

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,12 @@ extern "C" {
114114
//
115115

116116
enum ggml_backend_dev_type {
117+
// CPU device using system memory
117118
GGML_BACKEND_DEVICE_TYPE_CPU,
119+
// GPU device using dedicated memory
118120
GGML_BACKEND_DEVICE_TYPE_GPU,
119-
// devices with full capabilities (excludes backends such as BLAS that only support matrix multiplication)
120-
GGML_BACKEND_DEVICE_TYPE_CPU_FULL,
121-
GGML_BACKEND_DEVICE_TYPE_GPU_FULL
121+
// accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
122+
GGML_BACKEND_DEVICE_TYPE_ACCEL
122123
};
123124

124125
// functionality supported by the device
@@ -167,10 +168,14 @@ extern "C" {
167168
GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
168169
GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
169170

171+
// Common functions that may be obtained using ggml_backend_reg_get_proc_address
170172

171-
// Functions that may be obtained using ggml_backend_reg_get_proc_address
172-
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(const float *);
173-
typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t, int);
173+
// Split buffer type for tensor parallelism
174+
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split);
175+
// Set the number of threads for the backend
176+
typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
177+
// Get additional buffer types provided by the device (returns a NULL-terminated array)
178+
typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
174179

175180
//
176181
// Backend registry
@@ -192,7 +197,7 @@ extern "C" {
192197
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
193198
// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
194199
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
195-
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU_FULL) OR ggml_backend_dev_by_type(CPU_FULL), NULL)
200+
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
196201
GGML_API ggml_backend_t ggml_backend_init_best(void);
197202

198203
//

ggml/include/ggml-cuda.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
2929
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
3030

3131
// split tensor buffer that splits matrices by rows across multiple devices
32-
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
32+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split);
3333

3434
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
3535
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);

ggml/src/ggml-amx.cpp

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,6 @@
1616
#if defined(__AMX_INT8__)
1717

1818
// AMX buffer interface
19-
static const char * ggml_backend_amx_buffer_get_name(ggml_backend_buffer_t buffer) {
20-
return "AMX";
21-
22-
GGML_UNUSED(buffer);
23-
}
24-
2519
static void ggml_backend_amx_buffer_free_buffer(ggml_backend_buffer_t buffer) {
2620
free(buffer->context);
2721
}
@@ -72,7 +66,6 @@ static void ggml_backend_amx_buffer_clear(ggml_backend_buffer_t buffer, uint8_t
7266
}
7367

7468
static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {
75-
/* .get_name = */ ggml_backend_amx_buffer_get_name,
7669
/* .free_buffer = */ ggml_backend_amx_buffer_free_buffer,
7770
/* .get_base = */ ggml_backend_amx_buffer_get_base,
7871
/* .init_tensor = */ NULL, // no initialization required
@@ -121,14 +114,14 @@ static bool ggml_backend_amx_buffer_type_is_host(ggml_backend_buffer_type_t buft
121114
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
122115
static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
123116
/* .iface = */ {
124-
/* .get_name = */ ggml_backend_amx_buffer_type_get_name,
125-
/* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer,
126-
/* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment,
127-
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
128-
/* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,
129-
/* .is_host = */ ggml_backend_amx_buffer_type_is_host,
117+
/* .get_name = */ ggml_backend_amx_buffer_type_get_name,
118+
/* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer,
119+
/* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment,
120+
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
121+
/* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,
122+
/* .is_host = */ ggml_backend_amx_buffer_type_is_host,
130123
},
131-
/* .device = */ NULL,
124+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_amx_reg(), 0),
132125
/* .context = */ NULL,
133126
};
134127

@@ -149,12 +142,6 @@ static void ggml_backend_amx_free(ggml_backend_t backend) {
149142
delete backend;
150143
}
151144

152-
static ggml_backend_buffer_type_t ggml_backend_amx_get_default_buffer_type(ggml_backend_t backend) {
153-
return ggml_backend_amx_buffer_type();
154-
155-
GGML_UNUSED(backend);
156-
}
157-
158145
static enum ggml_status ggml_backend_amx_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
159146
ggml_backend_amx_context * ctx = (ggml_backend_amx_context *)backend->context;
160147

@@ -187,7 +174,6 @@ static enum ggml_status ggml_backend_amx_graph_compute(ggml_backend_t backend, s
187174
static struct ggml_backend_i ggml_backend_amx_i = {
188175
/* .get_name = */ ggml_backend_amx_name,
189176
/* .free = */ ggml_backend_amx_free,
190-
/* .get_default_buffer_type = */ ggml_backend_amx_get_default_buffer_type,
191177
/* .set_tensor_async = */ NULL,
192178
/* .get_tensor_async = */ NULL,
193179
/* .cpy_tensor_async = */ NULL,
@@ -197,9 +183,6 @@ static struct ggml_backend_i ggml_backend_amx_i = {
197183
/* .graph_plan_update = */ NULL,
198184
/* .graph_plan_compute = */ NULL,
199185
/* .graph_compute = */ ggml_backend_amx_graph_compute,
200-
/* .supports_op = */ NULL,
201-
/* .supports_buft = */ NULL,
202-
/* .offload_op = */ NULL,
203186
/* .event_record = */ NULL,
204187
/* .event_wait = */ NULL,
205188
};
@@ -279,7 +262,7 @@ static void ggml_backend_amx_device_get_memory(ggml_backend_dev_t dev, size_t *
279262
}
280263

281264
static enum ggml_backend_dev_type ggml_backend_amx_device_get_type(ggml_backend_dev_t dev) {
282-
return GGML_BACKEND_DEVICE_TYPE_CPU;
265+
return GGML_BACKEND_DEVICE_TYPE_ACCEL;
283266

284267
GGML_UNUSED(dev);
285268
}

ggml/src/ggml-backend-impl.h

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ extern "C" {
2222
size_t (*get_max_size) (ggml_backend_buffer_type_t buft);
2323
// (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
2424
size_t (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
25-
// (optional) check if tensor data is in host memory (defaults to false)
25+
// (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
2626
bool (*is_host) (ggml_backend_buffer_type_t buft);
2727
};
2828

@@ -37,7 +37,6 @@ extern "C" {
3737
//
3838

3939
struct ggml_backend_buffer_i {
40-
const char * (*get_name) (ggml_backend_buffer_t buffer);
4140
// (optional) free the buffer
4241
void (*free_buffer) (ggml_backend_buffer_t buffer);
4342
// base address of the buffer
@@ -88,19 +87,16 @@ extern "C" {
8887

8988
void (*free)(ggml_backend_t backend);
9089

91-
// Will be moved to the device interface
92-
// buffer allocation
93-
ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
94-
9590
// (optional) asynchronous tensor data access
9691
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
9792
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
9893
bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
9994

100-
// (optional) complete all pending operations
95+
// (optional) complete all pending operations (required if the backend supports async operations)
10196
void (*synchronize)(ggml_backend_t backend);
10297

103-
// (optional) compute graph with a plan (not used currently)
98+
// (optional) graph plans (not used currently)
99+
// compute graph with a plan
104100
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
105101
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
106102
// update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
@@ -111,13 +107,6 @@ extern "C" {
111107
// compute graph (always async if supported by the backend)
112108
enum ggml_status (*graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
113109

114-
// IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
115-
// new backends should implement the device interface instead
116-
// These functions are being moved to the device interface
117-
bool (*supports_op) (ggml_backend_t backend, const struct ggml_tensor * op);
118-
bool (*supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
119-
bool (*offload_op) (ggml_backend_t backend, const struct ggml_tensor * op);
120-
121110
// (optional) event synchronization
122111
// record an event on this stream
123112
void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);

0 commit comments

Comments (0)