Skip to content

Commit 673f413

Browse files
committed
wip
1 parent ea9c32b commit 673f413

25 files changed

+1227
-783
lines changed

Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,10 +1055,11 @@ ggml/src/ggml-alloc.o: \
10551055
$(CC) $(CFLAGS) -c $< -o $@
10561056

10571057
ggml/src/ggml-backend.o: \
1058-
ggml/src/ggml-backend.c \
1058+
ggml/src/ggml-backend.cpp \
1059+
ggml/src/ggml-backend-impl.h \
10591060
ggml/include/ggml.h \
10601061
ggml/include/ggml-backend.h
1061-
$(CC) $(CFLAGS) -c $< -o $@
1062+
$(CXX) $(CXXFLAGS) -c $< -o $@
10621063

10631064
ggml/src/ggml-quants.o: \
10641065
ggml/src/ggml-quants.c \

examples/llama-bench/llama-bench.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -941,7 +941,7 @@ struct test {
941941

942942
static std::string get_backend() {
943943
if (cuda) {
944-
return GGML_CUDA_NAME;
944+
return "CUDA";
945945
}
946946
if (vulkan) {
947947
return "Vulkan";

ggml/include/ggml-backend.h

Lines changed: 100 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,25 @@ extern "C" {
1212
typedef struct ggml_backend_event * ggml_backend_event_t;
1313
typedef struct ggml_backend * ggml_backend_t;
1414
typedef void * ggml_backend_graph_plan_t;
15+
typedef struct ggml_backend_reg * ggml_backend_reg_t;
16+
typedef struct ggml_backend_device * ggml_backend_dev_t;
17+
1518

1619
//
17-
// Backend buffer
20+
// Backend buffer type
1821
//
1922

20-
// buffer type
2123
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
22-
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
24+
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
2325
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
2426
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
25-
GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
27+
GGML_API size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
2628
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
2729

28-
// buffer
30+
//
31+
// Backend buffer
32+
//
33+
2934
enum ggml_backend_buffer_usage {
3035
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
3136
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
@@ -36,7 +41,7 @@ extern "C" {
3641
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
3742
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
3843
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
39-
GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
44+
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
4045
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
4146
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
4247
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
@@ -48,7 +53,7 @@ extern "C" {
4853
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
4954

5055
//
51-
// Backend
56+
// Backend (stream)
5257
//
5358

5459
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
@@ -64,9 +69,9 @@ extern "C" {
6469
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
6570

6671
// "offset" refers to the offset of the tensor data for setting/getting data
67-
GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
68-
GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
69-
GGML_API GGML_CALL void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
72+
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
73+
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
74+
GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
7075

7176
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
7277

@@ -90,51 +95,88 @@ extern "C" {
9095
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
9196

9297
// events
93-
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
94-
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
95-
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
96-
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
97-
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
98+
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
99+
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
100+
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
101+
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
102+
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
98103

99104
//
100-
// CPU backend
105+
// Backend device
101106
//
102107

103-
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
108+
enum ggml_backend_device_type {
109+
GGML_BACKEND_DEVICE_TYPE_CPU,
110+
GGML_BACKEND_DEVICE_TYPE_GPU,
111+
// devices with full capabilities (excludes backends such as BLAS)
112+
GGML_BACKEND_DEVICE_TYPE_CPU_FULL,
113+
GGML_BACKEND_DEVICE_TYPE_GPU_FULL
114+
};
104115

105-
GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
106-
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
107-
GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
108-
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
116+
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
117+
GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
118+
GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
119+
GGML_API enum ggml_backend_device_type ggml_backend_dev_type(ggml_backend_dev_t device);
109120

110-
// Create a backend buffer from an existing pointer
111-
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
121+
GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
112122

113-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
123+
GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
114124

115-
#ifdef GGML_USE_CPU_HBM
116-
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
117-
#endif
125+
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
126+
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
127+
128+
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
129+
//GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_device_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
130+
131+
GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
132+
GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
133+
GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
134+
135+
GGML_API ggml_backend_event_t ggml_backend_dev_event_new(ggml_backend_dev_t device);
118136

119137
//
120-
// Backend registry
138+
// Backend (reg)
121139
//
122140

123-
// The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
141+
GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg);
142+
GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
143+
GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
144+
GGML_API void ggml_backend_reg_add_device(ggml_backend_reg_t reg, const char * params);
145+
GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
146+
GGML_API void ggml_backend_reg_set_log_callback(ggml_backend_reg_t reg, ggml_log_callback log_callback, void * user_data);
124147

125-
GGML_API size_t ggml_backend_reg_get_count(void);
126-
GGML_API size_t ggml_backend_reg_find_by_name(const char * name); // returns index of backend with name, or SIZE_MAX if not found
127-
GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is backend_name:params (params is optional)
128-
GGML_API const char * ggml_backend_reg_get_name(size_t i);
129-
GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
130-
GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
131-
GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
148+
//
149+
// Backend registry
150+
//
151+
152+
// Backend (reg) enumeration
153+
GGML_API size_t ggml_backend_reg_count(void);
154+
GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
155+
GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name); // backend names: CPU, CUDA, Metal
156+
157+
// Device enumeration
158+
GGML_API size_t ggml_backend_dev_count(void);
159+
GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
160+
GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name); // device names: CPU, CUDA0, Metal, Vulkan0, etc
161+
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_device_type type);
162+
163+
// Set the log callback for all registered backends
164+
GGML_API void ggml_backend_set_log_callback(ggml_log_callback log_callback, void * user_data);
165+
166+
// Convenience functions, may be removed in the future
167+
// Direct Backend (stream) initialization
168+
// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
169+
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
170+
// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
171+
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_device_type type, const char * params);
172+
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU_FULL) OR ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU_FULL), NULL)
173+
GGML_API ggml_backend_t ggml_backend_init_best(void);
132174

133175
//
134176
// Backend scheduler
135177
//
136178

137-
// The backend scheduler allows for multiple backends to be used together
179+
// The backend scheduler allows for multiple backend devices to be used together
138180
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
139181
// The backends are selected based on:
140182
// - the backend that supports the operation
@@ -169,7 +211,6 @@ extern "C" {
169211
}
170212
*/
171213

172-
struct ggml_backend_sched;
173214
typedef struct ggml_backend_sched * ggml_backend_sched_t;
174215

175216
// when ask == true, the scheduler wants to know if the user wants to observe this node
@@ -226,7 +267,7 @@ extern "C" {
226267
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
227268
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
228269

229-
typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
270+
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
230271

231272
// Compare the output of two backends
232273
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
@@ -236,6 +277,26 @@ extern "C" {
236277
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
237278

238279

280+
//
281+
// CPU backend
282+
//
283+
284+
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
285+
286+
GGML_API bool ggml_backend_is_cpu (ggml_backend_t backend);
287+
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
288+
GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
289+
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
290+
291+
// Create a backend buffer from an existing pointer
292+
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
293+
294+
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
295+
296+
#ifdef GGML_USE_CPU_HBM
297+
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
298+
#endif
299+
239300
#ifdef __cplusplus
240301
}
241302
#endif

ggml/include/ggml-blas.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@ extern "C" {
99
#endif
1010

1111
// backend API
12-
GGML_API GGML_CALL ggml_backend_t ggml_backend_blas_init(void);
12+
GGML_API ggml_backend_t ggml_backend_blas_init(void);
1313

14-
GGML_API GGML_CALL bool ggml_backend_is_blas(ggml_backend_t backend);
14+
GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
1515

1616
// number of threads used for conversion to float
1717
// for openblas and blis, this will also set the number of threads used for blas operations
18-
GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
18+
GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
1919

2020

2121
#ifdef __cplusplus

ggml/include/ggml-cann.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ extern "C" {
4444
* @param device The index of the device to initialize.
4545
* @return A pointer to the initialized backend instance, or nullptr on failure.
4646
*/
47-
GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
47+
GGML_API ggml_backend_t ggml_backend_cann_init(int32_t device);
4848

4949
/**
5050
* @brief Checks if a given backend is a CANN backend.
@@ -55,7 +55,7 @@ GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
5555
* @param backend The backend instance to check.
5656
* @return True if the backend is a CANN backend, false otherwise.
5757
*/
58-
GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
58+
GGML_API bool ggml_backend_is_cann(ggml_backend_t backend);
5959

6060
/**
6161
* @brief Retrieves the CANN buffer type for a specified device.
@@ -67,7 +67,7 @@ GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
6767
* @return A pointer to the buffer type interface for the specified device, or
6868
* nullptr if the device index is out of range.
6969
*/
70-
GGML_API GGML_CALL ggml_backend_buffer_type_t
70+
GGML_API ggml_backend_buffer_type_t
7171
ggml_backend_cann_buffer_type(int32_t device);
7272

7373
/**
@@ -78,14 +78,14 @@ ggml_backend_cann_buffer_type(int32_t device);
7878
*
7979
* @return The number of CANN devices available.
8080
*/
81-
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
81+
GGML_API int32_t ggml_backend_cann_get_device_count(void);
8282

8383
/**
8484
* @brief Pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
8585
*
8686
* @return A pointer to the host buffer type interface.
8787
*/
88-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
88+
GGML_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
8989

9090
/**
9191
* @brief Retrieves the description of a specific CANN device.
@@ -97,7 +97,7 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type
9797
* @param description Pointer to a buffer where the description will be written.
9898
* @param description_size Size of the description buffer.
9999
*/
100-
GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
100+
GGML_API void ggml_backend_cann_get_device_description(
101101
int32_t device, char* description, size_t description_size);
102102

103103
/**
@@ -112,7 +112,7 @@ GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
112112
* @param total Pointer to a variable where the total memory size will be
113113
* stored.
114114
*/
115-
GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
115+
GGML_API void ggml_backend_cann_get_device_memory(int32_t device,
116116
size_t* free,
117117
size_t* total);
118118

ggml/include/ggml-cuda.h

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,45 +3,37 @@
33
#include "ggml.h"
44
#include "ggml-backend.h"
55

6-
#ifdef GGML_USE_HIPBLAS
7-
#define GGML_CUDA_NAME "ROCm"
8-
#define GGML_CUBLAS_NAME "hipBLAS"
9-
#elif defined(GGML_USE_MUSA)
10-
#define GGML_CUDA_NAME "MUSA"
11-
#define GGML_CUBLAS_NAME "muBLAS"
12-
#else
13-
#define GGML_CUDA_NAME "CUDA"
14-
#define GGML_CUBLAS_NAME "cuBLAS"
15-
#endif
16-
176
#ifdef __cplusplus
187
extern "C" {
198
#endif
209

2110
#define GGML_CUDA_MAX_DEVICES 16
2211

2312
// backend API
24-
GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
13+
GGML_API ggml_backend_t ggml_backend_cuda_init(int device);
2514

26-
GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
15+
GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
2716

2817
// device buffer
29-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
18+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
3019

3120
// split tensor buffer that splits matrices by rows across multiple devices
32-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
21+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
3322

3423
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
35-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
24+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
3625

37-
GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
38-
GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
39-
GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
26+
GGML_API int ggml_backend_cuda_get_device_count(void);
27+
GGML_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
28+
GGML_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
4029

41-
GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
42-
GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
30+
GGML_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
31+
GGML_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
4332

4433
GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data);
34+
35+
GGML_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
36+
4537
#ifdef __cplusplus
4638
}
4739
#endif

0 commit comments

Comments
 (0)