Skip to content

Commit e6b50cd

Browse files
committed
wip
1 parent ea9c32b commit e6b50cd

25 files changed

+1481
-919
lines changed

Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,10 +1055,11 @@ ggml/src/ggml-alloc.o: \
10551055
$(CC) $(CFLAGS) -c $< -o $@
10561056

10571057
ggml/src/ggml-backend.o: \
1058-
ggml/src/ggml-backend.c \
1058+
ggml/src/ggml-backend.cpp \
1059+
ggml/src/ggml-backend-impl.h \
10591060
ggml/include/ggml.h \
10601061
ggml/include/ggml-backend.h
1061-
$(CC) $(CFLAGS) -c $< -o $@
1062+
$(CXX) $(CXXFLAGS) -c $< -o $@
10621063

10631064
ggml/src/ggml-quants.o: \
10641065
ggml/src/ggml-quants.c \

examples/llama-bench/llama-bench.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -941,7 +941,7 @@ struct test {
941941

942942
static std::string get_backend() {
943943
if (cuda) {
944-
return GGML_CUDA_NAME;
944+
return "CUDA";
945945
}
946946
if (vulkan) {
947947
return "Vulkan";

ggml/include/ggml-backend.h

Lines changed: 104 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,25 @@ extern "C" {
1212
typedef struct ggml_backend_event * ggml_backend_event_t;
1313
typedef struct ggml_backend * ggml_backend_t;
1414
typedef void * ggml_backend_graph_plan_t;
15+
typedef struct ggml_backend_reg * ggml_backend_reg_t;
16+
typedef struct ggml_backend_device * ggml_backend_dev_t;
17+
1518

1619
//
17-
// Backend buffer
20+
// Backend buffer type
1821
//
1922

20-
// buffer type
2123
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
22-
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
24+
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
2325
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
2426
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
25-
GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
27+
GGML_API size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
2628
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
2729

28-
// buffer
30+
//
31+
// Backend buffer
32+
//
33+
2934
enum ggml_backend_buffer_usage {
3035
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
3136
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
@@ -36,7 +41,7 @@ extern "C" {
3641
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
3742
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
3843
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
39-
GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
44+
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
4045
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
4146
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
4247
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
@@ -48,7 +53,7 @@ extern "C" {
4853
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
4954

5055
//
51-
// Backend
56+
// Backend (stream)
5257
//
5358

5459
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
@@ -64,9 +69,9 @@ extern "C" {
6469
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
6570

6671
// "offset" refers to the offset of the tensor data for setting/getting data
67-
GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
68-
GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
69-
GGML_API GGML_CALL void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
72+
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
73+
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
74+
GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
7075

7176
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
7277

@@ -76,6 +81,8 @@ extern "C" {
7681
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
7782
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
7883
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
84+
85+
// NOTE: these will be removed, use the device versions (ggml_backend_dev_*) instead
7986
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
8087
GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
8188
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
@@ -90,51 +97,88 @@ extern "C" {
9097
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
9198

9299
// events
93-
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
94-
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
95-
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
96-
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
97-
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
100+
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_dev_t device);
101+
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
102+
GGML_API void ggml_backend_event_record (ggml_backend_event_t event, ggml_backend_t backend);
103+
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
104+
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
98105

99106
//
100-
// CPU backend
107+
// Backend device
101108
//
102109

103-
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
110+
enum ggml_backend_device_type {
111+
GGML_BACKEND_DEVICE_TYPE_CPU,
112+
GGML_BACKEND_DEVICE_TYPE_GPU,
113+
// devices with full capabilities (excludes backends such as BLAS)
114+
GGML_BACKEND_DEVICE_TYPE_CPU_FULL,
115+
GGML_BACKEND_DEVICE_TYPE_GPU_FULL
116+
};
104117

105-
GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
106-
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
107-
GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
108-
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
118+
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
119+
GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
120+
GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
121+
GGML_API enum ggml_backend_device_type ggml_backend_dev_type(ggml_backend_dev_t device);
109122

110-
// Create a backend buffer from an existing pointer
111-
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
123+
GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
112124

113-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
125+
GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
114126

115-
#ifdef GGML_USE_CPU_HBM
116-
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
117-
#endif
127+
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
128+
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
129+
130+
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
131+
//GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_device_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
132+
133+
GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
134+
GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
135+
GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
136+
137+
GGML_API ggml_backend_event_t ggml_backend_dev_event_new(ggml_backend_dev_t device);
118138

119139
//
120-
// Backend registry
140+
// Backend (reg)
121141
//
122142

123-
// The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
143+
GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg);
144+
GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
145+
GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
146+
GGML_API void ggml_backend_reg_add_device(ggml_backend_reg_t reg, const char * params);
147+
GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
148+
GGML_API void ggml_backend_reg_set_log_callback(ggml_backend_reg_t reg, ggml_log_callback log_callback, void * user_data);
124149

125-
GGML_API size_t ggml_backend_reg_get_count(void);
126-
GGML_API size_t ggml_backend_reg_find_by_name(const char * name); // returns index of backend with name, or SIZE_MAX if not found
127-
GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is backend_name:params (params is optional)
128-
GGML_API const char * ggml_backend_reg_get_name(size_t i);
129-
GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
130-
GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
131-
GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
150+
//
151+
// Backend registry
152+
//
153+
154+
// Backend (reg) enumeration
155+
GGML_API size_t ggml_backend_reg_count(void);
156+
GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
157+
GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name); // backend names: CPU, CUDA, Metal
158+
159+
// Device enumeration
160+
GGML_API size_t ggml_backend_dev_count(void);
161+
GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
162+
GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name); // device names: CPU, CUDA0, Metal, Vulkan0, etc
163+
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_device_type type);
164+
165+
// Set the log callback for all registered backends
166+
GGML_API void ggml_backend_set_log_callback(ggml_log_callback log_callback, void * user_data);
167+
168+
// Convenience functions, may be removed in the future
169+
// Direct Backend (stream) initialization
170+
// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
171+
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
172+
// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
173+
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_device_type type, const char * params);
174+
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU_FULL) OR ggml_backend_dev_by_type(CPU_FULL), NULL)
175+
GGML_API ggml_backend_t ggml_backend_init_best(void);
132176

133177
//
134178
// Backend scheduler
135179
//
136180

137-
// The backend scheduler allows for multiple backends to be used together
181+
// The backend scheduler allows for multiple backend devices to be used together
138182
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
139183
// The backends are selected based on:
140184
// - the backend that supports the operation
@@ -169,7 +213,6 @@ extern "C" {
169213
}
170214
*/
171215

172-
struct ggml_backend_sched;
173216
typedef struct ggml_backend_sched * ggml_backend_sched_t;
174217

175218
// when ask == true, the scheduler wants to know if the user wants to observe this node
@@ -226,7 +269,7 @@ extern "C" {
226269
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
227270
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
228271

229-
typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
272+
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
230273

231274
// Compare the output of two backends
232275
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
@@ -236,6 +279,28 @@ extern "C" {
236279
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
237280

238281

282+
//
283+
// CPU backend
284+
//
285+
286+
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
287+
288+
GGML_API bool ggml_backend_is_cpu (ggml_backend_t backend);
289+
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
290+
GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
291+
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
292+
293+
// Create a backend buffer from an existing pointer
294+
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
295+
296+
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
297+
298+
GGML_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
299+
300+
#ifdef GGML_USE_CPU_HBM
301+
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
302+
#endif
303+
239304
#ifdef __cplusplus
240305
}
241306
#endif

ggml/include/ggml-blas.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@ extern "C" {
99
#endif
1010

1111
// backend API
12-
GGML_API GGML_CALL ggml_backend_t ggml_backend_blas_init(void);
12+
GGML_API ggml_backend_t ggml_backend_blas_init(void);
1313

14-
GGML_API GGML_CALL bool ggml_backend_is_blas(ggml_backend_t backend);
14+
GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
1515

1616
// number of threads used for conversion to float
1717
// for openblas and blis, this will also set the number of threads used for blas operations
18-
GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
18+
GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
1919

2020

2121
#ifdef __cplusplus

ggml/include/ggml-cann.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ extern "C" {
4444
* @param device The index of the device to initialize.
4545
* @return A pointer to the initialized backend instance, or nullptr on failure.
4646
*/
47-
GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
47+
GGML_API ggml_backend_t ggml_backend_cann_init(int32_t device);
4848

4949
/**
5050
* @brief Checks if a given backend is a CANN backend.
@@ -55,7 +55,7 @@ GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
5555
* @param backend The backend instance to check.
5656
* @return True if the backend is a CANN backend, false otherwise.
5757
*/
58-
GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
58+
GGML_API bool ggml_backend_is_cann(ggml_backend_t backend);
5959

6060
/**
6161
* @brief Retrieves the CANN buffer type for a specified device.
@@ -67,7 +67,7 @@ GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
6767
* @return A pointer to the buffer type interface for the specified device, or
6868
* nullptr if the device index is out of range.
6969
*/
70-
GGML_API GGML_CALL ggml_backend_buffer_type_t
70+
GGML_API ggml_backend_buffer_type_t
7171
ggml_backend_cann_buffer_type(int32_t device);
7272

7373
/**
@@ -78,14 +78,14 @@ ggml_backend_cann_buffer_type(int32_t device);
7878
*
7979
* @return The number of CANN devices available.
8080
*/
81-
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
81+
GGML_API int32_t ggml_backend_cann_get_device_count(void);
8282

8383
/**
8484
* @brief Retrieves the pinned host buffer type, for use with the CPU backend for faster copies between CPU and NPU.
8585
*
8686
* @return A pointer to the host buffer type interface.
8787
*/
88-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
88+
GGML_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
8989

9090
/**
9191
* @brief Retrieves the description of a specific CANN device.
@@ -97,7 +97,7 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type
9797
* @param description Pointer to a buffer where the description will be written.
9898
* @param description_size Size of the description buffer.
9999
*/
100-
GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
100+
GGML_API void ggml_backend_cann_get_device_description(
101101
int32_t device, char* description, size_t description_size);
102102

103103
/**
@@ -112,7 +112,7 @@ GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
112112
* @param total Pointer to a variable where the total memory size will be
113113
* stored.
114114
*/
115-
GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
115+
GGML_API void ggml_backend_cann_get_device_memory(int32_t device,
116116
size_t* free,
117117
size_t* total);
118118

0 commit comments

Comments
 (0)