@@ -12,20 +12,25 @@ extern "C" {
1212 typedef struct ggml_backend_event * ggml_backend_event_t ;
1313 typedef struct ggml_backend * ggml_backend_t ;
1414 typedef void * ggml_backend_graph_plan_t ;
15+ typedef struct ggml_backend_reg * ggml_backend_reg_t ;
16+ typedef struct ggml_backend_device * ggml_backend_dev_t ;
17+
1518
1619 //
17- // Backend buffer
20+ // Backend buffer type
1821 //
1922
20- // buffer type
2123 GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft );
22- GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft , size_t size );
24+ GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft , size_t size );
2325 GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft );
2426 GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft );
25- GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft , struct ggml_tensor * tensor );
27+ GGML_API size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft , struct ggml_tensor * tensor );
2628 GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft );
2729
28- // buffer
30+ //
31+ // Backend buffer
32+ //
33+
2934 enum ggml_backend_buffer_usage {
3035 GGML_BACKEND_BUFFER_USAGE_ANY = 0 ,
3136 GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1 ,
@@ -36,7 +41,7 @@ extern "C" {
3641 GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer );
3742 GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer );
3843 GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer );
39- GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer , struct ggml_tensor * tensor );
44+ GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer , struct ggml_tensor * tensor );
4045 GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer );
4146 GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer );
4247 GGML_API size_t ggml_backend_buffer_get_alloc_size (ggml_backend_buffer_t buffer , struct ggml_tensor * tensor );
@@ -48,7 +53,7 @@ extern "C" {
4853 GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer );
4954
5055 //
51- // Backend
56+ // Backend (stream)
5257 //
5358
5459 GGML_API ggml_guid_t ggml_backend_guid (ggml_backend_t backend );
@@ -64,9 +69,9 @@ extern "C" {
6469 GGML_API void ggml_backend_tensor_get_async (ggml_backend_t backend , const struct ggml_tensor * tensor , void * data , size_t offset , size_t size );
6570
6671 // "offset" refers to the offset of the tensor data for setting/getting data
67- GGML_API GGML_CALL void ggml_backend_tensor_set ( struct ggml_tensor * tensor , const void * data , size_t offset , size_t size );
68- GGML_API GGML_CALL void ggml_backend_tensor_get (const struct ggml_tensor * tensor , void * data , size_t offset , size_t size );
69- GGML_API GGML_CALL void ggml_backend_tensor_memset ( struct ggml_tensor * tensor , uint8_t value , size_t offset , size_t size );
72+ GGML_API void ggml_backend_tensor_set ( struct ggml_tensor * tensor , const void * data , size_t offset , size_t size );
73+ GGML_API void ggml_backend_tensor_get (const struct ggml_tensor * tensor , void * data , size_t offset , size_t size );
74+ GGML_API void ggml_backend_tensor_memset ( struct ggml_tensor * tensor , uint8_t value , size_t offset , size_t size );
7075
7176 GGML_API void ggml_backend_synchronize (ggml_backend_t backend );
7277
@@ -76,6 +81,8 @@ extern "C" {
7681 GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend , ggml_backend_graph_plan_t plan );
7782 GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend , struct ggml_cgraph * cgraph );
7883 GGML_API enum ggml_status ggml_backend_graph_compute_async (ggml_backend_t backend , struct ggml_cgraph * cgraph );
84+
85+ // NOTE: will be removed, use device version instead
7986 GGML_API bool ggml_backend_supports_op (ggml_backend_t backend , const struct ggml_tensor * op );
8087 GGML_API bool ggml_backend_supports_buft (ggml_backend_t backend , ggml_backend_buffer_type_t buft );
8188 GGML_API bool ggml_backend_offload_op (ggml_backend_t backend , const struct ggml_tensor * op );
@@ -90,51 +97,88 @@ extern "C" {
9097 GGML_API void ggml_backend_tensor_copy_async (ggml_backend_t backend_src , ggml_backend_t backend_dst , struct ggml_tensor * src , struct ggml_tensor * dst );
9198
9299 // events
93- GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend );
94- GGML_API void ggml_backend_event_free (ggml_backend_event_t event );
95- GGML_API void ggml_backend_event_record (ggml_backend_event_t event );
96- GGML_API void ggml_backend_event_synchronize (ggml_backend_event_t event );
97- GGML_API void ggml_backend_event_wait (ggml_backend_t backend , ggml_backend_event_t event );
100+ GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_dev_t device );
101+ GGML_API void ggml_backend_event_free (ggml_backend_event_t event );
102+ GGML_API void ggml_backend_event_record (ggml_backend_event_t event , ggml_backend_t backend );
103+ GGML_API void ggml_backend_event_synchronize (ggml_backend_event_t event );
104+ GGML_API void ggml_backend_event_wait (ggml_backend_t backend , ggml_backend_event_t event );
98105
99106 //
100- // CPU backend
107+ // Backend device
101108 //
102109
103- GGML_API ggml_backend_t ggml_backend_cpu_init (void );
110+ enum ggml_backend_device_type {
111+ GGML_BACKEND_DEVICE_TYPE_CPU ,
112+ GGML_BACKEND_DEVICE_TYPE_GPU ,
113+ // devices with full capabilities (excludes backends such as BLAS)
114+ GGML_BACKEND_DEVICE_TYPE_CPU_FULL ,
115+ GGML_BACKEND_DEVICE_TYPE_GPU_FULL
116+ };
104117
105- GGML_API GGML_CALL bool ggml_backend_is_cpu ( ggml_backend_t backend );
106- GGML_API void ggml_backend_cpu_set_n_threads ( ggml_backend_t backend_cpu , int n_threads );
107- GGML_API void ggml_backend_cpu_set_threadpool ( ggml_backend_t backend_cpu , ggml_threadpool_t threadpool );
108- GGML_API void ggml_backend_cpu_set_abort_callback ( ggml_backend_t backend_cpu , ggml_abort_callback abort_callback , void * abort_callback_data );
118+ GGML_API const char * ggml_backend_dev_name ( ggml_backend_dev_t device );
119+ GGML_API const char * ggml_backend_dev_description ( ggml_backend_dev_t device );
120+ GGML_API void ggml_backend_dev_memory ( ggml_backend_dev_t device , size_t * free , size_t * total );
121+ GGML_API enum ggml_backend_device_type ggml_backend_dev_type ( ggml_backend_dev_t device );
109122
110- // Create a backend buffer from an existing pointer
111- GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr (void * ptr , size_t size );
123+ GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg (ggml_backend_dev_t device );
112124
113- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type ( void );
125+ GGML_API ggml_backend_t ggml_backend_dev_init ( ggml_backend_dev_t device , const char * params );
114126
115- #ifdef GGML_USE_CPU_HBM
116- GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type (void );
117- #endif
127+ GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type (ggml_backend_dev_t device );
128+ GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type (ggml_backend_dev_t device );
129+
130+ GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr (ggml_backend_dev_t device , void * ptr , size_t size , size_t max_tensor_size );
131+ //GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_device_ptr(ggml_backend_device_t device, void * ptr, size_t size, size_t max_tensor_size);
132+
133+ GGML_API bool ggml_backend_dev_supports_op (ggml_backend_dev_t device , const struct ggml_tensor * op );
134+ GGML_API bool ggml_backend_dev_supports_buft (ggml_backend_dev_t device , ggml_backend_buffer_type_t buft );
135+ GGML_API bool ggml_backend_dev_offload_op (ggml_backend_dev_t device , const struct ggml_tensor * op );
136+
137+ GGML_API ggml_backend_event_t ggml_backend_dev_event_new (ggml_backend_dev_t device );
118138
119139 //
120- // Backend registry
140+ // Backend (reg)
121141 //
122142
123- // The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
143+ GGML_API const char * ggml_backend_reg_name (ggml_backend_reg_t reg );
144+ GGML_API size_t ggml_backend_reg_dev_count (ggml_backend_reg_t reg );
145+ GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get (ggml_backend_reg_t reg , size_t index );
146+ GGML_API void ggml_backend_reg_add_device (ggml_backend_reg_t reg , const char * params );
147+ GGML_API void * ggml_backend_reg_get_proc_address (ggml_backend_reg_t reg , const char * name );
148+ GGML_API void ggml_backend_reg_set_log_callback (ggml_backend_reg_t reg , ggml_log_callback log_callback , void * user_data );
124149
125- GGML_API size_t ggml_backend_reg_get_count (void );
126- GGML_API size_t ggml_backend_reg_find_by_name (const char * name ); // returns index of backend with name, or SIZE_MAX if not found
127- GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str (const char * backend_str ); // str is backend_name:params (params is optional)
128- GGML_API const char * ggml_backend_reg_get_name (size_t i );
129- GGML_API ggml_backend_t ggml_backend_reg_init_backend (size_t i , const char * params ); // params is backend-specific
130- GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type (size_t i );
131- GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer (size_t i , size_t size );
150+ //
151+ // Backend registry
152+ //
153+
154+ // Backend (reg) enumeration
155+ GGML_API size_t ggml_backend_reg_count (void );
156+ GGML_API ggml_backend_reg_t ggml_backend_reg_get (size_t index );
157+ GGML_API ggml_backend_reg_t ggml_backend_reg_by_name (const char * name ); // backend names: CPU, CUDA, Metal
158+
159+ // Device enumeration
160+ GGML_API size_t ggml_backend_dev_count (void );
161+ GGML_API ggml_backend_dev_t ggml_backend_dev_get (size_t index );
162+ GGML_API ggml_backend_dev_t ggml_backend_dev_by_name (const char * name ); // device names: CPU, CUDA0, Metal, Vulkan0, etc
163+ GGML_API ggml_backend_dev_t ggml_backend_dev_by_type (enum ggml_backend_device_type type );
164+
165+ // Set the log callback for all registered backends
166+ GGML_API void ggml_backend_set_log_callback (ggml_log_callback log_callback , void * user_data );
167+
168+ // Convenience functions, may be removed in the future
169+ // Direct Backend (stream) initialization
170+ // = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
171+ GGML_API ggml_backend_t ggml_backend_init_by_name (const char * name , const char * params );
172+ // = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
173+ GGML_API ggml_backend_t ggml_backend_init_by_type (enum ggml_backend_device_type type , const char * params );
174+ // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU_FULL) OR ggml_backend_dev_by_type(CPU_FULL), NULL)
175+ GGML_API ggml_backend_t ggml_backend_init_best (void );
132176
133177 //
134178 // Backend scheduler
135179 //
136180
137- // The backend scheduler allows for multiple backends to be used together
181+ // The backend scheduler allows for multiple backend devices to be used together
138182 // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
139183 // The backends are selected based on:
140184 // - the backend that supports the operation
@@ -169,7 +213,6 @@ extern "C" {
169213 }
170214 */
171215
172- struct ggml_backend_sched ;
173216 typedef struct ggml_backend_sched * ggml_backend_sched_t ;
174217
175218 // when ask == true, the scheduler wants to know if the user wants to observe this node
@@ -226,7 +269,7 @@ extern "C" {
226269 GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy (ggml_backend_t backend , struct ggml_cgraph * graph );
227270 GGML_API void ggml_backend_graph_copy_free (struct ggml_backend_graph_copy copy );
228271
229- typedef bool (* GGML_CALL ggml_backend_eval_callback )(int node_index , struct ggml_tensor * t1 , struct ggml_tensor * t2 , void * user_data );
272+ typedef bool (* ggml_backend_eval_callback )(int node_index , struct ggml_tensor * t1 , struct ggml_tensor * t2 , void * user_data );
230273
231274 // Compare the output of two backends
232275 GGML_API bool ggml_backend_compare_graph_backend (ggml_backend_t backend1 , ggml_backend_t backend2 , struct ggml_cgraph * graph , ggml_backend_eval_callback callback , void * user_data );
@@ -236,6 +279,28 @@ extern "C" {
236279 GGML_API void ggml_backend_view_init (struct ggml_tensor * tensor );
237280
238281
282+ //
283+ // CPU backend
284+ //
285+
286+ GGML_API ggml_backend_t ggml_backend_cpu_init (void );
287+
288+ GGML_API bool ggml_backend_is_cpu (ggml_backend_t backend );
289+ GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu , int n_threads );
290+ GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu , ggml_threadpool_t threadpool );
291+ GGML_API void ggml_backend_cpu_set_abort_callback (ggml_backend_t backend_cpu , ggml_abort_callback abort_callback , void * abort_callback_data );
292+
293+ // Create a backend buffer from an existing pointer
294+ GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr (void * ptr , size_t size );
295+
296+ GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type (void );
297+
298+ GGML_API ggml_backend_reg_t ggml_backend_cpu_reg (void );
299+
300+ #ifdef GGML_USE_CPU_HBM
301+ GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type (void );
302+ #endif
303+
239304#ifdef __cplusplus
240305}
241306#endif