@@ -12,20 +12,25 @@ extern "C" {
1212 typedef struct ggml_backend_event * ggml_backend_event_t ;
1313 typedef struct ggml_backend * ggml_backend_t ;
1414 typedef void * ggml_backend_graph_plan_t ;
15+ typedef struct ggml_backend_reg * ggml_backend_reg_t ;
16+ typedef struct ggml_backend_device * ggml_backend_dev_t ;
17+
1518
1619 //
17- // Backend buffer
20+ // Backend buffer type
1821 //
1922
20- // buffer type
2123 GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft );
22- GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft , size_t size );
24+ GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft , size_t size );
2325 GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft );
2426 GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft );
25- GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft , struct ggml_tensor * tensor );
27+ GGML_API size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft , struct ggml_tensor * tensor );
2628 GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft );
2729
28- // buffer
30+ //
31+ // Backend buffer
32+ //
33+
2934 enum ggml_backend_buffer_usage {
3035 GGML_BACKEND_BUFFER_USAGE_ANY = 0 ,
3136 GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1 ,
@@ -36,7 +41,7 @@ extern "C" {
3641 GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer );
3742 GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer );
3843 GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer );
39- GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer , struct ggml_tensor * tensor );
44+ GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer , struct ggml_tensor * tensor );
4045 GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer );
4146 GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer );
4247 GGML_API size_t ggml_backend_buffer_get_alloc_size (ggml_backend_buffer_t buffer , struct ggml_tensor * tensor );
@@ -48,7 +53,7 @@ extern "C" {
4853 GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer );
4954
5055 //
51- // Backend
56+ // Backend (stream)
5257 //
5358
5459 GGML_API ggml_guid_t ggml_backend_guid (ggml_backend_t backend );
@@ -64,9 +69,9 @@ extern "C" {
6469 GGML_API void ggml_backend_tensor_get_async (ggml_backend_t backend , const struct ggml_tensor * tensor , void * data , size_t offset , size_t size );
6570
6671 // "offset" refers to the offset of the tensor data for setting/getting data
67- GGML_API GGML_CALL void ggml_backend_tensor_set ( struct ggml_tensor * tensor , const void * data , size_t offset , size_t size );
68- GGML_API GGML_CALL void ggml_backend_tensor_get (const struct ggml_tensor * tensor , void * data , size_t offset , size_t size );
69- GGML_API GGML_CALL void ggml_backend_tensor_memset ( struct ggml_tensor * tensor , uint8_t value , size_t offset , size_t size );
72+ GGML_API void ggml_backend_tensor_set ( struct ggml_tensor * tensor , const void * data , size_t offset , size_t size );
73+ GGML_API void ggml_backend_tensor_get (const struct ggml_tensor * tensor , void * data , size_t offset , size_t size );
74+ GGML_API void ggml_backend_tensor_memset ( struct ggml_tensor * tensor , uint8_t value , size_t offset , size_t size );
7075
7176 GGML_API void ggml_backend_synchronize (ggml_backend_t backend );
7277
@@ -90,51 +95,88 @@ extern "C" {
9095 GGML_API void ggml_backend_tensor_copy_async (ggml_backend_t backend_src , ggml_backend_t backend_dst , struct ggml_tensor * src , struct ggml_tensor * dst );
9196
9297 // events
93- GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend );
94- GGML_API void ggml_backend_event_free (ggml_backend_event_t event );
95- GGML_API void ggml_backend_event_record (ggml_backend_event_t event );
96- GGML_API void ggml_backend_event_synchronize (ggml_backend_event_t event );
97- GGML_API void ggml_backend_event_wait (ggml_backend_t backend , ggml_backend_event_t event );
98+ GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend );
99+ GGML_API void ggml_backend_event_free (ggml_backend_event_t event );
100+ GGML_API void ggml_backend_event_record (ggml_backend_event_t event );
101+ GGML_API void ggml_backend_event_synchronize (ggml_backend_event_t event );
102+ GGML_API void ggml_backend_event_wait (ggml_backend_t backend , ggml_backend_event_t event );
98103
99104 //
100- // CPU backend
105+ // Backend device
101106 //
102107
103- GGML_API ggml_backend_t ggml_backend_cpu_init (void );
108+ enum ggml_backend_device_type {
109+ GGML_BACKEND_DEVICE_TYPE_CPU ,
110+ GGML_BACKEND_DEVICE_TYPE_GPU ,
111+ // devices with full capabilities (excludes backends such as BLAS)
112+ GGML_BACKEND_DEVICE_TYPE_CPU_FULL ,
113+ GGML_BACKEND_DEVICE_TYPE_GPU_FULL
114+ };
104115
105- GGML_API GGML_CALL bool ggml_backend_is_cpu ( ggml_backend_t backend );
106- GGML_API void ggml_backend_cpu_set_n_threads ( ggml_backend_t backend_cpu , int n_threads );
107- GGML_API void ggml_backend_cpu_set_threadpool ( ggml_backend_t backend_cpu , ggml_threadpool_t threadpool );
108- GGML_API void ggml_backend_cpu_set_abort_callback ( ggml_backend_t backend_cpu , ggml_abort_callback abort_callback , void * abort_callback_data );
116+ GGML_API const char * ggml_backend_dev_name ( ggml_backend_dev_t device );
117+ GGML_API const char * ggml_backend_dev_description ( ggml_backend_dev_t device );
118+ GGML_API void ggml_backend_dev_memory ( ggml_backend_dev_t device , size_t * free , size_t * total );
119+ GGML_API enum ggml_backend_device_type ggml_backend_dev_type ( ggml_backend_dev_t device );
109120
110- // Create a backend buffer from an existing pointer
111- GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr (void * ptr , size_t size );
121+ GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg (ggml_backend_dev_t device );
112122
113- GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type ( void );
123+ GGML_API ggml_backend_t ggml_backend_dev_init ( ggml_backend_dev_t device , const char * params );
114124
115- #ifdef GGML_USE_CPU_HBM
116- GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type (void );
117- #endif
125+ GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type (ggml_backend_dev_t device );
126+ GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type (ggml_backend_dev_t device );
127+
128+ GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr (ggml_backend_dev_t device , void * ptr , size_t size , size_t max_tensor_size );
129+ //GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_device_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
130+
131+ GGML_API bool ggml_backend_dev_supports_op (ggml_backend_dev_t device , const struct ggml_tensor * op );
132+ GGML_API bool ggml_backend_dev_supports_buft (ggml_backend_dev_t device , ggml_backend_buffer_type_t buft );
133+ GGML_API bool ggml_backend_dev_offload_op (ggml_backend_dev_t device , const struct ggml_tensor * op );
134+
135+ GGML_API ggml_backend_event_t ggml_backend_dev_event_new (ggml_backend_dev_t device );
118136
119137 //
120- // Backend registry
138+ // Backend (reg)
121139 //
122140
123- // The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
141+ GGML_API const char * ggml_backend_reg_name (ggml_backend_reg_t reg );
142+ GGML_API size_t ggml_backend_reg_dev_count (ggml_backend_reg_t reg );
143+ GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get (ggml_backend_reg_t reg , size_t index );
144+ GGML_API void ggml_backend_reg_add_device (ggml_backend_reg_t reg , const char * params );
145+ GGML_API void * ggml_backend_reg_get_proc_address (ggml_backend_reg_t reg , const char * name );
146+ GGML_API void ggml_backend_reg_set_log_callback (ggml_backend_reg_t reg , ggml_log_callback log_callback , void * user_data );
124147
125- GGML_API size_t ggml_backend_reg_get_count (void );
126- GGML_API size_t ggml_backend_reg_find_by_name (const char * name ); // returns index of backend with name, or SIZE_MAX if not found
127- GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str (const char * backend_str ); // str is backend_name:params (params is optional)
128- GGML_API const char * ggml_backend_reg_get_name (size_t i );
129- GGML_API ggml_backend_t ggml_backend_reg_init_backend (size_t i , const char * params ); // params is backend-specific
130- GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type (size_t i );
131- GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer (size_t i , size_t size );
148+ //
149+ // Backend registry
150+ //
151+
152+ // Backend (reg) enumeration
153+ GGML_API size_t ggml_backend_reg_count (void );
154+ GGML_API ggml_backend_reg_t ggml_backend_reg_get (size_t index );
155+ GGML_API ggml_backend_reg_t ggml_backend_reg_by_name (const char * name ); // backend names: CPU, CUDA, Metal
156+
157+ // Device enumeration
158+ GGML_API size_t ggml_backend_dev_count (void );
159+ GGML_API ggml_backend_dev_t ggml_backend_dev_get (size_t index );
160+ GGML_API ggml_backend_dev_t ggml_backend_dev_by_name (const char * name ); // device names: CPU, CUDA0, Metal, Vulkan0, etc
161+ GGML_API ggml_backend_dev_t ggml_backend_dev_by_type (enum ggml_backend_device_type type );
162+
163+ // Set the log callback for all registered backends
164+ GGML_API void ggml_backend_set_log_callback (ggml_log_callback log_callback , void * user_data );
165+
166+ // Convenience functions, may be removed in the future
167+ // Direct backend (stream) initialization
168+ // = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
169+ GGML_API ggml_backend_t ggml_backend_init_by_name (const char * name , const char * params );
170+ // = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
171+ GGML_API ggml_backend_t ggml_backend_init_by_type (enum ggml_backend_device_type type , const char * params );
172+ // = ggml_backend_dev_init(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU_FULL) OR ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU_FULL), NULL)
173+ GGML_API ggml_backend_t ggml_backend_init_best (void );
132174
133175 //
134176 // Backend scheduler
135177 //
136178
137- // The backend scheduler allows for multiple backends to be used together
179+ // The backend scheduler allows for multiple backend devices to be used together
138180 // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
139181 // The backends are selected based on:
140182 // - the backend that supports the operation
@@ -169,7 +211,6 @@ extern "C" {
169211 }
170212 */
171213
172- struct ggml_backend_sched ;
173214 typedef struct ggml_backend_sched * ggml_backend_sched_t ;
174215
175216 // when ask == true, the scheduler wants to know if the user wants to observe this node
@@ -226,7 +267,7 @@ extern "C" {
226267 GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy (ggml_backend_t backend , struct ggml_cgraph * graph );
227268 GGML_API void ggml_backend_graph_copy_free (struct ggml_backend_graph_copy copy );
228269
229- typedef bool (* GGML_CALL ggml_backend_eval_callback )(int node_index , struct ggml_tensor * t1 , struct ggml_tensor * t2 , void * user_data );
270+ typedef bool (* ggml_backend_eval_callback )(int node_index , struct ggml_tensor * t1 , struct ggml_tensor * t2 , void * user_data );
230271
231272 // Compare the output of two backends
232273 GGML_API bool ggml_backend_compare_graph_backend (ggml_backend_t backend1 , ggml_backend_t backend2 , struct ggml_cgraph * graph , ggml_backend_eval_callback callback , void * user_data );
@@ -236,6 +277,26 @@ extern "C" {
236277 GGML_API void ggml_backend_view_init (struct ggml_tensor * tensor );
237278
238279
280+ //
281+ // CPU backend
282+ //
283+
284+ GGML_API ggml_backend_t ggml_backend_cpu_init (void );
285+
286+ GGML_API bool ggml_backend_is_cpu (ggml_backend_t backend );
287+ GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu , int n_threads );
288+ GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu , ggml_threadpool_t threadpool );
289+ GGML_API void ggml_backend_cpu_set_abort_callback (ggml_backend_t backend_cpu , ggml_abort_callback abort_callback , void * abort_callback_data );
290+
291+ // Create a backend buffer from an existing pointer
292+ GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr (void * ptr , size_t size );
293+
294+ GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type (void );
295+
296+ #ifdef GGML_USE_CPU_HBM
297+ GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type (void );
298+ #endif
299+
239300#ifdef __cplusplus
240301}
241302#endif
0 commit comments