@@ -91,6 +91,12 @@ def _load_shared_library(lib_base_name: str):
 c_uint8_p = POINTER(c_uint8)
 c_size_t_p = POINTER(c_size_t)
 
+# from ggml-backend.h
+# typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
+ggml_backend_sched_eval_callback = ctypes.CFUNCTYPE(
+    c_bool, c_void_p, c_bool, c_void_p
+)
+
 # llama.h bindings
 
 _lib.llama_max_devices.argtypes = []
@@ -448,6 +454,9 @@ class llama_model_params(Structure):
 # float yarn_beta_slow; // YaRN high correction dim
 # uint32_t yarn_orig_ctx; // YaRN original context size
 
+# ggml_backend_sched_eval_callback cb_eval;
+# void * cb_eval_user_data;
+
 # enum ggml_type type_k; // data type for K cache
 # enum ggml_type type_v; // data type for V cache
 
@@ -475,6 +484,8 @@ class llama_context_params(Structure):
         yarn_beta_fast (float): YaRN low correction dim
         yarn_beta_slow (float): YaRN high correction dim
         yarn_orig_ctx (int): YaRN original context size
+        cb_eval (ggml_backend_sched_eval_callback): callback invoked by the backend scheduler during graph evaluation
+        cb_eval_user_data (ctypes.c_void_p): opaque user data passed to cb_eval
         type_k (int): data type for K cache
         type_v (int): data type for V cache
         mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
@@ -497,6 +508,8 @@ class llama_context_params(Structure):
497508 ("yarn_beta_fast" , c_float ),
498509 ("yarn_beta_slow" , c_float ),
499510 ("yarn_orig_ctx" , c_uint32 ),
511+ ("cb_eval" , ggml_backend_sched_eval_callback ),
512+ ("cb_eval_user_data" , c_void_p ),
500513 ("type_k" , c_int ),
501514 ("type_v" , c_int ),
502515 ("mul_mat_q" , c_bool ),