@@ -702,6 +702,7 @@ class llama_model_params(ctypes.Structure):
702702 """Parameters for llama_model
703703
704704 Attributes:
705+ devices (ctypes.Array[ggml_backend_dev_t]): NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
705706 tensor_buft_overrides (llama_model_tensor_buft_override): NULL-terminated list of buffer types to use for tensors that match a pattern
706707 n_gpu_layers (int): number of layers to store in VRAM
707708 split_mode (int): how to split the model across multiple GPUs
@@ -716,6 +717,7 @@ class llama_model_params(ctypes.Structure):
716717 check_tensors (bool): validate model tensor data"""
717718
718719 if TYPE_CHECKING :
720+ devices : CtypesArray [ctypes .c_void_p ] # NOTE: unused
719721 tensor_buft_overrides : ctypes .POINTER (llama_model_tensor_buft_override )
720722 n_gpu_layers : int
721723 split_mode : int
@@ -915,6 +917,7 @@ class llama_context_params(ctypes.Structure):
915917# bool keep_split; // quantize to the same number of shards
916918# void * imatrix; // pointer to importance matrix data
917919# void * kv_overrides; // pointer to vector containing overrides
920+ # void * tensor_types; // pointer to vector containing tensor types
918921# } llama_model_quantize_params;
919922class llama_model_quantize_params (ctypes .Structure ):
920923 """Parameters for llama_model_quantize
@@ -931,6 +934,7 @@ class llama_model_quantize_params(ctypes.Structure):
931934 keep_split (bool): quantize to the same number of shards
932935 imatrix (ctypes.c_void_p): pointer to importance matrix data
933936 kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
937+ tensor_types (ctypes.c_void_p): pointer to vector containing tensor types
934938 """
935939
936940 if TYPE_CHECKING :
@@ -945,6 +949,7 @@ class llama_model_quantize_params(ctypes.Structure):
945949 keep_split : bool
946950 imatrix : ctypes .c_void_p
947951 kv_overrides : ctypes .c_void_p
952+ tensor_types : ctypes .c_void_p
948953
949954 _fields_ = [
950955 ("nthread" , ctypes .c_int32 ),
@@ -958,6 +963,7 @@ class llama_model_quantize_params(ctypes.Structure):
958963 ("keep_split" , ctypes .c_bool ),
959964 ("imatrix" , ctypes .c_void_p ),
960965 ("kv_overrides" , ctypes .c_void_p ),
966+ ("tensor_types" , ctypes .c_void_p ),
961967 ]
962968
963969
0 commit comments