@@ -721,10 +721,11 @@ class llama_model_tensor_buft_override(ctypes.Structure):
 
 
 # // Keep the booleans together to avoid misalignment during copy-by-value.
-#     bool vocab_only;    // only load the vocabulary, no weights
-#     bool use_mmap;      // use mmap if possible
-#     bool use_mlock;     // force system to keep model in RAM
-#     bool check_tensors; // validate model tensor data
+#     bool vocab_only;      // only load the vocabulary, no weights
+#     bool use_mmap;        // use mmap if possible
+#     bool use_mlock;       // force system to keep model in RAM
+#     bool check_tensors;   // validate model tensor data
+#     bool use_extra_bufts; // use extra buffer types (used for weight repacking)
 # };
 class llama_model_params(ctypes.Structure):
     """Parameters for llama_model
@@ -742,7 +743,8 @@ class llama_model_params(ctypes.Structure):
         vocab_only (bool): only load the vocabulary, no weights
         use_mmap (bool): use mmap if possible
         use_mlock (bool): force system to keep model in RAM
-        check_tensors (bool): validate model tensor data"""
+        check_tensors (bool): validate model tensor data
+        use_extra_bufts (bool): use extra buffer types (used for weight repacking)"""
 
     if TYPE_CHECKING:
         devices: CtypesArray[ctypes.c_void_p]  # NOTE: unused
@@ -758,6 +760,7 @@ class llama_model_params(ctypes.Structure):
         use_mmap: bool
         use_mlock: bool
         check_tensors: bool
+        use_extra_bufts: bool
 
     _fields_ = [
         ("devices", ctypes.c_void_p),  # NOTE: unnused
@@ -773,6 +776,7 @@ class llama_model_params(ctypes.Structure):
         ("use_mmap", ctypes.c_bool),
         ("use_mlock", ctypes.c_bool),
         ("check_tensors", ctypes.c_bool),
+        ("use_extra_bufts", ctypes.c_bool),
     ]
 
 
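A minimal usage sketch (not part of the diff) showing how a caller could opt in to the new flag; it assumes the module continues to expose llama_model_default_params() and llama_model_load_from_file() as in the current low-level bindings, and the model path is purely illustrative:

    import llama_cpp

    # Start from the library defaults, then enable the extra buffer types
    # (used for weight repacking on supported backends).
    params = llama_cpp.llama_model_default_params()
    params.use_extra_bufts = True

    # Hypothetical model path; loading otherwise behaves as before.
    model = llama_cpp.llama_model_load_from_file(b"/path/to/model.gguf", params)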