@@ -264,6 +264,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
264264# LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
265265# LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
266266# LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
267+ # LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
267268
268269# LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
269270# };
@@ -295,6 +296,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
295296LLAMA_FTYPE_MOSTLY_IQ3_M = 27
296297LLAMA_FTYPE_MOSTLY_IQ2_S = 28
297298LLAMA_FTYPE_MOSTLY_IQ2_M = 29
299+ LLAMA_FTYPE_MOSTLY_IQ4_XS = 30
298300LLAMA_FTYPE_GUESSED = 1024
299301
300302# enum llama_rope_scaling_type {
@@ -548,6 +550,7 @@ class llama_model_params(ctypes.Structure):
548550# float yarn_beta_fast; // YaRN low correction dim
549551# float yarn_beta_slow; // YaRN high correction dim
550552# uint32_t yarn_orig_ctx; // YaRN original context size
553+ # float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)
551554
552555# ggml_backend_sched_eval_callback cb_eval;
553556# void * cb_eval_user_data;
@@ -580,6 +583,7 @@ class llama_context_params(ctypes.Structure):
580583 yarn_beta_fast (float): YaRN low correction dim
581584 yarn_beta_slow (float): YaRN high correction dim
582585 yarn_orig_ctx (int): YaRN original context size
586+ defrag_thold (float): defragment the KV cache if holes/size > thold, < 0 disabled (default)
583587 cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
584588 cb_eval_user_data (ctypes.c_void_p): user data for cb_eval
585589 type_k (int): data type for K cache
@@ -605,6 +609,7 @@ class llama_context_params(ctypes.Structure):
605609 ("yarn_beta_fast" , ctypes .c_float ),
606610 ("yarn_beta_slow" , ctypes .c_float ),
607611 ("yarn_orig_ctx" , ctypes .c_uint32 ),
612+ ("defrag_thold" , ctypes .c_float ),
608613 ("cb_eval" , ggml_backend_sched_eval_callback ),
609614 ("cb_eval_user_data" , ctypes .c_void_p ),
610615 ("type_k" , ctypes .c_int ),
0 commit comments