@@ -559,9 +559,7 @@ class llama_model_params(ctypes.Structure):
559559# enum ggml_type type_k; // data type for K cache
560560# enum ggml_type type_v; // data type for V cache
561561
562-
563562# // Keep the booleans together to avoid misalignment during copy-by-value.
564- # bool mul_mat_q; // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
565563# bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
566564# bool embedding; // embedding mode only
567565# bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
@@ -589,7 +587,6 @@ class llama_context_params(ctypes.Structure):
589587 cb_eval_user_data (ctypes.c_void_p): user data for cb_eval
590588 type_k (int): data type for K cache
591589 type_v (int): data type for V cache
592- mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
593590 logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
594591 embedding (bool): embedding mode only
595592 offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
@@ -615,7 +612,6 @@ class llama_context_params(ctypes.Structure):
615612 ("cb_eval_user_data" , ctypes .c_void_p ),
616613 ("type_k" , ctypes .c_int ),
617614 ("type_v" , ctypes .c_int ),
618- ("mul_mat_q" , ctypes .c_bool ),
619615 ("logits_all" , ctypes .c_bool ),
620616 ("embedding" , ctypes .c_bool ),
621617 ("offload_kqv" , ctypes .c_bool ),
@@ -1519,11 +1515,11 @@ def llama_copy_state_data(
15191515 ...
15201516
15211517
1522- # Set the state reading from the specified address
1523- # Returns the number of bytes read
1518+ # // Set the state reading from the specified address
1519+ # // Returns the number of bytes read
15241520# LLAMA_API size_t llama_set_state_data(
15251521# struct llama_context * ctx,
1526- # uint8_t * src);
1522+ # const uint8_t * src);
15271523@ctypes_function (
15281524 "llama_set_state_data" ,
15291525 [llama_context_p_ctypes , ctypes .POINTER (ctypes .c_uint8 )],
0 commit comments