@@ -811,6 +811,7 @@ class llama_context_params(ctypes.Structure):
 # bool quantize_output_tensor; // quantize output.weight
 # bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
 # bool pure; // quantize all tensors to the default type
+# bool keep_split; // quantize to the same number of shards
 # void * imatrix; // pointer to importance matrix data
 # void * kv_overrides; // pointer to vector containing overrides
 # } llama_model_quantize_params;
@@ -826,6 +827,7 @@ class llama_model_quantize_params(ctypes.Structure):
         quantize_output_tensor (bool): quantize output.weight
         only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         pure (bool): quantize all tensors to the default type
+        keep_split (bool): quantize to the same number of shards
         imatrix (ctypes.c_void_p): pointer to importance matrix data
         kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
     """
@@ -839,6 +841,7 @@ class llama_model_quantize_params(ctypes.Structure):
         quantize_output_tensor: bool
         only_copy: bool
         pure: bool
+        keep_split: bool
         imatrix: ctypes.c_void_p
         kv_overrides: ctypes.c_void_p

@@ -851,6 +854,7 @@ class llama_model_quantize_params(ctypes.Structure):
851854 ("quantize_output_tensor" , ctypes .c_bool ),
852855 ("only_copy" , ctypes .c_bool ),
853856 ("pure" , ctypes .c_bool ),
857+ ("keep_split" , ctypes .c_bool ),
854858 ("imatrix" , ctypes .c_void_p ),
855859 ("kv_overrides" , ctypes .c_void_p ),
856860 ]
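
For reference, the new field can be exercised through the existing low-level quantize API. This is a minimal sketch, not a definitive recipe: the file names are placeholders (`keep_split` only matters when the input is a multi-shard GGUF), and `LLAMA_FTYPE_MOSTLY_Q4_K_M` is assumed here as one of the already-exported ftype constants.

```python
import ctypes

import llama_cpp

# Start from the library defaults, then opt in to keep_split so the
# quantized output is written with the same number of shards as the input.
params = llama_cpp.llama_model_quantize_default_params()
params.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_Q4_K_M
params.keep_split = True

# Placeholder paths: point the input at the first shard of a split model.
ret = llama_cpp.llama_model_quantize(
    b"model-f16-00001-of-00003.gguf",
    b"model-q4_k_m.gguf",
    ctypes.byref(params),
)
if ret != 0:
    raise RuntimeError(f"llama_model_quantize failed with code {ret}")
```

With `keep_split` left at its default of `False`, the behavior should be unchanged from before this patch.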