Commit 7f81fd0

Merge pull request #815 from mediest/feat/expose-ggml-kv-types
Add KV cache type (K/V) configuration to LlamaContextParams
2 parents e2accc4 + d83319c commit 7f81fd0
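
This exposes llama.cpp's `type_k` / `type_v` context parameters so callers can pick the storage type of the KV cache (for example a quantized type to cut memory use). A minimal usage sketch, not part of the commit, exercising only the API added here and assuming the crate is consumed as `llama_cpp_2`:

    use llama_cpp_2::context::params::{KvCacheType, LlamaContextParams};

    // Store both the K and V halves of the KV cache as Q8_0 rather than the
    // usual f16 default from llama_context_default_params.
    let params = LlamaContextParams::default()
        .with_type_k(KvCacheType::Q8_0)
        .with_type_v(KvCacheType::Q8_0);
    assert_eq!(params.type_k(), KvCacheType::Q8_0);
    assert_eq!(params.type_v(), KvCacheType::Q8_0);
    // Note: llama.cpp builds have generally required flash attention to be
    // enabled for a quantized V cache; check the version being linked.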

File tree

1 file changed (+175, -0 lines)

llama-cpp-2/src/context/params.rs

Lines changed: 175 additions & 0 deletions
@@ -88,6 +88,130 @@ impl From<LlamaPoolingType> for i32 {
     }
 }
 
+/// A rusty wrapper around `ggml_type` for KV cache types.
+#[allow(non_camel_case_types, missing_docs)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum KvCacheType {
+    /// Represents an unknown or not-yet-mapped `ggml_type` and carries the raw value.
+    /// When passed through FFI, the raw value is used as-is (if llama.cpp supports it,
+    /// the runtime will operate with that type).
+    /// This variant preserves API compatibility when new `ggml_type` values are
+    /// introduced in the future.
+    Unknown(llama_cpp_sys_2::ggml_type),
+    F32,
+    F16,
+    Q4_0,
+    Q4_1,
+    Q5_0,
+    Q5_1,
+    Q8_0,
+    Q8_1,
+    Q2_K,
+    Q3_K,
+    Q4_K,
+    Q5_K,
+    Q6_K,
+    Q8_K,
+    IQ2_XXS,
+    IQ2_XS,
+    IQ3_XXS,
+    IQ1_S,
+    IQ4_NL,
+    IQ3_S,
+    IQ2_S,
+    IQ4_XS,
+    I8,
+    I16,
+    I32,
+    I64,
+    F64,
+    IQ1_M,
+    BF16,
+    TQ1_0,
+    TQ2_0,
+    MXFP4,
+}
+
+impl From<KvCacheType> for llama_cpp_sys_2::ggml_type {
+    fn from(value: KvCacheType) -> Self {
+        match value {
+            KvCacheType::Unknown(raw) => raw,
+            KvCacheType::F32 => llama_cpp_sys_2::GGML_TYPE_F32,
+            KvCacheType::F16 => llama_cpp_sys_2::GGML_TYPE_F16,
+            KvCacheType::Q4_0 => llama_cpp_sys_2::GGML_TYPE_Q4_0,
+            KvCacheType::Q4_1 => llama_cpp_sys_2::GGML_TYPE_Q4_1,
+            KvCacheType::Q5_0 => llama_cpp_sys_2::GGML_TYPE_Q5_0,
+            KvCacheType::Q5_1 => llama_cpp_sys_2::GGML_TYPE_Q5_1,
+            KvCacheType::Q8_0 => llama_cpp_sys_2::GGML_TYPE_Q8_0,
+            KvCacheType::Q8_1 => llama_cpp_sys_2::GGML_TYPE_Q8_1,
+            KvCacheType::Q2_K => llama_cpp_sys_2::GGML_TYPE_Q2_K,
+            KvCacheType::Q3_K => llama_cpp_sys_2::GGML_TYPE_Q3_K,
+            KvCacheType::Q4_K => llama_cpp_sys_2::GGML_TYPE_Q4_K,
+            KvCacheType::Q5_K => llama_cpp_sys_2::GGML_TYPE_Q5_K,
+            KvCacheType::Q6_K => llama_cpp_sys_2::GGML_TYPE_Q6_K,
+            KvCacheType::Q8_K => llama_cpp_sys_2::GGML_TYPE_Q8_K,
+            KvCacheType::IQ2_XXS => llama_cpp_sys_2::GGML_TYPE_IQ2_XXS,
+            KvCacheType::IQ2_XS => llama_cpp_sys_2::GGML_TYPE_IQ2_XS,
+            KvCacheType::IQ3_XXS => llama_cpp_sys_2::GGML_TYPE_IQ3_XXS,
+            KvCacheType::IQ1_S => llama_cpp_sys_2::GGML_TYPE_IQ1_S,
+            KvCacheType::IQ4_NL => llama_cpp_sys_2::GGML_TYPE_IQ4_NL,
+            KvCacheType::IQ3_S => llama_cpp_sys_2::GGML_TYPE_IQ3_S,
+            KvCacheType::IQ2_S => llama_cpp_sys_2::GGML_TYPE_IQ2_S,
+            KvCacheType::IQ4_XS => llama_cpp_sys_2::GGML_TYPE_IQ4_XS,
+            KvCacheType::I8 => llama_cpp_sys_2::GGML_TYPE_I8,
+            KvCacheType::I16 => llama_cpp_sys_2::GGML_TYPE_I16,
+            KvCacheType::I32 => llama_cpp_sys_2::GGML_TYPE_I32,
+            KvCacheType::I64 => llama_cpp_sys_2::GGML_TYPE_I64,
+            KvCacheType::F64 => llama_cpp_sys_2::GGML_TYPE_F64,
+            KvCacheType::IQ1_M => llama_cpp_sys_2::GGML_TYPE_IQ1_M,
+            KvCacheType::BF16 => llama_cpp_sys_2::GGML_TYPE_BF16,
+            KvCacheType::TQ1_0 => llama_cpp_sys_2::GGML_TYPE_TQ1_0,
+            KvCacheType::TQ2_0 => llama_cpp_sys_2::GGML_TYPE_TQ2_0,
+            KvCacheType::MXFP4 => llama_cpp_sys_2::GGML_TYPE_MXFP4,
+        }
+    }
+}
+
+impl From<llama_cpp_sys_2::ggml_type> for KvCacheType {
+    fn from(value: llama_cpp_sys_2::ggml_type) -> Self {
+        match value {
+            x if x == llama_cpp_sys_2::GGML_TYPE_F32 => KvCacheType::F32,
+            x if x == llama_cpp_sys_2::GGML_TYPE_F16 => KvCacheType::F16,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q4_0 => KvCacheType::Q4_0,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q4_1 => KvCacheType::Q4_1,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q5_0 => KvCacheType::Q5_0,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q5_1 => KvCacheType::Q5_1,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q8_0 => KvCacheType::Q8_0,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q8_1 => KvCacheType::Q8_1,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q2_K => KvCacheType::Q2_K,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q3_K => KvCacheType::Q3_K,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q4_K => KvCacheType::Q4_K,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q5_K => KvCacheType::Q5_K,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q6_K => KvCacheType::Q6_K,
+            x if x == llama_cpp_sys_2::GGML_TYPE_Q8_K => KvCacheType::Q8_K,
+            x if x == llama_cpp_sys_2::GGML_TYPE_IQ2_XXS => KvCacheType::IQ2_XXS,
+            x if x == llama_cpp_sys_2::GGML_TYPE_IQ2_XS => KvCacheType::IQ2_XS,
+            x if x == llama_cpp_sys_2::GGML_TYPE_IQ3_XXS => KvCacheType::IQ3_XXS,
+            x if x == llama_cpp_sys_2::GGML_TYPE_IQ1_S => KvCacheType::IQ1_S,
+            x if x == llama_cpp_sys_2::GGML_TYPE_IQ4_NL => KvCacheType::IQ4_NL,
+            x if x == llama_cpp_sys_2::GGML_TYPE_IQ3_S => KvCacheType::IQ3_S,
+            x if x == llama_cpp_sys_2::GGML_TYPE_IQ2_S => KvCacheType::IQ2_S,
+            x if x == llama_cpp_sys_2::GGML_TYPE_IQ4_XS => KvCacheType::IQ4_XS,
+            x if x == llama_cpp_sys_2::GGML_TYPE_I8 => KvCacheType::I8,
+            x if x == llama_cpp_sys_2::GGML_TYPE_I16 => KvCacheType::I16,
+            x if x == llama_cpp_sys_2::GGML_TYPE_I32 => KvCacheType::I32,
+            x if x == llama_cpp_sys_2::GGML_TYPE_I64 => KvCacheType::I64,
+            x if x == llama_cpp_sys_2::GGML_TYPE_F64 => KvCacheType::F64,
+            x if x == llama_cpp_sys_2::GGML_TYPE_IQ1_M => KvCacheType::IQ1_M,
+            x if x == llama_cpp_sys_2::GGML_TYPE_BF16 => KvCacheType::BF16,
+            x if x == llama_cpp_sys_2::GGML_TYPE_TQ1_0 => KvCacheType::TQ1_0,
+            x if x == llama_cpp_sys_2::GGML_TYPE_TQ2_0 => KvCacheType::TQ2_0,
+            x if x == llama_cpp_sys_2::GGML_TYPE_MXFP4 => KvCacheType::MXFP4,
+            _ => KvCacheType::Unknown(value),
+        }
+    }
+}
+
 /// A safe wrapper around `llama_context_params`.
 ///
 /// Generally this should be created with [`Default::default()`] and then modified with `with_*` methods.
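
The mapping is bidirectional: `KvCacheType` converts into a raw `ggml_type` for FFI, and raw values convert back, falling through to `Unknown(raw)` when no variant matches so that newer ggml types still pass to llama.cpp unchanged. A small round-trip illustration, not part of the commit, assuming the caller also depends on `llama_cpp_sys_2` for the raw constants:

    use llama_cpp_2::context::params::KvCacheType;

    // Known variants round-trip through the raw ggml_type value.
    let raw: llama_cpp_sys_2::ggml_type = KvCacheType::Q8_0.into();
    assert_eq!(raw, llama_cpp_sys_2::GGML_TYPE_Q8_0);
    assert_eq!(KvCacheType::from(raw), KvCacheType::Q8_0);
    // A raw value with no matching variant maps to KvCacheType::Unknown(raw)
    // instead of panicking.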
@@ -574,6 +698,57 @@ impl LlamaContextParams {
     pub fn n_seq_max(&self) -> u32 {
         self.context_params.n_seq_max
     }
+    /// Set the KV cache data type for K
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use llama_cpp_2::context::params::{LlamaContextParams, KvCacheType};
+    /// let params = LlamaContextParams::default().with_type_k(KvCacheType::Q4_0);
+    /// assert_eq!(params.type_k(), KvCacheType::Q4_0);
+    /// ```
+    #[must_use]
+    pub fn with_type_k(mut self, type_k: KvCacheType) -> Self {
+        self.context_params.type_k = type_k.into();
+        self
+    }
+
+    /// Get the KV cache data type for K
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// let params = llama_cpp_2::context::params::LlamaContextParams::default();
+    /// let _ = params.type_k();
+    /// ```
+    #[must_use]
+    pub fn type_k(&self) -> KvCacheType {
+        KvCacheType::from(self.context_params.type_k)
+    }
+
+    /// Set the KV cache data type for V
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use llama_cpp_2::context::params::{LlamaContextParams, KvCacheType};
+    /// let params = LlamaContextParams::default().with_type_v(KvCacheType::Q4_1);
+    /// assert_eq!(params.type_v(), KvCacheType::Q4_1);
+    /// ```
+    #[must_use]
+    pub fn with_type_v(mut self, type_v: KvCacheType) -> Self {
+        self.context_params.type_v = type_v.into();
+        self
+    }
+
+    /// Get the KV cache data type for V
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// let params = llama_cpp_2::context::params::LlamaContextParams::default();
+    /// let _ = params.type_v();
+    /// ```
+    #[must_use]
+    pub fn type_v(&self) -> KvCacheType {
+        KvCacheType::from(self.context_params.type_v)
+    }
 }
 
 /// Default parameters for `LlamaContext`. (as defined in llama.cpp by `llama_context_default_params`)

0 commit comments
