@@ -88,6 +88,130 @@ impl From<LlamaPoolingType> for i32 {
88
88
}
89
89
}
90
90
91
+ /// A rusty wrapper around `ggml_type` for KV cache types.
92
+ #[ allow( non_camel_case_types, missing_docs) ]
93
+ #[ derive( Copy , Clone , Debug , PartialEq , Eq ) ]
94
+ pub enum KvCacheType {
95
+ /// Represents an unknown or not-yet-mapped `ggml_type` and carries the raw value.
96
+ /// When passed through FFI, the raw value is used as-is (if llama.cpp supports it,
97
+ /// the runtime will operate with that type).
98
+ /// This variant preserves API compatibility when new `ggml_type` values are
99
+ /// introduced in the future.
100
+ Unknown ( llama_cpp_sys_2:: ggml_type ) ,
101
+ F32 ,
102
+ F16 ,
103
+ Q4_0 ,
104
+ Q4_1 ,
105
+ Q5_0 ,
106
+ Q5_1 ,
107
+ Q8_0 ,
108
+ Q8_1 ,
109
+ Q2_K ,
110
+ Q3_K ,
111
+ Q4_K ,
112
+ Q5_K ,
113
+ Q6_K ,
114
+ Q8_K ,
115
+ IQ2_XXS ,
116
+ IQ2_XS ,
117
+ IQ3_XXS ,
118
+ IQ1_S ,
119
+ IQ4_NL ,
120
+ IQ3_S ,
121
+ IQ2_S ,
122
+ IQ4_XS ,
123
+ I8 ,
124
+ I16 ,
125
+ I32 ,
126
+ I64 ,
127
+ F64 ,
128
+ IQ1_M ,
129
+ BF16 ,
130
+ TQ1_0 ,
131
+ TQ2_0 ,
132
+ MXFP4 ,
133
+ }
134
+
135
+ impl From < KvCacheType > for llama_cpp_sys_2:: ggml_type {
136
+ fn from ( value : KvCacheType ) -> Self {
137
+ match value {
138
+ KvCacheType :: Unknown ( raw) => raw,
139
+ KvCacheType :: F32 => llama_cpp_sys_2:: GGML_TYPE_F32 ,
140
+ KvCacheType :: F16 => llama_cpp_sys_2:: GGML_TYPE_F16 ,
141
+ KvCacheType :: Q4_0 => llama_cpp_sys_2:: GGML_TYPE_Q4_0 ,
142
+ KvCacheType :: Q4_1 => llama_cpp_sys_2:: GGML_TYPE_Q4_1 ,
143
+ KvCacheType :: Q5_0 => llama_cpp_sys_2:: GGML_TYPE_Q5_0 ,
144
+ KvCacheType :: Q5_1 => llama_cpp_sys_2:: GGML_TYPE_Q5_1 ,
145
+ KvCacheType :: Q8_0 => llama_cpp_sys_2:: GGML_TYPE_Q8_0 ,
146
+ KvCacheType :: Q8_1 => llama_cpp_sys_2:: GGML_TYPE_Q8_1 ,
147
+ KvCacheType :: Q2_K => llama_cpp_sys_2:: GGML_TYPE_Q2_K ,
148
+ KvCacheType :: Q3_K => llama_cpp_sys_2:: GGML_TYPE_Q3_K ,
149
+ KvCacheType :: Q4_K => llama_cpp_sys_2:: GGML_TYPE_Q4_K ,
150
+ KvCacheType :: Q5_K => llama_cpp_sys_2:: GGML_TYPE_Q5_K ,
151
+ KvCacheType :: Q6_K => llama_cpp_sys_2:: GGML_TYPE_Q6_K ,
152
+ KvCacheType :: Q8_K => llama_cpp_sys_2:: GGML_TYPE_Q8_K ,
153
+ KvCacheType :: IQ2_XXS => llama_cpp_sys_2:: GGML_TYPE_IQ2_XXS ,
154
+ KvCacheType :: IQ2_XS => llama_cpp_sys_2:: GGML_TYPE_IQ2_XS ,
155
+ KvCacheType :: IQ3_XXS => llama_cpp_sys_2:: GGML_TYPE_IQ3_XXS ,
156
+ KvCacheType :: IQ1_S => llama_cpp_sys_2:: GGML_TYPE_IQ1_S ,
157
+ KvCacheType :: IQ4_NL => llama_cpp_sys_2:: GGML_TYPE_IQ4_NL ,
158
+ KvCacheType :: IQ3_S => llama_cpp_sys_2:: GGML_TYPE_IQ3_S ,
159
+ KvCacheType :: IQ2_S => llama_cpp_sys_2:: GGML_TYPE_IQ2_S ,
160
+ KvCacheType :: IQ4_XS => llama_cpp_sys_2:: GGML_TYPE_IQ4_XS ,
161
+ KvCacheType :: I8 => llama_cpp_sys_2:: GGML_TYPE_I8 ,
162
+ KvCacheType :: I16 => llama_cpp_sys_2:: GGML_TYPE_I16 ,
163
+ KvCacheType :: I32 => llama_cpp_sys_2:: GGML_TYPE_I32 ,
164
+ KvCacheType :: I64 => llama_cpp_sys_2:: GGML_TYPE_I64 ,
165
+ KvCacheType :: F64 => llama_cpp_sys_2:: GGML_TYPE_F64 ,
166
+ KvCacheType :: IQ1_M => llama_cpp_sys_2:: GGML_TYPE_IQ1_M ,
167
+ KvCacheType :: BF16 => llama_cpp_sys_2:: GGML_TYPE_BF16 ,
168
+ KvCacheType :: TQ1_0 => llama_cpp_sys_2:: GGML_TYPE_TQ1_0 ,
169
+ KvCacheType :: TQ2_0 => llama_cpp_sys_2:: GGML_TYPE_TQ2_0 ,
170
+ KvCacheType :: MXFP4 => llama_cpp_sys_2:: GGML_TYPE_MXFP4 ,
171
+ }
172
+ }
173
+ }
174
+
175
+ impl From < llama_cpp_sys_2:: ggml_type > for KvCacheType {
176
+ fn from ( value : llama_cpp_sys_2:: ggml_type ) -> Self {
177
+ match value {
178
+ x if x == llama_cpp_sys_2:: GGML_TYPE_F32 => KvCacheType :: F32 ,
179
+ x if x == llama_cpp_sys_2:: GGML_TYPE_F16 => KvCacheType :: F16 ,
180
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q4_0 => KvCacheType :: Q4_0 ,
181
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q4_1 => KvCacheType :: Q4_1 ,
182
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q5_0 => KvCacheType :: Q5_0 ,
183
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q5_1 => KvCacheType :: Q5_1 ,
184
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q8_0 => KvCacheType :: Q8_0 ,
185
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q8_1 => KvCacheType :: Q8_1 ,
186
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q2_K => KvCacheType :: Q2_K ,
187
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q3_K => KvCacheType :: Q3_K ,
188
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q4_K => KvCacheType :: Q4_K ,
189
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q5_K => KvCacheType :: Q5_K ,
190
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q6_K => KvCacheType :: Q6_K ,
191
+ x if x == llama_cpp_sys_2:: GGML_TYPE_Q8_K => KvCacheType :: Q8_K ,
192
+ x if x == llama_cpp_sys_2:: GGML_TYPE_IQ2_XXS => KvCacheType :: IQ2_XXS ,
193
+ x if x == llama_cpp_sys_2:: GGML_TYPE_IQ2_XS => KvCacheType :: IQ2_XS ,
194
+ x if x == llama_cpp_sys_2:: GGML_TYPE_IQ3_XXS => KvCacheType :: IQ3_XXS ,
195
+ x if x == llama_cpp_sys_2:: GGML_TYPE_IQ1_S => KvCacheType :: IQ1_S ,
196
+ x if x == llama_cpp_sys_2:: GGML_TYPE_IQ4_NL => KvCacheType :: IQ4_NL ,
197
+ x if x == llama_cpp_sys_2:: GGML_TYPE_IQ3_S => KvCacheType :: IQ3_S ,
198
+ x if x == llama_cpp_sys_2:: GGML_TYPE_IQ2_S => KvCacheType :: IQ2_S ,
199
+ x if x == llama_cpp_sys_2:: GGML_TYPE_IQ4_XS => KvCacheType :: IQ4_XS ,
200
+ x if x == llama_cpp_sys_2:: GGML_TYPE_I8 => KvCacheType :: I8 ,
201
+ x if x == llama_cpp_sys_2:: GGML_TYPE_I16 => KvCacheType :: I16 ,
202
+ x if x == llama_cpp_sys_2:: GGML_TYPE_I32 => KvCacheType :: I32 ,
203
+ x if x == llama_cpp_sys_2:: GGML_TYPE_I64 => KvCacheType :: I64 ,
204
+ x if x == llama_cpp_sys_2:: GGML_TYPE_F64 => KvCacheType :: F64 ,
205
+ x if x == llama_cpp_sys_2:: GGML_TYPE_IQ1_M => KvCacheType :: IQ1_M ,
206
+ x if x == llama_cpp_sys_2:: GGML_TYPE_BF16 => KvCacheType :: BF16 ,
207
+ x if x == llama_cpp_sys_2:: GGML_TYPE_TQ1_0 => KvCacheType :: TQ1_0 ,
208
+ x if x == llama_cpp_sys_2:: GGML_TYPE_TQ2_0 => KvCacheType :: TQ2_0 ,
209
+ x if x == llama_cpp_sys_2:: GGML_TYPE_MXFP4 => KvCacheType :: MXFP4 ,
210
+ _ => KvCacheType :: Unknown ( value) ,
211
+ }
212
+ }
213
+ }
214
+
91
215
/// A safe wrapper around `llama_context_params`.
92
216
///
93
217
/// Generally this should be created with [`Default::default()`] and then modified with `with_*` methods.
@@ -574,6 +698,57 @@ impl LlamaContextParams {
574
698
    pub fn n_seq_max(&self) -> u32 {
        // Plain read-through accessor over the raw llama.cpp
        // `llama_context_params.n_seq_max` field; no conversion performed.
        self.context_params.n_seq_max
    }
701
+ /// Set the KV cache data type for K
702
+ /// use llama_cpp_2::context::params::{LlamaContextParams, KvCacheType};
703
+ /// let params = LlamaContextParams::default().with_type_k(KvCacheType::Q4_0);
704
+ /// assert_eq!(params.type_k(), KvCacheType::Q4_0);
705
+ /// ```
706
+ #[ must_use]
707
+ pub fn with_type_k ( mut self , type_k : KvCacheType ) -> Self {
708
+ self . context_params . type_k = type_k. into ( ) ;
709
+ self
710
+ }
711
+
712
+ /// Get the KV cache data type for K
713
+ ///
714
+ /// # Examples
715
+ ///
716
+ /// ```rust
717
+ /// let params = llama_cpp_2::context::params::LlamaContextParams::default();
718
+ /// let _ = params.type_k();
719
+ /// ```
720
+ #[ must_use]
721
+ pub fn type_k ( & self ) -> KvCacheType {
722
+ KvCacheType :: from ( self . context_params . type_k )
723
+ }
724
+
725
+ /// Set the KV cache data type for V
726
+ ///
727
+ /// # Examples
728
+ ///
729
+ /// ```rust
730
+ /// use llama_cpp_2::context::params::{LlamaContextParams, KvCacheType};
731
+ /// let params = LlamaContextParams::default().with_type_v(KvCacheType::Q4_1);
732
+ /// assert_eq!(params.type_v(), KvCacheType::Q4_1);
733
+ /// ```
734
+ #[ must_use]
735
+ pub fn with_type_v ( mut self , type_v : KvCacheType ) -> Self {
736
+ self . context_params . type_v = type_v. into ( ) ;
737
+ self
738
+ }
739
+
740
+ /// Get the KV cache data type for V
741
+ ///
742
+ /// # Examples
743
+ ///
744
+ /// ```rust
745
+ /// let params = llama_cpp_2::context::params::LlamaContextParams::default();
746
+ /// let _ = params.type_v();
747
+ /// ```
748
+ #[ must_use]
749
+ pub fn type_v ( & self ) -> KvCacheType {
750
+ KvCacheType :: from ( self . context_params . type_v )
751
+ }
577
752
}
578
753
579
754
/// Default parameters for `LlamaContext`. (as defined in llama.cpp by `llama_context_default_params`)
0 commit comments