File tree Expand file tree Collapse file tree 1 file changed +30
-0
lines changed
Expand file tree Collapse file tree 1 file changed +30
-0
lines changed Original file line number Diff line number Diff line change @@ -227,6 +227,36 @@ impl LlamaContextParams {
227227 self . context_params . flash_attn
228228 }
229229
230+ /// Set the `offload_kqv` parameter to control offloading KV cache & KQV ops to GPU
231+ ///
232+ /// # Examples
233+ ///
234+ /// ```rust
235+ /// use llama_cpp_2::context::params::LlamaContextParams;
236+ /// let params = LlamaContextParams::default()
237+ /// .with_offload_kqv(false);
238+ /// assert_eq!(params.offload_kqv(), false);
239+ /// ```
240+ #[ must_use]
241+ pub fn with_offload_kqv ( mut self , enabled : bool ) -> Self {
242+ self . context_params . offload_kqv = enabled;
243+ self
244+ }
245+
246+ /// Get the `offload_kqv` parameter
247+ ///
248+ /// # Examples
249+ ///
250+ /// ```rust
251+ /// use llama_cpp_2::context::params::LlamaContextParams;
252+ /// let params = LlamaContextParams::default();
253+ /// assert_eq!(params.offload_kqv(), true);
254+ /// ```
255+ #[ must_use]
256+ pub fn offload_kqv ( & self ) -> bool {
257+ self . context_params . offload_kqv
258+ }
259+
230260 /// Set the type of rope scaling.
231261 ///
232262 /// # Examples
You can’t perform that action at this time.
0 commit comments