Skip to content

Commit b10bd0a

Browse files
committed
Expose offload_kqv to control GPU KV cache & KQV ops
1 parent fdc70c1 commit b10bd0a

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

llama-cpp-2/src/context/params.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,36 @@ impl LlamaContextParams {
227227
self.context_params.flash_attn
228228
}
229229

230+
/// Set the `offload_kqv` parameter to control offloading KV cache & KQV ops to GPU
231+
///
232+
/// # Examples
233+
///
234+
/// ```rust
235+
/// use llama_cpp_2::context::params::LlamaContextParams;
236+
/// let params = LlamaContextParams::default()
237+
/// .with_offload_kqv(false);
238+
/// assert_eq!(params.offload_kqv(), false);
239+
/// ```
240+
#[must_use]
241+
pub fn with_offload_kqv(mut self, enabled: bool) -> Self {
242+
self.context_params.offload_kqv = enabled;
243+
self
244+
}
245+
246+
/// Get the `offload_kqv` parameter
247+
///
248+
/// # Examples
249+
///
250+
/// ```rust
251+
/// use llama_cpp_2::context::params::LlamaContextParams;
252+
/// let params = LlamaContextParams::default();
253+
/// assert_eq!(params.offload_kqv(), true);
254+
/// ```
255+
#[must_use]
256+
pub fn offload_kqv(&self) -> bool {
257+
self.context_params.offload_kqv
258+
}
259+
230260
/// Set the type of rope scaling.
231261
///
232262
/// # Examples

0 commit comments

Comments
 (0)