Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llama-cpp-2/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,11 @@ impl<'model> LlamaContext<'model> {
tracing::debug!("Remove lora adapter");
Ok(())
}

/// Print a breakdown of per-device memory use to the default logger.
///
/// Thin wrapper over `llama_cpp_sys_2::llama_memory_breakdown_print`; the output
/// is emitted by llama.cpp's own logging callback, not through `tracing`.
pub fn print_memory_breakdown(&self) {
// SAFETY: `self.context` is a non-null pointer to a live `llama_context` owned
// by this `LlamaContext` for its whole lifetime, which is all the C API requires.
unsafe { llama_cpp_sys_2::llama_memory_breakdown_print(self.context.as_ptr()) }
}
}

impl Drop for LlamaContext<'_> {
Expand Down
23 changes: 23 additions & 0 deletions llama-cpp-2/src/model/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,28 @@ impl LlamaModelParams {
}
Ok(self)
}

/// Set `no_alloc`
///
/// If this parameter is true, don't allocate memory for the tensor data
///
/// You can't use `no_alloc` with `use_mmap`, so this also sets `use_mmap` to false.
#[must_use]
pub fn with_no_alloc(mut self, no_alloc: bool) -> Self {
    self.params.no_alloc = no_alloc;
    // Enabling `no_alloc` is incompatible with mmap-backed weights, so force
    // `use_mmap` off in that case; otherwise leave the mmap setting untouched.
    if no_alloc {
        self.with_use_mmap(false)
    } else {
        self
    }
}

/// Get `no_alloc`
///
/// If this parameter is true, don't allocate memory for the tensor data
///
/// Returns the current value of the underlying C `llama_model_params.no_alloc` flag.
#[must_use]
pub fn no_alloc(&self) -> bool {
self.params.no_alloc
}
}

/// Default parameters for `LlamaModel`. (as defined in llama.cpp by `llama_model_default_params`)
Expand All @@ -420,6 +442,7 @@ impl LlamaModelParams {
/// assert_eq!(params.use_mlock(), false, "use_mlock should be false");
/// assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Layer), "split_mode should be LAYER");
/// assert_eq!(params.devices().len(), 0, "devices should be empty");
/// assert_eq!(params.no_alloc(), false, "no_alloc should be false");
/// ```
impl Default for LlamaModelParams {
fn default() -> Self {
Expand Down
Loading