
Commit 773d2c0

Merge pull request #637 from vlovich/logging-intercept
Redirect llama.cpp logs into tracing
2 parents 8fc28b3 + 373f8c6 commit 773d2c0

File tree

9 files changed, +508 -31 lines changed


Cargo.lock

Lines changed: 133 additions & 0 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -11,6 +11,7 @@ members = [
 # core library deps
 thiserror = "1"
 tracing = "0.1"
+tracing-core = "0.1"
 
 # examples and benchmarks
 hf-hub = { version = "0.3.2" }
@@ -21,6 +22,7 @@ cc = "1.2.11"
 anyhow = "1.0.95"
 clap = "4.5.27"
 encoding_rs = "0.8.35"
+tracing-subscriber = { version = "0.3", features = ["json"] }
 
 [workspace.lints.rust]
 missing_docs = { level = "warn" }
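
The workspace pulls in tracing-subscriber with its json feature for the examples. As a point of reference only (not part of this commit), a minimal sketch of consuming the redirected llama.cpp logs as structured JSON, using the send_logs_to_tracing API added in llama-cpp-2/src/lib.rs below:

use llama_cpp_2::{send_logs_to_tracing, LogOptions};

fn main() {
    // Requires tracing-subscriber's "json" feature, as enabled in this workspace manifest:
    // each intercepted llama.cpp / ggml log line becomes one JSON object on stdout.
    tracing_subscriber::fmt().json().init();

    send_logs_to_tracing(LogOptions::default());
}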

examples/simple/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ hf-hub = { workspace = true }
 clap = { workspace = true , features = ["derive"] }
 anyhow = { workspace = true }
 encoding_rs = { workspace = true }
+tracing-subscriber = { workspace = true }
 
 [features]
 cuda = ["llama-cpp-2/cuda"]

examples/simple/src/main.rs

Lines changed: 13 additions & 1 deletion
@@ -10,7 +10,7 @@ use anyhow::{anyhow, bail, Context, Result};
 use clap::Parser;
 use hf_hub::api::sync::ApiBuilder;
 use llama_cpp_2::context::params::LlamaContextParams;
-use llama_cpp_2::ggml_time_us;
+use llama_cpp_2::{ggml_time_us, send_logs_to_tracing, LogOptions};
 use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::llama_batch::LlamaBatch;
 use llama_cpp_2::model::params::kv_overrides::ParamOverrideValue;
@@ -67,6 +67,12 @@ struct Args {
         help = "size of the prompt context (default: loaded from themodel)"
     )]
     ctx_size: Option<NonZeroU32>,
+    #[arg(
+        short = 'v',
+        long,
+        help = "enable verbose llama.cpp logs",
+    )]
+    verbose: bool,
 }
 
 /// Parse a single key-value pair
@@ -132,8 +138,14 @@ fn main() -> Result<()> {
         threads,
         threads_batch,
         ctx_size,
+        verbose,
     } = Args::parse();
 
+    if verbose {
+        tracing_subscriber::fmt().init();
+    }
+    send_logs_to_tracing(LogOptions::default().with_logs_enabled(verbose));
+
     // init LLM
     let backend = LlamaBackend::init()?;

llama-cpp-2/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,7 @@ enumflags2 = "0.7.11"
 llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.69" }
 thiserror = { workspace = true }
 tracing = { workspace = true }
+tracing-core = { workspace = true }
 
 [dev-dependencies]
 encoding_rs = { workspace = true }

llama-cpp-2/src/lib.rs

Lines changed: 74 additions & 0 deletions
@@ -25,6 +25,7 @@ use std::string::FromUtf8Error;
 pub mod context;
 pub mod llama_backend;
 pub mod llama_batch;
+mod log;
 pub mod model;
 pub mod sampling;
 pub mod timing;
@@ -323,3 +324,76 @@ pub fn ggml_time_us() -> i64 {
 pub fn llama_supports_mlock() -> bool {
     unsafe { llama_cpp_sys_2::llama_supports_mlock() }
 }
+
+/// Options to configure how llama.cpp logs are intercepted.
+#[derive(Default, Debug, Clone)]
+pub struct LogOptions {
+    disabled: bool,
+}
+
+impl LogOptions {
+    /// If enabled, logs are sent to tracing. If disabled, all logs are suppressed. The default is
+    /// for logs to be sent to tracing.
+    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
+        self.disabled = !enabled;
+        self
+    }
+}
+
+extern "C" fn logs_to_trace(
+    level: llama_cpp_sys_2::ggml_log_level,
+    text: *const ::std::os::raw::c_char,
+    data: *mut ::std::os::raw::c_void,
+) {
+    // In the fast path (i.e. the vast majority of logs) we want to avoid taking the log state
+    // lock at all, and we try to avoid any heap allocations within this function. This is
+    // accomplished by being a simple pass-through to tracing in the normal case of
+    // newline-terminated DEBUG/INFO/WARN/ERROR logs, and limiting the slow path of locks and/or
+    // heap allocations to the other cases.
+    use std::borrow::Borrow;
+
+    let log_state = unsafe { &*(data as *const log::State) };
+
+    let text = unsafe { std::ffi::CStr::from_ptr(text) };
+    let text = text.to_string_lossy();
+    let text: &str = text.borrow();
+
+    if log_state.options.disabled {
+        return;
+    }
+
+    // As best I can tell, llama.cpp / ggml require all log format strings at call sites to end
+    // with '\n'. If it's missing, it means either that more logs are expected via CONT or that
+    // there's a typo in the codebase. To distinguish a typo from intentional use of CONT, we have
+    // to buffer the message until the next one comes in to know how to flush it.
+
+    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
+        log_state.cont_buffered_log(text);
+    } else if text.ends_with('\n') {
+        log_state.emit_non_cont_line(level, text);
+    } else {
+        log_state.buffer_non_cont(level, text);
+    }
+}
+
+/// Redirect llama.cpp logs into tracing.
+pub fn send_logs_to_tracing(options: LogOptions) {
+    // TODO: Reinitialize the state to support calling send_logs_to_tracing multiple times.
+
+    // We set up separate log states for llama.cpp and ggml to make sure that CONT logs between
+    // the two can't possibly interfere with each other. In other words, if llama.cpp emits a log
+    // without a trailing newline and then calls a ggml function, the logs won't be weirdly
+    // intermixed; instead, llama.cpp logs will CONT previous llama.cpp logs and ggml logs will
+    // CONT previous ggml logs.
+    let llama_heap_state = Box::as_ref(
+        log::LLAMA_STATE
+            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
+    ) as *const _;
+    let ggml_heap_state = Box::as_ref(
+        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
+    ) as *const _;
+
+    unsafe {
+        // ggml has to be set after llama since setting llama's callback sets ggml's as well.
+        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
+        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
+    }
+}
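
The log module itself (llama-cpp-2/src/log.rs) is among the changed files but is not rendered above. Purely as an illustration of the buffering contract the callback relies on, here is a simplified, hypothetical sketch of such a state type. Only the method names cont_buffered_log, emit_non_cont_line, and buffer_non_cont come from the callback above; everything else, including the numeric level values, is an assumption rather than the crate's actual implementation:

use std::sync::Mutex;

/// Hypothetical stand-in for `log::State`: one buffered partial line per module.
struct State {
    /// (level, text) of a log record that arrived without a trailing newline.
    buffered: Mutex<Option<(u32, String)>>,
}

impl State {
    /// A CONT record appends to whatever is currently buffered.
    fn cont_buffered_log(&self, text: &str) {
        let mut slot = self.buffered.lock().unwrap();
        match slot.take() {
            Some((level, mut buf)) => {
                buf.push_str(text);
                if buf.ends_with('\n') {
                    emit(level, buf.trim_end_matches('\n'));
                } else {
                    // Still no newline: keep waiting for further CONT records.
                    *slot = Some((level, buf));
                }
            }
            // CONT with nothing pending: emit it on its own at a default level.
            None => emit(INFO, text.trim_end_matches('\n')),
        }
    }

    /// A complete, newline-terminated record: flush anything pending, then emit directly.
    fn emit_non_cont_line(&self, level: u32, text: &str) {
        if let Some((prev_level, prev)) = self.buffered.lock().unwrap().take() {
            emit(prev_level, &prev);
        }
        emit(level, text.trim_end_matches('\n'));
    }

    /// A record missing its newline: hold it until the next record shows whether it
    /// continues via CONT or was simply missing its '\n' at the call site.
    fn buffer_non_cont(&self, level: u32, text: &str) {
        *self.buffered.lock().unwrap() = Some((level, text.to_string()));
    }
}

// Assumed ggml-style level values; the real code uses llama_cpp_sys_2::ggml_log_level.
const DEBUG: u32 = 1;
const INFO: u32 = 2;
const WARN: u32 = 3;
const ERROR: u32 = 4;

fn emit(level: u32, line: &str) {
    // Map each completed line onto a tracing event of the corresponding severity.
    match level {
        DEBUG => tracing::debug!(target: "llama.cpp", "{}", line),
        WARN => tracing::warn!(target: "llama.cpp", "{}", line),
        ERROR => tracing::error!(target: "llama.cpp", "{}", line),
        _ => tracing::info!(target: "llama.cpp", "{}", line),
    }
}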
