@@ -25,6 +25,7 @@ use std::string::FromUtf8Error;
 pub mod context;
 pub mod llama_backend;
 pub mod llama_batch;
+mod log;
 pub mod model;
 pub mod sampling;
 pub mod timing;
@@ -323,3 +324,76 @@ pub fn ggml_time_us() -> i64 {
 pub fn llama_supports_mlock() -> bool {
     unsafe { llama_cpp_sys_2::llama_supports_mlock() }
 }
+
+/// Options to configure how llama.cpp logs are intercepted.
+#[derive(Default, Debug, Clone)]
+pub struct LogOptions {
+    disabled: bool,
+}
+
+impl LogOptions {
+    /// If enabled, logs are sent to tracing. If disabled, all logs are suppressed. Default is for
+    /// logs to be sent to tracing.
+    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
+        self.disabled = !enabled;
+        self
+    }
+}
+
+extern "C" fn logs_to_trace(
+    level: llama_cpp_sys_2::ggml_log_level,
+    text: *const ::std::os::raw::c_char,
+    data: *mut ::std::os::raw::c_void,
+) {
+    // In the fast path (i.e. the vast majority of logs) we want to avoid taking the log state
+    // lock at all. Similarly, we try to avoid any heap allocations within this function. This is
+    // accomplished by being a plain pass-through to tracing in the normal case of
+    // newline-terminated DEBUG/INFO/WARN/ERROR logs, and limiting the slow path of locks and/or
+    // heap allocations to the other cases.
+    use std::borrow::Borrow;
+
+    let log_state = unsafe { &*(data as *const log::State) };
+
+    let text = unsafe { std::ffi::CStr::from_ptr(text) };
+    let text = text.to_string_lossy();
+    let text: &str = text.borrow();
+
+    if log_state.options.disabled {
+        return;
+    }
+
+    // As best I can tell, llama.cpp / ggml require all log format strings at call sites to end
+    // with '\n'. If it's missing, it means more logs are expected via CONT (or there's a typo in
+    // the codebase). To distinguish a typo from intentional use of CONT, we have to buffer until
+    // the next message comes in to know how to flush it.
+
+    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
+        log_state.cont_buffered_log(text);
+    } else if text.ends_with('\n') {
+        log_state.emit_non_cont_line(level, text);
+    } else {
+        log_state.buffer_non_cont(level, text);
+    }
+}
+
+/// Redirect llama.cpp logs into tracing.
+pub fn send_logs_to_tracing(options: LogOptions) {
+    // TODO: Reinitialize the state to support calling send_logs_to_tracing multiple times.
+
+    // We set up separate log states for llama.cpp and ggml to make sure that CONT logs between
+    // the two can't possibly interfere with each other. In other words, if llama.cpp emits a log
+    // without a trailing newline and then calls a GGML function, the logs won't be weirdly
+    // intermixed; instead, llama.cpp logs will CONT previous llama.cpp logs and GGML logs will
+    // CONT previous GGML logs.
+    let llama_heap_state = Box::as_ref(
+        log::LLAMA_STATE
+            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
+    ) as *const _;
+    let ggml_heap_state = Box::as_ref(
+        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
+    ) as *const _;
+
+    unsafe {
+        // GGML has to be set after llama since setting llama sets ggml as well.
+        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
+        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
+    }
+}
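
For reference, a minimal consumer-side sketch of the new API. The `tracing_subscriber` setup is an assumption of this example (any `tracing` subscriber works), and it assumes the crate is consumed as `llama_cpp_2`; only `send_logs_to_tracing` and `LogOptions` come from this diff.

```rust
// Hypothetical usage: install a tracing subscriber, then route llama.cpp /
// ggml logs through it. `tracing_subscriber` is an assumed dependency of the
// consuming application, not something this crate pulls in.
use llama_cpp_2::{send_logs_to_tracing, LogOptions};

fn main() {
    tracing_subscriber::fmt::init();

    // Default: forward all llama.cpp and ggml logs to tracing.
    send_logs_to_tracing(LogOptions::default());

    // Or suppress them entirely:
    // send_logs_to_tracing(LogOptions::default().with_logs_enabled(false));
}
```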
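The private `log` module itself is not part of this hunk. Inferred purely from the call sites above, its `State` presumably looks something like the sketch below; every name and field here is an assumption (including the `OnceLock` choice and the `u32` level alias), not the actual contents of the module.

```rust
// Assumed shape of the private log module, reconstructed from lib.rs call
// sites only; method bodies are elided since the real logic isn't shown.
use std::sync::{Mutex, OnceLock};

type GgmlLogLevel = u32; // stand-in for llama_cpp_sys_2::ggml_log_level

pub(crate) enum Module {
    LlamaCpp,
    GGML,
}

pub(crate) struct State {
    pub(crate) options: crate::LogOptions,
    module: Module,
    // Slow path only: a partial line waiting for a CONT continuation or for
    // the next complete message to force a flush.
    buffered: Mutex<Option<(GgmlLogLevel, String)>>,
}

impl State {
    pub(crate) fn new(module: Module, options: crate::LogOptions) -> Self {
        Self { options, module, buffered: Mutex::new(None) }
    }

    // Append a CONT fragment to the buffered line; flush once it ends in '\n'.
    pub(crate) fn cont_buffered_log(&self, text: &str) { /* ... */ }

    // Fast path: a complete newline-terminated line; flush any buffer, then emit.
    pub(crate) fn emit_non_cont_line(&self, level: GgmlLogLevel, text: &str) { /* ... */ }

    // A new message without a trailing '\n': flush any buffer, buffer this one.
    pub(crate) fn buffer_non_cont(&self, level: GgmlLogLevel, text: &str) { /* ... */ }
}

pub(crate) static LLAMA_STATE: OnceLock<Box<State>> = OnceLock::new();
pub(crate) static GGML_STATE: OnceLock<Box<State>> = OnceLock::new();
```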