refactor logging system

ngxson · ngxson · commit 88aec684cf6a · 2025-04-05T15:28:11.000+02:00
diff --git a/examples/llava/clip-impl.h b/examples/llava/clip-impl.h
@@ -120,6 +120,64 @@ static projector_type clip_projector_type_from_string(const std::string & str) {
     return PROJECTOR_TYPE_UNKNOWN;
 }
 
+//
+// logging
+//
+
+static void clip_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
+    (void) user_data; // the default sink keeps no state
+    (void) level;     // every level is written to the same stream
+    fprintf(stderr, "%s", text);
+    fflush(stderr);   // flush right away so the message is not held in the stream buffer
+}
+
+struct clip_logger_state { // logging configuration read by the LOG_* macros and clip_log_internal_v
+    ggml_log_level verbosity_thold; // LOG_TMPL forwards a message only when its level >= this value
+    ggml_log_callback log_callback; // sink that receives each formatted message
+    void * log_callback_user_data; // handed to log_callback unchanged as its last argument
+};
+
+extern struct clip_logger_state g_logger_state; // declaration only; this diff adds the definition in clip.cpp
+
+// Formats one message and passes it to g_logger_state.log_callback; output longer than the stack buffer is re-formatted into a heap buffer.
+static void clip_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
+    if (format == NULL) {
+        return;
+    }
+    va_list args_copy;
+    va_copy(args_copy, args); // the first vsnprintf consumes args, so keep a copy for the second pass
+    char buffer[128];
+    const int len = vsnprintf(buffer, sizeof(buffer), format, args);
+    char * big = len >= (int) sizeof(buffer) ? (char *) calloc((size_t) len + 1, sizeof(char)) : NULL;
+    if (big != NULL) {
+        vsnprintf(big, (size_t) len + 1, format, args_copy);
+        g_logger_state.log_callback(level, big, g_logger_state.log_callback_user_data);
+        free(big);
+    } else if (len >= 0) { // fits in buffer, or calloc failed: emit the (possibly truncated) stack copy
+        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
+    } // len < 0 means vsnprintf failed and buffer is unspecified, so nothing is logged
+    va_end(args_copy);
+}
+
+static void clip_log_internal(enum ggml_log_level level, const char * format, ...) {
+    va_list ap; // variadic front end: collects the arguments and defers to clip_log_internal_v
+    va_start(ap, format);
+    clip_log_internal_v(level, format, ap);
+    va_end(ap);
+}
+
+#define LOG_TMPL(level, ...) /* log through clip_log_internal when level reaches the global threshold */ \
+    do { /* do/while(0) lets the macro be used as a single statement */ \
+        if ((level) >= g_logger_state.verbosity_thold) { /* note: level is expanded twice */ \
+            clip_log_internal((level), __VA_ARGS__); \
+        } \
+    } while (0)
+#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  __VA_ARGS__) // informational message
+#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  __VA_ARGS__) // warning
+#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) // error
+#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__) // debug detail
+#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT,  __VA_ARGS__) // uses GGML_LOG_LEVEL_CONT; presumably continues the previous message - confirm in ggml.h
+
 //
 // common utils
 //
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
@@ -28,17 +28,7 @@
 #include <cinttypes>
 #include <limits>
 
-#if defined(LLAVA_LOG_OFF)
-#   define LOG_INF(...)
-#   define LOG_WRN(...)
-#   define LOG_ERR(...)
-#   define LOG_DBG(...)
-#else // defined(LLAVA_LOG_OFF)
-#   define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
-#   define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
-#   define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
-#   define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
-#endif // defined(LLAVA_LOG_OFF)
+struct clip_logger_state g_logger_state = {GGML_LOG_LEVEL_CONT, clip_log_callback_default, NULL};
 
 //#define CLIP_DEBUG_FUNCTIONS
 
@@ -1121,7 +1111,6 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
 }
 
 struct clip_model_loader {
-    clip_log_level verbosity;
     ggml_context_ptr ctx_meta;
     gguf_context_ptr ctx_gguf;
 
@@ -1131,7 +1120,7 @@ struct clip_model_loader {
     size_t model_size; // in bytes
 
     // TODO @ngxson : we should not pass clip_ctx here, it should be clip_vision_model
-    clip_model_loader(const char * fname, struct clip_context_params ctx_params, clip_ctx & ctx_clip) : verbosity(ctx_params.verbosity), ctx_clip(ctx_clip), fname(fname) {
+    clip_model_loader(const char * fname, clip_ctx & ctx_clip) : ctx_clip(ctx_clip), fname(fname) {
         struct ggml_context * meta = NULL;
 
         struct gguf_init_params params = {
@@ -1149,7 +1138,7 @@ struct clip_model_loader {
         const int n_tensors = gguf_get_n_tensors(ctx_gguf.get());
 
         // print gguf info
-        if (verbosity >= CLIP_LOG_ERROR) {
+        {
             int ftype = -1;
             get_u32(KEY_FTYPE, ftype, false);
             const std::string ftype_str = ggml_type_name(static_cast<ggml_type>(ftype));
@@ -1176,10 +1165,8 @@ struct clip_model_loader {
                 struct ggml_tensor * cur = ggml_get_tensor(meta, name);
                 size_t tensor_size = ggml_nbytes(cur);
                 model_size += tensor_size;
-                if (verbosity >= CLIP_LOG_DEBUG) {
-                    LOG_INF("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n",
-                        __func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type));
-                }
+                LOG_DBG("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n",
+                    __func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type));
             }
         }
     }
@@ -1262,16 +1249,14 @@ struct clip_model_loader {
             // Calculate the deepest feature layer based on hparams and projector type
             ctx_clip.max_feature_layer = get_deepest_feature_layer(&ctx_clip);
 
-            if (verbosity >= CLIP_LOG_ERROR) {
-                LOG_INF("%s: text_encoder:       %d\n", __func__, ctx_clip.has_text_encoder);
-                LOG_INF("%s: vision_encoder:     %d\n", __func__, ctx_clip.has_vision_encoder);
-                LOG_INF("%s: llava_projector:    %d\n", __func__, ctx_clip.has_llava_projector);
-                LOG_INF("%s: minicpmv_projector: %d\n", __func__, ctx_clip.has_minicpmv_projector);
-                LOG_INF("%s: minicpmv_version:   %d\n", __func__, ctx_clip.minicpmv_version);
-                LOG_INF("%s: glm_projector:      %d\n", __func__, ctx_clip.has_glm_projector);
-                LOG_INF("%s: model size:         %.2f MiB\n", __func__, model_size / 1024.0 / 1024.0);
-                LOG_INF("%s: metadata size:      %.2f MiB\n", __func__, ggml_get_mem_size(ctx_meta.get()) / 1024.0 / 1024.0);
-            }
+            LOG_INF("%s: text_encoder:       %d\n", __func__, ctx_clip.has_text_encoder);
+            LOG_INF("%s: vision_encoder:     %d\n", __func__, ctx_clip.has_vision_encoder);
+            LOG_INF("%s: llava_projector:    %d\n", __func__, ctx_clip.has_llava_projector);
+            LOG_INF("%s: minicpmv_projector: %d\n", __func__, ctx_clip.has_minicpmv_projector);
+            LOG_INF("%s: minicpmv_version:   %d\n", __func__, ctx_clip.minicpmv_version);
+            LOG_INF("%s: glm_projector:      %d\n", __func__, ctx_clip.has_glm_projector);
+            LOG_INF("%s: model size:         %.2f MiB\n", __func__, model_size / 1024.0 / 1024.0);
+            LOG_INF("%s: metadata size:      %.2f MiB\n", __func__, ggml_get_mem_size(ctx_meta.get()) / 1024.0 / 1024.0);
         }
     }
 
@@ -1495,9 +1480,7 @@ struct clip_model_loader {
             }
             fin.close();
 
-            if (verbosity >= CLIP_LOG_INFO) {
-                LOG_INF("%s: loaded %zu tensors from %s\n", __func__, tensors_to_load.size(), fname.c_str());
-            }
+            LOG_DBG("%s: loaded %zu tensors from %s\n", __func__, tensors_to_load.size(), fname.c_str());
         }
     }
 
@@ -1581,18 +1564,19 @@ struct clip_model_loader {
 };
 
 // read and create ggml_context containing the tensors and their data
-struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
+struct clip_ctx * clip_model_load(const char * fname, const int verbosity) {
     return clip_init(fname, clip_context_params{
         /* use_gpu */   true,
-        /* verbosity */ static_cast<clip_log_level>(verbosity),
+        /* verbosity */ static_cast<ggml_log_level>(verbosity),
     });
 }
 
 struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params) {
+    g_logger_state.verbosity_thold = ctx_params.verbosity;
     clip_ctx * ctx_clip = new clip_ctx(ctx_params);
 
     try {
-        clip_model_loader loader(fname, ctx_params, *ctx_clip);
+        clip_model_loader loader(fname, *ctx_clip);
         loader.load_hparams();
         loader.load_tensors();
         loader.alloc_compute_meta();
@@ -1974,7 +1958,7 @@ static std::vector<std::vector<clip_image_u8 *>> uhd_slice_image(const clip_imag
     const int multiple = fmin(ceil(ratio), max_slice_nums);
 
     std::vector<std::vector<clip_image_u8 *>> images;
-    LOG_INF("%s: multiple %d\n", __func__, multiple);
+    LOG_DBG("%s: multiple %d\n", __func__, multiple);
     images.push_back(std::vector<clip_image_u8 *>());
 
     if (multiple <= 1) {
@@ -1989,17 +1973,17 @@ static std::vector<std::vector<clip_image_u8 *>> uhd_slice_image(const clip_imag
         clip_image_u8 * source_image = clip_image_u8_init();
         bicubic_resize(*img, *source_image, best_size.first, best_size.second);
         // source_image = image.copy().resize(best_resize, Image.Resampling.BICUBIC)
-        LOG_INF("%s: image_size: %d %d; source_image size: %d %d\n", __func__, img->nx, img->ny, best_size.first, best_size.second);
+        LOG_DBG("%s: image_size: %d %d; source_image size: %d %d\n", __func__, img->nx, img->ny, best_size.first, best_size.second);
         images[images.size()-1].push_back(source_image);
 
         std::pair<int, int> best_grid = uhd_best_grid(max_slice_nums, multiple, log_ratio);
-        LOG_INF("%s: image_size: %d %d; best_grid: %d %d\n", __func__, img->nx, img->ny, best_grid.first, best_grid.second);
+        LOG_DBG("%s: image_size: %d %d; best_grid: %d %d\n", __func__, img->nx, img->ny, best_grid.first, best_grid.second);
 
         auto refine_size = uhd_get_refine_size(original_size, best_grid, scale_resolution, patch_size, true);
         clip_image_u8 * refine_image = clip_image_u8_init();
         bicubic_resize(*img, *refine_image, refine_size.first, refine_size.second);
 
-        LOG_INF("%s: refine_image_size: %d %d; refine_size: %d %d\n", __func__, refine_image->nx, refine_image->ny, refine_size.first, refine_size.second);
+        LOG_DBG("%s: refine_image_size: %d %d; refine_size: %d %d\n", __func__, refine_image->nx, refine_image->ny, refine_size.first, refine_size.second);
 
         // split_to_patches
         int width = refine_image->nx;
@@ -2107,7 +2091,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
 
     bool pad_to_square = true;
     if (!ctx->has_vision_encoder) {
-        LOG_ERR("This gguf file seems to have no vision encoder\n");
+        LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__);
         return false;
     }
     auto & params = ctx->vision_model.hparams;
@@ -2444,7 +2428,7 @@ static std::vector<std::vector<float>> get_2d_sincos_pos_embed(int embed_dim, co
 
 bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f32 * img, float * vec) {
     if (!ctx->has_vision_encoder) {
-        LOG_ERR("This gguf file seems to have no vision encoder\n");
+        LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__);
         return false;
     }
 
@@ -2456,7 +2440,7 @@ bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f3
 
 bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs, float * vec) {
     if (!ctx->has_vision_encoder) {
-        LOG_ERR("This gguf file seems to have no vision encoder\n");
+        LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__);
         return false;
     }
 
@@ -2673,7 +2657,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
 
     auto * ctx_clip = clip_init(fname_inp, clip_context_params{
         /* use_gpu */   false,
-        /* verbosity */ CLIP_LOG_ERROR,
+        /* verbosity */ GGML_LOG_LEVEL_ERROR,
     });
 
     const auto & ctx_src = ctx_clip->ctx_gguf;
@@ -2751,7 +2735,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
                 f32_data = (float *)conv_buf.data();
                 break;
             default:
-                LOG_ERR("Please use an input file in f32 or f16\n");
+                LOG_ERR("%s: Please use an input file in f32 or f16\n", __func__);
                 gguf_free(ctx_out);
                 return false;
             }
diff --git a/examples/llava/clip.h b/examples/llava/clip.h
@@ -1,6 +1,7 @@
 #ifndef CLIP_H
 #define CLIP_H
 
+#include "ggml.h"
 #include <stddef.h>
 #include <stdint.h>
 
@@ -24,14 +25,6 @@ extern "C" {
 
 struct clip_ctx;
 
-enum clip_log_level {
-    CLIP_LOG_NONE    = 0,
-    CLIP_LOG_ERROR   = 1,
-    CLIP_LOG_WARNING = 2,
-    CLIP_LOG_INFO    = 3,
-    CLIP_LOG_DEBUG   = 4,
-};
-
 struct clip_image_size {
     int width;
     int height;
@@ -49,7 +42,7 @@ struct clip_image_f32_batch {
 
 struct clip_context_params {
     bool use_gpu;
-    clip_log_level verbosity;
+    ggml_log_level verbosity;
 };
 
 // deprecated, use clip_init
diff --git a/examples/llava/gemma3-cli.cpp b/examples/llava/gemma3-cli.cpp
@@ -79,7 +79,7 @@ struct gemma3_context {
 
     void init_clip_model(common_params & params) {
         const char * clip_path = params.mmproj.path.c_str();
-        ctx_clip = clip_model_load(clip_path, CLIP_LOG_INFO);
+        ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO);
         if (!ctx_clip) {
             LOG_ERR("Failed to load CLIP model from %s\n", clip_path);
             exit(1);
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
@@ -241,7 +241,7 @@ static struct llava_context * llava_init_context(common_params * params, llama_m
         prompt = "describe the image in detail.";
     }
 
-    auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
+    auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO);
 
     llama_context_params ctx_params = common_context_params_to_llama(*params);
     ctx_params.n_ctx           = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
@@ -88,7 +88,7 @@ static struct clip_ctx * clip_init_context(common_params * params) {
     }
     struct clip_context_params clip_params = {
         /* use_gpu */   params->n_gpu_layers != 0,
-        /* verbosity */ CLIP_LOG_INFO, // TODO: make this configurable
+        /* verbosity */ GGML_LOG_LEVEL_INFO, // TODO: make this configurable
     };
     auto * ctx_clip = clip_init(clip_path, clip_params);
     return ctx_clip;
diff --git a/examples/llava/qwen2vl-cli.cpp b/examples/llava/qwen2vl-cli.cpp
@@ -330,7 +330,7 @@ static struct llava_context * llava_init_context(common_params * params, llama_m
         prompt = "describe the image in detail.";
     }
 
-    auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
+    auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO);
 
     llama_context_params ctx_params = common_context_params_to_llama(*params);
     ctx_params.n_ctx           = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings

Original file line number	Diff line number	Diff line change
`@@ -241,7 +241,7 @@ static struct llava_context * llava_init_context(common_params * params, llama_m`
`241`	`241`	`prompt = "describe the image in detail.";`
`242`	`242`	`}`
`243`	`243`
`244`		`- auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);`
	`244`	`+ auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO);`
`245`	`245`
`246`	`246`	`llama_context_params ctx_params = common_context_params_to_llama(*params);`
`247`	`247`	`ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings`
Original file line number	Diff line number	Diff line change
`@@ -88,7 +88,7 @@ static struct clip_ctx * clip_init_context(common_params * params) {`
`88`	`88`	`}`
`89`	`89`	`struct clip_context_params clip_params = {`
`90`	`90`	`/* use_gpu */ params->n_gpu_layers != 0,`
`91`		`- /* verbosity */ CLIP_LOG_INFO, // TODO: make this configurable`
	`91`	`+ /* verbosity */ GGML_LOG_LEVEL_INFO, // TODO: make this configurable`
`92`	`92`	`};`
`93`	`93`	`auto * ctx_clip = clip_init(clip_path, clip_params);`
`94`	`94`	`return ctx_clip;`
Original file line number	Diff line number	Diff line change
`@@ -330,7 +330,7 @@ static struct llava_context * llava_init_context(common_params * params, llama_m`
`330`	`330`	`prompt = "describe the image in detail.";`
`331`	`331`	`}`
`332`	`332`
`333`		`- auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);`
	`333`	`+ auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO);`
`334`	`334`
`335`	`335`	`llama_context_params ctx_params = common_context_params_to_llama(*params);`
`336`	`336`	`ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings`