Skip to content

Commit 88aec68

Browse files
committed
refactor logging system
1 parent 84b35d2 commit 88aec68

File tree

7 files changed

+91
-56
lines changed

7 files changed

+91
-56
lines changed

examples/llava/clip-impl.h

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,64 @@ static projector_type clip_projector_type_from_string(const std::string & str) {
120120
return PROJECTOR_TYPE_UNKNOWN;
121121
}
122122

123+
//
124+
// logging
125+
//
126+
127+
static void clip_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
128+
(void) level;
129+
(void) user_data;
130+
fputs(text, stderr);
131+
fflush(stderr);
132+
}
133+
134+
struct clip_logger_state {
135+
ggml_log_level verbosity_thold;
136+
ggml_log_callback log_callback;
137+
void * log_callback_user_data;
138+
};
139+
140+
extern struct clip_logger_state g_logger_state;
141+
142+
static void clip_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
143+
if (format == NULL) {
144+
return;
145+
}
146+
va_list args_copy;
147+
va_copy(args_copy, args);
148+
char buffer[128];
149+
int len = vsnprintf(buffer, 128, format, args);
150+
if (len < 128) {
151+
g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
152+
} else {
153+
char * buffer2 = (char *) calloc(len + 1, sizeof(char));
154+
vsnprintf(buffer2, len + 1, format, args_copy);
155+
buffer2[len] = 0;
156+
g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
157+
free(buffer2);
158+
}
159+
va_end(args_copy);
160+
}
161+
162+
static void clip_log_internal(enum ggml_log_level level, const char * format, ...) {
163+
va_list args;
164+
va_start(args, format);
165+
clip_log_internal_v(level, format, args);
166+
va_end(args);
167+
}
168+
169+
#define LOG_TMPL(level, ...) \
170+
do { \
171+
if ((level) >= g_logger_state.verbosity_thold) { \
172+
clip_log_internal((level), __VA_ARGS__); \
173+
} \
174+
} while (0)
175+
#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
176+
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
177+
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
178+
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
179+
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, __VA_ARGS__)
180+
123181
//
124182
// common utils
125183
//

examples/llava/clip.cpp

Lines changed: 27 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,7 @@
2828
#include <cinttypes>
2929
#include <limits>
3030

31-
#if defined(LLAVA_LOG_OFF)
32-
# define LOG_INF(...)
33-
# define LOG_WRN(...)
34-
# define LOG_ERR(...)
35-
# define LOG_DBG(...)
36-
#else // defined(LLAVA_LOG_OFF)
37-
# define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
38-
# define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
39-
# define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
40-
# define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
41-
#endif // defined(LLAVA_LOG_OFF)
31+
struct clip_logger_state g_logger_state = {GGML_LOG_LEVEL_CONT, clip_log_callback_default, NULL};
4232

4333
//#define CLIP_DEBUG_FUNCTIONS
4434

@@ -1121,7 +1111,6 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
11211111
}
11221112

11231113
struct clip_model_loader {
1124-
clip_log_level verbosity;
11251114
ggml_context_ptr ctx_meta;
11261115
gguf_context_ptr ctx_gguf;
11271116

@@ -1131,7 +1120,7 @@ struct clip_model_loader {
11311120
size_t model_size; // in bytes
11321121

11331122
// TODO @ngxson : we should not pass clip_ctx here, it should be clip_vision_model
1134-
clip_model_loader(const char * fname, struct clip_context_params ctx_params, clip_ctx & ctx_clip) : verbosity(ctx_params.verbosity), ctx_clip(ctx_clip), fname(fname) {
1123+
clip_model_loader(const char * fname, clip_ctx & ctx_clip) : ctx_clip(ctx_clip), fname(fname) {
11351124
struct ggml_context * meta = NULL;
11361125

11371126
struct gguf_init_params params = {
@@ -1149,7 +1138,7 @@ struct clip_model_loader {
11491138
const int n_tensors = gguf_get_n_tensors(ctx_gguf.get());
11501139

11511140
// print gguf info
1152-
if (verbosity >= CLIP_LOG_ERROR) {
1141+
{
11531142
int ftype = -1;
11541143
get_u32(KEY_FTYPE, ftype, false);
11551144
const std::string ftype_str = ggml_type_name(static_cast<ggml_type>(ftype));
@@ -1176,10 +1165,8 @@ struct clip_model_loader {
11761165
struct ggml_tensor * cur = ggml_get_tensor(meta, name);
11771166
size_t tensor_size = ggml_nbytes(cur);
11781167
model_size += tensor_size;
1179-
if (verbosity >= CLIP_LOG_DEBUG) {
1180-
LOG_INF("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n",
1181-
__func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type));
1182-
}
1168+
LOG_DBG("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, offset=%zu, shape:[%" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %" PRIu64 "], type = %s\n",
1169+
__func__, i, ggml_n_dims(cur), cur->name, tensor_size, offset, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3], ggml_type_name(type));
11831170
}
11841171
}
11851172
}
@@ -1262,16 +1249,14 @@ struct clip_model_loader {
12621249
// Calculate the deepest feature layer based on hparams and projector type
12631250
ctx_clip.max_feature_layer = get_deepest_feature_layer(&ctx_clip);
12641251

1265-
if (verbosity >= CLIP_LOG_ERROR) {
1266-
LOG_INF("%s: text_encoder: %d\n", __func__, ctx_clip.has_text_encoder);
1267-
LOG_INF("%s: vision_encoder: %d\n", __func__, ctx_clip.has_vision_encoder);
1268-
LOG_INF("%s: llava_projector: %d\n", __func__, ctx_clip.has_llava_projector);
1269-
LOG_INF("%s: minicpmv_projector: %d\n", __func__, ctx_clip.has_minicpmv_projector);
1270-
LOG_INF("%s: minicpmv_version: %d\n", __func__, ctx_clip.minicpmv_version);
1271-
LOG_INF("%s: glm_projector: %d\n", __func__, ctx_clip.has_glm_projector);
1272-
LOG_INF("%s: model size: %.2f MiB\n", __func__, model_size / 1024.0 / 1024.0);
1273-
LOG_INF("%s: metadata size: %.2f MiB\n", __func__, ggml_get_mem_size(ctx_meta.get()) / 1024.0 / 1024.0);
1274-
}
1252+
LOG_INF("%s: text_encoder: %d\n", __func__, ctx_clip.has_text_encoder);
1253+
LOG_INF("%s: vision_encoder: %d\n", __func__, ctx_clip.has_vision_encoder);
1254+
LOG_INF("%s: llava_projector: %d\n", __func__, ctx_clip.has_llava_projector);
1255+
LOG_INF("%s: minicpmv_projector: %d\n", __func__, ctx_clip.has_minicpmv_projector);
1256+
LOG_INF("%s: minicpmv_version: %d\n", __func__, ctx_clip.minicpmv_version);
1257+
LOG_INF("%s: glm_projector: %d\n", __func__, ctx_clip.has_glm_projector);
1258+
LOG_INF("%s: model size: %.2f MiB\n", __func__, model_size / 1024.0 / 1024.0);
1259+
LOG_INF("%s: metadata size: %.2f MiB\n", __func__, ggml_get_mem_size(ctx_meta.get()) / 1024.0 / 1024.0);
12751260
}
12761261
}
12771262

@@ -1495,9 +1480,7 @@ struct clip_model_loader {
14951480
}
14961481
fin.close();
14971482

1498-
if (verbosity >= CLIP_LOG_INFO) {
1499-
LOG_INF("%s: loaded %zu tensors from %s\n", __func__, tensors_to_load.size(), fname.c_str());
1500-
}
1483+
LOG_DBG("%s: loaded %zu tensors from %s\n", __func__, tensors_to_load.size(), fname.c_str());
15011484
}
15021485
}
15031486

@@ -1581,18 +1564,19 @@ struct clip_model_loader {
15811564
};
15821565

15831566
// read and create ggml_context containing the tensors and their data
1584-
struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
1567+
struct clip_ctx * clip_model_load(const char * fname, const int verbosity) {
15851568
return clip_init(fname, clip_context_params{
15861569
/* use_gpu */ true,
1587-
/* verbosity */ static_cast<clip_log_level>(verbosity),
1570+
/* verbosity */ static_cast<ggml_log_level>(verbosity),
15881571
});
15891572
}
15901573

15911574
struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params) {
1575+
g_logger_state.verbosity_thold = ctx_params.verbosity;
15921576
clip_ctx * ctx_clip = new clip_ctx(ctx_params);
15931577

15941578
try {
1595-
clip_model_loader loader(fname, ctx_params, *ctx_clip);
1579+
clip_model_loader loader(fname, *ctx_clip);
15961580
loader.load_hparams();
15971581
loader.load_tensors();
15981582
loader.alloc_compute_meta();
@@ -1974,7 +1958,7 @@ static std::vector<std::vector<clip_image_u8 *>> uhd_slice_image(const clip_imag
19741958
const int multiple = fmin(ceil(ratio), max_slice_nums);
19751959

19761960
std::vector<std::vector<clip_image_u8 *>> images;
1977-
LOG_INF("%s: multiple %d\n", __func__, multiple);
1961+
LOG_DBG("%s: multiple %d\n", __func__, multiple);
19781962
images.push_back(std::vector<clip_image_u8 *>());
19791963

19801964
if (multiple <= 1) {
@@ -1989,17 +1973,17 @@ static std::vector<std::vector<clip_image_u8 *>> uhd_slice_image(const clip_imag
19891973
clip_image_u8 * source_image = clip_image_u8_init();
19901974
bicubic_resize(*img, *source_image, best_size.first, best_size.second);
19911975
// source_image = image.copy().resize(best_resize, Image.Resampling.BICUBIC)
1992-
LOG_INF("%s: image_size: %d %d; source_image size: %d %d\n", __func__, img->nx, img->ny, best_size.first, best_size.second);
1976+
LOG_DBG("%s: image_size: %d %d; source_image size: %d %d\n", __func__, img->nx, img->ny, best_size.first, best_size.second);
19931977
images[images.size()-1].push_back(source_image);
19941978

19951979
std::pair<int, int> best_grid = uhd_best_grid(max_slice_nums, multiple, log_ratio);
1996-
LOG_INF("%s: image_size: %d %d; best_grid: %d %d\n", __func__, img->nx, img->ny, best_grid.first, best_grid.second);
1980+
LOG_DBG("%s: image_size: %d %d; best_grid: %d %d\n", __func__, img->nx, img->ny, best_grid.first, best_grid.second);
19971981

19981982
auto refine_size = uhd_get_refine_size(original_size, best_grid, scale_resolution, patch_size, true);
19991983
clip_image_u8 * refine_image = clip_image_u8_init();
20001984
bicubic_resize(*img, *refine_image, refine_size.first, refine_size.second);
20011985

2002-
LOG_INF("%s: refine_image_size: %d %d; refine_size: %d %d\n", __func__, refine_image->nx, refine_image->ny, refine_size.first, refine_size.second);
1986+
LOG_DBG("%s: refine_image_size: %d %d; refine_size: %d %d\n", __func__, refine_image->nx, refine_image->ny, refine_size.first, refine_size.second);
20031987

20041988
// split_to_patches
20051989
int width = refine_image->nx;
@@ -2107,7 +2091,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
21072091

21082092
bool pad_to_square = true;
21092093
if (!ctx->has_vision_encoder) {
2110-
LOG_ERR("This gguf file seems to have no vision encoder\n");
2094+
LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__);
21112095
return false;
21122096
}
21132097
auto & params = ctx->vision_model.hparams;
@@ -2444,7 +2428,7 @@ static std::vector<std::vector<float>> get_2d_sincos_pos_embed(int embed_dim, co
24442428

24452429
bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f32 * img, float * vec) {
24462430
if (!ctx->has_vision_encoder) {
2447-
LOG_ERR("This gguf file seems to have no vision encoder\n");
2431+
LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__);
24482432
return false;
24492433
}
24502434

@@ -2456,7 +2440,7 @@ bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f3
24562440

24572441
bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs, float * vec) {
24582442
if (!ctx->has_vision_encoder) {
2459-
LOG_ERR("This gguf file seems to have no vision encoder\n");
2443+
LOG_ERR("%s: This gguf file seems to have no vision encoder\n", __func__);
24602444
return false;
24612445
}
24622446

@@ -2673,7 +2657,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
26732657

26742658
auto * ctx_clip = clip_init(fname_inp, clip_context_params{
26752659
/* use_gpu */ false,
2676-
/* verbosity */ CLIP_LOG_ERROR,
2660+
/* verbosity */ GGML_LOG_LEVEL_ERROR,
26772661
});
26782662

26792663
const auto & ctx_src = ctx_clip->ctx_gguf;
@@ -2751,7 +2735,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
27512735
f32_data = (float *)conv_buf.data();
27522736
break;
27532737
default:
2754-
LOG_ERR("Please use an input file in f32 or f16\n");
2738+
LOG_ERR("%s: Please use an input file in f32 or f16\n", __func__);
27552739
gguf_free(ctx_out);
27562740
return false;
27572741
}

examples/llava/clip.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef CLIP_H
22
#define CLIP_H
33

4+
#include "ggml.h"
45
#include <stddef.h>
56
#include <stdint.h>
67

@@ -24,14 +25,6 @@ extern "C" {
2425

2526
struct clip_ctx;
2627

27-
enum clip_log_level {
28-
CLIP_LOG_NONE = 0,
29-
CLIP_LOG_ERROR = 1,
30-
CLIP_LOG_WARNING = 2,
31-
CLIP_LOG_INFO = 3,
32-
CLIP_LOG_DEBUG = 4,
33-
};
34-
3528
struct clip_image_size {
3629
int width;
3730
int height;
@@ -49,7 +42,7 @@ struct clip_image_f32_batch {
4942

5043
struct clip_context_params {
5144
bool use_gpu;
52-
clip_log_level verbosity;
45+
ggml_log_level verbosity;
5346
};
5447

5548
// deprecated, use clip_init

examples/llava/gemma3-cli.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ struct gemma3_context {
7979

8080
void init_clip_model(common_params & params) {
8181
const char * clip_path = params.mmproj.path.c_str();
82-
ctx_clip = clip_model_load(clip_path, CLIP_LOG_INFO);
82+
ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO);
8383
if (!ctx_clip) {
8484
LOG_ERR("Failed to load CLIP model from %s\n", clip_path);
8585
exit(1);

examples/llava/llava-cli.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ static struct llava_context * llava_init_context(common_params * params, llama_m
241241
prompt = "describe the image in detail.";
242242
}
243243

244-
auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
244+
auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO);
245245

246246
llama_context_params ctx_params = common_context_params_to_llama(*params);
247247
ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings

examples/llava/minicpmv-cli.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ static struct clip_ctx * clip_init_context(common_params * params) {
8888
}
8989
struct clip_context_params clip_params = {
9090
/* use_gpu */ params->n_gpu_layers != 0,
91-
/* verbosity */ CLIP_LOG_INFO, // TODO: make this configurable
91+
/* verbosity */ GGML_LOG_LEVEL_INFO, // TODO: make this configurable
9292
};
9393
auto * ctx_clip = clip_init(clip_path, clip_params);
9494
return ctx_clip;

examples/llava/qwen2vl-cli.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ static struct llava_context * llava_init_context(common_params * params, llama_m
330330
prompt = "describe the image in detail.";
331331
}
332332

333-
auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
333+
auto ctx_clip = clip_model_load(clip_path, GGML_LOG_LEVEL_INFO);
334334

335335
llama_context_params ctx_params = common_context_params_to_llama(*params);
336336
ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings

0 commit comments

Comments (0)