
Commit 9677640

ggml : move more prints to the ggml log system (ggml-org#9839)
* ggml : move more prints to the ggml log system
* show BLAS OpenMP warnings in all builds using debug print
1 parent 7eee341 commit 9677640
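
For context: the "ggml log system" this commit moves to is the internal GGML_LOG_* macro family, which routes messages through a user-replaceable callback instead of writing straight to stderr. A minimal, self-contained sketch of how such macros are typically wired up — names and levels here are illustrative assumptions, not copied from ggml-impl.h, where the real definitions live:

#include <stdarg.h>
#include <stdio.h>

// Sketch only: in ggml the real entry point forwards to a callback
// installed via ggml_log_set() rather than printing directly.
enum log_level_sketch { LOG_DEBUG_SKETCH, LOG_ERROR_SKETCH };

static void log_internal_sketch(enum log_level_sketch level, const char * fmt, ...) {
    (void) level;
    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}

#define GGML_LOG_ERROR(...) log_internal_sketch(LOG_ERROR_SKETCH, __VA_ARGS__)
#define GGML_LOG_DEBUG(...) log_internal_sketch(LOG_DEBUG_SKETCH, __VA_ARGS__)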

4 files changed: +48 −48 lines


ggml/src/ggml-alloc.c

Lines changed: 17 additions & 17 deletions

@@ -14,7 +14,7 @@

 //#define GGML_ALLOCATOR_DEBUG

-//#define AT_PRINTF(...) fprintf(stderr, __VA_ARGS__)
+//#define AT_PRINTF(...) GGML_LOG_DEBUG(__VA_ARGS__)
 #define AT_PRINTF(...)

@@ -89,7 +89,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso
     size = GGML_PAD(size, talloc->alignment);

     if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) {
-        fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n",
+        GGML_LOG_ERROR("%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n",
                 __func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset);
         GGML_ABORT("not enough space in the buffer");
     }
@@ -172,7 +172,7 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz
             best_fit_block = alloc->n_free_blocks - 1;
         } else {
             // this should never happen
-            fprintf(stderr, "%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
+            GGML_LOG_ERROR("%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
                     __func__, size, max_avail);
             GGML_ABORT("not enough space in the buffer");
         }
@@ -209,16 +209,16 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz
                 }
             }
         }
-        fprintf(stderr, "max_size = %.2f MB: tensors: ", cur_max / 1024.0 / 1024.0);
+        GGML_LOG_DEBUG("max_size = %.2f MB: tensors: ", cur_max / 1024.0 / 1024.0);
         for (int i = 0; i < 1024; i++) {
             if (alloc->allocated_tensors[i].tensor) {
-                fprintf(stderr, "%s [%zx-%zx] (%.2f MB) ", alloc->allocated_tensors[i].tensor->name,
+                GGML_LOG_DEBUG("%s [%zx-%zx] (%.2f MB) ", alloc->allocated_tensors[i].tensor->name,
                     alloc->allocated_tensors[i].offset,
                     alloc->allocated_tensors[i].offset + ggml_nbytes(alloc->allocated_tensors[i].tensor),
                     ggml_nbytes(alloc->allocated_tensors[i].tensor) / 1024.0 / 1024.0);
             }
         }
-        fprintf(stderr, "\n");
+        GGML_LOG_DEBUG("\n");
     }
 #endif

@@ -768,13 +768,13 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
         // even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views
         if (new_size > cur_size || galloc->buffers[i] == NULL) {
 #ifndef NDEBUG
-            fprintf(stderr, "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+            GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
 #endif

             ggml_backend_buffer_free(galloc->buffers[i]);
             galloc->buffers[i] = ggml_backend_buft_alloc_buffer(galloc->bufts[i], new_size);
             if (galloc->buffers[i] == NULL) {
-                fprintf(stderr, "%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
+                GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
                 return false;
             }
             ggml_backend_buffer_set_usage(galloc->buffers[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
@@ -825,14 +825,14 @@ static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_t
 static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph * graph) {
     if (galloc->n_nodes != graph->n_nodes) {
 #ifndef NDEBUG
-        fprintf(stderr, "%s: graph has different number of nodes\n", __func__);
+        GGML_LOG_DEBUG("%s: graph has different number of nodes\n", __func__);
 #endif
         return true;
     }

     if (galloc->n_leafs != graph->n_leafs) {
 #ifndef NDEBUG
-        fprintf(stderr, "%s: graph has different number of leafs\n", __func__);
+        GGML_LOG_DEBUG("%s: graph has different number of leafs\n", __func__);
 #endif
         return true;
     }
@@ -843,7 +843,7 @@ static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph

         if (!ggml_gallocr_node_needs_realloc(galloc, node, &node_alloc->dst)) {
 #ifndef NDEBUG
-            fprintf(stderr, "%s: node %s is not valid\n", __func__, node->name);
+            GGML_LOG_DEBUG("%s: node %s is not valid\n", __func__, node->name);
 #endif
             return true;
         }
@@ -855,7 +855,7 @@ static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph
             }
             if (!ggml_gallocr_node_needs_realloc(galloc, src, &node_alloc->src[j])) {
 #ifndef NDEBUG
-                fprintf(stderr, "%s: src %d (%s) of node %s is not valid\n", __func__, j, src->name, node->name);
+                GGML_LOG_DEBUG("%s: src %d (%s) of node %s is not valid\n", __func__, j, src->name, node->name);
 #endif
                 return true;
             }
@@ -869,14 +869,14 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
     if (ggml_gallocr_needs_realloc(galloc, graph)) {
         if (galloc->n_buffers == 1) {
 #ifndef NDEBUG
-            fprintf(stderr, "%s: reallocating buffers automatically\n", __func__);
+            GGML_LOG_DEBUG("%s: reallocating buffers automatically\n", __func__);
 #endif
             if (!ggml_gallocr_reserve(galloc, graph)) {
                 return false;
             }
         } else {
 #ifndef NDEBUG
-            fprintf(stderr, "%s: cannot reallocate multi buffer graph automatically, call reserve\n", __func__);
+            GGML_LOG_DEBUG("%s: cannot reallocate multi buffer graph automatically, call reserve\n", __func__);
 #endif
             return false;
         }
@@ -940,7 +940,7 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
     ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
     if (buffer == NULL) {
 #ifndef NDEBUG
-        fprintf(stderr, "%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
+        GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
 #endif
         for (size_t i = 0; i < *n_buffers; i++) {
             ggml_backend_buffer_free((*buffers)[i]);
@@ -990,7 +990,7 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
         }

         if (this_size > max_size) {
-            fprintf(stderr, "%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
+            GGML_LOG_ERROR("%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
                     __func__, t->name,
                     ggml_backend_buft_name(buft),
                     this_size, max_size);
@@ -1022,7 +1022,7 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte

     if (n_buffers == 0) {
 #ifndef NDEBUG
-        fprintf(stderr, "%s: all tensors in the context are already allocated\n", __func__);
+        GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
 #endif
         return NULL;
     }
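
A note on the first hunk above: GGML_ALLOCATOR_DEBUG and AT_PRINTF are opt-in tracing toggles. To trace individual allocations after this change, the commented-out defines at the top of ggml-alloc.c are meant to be switched on by hand, roughly like this (a sketch mirroring the hunk; the empty AT_PRINTF fallback on the following line must be commented out in its place):

// In ggml/src/ggml-alloc.c (sketch of the opt-in debug toggles):
#define GGML_ALLOCATOR_DEBUG
#define AT_PRINTF(...) GGML_LOG_DEBUG(__VA_ARGS__)
//#define AT_PRINTF(...)   // disable the empty definition when tracing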

ggml/src/ggml-backend.cpp

Lines changed: 16 additions & 16 deletions

@@ -379,7 +379,7 @@ void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst
         ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src));
     } else if (!ggml_backend_buffer_copy_tensor(src, dst)) {
 #ifndef NDEBUG
-        fprintf(stderr, "%s: warning: slow copy from %s to %s\n", __func__, ggml_backend_buffer_name(src->buffer), ggml_backend_buffer_name(dst->buffer));
+        GGML_LOG_DEBUG("%s: warning: slow copy from %s to %s\n", __func__, ggml_backend_buffer_name(src->buffer), ggml_backend_buffer_name(dst->buffer));
 #endif
         size_t nbytes = ggml_nbytes(src);
         void * data = malloc(nbytes);
@@ -571,7 +571,7 @@ struct ggml_backend_registry {

     void register_backend(ggml_backend_reg_t reg) {
 #ifndef NDEBUG
-        fprintf(stderr, "%s: registered backend %s (%zu devices)\n",
+        GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
             __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
 #endif
         backends.push_back(reg);
@@ -582,7 +582,7 @@ struct ggml_backend_registry {

     void register_device(ggml_backend_dev_t device) {
 #ifndef NDEBUG
-        fprintf(stderr, "%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
+        GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
 #endif
         devices.push_back(device);
     }
@@ -773,7 +773,7 @@ static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_back
     size += TENSOR_ALIGNMENT;   // malloc may return an address that is not aligned
     void * data = malloc(size); // TODO: use GGML_ALIGNED_MALLOC (move to ggml-impl.h)
     if (data == NULL) {
-        fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
+        GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
         return NULL;
     }

@@ -836,7 +836,7 @@ static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_
     void * ptr;
     int result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size);
     if (result != 0) {
-        fprintf(stderr, "failed to allocate HBM buffer of size %zu\n", size);
+        GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size);
         return NULL;
     }

@@ -1459,7 +1459,7 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
     }

 #ifndef NDEBUG
-    fprintf(stderr, "%s: warning: no backend supports op %s with a weight with buffer type %s used in tensor %s, the weight will need to be copied\n",
+    GGML_LOG_DEBUG("%s: warning: no backend supports op %s with a weight with buffer type %s used in tensor %s, the weight will need to be copied\n",
             __func__, ggml_op_desc(tensor), ggml_backend_buffer_name(buffer), tensor->name);
 #endif

@@ -1548,32 +1548,32 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
     for (int i = 0; i < graph->n_nodes; i++) {
         if (cur_split < sched->n_splits && i == sched->splits[cur_split].i_start) {
             ggml_backend_t split_backend = sched->backends[sched->splits[cur_split].backend_id];
-            fprintf(stderr, "\n## SPLIT #%d: %s # %d inputs: ", cur_split, ggml_backend_name(split_backend),
+            GGML_LOG_DEBUG("\n## SPLIT #%d: %s # %d inputs: ", cur_split, ggml_backend_name(split_backend),
                 sched->splits[cur_split].n_inputs);
             for (int j = 0; j < sched->splits[cur_split].n_inputs; j++) {
-                fprintf(stderr, "[%s (%5.5s)] ", sched->splits[cur_split].inputs[j]->name,
+                GGML_LOG_DEBUG("[%s (%5.5s)] ", sched->splits[cur_split].inputs[j]->name,
                     fmt_size(ggml_nbytes(sched->splits[cur_split].inputs[j])));
             }
-            fprintf(stderr, "\n");
+            GGML_LOG_DEBUG("\n");
             cur_split++;
         }
         struct ggml_tensor * node = graph->nodes[i];
         if (ggml_is_view_op(node->op)) {
             continue;
         }
         ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
-        fprintf(stderr, "node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
+        GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
             fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node));
         for (int j = 0; j < GGML_MAX_SRC; j++) {
             struct ggml_tensor * src = node->src[j];
             if (src == NULL) {
                 continue;
             }
             ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src);
-            fprintf(stderr, " %20.20s (%5.5s) [%5.5s %8.8s]", src->name,
+            GGML_LOG_DEBUG(" %20.20s (%5.5s) [%5.5s %8.8s]", src->name,
                 fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", GET_CAUSE(src));
         }
-        fprintf(stderr, "\n");
+        GGML_LOG_DEBUG("\n");
     }
 }

@@ -2087,11 +2087,11 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
         // the re-allocation may cause the split inputs to be moved to a different address
         ggml_backend_sched_synchronize(sched);
 #ifndef NDEBUG
-        fprintf(stderr, "%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
+        GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
 #endif
         ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
         if (!ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
-            fprintf(stderr, "%s: failed to allocate graph\n", __func__);
+            GGML_LOG_ERROR("%s: failed to allocate graph\n", __func__);
             return false;
         }
     }
@@ -2485,7 +2485,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s
     struct ggml_context * ctx_unallocated = ggml_init(params);

     if (ctx_allocated == NULL || ctx_unallocated == NULL) {
-        fprintf(stderr, "failed to allocate context for graph copy\n");
+        GGML_LOG_ERROR("%s: failed to allocate context for graph copy\n", __func__);
         ggml_hash_set_free(&hash_set);
         free(node_copies);
         free(node_init);
@@ -2508,7 +2508,7 @@ struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, s
     // allocate nodes
     ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend);
     if (buffer == NULL) {
-        fprintf(stderr, "failed to allocate buffer for graph copy\n");
+        GGML_LOG_ERROR("%s: failed to allocate buffer for graph copy\n", __func__);
         ggml_hash_set_free(&hash_set);
         free(node_copies);
         free(node_init);

ggml/src/ggml-blas.cpp

Lines changed: 4 additions & 4 deletions

@@ -297,14 +297,14 @@ ggml_backend_t ggml_backend_blas_init(void) {
         /* .context = */ ctx,
     };

-#if !defined(NDEBUG) && defined(OPENBLAS_VERSION) && defined(GGML_USE_OPENMP)
+#if defined(OPENBLAS_VERSION) && defined(GGML_USE_OPENMP)
     if (openblas_get_parallel() != OPENBLAS_OPENMP) {
-        fprintf(stderr, "%s: warning: ggml is using OpenMP, but OpenBLAS was compiled without OpenMP support\n", __func__);
+        GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but OpenBLAS was compiled without OpenMP support\n", __func__);
     }
 #endif

-#if !defined(NDEBUG) && defined(BLIS_ENABLE_CBLAS) && defined(GGML_USE_OPENMP) && !defined(BLIS_ENABLE_OPENMP)
-    fprintf(stderr, "%s: warning: ggml is using OpenMP, but BLIS was compiled without OpenMP support\n", __func__);
+#if defined(BLIS_ENABLE_CBLAS) && defined(GGML_USE_OPENMP) && !defined(BLIS_ENABLE_OPENMP)
+    GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but BLIS was compiled without OpenMP support\n", __func__);
 #endif

     return backend;
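
Since these warnings are now compiled into all builds (the NDEBUG guard is gone) and emitted at debug level, whether they actually appear is decided at runtime by the installed log callback. A minimal sketch of routing ggml's log output through a custom callback, assuming the public ggml_log_set() hook and ggml_log_callback signature from ggml.h (verify both against your tree):

#include <stdio.h>
#include "ggml.h"

// Forward all ggml log output to stderr. The early return filters the
// debug-level chatter (scheduler assignment dumps, registration messages);
// delete it to also see the BLAS/OpenMP warnings above in a release build.
static void my_log_cb(enum ggml_log_level level, const char * text, void * user_data) {
    (void) user_data;
    if (level == GGML_LOG_LEVEL_DEBUG) {
        return; // remove to show debug-level messages too
    }
    fputs(text, stderr);
}

int main(void) {
    ggml_log_set(my_log_cb, NULL);
    // ... initialize backends as usual; allocator, scheduler, and BLAS
    // messages are now delivered through my_log_cb instead of raw
    // fprintf(stderr, ...).
    return 0;
}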

0 commit comments
