Commit ba919e3

Browse files
committed
ggml : add ggml_backend_sched_debug_tensor ggml_backend API
This commit adds a new function `ggml_backend_sched_debug_tensor` to the ggml_backend API. This function allows users to print the values of a specified tensor after graph computation, along with the mean squared value.

The motivation for this addition is that it can be useful as a "ballpark" check of tensors before/after operations have been executed. This came out of use cases when converting new models to llama.cpp and the need to track down discrepancies in tensor values.

As an example of usage, this function can be called after the graph has been executed, for example in `process_ubatch` in llama-context.cpp:
```c++
ggml_backend_sched_debug_tensor(sched.get(), res->get_gf(), "inp_embd", 10);
```
This will log something like the following, assuming logging is set to debug/verbose level:
```console
ggml_backend_sched_debug_tensor: Tensor 'inp_embd', type: f32
ggml_backend_sched_debug_tensor: ne = [2048 6 1 1]
ggml_backend_sched_debug_tensor: Tensor value at [0, 0, 0, 0]: 7.241361
ggml_backend_sched_debug_tensor: Tensor value at [0, 0, 0, 1]: 5.649519
ggml_backend_sched_debug_tensor: Tensor value at [0, 0, 0, 2]: 9.418730
ggml_backend_sched_debug_tensor: Tensor value at [0, 0, 0, 3]: 8.292873
ggml_backend_sched_debug_tensor: Tensor value at [0, 0, 0, 4]: 9.473540
ggml_backend_sched_debug_tensor: Tensor value at [0, 0, 0, 5]: 9.034624
ggml_backend_sched_debug_tensor: Tensor value at [0, 0, 0, 6]: 9.187912
ggml_backend_sched_debug_tensor: Tensor value at [0, 0, 0, 7]: 1.406322
ggml_backend_sched_debug_tensor: Tensor value at [0, 0, 0, 8]: 4.729420
ggml_backend_sched_debug_tensor: Tensor value at [0, 0, 0, 9]: 4.343110
ggml_backend_sched_debug_tensor: inp_embd mean_sq = 41.4566065470
```
One thing to keep in mind is that the tensor needs to have a name, and we also need to ensure that the graph does not reuse the tensor during scheduling. This can be done by marking the tensor as an output to preserve it.
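As a minimal sketch (not part of this commit) of what that preparation might look like when building the graph, assuming `inp_embd` is the tensor of interest and `sched`/`gf` are the scheduler and compute graph already in scope:
```c++
// Illustrative only: give the tensor a name and mark it as an output so the
// scheduler does not reuse it before it can be inspected.
ggml_set_name(inp_embd, "inp_embd");
ggml_set_output(inp_embd);

// ... build and compute the rest of the graph ...

// After graph computation, log the first 10 values and the mean squared value.
ggml_backend_sched_debug_tensor(sched, gf, "inp_embd", 10);
```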

File tree: 2 files changed, +75 −0 lines changed
ggml/include/ggml-backend.h

Lines changed: 7 additions & 0 deletions

```diff
@@ -339,6 +339,13 @@ extern "C" {
     // Set a callback to be called for each resulting node during graph compute
     GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
 
+    // Debug/log the tensor mean squared value and optionally a specified number
+    // of values from the tensor.
+    //
+    // Note that the tensor needs to be named using ggml_set_name or equivalent,
+    // and it also has to be prevented from being reused (optimized out) by the graph scheduler.
+    GGML_API void ggml_backend_sched_debug_tensor(ggml_backend_sched_t sched, struct ggml_cgraph * graph, const char * name, size_t n_values_to_log);
+
     //
     // Utils
     //
```

ggml/src/ggml-backend.cpp

Lines changed: 68 additions & 0 deletions

```diff
@@ -1803,6 +1803,74 @@ void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backe
     sched->callback_eval_user_data = user_data;
 }
 
+static float ggml_get_float_value(const uint8_t * data, enum ggml_type type,
+                                  const size_t * nb, size_t i0, size_t i1, size_t i2, size_t i3) {
+    size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
+    switch (type) {
+        case GGML_TYPE_F16:
+            return ggml_fp16_to_fp32(*(const ggml_fp16_t *) &data[i]);
+        case GGML_TYPE_F32:
+            return *(const float *) &data[i];
+        case GGML_TYPE_I64:
+            return (float) *(const int64_t *) &data[i];
+        case GGML_TYPE_I32:
+            return (float) *(const int32_t *) &data[i];
+        case GGML_TYPE_I16:
+            return (float) *(const int16_t *) &data[i];
+        case GGML_TYPE_I8:
+            return (float) *(const int8_t *) &data[i];
+        case GGML_TYPE_BF16:
+            return ggml_compute_bf16_to_fp32(*(const ggml_bf16_t *) &data[i]);
+        default:
+            GGML_ABORT("fatal error");
+    }
+}
+
+void ggml_backend_sched_debug_tensor(ggml_backend_sched_t sched, struct ggml_cgraph * graph, const char * name, size_t n_values_to_log) {
+    GGML_ASSERT(sched);
+    GGML_ASSERT(graph);
+
+    struct ggml_tensor * t = ggml_graph_get_tensor(graph, name);
+    if (t == nullptr) {
+        GGML_LOG_DEBUG("%s: Tensor '%s' not found in graph.\n", __func__, name);
+        return;
+    }
+
+    GGML_LOG_DEBUG("%s: Tensor '%s', type: %s\n", __func__, t->name, ggml_type_name(t->type));
+    GGML_LOG_DEBUG("%s: ne = [%lld %lld %lld %lld]\n", __func__, (long long) t->ne[0], (long long) t->ne[1], (long long) t->ne[2], (long long) t->ne[3]);
+
+    size_t n_bytes = ggml_nbytes(t);
+    std::vector<uint8_t> data_bytes(n_bytes);
+
+    ggml_backend_t backend = ggml_backend_sched_get_tensor_backend(sched, t);
+
+    ggml_backend_tensor_get_async(backend, t, data_bytes.data(), 0, n_bytes);
+    ggml_backend_sched_synchronize(sched);
+
+    float sum_sq = 0.0;
+    uint8_t * d = data_bytes.data();
+
+    size_t v_count = 0;
+    for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {
+        for (int64_t i2 = 0; i2 < t->ne[2]; i2++) {
+            for (int64_t i1 = 0; i1 < t->ne[1]; i1++) {
+                for (int64_t i0 = 0; i0 < t->ne[0]; i0++) {
+                    const float v = ggml_get_float_value(d, t->type, t->nb, i0, i1, i2, i3);
+                    sum_sq += v * v;
+
+                    if (v_count++ < n_values_to_log) {
+                        GGML_LOG_DEBUG("%s: Tensor value at [%lld, %lld, %lld, %lld]: %.6f\n", __func__,
+                                       (long long) i3, (long long) i2, (long long) i1, (long long) i0, v);
+                    }
+                }
+            }
+        }
+    }
+
+    double mean_sq = sum_sq / (double) ggml_nelements(t);
+    GGML_LOG_DEBUG("%s: %s mean_sq = %.10f\n", __func__, t->name, mean_sq);
+}
+
 int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) {
     GGML_ASSERT(sched);
     return sched->n_splits;
```
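The mean squared value logged at the end is simply the sum of v² over all elements divided by the element count. A small standalone sketch (with made-up values, not from this commit) of the same computation, which can be mirrored in a reference implementation when tracking down discrepancies:
```c++
#include <cstdio>
#include <vector>

int main() {
    // Hypothetical values, e.g. a few elements of a tensor dumped from a
    // reference implementation of the model being converted.
    std::vector<float> values = {7.241361f, 5.649519f, 9.418730f};

    double sum_sq = 0.0;
    for (float v : values) {
        sum_sq += (double) v * v;
    }
    const double mean_sq = sum_sq / (double) values.size();

    // Comparing this ballpark figure against the mean_sq logged by
    // ggml_backend_sched_debug_tensor gives a quick sanity check.
    printf("mean_sq = %.10f\n", mean_sq);
    return 0;
}
```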
