Skip to content

Commit a8bca68

Browse files
authored
fix: Compute the full sum in llama-eval-callback, not just the sum of printed values (ggml-org#15637)
This makes it much easier to compare between llama.cpp and transformers! https://github.com/ggml-org/llama.cpp/issues/15409 Branch: gabe-l-hart/nvidia-nemotron-nano-15409 Signed-off-by: Gabe Goodhart <[email protected]>
1 parent c97dc09 commit a8bca68

File tree

1 file changed

+32
-18
lines changed

1 file changed

+32
-18
lines changed

examples/eval-callback/eval-callback.cpp

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,40 @@ static std::string ggml_ne_string(const ggml_tensor * t) {
2828
return str;
2929
}
3030

31+
static float ggml_get_float_value(uint8_t * data, ggml_type type, const size_t * nb, size_t i0, size_t i1, size_t i2, size_t i3) {
32+
size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
33+
float v;
34+
if (type == GGML_TYPE_F16) {
35+
v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
36+
} else if (type == GGML_TYPE_F32) {
37+
v = *(float *) &data[i];
38+
} else if (type == GGML_TYPE_I64) {
39+
v = (float) *(int64_t *) &data[i];
40+
} else if (type == GGML_TYPE_I32) {
41+
v = (float) *(int32_t *) &data[i];
42+
} else if (type == GGML_TYPE_I16) {
43+
v = (float) *(int16_t *) &data[i];
44+
} else if (type == GGML_TYPE_I8) {
45+
v = (float) *(int8_t *) &data[i];
46+
} else {
47+
GGML_ABORT("fatal error");
48+
}
49+
return v;
50+
}
51+
3152
static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
3253
GGML_ASSERT(n > 0);
3354
float sum = 0;
55+
for (int64_t i3 = 0; i3 < ne[3]; i3++) {
56+
for (int64_t i2 = 0; i2 < ne[2]; i2++) {
57+
for (int64_t i1 = 0; i1 < ne[1]; i1++) {
58+
for (int64_t i0 = 0; i0 < ne[0]; i0++) {
59+
const float v = ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
60+
sum += v;
61+
}
62+
}
63+
}
64+
}
3465
for (int64_t i3 = 0; i3 < ne[3]; i3++) {
3566
LOG(" [\n");
3667
for (int64_t i2 = 0; i2 < ne[2]; i2++) {
@@ -50,25 +81,8 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne
5081
LOG("..., ");
5182
i0 = ne[0] - n;
5283
}
53-
size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
54-
float v;
55-
if (type == GGML_TYPE_F16) {
56-
v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]);
57-
} else if (type == GGML_TYPE_F32) {
58-
v = *(float *) &data[i];
59-
} else if (type == GGML_TYPE_I64) {
60-
v = (float) *(int64_t *) &data[i];
61-
} else if (type == GGML_TYPE_I32) {
62-
v = (float) *(int32_t *) &data[i];
63-
} else if (type == GGML_TYPE_I16) {
64-
v = (float) *(int16_t *) &data[i];
65-
} else if (type == GGML_TYPE_I8) {
66-
v = (float) *(int8_t *) &data[i];
67-
} else {
68-
GGML_ABORT("fatal error");
69-
}
84+
const float v = ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
7085
LOG("%12.4f", v);
71-
sum += v;
7286
if (i0 < ne[0] - 1) LOG(", ");
7387
}
7488
LOG("],\n");

0 commit comments

Comments
 (0)