Skip to content

Commit 8d9eab7

Browse files
committed
Add debug code
1 parent fa9f0b5 commit 8d9eab7

File tree

1 file changed

+98
-98
lines changed

1 file changed

+98
-98
lines changed

src/llama-context.cpp

Lines changed: 98 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,104 +1131,104 @@ int llama_context::decode(const llama_batch & batch_inp) {
11311131
}
11321132

11331133
// Debug: Dump tensor values after computation (for PLaMo-2 only)
1134-
// if (model.arch == LLM_ARCH_PLAMO2) { // Only for small inputs
1135-
// // Create debug directory if it doesn't exist
1136-
// #ifdef _WIN32
1137-
// _mkdir("debug_tensors");
1138-
// #else
1139-
// mkdir("debug_tensors", 0755);
1140-
// #endif
1141-
// // Find debug tensors by searching through the graph (gf is now accessible via res->get_graph())
1142-
// ggml_cgraph* current_gf = res->get_graph();
1143-
// for (int i = 0; i < ggml_graph_n_nodes(current_gf); ++i) {
1144-
// ggml_tensor* node = ggml_graph_node(current_gf, i);
1145-
// printf("Processing node: %s\n", node->name ? node->name : "unknown");
1146-
// if (node && node->name) {
1147-
// bool should_dump = (strcmp(node->name, "embedding_output") == 0) ||
1148-
// (strstr(node->name, "mamba_") == node->name) ||
1149-
// (strstr(node->name, "attn_norm") == node->name) ||
1150-
// (strstr(node->name, "norm") == node->name) ||
1151-
// (strcmp(node->name, "tokens") == 0) ||
1152-
// (strstr(node->name, "attn_pre_norm") == node->name) ||
1153-
// (strcmp(node->name, "inp_embd") == 0) ||
1154-
// (strcmp(node->name, "inp_tokens") == 0);
1155-
1156-
// if (strcmp(node->name, "tokens") == 0) {
1157-
// llama_token* token_data = (llama_token*)node->data;
1158-
// printf("Input Tokens: ");
1159-
// for (int j = 0; j < node->ne[0]; ++j) {
1160-
// printf("%d ", token_data[j]);
1161-
// }
1162-
// printf("\n");
1163-
// continue; // Skip dumping tensor values for "tokens"
1164-
// }
1165-
1166-
// if (should_dump && node->data) {
1167-
// printf("=== Post-Compute Tensor Values ===\n");
1168-
// printf("Tensor: %s\n", node->name);
1169-
// printf("Shape: [%ld, %ld", node->ne[0], node->ne[1]);
1170-
// if (node->ne[2] > 1) printf(", %ld", node->ne[2]);
1171-
// if (node->ne[3] > 1) printf(", %ld", node->ne[3]);
1172-
// printf("]\n");
1173-
1174-
// int64_t total_elements = ggml_nelements(node);
1175-
// float* data = new float[total_elements];
1176-
// if (node->type == GGML_TYPE_F32) {
1177-
// data = (float*)node->data;
1178-
// } else if (node->type == GGML_TYPE_BF16) {
1179-
// ggml_bf16_t * bf16_data = (ggml_bf16_t*)node->data;
1180-
// for (int64_t j = 0; j < total_elements; j++) {
1181-
// printf("%.6f -> %.6f \n", bf16_data[j], ggml_bf16_to_fp32(bf16_data[j]));
1182-
// }
1183-
// ggml_bf16_to_fp32_row((ggml_bf16_t*)node->data, data, total_elements);
1184-
// }
1185-
1186-
// if (total_elements > 0) {
1187-
// // Calculate statistics
1188-
// float sum = 0.0f, sum_sq = 0.0f, min_val = data[0], max_val = data[0];
1189-
// for (int64_t j = 0; j < total_elements; j++) {
1190-
// sum += data[j];
1191-
// sum_sq += data[j] * data[j];
1192-
// min_val = fminf(min_val, data[j]);
1193-
// max_val = fmaxf(max_val, data[j]);
1194-
// }
1195-
1196-
// float mean = sum / total_elements;
1197-
// float variance = (sum_sq / total_elements) - (mean * mean);
1198-
// float std_dev = sqrtf(variance);
1199-
1200-
// printf("Stats - Mean: %.6f, Std: %.6f, Min: %.6f, Max: %.6f\n",
1201-
// mean, std_dev, min_val, max_val);
1202-
1203-
// // Print first 8 values
1204-
// printf("First 8 values: ");
1205-
// for (int j = 0; j < 8 && j < total_elements; j++) {
1206-
// printf("%.6f ", data[j]);
1207-
// }
1208-
// printf("\n");
1209-
1210-
// // Save to file for detailed comparison
1211-
// char filename[256];
1212-
// snprintf(filename, sizeof(filename), "debug_tensors/%s.csv", node->name);
1213-
// FILE* f = fopen(filename, "w");
1214-
// if (f) {
1215-
// for (int64_t j = 0; j < total_elements; ++j) {
1216-
// fprintf(f, "%f", data[j]);
1217-
// if ((j + 1) % node->ne[0] == 0) {
1218-
// fprintf(f, "\n");
1219-
// } else {
1220-
// fprintf(f, ",");
1221-
// }
1222-
// }
1223-
// fclose(f);
1224-
// printf("Saved to: %s\n", filename);
1225-
// }
1226-
// }
1227-
// printf("==================================\n");
1228-
// }
1229-
// }
1230-
// }
1231-
// }
1134+
if (model.arch == LLM_ARCH_PLAMO2) { // Only for small inputs
    // Debug: dump selected tensor values after graph computation so they can
    // be compared against a reference implementation.
    // Make sure the dump directory exists before writing CSV files into it.
#ifdef _WIN32
    _mkdir("debug_tensors");
#else
    mkdir("debug_tensors", 0755);
#endif
    // Walk the computed graph looking for tensors of interest
    // (gf is now accessible via res->get_graph()).
    ggml_cgraph * current_gf = res->get_graph();
    for (int i = 0; i < ggml_graph_n_nodes(current_gf); ++i) {
        ggml_tensor * node = ggml_graph_node(current_gf, i);
        // Guard BEFORE dereferencing: the original printed node->name prior to
        // the null checks.
        if (!node || !node->name) {
            continue;
        }
        printf("Processing node: %s\n", node->name);

        const bool should_dump =
            (strcmp(node->name, "embedding_output") == 0)       ||
            (strstr(node->name, "mamba_")        == node->name) ||
            (strstr(node->name, "attn_norm")     == node->name) ||
            (strstr(node->name, "norm")          == node->name) ||
            (strcmp(node->name, "tokens")        == 0)          ||
            (strstr(node->name, "attn_pre_norm") == node->name) ||
            (strcmp(node->name, "inp_embd")      == 0)          ||
            (strcmp(node->name, "inp_tokens")    == 0);

        // "tokens" holds the raw input token ids - print them and skip the
        // value dump.
        if (strcmp(node->name, "tokens") == 0) {
            const llama_token * token_data = (const llama_token *) node->data;
            printf("Input Tokens: ");
            for (int64_t j = 0; j < node->ne[0]; ++j) {
                printf("%d ", token_data[j]);
            }
            printf("\n");
            continue;
        }

        if (!should_dump || !node->data) {
            continue;
        }

        printf("=== Post-Compute Tensor Values ===\n");
        printf("Tensor: %s\n", node->name);
        // ne[] is int64_t; %ld is wrong on LLP64 targets (Windows), so cast
        // explicitly and use %lld.
        printf("Shape: [%lld, %lld", (long long) node->ne[0], (long long) node->ne[1]);
        if (node->ne[2] > 1) printf(", %lld", (long long) node->ne[2]);
        if (node->ne[3] > 1) printf(", %lld", (long long) node->ne[3]);
        printf("]\n");

        const int64_t total_elements = ggml_nelements(node);

        // Obtain the values as f32. F32 tensors are read in place; BF16 is
        // converted into a temporary buffer which is freed below (the original
        // code leaked the allocation on every node). Other types are not
        // dumped: the original computed statistics over an uninitialized
        // buffer for them. The original also printf'd each raw ggml_bf16_t
        // with %f, which is undefined behavior (struct passed through
        // varargs) - that print is removed.
        float       * converted = nullptr;
        const float * data      = nullptr;
        if (node->type == GGML_TYPE_F32) {
            data = (const float *) node->data;
        } else if (node->type == GGML_TYPE_BF16) {
            converted = new float[total_elements];
            ggml_bf16_to_fp32_row((const ggml_bf16_t *) node->data, converted, total_elements);
            data = converted;
        } else {
            printf("Skipping dump: unsupported tensor type %d\n", (int) node->type);
        }

        if (data && total_elements > 0) {
            // Basic statistics over all elements.
            float sum = 0.0f, sum_sq = 0.0f, min_val = data[0], max_val = data[0];
            for (int64_t j = 0; j < total_elements; j++) {
                sum    += data[j];
                sum_sq += data[j] * data[j];
                min_val = fminf(min_val, data[j]);
                max_val = fmaxf(max_val, data[j]);
            }

            const float mean     = sum / total_elements;
            const float variance = (sum_sq / total_elements) - (mean * mean);
            const float std_dev  = sqrtf(variance);

            printf("Stats - Mean: %.6f, Std: %.6f, Min: %.6f, Max: %.6f\n",
                   mean, std_dev, min_val, max_val);

            // Print first 8 values
            printf("First 8 values: ");
            for (int64_t j = 0; j < 8 && j < total_elements; j++) {
                printf("%.6f ", data[j]);
            }
            printf("\n");

            // Save to file for detailed comparison
            char filename[256];
            snprintf(filename, sizeof(filename), "debug_tensors/%s.csv", node->name);
            FILE * f = fopen(filename, "w");
            if (f) {
                for (int64_t j = 0; j < total_elements; ++j) {
                    fprintf(f, "%f", data[j]);
                    // One CSV row per slice along the first dimension.
                    fprintf(f, ((j + 1) % node->ne[0] == 0) ? "\n" : ",");
                }
                fclose(f);
                printf("Saved to: %s\n", filename);
            }
        }

        delete[] converted; // safe on nullptr; fixes the per-node leak
        printf("==================================\n");
    }
}
12321232

12331233
n_outputs_prev += n_outputs;
12341234
} while (mctx->next());

0 commit comments

Comments
 (0)