@@ -1132,104 +1132,104 @@ int llama_context::decode(const llama_batch & batch_inp) {
11321132 }
11331133
11341134 // Debug: Dump tensor values after computation (for PLaMo-2 only)
1135- // if (model.arch == LLM_ARCH_PLAMO2) { // Only for small inputs
1136- // // Create debug directory if it doesn't exist
1137- // #ifdef _WIN32
1138- // _mkdir("debug_tensors");
1139- // #else
1140- // mkdir("debug_tensors", 0755);
1141- // #endif
1142- // // Find debug tensors by searching through the graph (gf is now accessible via res->get_graph())
1143- // ggml_cgraph* current_gf = res->get_graph();
1144- // for (int i = 0; i < ggml_graph_n_nodes(current_gf); ++i) {
1145- // ggml_tensor* node = ggml_graph_node(current_gf, i);
1146- // printf("Processing node: %s\n", node->name ? node->name : "unknown");
1147- // if (node && node->name) {
1148- // bool should_dump = (strcmp(node->name, "embedding_output") == 0) ||
1149- // (strstr(node->name, "mamba_") == node->name) ||
1150- // (strstr(node->name, "attn_norm") == node->name) ||
1151- // (strstr(node->name, "norm") == node->name) ||
1152- // (strcmp(node->name, "tokens") == 0) ||
1153- // (strstr(node->name, "attn_pre_norm") == node->name) ||
1154- // (strcmp(node->name, "inp_embd") == 0) ||
1155- // (strcmp(node->name, "inp_tokens") == 0);
1156-
1157- // if (strcmp(node->name, "tokens") == 0) {
1158- // llama_token* token_data = (llama_token*)node->data;
1159- // printf("Input Tokens: ");
1160- // for (int j = 0; j < node->ne[0]; ++j) {
1161- // printf("%d ", token_data[j]);
1162- // }
1163- // printf("\n");
1164- // continue; // Skip dumping tensor values for "tokens"
1165- // }
1166-
1167- // if (should_dump && node->data) {
1168- // printf("=== Post-Compute Tensor Values ===\n");
1169- // printf("Tensor: %s\n", node->name);
1170- // printf("Shape: [%ld, %ld", node->ne[0], node->ne[1]);
1171- // if (node->ne[2] > 1) printf(", %ld", node->ne[2]);
1172- // if (node->ne[3] > 1) printf(", %ld", node->ne[3]);
1173- // printf("]\n");
1174-
1175- // int64_t total_elements = ggml_nelements(node);
1176- // float* data = new float[total_elements];
1177- // if (node->type == GGML_TYPE_F32) {
1178- // data = (float*)node->data;
1179- // } else if (node->type == GGML_TYPE_BF16) {
1180- // ggml_bf16_t * bf16_data = (ggml_bf16_t*)node->data;
1181- // for (int64_t j = 0; j < total_elements; j++) {
1182- // printf("%.6f -> %.6f \n", bf16_data[j], ggml_bf16_to_fp32(bf16_data[j]));
1183- // }
1184- // ggml_bf16_to_fp32_row((ggml_bf16_t*)node->data, data, total_elements);
1185- // }
1186-
1187- // if (total_elements > 0) {
1188- // // Calculate statistics
1189- // float sum = 0.0f, sum_sq = 0.0f, min_val = data[0], max_val = data[0];
1190- // for (int64_t j = 0; j < total_elements; j++) {
1191- // sum += data[j];
1192- // sum_sq += data[j] * data[j];
1193- // min_val = fminf(min_val, data[j]);
1194- // max_val = fmaxf(max_val, data[j]);
1195- // }
1196-
1197- // float mean = sum / total_elements;
1198- // float variance = (sum_sq / total_elements) - (mean * mean);
1199- // float std_dev = sqrtf(variance);
1200-
1201- // printf("Stats - Mean: %.6f, Std: %.6f, Min: %.6f, Max: %.6f\n",
1202- // mean, std_dev, min_val, max_val);
1203-
1204- // // Print first 8 values
1205- // printf("First 8 values: ");
1206- // for (int j = 0; j < 8 && j < total_elements; j++) {
1207- // printf("%.6f ", data[j]);
1208- // }
1209- // printf("\n");
1210-
1211- // // Save to file for detailed comparison
1212- // char filename[256];
1213- // snprintf(filename, sizeof(filename), "debug_tensors/%s.csv", node->name);
1214- // FILE* f = fopen(filename, "w");
1215- // if (f) {
1216- // for (int64_t j = 0; j < total_elements; ++j) {
1217- // fprintf(f, "%f", data[j]);
1218- // if ((j + 1) % node->ne[0] == 0) {
1219- // fprintf(f, "\n");
1220- // } else {
1221- // fprintf(f, ",");
1222- // }
1223- // }
1224- // fclose(f);
1225- // printf("Saved to: %s\n", filename);
1226- // }
1227- // }
1228- // printf("==================================\n");
1229- // }
1230- // }
1231- // }
1232- // }
1135+ if (model.arch == LLM_ARCH_PLAMO2) { // Only for small inputs
1136+ // Create debug directory if it doesn't exist
1137+ #ifdef _WIN32
1138+ _mkdir (" debug_tensors" );
1139+ #else
1140+ mkdir (" debug_tensors" , 0755 );
1141+ #endif
1142+ // Find debug tensors by searching through the graph (gf is now accessible via res->get_graph())
1143+ ggml_cgraph* current_gf = res->get_graph ();
1144+ for (int i = 0 ; i < ggml_graph_n_nodes (current_gf); ++i) {
1145+ ggml_tensor* node = ggml_graph_node (current_gf, i);
1146+ printf (" Processing node: %s\n " , node->name ? node->name : " unknown" );
1147+ if (node && node->name ) {
1148+ bool should_dump = (strcmp (node->name , " embedding_output" ) == 0 ) ||
1149+ (strstr (node->name , " mamba_" ) == node->name ) ||
1150+ (strstr (node->name , " attn_norm" ) == node->name ) ||
1151+ (strstr (node->name , " norm" ) == node->name ) ||
1152+ (strcmp (node->name , " tokens" ) == 0 ) ||
1153+ (strstr (node->name , " attn_pre_norm" ) == node->name ) ||
1154+ (strcmp (node->name , " inp_embd" ) == 0 ) ||
1155+ (strcmp (node->name , " inp_tokens" ) == 0 );
1156+
1157+ if (strcmp (node->name , " tokens" ) == 0 ) {
1158+ llama_token* token_data = (llama_token*)node->data ;
1159+ printf (" Input Tokens: " );
1160+ for (int j = 0 ; j < node->ne [0 ]; ++j) {
1161+ printf (" %d " , token_data[j]);
1162+ }
1163+ printf (" \n " );
1164+ continue ; // Skip dumping tensor values for "tokens"
1165+ }
1166+
1167+ if (should_dump && node->data ) {
1168+ printf (" === Post-Compute Tensor Values ===\n " );
1169+ printf (" Tensor: %s\n " , node->name );
1170+ printf (" Shape: [%ld, %ld" , node->ne [0 ], node->ne [1 ]);
1171+ if (node->ne [2 ] > 1 ) printf (" , %ld" , node->ne [2 ]);
1172+ if (node->ne [3 ] > 1 ) printf (" , %ld" , node->ne [3 ]);
1173+ printf (" ]\n " );
1174+
1175+ int64_t total_elements = ggml_nelements (node);
1176+ float * data = new float [total_elements];
1177+ if (node->type == GGML_TYPE_F32) {
1178+ data = (float *)node->data ;
1179+ } else if (node->type == GGML_TYPE_BF16) {
1180+ ggml_bf16_t * bf16_data = (ggml_bf16_t *)node->data ;
1181+ for (int64_t j = 0 ; j < total_elements; j++) {
1182+ printf (" %.6f -> %.6f \n " , bf16_data[j], ggml_bf16_to_fp32 (bf16_data[j]));
1183+ }
1184+ ggml_bf16_to_fp32_row ((ggml_bf16_t *)node->data , data, total_elements);
1185+ }
1186+
1187+ if (total_elements > 0 ) {
1188+ // Calculate statistics
1189+ float sum = 0 .0f , sum_sq = 0 .0f , min_val = data[0 ], max_val = data[0 ];
1190+ for (int64_t j = 0 ; j < total_elements; j++) {
1191+ sum += data[j];
1192+ sum_sq += data[j] * data[j];
1193+ min_val = fminf (min_val, data[j]);
1194+ max_val = fmaxf (max_val, data[j]);
1195+ }
1196+
1197+ float mean = sum / total_elements;
1198+ float variance = (sum_sq / total_elements) - (mean * mean);
1199+ float std_dev = sqrtf (variance);
1200+
1201+ printf (" Stats - Mean: %.6f, Std: %.6f, Min: %.6f, Max: %.6f\n " ,
1202+ mean, std_dev, min_val, max_val);
1203+
1204+ // Print first 8 values
1205+ printf (" First 8 values: " );
1206+ for (int j = 0 ; j < 8 && j < total_elements; j++) {
1207+ printf (" %.6f " , data[j]);
1208+ }
1209+ printf (" \n " );
1210+
1211+ // Save to file for detailed comparison
1212+ char filename[256 ];
1213+ snprintf (filename, sizeof (filename), " debug_tensors/%s.csv" , node->name );
1214+ FILE* f = fopen (filename, " w" );
1215+ if (f) {
1216+ for (int64_t j = 0 ; j < total_elements; ++j) {
1217+ fprintf (f, " %f" , data[j]);
1218+ if ((j + 1 ) % node->ne [0 ] == 0 ) {
1219+ fprintf (f, " \n " );
1220+ } else {
1221+ fprintf (f, " ," );
1222+ }
1223+ }
1224+ fclose (f);
1225+ printf (" Saved to: %s\n " , filename);
1226+ }
1227+ }
1228+ printf (" ==================================\n " );
1229+ }
1230+ }
1231+ }
1232+ }
12331233
12341234 n_outputs_prev += n_outputs;
12351235 } while (mctx->next ());
0 commit comments