@@ -1131,104 +1131,104 @@ int llama_context::decode(const llama_batch & batch_inp) {
11311131 }
11321132
11331133 // Debug: Dump tensor values after computation (for PLaMo-2 only)
1134- // if (model.arch == LLM_ARCH_PLAMO2) { // Only for small inputs
1135- // // Create debug directory if it doesn't exist
1136- // #ifdef _WIN32
1137- // _mkdir("debug_tensors");
1138- // #else
1139- // mkdir("debug_tensors", 0755);
1140- // #endif
1141- // // Find debug tensors by searching through the graph (gf is now accessible via res->get_graph())
1142- // ggml_cgraph* current_gf = res->get_graph();
1143- // for (int i = 0; i < ggml_graph_n_nodes(current_gf); ++i) {
1144- // ggml_tensor* node = ggml_graph_node(current_gf, i);
1145- // printf("Processing node: %s\n", node->name ? node->name : "unknown");
1146- // if (node && node->name) {
1147- // bool should_dump = (strcmp(node->name, "embedding_output") == 0) ||
1148- // (strstr(node->name, "mamba_") == node->name) ||
1149- // (strstr(node->name, "attn_norm") == node->name) ||
1150- // (strstr(node->name, "norm") == node->name) ||
1151- // (strcmp(node->name, "tokens") == 0) ||
1152- // (strstr(node->name, "attn_pre_norm") == node->name) ||
1153- // (strcmp(node->name, "inp_embd") == 0) ||
1154- // (strcmp(node->name, "inp_tokens") == 0);
1155-
1156- // if (strcmp(node->name, "tokens") == 0) {
1157- // llama_token* token_data = (llama_token*)node->data;
1158- // printf("Input Tokens: ");
1159- // for (int j = 0; j < node->ne[0]; ++j) {
1160- // printf("%d ", token_data[j]);
1161- // }
1162- // printf("\n");
1163- // continue; // Skip dumping tensor values for "tokens"
1164- // }
1165-
1166- // if (should_dump && node->data) {
1167- // printf("=== Post-Compute Tensor Values ===\n");
1168- // printf("Tensor: %s\n", node->name);
1169- // printf("Shape: [%ld, %ld", node->ne[0], node->ne[1]);
1170- // if (node->ne[2] > 1) printf(", %ld", node->ne[2]);
1171- // if (node->ne[3] > 1) printf(", %ld", node->ne[3]);
1172- // printf("]\n");
1173-
1174- // int64_t total_elements = ggml_nelements(node);
1175- // float* data = new float[total_elements];
1176- // if (node->type == GGML_TYPE_F32) {
1177- // data = (float*)node->data;
1178- // } else if (node->type == GGML_TYPE_BF16) {
1179- // ggml_bf16_t * bf16_data = (ggml_bf16_t*)node->data;
1180- // for (int64_t j = 0; j < total_elements; j++) {
1181- // printf("%.6f -> %.6f \n", bf16_data[j], ggml_bf16_to_fp32(bf16_data[j]));
1182- // }
1183- // ggml_bf16_to_fp32_row((ggml_bf16_t*)node->data, data, total_elements);
1184- // }
1185-
1186- // if (total_elements > 0) {
1187- // // Calculate statistics
1188- // float sum = 0.0f, sum_sq = 0.0f, min_val = data[0], max_val = data[0];
1189- // for (int64_t j = 0; j < total_elements; j++) {
1190- // sum += data[j];
1191- // sum_sq += data[j] * data[j];
1192- // min_val = fminf(min_val, data[j]);
1193- // max_val = fmaxf(max_val, data[j]);
1194- // }
1195-
1196- // float mean = sum / total_elements;
1197- // float variance = (sum_sq / total_elements) - (mean * mean);
1198- // float std_dev = sqrtf(variance);
1199-
1200- // printf("Stats - Mean: %.6f, Std: %.6f, Min: %.6f, Max: %.6f\n",
1201- // mean, std_dev, min_val, max_val);
1202-
1203- // // Print first 8 values
1204- // printf("First 8 values: ");
1205- // for (int j = 0; j < 8 && j < total_elements; j++) {
1206- // printf("%.6f ", data[j]);
1207- // }
1208- // printf("\n");
1209-
1210- // // Save to file for detailed comparison
1211- // char filename[256];
1212- // snprintf(filename, sizeof(filename), "debug_tensors/%s.csv", node->name);
1213- // FILE* f = fopen(filename, "w");
1214- // if (f) {
1215- // for (int64_t j = 0; j < total_elements; ++j) {
1216- // fprintf(f, "%f", data[j]);
1217- // if ((j + 1) % node->ne[0] == 0) {
1218- // fprintf(f, "\n");
1219- // } else {
1220- // fprintf(f, ",");
1221- // }
1222- // }
1223- // fclose(f);
1224- // printf("Saved to: %s\n", filename);
1225- // }
1226- // }
1227- // printf("==================================\n");
1228- // }
1229- // }
1230- // }
1231- // }
1134+ if (model.arch == LLM_ARCH_PLAMO2) { // Only for small inputs
1135+ // Create debug directory if it doesn't exist
1136+ #ifdef _WIN32
1137+ _mkdir (" debug_tensors" );
1138+ #else
1139+ mkdir (" debug_tensors" , 0755 );
1140+ #endif
1141+ // Find debug tensors by searching through the graph (gf is now accessible via res->get_graph())
1142+ ggml_cgraph* current_gf = res->get_graph ();
1143+ for (int i = 0 ; i < ggml_graph_n_nodes (current_gf); ++i) {
1144+ ggml_tensor* node = ggml_graph_node (current_gf, i);
1145+ printf (" Processing node: %s\n " , node->name ? node->name : " unknown" );
1146+ if (node && node->name ) {
1147+ bool should_dump = (strcmp (node->name , " embedding_output" ) == 0 ) ||
1148+ (strstr (node->name , " mamba_" ) == node->name ) ||
1149+ (strstr (node->name , " attn_norm" ) == node->name ) ||
1150+ (strstr (node->name , " norm" ) == node->name ) ||
1151+ (strcmp (node->name , " tokens" ) == 0 ) ||
1152+ (strstr (node->name , " attn_pre_norm" ) == node->name ) ||
1153+ (strcmp (node->name , " inp_embd" ) == 0 ) ||
1154+ (strcmp (node->name , " inp_tokens" ) == 0 );
1155+
1156+ if (strcmp (node->name , " tokens" ) == 0 ) {
1157+ llama_token* token_data = (llama_token*)node->data ;
1158+ printf (" Input Tokens: " );
1159+ for (int j = 0 ; j < node->ne [0 ]; ++j) {
1160+ printf (" %d " , token_data[j]);
1161+ }
1162+ printf (" \n " );
1163+ continue ; // Skip dumping tensor values for "tokens"
1164+ }
1165+
1166+ if (should_dump && node->data ) {
1167+ printf (" === Post-Compute Tensor Values ===\n " );
1168+ printf (" Tensor: %s\n " , node->name );
1169+ printf (" Shape: [%ld, %ld" , node->ne [0 ], node->ne [1 ]);
1170+ if (node->ne [2 ] > 1 ) printf (" , %ld" , node->ne [2 ]);
1171+ if (node->ne [3 ] > 1 ) printf (" , %ld" , node->ne [3 ]);
1172+ printf (" ]\n " );
1173+
1174+ int64_t total_elements = ggml_nelements (node);
1175+ float * data = new float [total_elements];
1176+ if (node->type == GGML_TYPE_F32) {
1177+ data = (float *)node->data ;
1178+ } else if (node->type == GGML_TYPE_BF16) {
1179+ ggml_bf16_t * bf16_data = (ggml_bf16_t *)node->data ;
1180+ for (int64_t j = 0 ; j < total_elements; j++) {
1181+ printf (" %.6f -> %.6f \n " , bf16_data[j], ggml_bf16_to_fp32 (bf16_data[j]));
1182+ }
1183+ ggml_bf16_to_fp32_row ((ggml_bf16_t *)node->data , data, total_elements);
1184+ }
1185+
1186+ if (total_elements > 0 ) {
1187+ // Calculate statistics
1188+ float sum = 0 .0f , sum_sq = 0 .0f , min_val = data[0 ], max_val = data[0 ];
1189+ for (int64_t j = 0 ; j < total_elements; j++) {
1190+ sum += data[j];
1191+ sum_sq += data[j] * data[j];
1192+ min_val = fminf (min_val, data[j]);
1193+ max_val = fmaxf (max_val, data[j]);
1194+ }
1195+
1196+ float mean = sum / total_elements;
1197+ float variance = (sum_sq / total_elements) - (mean * mean);
1198+ float std_dev = sqrtf (variance);
1199+
1200+ printf (" Stats - Mean: %.6f, Std: %.6f, Min: %.6f, Max: %.6f\n " ,
1201+ mean, std_dev, min_val, max_val);
1202+
1203+ // Print first 8 values
1204+ printf (" First 8 values: " );
1205+ for (int j = 0 ; j < 8 && j < total_elements; j++) {
1206+ printf (" %.6f " , data[j]);
1207+ }
1208+ printf (" \n " );
1209+
1210+ // Save to file for detailed comparison
1211+ char filename[256 ];
1212+ snprintf (filename, sizeof (filename), " debug_tensors/%s.csv" , node->name );
1213+ FILE* f = fopen (filename, " w" );
1214+ if (f) {
1215+ for (int64_t j = 0 ; j < total_elements; ++j) {
1216+ fprintf (f, " %f" , data[j]);
1217+ if ((j + 1 ) % node->ne [0 ] == 0 ) {
1218+ fprintf (f, " \n " );
1219+ } else {
1220+ fprintf (f, " ," );
1221+ }
1222+ }
1223+ fclose (f);
1224+ printf (" Saved to: %s\n " , filename);
1225+ }
1226+ }
1227+ printf (" ==================================\n " );
1228+ }
1229+ }
1230+ }
1231+ }
12321232
12331233 n_outputs_prev += n_outputs;
12341234 } while (mctx->next ());
0 commit comments