Enable PLaMo-2 debug tensor dump in llama_context::decode

mitmul · mitmul · commit 8d9eab7dd098 · 2025-07-03T14:44:28.000+09:00
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
@@ -1131,104 +1131,104 @@ int llama_context::decode(const llama_batch & batch_inp) {
         }
 
         // Debug: Dump tensor values after computation (for PLaMo-2 only)
-        // if (model.arch == LLM_ARCH_PLAMO2) {  // Only for small inputs
-        //     // Create debug directory if it doesn't exist
-        //     #ifdef _WIN32
-        //     _mkdir("debug_tensors");
-        //     #else
-        //     mkdir("debug_tensors", 0755);
-        //     #endif
-        //     // Find debug tensors by searching through the graph (gf is now accessible via res->get_graph())
-        //     ggml_cgraph* current_gf = res->get_graph();
-        //     for (int i = 0; i < ggml_graph_n_nodes(current_gf); ++i) {
-        //         ggml_tensor* node = ggml_graph_node(current_gf, i);
-        //         printf("Processing node: %s\n", node->name ? node->name : "unknown");
-        //         if (node && node->name) {
-        //             bool should_dump = (strcmp(node->name, "embedding_output") == 0) ||
-        //                             (strstr(node->name, "mamba_") == node->name) ||
-        //                             (strstr(node->name, "attn_norm") == node->name) ||
-        //                             (strstr(node->name, "norm") == node->name) ||
-        //                             (strcmp(node->name, "tokens") == 0) ||
-        //                             (strstr(node->name, "attn_pre_norm") == node->name) ||
-        //                             (strcmp(node->name, "inp_embd") == 0) ||
-        //                             (strcmp(node->name, "inp_tokens") == 0);
-
-        //             if (strcmp(node->name, "tokens") == 0) {
-        //                 llama_token* token_data = (llama_token*)node->data;
-        //                 printf("Input Tokens: ");
-        //                 for (int j = 0; j < node->ne[0]; ++j) {
-        //                     printf("%d ", token_data[j]);
-        //                 }
-        //                 printf("\n");
-        //                 continue;  // Skip dumping tensor values for "tokens"
-        //             }
-
-        //             if (should_dump && node->data) {
-        //                 printf("=== Post-Compute Tensor Values ===\n");
-        //                 printf("Tensor: %s\n", node->name);
-        //                 printf("Shape: [%ld, %ld", node->ne[0], node->ne[1]);
-        //                 if (node->ne[2] > 1) printf(", %ld", node->ne[2]);
-        //                 if (node->ne[3] > 1) printf(", %ld", node->ne[3]);
-        //                 printf("]\n");
-
-        //                 int64_t total_elements = ggml_nelements(node);
-        //                 float* data = new float[total_elements];
-        //                 if (node->type == GGML_TYPE_F32) {
-        //                     data = (float*)node->data;
-        //                 } else if (node->type == GGML_TYPE_BF16) {
-        //                     ggml_bf16_t * bf16_data = (ggml_bf16_t*)node->data;
-        //                     for (int64_t j = 0; j < total_elements; j++) {
-        //                         printf("%.6f -> %.6f \n", bf16_data[j], ggml_bf16_to_fp32(bf16_data[j]));
-        //                     }
-        //                     ggml_bf16_to_fp32_row((ggml_bf16_t*)node->data, data, total_elements);
-        //                 }
-
-        //                 if (total_elements > 0) {
-        //                     // Calculate statistics
-        //                     float sum = 0.0f, sum_sq = 0.0f, min_val = data[0], max_val = data[0];
-        //                     for (int64_t j = 0; j < total_elements; j++) {
-        //                         sum += data[j];
-        //                         sum_sq += data[j] * data[j];
-        //                         min_val = fminf(min_val, data[j]);
-        //                         max_val = fmaxf(max_val, data[j]);
-        //                     }
-
-        //                     float mean = sum / total_elements;
-        //                     float variance = (sum_sq / total_elements) - (mean * mean);
-        //                     float std_dev = sqrtf(variance);
-
-        //                     printf("Stats - Mean: %.6f, Std: %.6f, Min: %.6f, Max: %.6f\n",
-        //                         mean, std_dev, min_val, max_val);
-
-        //                     // Print first 8 values
-        //                     printf("First 8 values: ");
-        //                     for (int j = 0; j < 8 && j < total_elements; j++) {
-        //                         printf("%.6f ", data[j]);
-        //                     }
-        //                     printf("\n");
-
-        //                     // Save to file for detailed comparison
-        //                     char filename[256];
-        //                     snprintf(filename, sizeof(filename), "debug_tensors/%s.csv", node->name);
-        //                     FILE* f = fopen(filename, "w");
-        //                     if (f) {
-        //                         for (int64_t j = 0; j < total_elements; ++j) {
-        //                             fprintf(f, "%f", data[j]);
-        //                             if ((j + 1) % node->ne[0] == 0) {
-        //                                 fprintf(f, "\n");
-        //                             } else {
-        //                                 fprintf(f, ",");
-        //                             }
-        //                         }
-        //                         fclose(f);
-        //                         printf("Saved to: %s\n", filename);
-        //                     }
-        //                 }
-        //                 printf("==================================\n");
-        //             }
-        //         }
-        //     }
-        // }
+        // For each selected graph node: print shape and statistics, then write the values to
+        // debug_tensors/<name>.csv. Only F32 and BF16 tensors are read; other types are skipped.
+        // NOTE(review): node->data is read directly, which presumably requires host-accessible
+        // tensor memory (CPU backend) -- TODO confirm before using with an offloaded model.
+        if (model.arch == LLM_ARCH_PLAMO2) {
+            // Create debug directory if it doesn't exist
+            #ifdef _WIN32
+            _mkdir("debug_tensors");
+            #else
+            mkdir("debug_tensors", 0755);
+            #endif
+            const auto has_prefix = [](const char * s, const char * p) { return strncmp(s, p, strlen(p)) == 0; };
+            // Walk every node of the graph returned by res->get_graph()
+            ggml_cgraph * current_gf = res->get_graph();
+            for (int i = 0; i < ggml_graph_n_nodes(current_gf); ++i) {
+                ggml_tensor * node = ggml_graph_node(current_gf, i);
+                // Resolve the name behind a null check so a missing node is never dereferenced
+                const char * name = node ? node->name : nullptr;
+                printf("Processing node: %s\n", name ? name : "unknown");
+                if (name == nullptr) continue;
+
+                if (strcmp(name, "tokens") == 0) {
+                    // Token ids are printed inline and never written to a CSV file
+                    const llama_token * token_data = (const llama_token *) node->data;
+                    printf("Input Tokens: ");
+                    for (int64_t j = 0; token_data != nullptr && j < node->ne[0]; ++j) {
+                        printf("%d ", token_data[j]);
+                    }
+                    printf("\n");
+                    continue;
+                }
+
+                const bool should_dump = strcmp(name, "embedding_output") == 0 ||
+                                         strcmp(name, "inp_embd") == 0 ||
+                                         strcmp(name, "inp_tokens") == 0 ||
+                                         has_prefix(name, "mamba_") ||
+                                         has_prefix(name, "attn_norm") ||
+                                         has_prefix(name, "attn_pre_norm") ||
+                                         has_prefix(name, "norm");
+                if (!should_dump || node->data == nullptr) continue;
+
+                printf("=== Post-Compute Tensor Values ===\n");
+                printf("Tensor: %s\n", name);
+                // int64_t is not `long` on every platform, so cast for a portable format
+                printf("Shape: [%lld, %lld", (long long) node->ne[0], (long long) node->ne[1]);
+                if (node->ne[2] > 1) printf(", %lld", (long long) node->ne[2]);
+                if (node->ne[3] > 1) printf(", %lld", (long long) node->ne[3]);
+                printf("]\n");
+
+                const int64_t total_elements = ggml_nelements(node);
+                const bool    is_f32         = node->type == GGML_TYPE_F32;
+                const bool    is_bf16        = node->type == GGML_TYPE_BF16;
+                // Convert element j on the fly: no temporary buffer, so nothing can leak
+                const auto value_at = [node, is_f32](int64_t j) -> float {
+                    return is_f32 ? ((const float *) node->data)[j]
+                                  : ggml_bf16_to_fp32(((const ggml_bf16_t *) node->data)[j]);
+                };
+                if (!is_f32 && !is_bf16) printf("Skipped: only F32 and BF16 tensors can be dumped\n");
+                for (int64_t j = 0; is_bf16 && j < total_elements; j++) {
+                    // ggml_bf16_t is a struct, so print its raw bits instead of passing it to %f
+                    printf("0x%04x -> %.6f \n", (unsigned) ((const ggml_bf16_t *) node->data)[j].bits, value_at(j));
+                }
+                if ((is_f32 || is_bf16) && total_elements > 0) {
+                    // Accumulate in double so the sums keep their precision on large tensors
+                    double sum = 0.0, sum_sq = 0.0;
+                    float min_val = value_at(0), max_val = value_at(0);
+                    for (int64_t j = 0; j < total_elements; j++) {
+                        const float v = value_at(j);
+                        sum    += v;
+                        sum_sq += (double) v * v;
+                        min_val = fminf(min_val, v);
+                        max_val = fmaxf(max_val, v);
+                    }
+                    const double mean = sum / total_elements;
+                    // Clamp at zero: rounding can push the variance slightly negative (sqrt -> NaN)
+                    const double variance = fmax(0.0, sum_sq / total_elements - mean * mean);
+                    printf("Stats - Mean: %.6f, Std: %.6f, Min: %.6f, Max: %.6f\n",
+                        mean, sqrt(variance), min_val, max_val);
+                    printf("First 8 values: ");
+                    for (int64_t j = 0; j < 8 && j < total_elements; j++) {
+                        printf("%.6f ", value_at(j));
+                    }
+                    printf("\n");
+                    // Save to file for detailed comparison, starting a new row every ne[0] values
+                    char filename[256];
+                    snprintf(filename, sizeof(filename), "debug_tensors/%s.csv", name);
+                    FILE * f = fopen(filename, "w");
+                    if (f) {
+                        for (int64_t j = 0; j < total_elements; ++j) {
+                            fprintf(f, "%f%c", value_at(j), (j + 1) % node->ne[0] == 0 ? '\n' : ',');
+                        }
+                        fclose(f);
+                        printf("Saved to: %s\n", filename);
+                    }
+                }
+                printf("==================================\n");
+            }
+        }
 
         n_outputs_prev += n_outputs;
     } while (mctx->next());