diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000000000..f686aaf566df0 --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,20 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/**", + "${workspaceFolder}/include/**", + "${workspaceFolder}/ggml/**" + ], + "defines": [], + "compilerPath": "/usr/bin/gcc", + "cStandard": "c11", + "cppStandard": "c++11", + "intelliSenseMode": "linux-gcc-x64", + "configurationProvider": "ms-vscode.cmake-tools", + "compileCommands": "${workspaceFolder}/build/compile_commands.json" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000000000..ba57704e5b629 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,73 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Debug llama-cli TinyLlama-1.1B-Chat-v1.0-GGUF", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/bin/llama-cli", + "args": [ + "-hf", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF", + "-p", "Hello, I am a language model" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "build", + "miDebuggerPath": "/usr/bin/gdb" + }, + { + "name": "Debug llama-server TinyLlama-1.1B-Chat-v1.0-GGUF", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/bin/llama-server", + "args": [ + "-hf", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + 
], + "preLaunchTask": "build", + "miDebuggerPath": "/usr/bin/gdb" + }, + { + "name": "Debug llama-cli --list-devices", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/bin/llama-cli", + "args": ["--list-devices"], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "build", + "miDebuggerPath": "/usr/bin/gdb" + } + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000000000..ff558300d2ebe --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,42 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build", + "type": "shell", + "command": "cmake", + "args": [ + "--build", + "build", + "--config", + "Debug", + "-j8" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "problemMatcher": "$gcc", + "options": { + "cwd": "${workspaceFolder}" + }, + "dependsOn": [ + "cmake-configure" + ] + }, + { + "label": "cmake-configure", + "type": "shell", + "command": "cmake", + "args": [ + "-B", + "build", + "-DCMAKE_BUILD_TYPE=Debug" + ], + "problemMatcher": "$gcc", + "options": { + "cwd": "${workspaceFolder}" + } + } + ] +} \ No newline at end of file diff --git a/README.md b/README.md index cf45f23cf4475..3586be309e1d0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,35 @@ +# GGML GRAPH LOGGING FORK OF LLAMA.CPP + +This fork includes a feature to log the computation graph of the model to a CSV file. + +## Graph Logging + +To enable graph logging, set the environment variable `GGML_LOG_GRAPH` to `1` or `true`: + +```bash +export GGML_LOG_GRAPH=1 +``` + +By default, the graph will be written to `ggml_graph.csv` in the current working directory. 
You can specify a different filename using the `GGML_LOG_GRAPH_FILENAME` environment variable:
+
+```bash
+export GGML_LOG_GRAPH_FILENAME=/path/to/your/graph_log.csv
+```
+
+**Important:** When graph logging is enabled, the program will terminate immediately after writing the log file.
+
+### Output Format
+
+The output CSV file contains the following columns for each node (tensor) in the graph:
+
+- `node_id`: The memory address of the tensor, serving as a unique ID.
+- `name`: The name assigned to the tensor (if any).
+- `op`: The GGML operation that produces this tensor.
+- `dim0`, `dim1`, `dim2`, `dim3`: The dimensions of the tensor.
+- `bytes`: The size of the tensor data in bytes.
+- `flags`: Tensor flags (e.g., `PARAM`, `INPUT`, `OUTPUT`, `LEAF`).
+- `src0`...`srcN`: The `node_id` (memory address) of the source tensors for this node, up to `GGML_MAX_SRC`.
+
 # llama.cpp
 
 ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
diff --git a/ggml/include/ggml-cpp.h b/ggml/include/ggml-cpp.h
index a12342c25debe..7adf6706b6811 100644
--- a/ggml/include/ggml-cpp.h
+++ b/ggml/include/ggml-cpp.h
@@ -8,6 +8,7 @@
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 #include "gguf.h"
+#include "ggml-graph-logging.h"
 
 #include <memory>
 
 // Smart pointers for ggml types
diff --git a/ggml/include/ggml-graph-logging.h b/ggml/include/ggml-graph-logging.h
new file mode 100644
index 0000000000000..650e085bd24e8
--- /dev/null
+++ b/ggml/include/ggml-graph-logging.h
@@ -0,0 +1,19 @@
+// ggml-graph-logging.h
+#pragma once
+
+#include <stdio.h>
+#include <stdbool.h>
+
+// Forward declaration for ggml_cgraph
+struct ggml_cgraph;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Log the entire computation graph to CSV
+void ggml_log_graph(struct ggml_cgraph* cgraph);
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index f00700da71fcd..ccfb7837894ff
---
a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -201,7 +201,8 @@ add_library(ggml-base
             ggml-threading.h
             ggml-quants.c
             ggml-quants.h
-            gguf.cpp)
+            gguf.cpp
+            ggml-graph-logging.c)
 
 target_include_directories(ggml-base PRIVATE .)
 
 if (GGML_BACKEND_DL)
diff --git a/ggml/src/ggml-graph-logging.c b/ggml/src/ggml-graph-logging.c
new file mode 100644
index 0000000000000..9dad46174f97f
--- /dev/null
+++ b/ggml/src/ggml-graph-logging.c
@@ -0,0 +1,150 @@
+// ggml-graph-logging.c
+#include "ggml-graph-logging.h"
+#include <stdlib.h>   // for getenv
+#include <stdio.h>    // for fprintf, stderr
+#include <string.h>
+#include <inttypes.h>
+#include <stdbool.h>
+
+// Include the full definition of ggml structs
+#include "ggml.h"
+#include "ggml-impl.h" // This includes the full definition of ggml_cgraph
+
+
+//
+// Graph logging
+//
+// This is a simple logging system for the graph of a GGML model.
+//
+// The graph is logged to a CSV file.
+//
+// The CSV file contains the following columns:
+//
+// - node_id: The unique identifier for the node.
+// - name: The name of the node.
+// - op: The operation performed by the node.
+// - dim0, dim1, dim2, dim3: The dimensions of the node.
+// - bytes: The number of bytes in the node.
+// - flags: The flags of the node.
+// - src0..srcN: The source nodes of the node.
+//
+// The CSV file is written to the current working directory.
+// The CSV file is overwritten if it already exists.
+// The program will terminate after the graph is logged.
+//
+// The graph is logged when the environment variable GGML_LOG_GRAPH is set to 1.
+// The filename for the log file can be set using the environment variable GGML_LOG_GRAPH_FILENAME.
+//
+// The graph is logged using the ggml_log_graph function.
+// + + +static FILE* ggml_graph_log_init(const char* filename) { + FILE* file = fopen(filename, "w"); + if (file) { + fprintf(stderr, "%s: Graph logging enabled, will write to '%s'\n", __func__, filename); + + // Write CSV header - now with dynamic source columns + fprintf(file, "node_id,name,op,dim0,dim1,dim2,dim3,bytes,flags"); + + // Add source columns based on GGML_MAX_SRC + for (int i = 0; i < GGML_MAX_SRC; i++) { + fprintf(file, ",src%d", i); + } + fprintf(file, "\n"); + } else { + fprintf(stderr, "%s: Error: Failed to open graph file '%s' for writing.\n", __func__, filename); + } + return file; +} + +static void ggml_graph_log_free(FILE* file) { + if (file) { + fclose(file); + } +} + +static void write_tensor_to_csv(struct ggml_tensor* tensor, const char* custom_flags, FILE* file) { + if (!tensor || !file) return; + + // Get flags + const char* flags = custom_flags ? custom_flags : "-"; + if (!custom_flags) { + if (tensor->flags & GGML_TENSOR_FLAG_PARAM) { + flags = "PARAM"; + } else if (tensor->flags & GGML_TENSOR_FLAG_INPUT) { + flags = "INPUT"; + } else if (tensor->flags & GGML_TENSOR_FLAG_OUTPUT) { + flags = "OUTPUT"; + } + } + + // Calculate size in bytes + size_t total_size = ggml_nbytes(tensor); + + // Write base tensor info + fprintf(file, + "%p,%s,%s,%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%.2f,%s", + (void*)tensor, // node_id (pointer for uniqueness) + tensor->name[0] ? 
tensor->name : "unnamed", // name + ggml_op_name(tensor->op), // op + tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], // dimensions + (double)total_size, // bytes + flags); // flags + + // Write all source tensors dynamically + for (int i = 0; i < GGML_MAX_SRC; i++) { + fprintf(file, ",%p", (void*)tensor->src[i]); + } + + fprintf(file, "\n"); +} + +void ggml_log_graph(struct ggml_cgraph* cgraph) { + const char* log_graph_env = getenv("GGML_LOG_GRAPH"); + if (!log_graph_env || (strcmp(log_graph_env, "1") != 0 && strcmp(log_graph_env, "true") != 0)) { + return; + } + + // Get the filename from the environment variable, or use the default + const char* filename_env = getenv("GGML_LOG_GRAPH_FILENAME"); + const char* filename = filename_env ? filename_env : "ggml_graph.csv"; + + FILE* file = ggml_graph_log_init(filename); + if (!file || !cgraph) { + return; + } + + // Process all nodes in the graph + for (int i = 0; i < cgraph->n_nodes; i++) { + struct ggml_tensor* node = cgraph->nodes[i]; + write_tensor_to_csv(node, NULL, file); + } + + // Process all leaf nodes as well + for (int i = 0; i < cgraph->n_leafs; i++) { + struct ggml_tensor* leaf = cgraph->leafs[i]; + if (!leaf) continue; + + // Skip if already included in nodes + bool already_processed = false; + for (int j = 0; j < cgraph->n_nodes; j++) { + if (cgraph->nodes[j] == leaf) { + already_processed = true; + break; + } + } + if (already_processed) continue; + + write_tensor_to_csv(leaf, "LEAF", file); + } + + // Flush the file to ensure all data is written + fflush(file); + ggml_graph_log_free(file); + + fprintf(stderr, "Graph logging complete: %d nodes and %d leafs written to CSV file. 
Terminating.\n", + cgraph->n_nodes, cgraph->n_leafs); + exit(0); +} + diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 6b7bfecf3a1cf..460558887af95 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -13064,6 +13064,9 @@ llm_graph_result_ptr llama_model::build_graph( // add on pooling layer llm->build_pooling(gf, cls, cls_b, cls_out, cls_out_b); + // Log the entire computation graph after it's built + ggml_log_graph(gf); + return std::move(llm->res); }