11#include  " llama-context.h" 
2- 
2+ #include  < atomic> 
3+ #include  < chrono> 
34#include  < cinttypes> 
45#include  < cstring> 
6+ #include  < mutex> 
57#include  < stdexcept> 
68
79#include  " ../ggml/include/ggml-backend.h" 
@@ -895,7 +897,7 @@ std::string format_dimensions(const int64_t * ne, int max_dims = GGML_MAX_DIMS)
895897}
896898
897899//  Main pretty print function
898- void  pretty_print_tensor (const  struct  ggml_tensor  * tensor, std::ostream & os) {
900+ void  pretty_print_tensor (const  struct  ggml_tensor  * tensor, std::ostream & os,  bool  input =  false ) {
899901    if  (!tensor) {
900902        os << " NULL tensor\n "  ;
901903        return ;
@@ -908,19 +910,36 @@ void pretty_print_tensor(const struct ggml_tensor * tensor, std::ostream & os) {
908910    } else  {
909911        tensor_name = " <unnamed>"  ;
910912    }
911- 
913+     std::string tab{};
914+     if  (input) {
915+         tab = "     "  ;
916+     }
912917    //  Format output with nice alignment
913918    const  int  label_width = 12 ;
914- 
915-     os << " ────────────────────────────────────────\n "  ;
916-     os << " "   << std::left << std::setw (37 ) << (" Tensor: "   + tensor_name) << "  │\n "  ;
917-     os << " ────────────────────────────────────────\n "  ;
918-     os << " "   << std::left << std::setw (label_width) << " Type:"   << std::left << std::setw (24 )
919+     if  (!input) {
920+         os << " ────────────────────────────────────────\n "  ;
921+         os << tab << std::left << std::setw (37 ) << (" Tensor: "   + tensor_name) << "  │\n "  ;
922+         os << " ────────────────────────────────────────\n "  ;
923+     } else  {
924+         os << tab << std::left << std::setw (37 ) << (" Tensor: "   + tensor_name) << "  │\n "  ;
925+     }
926+     os << tab << std::left << std::setw (label_width) << " Type:"   << std::left << std::setw (24 )
919927       << ggml_type_name (tensor->type ) << "  \n "  ;
920-     os << " "   << std::left << std::setw (label_width) << " Dimensions:"   << std::left << std::setw (24 )
928+     os << tab  << std::left << std::setw (label_width) << " Dimensions:"   << std::left << std::setw (24 )
921929       << format_dimensions (tensor->ne ) << "  \n "  ;
922-     os << " "   << std::left << std::setw (label_width) << " Operation:"   << std::left << std::setw (24 )
923-        << ggml_op_name (tensor->op ) << "  \n "  ;
930+     if  (!input) {
931+         os << " "   << std::left << std::setw (label_width) << " Operation:"   << std::left << std::setw (24 )
932+            << ggml_op_name (tensor->op ) << "  \n "  ;
933+         os << " "   << std::left << std::setw (label_width) << " Inputs:"  ;
934+         size_t  input{};
935+         for  (ggml_tensor * const  * tensor_new = tensor->src ; *tensor_new; ++tensor_new) {
936+             ++input;
937+         }
938+         os << " "   << std::left << std::setw (label_width) << std::to_string (input) << "  \n "  ;
939+         for  (ggml_tensor * const  * tensor_new = tensor->src ; *tensor_new; ++tensor_new) {
940+             pretty_print_tensor (*tensor_new, os, true );
941+         }
942+     }
924943
925944    //  Calculate total elements
926945    int64_t  total_elements = 1 ;
@@ -929,12 +948,16 @@ void pretty_print_tensor(const struct ggml_tensor * tensor, std::ostream & os) {
929948            total_elements *= tensor->ne [i];
930949        }
931950    }
932-     os << " "   << std::left << std::setw (label_width) << " Elements:"   << std::left << std::setw (24 ) << total_elements
933-        << " \n "  ;
951+     if  (!input) {
952+         os << " "   << std::left << std::setw (label_width) << " Elements:"   << std::left << std::setw (24 ) << total_elements
953+            << " \n "  ;
934954
935-     os << " ─────────────────────────────────────────\n "  ;
955+         os << " ─────────────────────────────────────────\n "  ;
956+     }
936957}
937958
// Global stopwatch accumulating graph-compute timings in llama_context::decode
// (reset() is called immediately before graph_compute and add_time() right
// after it; get_count()/get_average() are read for the per-token report).
// NOTE(review): mutable global state — presumably safe only while a single
// context decodes at a time; confirm no concurrent decode paths exist.
test::stop_watch stop_watch_val{ 0 };
960+ 
938961bool  save_string_to_file (const  std::string & content, const  std::string & filename) {
939962    std::ofstream file (filename);
940963    if  (!file.is_open ()) {
@@ -1074,17 +1097,21 @@ int llama_context::decode(llama_batch & inp_batch) {
10741097        //  LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
10751098
10761099        ggml_backend_sched_alloc_graph (sched.get (), gf);
1077-         std::stringstream stream{};
1078-         for  (size_t  x = 0 ; x < gf->n_leafs ; ++x) {
1079-              pretty_print_tensor (gf->leafs [x], stream);
1080-         }
1081-         for  (size_t  x = 0 ; x < gf->n_nodes ; ++x) {
1082-              pretty_print_tensor (gf->nodes [x], stream);
1083-         }
1084-         save_string_to_file (stream.str (), " ../../../../../TensorData.txt"  );
1100+         // std::stringstream stream{};
1101+         // for (size_t x = 0; x < gf->n_leafs; ++x) {
1102+         // pretty_print_tensor(gf->leafs[x], stream);
1103+         // }
1104+         // for (size_t x = 0; x < gf->n_nodes; ++x) {
1105+         // pretty_print_tensor(gf->nodes[x], stream);
1106+         // }
1107+         // save_string_to_file(stream.str(), "../../../../../TensorData.txt");
10851108        res->set_inputs (&ubatch);
1086- 
1109+         stop_watch_val. reset (); 
10871110        const  auto  compute_status = graph_compute (gf, ubatch.n_tokens  > 1 );
1111+         stop_watch_val.add_time ();
1112+         std::cout << " LLAMA.CPP/GGML AVERAGE COMPUTE TIME, OVER: " 
1113+                   << std::setw (50  - std::size (" LLAMA.CPP/GGML AVERAGE COMPUTE TIME, OVER: "  ))
1114+                   << stop_watch_val.get_count () << "  TOKENS: "   << stop_watch_val.get_average () << std::endl;
10881115        if  (compute_status != GGML_STATUS_SUCCESS) {
10891116            switch  (compute_status) {
10901117                case  GGML_STATUS_ABORTED:
0 commit comments