Commit fc81418
Commit message: "Updates!"
1 parent 5232d6e commit fc81418

File tree: 4 files changed (+229, -121 lines)

CMakeLists.txt (12 additions, 0 deletions)

@@ -5,6 +5,16 @@ include(CheckIncludeFileCXX)
 #set(CMAKE_WARN_DEPRECATED YES)
 set(CMAKE_WARN_UNUSED_CLI YES)
 
+include(FetchContent)
+FetchContent_Declare(
+    Jsonifier
+    GIT_REPOSITORY https://github.com/realtimechris/jsonifier.git
+    GIT_TAG dev
+)
+
+# Also provides "common"
+FetchContent_MakeAvailable(Jsonifier)
+
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
@@ -211,6 +221,8 @@ set(LLAMA_PUBLIC_HEADERS
     ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
     ${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
 
+target_link_libraries(llama PUBLIC Jsonifier::Jsonifier)
+
 set_target_properties(llama
     PROPERTIES
     PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
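
Because the new Jsonifier::Jsonifier link is PUBLIC, its usage requirements (include directories, compile features) propagate to anything that links the llama target. Below is a hypothetical smoke test, not part of this commit; the umbrella header name is an assumption based on the Jsonifier repository layout:

    // Hypothetical smoke test (not part of this commit). If this translation
    // unit compiles in a target that links `llama`, the PUBLIC usage
    // requirements of Jsonifier::Jsonifier propagated as intended.
    #include <jsonifier/Index.hpp> // assumed umbrella header; adjust if the library's layout differs

    int main() {
        return 0; // compiling at all is the test
    }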

CMakePresets.json (2 additions, 1 deletion)

@@ -8,7 +8,8 @@
       "binaryDir": "${sourceDir}/build-${presetName}",
       "cacheVariables": {
         "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
-        "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+        "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/..",
+        "LLAMA_CURL": "OFF"
       }
     },
     {

src/llama-context.cpp (48 additions, 23 deletions)

@@ -1,7 +1,9 @@
 #include "llama-context.h"
-
+#include <atomic>
+#include <chrono>
 #include <cinttypes>
 #include <cstring>
+#include <mutex>
 #include <stdexcept>
 
 #include "../ggml/include/ggml-backend.h"
@@ -895,7 +897,7 @@ std::string format_dimensions(const int64_t * ne, int max_dims = GGML_MAX_DIMS)
 }
 
 // Main pretty print function
-void pretty_print_tensor(const struct ggml_tensor * tensor, std::ostream & os) {
+void pretty_print_tensor(const struct ggml_tensor * tensor, std::ostream & os, bool input = false) {
     if (!tensor) {
         os << "NULL tensor\n";
         return;
@@ -908,19 +910,36 @@ void pretty_print_tensor(const struct ggml_tensor * tensor, std::ostream & os) {
     } else {
         tensor_name = "<unnamed>";
     }
-
+    std::string tab{};
+    if (input) {
+        tab = " ";
+    }
     // Format output with nice alignment
     const int label_width = 12;
-
-    os << "────────────────────────────────────────\n";
-    os << "" << std::left << std::setw(37) << ("Tensor: " + tensor_name) << "\n";
-    os << "────────────────────────────────────────\n";
-    os << "" << std::left << std::setw(label_width) << "Type:" << std::left << std::setw(24)
+    if (!input) {
+        os << "────────────────────────────────────────\n";
+        os << tab << std::left << std::setw(37) << ("Tensor: " + tensor_name) << "\n";
+        os << "────────────────────────────────────────\n";
+    } else {
+        os << tab << std::left << std::setw(37) << ("Tensor: " + tensor_name) << "\n";
+    }
+    os << tab << std::left << std::setw(label_width) << "Type:" << std::left << std::setw(24)
        << ggml_type_name(tensor->type) << " \n";
-    os << "" << std::left << std::setw(label_width) << "Dimensions:" << std::left << std::setw(24)
+    os << tab << std::left << std::setw(label_width) << "Dimensions:" << std::left << std::setw(24)
        << format_dimensions(tensor->ne) << " \n";
-    os << "" << std::left << std::setw(label_width) << "Operation:" << std::left << std::setw(24)
-       << ggml_op_name(tensor->op) << " \n";
+    if (!input) {
+        os << "" << std::left << std::setw(label_width) << "Operation:" << std::left << std::setw(24)
+           << ggml_op_name(tensor->op) << " \n";
+        os << "" << std::left << std::setw(label_width) << "Inputs:";
+        size_t input{};
+        for (ggml_tensor * const * tensor_new = tensor->src; *tensor_new; ++tensor_new) {
+            ++input;
+        }
+        os << "" << std::left << std::setw(label_width) << std::to_string(input) << " \n";
+        for (ggml_tensor * const * tensor_new = tensor->src; *tensor_new; ++tensor_new) {
+            pretty_print_tensor(*tensor_new, os, true);
+        }
+    }
 
     // Calculate total elements
     int64_t total_elements = 1;
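
A note on the recursive input walk added in the hunk above: the loop over tensor->src stops at the first NULL pointer. In ggml, src is a fixed array of GGML_MAX_SRC pointers whose unused slots are NULL, so this terminates safely only while at least one slot is unused. The helper below is a hypothetical bounded variant, not part of the commit. (Incidentally, the counter size_t input{} shadows the bool input parameter; it works because the shadowing occurs only inside the !input branch, but a distinct name would read better.)

    // Hypothetical bounded variant (not part of this commit): counts the inputs
    // of a ggml tensor without assuming a NULL sentinel always follows the
    // last occupied slot of the fixed-size src array.
    #include "ggml.h" // for struct ggml_tensor and GGML_MAX_SRC

    static size_t count_tensor_inputs(const struct ggml_tensor * tensor) {
        size_t n = 0;
        for (int i = 0; i < GGML_MAX_SRC && tensor->src[i] != NULL; ++i) {
            ++n; // each non-NULL src slot is one input edge in the graph
        }
        return n;
    }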
@@ -929,10 +948,12 @@ void pretty_print_tensor(const struct ggml_tensor * tensor, std::ostream & os) {
             total_elements *= tensor->ne[i];
         }
     }
-    os << "" << std::left << std::setw(label_width) << "Elements:" << std::left << std::setw(24) << total_elements
-       << "\n";
+    if (!input) {
+        os << "" << std::left << std::setw(label_width) << "Elements:" << std::left << std::setw(24) << total_elements
+           << "\n";
 
-    os << "─────────────────────────────────────────\n";
+        os << "─────────────────────────────────────────\n";
+    }
 }
 
 bool save_string_to_file(const std::string & content, const std::string & filename) {
@@ -1074,17 +1095,21 @@ int llama_context::decode(llama_batch & inp_batch) {
     // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
 
     ggml_backend_sched_alloc_graph(sched.get(), gf);
-    std::stringstream stream{};
-    for (size_t x = 0; x < gf->n_leafs; ++x) {
-        pretty_print_tensor(gf->leafs[x], stream);
-    }
-    for (size_t x = 0; x < gf->n_nodes; ++x) {
-        pretty_print_tensor(gf->nodes[x], stream);
-    }
-    save_string_to_file(stream.str(), "../../../../../TensorData.txt");
+    //std::stringstream stream{};
+    //for (size_t x = 0; x < gf->n_leafs; ++x) {
+    //pretty_print_tensor(gf->leafs[x], stream);
+    //}
+    //for (size_t x = 0; x < gf->n_nodes; ++x) {
+    //pretty_print_tensor(gf->nodes[x], stream);
+    //}
+    //save_string_to_file(stream.str(), "../../../../../TensorData.txt");
     res->set_inputs(&ubatch);
-
+    stop_watch_val.reset();
     const auto compute_status = graph_compute(gf, ubatch.n_tokens > 1);
+    stop_watch_val.add_time();
+    std::cout << "LLAMA.CPP/GGML AVERAGE COMPUTE TIME, OVER: "
+              << std::setw(50 - std::size("LLAMA.CPP/GGML AVERAGE COMPUTE TIME, OVER: "))
+              << stop_watch_val.get_count() << " TOKENS: " << stop_watch_val.get_average() << std::endl;
     if (compute_status != GGML_STATUS_SUCCESS) {
         switch (compute_status) {
             case GGML_STATUS_ABORTED:
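
stop_watch_val is not declared in any hunk shown on this page; it presumably lives in one of the other changed files (the commit touches four). Judging from the call sites above (reset(), add_time(), get_count(), get_average()) and the newly added <atomic>, <chrono>, and <mutex> includes, it is a shared accumulator of per-token compute times. The sketch below is a hypothetical stand-in under those assumptions: the four member functions come from the call sites, while the units, locking, and global definition are guesses. Note in passing that std::size on a string literal counts the terminating '\0', so the setw field width computed above is 50 minus (length of the literal plus one).

    // Hypothetical stand-in (not part of this commit) for the stop_watch type
    // behind stop_watch_val. Units (milliseconds) and the locking strategy are
    // assumptions; only the four member functions are taken from the call sites.
    #include <chrono>
    #include <cstddef>
    #include <mutex>

    class stop_watch {
      public:
        void reset() {
            std::lock_guard<std::mutex> lock{ mtx };
            start = clock_type::now();
        }

        void add_time() {
            std::lock_guard<std::mutex> lock{ mtx };
            total += std::chrono::duration_cast<std::chrono::milliseconds>(clock_type::now() - start);
            ++count;
        }

        size_t get_count() const {
            std::lock_guard<std::mutex> lock{ mtx };
            return count;
        }

        double get_average() const { // average milliseconds per timed interval
            std::lock_guard<std::mutex> lock{ mtx };
            return count ? static_cast<double>(total.count()) / static_cast<double>(count) : 0.0;
        }

      private:
        using clock_type = std::chrono::steady_clock;
        mutable std::mutex mtx;
        clock_type::time_point start{};
        std::chrono::milliseconds total{};
        size_t count{};
    };

    // e.g. defined once in a shared header: inline stop_watch stop_watch_val{};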
