
Commit e7d1a4c

Updates!
1 parent 5232d6e commit e7d1a4c

File tree

3 files changed: +171 / -24 lines changed


CMakeLists.txt

Lines changed: 12 additions & 0 deletions
@@ -5,6 +5,16 @@ include(CheckIncludeFileCXX)
 #set(CMAKE_WARN_DEPRECATED YES)
 set(CMAKE_WARN_UNUSED_CLI YES)
 
+include(FetchContent)
+FetchContent_Declare(
+    Jsonifier
+    GIT_REPOSITORY https://github.com/realtimechris/jsonifier.git
+    GIT_TAG dev
+)
+
+# Also provides "common"
+FetchContent_MakeAvailable(Jsonifier)
+
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
@@ -211,6 +221,8 @@ set(LLAMA_PUBLIC_HEADERS
     ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
     ${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
 
+target_link_libraries(llama PUBLIC Jsonifier::Jsonifier)
+
 set_target_properties(llama
     PROPERTIES
     PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
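For reference, the FetchContent pattern used above is self-contained and can be reproduced outside llama.cpp. A minimal consumer sketch, assuming only what the hunks above show — the project and executable names here are hypothetical; the repository URL, the dev tag, and the Jsonifier::Jsonifier target come from the diff:

cmake_minimum_required(VERSION 3.14) # FetchContent_MakeAvailable needs CMake 3.14+
project(jsonifier_demo CXX)          # hypothetical demo project

include(FetchContent)
FetchContent_Declare(
    Jsonifier
    GIT_REPOSITORY https://github.com/realtimechris/jsonifier.git
    GIT_TAG dev # a moving branch; pinning a commit hash would make builds reproducible
)
FetchContent_MakeAvailable(Jsonifier)

add_executable(jsonifier_demo_app main.cpp) # hypothetical target
target_link_libraries(jsonifier_demo_app PRIVATE Jsonifier::Jsonifier)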

CMakePresets.json

Lines changed: 2 additions & 1 deletion
@@ -8,7 +8,8 @@
       "binaryDir": "${sourceDir}/build-${presetName}",
       "cacheVariables": {
         "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
-        "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+        "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/..",
+        "LLAMA_CURL": "OFF"
       }
     },
     {
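With LLAMA_CURL pinned to OFF in the base preset, configurations derived from it build without the libcurl dependency. Configuring and building through a preset would look roughly like this — the preset name release is hypothetical; the build-${presetName} directory comes from binaryDir above:

cmake --preset release
cmake --build build-release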

src/llama-context.cpp

Lines changed: 157 additions & 23 deletions
@@ -1,7 +1,9 @@
 #include "llama-context.h"
-
+#include <atomic>
+#include <chrono>
 #include <cinttypes>
 #include <cstring>
+#include <mutex>
 #include <stdexcept>
 
 #include "../ggml/include/ggml-backend.h"
@@ -12,6 +14,113 @@
 #include "llama-mmap.h"
 #include "llama-model.h"
 
+namespace test {
+
+// from
+// https://stackoverflow.com/questions/16337610/how-to-know-if-a-type-is-a-specialization-of-stdvector
+template <typename, template <typename...> typename> constexpr bool is_specialization_v = false;
+
+template <template <typename...> typename value_type, typename... arg_types>
+constexpr bool is_specialization_v<value_type<arg_types...>, value_type> = true;
+
+template <typename value_type> concept time_type = is_specialization_v<value_type, std::chrono::duration>;
+
+template <time_type value_type = std::chrono::nanoseconds> class stop_watch {
+  public:
+    using hr_clock = std::conditional_t<std::chrono::high_resolution_clock::is_steady,
+                                        std::chrono::high_resolution_clock, std::chrono::steady_clock>;
+    static constexpr bool lock_free{ std::atomic<value_type>::is_always_lock_free };
+    using time_type = std::conditional_t<lock_free, value_type, uint64_t>;
+
+    stop_watch(uint64_t newTime) noexcept { total_time_units.store(time_type{ newTime }, std::memory_order_release); }
+
+    stop_watch & operator=(stop_watch && other) noexcept {
+        if (this != &other) {
+            total_time_units.store(other.total_time_units.load(std::memory_order_acquire), std::memory_order_release);
+            start_time_units.store(other.start_time_units.load(std::memory_order_acquire), std::memory_order_release);
+        }
+        return *this;
+    }
+
+    stop_watch(stop_watch && other) noexcept { *this = std::move(other); }
+
+    stop_watch & operator=(const stop_watch & other) noexcept {
+        if (this != &other) {
+            total_time_units.store(other.total_time_units.load(std::memory_order_acquire), std::memory_order_release);
+            start_time_units.store(other.start_time_units.load(std::memory_order_acquire), std::memory_order_release);
+        }
+        return *this;
+    }
+
+    stop_watch(const stop_watch & other) noexcept { *this = other; }
+
+    bool has_time_elapsed() noexcept {
+        return ((get_current_time() - start_time_units.load(std::memory_order_acquire)) >=
+                total_time_units.load(std::memory_order_acquire));
+    }
+
+    void add_time() noexcept {
+        //std::unique_lock lock{ mutex };
+        values.emplace_back(total_time_elapsed());
+        //lock.release();
+        reset();
+    }
+
+    uint64_t get_count() noexcept { return values.size(); }
+
+    uint64_t get_average(time_type newTimeValue = time_type{}) noexcept {
+        std::unique_lock lock{ mutex };
+        uint64_t total_time{};
+        for (auto & value : values) {
+            total_time += get_value_as_uint(value);
+        }
+        return total_time / ((values.size() > 0) ? values.size() : 1);
+    }
+
+    void reset(time_type newTimeValue = time_type{}) noexcept {
+        if (newTimeValue != time_type{}) {
+            total_time_units.store(newTimeValue, std::memory_order_release);
+        }
+        start_time_units.store(get_current_time(), std::memory_order_release);
+    }
+
+    uint64_t get_total_wait_time() const noexcept {
+        return get_value_as_uint(total_time_units.load(std::memory_order_acquire));
+    }
+
+    time_type total_time_elapsed() noexcept {
+        return get_current_time() - start_time_units.load(std::memory_order_acquire);
+    }
+
+    uint64_t total_time_elapsed_uint64() noexcept {
+        return get_value_as_uint(get_current_time()) -
+               get_value_as_uint(start_time_units.load(std::memory_order_acquire));
+    }
+
+  protected:
+    std::atomic<time_type> total_time_units{};
+    std::atomic<time_type> start_time_units{};
+    std::vector<time_type> values{};
+    std::mutex             mutex{};
+
+    time_type get_current_time() {
+        if constexpr (lock_free) {
+            return std::chrono::duration_cast<value_type>(hr_clock::now().time_since_epoch());
+        } else {
+            return std::chrono::duration_cast<value_type>(hr_clock::now().time_since_epoch()).count();
+        }
+    }
+
+    // const so that const members such as get_total_wait_time() can call it
+    uint64_t get_value_as_uint(time_type time) const {
+        if constexpr (lock_free) {
+            return time.count();
+        } else {
+            return time;
+        }
+    }
+};
+} // namespace test
+
 //
 // llama_context
 //
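The stop_watch above is easiest to read from its call sites: the constructor argument seeds total_time_units (only consulted by has_time_elapsed()), reset() stamps a fresh start time, add_time() appends one elapsed interval to values and re-arms, and get_average() averages the recorded intervals as integer tick counts. A minimal usage sketch, assuming the class were lifted into its own translation unit — the sleep is a placeholder for real work such as graph_compute():

#include <chrono>
#include <iostream>
#include <thread>

int main() {
    test::stop_watch watch{ 0 };  // nanosecond resolution via the default template argument
    for (int i = 0; i < 5; ++i) {
        watch.reset();            // stamp the start time
        std::this_thread::sleep_for(std::chrono::milliseconds(2));  // placeholder workload
        watch.add_time();         // record one interval and re-arm
    }
    // average elapsed nanoseconds across the five recorded intervals
    std::cout << watch.get_count() << " samples, average "
              << watch.get_average() << " ns\n";
    return 0;
}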
@@ -895,7 +1004,7 @@ std::string format_dimensions(const int64_t * ne, int max_dims = GGML_MAX_DIMS)
 }
 
 // Main pretty print function
-void pretty_print_tensor(const struct ggml_tensor * tensor, std::ostream & os) {
+void pretty_print_tensor(const struct ggml_tensor * tensor, std::ostream & os, bool input = false) {
     if (!tensor) {
         os << "NULL tensor\n";
         return;
@@ -908,19 +1017,36 @@ void pretty_print_tensor(const struct ggml_tensor * tensor, std::ostream & os) {
     } else {
         tensor_name = "<unnamed>";
     }
-
+    std::string tab{};
+    if (input) {
+        tab = " ";
+    }
     // Format output with nice alignment
     const int label_width = 12;
-
-    os << "────────────────────────────────────────\n";
-    os << "" << std::left << std::setw(37) << ("Tensor: " + tensor_name) << "\n";
-    os << "────────────────────────────────────────\n";
-    os << "" << std::left << std::setw(label_width) << "Type:" << std::left << std::setw(24)
+    if (!input) {
+        os << "────────────────────────────────────────\n";
+        os << tab << std::left << std::setw(37) << ("Tensor: " + tensor_name) << "\n";
+        os << "────────────────────────────────────────\n";
+    } else {
+        os << tab << std::left << std::setw(37) << ("Tensor: " + tensor_name) << "\n";
+    }
+    os << tab << std::left << std::setw(label_width) << "Type:" << std::left << std::setw(24)
        << ggml_type_name(tensor->type) << " \n";
-    os << "" << std::left << std::setw(label_width) << "Dimensions:" << std::left << std::setw(24)
+    os << tab << std::left << std::setw(label_width) << "Dimensions:" << std::left << std::setw(24)
        << format_dimensions(tensor->ne) << " \n";
-    os << "" << std::left << std::setw(label_width) << "Operation:" << std::left << std::setw(24)
-       << ggml_op_name(tensor->op) << " \n";
+    if (!input) {
+        os << "" << std::left << std::setw(label_width) << "Operation:" << std::left << std::setw(24)
+           << ggml_op_name(tensor->op) << " \n";
+        os << "" << std::left << std::setw(label_width) << "Inputs:";
+        size_t input{};
+        for (ggml_tensor * const * tensor_new = tensor->src; *tensor_new; ++tensor_new) {
+            ++input;
+        }
+        os << "" << std::left << std::setw(label_width) << std::to_string(input) << " \n";
+        for (ggml_tensor * const * tensor_new = tensor->src; *tensor_new; ++tensor_new) {
+            pretty_print_tensor(*tensor_new, os, true);
+        }
+    }
 
     // Calculate total elements
     int64_t total_elements = 1;
@@ -929,12 +1055,16 @@ void pretty_print_tensor(const struct ggml_tensor * tensor, std::ostream & os) {
             total_elements *= tensor->ne[i];
         }
     }
-    os << "" << std::left << std::setw(label_width) << "Elements:" << std::left << std::setw(24) << total_elements
-       << "\n";
+    if (!input) {
+        os << "" << std::left << std::setw(label_width) << "Elements:" << std::left << std::setw(24) << total_elements
+           << "\n";
 
-    os << "─────────────────────────────────────────\n";
+        os << "─────────────────────────────────────────\n";
+    }
 }
 
+test::stop_watch stop_watch_val{ 0 };
+
 bool save_string_to_file(const std::string & content, const std::string & filename) {
     std::ofstream file(filename);
     if (!file.is_open()) {
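Taken together, the new input flag turns pretty_print_tensor from a flat dump into a one-level tree: the top-level tensor prints its full block, counts the non-null entries of tensor->src, then reprints each source indented with only its name, type, and dimensions. For a hypothetical MUL_MAT node with two sources the output would look roughly like this — the names, types, and the exact format_dimensions rendering are illustrative, since that helper is not part of this diff:

────────────────────────────────────────
Tensor: Qcur-0
────────────────────────────────────────
Type:       f32
Dimensions: [4096, 512]
Operation:  MUL_MAT
Inputs:     2
 Tensor: blk.0.attn_q.weight
 Type:       q4_K
 Dimensions: [4096, 4096]
 Tensor: attn_norm-0
 Type:       f32
 Dimensions: [4096, 512]
Elements:   2097152
─────────────────────────────────────────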
@@ -1074,17 +1204,21 @@ int llama_context::decode(llama_batch & inp_batch) {
     // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
 
     ggml_backend_sched_alloc_graph(sched.get(), gf);
-    std::stringstream stream{};
-    for (size_t x = 0; x < gf->n_leafs; ++x) {
-        pretty_print_tensor(gf->leafs[x], stream);
-    }
-    for (size_t x = 0; x < gf->n_nodes; ++x) {
-        pretty_print_tensor(gf->nodes[x], stream);
-    }
-    save_string_to_file(stream.str(), "../../../../../TensorData.txt");
+    //std::stringstream stream{};
+    //for (size_t x = 0; x < gf->n_leafs; ++x) {
+    //    pretty_print_tensor(gf->leafs[x], stream);
+    //}
+    //for (size_t x = 0; x < gf->n_nodes; ++x) {
+    //    pretty_print_tensor(gf->nodes[x], stream);
+    //}
+    //save_string_to_file(stream.str(), "../../../../../TensorData.txt");
     res->set_inputs(&ubatch);
-
+    stop_watch_val.reset();
     const auto compute_status = graph_compute(gf, ubatch.n_tokens > 1);
+    stop_watch_val.add_time();
+    std::cout << "LLAMA.CPP/GGML AVERAGE COMPUTE TIME, OVER: "
+              << std::setw(50 - std::size("LLAMA.CPP/GGML AVERAGE COMPUTE TIME, OVER: "))
+              << stop_watch_val.get_count() << " TOKENS: " << stop_watch_val.get_average() << std::endl;
     if (compute_status != GGML_STATUS_SUCCESS) {
         switch (compute_status) {
             case GGML_STATUS_ABORTED:
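Two subtleties in the new logging line are worth spelling out. First, get_count() counts decode calls, so the "TOKENS" label is only accurate for single-token generation; a prompt batch with ubatch.n_tokens > 1 is still one sample. Second, std::size applied to a string literal counts the terminating NUL, so the label's 43 visible characters give std::size(...) == 44, and the field width works out to setw(50 - 44) = setw(6) for the count. A standalone check of the width arithmetic (not part of the diff):

#include <iostream>
#include <iterator>

int main() {
    constexpr char label[] = "LLAMA.CPP/GGML AVERAGE COMPUTE TIME, OVER: ";
    // 43 visible characters + '\0' == 44, so the diff's setw(50 - 44) pads to 6 columns
    std::cout << std::size(label) << '\n';  // prints 44
    return 0;
}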
