Skip to content

Commit 4578c37

Browse files
profiler: make profiler optional with GGML_GRAPH_PROFILER
1 parent b7ae2d1 commit 4578c37

File tree

6 files changed

+83
-18
lines changed

6 files changed

+83
-18
lines changed

ggml/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ option(GGML_CCACHE "ggml: use ccache if available" ON)
7575
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
7676
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
7777
option(GGML_GPROF "ggml: enable gprof" OFF)
78+
option(GGML_GRAPH_PROFILER "ggml: enable internal Graph and Op profiler" OFF)
7879

7980
# build
8081
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)

ggml/src/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
99
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
1010
endif()
1111

12+
if (GGML_GRAPH_PROFILER)
13+
add_compile_definitions(GGML_GRAPH_PROFILER)
14+
endif()
15+
1216
if (NOT MSVC)
1317
if (GGML_SANITIZE_THREAD)
1418
add_compile_options(-fsanitize=thread)

ggml/src/ggml-impl.h

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -157,17 +157,6 @@ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct g
157157
GGML_ABORT("fatal error");
158158
}
159159

160-
// op profile data (per op / per thread)
161-
enum ggml_profile_event {
162-
GGML_PROF_OP_START,
163-
GGML_PROF_OP_SYNC,
164-
GGML_PROF_OP_END
165-
};
166-
167-
struct ggml_profile_data {
168-
uint64_t nsec[GGML_PROF_OP_END + 1]; // event times in nsec
169-
};
170-
171160
// computation graph
172161

173162
enum ggml_cgraph_eval_order {
@@ -176,6 +165,8 @@ enum ggml_cgraph_eval_order {
176165
GGML_CGRAPH_EVAL_ORDER_COUNT
177166
};
178167

168+
struct ggml_profile_data;
169+
179170
struct ggml_cgraph {
180171
int size;
181172
int n_nodes;
@@ -194,12 +185,6 @@ struct ggml_cgraph {
194185

195186
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
196187

197-
void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads);
198-
void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads);
199-
void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads);
200-
void ggml_profile_graph_free(struct ggml_cgraph *cg);
201-
void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith);
202-
203188
#ifdef __cplusplus
204189
}
205190
#endif

ggml/src/ggml-profile.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
1-
#include "ggml-impl.h"
1+
#include "ggml-profile.h"
2+
23
#include <stdint.h>
34
#include <stdlib.h>
45

56
#include <chrono>
67

8+
#ifdef GGML_GRAPH_PROFILER
9+
710
extern "C" void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads)
811
{
912
if (!getenv("GGML_GRAPH_PROFILE")) { return; }
@@ -138,3 +141,5 @@ extern "C" void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_pr
138141
using clock = std::chrono::high_resolution_clock;
139142
cg->prof[node_n][ith].nsec[e] = std::chrono::nanoseconds(clock::now().time_since_epoch()).count();
140143
}
144+
145+
#endif // GGML_GRAPH_PROFILER

ggml/src/ggml-profile.h

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#pragma once
2+
3+
#include "ggml-impl.h"
4+
5+
// GGML internal header: graph/op profiler types and functions (no-op stubs unless GGML_GRAPH_PROFILER is defined)
6+
7+
#ifdef __cplusplus
8+
extern "C" {
9+
#endif
10+
11+
// op profile data: one record of event timestamps per op (graph node) per thread
12+
enum ggml_profile_event {
13+
GGML_PROF_OP_START,
14+
GGML_PROF_OP_SYNC,
15+
GGML_PROF_OP_END
16+
};
17+
18+
struct ggml_profile_data {
19+
uint64_t nsec[GGML_PROF_OP_END + 1]; // event times in nsec
20+
};
21+
22+
#ifndef GGML_GRAPH_PROFILER
23+
24+
// Profiler not compiled in (GGML_GRAPH_PROFILER undefined): every profiler function is an empty inline stub
25+
26+
static inline void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads)
27+
{
28+
GGML_UNUSED(cg);
29+
GGML_UNUSED(n_threads);
30+
}
31+
32+
static inline void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads)
33+
{
34+
GGML_UNUSED(cg);
35+
GGML_UNUSED(n_threads);
36+
}
37+
38+
static inline void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads)
39+
{
40+
GGML_UNUSED(cg);
41+
GGML_UNUSED(n_threads);
42+
}
43+
44+
static inline void ggml_profile_graph_free(struct ggml_cgraph *cg)
45+
{
46+
GGML_UNUSED(cg);
47+
}
48+
49+
static inline void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith)
50+
{
51+
GGML_UNUSED(cg);
52+
GGML_UNUSED(e);
53+
GGML_UNUSED(node_n);
54+
GGML_UNUSED(ith);
55+
}
56+
57+
#else
58+
59+
void ggml_profile_graph_init(struct ggml_cgraph *cg, int n_threads);
60+
void ggml_profile_graph_start(struct ggml_cgraph *cg, int n_threads);
61+
void ggml_profile_graph_finish(struct ggml_cgraph *cg, int n_threads);
62+
void ggml_profile_graph_free(struct ggml_cgraph *cg);
63+
void ggml_profile_op_event(const struct ggml_cgraph *cg, enum ggml_profile_event e, int node_n, int ith);
64+
65+
#endif // GGML_GRAPH_PROFILER
66+
67+
#ifdef __cplusplus
68+
}
69+
#endif

ggml/src/ggml.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "ggml-quants.h"
88
#include "ggml.h"
99
#include "ggml-aarch64.h"
10+
#include "ggml-profile.h"
1011

1112
#if defined(_MSC_VER) || defined(__MINGW32__)
1213
#include <malloc.h> // using malloc.h with MSC/MINGW

0 commit comments

Comments
 (0)