Skip to content

Commit cac4560

Browse files
committed
opencl: generate trace for profiling
1 parent d9c8bb9 commit cac4560

File tree

1 file changed

+32
-0
lines changed

1 file changed

+32
-0
lines changed

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,12 @@ static int ggml_backend_opencl_n_devices = 0;
297297
struct ProfilingInfo {
298298
std::string op_name;
299299
std::string kernel_name;
300+
cl_ulong cmd_queued;
301+
cl_ulong cmd_submit;
302+
cl_ulong cmd_start;
303+
cl_ulong cmd_end;
304+
cl_ulong overhead_start;
305+
cl_ulong overhead_end;
300306
// For the durations below, see the OpenCL specification for clGetEventProfilingInfo
301307
// Time the kernel spent in the command queue: SUBMIT - QUEUED
302308
cl_ulong cmd_queued_duration_ns;
@@ -930,6 +936,27 @@ static void ggml_cl2_free(void) {
930936
fclose(fperf);
931937

932938
GGML_LOG_INFO("ggml_opencl: total kernel time: %f\n", total_kernel_time);
939+
940+
FILE* ftrace = fopen("cl_trace.json", "w");
941+
if (!ftrace) {
942+
GGML_LOG_ERROR("Failed to open cl_trace.json\n");
943+
return;
944+
}
945+
946+
fprintf(ftrace, "[\n");
947+
for (const ProfilingInfo & info : g_profiling_info) {
948+
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Host\"},\n",
949+
info.kernel_name.c_str(), info.cmd_queued/1000);
950+
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Host\"},\n",
951+
info.kernel_name.c_str(), info.cmd_submit/1000);
952+
953+
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Device\"},\n",
954+
info.kernel_name.c_str(), info.cmd_start/1000);
955+
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Device\"},\n",
956+
info.kernel_name.c_str(), info.cmd_end/1000);
957+
}
958+
fprintf(ftrace, "]\n");
959+
fclose(ftrace);
933960
#endif
934961
}
935962

@@ -2103,6 +2130,11 @@ static void populateProfilingInfo(
21032130
CL_CHECK(clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME,
21042131
sizeof(kernel_name), kernel_name, NULL));
21052132

2133+
info.cmd_queued = cmd_queued;
2134+
info.cmd_submit = cmd_submit;
2135+
info.cmd_start = cmd_start;
2136+
info.cmd_end = cmd_end;
2137+
21062138
info.cmd_queued_duration_ns = cmd_submit - cmd_queued;
21072139
info.cmd_submit_duration_ns = cmd_start - cmd_submit;
21082140
info.cmd_duration_ns = cmd_end - cmd_start;

0 commit comments

Comments
 (0)