@@ -297,6 +297,12 @@ static int ggml_backend_opencl_n_devices = 0;
297297struct ProfilingInfo {
298298 std::string op_name;
299299 std::string kernel_name;
300+ cl_ulong cmd_queued;
301+ cl_ulong cmd_submit;
302+ cl_ulong cmd_start;
303+ cl_ulong cmd_end;
304+ cl_ulong overhead_start;
305+ cl_ulong overhead_end;
300306 // For the times below, see spec for clGetEventProfilingInfo
301307 // The time kernel spent in cmd queue - SUBMIT - QUEUED
302308 cl_ulong cmd_queued_duration_ns;
@@ -930,6 +936,27 @@ static void ggml_cl2_free(void) {
930936 fclose (fperf);
931937
932938 GGML_LOG_INFO (" ggml_opencl: total kernel time: %f\n " , total_kernel_time);
939+
940+ FILE* ftrace = fopen (" cl_trace.json" , " w" );
941+ if (!ftrace) {
942+ GGML_LOG_ERROR (" Failed to open cl_trace.json\n " );
943+ return ;
944+ }
945+
946+ fprintf (ftrace, " [\n " );
947+ for (const ProfilingInfo & info : g_profiling_info) {
948+ fprintf (ftrace, " {\" name\" : \" %s\" , \" cat\" : \" OpenCL\" , \" ph\" : \" B\" , \" ts\" : %lu, \" pid\" : \"\" , \" tid\" : \" Host\" },\n " ,
949+ info.kernel_name .c_str (), info.cmd_queued /1000 );
950+ fprintf (ftrace, " {\" name\" : \" %s\" , \" cat\" : \" OpenCL\" , \" ph\" : \" E\" , \" ts\" : %lu, \" pid\" : \"\" , \" tid\" : \" Host\" },\n " ,
951+ info.kernel_name .c_str (), info.cmd_submit /1000 );
952+
953+ fprintf (ftrace, " {\" name\" : \" %s\" , \" cat\" : \" OpenCL\" , \" ph\" : \" B\" , \" ts\" : %lu, \" pid\" : \"\" , \" tid\" : \" Device\" },\n " ,
954+ info.kernel_name .c_str (), info.cmd_start /1000 );
955+ fprintf (ftrace, " {\" name\" : \" %s\" , \" cat\" : \" OpenCL\" , \" ph\" : \" E\" , \" ts\" : %lu, \" pid\" : \"\" , \" tid\" : \" Device\" },\n " ,
956+ info.kernel_name .c_str (), info.cmd_end /1000 );
957+ }
958+ fprintf (ftrace, " ]\n " );
959+ fclose (ftrace);
933960#endif
934961}
935962
@@ -2103,6 +2130,11 @@ static void populateProfilingInfo(
21032130 CL_CHECK (clGetKernelInfo (kernel, CL_KERNEL_FUNCTION_NAME,
21042131 sizeof (kernel_name), kernel_name, NULL ));
21052132
2133+ info.cmd_queued = cmd_queued;
2134+ info.cmd_submit = cmd_submit;
2135+ info.cmd_start = cmd_start;
2136+ info.cmd_end = cmd_end;
2137+
21062138 info.cmd_queued_duration_ns = cmd_submit - cmd_queued;
21072139 info.cmd_submit_duration_ns = cmd_start - cmd_submit;
21082140 info.cmd_duration_ns = cmd_end - cmd_start;
0 commit comments