Skip to content

Commit 7c78c40

Browse files
committed
feat: add OpenCL tracepoint for GPU execution tracing
Added tracepoints to CLScheduler.cpp

Resolves: COMPMID-8339
Signed-off-by: Walid Ben Romdhane <[email protected]>
Change-Id: Ice3b24aa4ef1feb7b75bcb7983984a70ff405396
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/775535
Comments-Addressed: bsgcomp <[email protected]>
Tested-by: bsgcomp <[email protected]>
Reviewed-by: Dongsung Kim <[email protected]>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/15159
Comments-Addressed: Arm Jenkins <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
Reviewed-by: Pablo Marquez Tello <[email protected]>
Tested-by: Arm Jenkins <[email protected]>
1 parent 7445d3c commit 7c78c40

File tree

1 file changed

+18
-1
lines changed

1 file changed

+18
-1
lines changed

src/runtime/CL/CLScheduler.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2024 Arm Limited.
2+
* Copyright (c) 2016-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -26,6 +26,7 @@
2626
#include "arm_compute/core/CL/CLKernelLibrary.h"
2727
#include "arm_compute/runtime/CL/CLTuner.h"
2828

29+
#include "src/common/utils/profile/acl_profile.h"
2930
#include "src/core/CL/ICLKernel.h"
3031

3132
namespace arm_compute
@@ -70,13 +71,18 @@ void CLScheduler::set_tuner(ICLTuner *tuner)
7071

7172
void CLScheduler::sync()
7273
{
74+
ARM_COMPUTE_TRACE_EVENT_BEGIN(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU, "CLScheduler::sync");
7375
_queue.finish();
76+
ARM_COMPUTE_TRACE_OPENCL_SYNC();
7477
}
7578

7679
/**
 * Enqueue a marker on the scheduler's command queue and hand back the event tied to it.
 *
 * The enqueue is wrapped in a scheduler trace event named "CLScheduler::enqueue_sync_event".
 *
 * @return The cl::Event that was passed to enqueueMarker().
 */
cl::Event CLScheduler::enqueue_sync_event()
{
    ARM_COMPUTE_TRACE_EVENT_BEGIN(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU,
                                  "CLScheduler::enqueue_sync_event");

    // The marker call fills in the event object through the pointer it is given.
    cl::Event marker_event;
    _queue.enqueueMarker(&marker_event);

    // The trace event is closed before returning so that it covers only the enqueue itself.
    ARM_COMPUTE_TRACE_EVENT_END(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU);

    return marker_event;
}
8288

@@ -179,10 +185,13 @@ void CLScheduler::init(cl::Context context,
179185
_cl_tuner = cl_tuner;
180186
_gemm_heuristics = gemm_h;
181187
_backend_type = cl_backend_type;
188+
ARM_COMPUTE_TRACE_OPENCL_BEGIN();
182189
}
183190

184191
void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush)
185192
{
193+
ARM_COMPUTE_TRACE_EVENT_BEGIN(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU,
194+
"CLScheduler::enqueue_common");
186195
ARM_COMPUTE_ERROR_ON_MSG(
187196
!_is_initialised, "The CLScheduler is not initialised yet! Please call the CLScheduler::get().default_init(), \
188197
or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!");
@@ -199,10 +208,13 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool f
199208
inject_memory ? kernel.run_op(tensors, kernel.window(), _queue) : kernel.run(kernel.window(), _queue);
200209

201210
flush_queue(flush);
211+
ARM_COMPUTE_TRACE_EVENT_END(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU);
202212
}
203213

204214
void CLScheduler::flush_queue(bool flush)
205215
{
216+
ARM_COMPUTE_TRACE_EVENT_BEGIN(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU, "CLScheduler::flush_queue");
217+
ARM_COMPUTE_TRACE_OPENCL_BEGIN();
206218
_enqueue_count++;
207219
_flush_count += flush;
208220
const float flush_ratio = _flush_count / (float)_enqueue_count;
@@ -232,17 +244,22 @@ void CLScheduler::flush_queue(bool flush)
232244
_job_chaining_count = 0;
233245
_queue.flush();
234246
}
247+
ARM_COMPUTE_TRACE_EVENT_END(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU);
235248
}
236249

237250
/**
 * Enqueue a kernel that takes no externally supplied tensor pack.
 *
 * A default-constructed ITensorPack is forwarded to enqueue_common() together with the kernel and
 * the flush flag. The call is wrapped in a scheduler trace event named "CLScheduler::enqueue".
 *
 * @param[in] kernel Kernel forwarded to enqueue_common().
 * @param[in] flush  Flush flag forwarded unchanged to enqueue_common().
 */
void CLScheduler::enqueue(ICLKernel &kernel, bool flush)
{
    ARM_COMPUTE_TRACE_EVENT_BEGIN(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU, "CLScheduler::enqueue");

    // enqueue_common() takes the pack by non-const reference, so a named local is required.
    ITensorPack empty_pack;
    enqueue_common(kernel, empty_pack, flush);

    ARM_COMPUTE_TRACE_EVENT_END(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU);
}
242257

243258
/**
 * Enqueue a kernel together with a caller-supplied tensor pack.
 *
 * All three arguments are forwarded unchanged to enqueue_common(); the call is wrapped in a
 * scheduler trace event named "CLScheduler::enqueue_op".
 *
 * @param[in]     kernel  Kernel forwarded to enqueue_common().
 * @param[in,out] tensors Tensor pack forwarded by reference to enqueue_common().
 * @param[in]     flush   Flush flag forwarded unchanged to enqueue_common().
 */
void CLScheduler::enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush)
{
    ARM_COMPUTE_TRACE_EVENT_BEGIN(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU, "CLScheduler::enqueue_op");

    enqueue_common(kernel, tensors, flush);

    ARM_COMPUTE_TRACE_EVENT_END(ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU);
}
247264

248265
void CLScheduler::enable_job_chaining(int job_chaining_size)

0 commit comments

Comments
 (0)