11/*
2- * Copyright (c) 2016-2024 Arm Limited.
2+ * Copyright (c) 2016-2025 Arm Limited.
33 *
44 * SPDX-License-Identifier: MIT
55 *
2626#include " arm_compute/core/CL/CLKernelLibrary.h"
2727#include " arm_compute/runtime/CL/CLTuner.h"
2828
29+ #include " src/common/utils/profile/acl_profile.h"
2930#include " src/core/CL/ICLKernel.h"
3031
3132namespace arm_compute
@@ -70,13 +71,18 @@ void CLScheduler::set_tuner(ICLTuner *tuner)
7071
7172void CLScheduler::sync ()
7273{
74+ ARM_COMPUTE_TRACE_EVENT_BEGIN (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU, " CLScheduler::sync" );
7375 _queue.finish ();
76+ ARM_COMPUTE_TRACE_OPENCL_SYNC ();
7477}
7578
7679cl::Event CLScheduler::enqueue_sync_event ()
7780{
81+ ARM_COMPUTE_TRACE_EVENT_BEGIN (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU,
82+ " CLScheduler::enqueue_sync_event" );
7883 cl::Event event;
7984 _queue.enqueueMarker (&event);
85+ ARM_COMPUTE_TRACE_EVENT_END (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU);
8086 return event;
8187}
8288
@@ -179,10 +185,13 @@ void CLScheduler::init(cl::Context context,
179185 _cl_tuner = cl_tuner;
180186 _gemm_heuristics = gemm_h;
181187 _backend_type = cl_backend_type;
188+ ARM_COMPUTE_TRACE_OPENCL_BEGIN ();
182189}
183190
184191void CLScheduler::enqueue_common (ICLKernel &kernel, ITensorPack &tensors, bool flush)
185192{
193+ ARM_COMPUTE_TRACE_EVENT_BEGIN (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU,
194+ " CLScheduler::enqueue_common" );
186195 ARM_COMPUTE_ERROR_ON_MSG (
187196 !_is_initialised, " The CLScheduler is not initialised yet! Please call the CLScheduler::get().default_init(), \
188197 or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!" );
@@ -199,10 +208,13 @@ void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool f
199208 inject_memory ? kernel.run_op (tensors, kernel.window (), _queue) : kernel.run (kernel.window (), _queue);
200209
201210 flush_queue (flush);
211+ ARM_COMPUTE_TRACE_EVENT_END (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU);
202212}
203213
204214void CLScheduler::flush_queue (bool flush)
205215{
216+ ARM_COMPUTE_TRACE_EVENT_BEGIN (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU, " CLScheduler::flush_queue" );
217+ ARM_COMPUTE_TRACE_OPENCL_BEGIN ();
206218 _enqueue_count++;
207219 _flush_count += flush;
208220 const float flush_ratio = _flush_count / (float )_enqueue_count;
@@ -232,17 +244,22 @@ void CLScheduler::flush_queue(bool flush)
232244 _job_chaining_count = 0 ;
233245 _queue.flush ();
234246 }
247+ ARM_COMPUTE_TRACE_EVENT_END (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU);
235248}
236249
237250void CLScheduler::enqueue (ICLKernel &kernel, bool flush)
238251{
252+ ARM_COMPUTE_TRACE_EVENT_BEGIN (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU, " CLScheduler::enqueue" );
239253 ITensorPack pack;
240254 enqueue_common (kernel, pack, flush);
255+ ARM_COMPUTE_TRACE_EVENT_END (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU);
241256}
242257
243258void CLScheduler::enqueue_op (ICLKernel &kernel, ITensorPack &tensors, bool flush)
244259{
260+ ARM_COMPUTE_TRACE_EVENT_BEGIN (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU, " CLScheduler::enqueue_op" );
245261 enqueue_common (kernel, tensors, flush);
262+ ARM_COMPUTE_TRACE_EVENT_END (ARM_COMPUTE_PROF_CAT_SCHEDULER, ARM_COMPUTE_PROF_LVL_GPU);
246263}
247264
248265void CLScheduler::enable_job_chaining (int job_chaining_size)
0 commit comments