Skip to content

Commit e058e2e

Browse files
committed
feat: add profiling tracepoints to CPU kernel implementations (Part 2)
Instrument key CPU kernel entry points in src/cpu/kernels/* with tracepoints to enable lightweight runtime profiling. These tracepoints leverage the ACL_PROFILE macros and form the basis for collecting execution timing and behavior metrics. This is the first step in integrating end-to-end profiling support.

Partially Resolves: COMPMID-8330
Signed-off-by: Walid Ben Romdhane <[email protected]>
Change-Id: I24b7f5cf8aac90b2011a146cf71f27b30a1f8632
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14775
Benchmark: Arm Jenkins <[email protected]>
Comments-Addressed: Arm Jenkins <[email protected]>
Tested-by: Arm Jenkins <[email protected]>
Reviewed-by: Andreas Flöjt <[email protected]>
1 parent edd2381 commit e058e2e

38 files changed

+149
-34
lines changed

src/cpu/kernels/CpuPermuteKernel.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018-2021, 2024 Arm Limited.
2+
* Copyright (c) 2018-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -31,6 +31,7 @@
3131
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
3232
#include "arm_compute/core/Validate.h"
3333

34+
#include "src/common/utils/profile/acl_profile.h"
3435
#include "src/core/helpers/AutoConfiguration.h"
3536
#include "src/core/helpers/WindowHelpers.h"
3637

@@ -200,6 +201,7 @@ void run_permute(const Window &window, const ITensor *src, const ITensor *dst, c
200201

201202
void CpuPermuteKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm)
202203
{
204+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuPermuteKernel::configure");
203205
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
204206
const TensorShape dst_shape = misc::shape_calculator::compute_permutation_output_shape(*src, perm);
205207
// Destination auto initialization if not yet initialized
@@ -220,12 +222,14 @@ void CpuPermuteKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const
220222

221223
Status CpuPermuteKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm)
222224
{
225+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuPermuteKernel::validate");
223226
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, perm));
224227
return Status{};
225228
}
226229

227230
void CpuPermuteKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
228231
{
232+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuPermuteKernel::run_op");
229233
ARM_COMPUTE_UNUSED(info);
230234
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
231235
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);

src/cpu/kernels/CpuPool2dKernel.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2023 Arm Limited.
2+
* Copyright (c) 2017-2023, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -29,6 +29,7 @@
2929
#include "arm_compute/core/Validate.h"
3030
#include "arm_compute/core/Window.h"
3131

32+
#include "src/common/utils/profile/acl_profile.h"
3233
#include "src/core/common/Registrars.h"
3334
#include "src/core/CPP/Validate.h"
3435
#include "src/core/helpers/AutoConfiguration.h"
@@ -303,6 +304,7 @@ void CpuPool2dKernel::configure(ITensorInfo *src,
303304
const PoolingLayerInfo &pool_info,
304305
ITensorInfo *indices)
305306
{
307+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuPool2dKernel::configure");
306308
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
307309
const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
308310
const bool is_global_pooling = pool_info.is_global_pooling;
@@ -353,6 +355,7 @@ Status CpuPool2dKernel::validate(const ITensorInfo *src,
353355
const PoolingLayerInfo &pool_info,
354356
const ITensorInfo *indices)
355357
{
358+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuPool2dKernel::validate");
356359
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
357360

358361
unsigned int num_elems_processed_per_iteration = 0;
@@ -379,6 +382,7 @@ Status CpuPool2dKernel::validate(const ITensorInfo *src,
379382

380383
void CpuPool2dKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
381384
{
385+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuPool2dKernel::run_op");
382386
ARM_COMPUTE_UNUSED(info);
383387
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
384388
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);

src/cpu/kernels/CpuPool3dKernel.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022 Arm Limited.
2+
* Copyright (c) 2022, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -26,6 +26,7 @@
2626
#include "arm_compute/core/TensorInfo.h"
2727
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
2828

29+
#include "src/common/utils/profile/acl_profile.h"
2930
#include "src/core/common/Registrars.h"
3031
#include "src/core/CPP/Validate.h"
3132
#include "src/core/helpers/AutoConfiguration.h"
@@ -115,6 +116,7 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const
115116

116117
void CpuPool3dKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const Pooling3dLayerInfo &pool_info)
117118
{
119+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuPool3dKernel::configure");
118120
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
119121

120122
// Perform validation step
@@ -151,15 +153,16 @@ void CpuPool3dKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const
151153

152154
Status CpuPool3dKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const Pooling3dLayerInfo &pool_info)
153155
{
156+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuPool3dKernel::validate");
154157
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
155158

156159
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, pool_info));
157-
158160
return Status{};
159161
}
160162

161163
void CpuPool3dKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
162164
{
165+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuPool3dKernel::run_op");
163166
ARM_COMPUTE_UNUSED(info);
164167
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
165168
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);

src/cpu/kernels/CpuQuantizeKernel.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "arm_compute/core/Validate.h"
3030
#include "arm_compute/core/Window.h"
3131

32+
#include "src/common/utils/profile/acl_profile.h"
3233
#include "src/core/common/Registrars.h"
3334
#include "src/core/CPP/Validate.h"
3435
#include "src/core/helpers/AutoConfiguration.h"
@@ -82,6 +83,7 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
8283

8384
void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
8485
{
86+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuQuantizeKernel::configure");
8587
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
8688
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst));
8789

@@ -167,12 +169,14 @@ void CpuQuantizeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
167169

168170
Status CpuQuantizeKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
169171
{
172+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuQuantizeKernel::validate");
170173
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst));
171174
return Status{};
172175
}
173176

174177
void CpuQuantizeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
175178
{
179+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuQuantizeKernel::run_op");
176180
ARM_COMPUTE_UNUSED(info);
177181
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
178182
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);

src/cpu/kernels/CpuReshapeKernel.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2024 Arm Limited.
2+
* Copyright (c) 2017-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -30,6 +30,7 @@
3030
#include "arm_compute/core/Types.h"
3131
#include "arm_compute/core/Validate.h"
3232

33+
#include "src/common/utils/profile/acl_profile.h"
3334
#include "src/core/helpers/Utils.h"
3435
#include "src/core/helpers/WindowHelpers.h"
3536
#include "src/core/NEON/INEKernel.h"
@@ -172,6 +173,7 @@ void reshape_tensor_per_window(const Window &window, const ITensor *src, ITensor
172173

173174
void CpuReshapeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
174175
{
176+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuReshapeKernel::configure");
175177
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
176178
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst));
177179
ARM_COMPUTE_UNUSED(src);
@@ -185,12 +187,14 @@ void CpuReshapeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
185187

186188
Status CpuReshapeKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
187189
{
190+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuReshapeKernel::validate");
188191
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst));
189192
return Status{};
190193
}
191194

192195
void CpuReshapeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
193196
{
197+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuReshapeKernel::run_op");
194198
ARM_COMPUTE_UNUSED(info);
195199
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
196200
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);

src/cpu/kernels/CpuScaleKernel.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2023 Arm Limited.
2+
* Copyright (c) 2016-2023, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -27,6 +27,7 @@
2727
#include "arm_compute/core/utils/InterpolationPolicyUtils.h"
2828
#include "arm_compute/core/Window.h"
2929

30+
#include "src/common/utils/profile/acl_profile.h"
3031
#include "src/core/common/Registrars.h"
3132
#include "src/core/helpers/ScaleHelpers.h"
3233
#include "src/core/helpers/WindowHelpers.h"
@@ -401,6 +402,7 @@ void CpuScaleKernel::configure(const ITensorInfo *src,
401402
ITensorInfo *dst,
402403
const ScaleKernelInfo &info)
403404
{
405+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuScaleKernel::configure");
404406
ARM_COMPUTE_UNUSED(dx, dy, offsets);
405407
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
406408
// Perform validation step
@@ -493,12 +495,14 @@ Status CpuScaleKernel::validate(const ITensorInfo *input,
493495
ITensorInfo *output,
494496
const ScaleKernelInfo &info)
495497
{
498+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuScaleKernel::validate");
496499
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, dx, dy, offsets, output, info));
497500
return Status{};
498501
}
499502

500503
void CpuScaleKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
501504
{
505+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuScaleKernel::run_op");
502506
ARM_COMPUTE_UNUSED(info);
503507
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
504508
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);

src/cpu/kernels/CpuScatterKernel.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "arm_compute/core/TensorInfo.h"
2828

2929
#include "src/common/utils/Log.h"
30+
#include "src/common/utils/profile/acl_profile.h"
3031
#include "src/core/common/Registrars.h"
3132
#include "src/core/CPP/Validate.h"
3233
#include "src/core/helpers/WindowHelpers.h"
@@ -73,6 +74,7 @@ void CpuScatterKernel::configure(const ITensorInfo *updates,
7374
ITensorInfo *dst,
7475
const ScatterInfo &scatter_info)
7576
{
77+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuScatterKernel::configure");
7678
ARM_COMPUTE_ERROR_ON_NULLPTR(updates, dst, indices);
7779
ARM_COMPUTE_ERROR_THROW_ON(validate(updates, indices, dst, scatter_info));
7880
ARM_COMPUTE_LOG_PARAMS(updates, indices, dst, scatter_info);
@@ -117,6 +119,7 @@ Status CpuScatterKernel::validate(const ITensorInfo *updates,
117119
const ITensorInfo *dst,
118120
const ScatterInfo &scatter_info)
119121
{
122+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuScatterKernel::validate");
120123
ARM_COMPUTE_UNUSED(scatter_info);
121124

122125
const TensorShape &ind_shape = indices->tensor_shape();
@@ -172,6 +175,7 @@ Status CpuScatterKernel::validate(const ITensorInfo *updates,
172175

173176
void CpuScatterKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
174177
{
178+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuScatterKernel::run_op");
175179
ARM_COMPUTE_UNUSED(info);
176180
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
177181
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);

src/cpu/kernels/CpuSoftmaxKernel.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2024 Arm Limited.
2+
* Copyright (c) 2017-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -31,6 +31,7 @@
3131
#include "arm_compute/core/Validate.h"
3232
#include "arm_compute/core/Window.h"
3333

34+
#include "src/common/utils/profile/acl_profile.h"
3435
#include "src/core/common/Registrars.h"
3536
#include "src/core/CPP/Validate.h"
3637
#include "src/core/helpers/AutoConfiguration.h"
@@ -165,6 +166,7 @@ const std::vector<typename CpuSoftmaxKernel::SoftmaxKernel> &CpuSoftmaxKernel::g
165166
void CpuSoftmaxKernel::configure(
166167
const ITensorInfo *src, ITensorInfo *dst, float beta, bool is_log, int axis, ITensorInfo *tmp)
167168
{
169+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuSoftmaxKernel::configure");
168170
_axis = axis;
169171

170172
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst, tmp);
@@ -249,6 +251,7 @@ void CpuSoftmaxKernel::configure(
249251
Status CpuSoftmaxKernel::validate(
250252
const ITensorInfo *src, const ITensorInfo *dst, float beta, int axis, bool is_log, const ITensorInfo *tmp)
251253
{
254+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuSoftmaxKernel::validate");
252255
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst, tmp);
253256
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_softmax(*src, *dst, beta, axis, *tmp, is_log));
254257

@@ -257,6 +260,7 @@ Status CpuSoftmaxKernel::validate(
257260

258261
void CpuSoftmaxKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
259262
{
263+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuSoftmaxKernel::run_op");
260264
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
261265
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel<CpuSoftmaxKernel>::window(), window);
262266
ARM_COMPUTE_ERROR_ON(_run_method == nullptr);

src/cpu/kernels/CpuSubKernel.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024 Arm Limited.
2+
* Copyright (c) 2021-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -26,6 +26,7 @@
2626
#include "arm_compute/core/TensorInfo.h"
2727
#include "arm_compute/core/Validate.h"
2828

29+
#include "src/common/utils/profile/acl_profile.h"
2930
#include "src/core/common/Registrars.h"
3031
#include "src/core/CPP/Validate.h"
3132
#include "src/core/helpers/AutoConfiguration.h"
@@ -120,6 +121,7 @@ validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITens
120121

121122
void CpuSubKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, ConvertPolicy policy)
122123
{
124+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuSubKernel::configure");
123125
ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst);
124126
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy));
125127

@@ -192,6 +194,7 @@ size_t CpuSubKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
192194
Status
193195
CpuSubKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy)
194196
{
197+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuSubKernel::validate");
195198
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst);
196199
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst, policy));
197200

@@ -200,6 +203,7 @@ CpuSubKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const I
200203

201204
void CpuSubKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
202205
{
206+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuSubKernel::run_op");
203207
ARM_COMPUTE_UNUSED(info);
204208
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
205209
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);

src/cpu/kernels/CpuTransposeKernel.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021, 2023 Arm Limited.
2+
* Copyright (c) 2021, 2023, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -31,6 +31,7 @@
3131
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
3232
#include "arm_compute/core/Validate.h"
3333

34+
#include "src/common/utils/profile/acl_profile.h"
3435
#include "src/core/helpers/AutoConfiguration.h"
3536
#include "src/core/helpers/WindowHelpers.h"
3637

@@ -731,6 +732,7 @@ void transpose_32bit_elements(const ITensor *in, ITensor *out, const Window &win
731732

732733
void CpuTransposeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
733734
{
735+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTransposeKernel::configure");
734736
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
735737

736738
// Destination auto initialization if not yet initialized
@@ -763,6 +765,7 @@ void CpuTransposeKernel::configure(const ITensorInfo *src, ITensorInfo *dst)
763765

764766
Status CpuTransposeKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
765767
{
768+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTransposeKernel::validate");
766769
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
767770
// Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use CPU FP16 instructions.
768771
ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
@@ -786,6 +789,7 @@ Status CpuTransposeKernel::validate(const ITensorInfo *src, const ITensorInfo *d
786789

787790
void CpuTransposeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
788791
{
792+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTransposeKernel::run_op");
789793
ARM_COMPUTE_UNUSED(info);
790794
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
791795
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);

0 commit comments

Comments
 (0)