Skip to content

Commit 9b424e5

Browse files
committed
feat: add profiling tracepoints to CPU operator implementations (Part 6)
Instrument key CPU operator entry points in src/cpu/operators/* with tracepoints to enable lightweight runtime profiling. These tracepoints leverage the ACL_PROFILE macros and form the basis for collecting execution timing and behavior metrics. This is the first step in integrating end-to-end profiling support.

Partially Resolves: COMPMID-8330
Signed-off-by: Walid Ben Romdhane <[email protected]>
Change-Id: I649a8c25b3851cb11321d0f45541779ed9f4e819
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14778
Comments-Addressed: Arm Jenkins <[email protected]>
Tested-by: Arm Jenkins <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
Reviewed-by: Andreas Flöjt <[email protected]>
Reviewed-by: Dennis Wildmark <[email protected]>
1 parent c50c12b commit 9b424e5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+214
-35
lines changed

src/cpu/operators/CpuActivation.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2022 Arm Limited.
2+
* Copyright (c) 2021-2022, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -28,6 +28,7 @@
2828
#include "src/common/IOperator.h"
2929
#include "src/common/utils/LegacySupport.h"
3030
#include "src/common/utils/Log.h"
31+
#include "src/common/utils/profile/acl_profile.h"
3132
#include "src/cpu/CpuContext.h"
3233
#include "src/cpu/kernels/CpuActivationKernel.h"
3334

@@ -37,6 +38,7 @@ namespace cpu
3738
{
3839
void CpuActivation::configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info)
3940
{
41+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivation::configure");
4042
ARM_COMPUTE_LOG_PARAMS(input, output, activation_info);
4143
auto k = std::make_unique<kernels::CpuActivationKernel>();
4244
k->configure(input, output, activation_info);
@@ -46,11 +48,13 @@ void CpuActivation::configure(const ITensorInfo *input, ITensorInfo *output, con
4648
Status
4749
CpuActivation::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
4850
{
51+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivation::validate");
4952
return kernels::CpuActivationKernel::validate(input, output, activation_info);
5053
}
5154

5255
void CpuActivation::run(ITensorPack &tensors)
5356
{
57+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivation::run");
5458
ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
5559
auto split_dimension = static_cast<kernels::CpuActivationKernel *>(_kernel.get())->get_split_dimension_hint();
5660
NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors);

src/cpu/operators/CpuAdd.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2022 Arm Limited.
2+
* Copyright (c) 2021-2022, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -26,8 +26,8 @@
2626
#include "arm_compute/runtime/NEON/NEScheduler.h"
2727

2828
#include "src/common/utils/Log.h"
29+
#include "src/common/utils/profile/acl_profile.h"
2930
#include "src/cpu/kernels/CpuAddKernel.h"
30-
3131
namespace arm_compute
3232
{
3333
namespace cpu
@@ -38,6 +38,7 @@ void CpuAdd::configure(const ITensorInfo *src0,
3838
ConvertPolicy policy,
3939
const ActivationLayerInfo &act_info)
4040
{
41+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuAdd::configure");
4142
ARM_COMPUTE_UNUSED(act_info);
4243
ARM_COMPUTE_LOG_PARAMS(src0, src1, dst, policy, act_info);
4344
auto k = std::make_unique<kernels::CpuAddKernel>();
@@ -51,12 +52,14 @@ Status CpuAdd::validate(const ITensorInfo *src0,
5152
ConvertPolicy policy,
5253
const ActivationLayerInfo &act_info)
5354
{
55+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuAdd::validate");
5456
ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
5557
return kernels::CpuAddKernel::validate(src0, src1, dst, policy);
5658
}
5759

5860
void CpuAdd::run(ITensorPack &tensors)
5961
{
62+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuAdd::run");
6063
const auto split_dimension = static_cast<kernels::CpuAddKernel *>(_kernel.get())->get_split_dimension();
6164

6265
NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors);

src/cpu/operators/CpuAddMulAdd.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023 Arm Limited.
2+
* Copyright (c) 2023, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -27,6 +27,7 @@
2727
#include "arm_compute/runtime/NEON/NEScheduler.h"
2828

2929
#include "src/common/utils/Log.h"
30+
#include "src/common/utils/profile/acl_profile.h"
3031
#include "src/core/helpers/MemoryHelpers.h"
3132
#include "src/cpu/kernels/CpuAddMulAddKernel.h"
3233
#include "src/cpu/utils/CpuAuxTensorHandler.h"
@@ -44,6 +45,7 @@ void CpuAddMulAdd::configure(const ITensorInfo *input1,
4445
ConvertPolicy policy,
4546
const ActivationLayerInfo &act_info)
4647
{
48+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuAddMulAdd::configure");
4749
ARM_COMPUTE_LOG_PARAMS(input1, input2, bn_mul, bn_add, add_output, final_output, policy, act_info);
4850

4951
auto k = std::make_unique<kernels::CpuAddMulAddKernel>();
@@ -82,6 +84,7 @@ Status CpuAddMulAdd::validate(const ITensorInfo *input1,
8284
ConvertPolicy policy,
8385
const ActivationLayerInfo &act_info)
8486
{
87+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuAddMulAdd::validate");
8588
const DataType data_type = input1->data_type();
8689
if (is_data_type_quantized(data_type))
8790
{
@@ -103,6 +106,7 @@ Status CpuAddMulAdd::validate(const ITensorInfo *input1,
103106

104107
void CpuAddMulAdd::run(ITensorPack &tensors)
105108
{
109+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuAddMulAdd::run");
106110
const DataType data_type = tensors.get_const_tensor(TensorType::ACL_SRC_0)->info()->data_type();
107111

108112
if (is_data_type_quantized(data_type))

src/cpu/operators/CpuCast.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021 Arm Limited.
2+
* Copyright (c) 2021, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -24,6 +24,7 @@
2424
#include "src/cpu/operators/CpuCast.h"
2525

2626
#include "src/common/utils/Log.h"
27+
#include "src/common/utils/profile/acl_profile.h"
2728
#include "src/cpu/kernels/CpuCastKernel.h"
2829

2930
namespace arm_compute
@@ -32,6 +33,7 @@ namespace cpu
3233
{
3334
void CpuCast::configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy)
3435
{
36+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuCast::configure");
3537
ARM_COMPUTE_LOG_PARAMS(src, dst, policy);
3638
auto k = std::make_unique<kernels::CpuCastKernel>();
3739
k->configure(src, dst, policy);
@@ -40,6 +42,7 @@ void CpuCast::configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy
4042

4143
Status CpuCast::validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
4244
{
45+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuCast::validate");
4346
return kernels::CpuCastKernel::validate(src, dst, policy);
4447
}
4548
} // namespace cpu

src/cpu/operators/CpuConcatenate.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018-2021 Arm Limited.
2+
* Copyright (c) 2018-2021, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -32,6 +32,7 @@
3232
#include "arm_compute/runtime/NEON/NEScheduler.h"
3333

3434
#include "src/common/utils/Log.h"
35+
#include "src/common/utils/profile/acl_profile.h"
3536
#include "src/core/helpers/AutoConfiguration.h"
3637
#include "src/cpu/kernels/CpuConcatenateBatchKernel.h"
3738
#include "src/cpu/kernels/CpuConcatenateDepthKernel.h"
@@ -44,6 +45,7 @@ namespace cpu
4445
{
4546
void CpuConcatenate::configure(const std::vector<const ITensorInfo *> &srcs_vector, ITensorInfo *dst, size_t axis)
4647
{
48+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuConcatenate::configure");
4749
ARM_COMPUTE_ERROR_ON(dst == nullptr);
4850
ARM_COMPUTE_LOG_PARAMS(srcs_vector, dst, axis);
4951

@@ -100,6 +102,7 @@ void CpuConcatenate::configure(const std::vector<const ITensorInfo *> &srcs_vect
100102
Status
101103
CpuConcatenate::validate(const std::vector<const ITensorInfo *> &srcs_vector, const ITensorInfo *dst, size_t axis)
102104
{
105+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuConcatenate::validate");
103106
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
104107
ARM_COMPUTE_RETURN_ERROR_ON(srcs_vector.size() < 2);
105108

@@ -146,6 +149,7 @@ CpuConcatenate::validate(const std::vector<const ITensorInfo *> &srcs_vector, co
146149

147150
void CpuConcatenate::run(ITensorPack &tensors)
148151
{
152+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuConcatenate::run");
149153
if (tensors.empty())
150154
{
151155
ARM_COMPUTE_ERROR("No inputs provided");

src/cpu/operators/CpuConv2d.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021, 2023-2024 Arm Limited.
2+
* Copyright (c) 2017-2021, 2023-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -27,6 +27,7 @@
2727
#include "arm_compute/runtime/NEON/NEScheduler.h"
2828

2929
#include "src/common/utils/Log.h"
30+
#include "src/common/utils/profile/acl_profile.h"
3031
#include "src/cpu/operators/CpuDirectConv2d.h"
3132
#include "src/cpu/operators/CpuGemm.h"
3233
#include "src/cpu/operators/CpuGemmConv2d.h"
@@ -54,6 +55,7 @@ void CpuConv2d::configure(ITensorInfo *input,
5455
bool enable_fast_math,
5556
unsigned int num_groups)
5657
{
58+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuConv2d::configure");
5759
// Perform validate step
5860
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
5961
ARM_COMPUTE_UNUSED(num_groups);
@@ -114,6 +116,7 @@ Status CpuConv2d::validate(const ITensorInfo *input,
114116
bool enable_fast_math,
115117
unsigned int num_groups)
116118
{
119+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuConv2d::validate");
117120
ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1), "Grouping (num_groups != 1) is not supported on Neon");
118121

119122
const Conv2dInfo info(conv_info, dilation, act_info, enable_fast_math, num_groups);
@@ -291,6 +294,7 @@ ConvolutionMethod CpuConv2d::get_convolution_method(const ITensorInfo *i
291294

292295
void CpuConv2d::run(ITensorPack &tensors)
293296
{
297+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuConv2d::run");
294298
prepare(tensors);
295299
_function->run(tensors);
296300
}

src/cpu/operators/CpuConvertFullyConnectedWeights.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018-2021 Arm Limited.
2+
* Copyright (c) 2018-2021, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -26,6 +26,7 @@
2626
#include "arm_compute/runtime/NEON/NEScheduler.h"
2727

2828
#include "src/common/utils/Log.h"
29+
#include "src/common/utils/profile/acl_profile.h"
2930
#include "src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h"
3031

3132
namespace arm_compute
@@ -37,6 +38,8 @@ void CpuConvertFullyConnectedWeights::configure(const ITensorInfo *src,
3738
const TensorShape &original_src_shape,
3839
DataLayout data_layout)
3940
{
41+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
42+
"CpuConvertFullyConnectedWeights::configure");
4043
ARM_COMPUTE_LOG_PARAMS(src, dst, original_src_shape, data_layout);
4144
auto k = std::make_unique<kernels::CpuConvertFullyConnectedWeightsKernel>();
4245
k->configure(src, dst, original_src_shape, data_layout);
@@ -48,11 +51,14 @@ Status CpuConvertFullyConnectedWeights::validate(const ITensorInfo *src,
4851
const TensorShape &original_src_shape,
4952
DataLayout data_layout)
5053
{
54+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
55+
"CpuConvertFullyConnectedWeights::validate");
5156
return kernels::CpuConvertFullyConnectedWeightsKernel::validate(src, dst, original_src_shape, data_layout);
5257
}
5358

5459
void CpuConvertFullyConnectedWeights::run(ITensorPack &tensors)
5560
{
61+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuConvertFullyConnectedWeights::run");
5662
NEScheduler::get().schedule_op(_kernel.get(), Window::DimZ, _kernel->window(), tensors);
5763
}
5864
} // namespace cpu

src/cpu/operators/CpuCopy.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021 Arm Limited.
2+
* Copyright (c) 2021, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -24,6 +24,7 @@
2424
#include "src/cpu/operators/CpuCopy.h"
2525

2626
#include "src/common/utils/Log.h"
27+
#include "src/common/utils/profile/acl_profile.h"
2728
#include "src/cpu/kernels/CpuCopyKernel.h"
2829

2930
namespace arm_compute
@@ -32,6 +33,7 @@ namespace cpu
3233
{
3334
void CpuCopy::configure(const ITensorInfo *src, ITensorInfo *dst)
3435
{
36+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuCopy::configure");
3537
ARM_COMPUTE_LOG_PARAMS(src, dst);
3638
auto k = std::make_unique<kernels::CpuCopyKernel>();
3739
k->configure(src, dst);
@@ -40,6 +42,7 @@ void CpuCopy::configure(const ITensorInfo *src, ITensorInfo *dst)
4042

4143
Status CpuCopy::validate(const ITensorInfo *src, const ITensorInfo *dst)
4244
{
45+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuCopy::validate");
4346
return kernels::CpuCopyKernel::validate(src, dst);
4447
}
4548
} // namespace cpu

src/cpu/operators/CpuDepthwiseConv2d.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024 Arm Limited.
2+
* Copyright (c) 2021-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -30,6 +30,7 @@
3030
#include "arm_compute/runtime/NEON/NEScheduler.h"
3131

3232
#include "src/common/utils/Log.h"
33+
#include "src/common/utils/profile/acl_profile.h"
3334
#include "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"
3435

3536
namespace arm_compute
@@ -145,6 +146,8 @@ Status CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::validate(const I
145146
const ITensorInfo *dst,
146147
const ConvolutionInfo &info)
147148
{
149+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
150+
"CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::validate");
148151
return validate_arguments_optimized(src, weights, biases, dst, info);
149152
}
150153

@@ -346,6 +349,8 @@ Status CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::validate(const ITensorInfo
346349
const ITensorInfo *dst,
347350
const ConvolutionInfo &info)
348351
{
352+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
353+
"CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::validate");
349354
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
350355
if (src->data_layout() == DataLayout::NCHW)
351356
{
@@ -476,6 +481,7 @@ void CpuDepthwiseConv2d::configure(ITensorInfo *src,
476481
ITensorInfo *dst,
477482
const ConvolutionInfo &info)
478483
{
484+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuDepthwiseConv2d::configure");
479485
ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, info);
480486

481487
_depth_conv_func =
@@ -499,6 +505,7 @@ Status CpuDepthwiseConv2d::validate(const ITensorInfo *src,
499505
const ITensorInfo *dst,
500506
const ConvolutionInfo &info)
501507
{
508+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuDepthwiseConv2d::validate");
502509
DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(src, weights, biases, dst, info);
503510
switch (depth_conv_func)
504511
{
@@ -531,6 +538,7 @@ DepthwiseConvolutionFunction CpuDepthwiseConv2d::get_depthwiseconvolution_functi
531538

532539
void CpuDepthwiseConv2d::run(ITensorPack &tensors)
533540
{
541+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuDepthwiseConv2d::run");
534542
switch (_depth_conv_func)
535543
{
536544
case DepthwiseConvolutionFunction::OPTIMIZED:

src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2024 Arm Limited.
2+
* Copyright (c) 2019-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -28,6 +28,7 @@
2828
#include "arm_compute/runtime/NEON/NEScheduler.h"
2929

3030
#include "src/common/utils/Log.h"
31+
#include "src/common/utils/profile/acl_profile.h"
3132
#include "src/core/CPP/Validate.h"
3233
#include "src/core/helpers/AutoConfiguration.h"
3334
#include "src/core/utils/AssemblyUtils.h"
@@ -59,6 +60,8 @@ void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src,
5960
ITensorInfo *dst,
6061
const ConvolutionInfo &info)
6162
{
63+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
64+
"CpuDepthwiseConv2dAssemblyDispatch::configure");
6265
ARM_COMPUTE_LOG_PARAMS(src, weights, bias, dst, info);
6366
const CPUInfo &ci = NEScheduler::get().cpu_info();
6467
const unsigned int num_threads = NEScheduler::get().num_threads();
@@ -88,6 +91,8 @@ Status CpuDepthwiseConv2dAssemblyDispatch::validate(const ITensorInfo *src,
8891
const ITensorInfo *dst,
8992
const ConvolutionInfo &info)
9093
{
94+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
95+
"CpuDepthwiseConv2dAssemblyDispatch::validate");
9196
return kernels::CpuDepthwiseConv2dAssemblyWrapperKernel::validate(src, weights, bias, dst, info);
9297
}
9398

@@ -104,6 +109,8 @@ bool CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(const Activatio
104109

105110
void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors)
106111
{
112+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
113+
"CpuDepthwiseConv2dAssemblyDispatch::run");
107114
ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
108115

109116
prepare(tensors);

0 commit comments

Comments
 (0)