Skip to content

Commit 38744bd

Browse files
committed
feat: add profiling tracepoints to CPU runtime function implementations (Part 7)
Instrument the entry points of key CPU runtime functions in src/runtime/NEON/functions/* with tracepoints to enable lightweight runtime profiling. These tracepoints leverage the ACL_PROFILE macros and form the basis for collecting execution timing and behavior metrics. This is the first step in integrating end-to-end profiling support.

Partially Resolves: COMPMID-8330
Signed-off-by: Walid Ben Romdhane <[email protected]>
Change-Id: I6c3c262f1f7c31bb4e55a2ab0659234d4d4b87de
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14779
Reviewed-by: Andreas Flöjt <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
Tested-by: Arm Jenkins <[email protected]>
Comments-Addressed: Arm Jenkins <[email protected]>
1 parent d96c350 commit 38744bd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+259
-39
lines changed

src/runtime/NEON/functions/NEActivationLayer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021, 2024 Arm Limited.
2+
* Copyright (c) 2017-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -25,6 +25,7 @@
2525

2626
#include "arm_compute/core/Validate.h"
2727

28+
#include "src/common/utils/profile/acl_profile.h"
2829
#include "src/cpu/operators/CpuActivation.h"
2930

3031
namespace arm_compute
@@ -47,6 +48,7 @@ NEActivationLayer::~NEActivationLayer() = default;
4748

4849
void NEActivationLayer::configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info)
4950
{
51+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEActivationLayer::configure");
5052
_impl->src = input;
5153
_impl->dst = output == nullptr ? input : output;
5254

@@ -59,12 +61,14 @@ void NEActivationLayer::configure(ITensor *input, ITensor *output, ActivationLay
5961
Status
6062
NEActivationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
6163
{
64+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEActivationLayer::validate");
6265
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, output);
6366
return cpu::CpuActivation::validate(input, output, act_info);
6467
}
6568

6669
void NEActivationLayer::run()
6770
{
71+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEActivationLayer::run");
6872
ITensorPack pack;
6973
pack.add_tensor(TensorType::ACL_SRC, _impl->src);
7074
pack.add_tensor(TensorType::ACL_DST, _impl->dst);

src/runtime/NEON/functions/NEAddMulAdd.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023-2024 Arm Limited.
2+
* Copyright (c) 2023-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -27,6 +27,7 @@
2727
#include "arm_compute/runtime/Tensor.h"
2828

2929
#include "src/common/utils/Log.h"
30+
#include "src/common/utils/profile/acl_profile.h"
3031
#include "src/core/helpers/MemoryHelpers.h"
3132
#include "src/cpu/operators/CpuAddMulAdd.h"
3233

@@ -56,6 +57,7 @@ void NEAddMulAdd::configure(ITensor *input1,
5657
const ConvertPolicy policy,
5758
const ActivationLayerInfo &act_info)
5859
{
60+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEAddMulAdd::configure");
5961
ARM_COMPUTE_LOG_PARAMS(input1, input2, bn_mul, bn_add, add_output, final_output, policy, act_info);
6062

6163
_impl->op = std::make_unique<cpu::CpuAddMulAdd>();
@@ -79,12 +81,14 @@ Status NEAddMulAdd::validate(const ITensorInfo *input1,
7981
ConvertPolicy policy,
8082
const ActivationLayerInfo &act_info)
8183
{
84+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEAddMulAdd::validate");
8285
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input1, input2, bn_mul, bn_add, add_output, final_output);
8386
return cpu::CpuAddMulAdd::validate(input1, input2, bn_mul, bn_add, add_output, final_output, policy, act_info);
8487
}
8588

8689
void NEAddMulAdd::run()
8790
{
91+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEAddMulAdd::run");
8892
_impl->op->run(_impl->run_pack);
8993
}
9094
} // namespace arm_compute

src/runtime/NEON/functions/NEArgMinMaxLayer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018-2021, 2023-2024 Arm Limited.
2+
* Copyright (c) 2018-2021, 2023-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -34,6 +34,7 @@
3434
#include "arm_compute/runtime/Tensor.h"
3535

3636
#include "src/common/utils/Log.h"
37+
#include "src/common/utils/profile/acl_profile.h"
3738
#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
3839

3940
namespace arm_compute
@@ -56,6 +57,7 @@ NEArgMinMaxLayer::NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manage
5657

5758
void NEArgMinMaxLayer::configure(ITensor *input, int axis, ITensor *output, const ReductionOperation &op)
5859
{
60+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEArgMinMaxLayer::configure");
5961
ARM_COMPUTE_LOG_PARAMS(input, axis, output, op);
6062
_impl->reduction_function = std::make_unique<NEReductionOperation>();
6163
if (output->info() &&
@@ -78,6 +80,7 @@ void NEArgMinMaxLayer::configure(ITensor *input, int axis, ITensor *output, cons
7880
Status
7981
NEArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op)
8082
{
83+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEArgMinMaxLayer::validate");
8184
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, output);
8285
ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN,
8386
"Invalid operation");
@@ -86,6 +89,7 @@ NEArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITensorInfo
8689

8790
void NEArgMinMaxLayer::run()
8891
{
92+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEArgMinMaxLayer::run");
8993
MemoryGroupResourceScope scope_mg(_impl->memory_group);
9094
_impl->reduction_function->run();
9195
if (_impl->tmp_reduction_result != nullptr)

src/runtime/NEON/functions/NEArithmeticAddition.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021, 2024 Arm Limited.
2+
* Copyright (c) 2017-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -25,6 +25,7 @@
2525

2626
#include "arm_compute/core/Validate.h"
2727

28+
#include "src/common/utils/profile/acl_profile.h"
2829
#include "src/cpu/operators/CpuAdd.h"
2930

3031
#include <utility>
@@ -52,6 +53,7 @@ Status NEArithmeticAddition::validate(const ITensorInfo *input1,
5253
ConvertPolicy policy,
5354
const ActivationLayerInfo &act_info)
5455
{
56+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEArithmeticAddition::validate");
5557
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input1, input2, output);
5658
return cpu::CpuAdd::validate(input1, input2, output, policy, act_info);
5759
}
@@ -62,6 +64,7 @@ void NEArithmeticAddition::configure(const ITensor *input1,
6264
ConvertPolicy policy,
6365
const ActivationLayerInfo &act_info)
6466
{
67+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEArithmeticAddition::configure");
6568
_impl->src_0 = input1;
6669
_impl->src_1 = input2;
6770
_impl->dst = output;
@@ -71,6 +74,7 @@ void NEArithmeticAddition::configure(const ITensor *input1,
7174

7275
void NEArithmeticAddition::run()
7376
{
77+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEArithmeticAddition::run");
7478
ITensorPack pack;
7579
pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
7680
pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);

src/runtime/NEON/functions/NEArithmeticSubtraction.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021, 2024 Arm Limited.
2+
* Copyright (c) 2017-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -26,6 +26,7 @@
2626
#include "arm_compute/core/ITensor.h"
2727
#include "arm_compute/core/Validate.h"
2828

29+
#include "src/common/utils/profile/acl_profile.h"
2930
#include "src/cpu/operators/CpuSub.h"
3031

3132
#include <utility>
@@ -53,6 +54,7 @@ Status NEArithmeticSubtraction::validate(const ITensorInfo *input1,
5354
ConvertPolicy policy,
5455
const ActivationLayerInfo &act_info)
5556
{
57+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEArithmeticSubtraction::validate");
5658
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input1, input2, output);
5759
return cpu::CpuSub::validate(input1, input2, output, policy, act_info);
5860
}
@@ -63,6 +65,7 @@ void NEArithmeticSubtraction::configure(const ITensor *input1,
6365
ConvertPolicy policy,
6466
const ActivationLayerInfo &act_info)
6567
{
68+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEArithmeticSubtraction::configure");
6669
_impl->src_0 = input1;
6770
_impl->src_1 = input2;
6871
_impl->dst = output;
@@ -72,6 +75,7 @@ void NEArithmeticSubtraction::configure(const ITensor *input1,
7275

7376
void NEArithmeticSubtraction::run()
7477
{
78+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEArithmeticSubtraction::run");
7579
ITensorPack pack;
7680
pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
7781
pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);

src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021, 2024 Arm Limited.
2+
* Copyright (c) 2017-2021, 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -31,6 +31,7 @@
3131
#include "arm_compute/runtime/NEON/NEScheduler.h"
3232

3333
#include "src/common/utils/Log.h"
34+
#include "src/common/utils/profile/acl_profile.h"
3435
#include "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.h"
3536

3637
namespace arm_compute
@@ -50,6 +51,7 @@ void NEBatchNormalizationLayer::configure(ITensor *input,
5051
float epsilon,
5152
ActivationLayerInfo act_info)
5253
{
54+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEBatchNormalizationLayer::configure");
5355
ARM_COMPUTE_LOG_PARAMS(input, output, mean, var, beta, gamma, epsilon, act_info);
5456
// Configure kernel
5557
_norm_kernel = std::make_unique<NEBatchNormalizationLayerKernel>();
@@ -65,6 +67,7 @@ Status NEBatchNormalizationLayer::validate(const ITensorInfo *input,
6567
float epsilon,
6668
ActivationLayerInfo act_info)
6769
{
70+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEBatchNormalizationLayer::validate");
6871
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, output, mean, var, beta, gamma);
6972
ARM_COMPUTE_RETURN_ON_ERROR(
7073
NEBatchNormalizationLayerKernel::validate(input, output, mean, var, beta, gamma, epsilon, act_info));
@@ -73,6 +76,7 @@ Status NEBatchNormalizationLayer::validate(const ITensorInfo *input,
7376

7477
void NEBatchNormalizationLayer::run()
7578
{
79+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEBatchNormalizationLayer::run");
7680
NEScheduler::get().schedule(_norm_kernel.get(), Window::DimY);
7781
}
7882
} // namespace arm_compute

src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2021, 2023-2024 Arm Limited.
2+
* Copyright (c) 2019-2021, 2023-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -30,12 +30,14 @@
3030
#include "arm_compute/core/Validate.h"
3131

3232
#include "src/common/utils/Log.h"
33+
#include "src/common/utils/profile/acl_profile.h"
3334
#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
3435

3536
namespace arm_compute
3637
{
3738
void NEBatchToSpaceLayer::configure(const ITensor *input, const ITensor *block_shape, ITensor *output)
3839
{
40+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEBatchToSpaceLayer::configure");
3941
ARM_COMPUTE_LOG_PARAMS(input, block_shape, output);
4042
auto k = std::make_unique<NEBatchToSpaceLayerKernel>();
4143
k->configure(input, block_shape, output);
@@ -45,6 +47,7 @@ void NEBatchToSpaceLayer::configure(const ITensor *input, const ITensor *block_s
4547
void NEBatchToSpaceLayer::configure(
4648
const ITensor *input, int32_t block_shape_x, int32_t block_shape_y, ITensor *output, const CropInfo &crop_info)
4749
{
50+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEBatchToSpaceLayer::configure");
4851
auto k = std::make_unique<NEBatchToSpaceLayerKernel>();
4952
k->configure(input, block_shape_x, block_shape_y, output, crop_info);
5053
_kernel = std::move(k);
@@ -53,6 +56,7 @@ void NEBatchToSpaceLayer::configure(
5356
Status
5457
NEBatchToSpaceLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output)
5558
{
59+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEBatchToSpaceLayer::validate");
5660
ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(input, block_shape, output);
5761
return NEBatchToSpaceLayerKernel::validate(input, block_shape, output);
5862
}
@@ -63,6 +67,7 @@ Status NEBatchToSpaceLayer::validate(const ITensorInfo *input,
6367
const ITensorInfo *output,
6468
const CropInfo &crop_info)
6569
{
70+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEBatchToSpaceLayer::validate");
6671
return NEBatchToSpaceLayerKernel::validate(input, block_shape_x, block_shape_y, output, crop_info);
6772
}
6873
} // namespace arm_compute

src/runtime/NEON/functions/NEBitwiseAnd.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021 Arm Limited.
2+
* Copyright (c) 2017-2021, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -24,6 +24,7 @@
2424
#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h"
2525

2626
#include "src/common/utils/Log.h"
27+
#include "src/common/utils/profile/acl_profile.h"
2728
#include "src/core/NEON/kernels/NEBitwiseAndKernel.h"
2829

2930
#include <utility>
@@ -32,6 +33,7 @@ using namespace arm_compute;
3233

3334
void NEBitwiseAnd::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
3435
{
36+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEBitwiseAnd::configure");
3537
ARM_COMPUTE_LOG_PARAMS(input1, input2, output);
3638
auto k = std::make_unique<NEBitwiseAndKernel>();
3739
k->configure(input1, input2, output);

src/runtime/NEON/functions/NEBitwiseNot.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021 Arm Limited.
2+
* Copyright (c) 2017-2021, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -24,6 +24,7 @@
2424
#include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h"
2525

2626
#include "src/common/utils/Log.h"
27+
#include "src/common/utils/profile/acl_profile.h"
2728
#include "src/core/NEON/kernels/NEBitwiseNotKernel.h"
2829

2930
#include <utility>
@@ -32,6 +33,7 @@ using namespace arm_compute;
3233

3334
void NEBitwiseNot::configure(const ITensor *input, ITensor *output)
3435
{
36+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEBitwiseNot::configure");
3537
ARM_COMPUTE_LOG_PARAMS(input, output);
3638
auto k = std::make_unique<NEBitwiseNotKernel>();
3739
k->configure(input, output);

src/runtime/NEON/functions/NEBitwiseOr.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021 Arm Limited.
2+
* Copyright (c) 2017-2021, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -24,6 +24,7 @@
2424
#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h"
2525

2626
#include "src/common/utils/Log.h"
27+
#include "src/common/utils/profile/acl_profile.h"
2728
#include "src/core/NEON/kernels/NEBitwiseOrKernel.h"
2829

2930
#include <utility>
@@ -32,6 +33,7 @@ using namespace arm_compute;
3233

3334
void NEBitwiseOr::configure(const ITensor *input1, const ITensor *input2, ITensor *output)
3435
{
36+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NEBitwiseOr::configure");
3537
ARM_COMPUTE_LOG_PARAMS(input1, input2, output);
3638
auto k = std::make_unique<NEBitwiseOrKernel>();
3739
k->configure(input1, input2, output);

0 commit comments

Comments
 (0)