Skip to content

Commit edd2381

Browse files
committed
feat: add profiling tracepoints to CPU kernel implementations (Part 5)
Instrument key CPU kernel entry points in src/cpu/kernels/* with tracepoints to enable lightweight runtime profiling. These tracepoints leverage the ACL_PROFILE macros and form the basis for collecting execution timing and behavior metrics. This is the first step in integrating end-to-end profiling support.

Partially Resolves: COMPMID-8330
Signed-off-by: Walid Ben Romdhane <[email protected]>
Change-Id: I3e4d2877bf74cd31ce48aae078967432737eca73
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14777
Tested-by: Arm Jenkins <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
Reviewed-by: Andreas Flöjt <[email protected]>
Comments-Addressed: Arm Jenkins <[email protected]>
Reviewed-by: Dennis Wildmark <[email protected]>
1 parent 9b424e5 commit edd2381

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+251
-94
lines changed

src/cpu/kernels/quantize/generic/neon/fp16.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024 Arm Limited.
2+
* Copyright (c) 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -22,6 +22,7 @@
2222
* SOFTWARE.
2323
*/
2424
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
25+
#include "src/common/utils/profile/acl_profile.h"
2526
#include "src/cpu/kernels/quantize/generic/neon/impl.h"
2627

2728
namespace arm_compute
@@ -30,14 +31,17 @@ namespace cpu
3031
{
3132
void fp16_u8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
3233
{
34+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_u8_run_quantize_qasymm8");
3335
run_quantize_qasymm8<float16_t, uint8_t>(src, dst, window);
3436
}
3537
void fp16_i8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
3638
{
39+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_i8_run_quantize_qasymm8");
3740
run_quantize_qasymm8<float16_t, int8_t>(src, dst, window);
3841
}
3942
void fp16_run_quantize_qasymm16(const ITensor *src, ITensor *dst, const Window &window)
4043
{
44+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_run_quantize_qasymm16");
4145
run_quantize_qasymm16<float16_t>(src, dst, window);
4246
}
4347
} // namespace cpu

src/cpu/kernels/quantize/generic/neon/fp32.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
* SOFTWARE.
2323
*/
24+
#include "src/common/utils/profile/acl_profile.h"
2425
#include "src/cpu/kernels/quantize/generic/neon/impl.h"
2526

2627
namespace arm_compute
@@ -29,19 +30,23 @@ namespace cpu
2930
{
3031
void fp32_u8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
3132
{
33+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_u8_run_quantize_qasymm8");
3234
run_quantize_qasymm8<float, uint8_t>(src, dst, window);
3335
}
3436
void fp32_i8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
3537
{
38+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_i8_run_quantize_qasymm8");
3639
run_quantize_qasymm8<float, int8_t>(src, dst, window);
3740
}
3841
void fp32_run_quantize_qasymm16(const ITensor *src, ITensor *dst, const Window &window)
3942
{
43+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_run_quantize_qasymm16");
4044
run_quantize_qasymm16<float>(src, dst, window);
4145
}
4246

4347
void fp32_i8_run_quantize_qsymm8(const ITensor *src, ITensor *dst, const Window &window)
4448
{
49+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_i8_run_quantize_qsymm8");
4550
run_quantize_qsymm8<float, int8_t>(src, dst, window);
4651
}
4752

src/cpu/kernels/quantize/generic/neon/integer.cpp

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024 Arm Limited.
2+
* Copyright (c) 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -21,6 +21,7 @@
2121
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
* SOFTWARE.
2323
*/
24+
#include "src/common/utils/profile/acl_profile.h"
2425
#include "src/cpu/kernels/quantize/generic/neon/impl.h"
2526

2627
namespace arm_compute
@@ -29,53 +30,67 @@ namespace cpu
2930
{
3031
void u8_u8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
3132
{
33+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_u8_run_quantize_qasymm8");
3234
run_quantize_qasymm8<uint8_t, uint8_t>(src, dst, window);
3335
}
3436
void u8_i8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
3537
{
38+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_i8_run_quantize_qasymm8");
3639
run_quantize_qasymm8<uint8_t, int8_t>(src, dst, window);
3740
}
3841
void i8_u8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
3942
{
43+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "i8_u8_run_quantize_qasymm8");
4044
run_quantize_qasymm8<int8_t, uint8_t>(src, dst, window);
4145
}
4246
void i8_i8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
4347
{
48+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "i8_i8_run_quantize_qasymm8");
4449
run_quantize_qasymm8<int8_t, int8_t>(src, dst, window);
4550
}
4651

4752
void u8_run_quantize_qasymm16(const ITensor *src, ITensor *dst, const Window &window)
4853
{
54+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_run_quantize_qasymm16");
4955
run_quantize_qasymm16<uint8_t>(src, dst, window);
5056
}
5157
void i8_run_quantize_qasymm16(const ITensor *src, ITensor *dst, const Window &window)
5258
{
59+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "i8_run_quantize_qasymm16");
5360
run_quantize_qasymm16<int8_t>(src, dst, window);
5461
}
5562

5663
void u8_u8_run_requantize_offset_only(const ITensor *src, ITensor *dst, const Window &window)
5764
{
65+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_u8_run_requantize_offset_only");
5866
run_requantize_offset_only<uint8_t, uint8_t>(src, dst, window);
5967
}
6068
void u8_i8_run_requantize_offset_only(const ITensor *src, ITensor *dst, const Window &window)
6169
{
70+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_i8_run_requantize_offset_only");
6271
run_requantize_offset_only<uint8_t, int8_t>(src, dst, window);
6372
}
6473
void i8_u8_run_requantize_offset_only(const ITensor *src, ITensor *dst, const Window &window)
6574
{
75+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "i8_u8_run_requantize_offset_only");
6676
run_requantize_offset_only<int8_t, uint8_t>(src, dst, window);
6777
}
6878
void i8_i8_run_requantize_offset_only(const ITensor *src, ITensor *dst, const Window &window)
6979
{
80+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "i8_i8_run_requantize_offset_only");
7081
run_requantize_offset_only<int8_t, int8_t>(src, dst, window);
7182
}
7283

7384
void i8_u8_run_requantize_offset_only_convert(const ITensor *src, ITensor *dst, const Window &window)
7485
{
86+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
87+
"i8_u8_run_requantize_offset_only_convert");
7588
run_requantize_offset_only_convert<int8_t, uint8_t>(src, dst, window);
7689
}
7790
void u8_i8_run_requantize_offset_only_convert(const ITensor *src, ITensor *dst, const Window &window)
7891
{
92+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
93+
"u8_i8_run_requantize_offset_only_convert");
7994
run_requantize_offset_only_convert<uint8_t, int8_t>(src, dst, window);
8095
}
8196
} // namespace cpu

src/cpu/kernels/range/generic/neon/fp16.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021 Arm Limited.
2+
* Copyright (c) 2021, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -25,6 +25,7 @@
2525

2626
#include "arm_compute/core/Helpers.h"
2727

28+
#include "src/common/utils/profile/acl_profile.h"
2829
#include "src/core/NEON/wrapper/wrapper.h"
2930
#include "src/cpu/kernels/range/generic/neon/impl.h"
3031

@@ -34,7 +35,8 @@ namespace cpu
3435
{
3536
void fp16_neon_range_function(ITensor *output, float start, float step, const Window &window)
3637
{
37-
return neon_range_function<float16_t>(output, start, step, window);
38+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_neon_range_function");
39+
neon_range_function<float16_t>(output, start, step, window);
3840
}
3941
} // namespace cpu
4042
} // namespace arm_compute

src/cpu/kernels/range/generic/neon/fp32.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021 Arm Limited.
2+
* Copyright (c) 2021, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -24,6 +24,7 @@
2424

2525
#include "arm_compute/core/Helpers.h"
2626

27+
#include "src/common/utils/profile/acl_profile.h"
2728
#include "src/core/NEON/wrapper/wrapper.h"
2829
#include "src/cpu/kernels/range/generic/neon/impl.h"
2930

@@ -33,7 +34,8 @@ namespace cpu
3334
{
3435
void fp32_neon_range_function(ITensor *output, float start, float step, const Window &window)
3536
{
36-
return neon_range_function<float32_t>(output, start, step, window);
37+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_neon_range_function");
38+
neon_range_function<float32_t>(output, start, step, window);
3739
}
3840
} // namespace cpu
3941
} // namespace arm_compute

src/cpu/kernels/range/generic/neon/integer.cpp

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021 Arm Limited.
2+
* Copyright (c) 2021, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -22,6 +22,7 @@
2222
* SOFTWARE.
2323
*/
2424

25+
#include "src/common/utils/profile/acl_profile.h"
2526
#include "src/cpu/kernels/range/generic/neon/impl.h"
2627

2728
#include <cstdint>
@@ -32,32 +33,38 @@ namespace cpu
3233
{
3334
void u8_neon_range_function(ITensor *output, float start, float step, const Window &window)
3435
{
35-
return neon_range_function<uint8_t>(output, start, step, window);
36+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_neon_range_function");
37+
neon_range_function<uint8_t>(output, start, step, window);
3638
}
3739

3840
void u16_neon_range_function(ITensor *output, float start, float step, const Window &window)
3941
{
40-
return neon_range_function<uint16_t>(output, start, step, window);
42+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u16_neon_range_function");
43+
neon_range_function<uint16_t>(output, start, step, window);
4144
}
4245

4346
void u32_neon_range_function(ITensor *output, float start, float step, const Window &window)
4447
{
45-
return neon_range_function<uint32_t>(output, start, step, window);
48+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u32_neon_range_function");
49+
neon_range_function<uint32_t>(output, start, step, window);
4650
}
4751

4852
void s8_neon_range_function(ITensor *output, float start, float step, const Window &window)
4953
{
50-
return neon_range_function<int8_t>(output, start, step, window);
54+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "s8_neon_range_function");
55+
neon_range_function<int8_t>(output, start, step, window);
5156
}
5257

5358
void s16_neon_range_function(ITensor *output, float start, float step, const Window &window)
5459
{
55-
return neon_range_function<int16_t>(output, start, step, window);
60+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "s16_neon_range_function");
61+
neon_range_function<int16_t>(output, start, step, window);
5662
}
5763

5864
void s32_neon_range_function(ITensor *output, float start, float step, const Window &window)
5965
{
60-
return neon_range_function<int32_t>(output, start, step, window);
66+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "s32_neon_range_function");
67+
neon_range_function<int32_t>(output, start, step, window);
6168
}
6269

6370
} // namespace cpu

src/cpu/kernels/reduction_layer/generic/neon/fp16.cpp

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024 Arm Limited.
2+
* Copyright (c) 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -23,6 +23,7 @@
2323
*/
2424
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
2525

26+
#include "src/common/utils/profile/acl_profile.h"
2627
#include "src/cpu/kernels/reduction_layer/generic/neon/impl.h"
2728

2829
namespace arm_compute
@@ -34,31 +35,35 @@ void reduce_RedOpX_reduceX_float16_8(const Window &window,
3435
ITensor *output,
3536
const ReductionOperation op)
3637
{
37-
return Reducer<RedOpX<float16_t, 8>>::reduceX(window, input, output, RedOpX<float16_t, 8>(), op);
38+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpX_reduceX_float16_8");
39+
Reducer<RedOpX<float16_t, 8>>::reduceX(window, input, output, RedOpX<float16_t, 8>(), op);
3840
}
3941

4042
void reduce_RedOpYZW_reduceY_float16_8(const Window &window,
4143
const ITensor *input,
4244
ITensor *output,
4345
const ReductionOperation op)
4446
{
45-
return Reducer<RedOpYZW<float16_t, 8>>::reduceY(window, input, output, RedOpYZW<float16_t, 8>(), op);
47+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceY_float16_8");
48+
Reducer<RedOpYZW<float16_t, 8>>::reduceY(window, input, output, RedOpYZW<float16_t, 8>(), op);
4649
}
4750

4851
void reduce_RedOpYZW_reduceZ_float16_8(const Window &window,
4952
const ITensor *input,
5053
ITensor *output,
5154
const ReductionOperation op)
5255
{
53-
return Reducer<RedOpYZW<float16_t, 8>>::reduceZ(window, input, output, RedOpYZW<float16_t, 8>(), op);
56+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceZ_float16_8");
57+
Reducer<RedOpYZW<float16_t, 8>>::reduceZ(window, input, output, RedOpYZW<float16_t, 8>(), op);
5458
}
5559

5660
void reduce_RedOpYZW_reduceW_float16_8(const Window &window,
5761
const ITensor *input,
5862
ITensor *output,
5963
const ReductionOperation op)
6064
{
61-
return Reducer<RedOpYZW<float16_t, 8>>::reduceW(window, input, output, RedOpYZW<float16_t, 8>(), op);
65+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceW_float16_8");
66+
Reducer<RedOpYZW<float16_t, 8>>::reduceW(window, input, output, RedOpYZW<float16_t, 8>(), op);
6267
}
6368
} // namespace cpu
6469
} // namespace arm_compute

src/cpu/kernels/reduction_layer/generic/neon/fp32.cpp

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024 Arm Limited.
2+
* Copyright (c) 2024-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -22,6 +22,7 @@
2222
* SOFTWARE.
2323
*/
2424

25+
#include "src/common/utils/profile/acl_profile.h"
2526
#include "src/cpu/kernels/reduction_layer/generic/neon/impl.h"
2627

2728
namespace arm_compute
@@ -33,6 +34,8 @@ void reduce_RedOpYZW_complex_reduceZ_float32_4_2_SUM(const Window &wi
3334
ITensor *output,
3435
const ReductionOperation op)
3536
{
37+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
38+
"reduce_RedOpYZW_complex_reduceZ_float32_4_2_SUM");
3639
Reducer<RedOpYZW_complex<float, 4, 2, ReductionOperation::SUM>>::reduceZ(
3740
window, input, output, RedOpYZW_complex<float, 4, 2, ReductionOperation::SUM>(), op);
3841
}
@@ -42,31 +45,35 @@ void reduce_RedOpX_reduceX_float32_4(const Window &window,
4245
ITensor *output,
4346
const ReductionOperation op)
4447
{
45-
return Reducer<RedOpX<float, 4>>::reduceX(window, input, output, RedOpX<float, 4>(), op);
48+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpX_reduceX_float32_4");
49+
Reducer<RedOpX<float, 4>>::reduceX(window, input, output, RedOpX<float, 4>(), op);
4650
}
4751

4852
void reduce_RedOpYZW_reduceY_float32_4(const Window &window,
4953
const ITensor *input,
5054
ITensor *output,
5155
const ReductionOperation op)
5256
{
53-
return Reducer<RedOpYZW<float, 4>>::reduceY(window, input, output, RedOpYZW<float, 4>(), op);
57+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceY_float32_4");
58+
Reducer<RedOpYZW<float, 4>>::reduceY(window, input, output, RedOpYZW<float, 4>(), op);
5459
}
5560

5661
void reduce_RedOpYZW_reduceZ_float32_4(const Window &window,
5762
const ITensor *input,
5863
ITensor *output,
5964
const ReductionOperation op)
6065
{
61-
return Reducer<RedOpYZW<float, 4>>::reduceZ(window, input, output, RedOpYZW<float, 4>(), op);
66+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceZ_float32_4");
67+
Reducer<RedOpYZW<float, 4>>::reduceZ(window, input, output, RedOpYZW<float, 4>(), op);
6268
}
6369

6470
void reduce_RedOpYZW_reduceW_float32_4(const Window &window,
6571
const ITensor *input,
6672
ITensor *output,
6773
const ReductionOperation op)
6874
{
69-
return Reducer<RedOpYZW<float, 4>>::reduceW(window, input, output, RedOpYZW<float, 4>(), op);
75+
ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceW_float32_4");
76+
Reducer<RedOpYZW<float, 4>>::reduceW(window, input, output, RedOpYZW<float, 4>(), op);
7077
}
7178

7279
} // namespace cpu

0 commit comments

Comments
 (0)