ARM-software
diff --git a/‎src/cpu/kernels/quantize/generic/neon/fp16.cpp‎
Lines changed: 5 additions & 1 deletion b/‎src/cpu/kernels/quantize/generic/neon/fp16.cpp‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎src/cpu/kernels/quantize/generic/neon/fp32.cpp‎
Lines changed: 5 additions & 0 deletions b/‎src/cpu/kernels/quantize/generic/neon/fp32.cpp‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/cpu/kernels/quantize/generic/neon/integer.cpp‎
Lines changed: 16 additions & 1 deletion b/‎src/cpu/kernels/quantize/generic/neon/integer.cpp‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎src/cpu/kernels/range/generic/neon/fp16.cpp‎
Lines changed: 4 additions & 2 deletions b/‎src/cpu/kernels/range/generic/neon/fp16.cpp‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎src/cpu/kernels/range/generic/neon/fp32.cpp‎
Lines changed: 4 additions & 2 deletions b/‎src/cpu/kernels/range/generic/neon/fp32.cpp‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎src/cpu/kernels/range/generic/neon/integer.cpp‎
Lines changed: 14 additions & 7 deletions b/‎src/cpu/kernels/range/generic/neon/integer.cpp‎
Lines changed: 14 additions & 7 deletions
diff --git a/‎src/cpu/kernels/reduction_layer/generic/neon/fp16.cpp‎
Lines changed: 10 additions & 5 deletions b/‎src/cpu/kernels/reduction_layer/generic/neon/fp16.cpp‎
Lines changed: 10 additions & 5 deletions
diff --git a/‎src/cpu/kernels/reduction_layer/generic/neon/fp32.cpp‎
Lines changed: 12 additions & 5 deletions b/‎src/cpu/kernels/reduction_layer/generic/neon/fp32.cpp‎
Lines changed: 12 additions & 5 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
+#include "src/common/utils/profile/acl_profile.h"
 #include "src/cpu/kernels/quantize/generic/neon/impl.h"
 
 namespace arm_compute
@@ -30,14 +31,17 @@ namespace cpu
 {
 void fp16_u8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_u8_run_quantize_qasymm8");
     run_quantize_qasymm8<float16_t, uint8_t>(src, dst, window);
 }
 void fp16_i8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_i8_run_quantize_qasymm8");
     run_quantize_qasymm8<float16_t, int8_t>(src, dst, window);
 }
 void fp16_run_quantize_qasymm16(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_run_quantize_qasymm16");
     run_quantize_qasymm16<float16_t>(src, dst, window);
 }
 } // namespace cpu
 
@@ -21,6 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "src/common/utils/profile/acl_profile.h"
 #include "src/cpu/kernels/quantize/generic/neon/impl.h"
 
 namespace arm_compute
@@ -29,19 +30,23 @@ namespace cpu
 {
 void fp32_u8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_u8_run_quantize_qasymm8");
     run_quantize_qasymm8<float, uint8_t>(src, dst, window);
 }
 void fp32_i8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_i8_run_quantize_qasymm8");
     run_quantize_qasymm8<float, int8_t>(src, dst, window);
 }
 void fp32_run_quantize_qasymm16(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_run_quantize_qasymm16");
     run_quantize_qasymm16<float>(src, dst, window);
 }
 
 void fp32_i8_run_quantize_qsymm8(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_i8_run_quantize_qsymm8");
     run_quantize_qsymm8<float, int8_t>(src, dst, window);
 }
 
 
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,6 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "src/common/utils/profile/acl_profile.h"
 #include "src/cpu/kernels/quantize/generic/neon/impl.h"
 
 namespace arm_compute
@@ -29,53 +30,67 @@ namespace cpu
 {
 void u8_u8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_u8_run_quantize_qasymm8");
     run_quantize_qasymm8<uint8_t, uint8_t>(src, dst, window);
 }
 void u8_i8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_i8_run_quantize_qasymm8");
     run_quantize_qasymm8<uint8_t, int8_t>(src, dst, window);
 }
 void i8_u8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "i8_u8_run_quantize_qasymm8");
     run_quantize_qasymm8<int8_t, uint8_t>(src, dst, window);
 }
 void i8_i8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "i8_i8_run_quantize_qasymm8");
     run_quantize_qasymm8<int8_t, int8_t>(src, dst, window);
 }
 
 void u8_run_quantize_qasymm16(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_run_quantize_qasymm16");
     run_quantize_qasymm16<uint8_t>(src, dst, window);
 }
 void i8_run_quantize_qasymm16(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "i8_run_quantize_qasymm16");
     run_quantize_qasymm16<int8_t>(src, dst, window);
 }
 
 void u8_u8_run_requantize_offset_only(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_u8_run_requantize_offset_only");
     run_requantize_offset_only<uint8_t, uint8_t>(src, dst, window);
 }
 void u8_i8_run_requantize_offset_only(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_i8_run_requantize_offset_only");
     run_requantize_offset_only<uint8_t, int8_t>(src, dst, window);
 }
 void i8_u8_run_requantize_offset_only(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "i8_u8_run_requantize_offset_only");
     run_requantize_offset_only<int8_t, uint8_t>(src, dst, window);
 }
 void i8_i8_run_requantize_offset_only(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "i8_i8_run_requantize_offset_only");
     run_requantize_offset_only<int8_t, int8_t>(src, dst, window);
 }
 
 void i8_u8_run_requantize_offset_only_convert(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
+                            "i8_u8_run_requantize_offset_only_convert");
     run_requantize_offset_only_convert<int8_t, uint8_t>(src, dst, window);
 }
 void u8_i8_run_requantize_offset_only_convert(const ITensor *src, ITensor *dst, const Window &window)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
+                            "u8_i8_run_requantize_offset_only_convert");
     run_requantize_offset_only_convert<uint8_t, int8_t>(src, dst, window);
 }
 } // namespace cpu
 
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,7 @@
 
 #include "arm_compute/core/Helpers.h"
 
+#include "src/common/utils/profile/acl_profile.h"
 #include "src/core/NEON/wrapper/wrapper.h"
 #include "src/cpu/kernels/range/generic/neon/impl.h"
 
@@ -34,7 +35,8 @@ namespace cpu
 {
 void fp16_neon_range_function(ITensor *output, float start, float step, const Window &window)
 {
-    return neon_range_function<float16_t>(output, start, step, window);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_neon_range_function");
+    neon_range_function<float16_t>(output, start, step, window);
 }
 } // namespace cpu
 } // namespace arm_compute
 
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,6 +24,7 @@
 
 #include "arm_compute/core/Helpers.h"
 
+#include "src/common/utils/profile/acl_profile.h"
 #include "src/core/NEON/wrapper/wrapper.h"
 #include "src/cpu/kernels/range/generic/neon/impl.h"
 
@@ -33,7 +34,8 @@ namespace cpu
 {
 void fp32_neon_range_function(ITensor *output, float start, float step, const Window &window)
 {
-    return neon_range_function<float32_t>(output, start, step, window);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_neon_range_function");
+    neon_range_function<float32_t>(output, start, step, window);
 }
 } // namespace cpu
 } // namespace arm_compute
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 
+#include "src/common/utils/profile/acl_profile.h"
 #include "src/cpu/kernels/range/generic/neon/impl.h"
 
 #include <cstdint>
@@ -32,32 +33,38 @@ namespace cpu
 {
 void u8_neon_range_function(ITensor *output, float start, float step, const Window &window)
 {
-    return neon_range_function<uint8_t>(output, start, step, window);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u8_neon_range_function");
+    neon_range_function<uint8_t>(output, start, step, window);
 }
 
 void u16_neon_range_function(ITensor *output, float start, float step, const Window &window)
 {
-    return neon_range_function<uint16_t>(output, start, step, window);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u16_neon_range_function");
+    neon_range_function<uint16_t>(output, start, step, window);
 }
 
 void u32_neon_range_function(ITensor *output, float start, float step, const Window &window)
 {
-    return neon_range_function<uint32_t>(output, start, step, window);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "u32_neon_range_function");
+    neon_range_function<uint32_t>(output, start, step, window);
 }
 
 void s8_neon_range_function(ITensor *output, float start, float step, const Window &window)
 {
-    return neon_range_function<int8_t>(output, start, step, window);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "s8_neon_range_function");
+    neon_range_function<int8_t>(output, start, step, window);
 }
 
 void s16_neon_range_function(ITensor *output, float start, float step, const Window &window)
 {
-    return neon_range_function<int16_t>(output, start, step, window);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "s16_neon_range_function");
+    neon_range_function<int16_t>(output, start, step, window);
 }
 
 void s32_neon_range_function(ITensor *output, float start, float step, const Window &window)
 {
-    return neon_range_function<int32_t>(output, start, step, window);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "s32_neon_range_function");
+    neon_range_function<int32_t>(output, start, step, window);
 }
 
 } // namespace cpu
 
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,6 +23,7 @@
  */
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)
 
+#include "src/common/utils/profile/acl_profile.h"
 #include "src/cpu/kernels/reduction_layer/generic/neon/impl.h"
 
 namespace arm_compute
@@ -34,31 +35,35 @@ void reduce_RedOpX_reduceX_float16_8(const Window            &window,
                                      ITensor                 *output,
                                      const ReductionOperation op)
 {
-    return Reducer<RedOpX<float16_t, 8>>::reduceX(window, input, output, RedOpX<float16_t, 8>(), op);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpX_reduceX_float16_8");
+    Reducer<RedOpX<float16_t, 8>>::reduceX(window, input, output, RedOpX<float16_t, 8>(), op);
 }
 
 void reduce_RedOpYZW_reduceY_float16_8(const Window            &window,
                                        const ITensor           *input,
                                        ITensor                 *output,
                                        const ReductionOperation op)
 {
-    return Reducer<RedOpYZW<float16_t, 8>>::reduceY(window, input, output, RedOpYZW<float16_t, 8>(), op);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceY_float16_8");
+    Reducer<RedOpYZW<float16_t, 8>>::reduceY(window, input, output, RedOpYZW<float16_t, 8>(), op);
 }
 
 void reduce_RedOpYZW_reduceZ_float16_8(const Window            &window,
                                        const ITensor           *input,
                                        ITensor                 *output,
                                        const ReductionOperation op)
 {
-    return Reducer<RedOpYZW<float16_t, 8>>::reduceZ(window, input, output, RedOpYZW<float16_t, 8>(), op);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceZ_float16_8");
+    Reducer<RedOpYZW<float16_t, 8>>::reduceZ(window, input, output, RedOpYZW<float16_t, 8>(), op);
 }
 
 void reduce_RedOpYZW_reduceW_float16_8(const Window            &window,
                                        const ITensor           *input,
                                        ITensor                 *output,
                                        const ReductionOperation op)
 {
-    return Reducer<RedOpYZW<float16_t, 8>>::reduceW(window, input, output, RedOpYZW<float16_t, 8>(), op);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceW_float16_8");
+    Reducer<RedOpYZW<float16_t, 8>>::reduceW(window, input, output, RedOpYZW<float16_t, 8>(), op);
 }
 } // namespace cpu
 } // namespace arm_compute
 
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Arm Limited.
+ * Copyright (c) 2024-2025 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 
+#include "src/common/utils/profile/acl_profile.h"
 #include "src/cpu/kernels/reduction_layer/generic/neon/impl.h"
 
 namespace arm_compute
@@ -33,6 +34,8 @@ void reduce_RedOpYZW_complex_reduceZ_float32_4_2_SUM(const Window            &wi
                                                      ITensor                 *output,
                                                      const ReductionOperation op)
 {
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU,
+                            "reduce_RedOpYZW_complex_reduceZ_float32_4_2_SUM");
     Reducer<RedOpYZW_complex<float, 4, 2, ReductionOperation::SUM>>::reduceZ(
         window, input, output, RedOpYZW_complex<float, 4, 2, ReductionOperation::SUM>(), op);
 }
@@ -42,31 +45,35 @@ void reduce_RedOpX_reduceX_float32_4(const Window            &window,
                                      ITensor                 *output,
                                      const ReductionOperation op)
 {
-    return Reducer<RedOpX<float, 4>>::reduceX(window, input, output, RedOpX<float, 4>(), op);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpX_reduceX_float32_4");
+    Reducer<RedOpX<float, 4>>::reduceX(window, input, output, RedOpX<float, 4>(), op);
 }
 
 void reduce_RedOpYZW_reduceY_float32_4(const Window            &window,
                                        const ITensor           *input,
                                        ITensor                 *output,
                                        const ReductionOperation op)
 {
-    return Reducer<RedOpYZW<float, 4>>::reduceY(window, input, output, RedOpYZW<float, 4>(), op);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceY_float32_4");
+    Reducer<RedOpYZW<float, 4>>::reduceY(window, input, output, RedOpYZW<float, 4>(), op);
 }
 
 void reduce_RedOpYZW_reduceZ_float32_4(const Window            &window,
                                        const ITensor           *input,
                                        ITensor                 *output,
                                        const ReductionOperation op)
 {
-    return Reducer<RedOpYZW<float, 4>>::reduceZ(window, input, output, RedOpYZW<float, 4>(), op);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceZ_float32_4");
+    Reducer<RedOpYZW<float, 4>>::reduceZ(window, input, output, RedOpYZW<float, 4>(), op);
 }
 
 void reduce_RedOpYZW_reduceW_float32_4(const Window            &window,
                                        const ITensor           *input,
                                        ITensor                 *output,
                                        const ReductionOperation op)
 {
-    return Reducer<RedOpYZW<float, 4>>::reduceW(window, input, output, RedOpYZW<float, 4>(), op);
+    ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "reduce_RedOpYZW_reduceW_float32_4");
+    Reducer<RedOpYZW<float, 4>>::reduceW(window, input, output, RedOpYZW<float, 4>(), op);
 }
 
 } // namespace cpu
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`/*`
`2`		`- * Copyright (c) 2024 Arm Limited.`
	`2`	`+ * Copyright (c) 2024-2025 Arm Limited.`
`3`	`3`	`*`
`4`	`4`	`* SPDX-License-Identifier: MIT`
`5`	`5`	`*`
`@@ -22,6 +22,7 @@`
`22`	`22`	`* SOFTWARE.`
`23`	`23`	`*/`
`24`	`24`	`#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS)`
	`25`	`+#include "src/common/utils/profile/acl_profile.h"`
`25`	`26`	`#include "src/cpu/kernels/quantize/generic/neon/impl.h"`
`26`	`27`
`27`	`28`	`namespace arm_compute`
`@@ -30,14 +31,17 @@ namespace cpu`
`30`	`31`	`{`
`31`	`32`	`void fp16_u8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)`
`32`	`33`	`{`
	`34`	`+ ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_u8_run_quantize_qasymm8");`
`33`	`35`	`run_quantize_qasymm8<float16_t, uint8_t>(src, dst, window);`
`34`	`36`	`}`
`35`	`37`	`void fp16_i8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)`
`36`	`38`	`{`
	`39`	`+ ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_i8_run_quantize_qasymm8");`
`37`	`40`	`run_quantize_qasymm8<float16_t, int8_t>(src, dst, window);`
`38`	`41`	`}`
`39`	`42`	`void fp16_run_quantize_qasymm16(const ITensor *src, ITensor *dst, const Window &window)`
`40`	`43`	`{`
	`44`	`+ ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_run_quantize_qasymm16");`
`41`	`45`	`run_quantize_qasymm16<float16_t>(src, dst, window);`
`42`	`46`	`}`
`43`	`47`	`} // namespace cpu`
Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@`
`21`	`21`	`* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE`
`22`	`22`	`* SOFTWARE.`
`23`	`23`	`*/`
	`24`	`+#include "src/common/utils/profile/acl_profile.h"`
`24`	`25`	`#include "src/cpu/kernels/quantize/generic/neon/impl.h"`
`25`	`26`
`26`	`27`	`namespace arm_compute`
`@@ -29,19 +30,23 @@ namespace cpu`
`29`	`30`	`{`
`30`	`31`	`void fp32_u8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)`
`31`	`32`	`{`
	`33`	`+ ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_u8_run_quantize_qasymm8");`
`32`	`34`	`run_quantize_qasymm8<float, uint8_t>(src, dst, window);`
`33`	`35`	`}`
`34`	`36`	`void fp32_i8_run_quantize_qasymm8(const ITensor *src, ITensor *dst, const Window &window)`
`35`	`37`	`{`
	`38`	`+ ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_i8_run_quantize_qasymm8");`
`36`	`39`	`run_quantize_qasymm8<float, int8_t>(src, dst, window);`
`37`	`40`	`}`
`38`	`41`	`void fp32_run_quantize_qasymm16(const ITensor *src, ITensor *dst, const Window &window)`
`39`	`42`	`{`
	`43`	`+ ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_run_quantize_qasymm16");`
`40`	`44`	`run_quantize_qasymm16<float>(src, dst, window);`
`41`	`45`	`}`
`42`	`46`
`43`	`47`	`void fp32_i8_run_quantize_qsymm8(const ITensor *src, ITensor *dst, const Window &window)`
`44`	`48`	`{`
	`49`	`+ ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp32_i8_run_quantize_qsymm8");`
`45`	`50`	`run_quantize_qsymm8<float, int8_t>(src, dst, window);`
`46`	`51`	`}`
`47`	`52`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`/*`
`2`		`- * Copyright (c) 2021 Arm Limited.`
	`2`	`+ * Copyright (c) 2021, 2025 Arm Limited.`
`3`	`3`	`*`
`4`	`4`	`* SPDX-License-Identifier: MIT`
`5`	`5`	`*`
`@@ -25,6 +25,7 @@`
`25`	`25`
`26`	`26`	`#include "arm_compute/core/Helpers.h"`
`27`	`27`
	`28`	`+#include "src/common/utils/profile/acl_profile.h"`
`28`	`29`	`#include "src/core/NEON/wrapper/wrapper.h"`
`29`	`30`	`#include "src/cpu/kernels/range/generic/neon/impl.h"`
`30`	`31`
`@@ -34,7 +35,8 @@ namespace cpu`
`34`	`35`	`{`
`35`	`36`	`void fp16_neon_range_function(ITensor *output, float start, float step, const Window &window)`
`36`	`37`	`{`
`37`		`- return neon_range_function<float16_t>(output, start, step, window);`
	`38`	`+ ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "fp16_neon_range_function");`
	`39`	`+ neon_range_function<float16_t>(output, start, step, window);`
`38`	`40`	`}`
`39`	`41`	`} // namespace cpu`
`40`	`42`	`} // namespace arm_compute`