
Commit 1bb8474

dwildmark authored and developer-compute committed
Revert "fix: Handle padding updates after configure() in CpuActivation"
This reverts commit 4eec351.
1 parent 4eec351 commit 1bb8474

File tree: 7 files changed (+20, -200 lines)


src/cpu/kernels/CpuActivationKernel.cpp

Lines changed: 2 additions & 33 deletions
@@ -201,19 +201,17 @@ void init_lut(ActivationLayerInfo::ActivationFunction act_func,
 void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo activation_info)
 {
     ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivationKernel::configure");
+    ARM_COMPUTE_UNUSED(dst);
     ARM_COMPUTE_ERROR_ON_NULLPTR(src);
     ARM_COMPUTE_ERROR_THROW_ON(CpuActivationKernel::validate(src, dst, activation_info));

     heuristics::CpuActivationKernelHeuristics heuristics(src, dst, activation_info);
     _heuristics = std::move(heuristics);

-    _src_padding = src->padding();
-    _inplace = (dst == nullptr);
-    if (!_inplace)
+    if (dst != nullptr)
     {
         // dst auto inizialitation if not yet initialized
         auto_init_if_empty(*dst, *src->clone());
-        _dst_padding = dst->padding();
     }

     const auto *uk = _heuristics.kernel();
@@ -266,35 +264,6 @@ size_t CpuActivationKernel::get_mws(const CPUInfo &platform, size_t thread_count
     return _heuristics.mws();
 }

-void CpuActivationKernel::prepare(ITensorPack &tensors)
-{
-    const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC);
-    ITensor *dst = tensors.get_tensor(TensorType::ACL_DST);
-
-    const ITensorInfo *src_info = src->info();
-    const ITensorInfo *dst_info = dst->info();
-
-    const bool src_padding_changed = (src_info->padding() != _src_padding);
-    const bool dst_padding_changed = (!_inplace && dst_info->padding() != _dst_padding);
-
-    if (src_padding_changed || dst_padding_changed)
-    {
-        // If padding has changed after configuration, recalculate the heuristics
-        const auto kernel_before_padding_change = _heuristics.kernel();
-        heuristics::CpuActivationKernelHeuristics heuristics(src_info, dst_info, _act_info);
-        _heuristics = std::move(heuristics);
-        const auto kernel_after_padding_change = _heuristics.kernel();
-
-        // We expect only the window and split dimension to change if padding changes.
-        // This guard is here to prevent future incompliant changes to the heuristic class.
-        // Thus, the kernel name specific logic in configuration doesn't need to be replicated here.
-        ARM_COMPUTE_ERROR_ON(kernel_before_padding_change != kernel_after_padding_change);
-        ARM_COMPUTE_UNUSED(kernel_before_padding_change, kernel_after_padding_change);
-
-        ICPPKernel::configure(_heuristics.window());
-    }
-}
-
 void CpuActivationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivationKernel::run_op");
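For context on the hunks above: the reverted change cached the tensors' padding at configure() time and compared it against the padding seen in prepare(), rebuilding the execution window when they differed. Below is a minimal, self-contained sketch of that pattern; PaddingSize, TensorInfo, Window and compute_window() are hypothetical simplifications for illustration, not the real ACL types appearing in the diff.

#include <cstddef>

// Hypothetical stand-ins for the ACL types; they model only what the pattern needs.
struct PaddingSize
{
    std::size_t left = 0, right = 0, top = 0, bottom = 0;
    bool operator!=(const PaddingSize &o) const
    {
        return left != o.left || right != o.right || top != o.top || bottom != o.bottom;
    }
};

struct TensorInfo { PaddingSize padding{}; };
struct Window     { std::size_t num_iterations; };

// In the real code this derives the iteration window from shape, padding and heuristics;
// here it only marks that the result depends on the tensor's current layout.
static Window compute_window(const TensorInfo &src)
{
    return Window{src.padding.left + src.padding.right};
}

class Kernel
{
public:
    void configure(const TensorInfo &src)
    {
        _src_padding = src.padding;      // remember the padding seen at configure() time
        _window      = compute_window(src);
    }

    // The reverted idea: if padding changed between configure() and the first run,
    // recompute the window once so it matches the tensor's final layout.
    void prepare(const TensorInfo &src)
    {
        if (src.padding != _src_padding)
        {
            _window      = compute_window(src);
            _src_padding = src.padding;
        }
    }

private:
    PaddingSize _src_padding{};
    Window      _window{};
};

After the revert only the configure() path remains, so the window computed there is what run_op() executes.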

src/cpu/kernels/CpuActivationKernel.h

Lines changed: 1 addition & 12 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2025 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,6 @@
 #ifndef ACL_SRC_CPU_KERNELS_CPUACTIVATIONKERNEL_H
 #define ACL_SRC_CPU_KERNELS_CPUACTIVATIONKERNEL_H

-#include "arm_compute/core/Types.h"
 #include "arm_compute/function_info/ActivationLayerInfo.h"

 #include "src/core/common/Macros.h"
@@ -87,20 +86,10 @@ class CpuActivationKernel : public ICPPKernel
         return _heuristics.scheduler_hint().split_dimension();
     }

-    /** Prepare the activation kernel for execution (Only executed once)
-     *
-     * @param[in] tensors Pack of input and output tensors
-     *
-     */
-    void prepare(ITensorPack &tensors);
-
 private:
     ActivationLayerInfo _act_info{};
     std::string _name{};
     heuristics::CpuActivationKernelHeuristics _heuristics{};
-    PaddingSize _src_padding{};
-    PaddingSize _dst_padding{};
-    bool _inplace{};
 };
 } // namespace kernels
 } // namespace cpu

src/cpu/kernels/activation/heuristics/CpuActivationKernelHeuristics.cpp

Lines changed: 4 additions & 3 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2025 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -318,6 +318,8 @@ CpuActivationKernelHeuristics::CpuActivationKernelHeuristics(const ITensorInfo
                                                              const ITensorInfo *dst,
                                                              const ActivationLayerInfo &activation_info)
 {
+    ARM_COMPUTE_UNUSED(dst);
+
     // Set kernel
     const DataType dtype = src->data_type();
     ActivationDataTypeISASelectorData selector{dtype, CPUInfo::get().get_cpu_model(), CPUInfo::get().get_isa(),
@@ -327,8 +329,7 @@ CpuActivationKernelHeuristics::CpuActivationKernelHeuristics(const ITensorInfo

     // Set window and scheduling hint
     int split_dim;
-    std::tie(_window, split_dim) =
-        dst == nullptr ? calculate_squashed_or_max_window(*src) : calculate_squashed_or_max_window(*src, *dst);
+    std::tie(_window, split_dim) = calculate_squashed_or_max_window(*src);

     // Collapse window with SME kernels in Y-Dim
     if (std::string(_kernel->name) == "sme2_fp32_logistic")
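The last hunk also changes how the window is derived: the reverted code passed both src and dst to calculate_squashed_or_max_window() when a distinct dst existed, so that a mismatch between the two tensors' layouts could block the collapsed ("squashed") window; after the revert only src is considered. A rough, hypothetical illustration of that idea follows, using simplified types rather than the ACL implementation.

#include <array>
#include <cstddef>

// Hypothetical simplified tensor description: per-dimension sizes and strides (in elements).
struct Info
{
    std::array<std::size_t, 3> shape;
    std::array<std::size_t, 3> strides;
};

// Hypothetical simplified window: how many dimensions to iterate and the innermost trip count.
struct Window
{
    std::size_t dims;
    std::size_t inner_count;
};

// A tensor without padding has strides that follow directly from its shape.
static bool is_contiguous(const Info &t)
{
    return t.strides[0] == 1 && t.strides[1] == t.shape[0] && t.strides[2] == t.shape[0] * t.shape[1];
}

// Collapse the iteration space to a single 1-D loop only when every tensor involved is
// contiguous; a differently laid-out dst could otherwise be addressed with wrong offsets.
static Window squashed_or_max_window(const Info &src, const Info *dst = nullptr)
{
    const bool squashable = is_contiguous(src) && (dst == nullptr || is_contiguous(*dst));
    if (squashable)
    {
        return Window{1, src.shape[0] * src.shape[1] * src.shape[2]};
    }
    return Window{3, src.shape[0]};
}

This is only meant to show why the dst-aware overload existed in the reverted version; the actual squashing rules live in the library's calculate_squashed_or_max_window().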

src/cpu/operators/CpuActivation.cpp

Lines changed: 2 additions & 12 deletions
@@ -40,9 +40,7 @@ void CpuActivation::configure(const ITensorInfo *input, ITensorInfo *output, con
 {
     ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivation::configure");
     ARM_COMPUTE_LOG_PARAMS(input, output, activation_info);
-
-    _is_prepared = false;
-    auto k = std::make_unique<kernels::CpuActivationKernel>();
+    auto k = std::make_unique<kernels::CpuActivationKernel>();
     k->configure(input, output, activation_info);
     _kernel = std::move(k);
 }
@@ -58,15 +56,7 @@ void CpuActivation::run(ITensorPack &tensors)
 {
     ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuActivation::run");
     ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
-
-    auto kernel_casted = static_cast<kernels::CpuActivationKernel *>(_kernel.get());
-    if (!_is_prepared)
-    {
-        kernel_casted->prepare(tensors);
-        _is_prepared = true;
-    }
-
-    const size_t split_dimension = kernel_casted->get_split_dimension_hint();
+    auto split_dimension = static_cast<kernels::CpuActivationKernel *>(_kernel.get())->get_split_dimension_hint();
     NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors);
 }
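With the revert, CpuActivation::run() no longer performs a one-time prepare() pass: it only queries the kernel's split-dimension hint and schedules the window that was fixed at configure() time. A hedged usage sketch at the public API level follows (NEActivationLayer wraps this operator); the header paths, constructors and calls are written from memory of the Compute Library API and should be checked against the actual headers, and the 7x7x17x2 shape is simply borrowed from the removed tests.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(7U, 7U, 17U, 2U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(7U, 7U, 17U, 2U), 1, DataType::F32));

    // configure() derives the execution window from the tensor info as it is right now;
    // after this revert there is no run-time step that re-derives it later.
    NEActivationLayer act;
    act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    // Allocate backing memory, then execute.
    src.allocator()->allocate();
    dst.allocator()->allocate();
    act.run();

    return 0;
}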

src/cpu/operators/CpuActivation.h

Lines changed: 4 additions & 7 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, 2025 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ACL_SRC_CPU_OPERATORS_CPUACTIVATION_H
-#define ACL_SRC_CPU_OPERATORS_CPUACTIVATION_H
+#ifndef ARM_COMPUTE_CPU_ACTIVATION_H
+#define ARM_COMPUTE_CPU_ACTIVATION_H

 #include "arm_compute/function_info/ActivationLayerInfo.h"

@@ -53,10 +53,7 @@ class CpuActivation : public ICpuOperator

     // Inherited methods overridden:
     void run(ITensorPack &tensors) override;
-
-private:
-    bool _is_prepared{};
 };
 } // namespace cpu
 } // namespace arm_compute
-#endif // ACL_SRC_CPU_OPERATORS_CPUACTIVATION_H
+#endif /* ARM_COMPUTE_CPU_ACTIVATION_H */

tests/validation/NEON/ActivationLayer.cpp

Lines changed: 4 additions & 96 deletions
@@ -50,7 +50,6 @@ namespace test
 {
 namespace validation
 {
-using framework::dataset::make;
 namespace
 {

@@ -63,11 +62,6 @@ const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions

 /** Input data sets. */
 const auto ActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), NeonActivationFunctionsDataset), framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
-const auto ActivationDatasetForPaddingAfterConfigure = combine(
-    make("InPlace", { false, true }),
-    NeonActivationFunctionsDataset,
-    make("AlphaBeta", { 0.5f })
-);

 template <typename T, ARM_COMPUTE_REQUIRES_TA(arm_compute::utils::traits::is_floating_point<T>::value)>
 void test_float_sqrt_boundary_value()
@@ -187,8 +181,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(

 template <typename T>
 using NEActivationLayerFixture = ActivationValidationFixture<Tensor, Accessor, NEActivationLayer, T>;
-template <typename T>
-using NEActivationLayerWithPaddingFixture = ActivationWithPaddingValidationFixture<Tensor, Accessor, NEActivationLayer, T>;

 TEST_SUITE(Float)
 #ifdef ARM_COMPUTE_ENABLE_FP16
@@ -212,25 +204,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<half>, framework::Data
         framework::ARM_COMPUTE_PRINT_INFO();
     }
 }
-
-FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingFixture<half>, framework::DatasetMode::ALL,
-    combine(
-        make("Shape", TensorShape{ 7U, 7U, 17U, 2U }),
-        ActivationDatasetForPaddingAfterConfigure,
-        make("DataType", DataType::F16))
-)
-{
-    if(CPUInfo::get().has_fp16())
-    {
-        // Validate output
-        validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
-    }
-    else
-    {
-        ARM_COMPUTE_TEST_INFO("Device does not support fp16 vector operations. Test SKIPPED.");
-        framework::ARM_COMPUTE_PRINT_INFO();
-    }
-}
 TEST_SUITE_END() // FP16
 #endif /* ARM_COMPUTE_ENABLE_FP16 */

@@ -239,45 +212,28 @@ TEST_CASE(SqrtBoundaryValue, framework::DatasetMode::ALL)
 {
     test_float_sqrt_boundary_value<float>();
 }
-FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ActivationDataset), make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType",
     DataType::F32)))

 {
     // Validate output
     validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
 }
-
-FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingFixture<float>, framework::DatasetMode::ALL,
-    combine(
-        make("Shape", TensorShape{ 7U, 7U, 17U, 2U }),
-        ActivationDatasetForPaddingAfterConfigure,
-        make("DataType", DataType::F32))
-)
-{
-    validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
-}
 // Run only on SME Devices to stress Logistic SME kernel
 #ifdef ARM_COMPUTE_ENABLE_SME2
 TEST_SUITE(SME)
-const auto LogisticDataset = combine(make("InPlace", { false }), make("Function", ActivationLayerInfo::ActivationFunction::LOGISTIC), make("AlphaBeta", { 1.f }));
-FIXTURE_DATA_TEST_CASE(RunLogistic5D, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::Tiny5dShapes(), LogisticDataset, make("DataType",
+const auto LogsisticDataset = combine(framework::dataset::make("InPlace", { false }), framework::dataset::make("Function", ActivationLayerInfo::ActivationFunction::LOGISTIC), framework::dataset::make("AlphaBeta", { 1.f }));
+FIXTURE_DATA_TEST_CASE(RunLogistic5D, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::Tiny5dShapes(), LogsisticDataset, framework::dataset::make("DataType",
     DataType::F32)))

 {
     // Validate output
     validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
 }

-FIXTURE_DATA_TEST_CASE(RunLogisticSME, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::LogisticSMEStressShapesFp32(), LogisticDataset, make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLogisticSME, NEActivationLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::LogisticSMEStressShapesFp32(), LogsisticDataset, framework::dataset::make("DataType",
     DataType::F32)))

-{
-    // Validate output
-    validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
-}
-FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingFixture<float>, framework::DatasetMode::ALL,
-    combine(datasets::LogisticSMEStressShapesFp32(), LogisticDataset, make("DataType", DataType::F32)))
-
 {
     // Validate output
     validate(Accessor(_target), _reference, helper::relative_tolerance(_data_type, _function), 0.f, helper::absolute_tolerance(_data_type, _function));
@@ -289,8 +245,6 @@ TEST_SUITE_END() // Float

 template <typename T>
 using NEActivationLayerQuantizedFixture = ActivationValidationQuantizedFixture<Tensor, Accessor, NEActivationLayer, T>;
-template <typename T>
-using NEActivationLayerWithPaddingQuantizedFixture = ActivationWithPaddingValidationQuantizedFixture<Tensor, Accessor, NEActivationLayer, T>;

 /** Input data sets. */
 const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationFunction",
@@ -309,13 +263,6 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ
 const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false }),
     concat(QuantizedActivationFunctionsDataset, framework::dataset::make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH))),
     framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
-const auto QuantizedActivationDatasetForPaddingAfterConfigure = combine(
-    make("InPlace", { false }),
-    concat(QuantizedActivationFunctionsDataset,
-        make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH)
-    ),
-    make("AlphaBeta", { 0.5f})
-);

 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
@@ -327,17 +274,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<uint8_t>, fra
     // Validate output
     validate(Accessor(_target), _reference, helper::tolerance_qasymm8(_function));
 }
-FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
-    combine(
-        make("Shape", TensorShape{ 7U, 7U, 17U, 2U }),
-        QuantizedActivationDatasetForPaddingAfterConfigure,
-        make("DataType", DataType::QASYMM8),
-        make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) }
-    )))
-{
-    // Validate output
-    validate(Accessor(_target), _reference, helper::tolerance_qasymm8(_function));
-}
 TEST_SUITE_END() // QASYMM8

 TEST_SUITE(QASYMM8_SIGNED)
@@ -349,17 +285,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<int8_t>, fram
     // Validate output
     validate(Accessor(_target), _reference, helper::tolerance_qasymm8(_function));
 }
-FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
-    combine(
-        make("Shape", TensorShape{ 7U, 7U, 17U, 2U }),
-        QuantizedActivationDatasetForPaddingAfterConfigure,
-        make("DataType", DataType::QASYMM8_SIGNED),
-        make("QuantizationInfo", { QuantizationInfo(0.5f, 10.0f) }
-    )))
-{
-    // Validate output
-    validate(Accessor(_target), _reference, helper::tolerance_qasymm8(_function));
-}
 TEST_SUITE_END() // QASYMM8_SIGNED

 /** Input data sets. */
@@ -372,12 +297,6 @@ const auto Int16QuantizedActivationFunctionsDataset = framework::dataset::make("
 const auto Int16QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false }), Int16QuantizedActivationFunctionsDataset),
     framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));

-const auto Int16QuantizedActivationDatasetForPaddingAfterConfigure = combine(
-    make("InPlace", { false }),
-    Int16QuantizedActivationFunctionsDataset,
-    make("AlphaBeta", { 0.5f })
-);
-
 TEST_SUITE(QSYMM16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), Int16QuantizedActivationDataset),
     framework::dataset::make("DataType",
@@ -387,17 +306,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<int16_t>, fra
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qsymm16);
 }
-FIXTURE_DATA_TEST_CASE(PaddingAfterConfigure, NEActivationLayerWithPaddingQuantizedFixture<int16_t>, framework::DatasetMode::ALL,
-    combine(
-        make("Shape", TensorShape{ 7U, 7U, 17U, 2U }),
-        Int16QuantizedActivationDatasetForPaddingAfterConfigure,
-        make("DataType", DataType::QSYMM16),
-        make("QuantizationInfo", { QuantizationInfo(1.f / 32768.f, 0.f) }))
-)
-{
-    // Validate output
-    validate(Accessor(_target), _reference, tolerance_qsymm16);
-}
 TEST_SUITE_END() // QSYMM16
 TEST_SUITE_END() // Quantized
