Skip to content

Commit 34a2ef2

Browse files
committed
test: Add thread safety tests for CpuSoftmax
Resolves: COMPMID-8329
Change-Id: I3f74bc4c000c71a17fcca94c4552b8d97a61191a
Signed-off-by: Syed Wajahat Abbas Naqvi <[email protected]>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14698
Tested-by: Arm Jenkins <[email protected]>
Comments-Addressed: Arm Jenkins <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
Reviewed-by: Dongsung Kim <[email protected]>
1 parent 9f7a1fb commit 34a2ef2

File tree

2 files changed

+258
-42
lines changed

tests/validation/fixtures/CpuSoftmaxFixture.h

Lines changed: 133 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021, 2023-2024 Arm Limited.
2+
* Copyright (c) 2017-2021, 2023-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -33,31 +33,42 @@
3333
#include "tests/framework/Asserts.h"
3434
#include "tests/framework/Fixture.h"
3535
#include "tests/validation/reference/SoftmaxLayer.h"
36-
3736
#include <random>
3837

38+
#if !defined(BARE_METAL)
39+
#include <thread>
40+
#include <vector>
41+
#endif // !defined(BARE_METAL)
42+
3943
namespace arm_compute
4044
{
4145
namespace test
4246
{
4347
namespace validation
4448
{
49+
namespace
50+
{
51+
constexpr int NUM_THREADS = 3;
52+
}// namespace
4553
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool IS_LOG = false>
46-
class CpuSoftmaxValidationFixture : public framework::Fixture
54+
class CpuSoftmaxValidationGenericFixture : public framework::Fixture
4755
{
4856
public:
49-
void setup(TensorShape shape, DataType data_type, float beta, size_t axis)
57+
void setup(TensorShape shape, DataType data_type, float beta, size_t axis, QuantizationInfo qinfo,
58+
TestType test_type = TestType::ConfigureOnceRunOnce)
5059
{
5160
if(std::is_same<TensorType, Tensor>::value && // Cpu
5261
data_type == DataType::F16 && !CPUInfo::get().has_fp16())
5362
{
5463
return;
5564
}
5665

57-
quantization_info_ = QuantizationInfo();
66+
quantization_info_ = qinfo;
67+
test_type_ = test_type;
68+
num_parallel_runs_ = (test_type_ == TestType::ConfigureOnceRunMultiThreaded ? NUM_THREADS : 1);
5869

59-
reference_ = compute_reference(shape, data_type, quantization_info_, beta, axis);
60-
target_ = compute_target(shape, data_type, quantization_info_, beta, axis);
70+
compute_reference(shape, data_type, quantization_info_, beta, axis);
71+
compute_target(shape, data_type, quantization_info_, beta, axis);
6172
}
6273

6374
protected:
@@ -85,56 +96,140 @@ class CpuSoftmaxValidationFixture : public framework::Fixture
8596
}
8697
}
8798

88-
TensorType compute_target(const TensorShape &shape, DataType data_type,
89-
QuantizationInfo quantization_info, float beta, int32_t axis)
90-
{
91-
// Create tensors
92-
TensorType src = create_tensor<TensorType>(shape, data_type, 1, quantization_info);
93-
TensorType dst = create_tensor<TensorType>(shape, data_type, 1, get_softmax_output_quantization_info(data_type, IS_LOG));
94-
95-
// Create and configure function
96-
FunctionType softmax;
97-
softmax.configure(src.info(), dst.info(), beta, axis);
99+
void allocate_and_fill_tensors(TensorType *src, TensorType *dst){
100+
for(int i = 0; i < num_parallel_runs_; ++i){
98101

99-
ARM_COMPUTE_ASSERT(src.info()->is_resizable());
100-
ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
102+
ARM_COMPUTE_ASSERT(src[i].info()->is_resizable());
103+
ARM_COMPUTE_ASSERT(dst[i].info()->is_resizable());
101104

102-
// Allocate tensors
103-
src.allocator()->allocate();
104-
dst.allocator()->allocate();
105+
// Allocate tensors
106+
src[i].allocator()->allocate();
107+
dst[i].allocator()->allocate();
105108

106-
ARM_COMPUTE_ASSERT(!src.info()->is_resizable());
107-
ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
109+
ARM_COMPUTE_ASSERT(!src[i].info()->is_resizable());
110+
ARM_COMPUTE_ASSERT(!dst[i].info()->is_resizable());
108111

109-
// Fill tensors
110-
fill(AccessorType(src));
112+
// Fill tensors
113+
fill(AccessorType(src[i]));
114+
}
115+
}
111116

112-
ITensorPack run_pack{ { arm_compute::TensorType::ACL_SRC_0, &src }};
113-
run_pack.add_tensor(arm_compute::TensorType::ACL_DST, &dst);
117+
void compute_target(const TensorShape &shape, DataType data_type,
118+
QuantizationInfo quantization_info, float beta, int32_t axis)
119+
{
120+
TensorType src[NUM_THREADS];
121+
TensorType dst[NUM_THREADS];
122+
ITensorPack run_pack[NUM_THREADS];
123+
TensorType *dst_ptrs[NUM_THREADS];
114124
auto mg = MemoryGroup{};
115-
auto ws = manage_workspace<Tensor>(softmax.workspace(), mg, run_pack);
116125

117-
// Compute function
118-
softmax.run(run_pack);
126+
// Create tensors
127+
for(int i = 0; i < num_parallel_runs_; ++i){
128+
src[i] = create_tensor<TensorType>(shape, data_type, 1, quantization_info);
129+
dst[i] = create_tensor<TensorType>(shape, data_type, 1, get_softmax_output_quantization_info(data_type, IS_LOG));
130+
dst_ptrs[i] = &dst[i];
131+
}
132+
133+
// Create and configure function
134+
FunctionType softmax;
135+
softmax.configure(src[0].info(), dst[0].info(), beta, axis);
136+
137+
allocate_and_fill_tensors(src, dst);
119138

120-
return dst;
139+
if(test_type_ == TestType::ConfigureOnceRunMultiThreaded)
140+
{
141+
#ifndef BARE_METAL
142+
std::vector<std::thread> threads;
143+
144+
threads.reserve(num_parallel_runs_);
145+
for(int i = 0; i < num_parallel_runs_; ++i)
146+
{
147+
// Compute function
148+
run_pack[i] = {{arm_compute::TensorType::ACL_SRC_0, &src[i]},
149+
{arm_compute::TensorType::ACL_DST, dst_ptrs[i]}};
150+
151+
threads.emplace_back([&,i]
152+
{
153+
auto ws = manage_workspace<Tensor>(softmax.workspace(), mg, run_pack[i]);
154+
softmax.run(run_pack[i]);
155+
target_[i] = std::move(*(dst_ptrs[i]));
156+
});
157+
}
158+
for(int i = 0; i < num_parallel_runs_; ++i)
159+
{
160+
threads[i].join();
161+
}
162+
#endif // ifndef BARE_METAL
163+
}
164+
else
165+
{
166+
// Compute function
167+
ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &src[0]},
168+
{arm_compute::TensorType::ACL_DST, dst_ptrs[0]}};
169+
auto ws = manage_workspace<Tensor>(softmax.workspace(), mg, run_pack);
170+
171+
// Compute function
172+
softmax.run(run_pack);
173+
target_[0] = std::move(*(dst_ptrs[0]));
174+
}
121175
}
122176

123-
SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type,
177+
void compute_reference(const TensorShape &shape, DataType data_type,
124178
QuantizationInfo quantization_info, float beta, int32_t axis)
125179
{
126180
// Create reference
127181
SimpleTensor<T> src{ shape, data_type, 1, quantization_info };
128182

129183
// Fill reference
130-
fill(src);
131-
132-
return reference::softmax_layer<T>(src, beta, axis, IS_LOG);
184+
for(int i = 0; i < num_parallel_runs_; ++i)
185+
{
186+
// Fill reference
187+
fill(src);
188+
reference_[i] = reference::softmax_layer<T>(src, beta, axis, IS_LOG);
189+
}
133190
}
134191

135-
TensorType target_{};
136-
SimpleTensor<T> reference_{};
192+
TensorType target_[NUM_THREADS];
193+
SimpleTensor<T> reference_[NUM_THREADS];
137194
QuantizationInfo quantization_info_{};
195+
TestType test_type_{};
196+
int num_parallel_runs_{};
197+
};
198+
199+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
200+
class CpuSoftmaxValidationFixture
201+
: public CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
202+
{
203+
public:
204+
void setup(TensorShape shape, DataType data_type, float beta, size_t axis)
205+
{
206+
CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
207+
shape, data_type, beta, axis, QuantizationInfo(), TestType::ConfigureOnceRunOnce);
208+
}
209+
};
210+
211+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
212+
class CpuSoftmaxThreadSafeValidationFixture
213+
: public CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
214+
{
215+
public:
216+
void setup(TensorShape shape, DataType data_type, float beta, size_t axis)
217+
{
218+
CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
219+
shape, data_type, beta, axis, QuantizationInfo(), TestType::ConfigureOnceRunMultiThreaded);
220+
}
221+
};
222+
223+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
224+
class CpuSoftmaxQuantizedThreadSafeValidationFixture
225+
: public CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
226+
{
227+
public:
228+
void setup(TensorShape shape, DataType data_type, float beta, size_t axis, QuantizationInfo qinfo)
229+
{
230+
CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
231+
shape, data_type, beta, axis, qinfo, TestType::ConfigureOnceRunMultiThreaded);
232+
}
138233
};
139234

140235
} // namespace validation

tests/validation/runtime/experimental/operators/CpuSoftmax.cpp

Lines changed: 125 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2020, 2022-2024 Arm Limited.
2+
* Copyright (c) 2017-2020, 2022-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -48,11 +48,14 @@ namespace
4848
using framework::dataset::make;
4949

5050
/** Tolerance for float operations */
51-
constexpr AbsoluteTolerance<float> tolerance_f32(0.000001f);
51+
RelativeTolerance<half> tolerance_f16(half(0.2));
52+
constexpr AbsoluteTolerance<float> tolerance_f32(0.000001f);
53+
constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
54+
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
5255
} // namespace
56+
5357
TEST_SUITE(NEON)
5458
TEST_SUITE(OPERATORS)
55-
5659
TEST_SUITE(CpuSoftmax)
5760

5861
// clang-format off
@@ -150,6 +153,12 @@ TEST_CASE(OpCpuSoftmaxMemoryInjection, framework::DatasetMode::ALL)
150153
template <typename T>
151154
using CpuOpSoftmaxFixture = CpuSoftmaxValidationFixture<Tensor, Accessor, arm_compute::experimental::op::CpuSoftmax, T>;
152155

156+
template <typename T>
157+
using CpuSoftmaxThreadSafeFixture = CpuSoftmaxThreadSafeValidationFixture<Tensor, Accessor, arm_compute::experimental::op::CpuSoftmax, T>;
158+
159+
template <typename T>
160+
using CpuSoftmaxQuantizedThreadSafeFixture = CpuSoftmaxQuantizedThreadSafeValidationFixture<Tensor, Accessor, arm_compute::experimental::op::CpuSoftmax, T>;
161+
153162
TEST_SUITE(FP32)
154163
FIXTURE_DATA_TEST_CASE(SmokeTest, CpuOpSoftmaxFixture<float>, framework::DatasetMode::PRECOMMIT,
155164
combine(
@@ -159,10 +168,122 @@ FIXTURE_DATA_TEST_CASE(SmokeTest, CpuOpSoftmaxFixture<float>, framework::Dataset
159168
make("Axis", { 0, -1 })))
160169
{
161170
// Validate output
162-
validate(Accessor(target_), reference_, tolerance_f32);
171+
for(int i = 0; i < num_parallel_runs_; ++i)
172+
{
173+
validate(Accessor(target_[i]), reference_[i], tolerance_f32);
174+
}
163175
}
176+
TEST_SUITE_END() //FP32
177+
#ifdef ARM_COMPUTE_ENABLE_FP16
178+
TEST_SUITE(FP16)
179+
FIXTURE_DATA_TEST_CASE(SmokeTest, CpuOpSoftmaxFixture<half>, framework::DatasetMode::PRECOMMIT,
180+
combine(
181+
datasets::SoftmaxLayerSmallShapes(),
182+
make("DataType", DataType::F16),
183+
make("Beta", { 1.0f, 2.0f }),
184+
make("Axis", { 0, -1 })))
185+
{
186+
if(CPUInfo::get().has_fp16())
187+
{
188+
// Validate output
189+
for(int i = 0; i < num_parallel_runs_; ++i)
190+
{
191+
validate(Accessor(target_[i]), reference_[i], tolerance_f16);
192+
}
193+
}
194+
else
195+
{
196+
ARM_COMPUTE_TEST_INFO("Device does not support fp16. Test SKIPPED.");
197+
framework::ARM_COMPUTE_PRINT_INFO();
198+
}
199+
}
200+
TEST_SUITE_END() //FP16
201+
#endif // ARM_COMPUTE_ENABLE_FP16
164202

203+
#ifndef BARE_METAL
204+
TEST_SUITE(ThreadSafety)
205+
TEST_SUITE(FP32)
206+
FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads, CpuSoftmaxThreadSafeFixture<float>,
207+
framework::DatasetMode::PRECOMMIT,
208+
combine(
209+
datasets::SoftmaxLayerSmallShapes(),
210+
make("DataType", DataType::F32),
211+
make("Beta", { 1.0f, 2.0f }),
212+
make("Axis", { 0, -1 })))
213+
{
214+
// Validate output
215+
for(int i = 0; i < num_parallel_runs_; ++i)
216+
{
217+
validate(Accessor(target_[i]), reference_[i], tolerance_f32);
218+
}
219+
}
165220
TEST_SUITE_END() //FP32
221+
222+
#ifdef ARM_COMPUTE_ENABLE_FP16
223+
TEST_SUITE(FP16)
224+
FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads, CpuSoftmaxThreadSafeFixture<half>,
225+
framework::DatasetMode::ALL,
226+
combine(
227+
datasets::SoftmaxLayerSmallShapes(),
228+
make("DataType", DataType::F16),
229+
make("Beta", { 1.0f, 2.0f }),
230+
make("Axis", { 0, -1 })))
231+
{
232+
if(CPUInfo::get().has_fp16())
233+
{
234+
// Validate output
235+
for(int i = 0; i < num_parallel_runs_; ++i)
236+
{
237+
validate(Accessor(target_[i]), reference_[i], tolerance_f16);
238+
}
239+
}
240+
else
241+
{
242+
ARM_COMPUTE_TEST_INFO("Device does not support fp16. Test SKIPPED.");
243+
framework::ARM_COMPUTE_PRINT_INFO();
244+
}
245+
}
246+
TEST_SUITE_END() //F16
247+
#endif // ARM_COMPUTE_ENABLE_FP16
248+
249+
TEST_SUITE(Quantized)
250+
TEST_SUITE(QASYMM8_SIGNED)
251+
FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads, CpuSoftmaxQuantizedThreadSafeFixture<int8_t>, framework::DatasetMode::ALL,
252+
combine(
253+
datasets::SoftmaxLayerSmallShapes(),
254+
make("DataType", DataType::QASYMM8_SIGNED),
255+
make("Beta", { 1.0f, 2.0f }),
256+
make("Axis", { 0, -1 }),
257+
make("QuantizationInfo", {QuantizationInfo(0.5f, 10), QuantizationInfo(0.25f, 0)})
258+
))
259+
{
260+
// Validate output
261+
for(int i = 0; i < num_parallel_runs_; ++i)
262+
{
263+
validate(Accessor(target_[i]), reference_[i], tolerance_qasymm8_signed);
264+
}
265+
}
266+
TEST_SUITE_END() // QASYMM8_SIGNED
267+
TEST_SUITE(QASYMM8)
268+
FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads, CpuSoftmaxQuantizedThreadSafeFixture<uint8_t>, framework::DatasetMode::ALL,
269+
combine(
270+
datasets::SoftmaxLayerSmallShapes(),
271+
make("DataType", DataType::QASYMM8),
272+
make("Beta", { 1.0f, 2.0f }),
273+
make("Axis", { 0, -1 }),
274+
make("QuantizationInfo", {QuantizationInfo(0.5f, 10), QuantizationInfo(0.25f, 0)})
275+
))
276+
{
277+
// Validate output
278+
for(int i = 0; i < num_parallel_runs_; ++i)
279+
{
280+
validate(Accessor(target_[i]), reference_[i], tolerance_qasymm8);
281+
}
282+
}
283+
TEST_SUITE_END() // QASYMM8
284+
TEST_SUITE_END() // Quantized
285+
TEST_SUITE_END() // ThreadSafety
286+
#endif // #ifndef BARE_METAL
166287
TEST_SUITE_END() //CpuSoftmax
167288
TEST_SUITE_END() //OPERATORS
168289
TEST_SUITE_END() //NEON

0 commit comments

Comments (0)