Skip to content

Commit 34a2ef2

Browse files
committed
test: Add thread safety tests for CpuSoftmax
Resolves: COMPMID-8329
Change-Id: I3f74bc4c000c71a17fcca94c4552b8d97a61191a
Signed-off-by: Syed Wajahat Abbas Naqvi <[email protected]>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14698
Tested-by: Arm Jenkins <[email protected]>
Comments-Addressed: Arm Jenkins <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
Reviewed-by: Dongsung Kim <[email protected]>
1 parent 9f7a1fb commit 34a2ef2

File tree

2 files changed

+258
-42
lines changed

tests/validation/fixtures/CpuSoftmaxFixture.h

Lines changed: 133 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021, 2023-2024 Arm Limited.
2+
* Copyright (c) 2017-2021, 2023-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -33,31 +33,42 @@
3333
#include "tests/framework/Asserts.h"
3434
#include "tests/framework/Fixture.h"
3535
#include "tests/validation/reference/SoftmaxLayer.h"
36-
3736
#include <random>
3837

38+
#if !defined(BARE_METAL)
39+
#include <thread>
40+
#include <vector>
41+
#endif // !defined(BARE_METAL)
42+
3943
namespace arm_compute
4044
{
4145
namespace test
4246
{
4347
namespace validation
4448
{
49+
namespace
50+
{
51+
constexpr int NUM_THREADS = 3;
52+
}// namespace
4553
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool IS_LOG = false>
46-
class CpuSoftmaxValidationFixture : public framework::Fixture
54+
class CpuSoftmaxValidationGenericFixture : public framework::Fixture
4755
{
4856
public:
49-
void setup(TensorShape shape, DataType data_type, float beta, size_t axis)
57+
void setup(TensorShape shape, DataType data_type, float beta, size_t axis, QuantizationInfo qinfo,
58+
TestType test_type = TestType::ConfigureOnceRunOnce)
5059
{
5160
if(std::is_same<TensorType, Tensor>::value && // Cpu
5261
data_type == DataType::F16 && !CPUInfo::get().has_fp16())
5362
{
5463
return;
5564
}
5665

57-
quantization_info_ = QuantizationInfo();
66+
quantization_info_ = qinfo;
67+
test_type_ = test_type;
68+
num_parallel_runs_ = (test_type_ == TestType::ConfigureOnceRunMultiThreaded ? NUM_THREADS : 1);
5869

59-
reference_ = compute_reference(shape, data_type, quantization_info_, beta, axis);
60-
target_ = compute_target(shape, data_type, quantization_info_, beta, axis);
70+
compute_reference(shape, data_type, quantization_info_, beta, axis);
71+
compute_target(shape, data_type, quantization_info_, beta, axis);
6172
}
6273

6374
protected:
@@ -85,56 +96,140 @@ class CpuSoftmaxValidationFixture : public framework::Fixture
8596
}
8697
}
8798

88-
TensorType compute_target(const TensorShape &shape, DataType data_type,
89-
QuantizationInfo quantization_info, float beta, int32_t axis)
90-
{
91-
// Create tensors
92-
TensorType src = create_tensor<TensorType>(shape, data_type, 1, quantization_info);
93-
TensorType dst = create_tensor<TensorType>(shape, data_type, 1, get_softmax_output_quantization_info(data_type, IS_LOG));
94-
95-
// Create and configure function
96-
FunctionType softmax;
97-
softmax.configure(src.info(), dst.info(), beta, axis);
99+
void allocate_and_fill_tensors(TensorType *src, TensorType *dst){
100+
for(int i = 0; i < num_parallel_runs_; ++i){
98101

99-
ARM_COMPUTE_ASSERT(src.info()->is_resizable());
100-
ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
102+
ARM_COMPUTE_ASSERT(src[i].info()->is_resizable());
103+
ARM_COMPUTE_ASSERT(dst[i].info()->is_resizable());
101104

102-
// Allocate tensors
103-
src.allocator()->allocate();
104-
dst.allocator()->allocate();
105+
// Allocate tensors
106+
src[i].allocator()->allocate();
107+
dst[i].allocator()->allocate();
105108

106-
ARM_COMPUTE_ASSERT(!src.info()->is_resizable());
107-
ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
109+
ARM_COMPUTE_ASSERT(!src[i].info()->is_resizable());
110+
ARM_COMPUTE_ASSERT(!dst[i].info()->is_resizable());
108111

109-
// Fill tensors
110-
fill(AccessorType(src));
112+
// Fill tensors
113+
fill(AccessorType(src[i]));
114+
}
115+
}
111116

112-
ITensorPack run_pack{ { arm_compute::TensorType::ACL_SRC_0, &src }};
113-
run_pack.add_tensor(arm_compute::TensorType::ACL_DST, &dst);
117+
void compute_target(const TensorShape &shape, DataType data_type,
118+
QuantizationInfo quantization_info, float beta, int32_t axis)
119+
{
120+
TensorType src[NUM_THREADS];
121+
TensorType dst[NUM_THREADS];
122+
ITensorPack run_pack[NUM_THREADS];
123+
TensorType *dst_ptrs[NUM_THREADS];
114124
auto mg = MemoryGroup{};
115-
auto ws = manage_workspace<Tensor>(softmax.workspace(), mg, run_pack);
116125

117-
// Compute function
118-
softmax.run(run_pack);
126+
// Create tensors
127+
for(int i = 0; i < num_parallel_runs_; ++i){
128+
src[i] = create_tensor<TensorType>(shape, data_type, 1, quantization_info);
129+
dst[i] = create_tensor<TensorType>(shape, data_type, 1, get_softmax_output_quantization_info(data_type, IS_LOG));
130+
dst_ptrs[i] = &dst[i];
131+
}
132+
133+
// Create and configure function
134+
FunctionType softmax;
135+
softmax.configure(src[0].info(), dst[0].info(), beta, axis);
136+
137+
allocate_and_fill_tensors(src, dst);
119138

120-
return dst;
139+
if(test_type_ == TestType::ConfigureOnceRunMultiThreaded)
140+
{
141+
#ifndef BARE_METAL
142+
std::vector<std::thread> threads;
143+
144+
threads.reserve(num_parallel_runs_);
145+
for(int i = 0; i < num_parallel_runs_; ++i)
146+
{
147+
// Compute function
148+
run_pack[i] = {{arm_compute::TensorType::ACL_SRC_0, &src[i]},
149+
{arm_compute::TensorType::ACL_DST, dst_ptrs[i]}};
150+
151+
threads.emplace_back([&,i]
152+
{
153+
auto ws = manage_workspace<Tensor>(softmax.workspace(), mg, run_pack[i]);
154+
softmax.run(run_pack[i]);
155+
target_[i] = std::move(*(dst_ptrs[i]));
156+
});
157+
}
158+
for(int i = 0; i < num_parallel_runs_; ++i)
159+
{
160+
threads[i].join();
161+
}
162+
#endif // ifndef BARE_METAL
163+
}
164+
else
165+
{
166+
// Compute function
167+
ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &src[0]},
168+
{arm_compute::TensorType::ACL_DST, dst_ptrs[0]}};
169+
auto ws = manage_workspace<Tensor>(softmax.workspace(), mg, run_pack);
170+
171+
// Compute function
172+
softmax.run(run_pack);
173+
target_[0] = std::move(*(dst_ptrs[0]));
174+
}
121175
}
122176

123-
SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type,
177+
void compute_reference(const TensorShape &shape, DataType data_type,
124178
QuantizationInfo quantization_info, float beta, int32_t axis)
125179
{
126180
// Create reference
127181
SimpleTensor<T> src{ shape, data_type, 1, quantization_info };
128182

129183
// Fill reference
130-
fill(src);
131-
132-
return reference::softmax_layer<T>(src, beta, axis, IS_LOG);
184+
for(int i = 0; i < num_parallel_runs_; ++i)
185+
{
186+
// Fill reference
187+
fill(src);
188+
reference_[i] = reference::softmax_layer<T>(src, beta, axis, IS_LOG);
189+
}
133190
}
134191

135-
TensorType target_{};
136-
SimpleTensor<T> reference_{};
192+
TensorType target_[NUM_THREADS];
193+
SimpleTensor<T> reference_[NUM_THREADS];
137194
QuantizationInfo quantization_info_{};
195+
TestType test_type_{};
196+
int num_parallel_runs_{};
197+
};
198+
199+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
200+
class CpuSoftmaxValidationFixture
201+
: public CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
202+
{
203+
public:
204+
void setup(TensorShape shape, DataType data_type, float beta, size_t axis)
205+
{
206+
CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
207+
shape, data_type, beta, axis, QuantizationInfo(), TestType::ConfigureOnceRunOnce);
208+
}
209+
};
210+
211+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
212+
class CpuSoftmaxThreadSafeValidationFixture
213+
: public CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
214+
{
215+
public:
216+
void setup(TensorShape shape, DataType data_type, float beta, size_t axis)
217+
{
218+
CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
219+
shape, data_type, beta, axis, QuantizationInfo(), TestType::ConfigureOnceRunMultiThreaded);
220+
}
221+
};
222+
223+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
224+
class CpuSoftmaxQuantizedThreadSafeValidationFixture
225+
: public CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
226+
{
227+
public:
228+
void setup(TensorShape shape, DataType data_type, float beta, size_t axis, QuantizationInfo qinfo)
229+
{
230+
CpuSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
231+
shape, data_type, beta, axis, qinfo, TestType::ConfigureOnceRunMultiThreaded);
232+
}
138233
};
139234

140235
} // namespace validation

tests/validation/runtime/experimental/operators/CpuSoftmax.cpp

Lines changed: 125 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2020, 2022-2024 Arm Limited.
2+
* Copyright (c) 2017-2020, 2022-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -48,11 +48,14 @@ namespace
4848
using framework::dataset::make;
4949

5050
/** Tolerance for float operations */
51-
constexpr AbsoluteTolerance<float> tolerance_f32(0.000001f);
51+
RelativeTolerance<half> tolerance_f16(half(0.2));
52+
constexpr AbsoluteTolerance<float> tolerance_f32(0.000001f);
53+
constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
54+
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
5255
} // namespace
56+
5357
TEST_SUITE(NEON)
5458
TEST_SUITE(OPERATORS)
55-
5659
TEST_SUITE(CpuSoftmax)
5760

5861
// clang-format off
@@ -150,6 +153,12 @@ TEST_CASE(OpCpuSoftmaxMemoryInjection, framework::DatasetMode::ALL)
150153
template <typename T>
151154
using CpuOpSoftmaxFixture = CpuSoftmaxValidationFixture<Tensor, Accessor, arm_compute::experimental::op::CpuSoftmax, T>;
152155

156+
template <typename T>
157+
using CpuSoftmaxThreadSafeFixture = CpuSoftmaxThreadSafeValidationFixture<Tensor, Accessor, arm_compute::experimental::op::CpuSoftmax, T>;
158+
159+
template <typename T>
160+
using CpuSoftmaxQuantizedThreadSafeFixture = CpuSoftmaxQuantizedThreadSafeValidationFixture<Tensor, Accessor, arm_compute::experimental::op::CpuSoftmax, T>;
161+
153162
TEST_SUITE(FP32)
154163
FIXTURE_DATA_TEST_CASE(SmokeTest, CpuOpSoftmaxFixture<float>, framework::DatasetMode::PRECOMMIT,
155164
combine(
@@ -159,10 +168,122 @@ FIXTURE_DATA_TEST_CASE(SmokeTest, CpuOpSoftmaxFixture<float>, framework::Dataset
159168
make("Axis", { 0, -1 })))
160169
{
161170
// Validate output
162-
validate(Accessor(target_), reference_, tolerance_f32);
171+
for(int i = 0; i < num_parallel_runs_; ++i)
172+
{
173+
validate(Accessor(target_[i]), reference_[i], tolerance_f32);
174+
}
163175
}
176+
TEST_SUITE_END() //FP32
177+
#ifdef ARM_COMPUTE_ENABLE_FP16
178+
TEST_SUITE(FP16)
179+
FIXTURE_DATA_TEST_CASE(SmokeTest, CpuOpSoftmaxFixture<half>, framework::DatasetMode::PRECOMMIT,
180+
combine(
181+
datasets::SoftmaxLayerSmallShapes(),
182+
make("DataType", DataType::F16),
183+
make("Beta", { 1.0f, 2.0f }),
184+
make("Axis", { 0, -1 })))
185+
{
186+
if(CPUInfo::get().has_fp16())
187+
{
188+
// Validate output
189+
for(int i = 0; i < num_parallel_runs_; ++i)
190+
{
191+
validate(Accessor(target_[i]), reference_[i], tolerance_f16);
192+
}
193+
}
194+
else
195+
{
196+
ARM_COMPUTE_TEST_INFO("Device does not support fp16. Test SKIPPED.");
197+
framework::ARM_COMPUTE_PRINT_INFO();
198+
}
199+
}
200+
TEST_SUITE_END() //FP16
201+
#endif // ARM_COMPUTE_ENABLE_FP16
164202

203+
#ifndef BARE_METAL
204+
TEST_SUITE(ThreadSafety)
205+
TEST_SUITE(FP32)
206+
FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads, CpuSoftmaxThreadSafeFixture<float>,
207+
framework::DatasetMode::PRECOMMIT,
208+
combine(
209+
datasets::SoftmaxLayerSmallShapes(),
210+
make("DataType", DataType::F32),
211+
make("Beta", { 1.0f, 2.0f }),
212+
make("Axis", { 0, -1 })))
213+
{
214+
// Validate output
215+
for(int i = 0; i < num_parallel_runs_; ++i)
216+
{
217+
validate(Accessor(target_[i]), reference_[i], tolerance_f32);
218+
}
219+
}
165220
TEST_SUITE_END() //FP32
221+
222+
#ifdef ARM_COMPUTE_ENABLE_FP16
223+
TEST_SUITE(FP16)
224+
FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads, CpuSoftmaxThreadSafeFixture<half>,
225+
framework::DatasetMode::ALL,
226+
combine(
227+
datasets::SoftmaxLayerSmallShapes(),
228+
make("DataType", DataType::F16),
229+
make("Beta", { 1.0f, 2.0f }),
230+
make("Axis", { 0, -1 })))
231+
{
232+
if(CPUInfo::get().has_fp16())
233+
{
234+
// Validate output
235+
for(int i = 0; i < num_parallel_runs_; ++i)
236+
{
237+
validate(Accessor(target_[i]), reference_[i], tolerance_f16);
238+
}
239+
}
240+
else
241+
{
242+
ARM_COMPUTE_TEST_INFO("Device does not support fp16. Test SKIPPED.");
243+
framework::ARM_COMPUTE_PRINT_INFO();
244+
}
245+
}
246+
TEST_SUITE_END() //F16
247+
#endif // ARM_COMPUTE_ENABLE_FP16
248+
249+
TEST_SUITE(Quantized)
250+
TEST_SUITE(QASYMM8_SIGNED)
251+
FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads, CpuSoftmaxQuantizedThreadSafeFixture<int8_t>, framework::DatasetMode::ALL,
252+
combine(
253+
datasets::SoftmaxLayerSmallShapes(),
254+
make("DataType", DataType::QASYMM8_SIGNED),
255+
make("Beta", { 1.0f, 2.0f }),
256+
make("Axis", { 0, -1 }),
257+
make("QuantizationInfo", {QuantizationInfo(0.5f, 10), QuantizationInfo(0.25f, 0)})
258+
))
259+
{
260+
// Validate output
261+
for(int i = 0; i < num_parallel_runs_; ++i)
262+
{
263+
validate(Accessor(target_[i]), reference_[i], tolerance_qasymm8_signed);
264+
}
265+
}
266+
TEST_SUITE_END() // QASYMM8_SIGNED
267+
TEST_SUITE(QASYMM8)
268+
FIXTURE_DATA_TEST_CASE(ConfigureOnceUseFromDifferentThreads, CpuSoftmaxQuantizedThreadSafeFixture<uint8_t>, framework::DatasetMode::ALL,
269+
combine(
270+
datasets::SoftmaxLayerSmallShapes(),
271+
make("DataType", DataType::QASYMM8),
272+
make("Beta", { 1.0f, 2.0f }),
273+
make("Axis", { 0, -1 }),
274+
make("QuantizationInfo", {QuantizationInfo(0.5f, 10), QuantizationInfo(0.25f, 0)})
275+
))
276+
{
277+
// Validate output
278+
for(int i = 0; i < num_parallel_runs_; ++i)
279+
{
280+
validate(Accessor(target_[i]), reference_[i], tolerance_qasymm8);
281+
}
282+
}
283+
TEST_SUITE_END() // QASYMM8
284+
TEST_SUITE_END() // Quantized
285+
TEST_SUITE_END() // ThreadSafety
286+
#endif // #ifndef BARE_METAL
166287
TEST_SUITE_END() //CpuSoftmax
167288
TEST_SUITE_END() //OPERATORS
168289
TEST_SUITE_END() //NEON

0 commit comments

Comments (0)