Skip to content

Commit 3e4b990

Browse files
wajahat-abbas (with gunes-arm)
authored and committed
test: Add thread safety tests for CpuElementwise
Resolves: COMPMID-8328 Change-Id: Iad97fa614507035cde6e450ddb0ae6d5034715a8 Signed-off-by: Syed Wajahat Abbas Naqvi <[email protected]> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14697 Reviewed-by: Gunes Bayir <[email protected]> Benchmark: Arm Jenkins <[email protected]> Comments-Addressed: Arm Jenkins <[email protected]> Tested-by: Arm Jenkins <[email protected]>
1 parent bc0a47b commit 3e4b990

File tree

2 files changed

+558
-81
lines changed

2 files changed

+558
-81
lines changed

tests/validation/fixtures/CpuElementwiseFixture.h

Lines changed: 201 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,21 @@
3838
#include "tests/validation/reference/ActivationLayer.h"
3939
#include "tests/validation/reference/ElementwiseOperations.h"
4040

41+
#if !defined(BARE_METAL)
42+
#include <thread>
43+
#include <vector>
44+
#endif // !defined(BARE_METAL)
45+
4146
namespace arm_compute
4247
{
4348
namespace test
4449
{
4550
namespace validation
4651
{
52+
namespace
53+
{
54+
constexpr int NUM_THREADS = 3;
55+
}// namespace
4756
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
4857
class CpuElementwiseOperationsGenericFixture : public framework::Fixture
4958
{
@@ -54,7 +63,11 @@ class CpuElementwiseOperationsGenericFixture : public framework::Fixture
5463
DataType data_type0,
5564
DataType data_type1,
5665
DataType output_data_type,
57-
bool is_inplace = false)
66+
QuantizationInfo qinfo0,
67+
QuantizationInfo qinfo1,
68+
QuantizationInfo qinfo_out,
69+
bool is_inplace = false,
70+
TestType test_type = TestType::ConfigureOnceRunOnce)
5871
{
5972
if (std::is_same<TensorType, Tensor>::value && // Cpu
6073
(data_type0 == DataType::F16 || data_type1 == DataType::F16 || output_data_type == DataType::F16) &&
@@ -65,9 +78,11 @@ class CpuElementwiseOperationsGenericFixture : public framework::Fixture
6578

6679
_op = op;
6780
_is_inplace = is_inplace;
81+
_test_type = test_type;
82+
_num_parallel_runs = (_test_type == TestType::ConfigureOnceRunMultiThreaded ? NUM_THREADS : 1);
6883

69-
_target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type);
70-
_reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type);
84+
compute_target(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out);
85+
compute_reference(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out);
7186
}
7287

7388
protected:
@@ -94,99 +109,142 @@ class CpuElementwiseOperationsGenericFixture : public framework::Fixture
94109
}
95110
}
96111

97-
TensorType compute_target(const TensorShape &shape0,
98-
const TensorShape &shape1,
99-
DataType data_type0,
100-
DataType data_type1,
101-
DataType output_data_type)
112+
void allocate_and_fill_tensors(TensorType *src1, TensorType *src2, TensorType *dst){
113+
for(int i = 0; i < _num_parallel_runs; ++i){
114+
ARM_COMPUTE_ASSERT(src1[i].info()->is_resizable());
115+
ARM_COMPUTE_ASSERT(src2[i].info()->is_resizable());
116+
117+
// Allocate tensors
118+
src1[i].allocator()->allocate();
119+
src2[i].allocator()->allocate();
120+
121+
ARM_COMPUTE_ASSERT(!src1[i].info()->is_resizable());
122+
ARM_COMPUTE_ASSERT(!src2[i].info()->is_resizable());
123+
124+
// If don't do in-place computation, still need to allocate original dst
125+
if (!_is_inplace)
126+
{
127+
ARM_COMPUTE_ASSERT(dst[i].info()->is_resizable());
128+
dst[i].allocator()->allocate();
129+
ARM_COMPUTE_ASSERT(!dst[i].info()->is_resizable());
130+
}
131+
132+
// Fill tensors
133+
fill(AccessorType(src1[i]), (2*i + 0));
134+
fill(AccessorType(src2[i]), (2*i + 1));
135+
}
136+
}
137+
138+
void compute_target(const TensorShape &shape0,
139+
const TensorShape &shape1,
140+
DataType data_type0,
141+
DataType data_type1,
142+
DataType output_data_type,
143+
QuantizationInfo qinfo0,
144+
QuantizationInfo qinfo1,
145+
QuantizationInfo qinfo_out)
102146
{
103147
// Create tensors
148+
TensorType src1[NUM_THREADS];
149+
TensorType src2[NUM_THREADS];
150+
TensorType dst[NUM_THREADS];
151+
ITensorPack run_pack[NUM_THREADS];
152+
TensorType *dst_ptrs[NUM_THREADS];
104153
const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
105-
TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, QuantizationInfo());
106-
TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, QuantizationInfo());
107-
TensorType dst = create_tensor<TensorType>(out_shape, output_data_type, 1, QuantizationInfo());
154+
155+
for(int i = 0; i < _num_parallel_runs; ++i){
156+
src1[i] = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0);
157+
src2[i] = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1);
158+
dst[i] = create_tensor<TensorType>(out_shape, output_data_type, 1, qinfo_out);
159+
dst_ptrs[i] = &dst[i];
160+
}
108161

109162
// Check whether do in-place computation and whether inputs are broadcast compatible
110-
TensorType *actual_dst = &dst;
111163
if (_is_inplace)
112164
{
113165
bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) &&
114-
(data_type0 == output_data_type);
166+
(data_type0 == output_data_type) && (qinfo0 == qinfo_out) ;
115167
bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) &&
116-
(data_type1 == output_data_type);
168+
(data_type1 == output_data_type) && (qinfo1 == qinfo_out);
117169
bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace);
118170
ARM_COMPUTE_ASSERT(do_in_place);
119171

120-
if (src1_is_inplace)
121-
{
122-
actual_dst = &ref_src1;
123-
}
124-
else
125-
{
126-
actual_dst = &ref_src2;
172+
for(int i = 0; i < _num_parallel_runs; ++i){
173+
dst_ptrs[i] = src1_is_inplace ? &(src1[i]) : &(src2[i]);
127174
}
128175
}
129176

130177
// Create and configure function
131178
FunctionType elem_op;
132-
elem_op.configure(ref_src1.info(), ref_src2.info(), actual_dst->info());
179+
elem_op.configure(src1[0].info(), src2[0].info(), dst_ptrs[0]->info());
133180

134-
ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable());
135-
ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable());
181+
allocate_and_fill_tensors(src1, src2, dst);
136182

137-
// Allocate tensors
138-
ref_src1.allocator()->allocate();
139-
ref_src2.allocator()->allocate();
140-
141-
// If don't do in-place computation, still need to allocate original dst
142-
if (!_is_inplace)
183+
if(_test_type == TestType::ConfigureOnceRunMultiThreaded)
143184
{
144-
ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
145-
dst.allocator()->allocate();
146-
ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
147-
}
148-
149-
ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable());
150-
ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable());
151-
152-
// Fill tensors
153-
fill(AccessorType(ref_src1), 0);
154-
fill(AccessorType(ref_src2), 1);
155-
156-
// Compute function
157-
ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &ref_src1},
158-
{arm_compute::TensorType::ACL_SRC_1, &ref_src2},
159-
{arm_compute::TensorType::ACL_DST, actual_dst}
185+
#ifndef BARE_METAL
186+
std::vector<std::thread> threads;
160187

161-
};
162-
163-
elem_op.run(run_pack);
188+
threads.reserve(_num_parallel_runs);
189+
for(int i = 0; i < _num_parallel_runs; ++i)
190+
{
191+
// Compute function
192+
run_pack[i] = { { arm_compute::TensorType::ACL_SRC_0, &src1[i] },
193+
{arm_compute::TensorType::ACL_SRC_1, &src2[i]},
194+
{arm_compute::TensorType::ACL_DST, dst_ptrs[i]}};
164195

165-
return std::move(*actual_dst);
196+
threads.emplace_back([&,i]
197+
{
198+
elem_op.run(run_pack[i]);
199+
_target[i] = std::move(*(dst_ptrs[i]));
200+
});
201+
}
202+
for(int i = 0; i < _num_parallel_runs; ++i)
203+
{
204+
threads[i].join();
205+
}
206+
#endif // ifndef BARE_METAL
207+
}
208+
else
209+
{
210+
// Compute function
211+
ITensorPack run_pack{{arm_compute::TensorType::ACL_SRC_0, &src1[0]},
212+
{arm_compute::TensorType::ACL_SRC_1, &src2[0]},
213+
{arm_compute::TensorType::ACL_DST, dst_ptrs[0]}};
214+
elem_op.run(run_pack);
215+
_target[0] = std::move(*(dst_ptrs[0]));
216+
}
166217
}
167218

168-
SimpleTensor<T> compute_reference(const TensorShape &shape0,
169-
const TensorShape &shape1,
170-
DataType data_type0,
171-
DataType data_type1,
172-
DataType output_data_type)
219+
void compute_reference(const TensorShape &shape0,
220+
const TensorShape &shape1,
221+
DataType data_type0,
222+
DataType data_type1,
223+
DataType output_data_type,
224+
QuantizationInfo qinfo0,
225+
QuantizationInfo qinfo1,
226+
QuantizationInfo qinfo_out)
173227
{
174228
// Create reference
175-
SimpleTensor<T> ref_src1{shape0, data_type0, 1, QuantizationInfo()};
176-
SimpleTensor<T> ref_src2{shape1, data_type1, 1, QuantizationInfo()};
177-
SimpleTensor<T> ref_dst{TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, QuantizationInfo()};
229+
SimpleTensor<T> ref_src1{shape0, data_type0, 1, qinfo0};
230+
SimpleTensor<T> ref_src2{shape1, data_type1, 1, qinfo1};
231+
SimpleTensor<T> ref_dst{TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out};
178232

179233
// Fill reference
180-
fill(ref_src1, 0);
181-
fill(ref_src2, 1);
182-
183-
return reference::arithmetic_operation<T>(_op, ref_src1, ref_src2, ref_dst);
234+
for(int i = 0; i < _num_parallel_runs; ++i)
235+
{
236+
fill(ref_src1, 2*i + 0);
237+
fill(ref_src2, 2*i + 1);
238+
_reference[i] = reference::arithmetic_operation<T>(_op, ref_src1, ref_src2, ref_dst);
239+
}
184240
}
185241

186-
TensorType _target{};
187-
SimpleTensor<T> _reference{};
242+
TensorType _target[NUM_THREADS];
243+
SimpleTensor<T> _reference[NUM_THREADS];
188244
ArithmeticOperation _op{ArithmeticOperation::ADD};
189245
bool _is_inplace{false};
246+
TestType _test_type{};
247+
int _num_parallel_runs{};
190248
};
191249

192250
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
@@ -198,7 +256,21 @@ class CpuElementwiseDivisionValidationFixture
198256
const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
199257
{
200258
CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
201-
ArithmeticOperation::DIV, shape, shape, data_type0, data_type1, output_data_type, is_inplace);
259+
ArithmeticOperation::DIV, shape, shape, data_type0, data_type1, output_data_type, QuantizationInfo(),
260+
QuantizationInfo(), QuantizationInfo(), is_inplace);
261+
}
262+
};
263+
264+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
265+
class CpuElementwiseDivisionThreadSafeValidationFixture
266+
: public CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
267+
{
268+
public:
269+
void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
270+
{
271+
CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
272+
ArithmeticOperation::DIV, shape, shape, data_type0, data_type1, output_data_type, QuantizationInfo(),
273+
QuantizationInfo(), QuantizationInfo(), is_inplace, TestType::ConfigureOnceRunMultiThreaded);
202274
}
203275
};
204276

@@ -211,7 +283,37 @@ class CpuElementwiseMaxValidationFixture
211283
const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
212284
{
213285
CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
214-
ArithmeticOperation::MAX, shape, shape, data_type0, data_type1, output_data_type, is_inplace);
286+
ArithmeticOperation::MAX, shape, shape, data_type0, data_type1, output_data_type, QuantizationInfo(),
287+
QuantizationInfo(), QuantizationInfo(), is_inplace);
288+
}
289+
};
290+
291+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
292+
class CpuElementwiseMaxThreadSafeValidationFixture
293+
: public CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
294+
{
295+
public:
296+
void setup(
297+
const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
298+
{
299+
CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
300+
ArithmeticOperation::MAX, shape, shape, data_type0, data_type1, output_data_type, QuantizationInfo(),
301+
QuantizationInfo(), QuantizationInfo(), is_inplace, TestType::ConfigureOnceRunMultiThreaded);
302+
}
303+
};
304+
305+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
306+
class CpuElementwiseMaxQuantizedThreadSafeValidationFixture
307+
: public CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
308+
{
309+
public:
310+
void setup(
311+
const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
312+
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
313+
{
314+
CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
315+
ArithmeticOperation::MAX, shape, shape, data_type0, data_type1, output_data_type, qinfo0,
316+
qinfo1, qinfo_out, is_inplace, TestType::ConfigureOnceRunMultiThreaded);
215317
}
216318
};
217319

@@ -224,7 +326,37 @@ class CpuElementwiseMinValidationFixture
224326
const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
225327
{
226328
CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
227-
ArithmeticOperation::MIN, shape, shape, data_type0, data_type1, output_data_type, is_inplace);
329+
ArithmeticOperation::MIN, shape, shape, data_type0, data_type1, output_data_type, QuantizationInfo(),
330+
QuantizationInfo(), QuantizationInfo(), is_inplace);
331+
}
332+
};
333+
334+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
335+
class CpuElementwiseMinThreadSafeValidationFixture
336+
: public CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
337+
{
338+
public:
339+
void setup(
340+
const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
341+
{
342+
CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
343+
ArithmeticOperation::MIN, shape, shape, data_type0, data_type1, output_data_type, QuantizationInfo(),
344+
QuantizationInfo(), QuantizationInfo(), is_inplace, TestType::ConfigureOnceRunMultiThreaded);
345+
}
346+
};
347+
348+
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
349+
class CpuElementwiseMinQuantizedThreadSafeValidationFixture
350+
: public CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>
351+
{
352+
public:
353+
void setup(
354+
const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
355+
QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
356+
{
357+
CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
358+
ArithmeticOperation::MIN, shape, shape, data_type0, data_type1, output_data_type, qinfo0,
359+
qinfo1, qinfo_out, is_inplace, TestType::ConfigureOnceRunMultiThreaded);
228360
}
229361
};
230362

@@ -237,7 +369,8 @@ class CpuPReluValidationFixture
237369
const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
238370
{
239371
CpuElementwiseOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
240-
ArithmeticOperation::PRELU, shape, shape, data_type0, data_type1, output_data_type, is_inplace);
372+
ArithmeticOperation::PRELU, shape, shape, data_type0, data_type1, output_data_type, QuantizationInfo(),
373+
QuantizationInfo(), QuantizationInfo(), is_inplace);
241374
}
242375
};
243376
} // namespace validation

0 commit comments

Comments
 (0)