 
 #include <cstdint>
 
-#ifndef BARE_METAL
-#include <thread>
-#endif // ifndef BARE_METAL
-
 namespace arm_compute
 {
 namespace test
@@ -41,169 +37,114 @@ namespace validation
 {
 
 namespace {
-constexpr int NUM_THREADS = 3;
 
 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false, bool run_twice = false>
-void compute_cpugemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+TensorType compute_cpugemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
         const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
         GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo(),
-        bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN, int num_parallel_runs = 1, TensorType targets[NUM_THREADS] = {})
+        bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN)
 {
     ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
-    ARM_COMPUTE_ASSERT(num_parallel_runs > 1 ? run_twice == false : true);
 
     // If unknown, set to sensible defaults
     if (data_type_output == DataType::UNKNOWN) {
         data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
     }
 
     // Create tensors
-    TensorType a[NUM_THREADS];
-    TensorType b[NUM_THREADS];
-    TensorType output[NUM_THREADS];
-    TensorType *out_ptrs[NUM_THREADS];
-    TensorType bias[NUM_THREADS];
-
-    for (int i = 0; i < num_parallel_runs; ++i){
-        a[i] = create_tensor<TensorType>(shape_a, data_type_a, 1, dynamic_qinfo ? QuantizationInfo(1.0, 0, true) : a_qinfo);
-        b[i] = create_tensor<TensorType>(shape_b, data_type_b, 1, dynamic_qinfo ? QuantizationInfo(1.0, 0, true) : b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
-        output[i] = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */);
-        out_ptrs[i] = &output[i];
+    TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1, dynamic_qinfo ? QuantizationInfo(1.0, 0, true) : a_qinfo);
+    TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1, dynamic_qinfo ? QuantizationInfo(1.0, 0, true) : b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
+    TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */);
+    TensorType bias;
 
-        if (is_fused)
-        {
-            TensorShape bias_shape(shape_b[0]);
-            bias[i] = create_tensor<TensorType>(bias_shape, data_type_output == DataType::F32 ? DataType::F32 : DataType::S32, 1);
-        }
+    if (is_fused)
+    {
+        TensorShape bias_shape(shape_b[0]);
+        bias = create_tensor<TensorType>(bias_shape, data_type_output == DataType::F32 ? DataType::F32 : DataType::S32, 1);
+    }
     }
 
     // Create and configure function
     // The GEMMinfo includes the values of the depth in case of reinterpreted 3d input/output
     FunctionType gemmlowp;
-    gemmlowp.configure(a[0].info(), b[0].info(), is_fused ? bias[0].info() : nullptr, out_ptrs[0]->info(), GEMMInfo(false, false, reshape_b_only_on_first_run, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false,
+    gemmlowp.configure(a.info(), b.info(), is_fused ? bias.info() : nullptr, output.info(), GEMMInfo(false, false, reshape_b_only_on_first_run, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false,
         output_stage, false /* fp_mixed_precision*/, false /* fast_math*/, false /* broadcast_bias*/,
         arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED,
         false /* pretranspose_B */, accumulate));
-    for (int i = 0; i < num_parallel_runs; ++i)
+    // If the QuantizationInfo is dynamic, it needs to be settable after configure (note that we also force it to be dynamic)
+    if (dynamic_qinfo)
     {
-        // If the QuantizationInfo is dynamic, it needs to be settable after configure (note that we also force it to be dynamic)
-        if (dynamic_qinfo)
-        {
-            a[i].info()->set_quantization_info(QuantizationInfo(a_qinfo.scale(), a_qinfo.offset(), true));
-            b[i].info()->set_quantization_info(QuantizationInfo(b_qinfo.scale(), b_qinfo.offset(), true));
-            output[i].info()->set_quantization_info(QuantizationInfo(output_qinfo.scale(), output_qinfo.offset(), true));
-            gemmlowp.update_quantization_parameters(a[i].info()->quantization_info(),
-                    b[i].info()->quantization_info(),
-                    output[i].info()->quantization_info(),
-                    data_type_output,
-                    true, true);
-        }
+        a.info()->set_quantization_info(QuantizationInfo(a_qinfo.scale(), a_qinfo.offset(), true));
+        b.info()->set_quantization_info(QuantizationInfo(b_qinfo.scale(), b_qinfo.offset(), true));
+        output.info()->set_quantization_info(QuantizationInfo(output_qinfo.scale(), output_qinfo.offset(), true));
+        gemmlowp.update_quantization_parameters(a.info()->quantization_info(),
+                b.info()->quantization_info(),
+                output.info()->quantization_info(),
+                data_type_output,
+                true, true);
+    }
 
-        ARM_COMPUTE_ASSERT(a[i].info()->is_resizable());
-        ARM_COMPUTE_ASSERT(b[i].info()->is_resizable());
-        ARM_COMPUTE_ASSERT(output[i].info()->is_resizable());
+    ARM_COMPUTE_ASSERT(a.info()->is_resizable());
+    ARM_COMPUTE_ASSERT(b.info()->is_resizable());
+    ARM_COMPUTE_ASSERT(output.info()->is_resizable());
 
-        add_padding_x({ &a[i], &b[i], &output[i] });
+    add_padding_x({ &a, &b, &output });
 
-        // Allocate tensors
-        a[i].allocator()->allocate();
-        b[i].allocator()->allocate();
-        output[i].allocator()->allocate();
+    // Allocate tensors
+    a.allocator()->allocate();
+    b.allocator()->allocate();
+    output.allocator()->allocate();
 
-        ARM_COMPUTE_ASSERT(!a[i].info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!b[i].info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!output[i].info()->is_resizable());
-    }
+    ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
+    ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
+    ARM_COMPUTE_ASSERT(!output.info()->is_resizable());
 
-    ITensorPack pack[NUM_THREADS];
+    ITensorPack pack =
+    {
+        { arm_compute::TensorType::ACL_SRC_0, &a },
+        { arm_compute::TensorType::ACL_SRC_1, &b },
+        { arm_compute::TensorType::ACL_DST, &output }
+    };
 
-#ifndef BARE_METAL
-    std::vector<std::thread> threads;
+    // Fill tensors
+    fill_quantized(AccessorType(a), 0 + finfo.hash);
+    fill_quantized(AccessorType(b), 1 + finfo.hash);
 
-    if (num_parallel_runs > 1)
+    if (accumulate)
     {
-        threads.reserve(num_parallel_runs);
+        ARM_COMPUTE_ASSERT(accumulate != run_twice);
+        fill(AccessorType(output), 6 + finfo.hash, finfo.min_output, finfo.max_output);
     }
-#endif // ifndef BARE_METAL
 
-    for (int i = 0; i < num_parallel_runs; ++i)
+    if (is_fused)
     {
-        // these are newly created every call of this lambda function
-        pack[i] =
-        {
-            { arm_compute::TensorType::ACL_SRC_0, &a[i] },
-            { arm_compute::TensorType::ACL_SRC_1, &b[i] },
-            { arm_compute::TensorType::ACL_DST, out_ptrs[i] }
-        };
+        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+        bias.allocator()->allocate();
+        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+        fill(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
+        pack.add_tensor(arm_compute::TensorType::ACL_SRC_2, &bias);
+    }
 
-        // Fill tensors
-        fill_quantized(AccessorType(a[i]), 0 + finfo.hash);
-        fill_quantized(AccessorType(b[i]), 1 + finfo.hash);
+    auto mg = MemoryGroup{};
+    auto ws = manage_workspace<Tensor>(gemmlowp.workspace(), mg, pack, pack);
 
-        if (accumulate)
-        {
-            ARM_COMPUTE_ASSERT(accumulate != run_twice);
-            fill(AccessorType(output[i]), 6 + finfo.hash, finfo.min_output, finfo.max_output);
-        }
+    // Run with variable inputs.
+    if (run_twice)
+    {
+        gemmlowp.run(pack);
+        fill_quantized(AccessorType(a), 3 + finfo.hash); // Fill tensors with new seed after run
+        fill_quantized(AccessorType(b), 4 + finfo.hash);
 
         if (is_fused)
         {
-            ARM_COMPUTE_ASSERT(bias[i].info()->is_resizable());
-            bias[i].allocator()->allocate();
-            ARM_COMPUTE_ASSERT(!bias[i].info()->is_resizable());
-            fill(AccessorType(bias[i]), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
-            pack[i].add_tensor(arm_compute::TensorType::ACL_SRC_2, &bias[i]);
-        }
-
-        // Run with variable inputs.
-        if (run_twice)
-        {
-            auto mg = MemoryGroup{};
-            auto ws = manage_workspace<Tensor>(gemmlowp.workspace(), mg, pack[i], pack[i]);
-
-            gemmlowp.run(pack[i]);
-            fill_quantized(AccessorType(a[i]), 3 + finfo.hash); // Fill tensors with new seed after run
-            fill_quantized(AccessorType(b[i]), 4 + finfo.hash);
-            if (is_fused)
-            {
-                fill(AccessorType(bias[i]), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
-            }
-        }
-
-        // Compute GEMM function
-#ifndef BARE_METAL
-        if (num_parallel_runs > 1)
-        {
-            threads.emplace_back([&,i]
-            {
-                auto mg = MemoryGroup{};
-                auto ws = manage_workspace<Tensor>(gemmlowp.workspace(), mg, pack[i], pack[i]);
-
-                gemmlowp.run(pack[i]);
-                targets[i] = std::move(*(out_ptrs[i]));
-            });
-        }
-        else
-#endif // ifndef BARE_METAL
-        {
-            auto mg = MemoryGroup{};
-            auto ws = manage_workspace<Tensor>(gemmlowp.workspace(), mg, pack[i], pack[i]);
-
-            gemmlowp.run(pack[i]);
-            targets[i] = std::move(*(out_ptrs[i]));
+            fill(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
         }
     }
 
-#ifndef BARE_METAL
-    if (num_parallel_runs > 1)
-    {
-        for (int i = 0; i < num_parallel_runs; ++i)
-        {
-            threads[i].join();
-        }
-    }
-#endif // ifndef BARE_METAL
+    // Compute GEMM function
+    gemmlowp.run(pack);
+
+    return output;
 }
 } // namespace
 
@@ -219,28 +160,23 @@ class CpuGEMMLowpMatrixMultiplyCoreValidationFixture : protected GEMMLowpGeneric
 
         bool accumulate = false;
         bool dynamic_qinfo = false;
-        this->_num_parallel_runs = 1;
-        compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo);
-        this->_references[0] = this->compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate);
+        this->_target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo);
+        this->_reference = this->compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate);
     }
 
 protected:
-    void compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo)
     {
         const auto output_qinfo = QuantizationInfo(); // No output stage
-        compute_cpugemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::UNKNOWN, this->_num_parallel_runs, this->_targets);
+        return compute_cpugemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::UNKNOWN);
     }
-
-    int _num_parallel_runs{};
-    TensorType _targets[NUM_THREADS];
-    SimpleTensor<int32_t> _references[NUM_THREADS];
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
 class CpuGEMMLowpStaticQuantMatrixMultiplyCoreValidationFixture : protected CpuGEMMLowpMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>
 {
 public:
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, DataType data_type, bool is_multithreaded)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, DataType data_type)
     {
         ARM_COMPUTE_ASSERT(data_type == DataType::QASYMM8_SIGNED || data_type == DataType::QASYMM8);
         const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset);
@@ -249,30 +185,26 @@ class CpuGEMMLowpStaticQuantMatrixMultiplyCoreValidationFixture : protected CpuG
 
         bool accumulate = false;
         bool dynamic_qinfo = true;
-        this->_num_parallel_runs = is_multithreaded ? NUM_THREADS : 1;
-        compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo, data_type);
-        compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, data_type);
+        this->_target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo, data_type);
+        this->_reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, data_type);
     }
 
 protected:
-    void compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo, const DataType data_type)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo, const DataType data_type)
     {
         const auto output_qinfo = QuantizationInfo(a_qinfo.scale(), a_qinfo.offset()); // No output stage
-        compute_cpugemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::UNKNOWN, this->_num_parallel_runs, this->_targets);
+        return compute_cpugemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::UNKNOWN);
     }
 
-    void compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const DataType data_type)
+    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const DataType data_type)
     {
-        for (int i = 0; i < this->_num_parallel_runs; ++i)
+        if (data_type == DataType::QASYMM8)
+        {
+            return compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, finfo);
+        }
+        else
         {
-            if (data_type == DataType::QASYMM8)
-            {
-                this->_references[i] = compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, finfo);
-            }
-            else
-            {
-                this->_references[i] = compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, finfo);
-            }
+            return compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, finfo);
         }
     }
 };