@@ -9,21 +9,13 @@ namespace {
/// Squares a matrix: writes in_vec * in_vec into out_vec.
///
/// Both vectors are indexed as row-major rc_size x rc_size matrices, i.e.
/// element (row, col) lives at index row * rc_size + col.
///
/// @param in_vec   Source matrix; must hold at least rc_size * rc_size elements.
/// @param rc_size  Number of rows (and columns). Non-positive values make the
///                 call a no-op.
/// @param out_vec  Destination; must already hold at least rc_size * rc_size
///                 elements (no resizing or bounds checking is done here).
///                 Previous contents of the written cells are overwritten.
void MatMul(const std::vector<int> &in_vec, int rc_size, std::vector<int> &out_vec) {
  for (int i = 0; i < rc_size; ++i) {
    for (int j = 0; j < rc_size; ++j) {
      // Accumulate locally so the result never depends on whatever out_vec
      // held before the call and each output cell is written exactly once.
      int sum = 0;
      for (int k = 0; k < rc_size; ++k) {
        sum += in_vec[(i * rc_size) + k] * in_vec[(k * rc_size) + j];
      }
      out_vec[(i * rc_size) + j] = sum;
    }
  }
}
/// Squares a matrix using a k-j-i loop order: writes in_vec * in_vec into out_vec.
///
/// Produces the same result as MatMul; only the traversal order differs.
/// Both vectors are indexed as row-major rc_size x rc_size matrices.
///
/// @param in_vec   Source matrix; must hold at least rc_size * rc_size elements.
/// @param rc_size  Number of rows (and columns). Non-positive values make the
///                 call a no-op.
/// @param out_vec  Destination; must already hold at least rc_size * rc_size
///                 elements. The written cells are reset before accumulation.
void MatMulElse(const std::vector<int> &in_vec, int rc_size, std::vector<int> &out_vec) {
  // Because the k loop is outermost, every output cell is updated rc_size
  // times. Clear the cells up front so stale values in out_vec cannot leak
  // into the product.
  for (int i = 0; i < rc_size; ++i) {
    for (int j = 0; j < rc_size; ++j) {
      out_vec[(i * rc_size) + j] = 0;
    }
  }

  for (int k = 0; k < rc_size; ++k) {
    for (int j = 0; j < rc_size; ++j) {
      // The right-hand factor does not depend on i; read it once per (k, j).
      const int rhs = in_vec[(k * rc_size) + j];
      for (int i = 0; i < rc_size; ++i) {
        out_vec[(i * rc_size) + j] += in_vec[(i * rc_size) + k] * rhs;
      }
    }
  }
}
2719} // namespace
2820
2921bool nesterov_a_test_task_all::TestTaskALL::PreProcessingImpl () {
@@ -46,8 +38,11 @@ bool nesterov_a_test_task_all::TestTaskALL::ValidationImpl() {
4638
4739bool nesterov_a_test_task_all::TestTaskALL::RunImpl () {
4840 if (world_.rank () == 0 ) {
49- #pragma omp parallel
50- { MatMul (input_, rc_size_, output_); }
41+ #pragma omp parallel default(none)
42+ {
43+ #pragma omp critical
44+ { MatMul (input_, rc_size_, output_); }
45+ }
5146 } else if (world_.rank () == 1 ) {
5247 const int num_threads = ppc::util::GetPPCNumThreads ();
5348 std::vector<std::thread> threads (num_threads);
@@ -56,10 +51,16 @@ bool nesterov_a_test_task_all::TestTaskALL::RunImpl() {
5651 threads[i].join ();
5752 }
5853 } else if (world_.rank () == 2 ) {
59- oneapi::tbb::task_arena arena;
60- arena.execute ([&] { MatMul (input_, rc_size_, output_); });
54+ oneapi::tbb::task_arena arena (1 );
55+ arena.execute ([&] {
56+ tbb::task_group tg;
57+ for (int i = 0 ; i < ppc::util::GetPPCNumThreads (); ++i) {
58+ tg.run ([&] { MatMul (input_, rc_size_, output_); });
59+ }
60+ tg.wait ();
61+ });
6162 } else {
62- MatMulElse (input_, rc_size_, output_);
63+ MatMul (input_, rc_size_, output_);
6364 }
6465 world_.barrier ();
6566 return true ;
0 commit comments