Skip to content

Commit bcdf266

Browse files
Insert kmpc_parallel_51 call for loop distribution
The pragma "target teams distribute parallel for" calls the kmpc_parallel_51 function differently from the pragma "target parallel". Pragma "target parallel" calls kmpc_parallel_51 right after the initialization procedure, whereas pragma "target teams distribute parallel for" calls kmpc_parallel_51 in the body of the outer loop. Signed-off-by: Dominik Adamski <[email protected]>
1 parent 4e98c1c commit bcdf266

File tree

5 files changed

+66
-6
lines changed

5 files changed

+66
-6
lines changed

tools/flang2/flang2exe/expand.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,9 @@ expand(void)
238238
auto it = process_expanded_map.find(gbl.currsub);
239239
int process_expanded = 0;
240240

241+
//we are at the beginning of pragma expansion
242+
//make sure that mploop_counter equals to zero
243+
reset_mploop_counter();
241244
// we reset flag because we do not know if we generate initialization
242245
// function for SPMD kernel (the function with kmpc_parallel_51 call)
243246
// or the proper kernel code (the function which is passed as an argument

tools/flang2/flang2exe/kmpcutil.cpp

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1778,7 +1778,11 @@ bool check_if_skip_symbol(SPTR sym)
17781778
}
17791779

17801780
int
1781-
ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector<int> &symbols, SPTR helper_func)
1781+
ll_make_kmpc_parallel_51(int global_tid_sptr,
1782+
std::vector<int> &symbols,
1783+
SPTR helper_func,
1784+
SPTR lower,
1785+
SPTR upper)
17821786
{
17831787
static int id;
17841788
int n_symbols = get_n_symbols(ompaccel_tinfo_get(gbl.currsub));
@@ -1787,6 +1791,10 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector<int> &symbols, SPTR he
17871791
DTYPE void_ptr_ptr_t = get_type(2, TY_PTR, void_ptr_t);
17881792
DTYPE arr_dtype;
17891793
int args[9];
1794+
1795+
if (lower && upper)
1796+
n_symbols += 2;
1797+
17901798
SPTR captured_vars = make_array_sptr(const_cast<char*>("captured_vars_addrs"),
17911799
void_ptr_t,
17921800
n_symbols);
@@ -1798,7 +1806,25 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector<int> &symbols, SPTR he
17981806
ad_icon(0),
17991807
FALSE);
18001808
int j = 0;
1801-
for (int i = 0; i < n_symbols; ++i) {
1809+
int i = 0;
1810+
/* Store lower and upper bounds for loop distribution */
1811+
if (lower && upper) {
1812+
ilix = mk_ompaccel_ldsptr(lower);
1813+
ilix = mk_ompaccel_store(ilix,
1814+
DT_INT8,
1815+
nme_args,
1816+
ad_acon(captured_vars, i * TARGET_PTRSIZE));
1817+
chk_block(ilix);
1818+
i++;
1819+
ilix = mk_ompaccel_ldsptr(upper);
1820+
ilix = mk_ompaccel_store(ilix,
1821+
DT_INT8,
1822+
nme_args,
1823+
ad_acon(captured_vars, i * TARGET_PTRSIZE));
1824+
chk_block(ilix);
1825+
i++;
1826+
}
1827+
for (; i < n_symbols; ++i) {
18021828
if (check_if_skip_symbol(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym))
18031829
continue;
18041830
else if (PASSBYVALG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym) &&
@@ -1825,7 +1851,6 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector<int> &symbols, SPTR he
18251851
}
18261852
chk_block(ilix);
18271853
}
1828-
18291854

18301855
arg_types[0] = DT_CPTR; /* ident */
18311856
arg_types[1] = DT_INT; /* global_tid */
@@ -1842,7 +1867,10 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector<int> &symbols, SPTR he
18421867
args[6] = ad_icon(1); /* if_expr */
18431868
args[5] = ad_icon(-1); /* num_threads */
18441869
args[4] = ad_icon(-1); /* proc_bind */
1845-
args[3] = ad_acon(helper_func, 0);
1870+
if (helper_func)
1871+
args[3] = ad_acon(helper_func, 0);
1872+
else
1873+
args[3] = gen_null_arg();
18461874
args[2] = gen_null_arg(); /* wrapper_fn */
18471875
args[1] = ad_acon(captured_vars, 0); /* args */
18481876
args[0] = ad_icon(n_symbols); /* n_args */

tools/flang2/flang2exe/kmpcutil.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,11 @@ int ll_make_kmpc_target_init(OMP_TARGET_MODE);
509509
/**
510510
\brief Generate kmpc_parallel_51 function call
511511
*/
512-
int ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector<int> &, SPTR);
512+
int ll_make_kmpc_parallel_51(int global_tid_sptr,
513+
std::vector<int> &,
514+
SPTR,
515+
SPTR lower = (SPTR)0,
516+
SPTR upper = (SPTR)0);
513517

514518
#ifdef OMP_OFFLOAD_AMD
515519
/**

tools/flang2/flang2exe/ompaccel.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@
6666
// Should be in sync with clang::GPU::AMDGPUGpuGridValues in clang
6767
int warp_size_log2;
6868
int warp_size_log2_mask;
69+
// count if we expand the second MPLOOP instruction
70+
// inside single OpenMP pragma
71+
int mploop_counter;
6972
// AOCC End
7073
#include "../../flang1/flang1exe/global.h"
7174

@@ -2758,7 +2761,20 @@ exp_ompaccel_mploop(ILM *ilmp, int curilm)
27582761
ili = ll_make_kmpc_for_static_init(&loop_args);
27592762
// AOCC end
27602763
} else {
2761-
ili = ll_make_kmpc_for_static_init_simple_spmd(&loop_args, sched);
2764+
mploop_counter++;
2765+
if (mploop_counter != 2)
2766+
ili = ll_make_kmpc_for_static_init_simple_spmd(&loop_args, sched);
2767+
else {
2768+
std::vector<int> allocated_symbols;
2769+
int ilix = ll_make_kmpc_global_thread_num();
2770+
ilix = ll_make_kmpc_parallel_51(ilix,
2771+
allocated_symbols,
2772+
(SPTR)0, /*TODO: replace with wrapper fn ptr */
2773+
loop_args.lower,
2774+
loop_args.upper);
2775+
iltb.callfg = 1;
2776+
chk_block(ilix);
2777+
}
27622778
}
27632779
break;
27642780
default:
@@ -3732,6 +3748,11 @@ bool is_SPMD_mode(OMP_TARGET_MODE mode) {
37323748
return false;
37333749
}
37343750

3751+
// Reset the file-scope mploop_counter to zero. expand() calls this at the
// beginning of pragma expansion so that counting of MPLOOP instructions
// starts afresh for each expansion.
void reset_mploop_counter()
3752+
{
3753+
mploop_counter = 0;
3754+
}
3755+
37353756
// AOCC End
37363757
#endif
37373758
/* Expander - OpenMP Accelerator Model */

tools/flang2/flang2exe/ompaccel.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,4 +590,8 @@ void ompaccel_set_target_declare();
590590
*/
591591
bool is_SPMD_mode(OMP_TARGET_MODE mode);
592592
// AOCC End
593+
/**
594+
\brief Reset counts of MPLOOP instruction
595+
*/
596+
void reset_mploop_counter();
593597
#endif

0 commit comments

Comments
 (0)