From b228a58b9e5a8d44b9267734664314b25f35808b Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Fri, 28 Jan 2022 13:36:44 +0100 Subject: [PATCH 01/18] Start new control flow for SPMD --- tools/flang2/flang2exe/kmpcutil.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 72374f6c4d..2f7c374eaf 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1734,6 +1734,7 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) args[2] = ad_icon(2); /* SPMD Mode */ args[1] = ad_icon(0); /* UseGenericStateMachine */ args[0] = ad_icon(0); /* RequiresFullRuntime */ +// args[0] = ad_icon(1); /* RequiresFullRuntime */ } else { args[2] = ad_icon(1); /* Generic mode */ args[1] = ad_icon(1); /* UseGenericStateMachine */ @@ -1744,7 +1745,6 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) // AOCC Begin #ifdef OMP_OFFLOAD_AMD - int ll_make_kmpc_target_deinit(OMP_TARGET_MODE mode) { From 91c3925eed717f76e281c1dc90a046731db69859 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Fri, 28 Jan 2022 15:19:53 +0100 Subject: [PATCH 02/18] Modify deinit calls --- tools/flang2/flang2exe/exp_rte.cpp | 10 ++++++---- tools/flang2/flang2exe/expand.cpp | 9 ++++++++- tools/flang2/flang2exe/kmpcutil.cpp | 1 + 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/flang2/flang2exe/exp_rte.cpp b/tools/flang2/flang2exe/exp_rte.cpp index 011e8047c4..c8d711abde 100644 --- a/tools/flang2/flang2exe/exp_rte.cpp +++ b/tools/flang2/flang2exe/exp_rte.cpp @@ -2158,10 +2158,12 @@ exp_end(ILM *ilmp, int curilm, bool is_func) int ilix; if (flg.omptarget && !is_func) { if (XBIT(232, 0x40) && gbl.ompaccel_intarget && !OMPACCFUNCDEVG(gbl.currsub) /*is_gpu_output_file() */ ) { - ilix = ll_make_kmpc_target_deinit( - ompaccel_tinfo_get(gbl.currsub)->mode); - iltb.callfg = 1; - chk_block(ilix); + OMP_TARGET_MODE mode = ompaccel_tinfo_get(gbl.currsub)->mode; + if 
(!is_SPMD_mode(mode)) { + ilix = ll_make_kmpc_target_deinit(mode); + iltb.callfg = 1; + chk_block(ilix); + } } } #endif diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 12dd61e1ac..fd1df689a0 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -451,6 +451,10 @@ eval_ilm_argument1(int opr, ILM *ilmpx, int ilmx) } } /* eval_ilm_argument1 */ +static void add_instruction(int ilix) +{ +} + void eval_ilm(int ilmx) { @@ -714,8 +718,11 @@ eval_ilm(int ilmx) exp_label(target_code_lab); if (is_SPMD_mode(ompaccel_tinfo_get(gbl.currsub)->mode)) { - iltb.callfg = 1; ilix = ll_make_kmpc_global_thread_num(); + iltb.callfg = 1; + chk_block(ilix); + ilix = ll_make_kmpc_target_deinit(ompaccel_tinfo_get(gbl.currsub)->mode); + iltb.callfg = 1; chk_block(ilix); } diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 2f7c374eaf..a68f44da6d 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1755,6 +1755,7 @@ ll_make_kmpc_target_deinit(OMP_TARGET_MODE mode) if (is_SPMD_mode(mode)) { args[1] = ad_icon(2); /* SPMD Mode */ args[0] = ad_icon(0); /* RequiresFullRuntime */ +// args[0] = ad_icon(1); /* RequiresFullRuntime */ } else { args[1] = ad_icon(1); /* Generic mode */ args[0] = ad_icon(1); /* RequiresFullRuntime */ From b97ae37e7f57c906cfe0aba98ece72bfed5bb08f Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Tue, 1 Feb 2022 13:04:35 +0100 Subject: [PATCH 03/18] Added first version of calling kmpc_parallel_51 Done: Added declaration of kmpc_parallel_51 Set constant args Added logic for setting up size of array which will contain target symbols Calling kmpc_parallel_51 Moved deinit function just after kmpc_parallel_51 Not done: Passing symbols to target array Creating separate function which will reflect kernel code Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 1 + tools/flang2/flang2exe/kmpcutil.cpp | 44 
+++++++++++++++++++++++++++-- tools/flang2/flang2exe/kmpcutil.h | 7 +++++ tools/flang2/flang2exe/tgtutil.h | 5 ++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index fd1df689a0..29fc63625c 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -719,6 +719,7 @@ eval_ilm(int ilmx) if (is_SPMD_mode(ompaccel_tinfo_get(gbl.currsub)->mode)) { ilix = ll_make_kmpc_global_thread_num(); + ilix = ll_make_kmpc_parallel_51(ilix, ompaccel_tinfo_get(gbl.currsub)); iltb.callfg = 1; chk_block(ilix); ilix = ll_make_kmpc_target_deinit(ompaccel_tinfo_get(gbl.currsub)->mode); diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index a68f44da6d..34ab733565 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -188,10 +188,13 @@ static class ClassKmpcApiCalls break; case KMPC_API_SPMD_KERNEL_INIT: return {"__kmpc_spmd_kernel_init", IL_NONE, DT_VOID_NONE, 0}; + // AOCC Begin case KMPC_API_TARGET_INIT: return {"__kmpc_target_init_v1", IL_NONE, DT_INT, 0}; break; - // AOCC Begin + case KMPC_API_PARALLEL_51: + return {"__kmpc_parallel_51", IL_NONE, DT_INT, 0}; + break; #ifdef OMP_OFFLOAD_AMD case KMPC_API_TARGET_DEINIT: return {"__kmpc_target_deinit_v1", IL_NONE, DT_VOID_NONE, 0}; @@ -313,9 +316,11 @@ static const struct kmpc_api_entry_t kmpc_api_calls[] = { KMPC_FLAG_STR_FMT}, [KMPC_API_SPMD_KERNEL_INIT] = {"__kmpc_spmd_kernel_init", 0, DT_VOID_NONE, 0}, + // AOCC Begin [KMPC_API_TARGET_INIT] = {"__kmpc_target_init_v1", 0, DT_INT, 0}, - // AOCC Begin + [KMPC_API_PARALLEL_51] = {"__kmpc_parallel_51", 0, DT_INT, + 0}, #ifdef OMP_OFFLOAD_AMD [KMPC_API_TARGET_DEINIT] = {"__kmpc_target_deinit_v1", 0, DT_VOID_NONE, 0}, @@ -1743,6 +1748,41 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) return mk_kmpc_api_call(KMPC_API_TARGET_INIT, 4, arg_types, args); } +int +ll_make_kmpc_parallel_51(int 
global_tid_sptr, OMPACCEL_TINFO * symbols) +{ + static int id; + int n_symbols = symbols->n_symbols; + DTYPE arg_types[9]; + DTYPE void_ptr_t = create_dtype_funcprototype(); + DTYPE void_ptr_ptr_t = get_type(2, TY_PTR, void_ptr_t); + DTYPE arr_dtype; + int args[9]; + SPTR array = make_array_sptr("captured_vars_addrs", void_ptr_t, n_symbols); + + arg_types[0] = DT_CPTR; /* ident */ + arg_types[1] = DT_INT; /* global_tid */ + arg_types[2] = DT_INT; /* if_expr */ + arg_types[3] = DT_INT; /* num_threads */ + arg_types[4] = DT_INT; /* proc_bind */ + arg_types[5] = void_ptr_t; /* fn */ + arg_types[6] = void_ptr_t; /* wrapper_fn */ + arg_types[7] = void_ptr_ptr_t; /* args */ + arg_types[8] = DT_INT; /* n_args */ + + args[8] = gen_null_arg(); /* ident */ + args[7] = global_tid_sptr; /* global_tid */ + args[6] = ad_icon(1); /* if_expr */ + args[5] = ad_icon(-1); /* num_threads */ + args[4] = ad_icon(-1); /* proc_bind */ + args[3] = gen_null_arg(); /* fn */ + args[2] = gen_null_arg(); /* wrapper_fn */ + args[1] = ad_acon(array, 0); /* args */ + args[0] = ad_icon(n_symbols); /* n_args */ + + return mk_kmpc_api_call(KMPC_API_PARALLEL_51, 9, arg_types, args); +} + // AOCC Begin #ifdef OMP_OFFLOAD_AMD int diff --git a/tools/flang2/flang2exe/kmpcutil.h b/tools/flang2/flang2exe/kmpcutil.h index 696aaf026b..89610c0fc2 100644 --- a/tools/flang2/flang2exe/kmpcutil.h +++ b/tools/flang2/flang2exe/kmpcutil.h @@ -28,6 +28,7 @@ #include "symtab.h" #include "ili.h" //AOCC Begin +typedef struct _OMPACCEL_TARGET OMPACCEL_TINFO; #include "llmputil.h" //AOCC End /** \file @@ -175,6 +176,7 @@ enum { KMPC_API_TARGET_INIT, KMPC_API_SPMD_KERNEL_INIT, // AOCC Begin + KMPC_API_PARALLEL_51, #ifdef OMP_OFFLOAD_AMD KMPC_API_TARGET_DEINIT, KMPC_API_SPMD_KERNEL_DEINIT_V2, @@ -503,6 +505,11 @@ int ll_make_kmpc_for_static_init_simple_spmd(const loop_args_t *, int); int ll_make_kmpc_target_init(OMP_TARGET_MODE); // AOCC Begin +/** + \brief Generate kmpc_parallel_51 function call +*/ +int 
ll_make_kmpc_parallel_51(int global_tid_sptr, OMPACCEL_TINFO * args); + #ifdef OMP_OFFLOAD_AMD /** \brief kernel deinit diff --git a/tools/flang2/flang2exe/tgtutil.h b/tools/flang2/flang2exe/tgtutil.h index cb034ed79f..d8d36ec76f 100644 --- a/tools/flang2/flang2exe/tgtutil.h +++ b/tools/flang2/flang2exe/tgtutil.h @@ -145,4 +145,9 @@ DTYPE ll_make_tgt_offload_entry(char *); void init_tgtutil(); +/** + \brief Create array sptr + */ +SPTR +make_array_sptr(char *name, DTYPE atype, int arraysize); #endif /* __TGT_RUNTIME_H__ */ From 2dbd1f50dc7ba6548cdcd6c0298369e6a8ba0db2 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Fri, 4 Feb 2022 23:28:36 +0100 Subject: [PATCH 04/18] Add function which initializes mapped symbols. kmpc_parallel_51 requires that offloaded symbols are passed as addresses inside pointer array Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 39 +++++++++++++++++++++++-- tools/flang2/flang2exe/kmpcutil.cpp | 44 ++++++++++++++++++++--------- tools/flang2/flang2exe/kmpcutil.h | 3 +- 3 files changed, 70 insertions(+), 16 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 29fc63625c..9e48c12bfc 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -54,6 +54,7 @@ #ifdef OMP_OFFLOAD_LLVM #include "tgtutil.h" #include "kmpcutil.h" +#include #endif extern int in_extract_inline; /* Bottom-up auto-inlining */ @@ -451,8 +452,36 @@ eval_ilm_argument1(int opr, ILM *ilmpx, int ilmx) } } /* eval_ilm_argument1 */ -static void add_instruction(int ilix) +static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) { + int num_of_symbols = orig_symbols->n_symbols; + char allocated_symbol_name[128]; + SPTR allocated_symbol; + std::vector init_symbols(orig_symbols->n_symbols); + int store_instr; + int load_instr; + for (unsigned i = 0; i < num_of_symbols; ++i) { + snprintf(allocated_symbol_name, sizeof(allocated_symbol_name), + ".allocated_symbol_%d", 
i); + allocated_symbol = getsymbol(allocated_symbol_name); + STYPEP(allocated_symbol, ST_VAR); + DTYPEP(allocated_symbol, + get_type(2,TY_PTR,DTYPEG(orig_symbols->symbols[i].device_sym))); + SCP(allocated_symbol, SC_AUTO); + store_instr = ad4ili(IL_ST, + ad_acon(orig_symbols->symbols[i].device_sym,0), + ad_acon(allocated_symbol,0), + addnme(NT_VAR, allocated_symbol, 0,0), + MSZ_I8); + chk_block(store_instr); + load_instr = mk_ompaccel_ldsptr(allocated_symbol); + chk_block(load_instr); + + init_symbols[i] = load_instr; + + } + return init_symbols; + } void @@ -692,6 +721,10 @@ eval_ilm(int ilmx) * sharing model. It does extra work and allocates device on-chip memory. * */ if (XBIT(232, 0x40) && gbl.ompaccel_intarget) { + std::vector allocated_symbols; + if (is_SPMD_mode(ompaccel_tinfo_get(gbl.currsub)->mode)) { + allocated_symbols = get_allocated_symbols(ompaccel_tinfo_get(gbl.currsub)); + } ilix = ll_make_kmpc_target_init(ompaccel_tinfo_get(gbl.currsub)->mode); /* Generate new control flow for generic kernel */ @@ -719,7 +752,9 @@ eval_ilm(int ilmx) if (is_SPMD_mode(ompaccel_tinfo_get(gbl.currsub)->mode)) { ilix = ll_make_kmpc_global_thread_num(); - ilix = ll_make_kmpc_parallel_51(ilix, ompaccel_tinfo_get(gbl.currsub)); + iltb.callfg = 1; + chk_block(ilix); + ilix = ll_make_kmpc_parallel_51(ilix, allocated_symbols); iltb.callfg = 1; chk_block(ilix); ilix = ll_make_kmpc_target_deinit(ompaccel_tinfo_get(gbl.currsub)->mode); diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 34ab733565..188a4ceac1 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -27,6 +27,7 @@ #define _GNU_SOURCE // for vasprintf() #include +#include #undef _GNU_SOURCE #include "kmpcutil.h" #include "error.h" @@ -1749,17 +1750,34 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) } int -ll_make_kmpc_parallel_51(int global_tid_sptr, OMPACCEL_TINFO * symbols) +ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector 
&symbols) { static int id; - int n_symbols = symbols->n_symbols; + int n_symbols = symbols.size(); DTYPE arg_types[9]; - DTYPE void_ptr_t = create_dtype_funcprototype(); + DTYPE void_ptr_t = DT_ADDR;//create_dtype_funcprototype(); DTYPE void_ptr_ptr_t = get_type(2, TY_PTR, void_ptr_t); DTYPE arr_dtype; int args[9]; - SPTR array = make_array_sptr("captured_vars_addrs", void_ptr_t, n_symbols); + SPTR captured_vars = make_array_sptr(const_cast("captured_vars_addrs"), + void_ptr_t, + n_symbols); + int ilix; + int nme_args = add_arrnme(NT_ARR, + captured_vars, + addnme(NT_VAR, captured_vars, 0, 0), + 0, + ad_icon(0), + FALSE); + for (unsigned i = 0; i < symbols.size(); ++i) { + ilix = mk_ompaccel_store(symbols[i], + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + chk_block(ilix); + } +// chk_block(ilix); arg_types[0] = DT_CPTR; /* ident */ arg_types[1] = DT_INT; /* global_tid */ arg_types[2] = DT_INT; /* if_expr */ @@ -1770,15 +1788,15 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, OMPACCEL_TINFO * symbols) arg_types[7] = void_ptr_ptr_t; /* args */ arg_types[8] = DT_INT; /* n_args */ - args[8] = gen_null_arg(); /* ident */ - args[7] = global_tid_sptr; /* global_tid */ - args[6] = ad_icon(1); /* if_expr */ - args[5] = ad_icon(-1); /* num_threads */ - args[4] = ad_icon(-1); /* proc_bind */ - args[3] = gen_null_arg(); /* fn */ - args[2] = gen_null_arg(); /* wrapper_fn */ - args[1] = ad_acon(array, 0); /* args */ - args[0] = ad_icon(n_symbols); /* n_args */ + args[8] = gen_null_arg(); /* ident */ + args[7] = global_tid_sptr; /* global_tid */ + args[6] = ad_icon(1); /* if_expr */ + args[5] = ad_icon(-1); /* num_threads */ + args[4] = ad_icon(-1); /* proc_bind */ + args[3] = gen_null_arg(); /* fn */ + args[2] = gen_null_arg(); /* wrapper_fn */ + args[1] = ad_acon(captured_vars, 0); /* args */ + args[0] = ad_icon(n_symbols); /* n_args */ return mk_kmpc_api_call(KMPC_API_PARALLEL_51, 9, arg_types, args); } diff --git a/tools/flang2/flang2exe/kmpcutil.h 
b/tools/flang2/flang2exe/kmpcutil.h index 89610c0fc2..9c6dd610c3 100644 --- a/tools/flang2/flang2exe/kmpcutil.h +++ b/tools/flang2/flang2exe/kmpcutil.h @@ -30,6 +30,7 @@ //AOCC Begin typedef struct _OMPACCEL_TARGET OMPACCEL_TINFO; #include "llmputil.h" +#include //AOCC End /** \file * \brief Various definitions for the kmpc runtime @@ -508,7 +509,7 @@ int ll_make_kmpc_target_init(OMP_TARGET_MODE); /** \brief Generate kmpc_parallel_51 function call */ -int ll_make_kmpc_parallel_51(int global_tid_sptr, OMPACCEL_TINFO * args); +int ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &); #ifdef OMP_OFFLOAD_AMD /** From 61816d866aa0b4e3e03be8c5a12a1b508615ced5 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Fri, 11 Mar 2022 10:23:14 +0100 Subject: [PATCH 05/18] Add support for SPMD kernels with new OpenMP API. Flang generates new SPMD kernels which use kmpc_parallel_51 function. Signed-off-by: Dominik Adamski --- tools/flang1/flang1exe/symtab.c | 3 +- tools/flang2/flang2exe/cgmain.cpp | 3 +- tools/flang2/flang2exe/exp_ftn.cpp | 6 +- tools/flang2/flang2exe/exp_ftn.h | 2 +- tools/flang2/flang2exe/exp_rte.cpp | 4 +- tools/flang2/flang2exe/exp_rte.h | 2 +- tools/flang2/flang2exe/expand.cpp | 97 ++++++++++++++++++++----- tools/flang2/flang2exe/expsmp.cpp | 6 +- tools/flang2/flang2exe/expsmp.h | 2 + tools/flang2/flang2exe/kmpcutil.cpp | 30 +++++--- tools/flang2/flang2exe/kmpcutil.h | 2 +- tools/flang2/flang2exe/llassem.cpp | 3 +- tools/flang2/flang2exe/ompaccel_x86.cpp | 2 +- tools/flang2/flang2exe/outliner.cpp | 70 +++++++++++++++++- tools/flang2/flang2exe/outliner.h | 5 +- 15 files changed, 189 insertions(+), 48 deletions(-) diff --git a/tools/flang1/flang1exe/symtab.c b/tools/flang1/flang1exe/symtab.c index ad7cb62500..a423aef3a4 100644 --- a/tools/flang1/flang1exe/symtab.c +++ b/tools/flang1/flang1exe/symtab.c @@ -479,7 +479,8 @@ get_ieee_arith_intrin(char *nm) int getsymbol(const char *name) { - return getsym(name, strlen(name)); + int sym = getsym(name, 
strlen(name)); + return sym; } /** \brief Enter symbol with indicated name into symbol table, initialize diff --git a/tools/flang2/flang2exe/cgmain.cpp b/tools/flang2/flang2exe/cgmain.cpp index 797024be65..1b33c6ee1c 100644 --- a/tools/flang2/flang2exe/cgmain.cpp +++ b/tools/flang2/flang2exe/cgmain.cpp @@ -4301,7 +4301,7 @@ make_stmt(STMT_Type stmt_type, int ilix, bool deletable, SPTR next_bih_label, int alignment; INSTR_LIST *Curr_Instr; - DBGTRACEIN2(" type: %s ilix: %d", stmt_names[stmt_type], ilix) + DBGTRACEIN2(" type: %s ilix: %d", stmt_names[stmt_type], ilix); curr_stmt_type = stmt_type; if (last_stmt_is_branch && stmt_type != STMT_LABEL) { @@ -14063,7 +14063,6 @@ process_formal_arguments(LL_ABI_Info *abi) /* Other by-value kinds. */ break; } - /* This op represents the real LLVM argument, not the local variable. */ arg_op = make_operand(); arg_op->ot_type = OT_VAR; diff --git a/tools/flang2/flang2exe/exp_ftn.cpp b/tools/flang2/flang2exe/exp_ftn.cpp index a2ad8d618b..9de771c26e 100644 --- a/tools/flang2/flang2exe/exp_ftn.cpp +++ b/tools/flang2/flang2exe/exp_ftn.cpp @@ -4147,7 +4147,7 @@ exp_bran(ILM_OP opc, ILM *ilmp, int curilm) /***************************************************************/ void -exp_misc(ILM_OP opc, ILM *ilmp, int curilm) +exp_misc(ILM_OP opc, ILM *ilmp, int curilm, bool process_expanded) { int tmp; int ilix, listilix; @@ -4289,11 +4289,11 @@ exp_misc(ILM_OP opc, ILM *ilmp, int curilm) break; case IM_ENDF: - exp_end(ilmp, curilm, true); + exp_end(ilmp, curilm, true, process_expanded); break; case IM_END: - exp_end(ilmp, curilm, false); + exp_end(ilmp, curilm, false, process_expanded); break; case IM_BYVAL: diff --git a/tools/flang2/flang2exe/exp_ftn.h b/tools/flang2/flang2exe/exp_ftn.h index 9857b6a9cb..4832002ca6 100644 --- a/tools/flang2/flang2exe/exp_ftn.h +++ b/tools/flang2/flang2exe/exp_ftn.h @@ -53,7 +53,7 @@ void exp_bran(ILM_OP opc, ILM *ilmp, int curilm); /** \brief ... 
*/ -void exp_misc(ILM_OP opc, ILM *ilmp, int curilm); +void exp_misc(ILM_OP opc, ILM *ilmp, int curilm, bool process_expanded = false); /** \brief ... diff --git a/tools/flang2/flang2exe/exp_rte.cpp b/tools/flang2/flang2exe/exp_rte.cpp index c8d711abde..744a8c28b0 100644 --- a/tools/flang2/flang2exe/exp_rte.cpp +++ b/tools/flang2/flang2exe/exp_rte.cpp @@ -2139,7 +2139,7 @@ exp_alloca(ILM *ilmp) static void gen_funcret(finfo_t *); void -exp_end(ILM *ilmp, int curilm, bool is_func) +exp_end(ILM *ilmp, int curilm, bool is_func, bool process_expanded) { int tmp; int op1; @@ -2159,7 +2159,7 @@ exp_end(ILM *ilmp, int curilm, bool is_func) if (flg.omptarget && !is_func) { if (XBIT(232, 0x40) && gbl.ompaccel_intarget && !OMPACCFUNCDEVG(gbl.currsub) /*is_gpu_output_file() */ ) { OMP_TARGET_MODE mode = ompaccel_tinfo_get(gbl.currsub)->mode; - if (!is_SPMD_mode(mode)) { + if (!is_SPMD_mode(mode) && !process_expanded) { ilix = ll_make_kmpc_target_deinit(mode); iltb.callfg = 1; chk_block(ilix); diff --git a/tools/flang2/flang2exe/exp_rte.h b/tools/flang2/flang2exe/exp_rte.h index b8a5c9ce16..7d8394c9da 100644 --- a/tools/flang2/flang2exe/exp_rte.h +++ b/tools/flang2/flang2exe/exp_rte.h @@ -100,7 +100,7 @@ void exp_cgoto(ILM *ilmp, int curilm); /** \brief ... */ -void exp_end(ILM *ilmp, int curilm, bool is_func); +void exp_end(ILM *ilmp, int curilm, bool is_func, bool process_expanded = false); /** \brief ... 
diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 9e48c12bfc..63d5193f90 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -55,6 +55,7 @@ #include "tgtutil.h" #include "kmpcutil.h" #include +#include #endif extern int in_extract_inline; /* Bottom-up auto-inlining */ @@ -63,6 +64,8 @@ static int create_ref(SPTR sym, int *pnmex, int basenm, int baseilix, int *pclen, int *pmxlen, int *prestype); static int jsr2qjsr(int); +SPTR +eval_ilm_check_if_skip(int ilmx, int *skip_expand = nullptr, int *process_expanded = nullptr); #define DO_PFO ((XBIT(148, 0x1000) && !XBIT(148, 0x4000)) || XBIT(148, 1)) /***************************************************************/ @@ -216,7 +219,6 @@ parse_im_file(const ILM *ilmp, int *lineno_out, int *findex_out, int *ftag_out) } /***************************************************************/ - /** \brief Expand ILMs to ILIs */ int expand(void) @@ -230,12 +232,30 @@ expand(void) int last_ftag = 0; int nextftag = 0, nextfindex = 0; int last_cpp_branch = 0; - + static int skip_expand; + static int skip_expand_sptr; + static std::map process_expanded_map = std::map(); + auto it = process_expanded_map.find(gbl.currsub); + int process_expanded = 0; + if (it != process_expanded_map.end()) + { + process_expanded = it->second; + } + else + { + process_expanded = 0; + } /* * NOTE, for an ILM: ilmx is needed to access the ILM_AUX area, ilmp is * needed to access the ILM area */ exp_init(); + + //set current target info if given target region was already processed + if(ompaccel_tinfo_get(gbl.currsub)) + { + ompaccel_tinfo_current_set(ompaccel_tinfo_get(gbl.currsub)); + } /* During expand, we want to generate unique proc ili each time a * proc ILM is processed. The assumption is that the scheduler will * cse a proc ili if it appears multiple times in a block. 
E.g., @@ -300,7 +320,13 @@ expand(void) ilmp = (ILM *)(ilmb.ilm_base + ilmx); opc = ILM_OPC(ilmp); - + /* Do not expand map statements for helper function for kmpc_parallel_51 */ + if ((opc == IM_MP_MAP || opc == IM_MP_EMAP) && process_expanded) + continue; + if (process_expanded) + { + gbl.ompoutlinedfunc = gbl.currsub; + } if (opc == IM_BR) { last_cpp_branch = ILM_OPND(ilmp, 1); } else if (opc == IM_LABEL) { @@ -319,8 +345,17 @@ expand(void) * variable operands */ if (IM_TRM(opc)) { int cur_label = BIH_LABEL(expb.curbih); - eval_ilm(ilmx); - } + if (!skip_expand){ + SPTR sptr1 = eval_ilm_check_if_skip(ilmx, &skip_expand, &process_expanded); + if (skip_expand) { + skip_expand_sptr = sptr1; + process_expanded_map[skip_expand_sptr] = 1; + ll_write_ilm_header((int)sptr1, ilmx); + restartRewritingILM(ilmx); + } + } else { + ll_rewrite_ilms(-1, ilmx, len); + }} else if (flg.smp && len) { ll_rewrite_ilms(-1, ilmx, len); } @@ -367,7 +402,6 @@ expand(void) new_callee_scope = 0; } while (opc != IM_END && opc != IM_ENDF); - if (DBGBIT(10, 2) && (bihb.stg_avail != 1)) { int bih; for (bih = 1; bih != 0; bih = BIH_NEXT(bih)) { @@ -424,6 +458,13 @@ expand(void) } else { fihb.nextfindex = fihb.currfindex = 1; } + if (skip_expand && !process_expanded) + { + process_expanded = 1; + unsetRewritingILM(); + } + skip_expand = 0; + return expb.nilms; } @@ -457,10 +498,12 @@ static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) int num_of_symbols = orig_symbols->n_symbols; char allocated_symbol_name[128]; SPTR allocated_symbol; - std::vector init_symbols(orig_symbols->n_symbols); + std::vector init_symbols{}; int store_instr; int load_instr; for (unsigned i = 0; i < num_of_symbols; ++i) { + if (DTYPEG(orig_symbols->symbols[i].device_sym) != DT_INT8) + continue; snprintf(allocated_symbol_name, sizeof(allocated_symbol_name), ".allocated_symbol_%d", i); allocated_symbol = getsymbol(allocated_symbol_name); @@ -477,17 +520,22 @@ static std::vector 
get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) load_instr = mk_ompaccel_ldsptr(allocated_symbol); chk_block(load_instr); - init_symbols[i] = load_instr; + init_symbols.push_back(load_instr); } return init_symbols; } +void eval_ilm(int ilmx) +{ + eval_ilm_check_if_skip(ilmx, nullptr, nullptr); +} -void -eval_ilm(int ilmx) +SPTR +eval_ilm_check_if_skip(int ilmx, int *skip_expand, int *process_expanded) { + SPTR sptr1 = SPTR_NULL; ILM *ilmpx; int noprs, /* number of operands in the ILM */ ilix, /* ili index */ @@ -511,7 +559,7 @@ eval_ilm(int ilmx) /* Set line no for EPARx */ gbl.lineno = ILM_OPND(ilmpx, 1); } - return; + return sptr1; } } @@ -543,12 +591,12 @@ eval_ilm(int ilmx) } } else if (opcx == IM_MP_EREDUCTION) { ompaccel_notify_reduction(false); - return; + return sptr1; } } if (ompaccel_is_reduction_region()) - return; + return sptr1; } #endif /*- @@ -647,7 +695,7 @@ eval_ilm(int ilmx) if (IM_I8(opcx)) ILM_RESTYPE(ilmx) = ILM_ISI8; - return; + return sptr1; } switch (IM_TYPE(opcx)) { /* special-cased ILM */ @@ -678,7 +726,10 @@ eval_ilm(int ilmx) break; case IMTY_MISC: /* miscellaneous */ - exp_misc(opcx, ilmpx, ilmx); + if (process_expanded && *process_expanded) + exp_misc(opcx, ilmpx, ilmx, true); + else + exp_misc(opcx, ilmpx, ilmx); break; case IMTY_FSTR: /* fortran string */ @@ -720,10 +771,11 @@ eval_ilm(int ilmx) /* We do not initialize spmd kernel library since we do not use spmd data * sharing model. It does extra work and allocates device on-chip memory. 
* */ - if (XBIT(232, 0x40) && gbl.ompaccel_intarget) { + if (XBIT(232, 0x40) && gbl.ompaccel_intarget && !*process_expanded) { + //TODO move initialization to separate function std::vector allocated_symbols; if (is_SPMD_mode(ompaccel_tinfo_get(gbl.currsub)->mode)) { - allocated_symbols = get_allocated_symbols(ompaccel_tinfo_get(gbl.currsub)); + allocated_symbols = get_allocated_symbols(ompaccel_tinfo_get(gbl.currsub)); } ilix = ll_make_kmpc_target_init(ompaccel_tinfo_get(gbl.currsub)->mode); @@ -754,12 +806,20 @@ eval_ilm(int ilmx) ilix = ll_make_kmpc_global_thread_num(); iltb.callfg = 1; chk_block(ilix); - ilix = ll_make_kmpc_parallel_51(ilix, allocated_symbols); + sptr1 = ll_make_helper_function_for_kmpc_parallel_51((SPTR)0, ompaccel_tinfo_get(gbl.currsub)); + ilix = ll_make_kmpc_parallel_51(ilix, allocated_symbols, sptr1); iltb.callfg = 1; chk_block(ilix); ilix = ll_make_kmpc_target_deinit(ompaccel_tinfo_get(gbl.currsub)->mode); iltb.callfg = 1; chk_block(ilix); + expb.curilt = addilt(expb.curilt, ad1ili(IL_EXIT, gbl.currsub)); + BIH_XT(expb.curbih) = 1; + BIH_LAST(expb.curbih) = 1; + wr_block(); + if (skip_expand && process_expanded && (*process_expanded == 0)){ + *skip_expand = 1; + } } iltb.callfg = 1; @@ -770,6 +830,7 @@ eval_ilm(int ilmx) #endif if (IM_I8(opcx)) ILM_RESTYPE(ilmx) = ILM_ISI8; + return sptr1; } /***************************************************************/ diff --git a/tools/flang2/flang2exe/expsmp.cpp b/tools/flang2/flang2exe/expsmp.cpp index 8cc3d1b50f..4f340845ac 100644 --- a/tools/flang2/flang2exe/expsmp.cpp +++ b/tools/flang2/flang2exe/expsmp.cpp @@ -69,8 +69,6 @@ std::list targetVector; int HasRequiresUnifiedSharedMemory = false; // AOCC End -static int incrOutlinedCnt(void); -static int decrOutlinedCnt(void); static int getOutlinedTemp(char *, int); static int isUnnamedCs(int); static int addMpUnp(void); @@ -3270,7 +3268,7 @@ no_pad_func(char *fname) NOPADP(sptr, 1); } -static int +int decrOutlinedCnt(void) { outlinedCnt--; @@ 
-3281,7 +3279,7 @@ decrOutlinedCnt(void) return outlinedCnt; } -static int +int incrOutlinedCnt(void) { parCnt++; diff --git a/tools/flang2/flang2exe/expsmp.h b/tools/flang2/flang2exe/expsmp.h index 3a17885011..923f737af2 100644 --- a/tools/flang2/flang2exe/expsmp.h +++ b/tools/flang2/flang2exe/expsmp.h @@ -114,4 +114,6 @@ void section_create_endblock(SPTR endLabel); /// \brief ... LLTask* llGetTask(int scope); +int incrOutlinedCnt(void); +int decrOutlinedCnt(void); #endif // EXPSMP_H_ diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 188a4ceac1..184850e1cd 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1750,10 +1750,10 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) } int -ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols) +ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR helper_func) { static int id; - int n_symbols = symbols.size(); + int n_symbols = ompaccel_tinfo_get(gbl.currsub)->n_symbols;//2;//symbols.size(); DTYPE arg_types[9]; DTYPE void_ptr_t = DT_ADDR;//create_dtype_funcprototype(); DTYPE void_ptr_ptr_t = get_type(2, TY_PTR, void_ptr_t); @@ -1769,15 +1769,25 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols) 0, ad_icon(0), FALSE); - for (unsigned i = 0; i < symbols.size(); ++i) { - ilix = mk_ompaccel_store(symbols[i], - DT_INT8, - nme_args, - ad_acon(captured_vars, i * TARGET_PTRSIZE)); + int j = 0; + for (unsigned i = 0; i < n_symbols; ++i) { + if (DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym) == DT_INT8) { + ilix = mk_ompaccel_store(symbols[j++], + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + } + else { + ilix = mk_ompaccel_ldsptr(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym); + ilix = mk_ompaccel_store(ilix, + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + } chk_block(ilix); } + -// chk_block(ilix); arg_types[0] = DT_CPTR; /* 
ident */ arg_types[1] = DT_INT; /* global_tid */ arg_types[2] = DT_INT; /* if_expr */ @@ -1786,14 +1796,14 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols) arg_types[5] = void_ptr_t; /* fn */ arg_types[6] = void_ptr_t; /* wrapper_fn */ arg_types[7] = void_ptr_ptr_t; /* args */ - arg_types[8] = DT_INT; /* n_args */ + arg_types[8] = DT_INT8; /* n_args */ args[8] = gen_null_arg(); /* ident */ args[7] = global_tid_sptr; /* global_tid */ args[6] = ad_icon(1); /* if_expr */ args[5] = ad_icon(-1); /* num_threads */ args[4] = ad_icon(-1); /* proc_bind */ - args[3] = gen_null_arg(); /* fn */ + args[3] = ad_acon(helper_func, 0); args[2] = gen_null_arg(); /* wrapper_fn */ args[1] = ad_acon(captured_vars, 0); /* args */ args[0] = ad_icon(n_symbols); /* n_args */ diff --git a/tools/flang2/flang2exe/kmpcutil.h b/tools/flang2/flang2exe/kmpcutil.h index 9c6dd610c3..e872e6e010 100644 --- a/tools/flang2/flang2exe/kmpcutil.h +++ b/tools/flang2/flang2exe/kmpcutil.h @@ -509,7 +509,7 @@ int ll_make_kmpc_target_init(OMP_TARGET_MODE); /** \brief Generate kmpc_parallel_51 function call */ -int ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &); +int ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &, SPTR); #ifdef OMP_OFFLOAD_AMD /** diff --git a/tools/flang2/flang2exe/llassem.cpp b/tools/flang2/flang2exe/llassem.cpp index fc1b204f71..02b1e2cab9 100644 --- a/tools/flang2/flang2exe/llassem.cpp +++ b/tools/flang2/flang2exe/llassem.cpp @@ -406,8 +406,9 @@ find_ag(const char *ag_name) int hashval = name_to_hash(ag_name, strlen(ag_name)); for (gblsym = agb.hashtb[hashval]; gblsym; gblsym = AG_HASHLK(gblsym)) - if (!strcmp(ag_name, AG_NAME(gblsym))) + if (!strcmp(ag_name, AG_NAME(gblsym))){ return gblsym; + } return SPTR_NULL; } diff --git a/tools/flang2/flang2exe/ompaccel_x86.cpp b/tools/flang2/flang2exe/ompaccel_x86.cpp index 8bafa7e6b3..d0359fac04 100644 --- a/tools/flang2/flang2exe/ompaccel_x86.cpp +++ b/tools/flang2/flang2exe/ompaccel_x86.cpp @@ 
-170,7 +170,7 @@ void ompaccel_x86_fix_arg_types(SPTR func_sptr) { } OMPACCEL_TINFO *tinfo = ompaccel_tinfo_get(func_sptr); - + if (!tinfo) return; // Remember all the reduction symbols of func_sptr so that we can blacklist // them during the type update. std::set reduc_syms; diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index be22268474..713bd402df 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -477,12 +477,13 @@ ll_get_shared_arg(SPTR func_sptr) } void -ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype) +ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL_TINFO *current_tinfo) { int count = 0; int sym, dtype; char name[MXIDLEN + 2]; int dpdscp = aux.dpdsc_avl; + int cnt = 0; PARAMCTP(func_sptr, paramct); DPDSCP(func_sptr, dpdscp); @@ -500,9 +501,23 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype) DTYPEP(sym, *argtype); PASSBYVALP(sym, 1); } + argtype++; STYPEP(sym, ST_VAR); aux.dpdsc_base[dpdscp++] = sym; + if (current_tinfo) + { + NEED((current_tinfo->n_symbols + 1), current_tinfo->symbols, OMPACCEL_SYM, + current_tinfo->sz_symbols, current_tinfo->sz_symbols * 2); + if (cnt >= 2) + current_tinfo->symbols[current_tinfo->n_symbols].host_sym = + ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; + current_tinfo->symbols[current_tinfo->n_symbols].device_sym = static_cast(sym); + current_tinfo->symbols[current_tinfo->n_symbols].map_type = 0; + current_tinfo->symbols[current_tinfo->n_symbols].in_map = 0; // AOCC + current_tinfo->n_symbols++; + cnt++; + } } } @@ -1155,6 +1170,7 @@ ll_rewrite_ilms(int lineno, int ilmx, int len) /* replace host sptr with device sptrs, PLD keeps sptr in 2nd index */ op1Pld = ILM_OPND(ilmpx, 1); + //replace host sym to device sym ILM_OPND(ilmpx, 2) = ompaccel_tinfo_current_get_devsptr(ILM_SymOPND(ilmpx, 2)); // AOCC begin @@ -2416,7 +2432,6 @@ 
llMakeFtnOutlinedSignatureTarget(SPTR func_sptr, OMPACCEL_TINFO *current_tinfo, for (i = 0; i < current_tinfo->n_symbols; ++i) { SPTR sptr = current_tinfo->symbols[i].host_sym; - // AOCC begin if (XBIT(232, 0x1)) { if (orig_sptr_map.find(sptr) != orig_sptr_map.end()) { @@ -2428,6 +2443,13 @@ llMakeFtnOutlinedSignatureTarget(SPTR func_sptr, OMPACCEL_TINFO *current_tinfo, sym = ompaccel_create_device_symbol(sptr, count); count++; current_tinfo->symbols[i].device_sym = sym; + if (is_SPMD_mode(current_tinfo->mode) && DTYPEG(sym) != DT_INT8) + { + PASSBYVALP(sym, 1); + DTYPEP(sym, get_type(2, TY_PTR, DTYPEG(sym))); + } else { + PASSBYVALP(sym, 0); + } OMPACCDEVSYMP(sym, TRUE); aux.dpdsc_base[dpdscp++] = sym; } @@ -2647,6 +2669,50 @@ ompaccel_copy_arraydescriptors(SPTR arg_sptr) return device_symbol; } +SPTR +ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *orig_tinfo) +{ + OMPACCEL_TINFO *current_tinfo; + SPTR func_sptr; + + int max_nargs = orig_tinfo->n_symbols + + orig_tinfo->n_quiet_symbols + + orig_tinfo->n_reduction_symbols; + int func_args_cnt = orig_tinfo->n_symbols + 2; // global_tid, bound_tid + target_info args + std::vector func_args(func_args_cnt); + auto *symbols = orig_tinfo->symbols; + func_args[0] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // global_tid + func_args[1] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // bound_tid + + for (int k = 2; k < func_args_cnt; k++) { + if (DTYPEG(symbols->device_sym) == DT_INT8 ) { + func_args[k] = get_type(2, TY_PTR, DT_INT8); + } + else { + func_args[k] = DTYPEG(symbols->device_sym);} + symbols++; + } + + func_sptr = create_target_outlined_func_sptr(scope_sptr, false); + CCSYMP(func_sptr, + 1); /* currently we make all CCSYM func varargs in Fortran. 
*/ + CFUNCP(func_sptr, 1); + TASKFNP(func_sptr, FALSE); + ISTASKDUPP(func_sptr, FALSE); + OUTLINEDP(func_sptr, gbl.currsub); + FUNCLINEP(func_sptr, gbl.lineno); + STYPEP(func_sptr, ST_ENTRY); + DTYPEP(func_sptr, DT_VOID_NONE); + DEFDP(func_sptr, 1); + SCP(func_sptr, SC_STATIC); + ADDRTKNP(func_sptr, 1); + OMPACCFUNCDEVP(func_sptr, 1); + current_tinfo = ompaccel_tinfo_create(func_sptr, max_nargs); + ll_make_ftn_outlined_params(func_sptr, func_args_cnt, func_args.data(), current_tinfo); + ll_process_routine_parameters(func_sptr); + return func_sptr; +} + SPTR ll_make_outlined_ompaccel_func(SPTR stblk_sptr, SPTR scope_sptr, bool iskernel) { diff --git a/tools/flang2/flang2exe/outliner.h b/tools/flang2/flang2exe/outliner.h index 642e739e26..b93a25b65d 100644 --- a/tools/flang2/flang2exe/outliner.h +++ b/tools/flang2/flang2exe/outliner.h @@ -26,6 +26,7 @@ #include "symtab.h" #include "ili.h" #include +#include "kmpcutil.h" extern FILE *par_file1; extern FILE *par_file2; @@ -246,7 +247,7 @@ void ilm_outlined_pad_ilm(int curilm); /** \brief ... */ -void ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype); +void ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL_TINFO *current_tinfo = nullptr); /** \brief ... @@ -398,4 +399,6 @@ bool outlined_is_eliminated(ILM_OP opc); bool outlined_need_recompile(); void ll_set_ompaccel_currfunc(bool isILMrecompile); +SPTR +ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *orig_tinfo); #endif /* OUTLINER_H_ */ From 8f95900d8a13581f9b25f94e46989efe539b2e23 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Tue, 15 Mar 2022 15:32:52 +0100 Subject: [PATCH 06/18] Fixed handling of last argument. 
Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/kmpcutil.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 184850e1cd..92318d2f8c 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1783,6 +1783,16 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he DT_INT8, nme_args, ad_acon(captured_vars, i * TARGET_PTRSIZE)); + // Hack for correct generation of *.ll code ( perform load operation instead of raw bitcast) + if (i == n_symbols - 1) { + chk_block(ilix); + ilix = mk_ompaccel_ldsptr(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym); + ilix = mk_ompaccel_store(ilix, + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + + } } chk_block(ilix); } From 1af255b2d1227263073bb5377883724ff4c84c60 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Wed, 23 Mar 2022 12:54:46 +0100 Subject: [PATCH 07/18] Fixed passing arrays for spmd kernels. Do not assign new dtype value for device symbols via get_type function. Use macros PASSBYVALP PASSBYREFP instead. Remove hack for load/store of the last symbol. 
Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/kmpcutil.cpp | 10 --------- tools/flang2/flang2exe/outliner.cpp | 33 ++++++++++++----------------- 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 92318d2f8c..184850e1cd 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1783,16 +1783,6 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he DT_INT8, nme_args, ad_acon(captured_vars, i * TARGET_PTRSIZE)); - // Hack for correct generation of *.ll code ( perform load operation instead of raw bitcast) - if (i == n_symbols - 1) { - chk_block(ilix); - ilix = mk_ompaccel_ldsptr(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym); - ilix = mk_ompaccel_store(ilix, - DT_INT8, - nme_args, - ad_acon(captured_vars, i * TARGET_PTRSIZE)); - - } } chk_block(ilix); } diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index 713bd402df..ca3d1d7346 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -505,19 +505,24 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL argtype++; STYPEP(sym, ST_VAR); aux.dpdsc_base[dpdscp++] = sym; + //AOC begin if (current_tinfo) { NEED((current_tinfo->n_symbols + 1), current_tinfo->symbols, OMPACCEL_SYM, current_tinfo->sz_symbols, current_tinfo->sz_symbols * 2); - if (cnt >= 2) + current_tinfo->symbols[current_tinfo->n_symbols].device_sym = static_cast(sym); + if (cnt >= 2) { + PASSBYVALP(sym, false); + PASSBYREFP(sym, true); current_tinfo->symbols[current_tinfo->n_symbols].host_sym = ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; - current_tinfo->symbols[current_tinfo->n_symbols].device_sym = static_cast(sym); - current_tinfo->symbols[current_tinfo->n_symbols].map_type = 0; - current_tinfo->symbols[current_tinfo->n_symbols].in_map = 0; // AOCC - 
current_tinfo->n_symbols++; - cnt++; + } + current_tinfo->symbols[current_tinfo->n_symbols].map_type = 0; + current_tinfo->symbols[current_tinfo->n_symbols].in_map = 0; + current_tinfo->n_symbols++; + cnt++; } + //AOCC end } } @@ -2443,13 +2448,6 @@ llMakeFtnOutlinedSignatureTarget(SPTR func_sptr, OMPACCEL_TINFO *current_tinfo, sym = ompaccel_create_device_symbol(sptr, count); count++; current_tinfo->symbols[i].device_sym = sym; - if (is_SPMD_mode(current_tinfo->mode) && DTYPEG(sym) != DT_INT8) - { - PASSBYVALP(sym, 1); - DTYPEP(sym, get_type(2, TY_PTR, DTYPEG(sym))); - } else { - PASSBYVALP(sym, 0); - } OMPACCDEVSYMP(sym, TRUE); aux.dpdsc_base[dpdscp++] = sym; } @@ -2685,11 +2683,8 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or func_args[1] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // bound_tid for (int k = 2; k < func_args_cnt; k++) { - if (DTYPEG(symbols->device_sym) == DT_INT8 ) { - func_args[k] = get_type(2, TY_PTR, DT_INT8); - } - else { - func_args[k] = DTYPEG(symbols->device_sym);} + func_args[k] = DTYPEG(symbols->device_sym); + PASSBYVALP(symbols->device_sym, false); symbols++; } @@ -2709,7 +2704,7 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or OMPACCFUNCDEVP(func_sptr, 1); current_tinfo = ompaccel_tinfo_create(func_sptr, max_nargs); ll_make_ftn_outlined_params(func_sptr, func_args_cnt, func_args.data(), current_tinfo); - ll_process_routine_parameters(func_sptr); + ll_process_routine_parameters(func_sptr); return func_sptr; } From 54651369fc09b204451f2a282068e1c28046aab9 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 24 Mar 2022 14:46:22 +0100 Subject: [PATCH 08/18] Fix passing args to helper function Fixed passing scalars which type is different than int64. Fixed passing allocatable arrays. 
Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 2 +- tools/flang2/flang2exe/kmpcutil.cpp | 2 +- tools/flang2/flang2exe/outliner.cpp | 13 ++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 63d5193f90..d63b31bd6c 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -502,7 +502,7 @@ static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) int store_instr; int load_instr; for (unsigned i = 0; i < num_of_symbols; ++i) { - if (DTYPEG(orig_symbols->symbols[i].device_sym) != DT_INT8) + if (!DT_ISSCALAR(DTYPEG(orig_symbols->symbols[i].device_sym))) continue; snprintf(allocated_symbol_name, sizeof(allocated_symbol_name), ".allocated_symbol_%d", i); diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 184850e1cd..cbee3c2701 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1771,7 +1771,7 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he FALSE); int j = 0; for (unsigned i = 0; i < n_symbols; ++i) { - if (DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym) == DT_INT8) { + if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym))) { ilix = mk_ompaccel_store(symbols[j++], DT_INT8, nme_args, diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index ca3d1d7346..c24116b5bf 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -492,7 +492,10 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL aux.dpdsc_size + paramct + 100); while (paramct--) { - sprintf(name, "%sArg%d", SYMNAME(func_sptr), count++); + if (current_tinfo && cnt >= 2) + sprintf(name, "%s", SYMNAME(ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym)); + else + sprintf(name, "%sArg%d", 
SYMNAME(func_sptr), count++); sym = getsymbol(name); SCP(sym, SC_DUMMY); if (*argtype == DT_CPTR) { /* either i8* or actual type( pass by value). */ @@ -510,12 +513,13 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL { NEED((current_tinfo->n_symbols + 1), current_tinfo->symbols, OMPACCEL_SYM, current_tinfo->sz_symbols, current_tinfo->sz_symbols * 2); - current_tinfo->symbols[current_tinfo->n_symbols].device_sym = static_cast(sym); if (cnt >= 2) { PASSBYVALP(sym, false); PASSBYREFP(sym, true); current_tinfo->symbols[current_tinfo->n_symbols].host_sym = ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; + current_tinfo->symbols[current_tinfo->n_symbols].device_sym = + ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; } current_tinfo->symbols[current_tinfo->n_symbols].map_type = 0; current_tinfo->symbols[current_tinfo->n_symbols].in_map = 0; @@ -2683,7 +2687,10 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or func_args[1] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // bound_tid for (int k = 2; k < func_args_cnt; k++) { - func_args[k] = DTYPEG(symbols->device_sym); + if(DT_ISSCALAR( DTYPEG(symbols->device_sym))) + func_args[k] = DT_CPTR; + else + func_args[k] = DTYPEG(symbols->device_sym); PASSBYVALP(symbols->device_sym, false); symbols++; } From 9ca72c052be57ffb6aeefff109e16f9b561741a7 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Wed, 30 Mar 2022 08:26:24 +0200 Subject: [PATCH 09/18] Pass complex arg as pointer to pair of floats --- tools/flang2/flang2exe/outliner.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index c24116b5bf..e3f73f7e9f 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -2687,7 +2687,8 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or func_args[1] = get_type(2, TY_PTR, 
DT_INT8);//DT_CPTR; // bound_tid for (int k = 2; k < func_args_cnt; k++) { - if(DT_ISSCALAR( DTYPEG(symbols->device_sym))) + if(DT_ISSCALAR( DTYPEG(symbols->device_sym)) + && DTYPEG(symbols->device_sym) != DT_CMPLX) func_args[k] = DT_CPTR; else func_args[k] = DTYPEG(symbols->device_sym); From abce3c852b8d4a905951d7ce1f3520fa49d983c3 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 31 Mar 2022 11:56:05 +0200 Subject: [PATCH 10/18] Do not modify the LLType of the argument If we generate initialization function for SPMD kernels we need to store addresses of the arguments before we call kmpc_parallel_51 function. We use ptrtoint instruction for scalar variables. Before this patch the LLVM IR code was generated wrongly for complex variables: void kernel_func( *Arg_c) //some code ptrtoint i64* %Arg_c //error Arg_c was declared as pair of floats //some code call kmpc_parallel_51() //some code This patch causes that LLVM IR contains correct ptrtoint instruction: void kernel_func( *Arg_c) //some code ptrtoint * %Arg_c //ok, Arg_c was declared as pair of floats //some code call kmpc_parallel_51() //some code Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/cgmain.cpp | 2 +- tools/flang2/flang2exe/expand.cpp | 8 +++++++- tools/shared/utils/global.h | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/flang2/flang2exe/cgmain.cpp b/tools/flang2/flang2exe/cgmain.cpp index 1b33c6ee1c..5c0edd5ee4 100644 --- a/tools/flang2/flang2exe/cgmain.cpp +++ b/tools/flang2/flang2exe/cgmain.cpp @@ -12267,7 +12267,7 @@ process_sptr_offset(SPTR sptr, ISZ_T off) } if ((flg.smp || (XBIT(34, 0x200) || gbl.usekmpc)) && (gbl.outlined || ISTASKDUPG(GBL_CURRFUNC))) { - if (sptr == ll_get_shared_arg(gbl.currsub)) { + if (sptr == ll_get_shared_arg(gbl.currsub) && !gbl.is_init_spmd_kernel) { LLTYPE(sptr) = make_ptr_lltype(make_lltype_from_dtype(DT_INT8)); } } diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 
d63b31bd6c..6c95bde29f 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -237,6 +237,11 @@ expand(void) static std::map process_expanded_map = std::map(); auto it = process_expanded_map.find(gbl.currsub); int process_expanded = 0; + // we reset flag because we do not know if we generate initialization + // function for SPMD kernel (the function with kmpc_parallel_51 call) + // or the proper kernel code (the function which is passed as an argument + // to kmpc_parallel_51 call or generic kernel + gbl.is_init_spmd_kernel = false; if (it != process_expanded_map.end()) { process_expanded = it->second; @@ -806,7 +811,8 @@ eval_ilm_check_if_skip(int ilmx, int *skip_expand, int *process_expanded) ilix = ll_make_kmpc_global_thread_num(); iltb.callfg = 1; chk_block(ilix); - sptr1 = ll_make_helper_function_for_kmpc_parallel_51((SPTR)0, ompaccel_tinfo_get(gbl.currsub)); + gbl.is_init_spmd_kernel = true; + sptr1 = ll_make_helper_function_for_kmpc_parallel_51((SPTR)0, ompaccel_tinfo_get(gbl.currsub)); ilix = ll_make_kmpc_parallel_51(ilix, allocated_symbols, sptr1); iltb.callfg = 1; chk_block(ilix); diff --git a/tools/shared/utils/global.h b/tools/shared/utils/global.h index 14ef411d85..02988d5e0f 100644 --- a/tools/shared/utils/global.h +++ b/tools/shared/utils/global.h @@ -162,6 +162,7 @@ typedef struct { bool ompaccel_intarget; /* set when expander is in the openmp target construct */ bool ompaccel_isdevice; /* set when generating code for openmp target device */ SPTR teamPrivateArgs; /* keeps sptr that holds team private array */ + bool is_init_spmd_kernel; /* if TRUE, we generate initialization proceudre of SPMD kernel */ #endif } GBL; From fb7f61ff6ca28112b008121cf66caa64b5b0bda4 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Mon, 4 Apr 2022 11:45:38 +0200 Subject: [PATCH 11/18] Fix passing Fortran objects Fortran objects should be passed as i64* Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 11 
++++++++--- tools/flang2/flang2exe/kmpcutil.cpp | 4 +++- tools/flang2/flang2exe/outliner.cpp | 3 +++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 6c95bde29f..34274c51fa 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -507,13 +507,18 @@ static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) int store_instr; int load_instr; for (unsigned i = 0; i < num_of_symbols; ++i) { - if (!DT_ISSCALAR(DTYPEG(orig_symbols->symbols[i].device_sym))) - continue; + if (!DT_ISSCALAR(DTYPEG(orig_symbols->symbols[i].device_sym)) + && STYPEG(orig_symbols->symbols[i].host_sym) != ST_STRUCT) { + continue; + } snprintf(allocated_symbol_name, sizeof(allocated_symbol_name), ".allocated_symbol_%d", i); allocated_symbol = getsymbol(allocated_symbol_name); STYPEP(allocated_symbol, ST_VAR); - DTYPEP(allocated_symbol, + if (STYPEG(orig_symbols->symbols[i].host_sym) == ST_STRUCT) + DTYPEP(allocated_symbol,DT_CPTR); + else + DTYPEP(allocated_symbol, get_type(2,TY_PTR,DTYPEG(orig_symbols->symbols[i].device_sym))); SCP(allocated_symbol, SC_AUTO); store_instr = ad4ili(IL_ST, diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index cbee3c2701..57109ac0e4 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1770,8 +1770,10 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he ad_icon(0), FALSE); int j = 0; + for (unsigned i = 0; i < n_symbols; ++i) { - if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym))) { + if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) || + STYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].host_sym) == ST_STRUCT) { ilix = mk_ompaccel_store(symbols[j++], DT_INT8, nme_args, diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index 
e3f73f7e9f..e36daae6a4 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -2690,6 +2690,9 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or if(DT_ISSCALAR( DTYPEG(symbols->device_sym)) && DTYPEG(symbols->device_sym) != DT_CMPLX) func_args[k] = DT_CPTR; + else if (STYPEG(symbols->host_sym) == ST_STRUCT) { + func_args[k] = DT_CPTR; + } else func_args[k] = DTYPEG(symbols->device_sym); PASSBYVALP(symbols->device_sym, false); From 06f8be273cffc53745ff6f656aeb2aa3ba92fd82 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Mon, 4 Apr 2022 23:00:14 +0200 Subject: [PATCH 12/18] Fixed passing complex numbers. Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/outliner.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index e36daae6a4..c7ad31bc59 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -2671,6 +2671,17 @@ ompaccel_copy_arraydescriptors(SPTR arg_sptr) return device_symbol; } +static bool is_complex_type(DTYPE dt) +{ + if (dt == DT_DCMPLX){ + return true; + } + else if (dt == DT_CMPLX){ + return true; + } + return false; +} + SPTR ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *orig_tinfo) { @@ -2688,13 +2699,15 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or for (int k = 2; k < func_args_cnt; k++) { if(DT_ISSCALAR( DTYPEG(symbols->device_sym)) - && DTYPEG(symbols->device_sym) != DT_CMPLX) + && !is_complex_type(DTYPEG(symbols->device_sym))) { func_args[k] = DT_CPTR; + } else if (STYPEG(symbols->host_sym) == ST_STRUCT) { func_args[k] = DT_CPTR; } - else + else { func_args[k] = DTYPEG(symbols->device_sym); + } PASSBYVALP(symbols->device_sym, false); symbols++; } From fd7e9ac6add7ac744c589c4ba2f18dba4af494b6 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Wed, 6 
Apr 2022 09:44:16 +0200 Subject: [PATCH 13/18] Skip symbols which are not initialized Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/kmpcutil.cpp | 27 ++++++++++++++++++++++++--- tools/flang2/flang2exe/kmpcutil.h | 12 ++++++++++++ tools/flang2/flang2exe/outliner.cpp | 3 ++- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 57109ac0e4..3f024f8f89 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1749,11 +1749,31 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) return mk_kmpc_api_call(KMPC_API_TARGET_INIT, 4, arg_types, args); } +int get_n_symbols(OMPACCEL_TINFO *tinfo) +{ + int orig_n_symbols = tinfo->n_symbols; + int n_symbols = orig_n_symbols; + for (int i = 0; i < orig_n_symbols; ++i) { + //skip uninitialized symbols + if (DTYPEG(tinfo->symbols[i].device_sym) == 0) { + n_symbols--; + } + } + return n_symbols; +} + +bool check_if_skip_symbol(SPTR sym) +{ + if (DTYPEG(sym) == 0) + return true; + return false; +} + int ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR helper_func) { static int id; - int n_symbols = ompaccel_tinfo_get(gbl.currsub)->n_symbols;//2;//symbols.size(); + int n_symbols = get_n_symbols(ompaccel_tinfo_get(gbl.currsub)); DTYPE arg_types[9]; DTYPE void_ptr_t = DT_ADDR;//create_dtype_funcprototype(); DTYPE void_ptr_ptr_t = get_type(2, TY_PTR, void_ptr_t); @@ -1770,8 +1790,9 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he ad_icon(0), FALSE); int j = 0; - - for (unsigned i = 0; i < n_symbols; ++i) { + for (int i = 0; i < n_symbols; ++i) { + if (check_if_skip_symbol(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) + continue; if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) || STYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].host_sym) == ST_STRUCT) { ilix = mk_ompaccel_store(symbols[j++], diff --git 
a/tools/flang2/flang2exe/kmpcutil.h b/tools/flang2/flang2exe/kmpcutil.h index e872e6e010..f159c205cb 100644 --- a/tools/flang2/flang2exe/kmpcutil.h +++ b/tools/flang2/flang2exe/kmpcutil.h @@ -534,6 +534,18 @@ int ll_make_kmpc_nvptx_parallel_reduce_nowait_simple_spmd(int, int, int, SPTR, S */ int ll_make_kmpc_nvptx_end_reduce_nowait(); +/** + \brief Get number of correctly initialized number of symbols. +*/ +int get_n_symbols(OMPACCEL_TINFO *tinfo); + +/** + \brief Check if given symbol should be skipped + If DTYPE of symbol is 0 then the symbol should not be passed + as an argument to kmpc_parallel_51 function +*/ +bool check_if_skip_symbol(SPTR sym); + /* End OpenMP Accelerator RT - non standard */ #endif #endif /* KMPC_RUNTIME_H_ */ diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index c7ad31bc59..195024a11c 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -2691,7 +2691,8 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or int max_nargs = orig_tinfo->n_symbols + orig_tinfo->n_quiet_symbols + orig_tinfo->n_reduction_symbols; - int func_args_cnt = orig_tinfo->n_symbols + 2; // global_tid, bound_tid + target_info args + int func_args_cnt = get_n_symbols(orig_tinfo); + func_args_cnt += 2; // global_tid, bound_tid + target_info args std::vector func_args(func_args_cnt); auto *symbols = orig_tinfo->symbols; func_args[0] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // global_tid From b55c51af984ed6a4ecf1ecc65890ca2fef4e59e3 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 7 Apr 2022 16:27:26 +0200 Subject: [PATCH 14/18] Add support for passing integers by value Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 5 ++++- tools/flang2/flang2exe/kmpcutil.cpp | 10 +++++++++- tools/flang2/flang2exe/outliner.cpp | 9 +++++---- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp 
b/tools/flang2/flang2exe/expand.cpp index 34274c51fa..7262f8653e 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -237,6 +237,7 @@ expand(void) static std::map process_expanded_map = std::map(); auto it = process_expanded_map.find(gbl.currsub); int process_expanded = 0; + // we reset flag because we do not know if we generate initialization // function for SPMD kernel (the function with kmpc_parallel_51 call) // or the proper kernel code (the function which is passed as an argument @@ -507,6 +508,9 @@ static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) int store_instr; int load_instr; for (unsigned i = 0; i < num_of_symbols; ++i) { + if (PASSBYVALG(orig_symbols->symbols[i].device_sym) && + !PASSBYREFG(orig_symbols->symbols[i].device_sym)) + continue; if (!DT_ISSCALAR(DTYPEG(orig_symbols->symbols[i].device_sym)) && STYPEG(orig_symbols->symbols[i].host_sym) != ST_STRUCT) { continue; @@ -531,7 +535,6 @@ static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) chk_block(load_instr); init_symbols.push_back(load_instr); - } return init_symbols; diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 3f024f8f89..41fc8304a4 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1793,7 +1793,15 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he for (int i = 0; i < n_symbols; ++i) { if (check_if_skip_symbol(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) continue; - if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) || + else if (PASSBYVALG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym) && + !PASSBYREFG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) { + ilix = mk_ompaccel_ldsptr(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym); + ilix = mk_ompaccel_store(ilix, + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + } + 
else if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) || STYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].host_sym) == ST_STRUCT) { ilix = mk_ompaccel_store(symbols[j++], DT_INT8, diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index 195024a11c..6786f21e51 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -514,12 +514,14 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL NEED((current_tinfo->n_symbols + 1), current_tinfo->symbols, OMPACCEL_SYM, current_tinfo->sz_symbols, current_tinfo->sz_symbols * 2); if (cnt >= 2) { - PASSBYVALP(sym, false); - PASSBYREFP(sym, true); + if (!(PASSBYVALG(sym) && !PASSBYREFG(sym) && DTYPEG(sym) == DT_INT8)) { + PASSBYVALP(sym, false); + PASSBYREFP(sym, true); + } current_tinfo->symbols[current_tinfo->n_symbols].host_sym = ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; current_tinfo->symbols[current_tinfo->n_symbols].device_sym = - ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; + ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; } current_tinfo->symbols[current_tinfo->n_symbols].map_type = 0; current_tinfo->symbols[current_tinfo->n_symbols].in_map = 0; @@ -2709,7 +2711,6 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or else { func_args[k] = DTYPEG(symbols->device_sym); } - PASSBYVALP(symbols->device_sym, false); symbols++; } From 76d82f0c5e0617f89335c9306520a0ac93eb0ae9 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 21 Apr 2022 10:33:46 +0200 Subject: [PATCH 15/18] Use the same OpenMP API as Clang for target parallel for pragma Clang uses kmpc_parallel_51 function for handling target parallel for pragma.
Flang should use the same functions as Clang for pragma target parallel Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/cgmain.cpp | 3 ++- tools/flang2/flang2exe/kmpcutil.cpp | 28 ++++++++++++++++++++++------ tools/flang2/flang2exe/ompaccel.cpp | 3 +-- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/flang2/flang2exe/cgmain.cpp b/tools/flang2/flang2exe/cgmain.cpp index 5c0edd5ee4..f89e9d2059 100644 --- a/tools/flang2/flang2exe/cgmain.cpp +++ b/tools/flang2/flang2exe/cgmain.cpp @@ -14406,7 +14406,8 @@ INLINE void static add_property_struct(char *func_name, print_token("@"); print_token(func_name); - if (is_SPMD_mode(mode)) { + if (mode >= mode_target_teams_distribute_parallel_for + && mode <= mode_target_parallel_for_simd) { print_token("__exec_mode = weak constant i8 2\n"); } else { diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 41fc8304a4..789276cfae 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1736,11 +1736,19 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) int args[4]; args[3] = gen_null_arg(); /* ident */ - if (is_SPMD_mode(mode)) { + if (mode >= mode_target_teams_distribute_parallel_for && + mode <= mode_target_parallel_for_simd) { args[2] = ad_icon(2); /* SPMD Mode */ args[1] = ad_icon(0); /* UseGenericStateMachine */ - args[0] = ad_icon(0); /* RequiresFullRuntime */ -// args[0] = ad_icon(1); /* RequiresFullRuntime */ + if (mode == mode_target_parallel) { + /* RequiresFullRuntime - kmpc_parallel_51 requires full runtime */ + args[0] = ad_icon(1); + } + else { + /* RequiresFullRuntime - Old Fortran OpenMP API does not require + * full runtime */ + args[0] = ad_icon(0); + } } else { args[2] = ad_icon(1); /* Generic mode */ args[1] = ad_icon(1); /* UseGenericStateMachine */ @@ -1851,10 +1859,18 @@ ll_make_kmpc_target_deinit(OMP_TARGET_MODE mode) int args[3]; args[2] = gen_null_arg(); /* ident */ - if (is_SPMD_mode(mode)) { + if (mode >= 
mode_target_teams_distribute_parallel_for && + mode <= mode_target_parallel_for_simd) { args[1] = ad_icon(2); /* SPMD Mode */ - args[0] = ad_icon(0); /* RequiresFullRuntime */ -// args[0] = ad_icon(1); /* RequiresFullRuntime */ + if (mode == mode_target_parallel) { + /* RequiresFullRuntime - kmpc_parallel_51 requires full runtime */ + args[0] = ad_icon(1); + } + else { + /* RequiresFullRuntime - Old Fortran OpenMP API does not require + * full runtime */ + args[0] = ad_icon(0); + } } else { args[1] = ad_icon(1); /* Generic mode */ args[0] = ad_icon(1); /* RequiresFullRuntime */ diff --git a/tools/flang2/flang2exe/ompaccel.cpp b/tools/flang2/flang2exe/ompaccel.cpp index 9d266499c3..b5e78dccee 100644 --- a/tools/flang2/flang2exe/ompaccel.cpp +++ b/tools/flang2/flang2exe/ompaccel.cpp @@ -3726,8 +3726,7 @@ ompaccel_set_target_declare() { } bool is_SPMD_mode(OMP_TARGET_MODE mode) { - if (mode >= mode_target_teams_distribute_parallel_for - && mode <= mode_target_parallel_for_simd) { + if (mode == mode_target_parallel) { return true; } return false; From f11437ce761f3c3a6646ac1ef5ca39208cd905cd Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Tue, 26 Apr 2022 10:52:52 +0200 Subject: [PATCH 16/18] Insert kmpc_parallel_51 call loop distribution Pragma target teams distribute parallel for calls kmpc_parallel_51 function in different way in comparison to pragma target parallel. Pragma target parallel calls kmpc_parallel_51 function just after initialization procedure. Pragma target teams distribute parallel for calls kmpc_parallel_51 function in the body of the outer loop. 
Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 3 +++ tools/flang2/flang2exe/kmpcutil.cpp | 36 +++++++++++++++++++++++++---- tools/flang2/flang2exe/kmpcutil.h | 6 ++++- tools/flang2/flang2exe/ompaccel.cpp | 23 +++++++++++++++++- tools/flang2/flang2exe/ompaccel.h | 4 ++++ 5 files changed, 66 insertions(+), 6 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 7262f8653e..f234fff78d 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -238,6 +238,9 @@ expand(void) auto it = process_expanded_map.find(gbl.currsub); int process_expanded = 0; + //we are at the beginning of pragma expansion + //make sure that mploop_counter equals to zero + reset_mploop_counter(); // we reset flag because we do not know if we generate initialization // function for SPMD kernel (the function with kmpc_parallel_51 call) // or the proper kernel code (the function which is passed as an argument diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 789276cfae..0e3af9cf75 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1778,7 +1778,11 @@ bool check_if_skip_symbol(SPTR sym) } int -ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR helper_func) +ll_make_kmpc_parallel_51(int global_tid_sptr, + std::vector &symbols, + SPTR helper_func, + SPTR lower, + SPTR upper) { static int id; int n_symbols = get_n_symbols(ompaccel_tinfo_get(gbl.currsub)); @@ -1787,6 +1791,10 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he DTYPE void_ptr_ptr_t = get_type(2, TY_PTR, void_ptr_t); DTYPE arr_dtype; int args[9]; + + if (lower && upper) + n_symbols += 2; + SPTR captured_vars = make_array_sptr(const_cast("captured_vars_addrs"), void_ptr_t, n_symbols); @@ -1798,7 +1806,25 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he ad_icon(0), FALSE); int j 
= 0; - for (int i = 0; i < n_symbols; ++i) { + int i = 0; + /* Store lower and upper bounds for loop distribution */ + if (lower && upper) { + ilix = mk_ompaccel_ldsptr(lower); + ilix = mk_ompaccel_store(ilix, + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + chk_block(ilix); + i++; + ilix = mk_ompaccel_ldsptr(upper); + ilix = mk_ompaccel_store(ilix, + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + chk_block(ilix); + i++; + } + for (; i < n_symbols; ++i) { if (check_if_skip_symbol(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) continue; else if (PASSBYVALG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym) && @@ -1825,7 +1851,6 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he } chk_block(ilix); } - arg_types[0] = DT_CPTR; /* ident */ arg_types[1] = DT_INT; /* global_tid */ @@ -1842,7 +1867,10 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he args[6] = ad_icon(1); /* if_expr */ args[5] = ad_icon(-1); /* num_threads */ args[4] = ad_icon(-1); /* proc_bind */ - args[3] = ad_acon(helper_func, 0); + if (helper_func) + args[3] = ad_acon(helper_func, 0); + else + args[3] = gen_null_arg(); args[2] = gen_null_arg(); /* wrapper_fn */ args[1] = ad_acon(captured_vars, 0); /* args */ args[0] = ad_icon(n_symbols); /* n_args */ diff --git a/tools/flang2/flang2exe/kmpcutil.h b/tools/flang2/flang2exe/kmpcutil.h index f159c205cb..6619b1d3ec 100644 --- a/tools/flang2/flang2exe/kmpcutil.h +++ b/tools/flang2/flang2exe/kmpcutil.h @@ -509,7 +509,11 @@ int ll_make_kmpc_target_init(OMP_TARGET_MODE); /** \brief Generate kmpc_parallel_51 function call */ -int ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &, SPTR); +int ll_make_kmpc_parallel_51(int global_tid_sptr, + std::vector &, + SPTR, + SPTR lower = (SPTR)0, + SPTR upper = (SPTR)0); #ifdef OMP_OFFLOAD_AMD /** diff --git a/tools/flang2/flang2exe/ompaccel.cpp b/tools/flang2/flang2exe/ompaccel.cpp index 
b5e78dccee..3893e8a6b1 100644 --- a/tools/flang2/flang2exe/ompaccel.cpp +++ b/tools/flang2/flang2exe/ompaccel.cpp @@ -66,6 +66,9 @@ // Should be in sync with clang::GPU::AMDGPUGpuGridValues in clang int warp_size_log2; int warp_size_log2_mask; +// count if we expand the second MPLOOP instruction +// inside single OpenMP pragma +int mploop_counter; // AOCC End #include "../../flang1/flang1exe/global.h" @@ -2758,7 +2761,20 @@ exp_ompaccel_mploop(ILM *ilmp, int curilm) ili = ll_make_kmpc_for_static_init(&loop_args); // AOCC end } else { - ili = ll_make_kmpc_for_static_init_simple_spmd(&loop_args, sched); + mploop_counter++; + if (mploop_counter != 2) + ili = ll_make_kmpc_for_static_init_simple_spmd(&loop_args, sched); + else { + std::vector allocated_symbols; + int ilix = ll_make_kmpc_global_thread_num(); + ilix = ll_make_kmpc_parallel_51(ilix, + allocated_symbols, + (SPTR)0, /*TODO: replace with wrapper fn ptr */ + loop_args.lower, + loop_args.upper); + iltb.callfg = 1; + chk_block(ilix); + } } break; default: @@ -3732,6 +3748,11 @@ bool is_SPMD_mode(OMP_TARGET_MODE mode) { return false; } +void reset_mploop_counter() +{ + mploop_counter = 0; +} + // AOCC End #endif /* Expander - OpenMP Accelerator Model */ diff --git a/tools/flang2/flang2exe/ompaccel.h b/tools/flang2/flang2exe/ompaccel.h index f9173b7487..b270afe6b1 100644 --- a/tools/flang2/flang2exe/ompaccel.h +++ b/tools/flang2/flang2exe/ompaccel.h @@ -590,4 +590,8 @@ void ompaccel_set_target_declare(); */ bool is_SPMD_mode(OMP_TARGET_MODE mode); // AOCC End +/** + \brief Reset counts of MPLOOP instruction + */ +void reset_mploop_counter(); #endif From 852a46c3f440e39515ef5f74de2c5c43ac59c23e Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Fri, 6 May 2022 07:43:16 -0500 Subject: [PATCH 17/18] Do not expand ILM instructions for second nest level Pragma target teams distribute parallel do consists of two levels of parallelism (teams and threads). 
Threads from one team should be launched in parallel via __kmpc_parallel_51 function. The code which is executed by threads should be outlined to separate function. Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index f234fff78d..c395a8d026 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -558,6 +558,9 @@ eval_ilm_check_if_skip(int ilmx, int *skip_expand, int *process_expanded) tmp, /* temporary */ op1; /* operand 1 */ ILM_OP opcx; /**< ILM opcode of the ILM */ + static int mp_loop_nest_level; + const int mp_loop_second_nest_level = 2; + static bool omit_loop_nesting; int first_op = 0; @@ -582,6 +585,24 @@ eval_ilm_check_if_skip(int ilmx, int *skip_expand, int *process_expanded) if (EXPDBG(8, 2)) fprintf(gbl.dbgfil, "---------- eval ilm %d\n", ilmx); + if (flg.omptarget && gbl.ompaccel_intarget && !ll_ilm_is_rewriting()) { + if (opcx == IM_MPLOOP) { + if (++mp_loop_nest_level == mp_loop_second_nest_level) { + omit_loop_nesting = true; + } + } + else if ((opcx == IM_MPLOOPFINI) && + (mp_loop_nest_level == mp_loop_second_nest_level)) { + if (omit_loop_nesting) { + omit_loop_nesting = false; + } + } + else if (omit_loop_nesting) + { + //Do not expand ilm instructions for 2nd level of parallelism + return sptr1; + } + } if (!ll_ilm_is_rewriting()) { #ifdef OMP_OFFLOAD_LLVM From 9cd8030a8460f23f9d516cc4178db86800786c90 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Mon, 9 May 2022 07:32:01 -0500 Subject: [PATCH 18/18] Add lower and upper bounds args to kmpc_parallel_51 call Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/ompaccel.cpp | 9 +++++-- tools/flang2/flang2exe/outliner.cpp | 39 ++++++++++++++++++++--------- tools/flang2/flang2exe/outliner.h | 11 ++++++-- 3 files changed, 43 insertions(+), 16 deletions(-) diff --git
a/tools/flang2/flang2exe/ompaccel.cpp b/tools/flang2/flang2exe/ompaccel.cpp index 3893e8a6b1..e669e72542 100644 --- a/tools/flang2/flang2exe/ompaccel.cpp +++ b/tools/flang2/flang2exe/ompaccel.cpp @@ -2765,11 +2765,16 @@ exp_ompaccel_mploop(ILM *ilmp, int curilm) if (mploop_counter != 2) ili = ll_make_kmpc_for_static_init_simple_spmd(&loop_args, sched); else { - std::vector allocated_symbols; + std::vector allocated_symbols; + SPTR func_ptr = ll_make_helper_function_for_kmpc_parallel_51 + ((SPTR)0, + ompaccel_tinfo_get(gbl.currsub), + loop_args.lower, + loop_args.upper); int ilix = ll_make_kmpc_global_thread_num(); ilix = ll_make_kmpc_parallel_51(ilix, allocated_symbols, - (SPTR)0, /*TODO: replace with wrapper fn ptr */ + func_ptr, loop_args.lower, loop_args.upper); iltb.callfg = 1; diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index 6786f21e51..e3862b2517 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -477,13 +477,16 @@ ll_get_shared_arg(SPTR func_sptr) } void -ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL_TINFO *current_tinfo) +ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL_TINFO *current_tinfo, bool has_bounds_args) { int count = 0; int sym, dtype; char name[MXIDLEN + 2]; int dpdscp = aux.dpdsc_avl; int cnt = 0; + int number_of_prologue_args = 2; + if (has_bounds_args) + number_of_prologue_args += 2; //lower and upper bounds PARAMCTP(func_sptr, paramct); DPDSCP(func_sptr, dpdscp); @@ -492,8 +495,9 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL aux.dpdsc_size + paramct + 100); while (paramct--) { - if (current_tinfo && cnt >= 2) - sprintf(name, "%s", SYMNAME(ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym)); + if (current_tinfo && cnt >= number_of_prologue_args) + sprintf(name, "%s", + 
SYMNAME(ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-number_of_prologue_args].device_sym)); else sprintf(name, "%sArg%d", SYMNAME(func_sptr), count++); sym = getsymbol(name); @@ -2685,7 +2689,10 @@ static bool is_complex_type(DTYPE dt) } SPTR -ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *orig_tinfo) +ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, + OMPACCEL_TINFO *orig_tinfo, + SPTR lower_bound, + SPTR upper_bound) { OMPACCEL_TINFO *current_tinfo; SPTR func_sptr; @@ -2695,21 +2702,29 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or orig_tinfo->n_reduction_symbols; int func_args_cnt = get_n_symbols(orig_tinfo); func_args_cnt += 2; // global_tid, bound_tid + target_info args + if (lower_bound && upper_bound) + func_args_cnt += 2; // + lower_bound + upper_bound std::vector func_args(func_args_cnt); auto *symbols = orig_tinfo->symbols; - func_args[0] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // global_tid - func_args[1] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // bound_tid - - for (int k = 2; k < func_args_cnt; k++) { + bool has_bounds_args = lower_bound && upper_bound; + int i = 2; + func_args[0] = get_type(2, TY_PTR, DT_INT8);// global_tid + func_args[1] = get_type(2, TY_PTR, DT_INT8);// bound_tid + if (has_bounds_args) { + func_args[2] = DT_INT8; //lower_bound + func_args[3] = DT_INT8; //upper_bound + i += 2; + } + for (; i < func_args_cnt; i++) { if(DT_ISSCALAR( DTYPEG(symbols->device_sym)) && !is_complex_type(DTYPEG(symbols->device_sym))) { - func_args[k] = DT_CPTR; + func_args[i] = DT_CPTR; } else if (STYPEG(symbols->host_sym) == ST_STRUCT) { - func_args[k] = DT_CPTR; + func_args[i] = DT_CPTR; } else { - func_args[k] = DTYPEG(symbols->device_sym); + func_args[i] = DTYPEG(symbols->device_sym); } symbols++; } @@ -2729,7 +2744,7 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or ADDRTKNP(func_sptr, 1); OMPACCFUNCDEVP(func_sptr, 1); current_tinfo = 
ompaccel_tinfo_create(func_sptr, max_nargs); - ll_make_ftn_outlined_params(func_sptr, func_args_cnt, func_args.data(), current_tinfo); + ll_make_ftn_outlined_params(func_sptr, func_args_cnt, func_args.data(), current_tinfo, has_bounds_args); ll_process_routine_parameters(func_sptr); return func_sptr; } diff --git a/tools/flang2/flang2exe/outliner.h b/tools/flang2/flang2exe/outliner.h index b93a25b65d..99129d86bc 100644 --- a/tools/flang2/flang2exe/outliner.h +++ b/tools/flang2/flang2exe/outliner.h @@ -247,7 +247,11 @@ void ilm_outlined_pad_ilm(int curilm); /** \brief ... */ -void ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL_TINFO *current_tinfo = nullptr); +void ll_make_ftn_outlined_params(int func_sptr, + int paramct, + DTYPE *argtype, + OMPACCEL_TINFO *current_tinfo = nullptr, + bool has_bound_args = false); /** \brief ... @@ -400,5 +404,8 @@ bool outlined_need_recompile(); void ll_set_ompaccel_currfunc(bool isILMrecompile); SPTR -ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *orig_tinfo); +ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, + OMPACCEL_TINFO *orig_tinfo, + SPTR lower_bound = SPTR(0), + SPTR upper_bound = SPTR(0)); #endif /* OUTLINER_H_ */