@@ -914,34 +914,21 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
914914 AllocaInst *TIDAddr = Builder.CreateAlloca (Int32, nullptr , " tid.addr" );
915915 AllocaInst *ZeroAddr = Builder.CreateAlloca (Int32, nullptr , " zero.addr" );
916916
917- // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
918- // program, otherwise we only need them for modeling purposes to get the
919- // associated arguments in the outlined function. In the former case,
920- // initialize the allocas properly, in the latter case, delete them later.
921- if (IfCondition) {
922- Builder.CreateStore (Constant::getNullValue (Int32), TIDAddr);
923- Builder.CreateStore (Constant::getNullValue (Int32), ZeroAddr);
924- } else {
925- ToBeDeleted.push_back (TIDAddr);
926- ToBeDeleted.push_back (ZeroAddr);
927- }
917+ // We only need TIDAddr and ZeroAddr for modeling purposes to get the
918+ // associated arguments in the outlined function, so we delete them later.
919+ ToBeDeleted.push_back (TIDAddr);
920+ ToBeDeleted.push_back (ZeroAddr);
928921
929922 // Create an artificial insertion point that will also ensure the blocks we
930923 // are about to split are not degenerated.
931924 auto *UI = new UnreachableInst (Builder.getContext (), InsertBB);
932925
933- Instruction *ThenTI = UI, *ElseTI = nullptr ;
934- if (IfCondition)
935- SplitBlockAndInsertIfThenElse (IfCondition, UI, &ThenTI, &ElseTI);
936-
937- BasicBlock *ThenBB = ThenTI->getParent ();
938- BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock (ThenTI, " omp.par.entry" );
939- BasicBlock *PRegBodyBB =
940- PRegEntryBB->splitBasicBlock (ThenTI, " omp.par.region" );
926+ BasicBlock *EntryBB = UI->getParent ();
927+ BasicBlock *PRegEntryBB = EntryBB->splitBasicBlock (UI, " omp.par.entry" );
928+ BasicBlock *PRegBodyBB = PRegEntryBB->splitBasicBlock (UI, " omp.par.region" );
941929 BasicBlock *PRegPreFiniBB =
942- PRegBodyBB->splitBasicBlock (ThenTI, " omp.par.pre_finalize" );
943- BasicBlock *PRegExitBB =
944- PRegPreFiniBB->splitBasicBlock (ThenTI, " omp.par.exit" );
930+ PRegBodyBB->splitBasicBlock (UI, " omp.par.pre_finalize" );
931+ BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock (UI, " omp.par.exit" );
945932
946933 auto FiniCBWrapper = [&](InsertPointTy IP) {
947934 // Hide "open-ended" blocks from the given FiniCB by setting the right jump
@@ -975,7 +962,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
975962 Builder.CreateLoad (Int32, ZeroAddr, " zero.addr.use" );
976963 ToBeDeleted.push_back (ZeroAddrUse);
977964
978- // ThenBB
965+ // EntryBB
979966 // |
980967 // V
981968 // PRegionEntryBB <- Privatization allocas are placed here.
@@ -998,8 +985,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
998985 BodyGenCB (InnerAllocaIP, CodeGenIP);
999986
1000987 LLVM_DEBUG (dbgs () << " After body codegen: " << *OuterFn << " \n " );
988+ FunctionCallee RTLFn;
989+ if (IfCondition)
990+ RTLFn = getOrCreateRuntimeFunctionPtr (OMPRTL___kmpc_fork_call_if);
991+ else
992+ RTLFn = getOrCreateRuntimeFunctionPtr (OMPRTL___kmpc_fork_call);
1001993
1002- FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr (OMPRTL___kmpc_fork_call);
1003994 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee ())) {
1004995 if (!F->hasMetadata (llvm::LLVMContext::MD_callback)) {
1005996 llvm::LLVMContext &Ctx = F->getContext ();
@@ -1034,15 +1025,30 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
10341025 CI->getParent ()->setName (" omp_parallel" );
10351026 Builder.SetInsertPoint (CI);
10361027
1037- // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
1028+ // Build call __kmpc_fork_call[_if] (Ident, n, microtask, var1, .., varn);
10381029 Value *ForkCallArgs[] = {
10391030 Ident, Builder.getInt32 (NumCapturedVars),
10401031 Builder.CreateBitCast (&OutlinedFn, ParallelTaskPtr)};
10411032
10421033 SmallVector<Value *, 16 > RealArgs;
10431034 RealArgs.append (std::begin (ForkCallArgs), std::end (ForkCallArgs));
1035+ if (IfCondition) {
1036+ Value *Cond = Builder.CreateSExtOrTrunc (IfCondition,
1037+ Type::getInt32Ty (M.getContext ()));
1038+ RealArgs.push_back (Cond);
1039+ }
10441040 RealArgs.append (CI->arg_begin () + /* tid & bound tid */ 2 , CI->arg_end ());
10451041
1042+ // __kmpc_fork_call_if always expects a void ptr as the last argument
1043+ // If there are no arguments, pass a null pointer.
1044+ auto PtrTy = Type::getInt8PtrTy (M.getContext ());
1045+ if (IfCondition && NumCapturedVars == 0 ) {
1046+ llvm::Value *Void = ConstantPointerNull::get (PtrTy);
1047+ RealArgs.push_back (Void);
1048+ }
1049+ if (IfCondition && RealArgs.back ()->getType () != PtrTy)
1050+ RealArgs.back () = Builder.CreateBitCast (RealArgs.back (), PtrTy);
1051+
10461052 Builder.CreateCall (RTLFn, RealArgs);
10471053
10481054 LLVM_DEBUG (dbgs () << " With fork_call placed: "
@@ -1055,35 +1061,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
10551061 Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin ();
10561062 Builder.CreateStore (Builder.CreateLoad (Int32, OutlinedAI), PrivTIDAddr);
10571063
1058- // If no "if" clause was present we do not need the call created during
1059- // outlining, otherwise we reuse it in the serialized parallel region.
1060- if (!ElseTI) {
1061- CI->eraseFromParent ();
1062- } else {
1063-
1064- // If an "if" clause was present we are now generating the serialized
1065- // version into the "else" branch.
1066- Builder.SetInsertPoint (ElseTI);
1067-
1068- // Build calls __kmpc_serialized_parallel(&Ident, GTid);
1069- Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
1070- Builder.CreateCall (
1071- getOrCreateRuntimeFunctionPtr (OMPRTL___kmpc_serialized_parallel),
1072- SerializedParallelCallArgs);
1073-
1074- // OutlinedFn(&gtid, &zero, CapturedStruct);
1075- CI->removeFromParent ();
1076- Builder.Insert (CI);
1077-
1078- // __kmpc_end_serialized_parallel(&Ident, GTid);
1079- Value *EndArgs[] = {Ident, ThreadID};
1080- Builder.CreateCall (
1081- getOrCreateRuntimeFunctionPtr (OMPRTL___kmpc_end_serialized_parallel),
1082- EndArgs);
1083-
1084- LLVM_DEBUG (dbgs () << " With serialized parallel region: "
1085- << *Builder.GetInsertBlock ()->getParent () << " \n " );
1086- }
1064+ CI->eraseFromParent ();
10871065
10881066 for (Instruction *I : ToBeDeleted)
10891067 I->eraseFromParent ();
0 commit comments