@@ -592,7 +592,20 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
592592 return bodyGenStatus;
593593}
594594
595+ namespace {
596+ // / Contains the arguments for an LLVM store operation
597+ struct DeferredStore {
598+ DeferredStore (llvm::Value *value, llvm::Value *address)
599+ : value(value), address(address) {}
600+
601+ llvm::Value *value;
602+ llvm::Value *address;
603+ };
604+ } // namespace
605+
595606// / Allocate space for privatized reduction variables.
607+ // / `deferredStores` contains information to create store operations which needs
608+ // / to be inserted after all allocas
596609template <typename T>
597610static LogicalResult
598611allocReductionVars (T loop, ArrayRef<BlockArgument> reductionArgs,
@@ -602,13 +615,13 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
602615 SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
603616 SmallVectorImpl<llvm::Value *> &privateReductionVariables,
604617 DenseMap<Value, llvm::Value *> &reductionVariableMap,
618+ SmallVectorImpl<DeferredStore> &deferredStores,
605619 llvm::ArrayRef<bool > isByRefs) {
606620 llvm::IRBuilderBase::InsertPointGuard guard (builder);
607621 builder.SetInsertPoint (allocaIP.getBlock ()->getTerminator ());
608622
609623 // delay creating stores until after all allocas
610- SmallVector<std::pair<llvm::Value *, llvm::Value *>> storesToCreate;
611- storesToCreate.reserve (loop.getNumReductionVars ());
624+ deferredStores.reserve (loop.getNumReductionVars ());
612625
613626 for (std::size_t i = 0 ; i < loop.getNumReductionVars (); ++i) {
614627 Region &allocRegion = reductionDecls[i].getAllocRegion ();
@@ -628,7 +641,7 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
628641 // variable allocated in the inlined region)
629642 llvm::Value *var = builder.CreateAlloca (
630643 moduleTranslation.convertType (reductionDecls[i].getType ()));
631- storesToCreate .emplace_back (phis[0 ], var);
644+ deferredStores .emplace_back (phis[0 ], var);
632645
633646 privateReductionVariables[i] = var;
634647 moduleTranslation.mapValue (reductionArgs[i], phis[0 ]);
@@ -644,10 +657,6 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
644657 }
645658 }
646659
647- // TODO: further delay this so it doesn't come in the entry block at all
648- for (auto [data, addr] : storesToCreate)
649- builder.CreateStore (data, addr);
650-
651660 return success ();
652661}
653662
@@ -819,12 +828,19 @@ static LogicalResult allocAndInitializeReductionVars(
819828 if (op.getNumReductionVars () == 0 )
820829 return success ();
821830
831+ SmallVector<DeferredStore> deferredStores;
832+
822833 if (failed (allocReductionVars (op, reductionArgs, builder, moduleTranslation,
823834 allocaIP, reductionDecls,
824835 privateReductionVariables, reductionVariableMap,
825- isByRef)))
836+ deferredStores, isByRef)))
826837 return failure ();
827838
839+ // store result of the alloc region to the allocated pointer to the real
840+ // reduction variable
841+ for (auto [data, addr] : deferredStores)
842+ builder.CreateStore (data, addr);
843+
828844 // Before the loop, store the initial values of reductions into reduction
829845 // variables. Although this could be done after allocas, we don't want to mess
830846 // up with the alloca insertion point.
@@ -1359,6 +1375,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
13591375 collectReductionDecls (opInst, reductionDecls);
13601376 SmallVector<llvm::Value *> privateReductionVariables (
13611377 opInst.getNumReductionVars ());
1378+ SmallVector<DeferredStore> deferredStores;
13621379
13631380 auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
13641381 // Allocate reduction vars
@@ -1373,10 +1390,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
13731390 InsertPointTy (allocaIP.getBlock (),
13741391 allocaIP.getBlock ()->getTerminator ()->getIterator ());
13751392
1376- if (failed (allocReductionVars (opInst, reductionArgs, builder,
1377- moduleTranslation, allocaIP, reductionDecls ,
1378- privateReductionVariables,
1379- reductionVariableMap , isByRef)))
1393+ if (failed (allocReductionVars (
1394+ opInst, reductionArgs, builder, moduleTranslation, allocaIP,
1395+ reductionDecls, privateReductionVariables, reductionVariableMap ,
1396+ deferredStores , isByRef)))
13801397 bodyGenStatus = failure ();
13811398
13821399 // Initialize reduction vars
@@ -1401,6 +1418,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
14011418
14021419 builder.SetInsertPoint (initBlock->getFirstNonPHIOrDbgOrAlloca ());
14031420
1421+ // insert stores deferred until after all allocas
1422+ // these store the results of the alloc region into the allocation for the
1423+ // pointer to the reduction variable
1424+ for (auto [data, addr] : deferredStores)
1425+ builder.CreateStore (data, addr);
1426+
14041427 for (unsigned i = 0 ; i < opInst.getNumReductionVars (); ++i) {
14051428 SmallVector<llvm::Value *> phis;
14061429
0 commit comments