@@ -259,7 +259,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
259259 checkInReduction (op, result);
260260 checkMergeable (op, result);
261261 checkPriority (op, result);
262- checkPrivate (op, result);
263262 checkUntied (op, result);
264263 })
265264 .Case ([&](omp::TaskgroupOp op) {
@@ -701,9 +700,9 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
701700
702701// / Populates `privatizations` with privatization declarations used for the
703702// / given op.
704- // / TODO: generalise beyond ParallelOp
703+ template < class OP >
705704static void collectPrivatizationDecls (
706- omp::ParallelOp op, SmallVectorImpl<omp::PrivateClauseOp> &privatizations) {
705+ OP op, SmallVectorImpl<omp::PrivateClauseOp> &privatizations) {
707706 std::optional<ArrayAttr> attr = op.getPrivateSyms ();
708707 if (!attr)
709708 return ;
@@ -1252,6 +1251,79 @@ static LogicalResult allocAndInitializeReductionVars(
12521251 return success ();
12531252}
12541253
1254+ // / Allocate delayed private variables for `opInst`. Returns the basic block that
1255+ // / follows all of these allocations, or an error if an `alloc` region cannot be
1256+ // / inlined. One llvm::Value * per private variable is appended to llvmPrivateVars.
1257+ template <class OP >
1258+ static llvm::Expected<llvm::BasicBlock *>
1259+ allocatePrivateVars (OP opInst, llvm::IRBuilderBase &builder,
1260+ LLVM::ModuleTranslation &moduleTranslation,
1261+ MutableArrayRef<BlockArgument> privateBlockArgs,
1262+ MutableArrayRef<omp::PrivateClauseOp> privateDecls,
1263+ llvm::SmallVector<llvm::Value *> &llvmPrivateVars,
1264+ const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
1265+ // Allocate private vars
1266+ llvm::BranchInst *allocaTerminator =
1267+ llvm::cast<llvm::BranchInst>(allocaIP.getBlock ()->getTerminator ());
1268+ builder.SetInsertPoint (allocaTerminator);
1269+ assert (allocaTerminator->getNumSuccessors () == 1 &&
1270+ " This is an unconditional branch created by OpenMPIRBuilder" );
1271+ llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor (0 );
1272+
1273+ // FIXME: Some of the allocation regions do more than just allocating.
1274+ // They read from their block argument (amongst other non-alloca things).
1275+ // When OpenMPIRBuilder outlines the region (e.g. a parallel region) into a different
1276+ // function it places the loads for live in-values (such as these block
1277+ // arguments) at the end of the entry block (because the entry block is
1278+ // assumed to contain only allocas). Therefore, if we put these complicated
1279+ // alloc blocks in the entry block, these will not dominate the availability
1280+ // of the live-in values they are using. Fix this by adding a latealloc
1281+ // block after the entry block to put these in (this also helps to avoid
1282+ // mixing non-alloca code with allocas).
1283+ // Alloc regions which do not use the block argument can still be placed in
1284+ // the entry block (therefore keeping the allocas together).
1285+ llvm::BasicBlock *privAllocBlock = nullptr ;
1286+ if (!privateBlockArgs.empty ())
1287+ privAllocBlock = splitBB (builder, true , " omp.private.latealloc" );
1288+ for (unsigned i = 0 ; i < privateBlockArgs.size (); ++i) {
1289+ Region &allocRegion = privateDecls[i].getAllocRegion ();
1290+
1291+ // map allocation region block argument
1292+ llvm::Value *nonPrivateVar =
1293+ moduleTranslation.lookupValue (opInst.getPrivateVars ()[i]);
1294+ assert (nonPrivateVar);
1295+ moduleTranslation.mapValue (privateDecls[i].getAllocMoldArg (),
1296+ nonPrivateVar);
1297+
1298+ // in-place convert the private allocation region
1299+ SmallVector<llvm::Value *, 1 > phis;
1300+ if (privateDecls[i].getAllocMoldArg ().getUses ().empty ()) {
1301+ // TODO this should use
1302+ // allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
1303+ // the code for fetching the thread id. Not doing this for now to avoid
1304+ // test churn.
1305+ builder.SetInsertPoint (allocaIP.getBlock ()->getTerminator ());
1306+ } else {
1307+ builder.SetInsertPoint (privAllocBlock->getTerminator ());
1308+ }
1309+ if (failed (inlineConvertOmpRegions (allocRegion, " omp.private.alloc" ,
1310+ builder, moduleTranslation, &phis)))
1311+ return llvm::createStringError (
1312+ " failed to inline `alloc` region of `omp.private`" );
1313+
1314+ assert (phis.size () == 1 && " expected one allocation to be yielded" );
1315+
1316+ moduleTranslation.mapValue (privateBlockArgs[i], phis[0 ]);
1317+ llvmPrivateVars.push_back (phis[0 ]);
1318+
1319+ // clear alloc region block argument mapping in case it needs to be
1320+ // re-created with a different source for another use of the same
1321+ // privatization decl (omp.private op)
1322+ moduleTranslation.forgetMapping (allocRegion);
1323+ }
1324+ return afterAllocas;
1325+ }
1326+
12551327static LogicalResult
12561328convertOmpSections (Operation &opInst, llvm::IRBuilderBase &builder,
12571329 LLVM::ModuleTranslation &moduleTranslation) {
@@ -1486,16 +1558,98 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
14861558 if (failed (checkImplementationStatus (*taskOp)))
14871559 return failure ();
14881560
1489- auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
1561+ // Collect delayed privatisation declarations
1562+ MutableArrayRef<BlockArgument> privateBlockArgs =
1563+ cast<omp::BlockArgOpenMPOpInterface>(*taskOp).getPrivateBlockArgs ();
1564+ SmallVector<llvm::Value *> llvmPrivateVars;
1565+ SmallVector<omp::PrivateClauseOp> privateDecls;
1566+ llvmPrivateVars.reserve (privateBlockArgs.size ());
1567+ privateDecls.reserve (privateBlockArgs.size ());
1568+ collectPrivatizationDecls (taskOp, privateDecls);
1569+
1570+ auto bodyCB = [&](InsertPointTy allocaIP,
1571+ InsertPointTy codegenIP) -> llvm::Error {
14901572 // Save the alloca insertion point on ModuleTranslation stack for use in
14911573 // nested regions.
14921574 LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame (
14931575 moduleTranslation, allocaIP);
14941576
1577+ llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars (
1578+ taskOp, builder, moduleTranslation, privateBlockArgs, privateDecls,
1579+ llvmPrivateVars, allocaIP);
1580+ if (handleError (afterAllocas, *taskOp).failed ())
1581+ return llvm::make_error<PreviouslyReportedError>();
1582+
1583+ // Apply copy region for firstprivate
1584+ bool needsFirstPrivate =
1585+ llvm::any_of (privateDecls, [](omp::PrivateClauseOp &privOp) {
1586+ return privOp.getDataSharingType () ==
1587+ omp::DataSharingClauseType::FirstPrivate;
1588+ });
1589+ if (needsFirstPrivate) {
1590+ // Find the end of the allocation blocks
1591+ assert (afterAllocas.get ()->getSinglePredecessor ());
1592+ builder.SetInsertPoint (
1593+ afterAllocas.get ()->getSinglePredecessor ()->getTerminator ());
1594+ llvm::BasicBlock *copyBlock =
1595+ splitBB (builder, /* CreateBranch=*/ true , " omp.private.copy" );
1596+ builder.SetInsertPoint (copyBlock->getFirstNonPHIOrDbgOrAlloca ());
1597+ }
1598+ for (unsigned i = 0 ; i < privateBlockArgs.size (); ++i) {
1599+ if (privateDecls[i].getDataSharingType () !=
1600+ omp::DataSharingClauseType::FirstPrivate)
1601+ continue ;
1602+
1603+ // copyRegion implements `lhs = rhs`
1604+ Region &copyRegion = privateDecls[i].getCopyRegion ();
1605+
1606+ // map copyRegion rhs arg
1607+ llvm::Value *nonPrivateVar =
1608+ moduleTranslation.lookupValue (taskOp.getPrivateVars ()[i]);
1609+ assert (nonPrivateVar);
1610+ moduleTranslation.mapValue (privateDecls[i].getCopyMoldArg (),
1611+ nonPrivateVar);
1612+
1613+ // map copyRegion lhs arg
1614+ moduleTranslation.mapValue (privateDecls[i].getCopyPrivateArg (),
1615+ llvmPrivateVars[i]);
1616+
1617+ // in-place convert copy region
1618+ builder.SetInsertPoint (builder.GetInsertBlock ()->getTerminator ());
1619+ if (failed (inlineConvertOmpRegions (copyRegion, " omp.private.copy" ,
1620+ builder, moduleTranslation)))
1621+ return llvm::createStringError (
1622+ " failed to inline `copy` region of an `omp.private` op in taskOp" );
1623+
1624+ // ignore unused value yielded from copy region
1625+
1626+ // clear copy region block argument mapping in case it needs to be
1627+ // re-created with different source for reuse of the same privatization decl
1628+ moduleTranslation.forgetMapping (copyRegion);
1629+ }
1630+
1631+ // translate the body of the task:
14951632 builder.restoreIP (codegenIP);
1496- return convertOmpOpRegions (taskOp.getRegion (), " omp.task.region" , builder,
1497- moduleTranslation)
1498- .takeError ();
1633+ auto continuationBlockOrError = convertOmpOpRegions (
1634+ taskOp.getRegion (), " omp.task.region" , builder, moduleTranslation);
1635+ if (failed (handleError (continuationBlockOrError, *taskOp)))
1636+ return llvm::make_error<PreviouslyReportedError>();
1637+
1638+ // private variable deallocation
1639+ SmallVector<Region *> privateCleanupRegions;
1640+ llvm::transform (privateDecls, std::back_inserter (privateCleanupRegions),
1641+ [](omp::PrivateClauseOp privatizer) {
1642+ return &privatizer.getDeallocRegion ();
1643+ });
1644+
1645+ builder.SetInsertPoint (continuationBlockOrError.get ()->getTerminator ());
1646+ if (failed (inlineOmpRegionCleanup (
1647+ privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
1648+ " omp.private.dealloc" , /* shouldLoadCleanupRegionArg=*/ false )))
1649+ return llvm::createStringError (" failed to inline `dealloc` region of an "
1650+ " `omp.private` op in an omp.task" );
1651+
1652+ return llvm::Error::success ();
15011653 };
15001654
15011655 SmallVector<llvm::OpenMPIRBuilder::DependData> dds;
@@ -1740,65 +1894,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
17401894
17411895 auto bodyGenCB = [&](InsertPointTy allocaIP,
17421896 InsertPointTy codeGenIP) -> llvm::Error {
1743- // Allocate private vars
1744- llvm::BranchInst *allocaTerminator =
1745- llvm::cast<llvm::BranchInst>(allocaIP.getBlock ()->getTerminator ());
1746- builder.SetInsertPoint (allocaTerminator);
1747- assert (allocaTerminator->getNumSuccessors () == 1 &&
1748- " This is an unconditional branch created by OpenMPIRBuilder" );
1749- llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor (0 );
1750-
1751- // FIXME: Some of the allocation regions do more than just allocating.
1752- // They read from their block argument (amongst other non-alloca things).
1753- // When OpenMPIRBuilder outlines the parallel region into a different
1754- // function it places the loads for live in-values (such as these block
1755- // arguments) at the end of the entry block (because the entry block is
1756- // assumed to contain only allocas). Therefore, if we put these complicated
1757- // alloc blocks in the entry block, these will not dominate the availability
1758- // of the live-in values they are using. Fix this by adding a latealloc
1759- // block after the entry block to put these in (this also helps to avoid
1760- // mixing non-alloca code with allocas).
1761- // Alloc regions which do not use the block argument can still be placed in
1762- // the entry block (therefore keeping the allocas together).
1763- llvm::BasicBlock *privAllocBlock = nullptr ;
1764- if (!privateBlockArgs.empty ())
1765- privAllocBlock = splitBB (builder, true , " omp.private.latealloc" );
1766- for (unsigned i = 0 ; i < privateBlockArgs.size (); ++i) {
1767- Region &allocRegion = privateDecls[i].getAllocRegion ();
1768-
1769- // map allocation region block argument
1770- llvm::Value *nonPrivateVar =
1771- moduleTranslation.lookupValue (opInst.getPrivateVars ()[i]);
1772- assert (nonPrivateVar);
1773- moduleTranslation.mapValue (privateDecls[i].getAllocMoldArg (),
1774- nonPrivateVar);
1775-
1776- // in-place convert the private allocation region
1777- SmallVector<llvm::Value *, 1 > phis;
1778- if (privateDecls[i].getAllocMoldArg ().getUses ().empty ()) {
1779- // TODO this should use
1780- // allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
1781- // the code for fetching the thread id. Not doing this for now to avoid
1782- // test churn.
1783- builder.SetInsertPoint (allocaIP.getBlock ()->getTerminator ());
1784- } else {
1785- builder.SetInsertPoint (privAllocBlock->getTerminator ());
1786- }
1787- if (failed (inlineConvertOmpRegions (allocRegion, " omp.private.alloc" ,
1788- builder, moduleTranslation, &phis)))
1789- return llvm::createStringError (
1790- " failed to inline `alloc` region of `omp.private`" );
1791-
1792- assert (phis.size () == 1 && " expected one allocation to be yielded" );
1793-
1794- moduleTranslation.mapValue (privateBlockArgs[i], phis[0 ]);
1795- llvmPrivateVars.push_back (phis[0 ]);
1796-
1797- // clear alloc region block argument mapping in case it needs to be
1798- // re-created with a different source for another use of the same
1799- // reduction decl
1800- moduleTranslation.forgetMapping (allocRegion);
1801- }
1897+ llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars (
1898+ opInst, builder, moduleTranslation, privateBlockArgs, privateDecls,
1899+ llvmPrivateVars, allocaIP);
1900+ if (handleError (afterAllocas, *opInst).failed ())
1901+ return llvm::make_error<PreviouslyReportedError>();
18021902
18031903 // Allocate reduction vars
18041904 DenseMap<Value, llvm::Value *> reductionVariableMap;
@@ -1824,9 +1924,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
18241924 });
18251925 if (needsFirstprivate) {
18261926 // Find the end of the allocation blocks
1827- assert (afterAllocas->getSinglePredecessor ());
1927+ assert (afterAllocas. get () ->getSinglePredecessor ());
18281928 builder.SetInsertPoint (
1829- afterAllocas->getSinglePredecessor ()->getTerminator ());
1929+ afterAllocas. get () ->getSinglePredecessor ()->getTerminator ());
18301930 llvm::BasicBlock *copyBlock =
18311931 splitBB (builder, /* CreateBranch=*/ true , " omp.private.copy" );
18321932 builder.SetInsertPoint (copyBlock->getFirstNonPHIOrDbgOrAlloca ());
0 commit comments