
Commit f3d74ae

[flang][OpenMP] Fix crash when a sliced array is specified in forall construct within workshare construct
This fixes two problems:
1. Thread-local variables sometimes are required to be parallelized. Added a special case to handle this in LowerWorkshare.cpp:isSafeToParallelize.
2. A race condition caused by a `nowait` added to the omp.workshare if it is the last operation in a block. This allowed multiple threads to execute the omp.workshare region concurrently. Since _FortranAPushValue modifies a shared stack, this concurrent access causes a crash. Disable the addition of `nowait` and rely on the implicit barrier at the end of the omp.workshare region.

Fixes #143330
1 parent 254b33f commit f3d74ae
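
To make the second problem concrete, here is a minimal C++/OpenMP analogue of the race; this is an illustrative sketch only, not flang output and not the reproducer from #143330. With `nowait` on a single construct inside a loop, one thread can still be inside iteration i's single block while another thread has already entered iteration i+1's, so both mutate the shared container at once, which mirrors how calls into the shared runtime stack (_FortranAPushValue) could race once `nowait` was emitted inside the fir.do_loop generated for the forall.

// Illustrative analogue only (assumed names, not flang code): sharedStack
// stands in for the Fortran runtime's shared temporary stack.
#include <vector>

static std::vector<int> sharedStack;

void racyPush(int n) {
#pragma omp parallel
  for (int i = 0; i < n; ++i) {
    // `nowait`: no barrier after the single region, so threads run ahead to
    // later iterations and push_back calls from different iterations overlap.
#pragma omp single nowait
    sharedStack.push_back(i); // data race, may corrupt the container
  }
}

void safePush(int n) {
#pragma omp parallel
  for (int i = 0; i < n; ++i) {
    // Without `nowait` the implicit barrier serializes the pushes, which is
    // the behaviour the commit restores for workshare inside a fir.do_loop.
#pragma omp single
    sharedStack.push_back(i);
  }
}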


flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp

Lines changed: 47 additions & 2 deletions
@@ -136,8 +136,48 @@ static bool mustParallelizeOp(Operation *op) {
 }
 
 static bool isSafeToParallelize(Operation *op) {
-  return isa<hlfir::DeclareOp>(op) || isa<fir::DeclareOp>(op) ||
-         isMemoryEffectFree(op);
+  if (isa<hlfir::DeclareOp>(op) || isa<fir::DeclareOp>(op) ||
+      isMemoryEffectFree(op))
+    return true;
+
+  // Special case: Thread-local variables allocated in the OpenMP parallel
+  // region (using fir.alloca) are private to each thread and thus safe (and
+  // sometimes required) to parallelize. If the compiler wraps such load/stores
+  // in an omp.workshare block, only one thread updates its local copy, while
+  // all other threads read uninitialized data (see issue #143330).
+  if (fir::StoreOp store = dyn_cast<fir::StoreOp>(op)) {
+    Value mem = store.getMemref();
+    while (fir::DeclareOp declare = mem.getDefiningOp<fir::DeclareOp>())
+      mem = declare.getMemref();
+    while (hlfir::DeclareOp declare = mem.getDefiningOp<hlfir::DeclareOp>())
+      mem = declare.getMemref();
+
+    if (fir::AllocaOp alloca = mem.getDefiningOp<fir::AllocaOp>()) {
+      if (mlir::omp::ParallelOp parallelOp =
+              alloca->getParentOfType<mlir::omp::ParallelOp>()) {
+        if (op->getParentOfType<mlir::omp::ParallelOp>() == parallelOp)
+          return true;
+      }
+    }
+  }
+
+  if (fir::LoadOp load = dyn_cast<fir::LoadOp>(op)) {
+    Value mem = load.getMemref();
+    while (fir::DeclareOp declare = mem.getDefiningOp<fir::DeclareOp>())
+      mem = declare.getMemref();
+    while (hlfir::DeclareOp declare = mem.getDefiningOp<hlfir::DeclareOp>())
+      mem = declare.getMemref();
+
+    if (fir::AllocaOp alloca = mem.getDefiningOp<fir::AllocaOp>()) {
+      if (mlir::omp::ParallelOp parallelOp =
+              alloca->getParentOfType<mlir::omp::ParallelOp>()) {
+        if (op->getParentOfType<mlir::omp::ParallelOp>() == parallelOp)
+          return true;
+      }
+    }
+  }
+
+  return false;
 }
 
 /// Simple shallow copies suffice for our purposes in this pass, so we implement
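
The store and load branches in the new code are identical apart from how the memref is obtained. As a hedged refactoring sketch, not part of the commit, the shared walk could be hoisted into one helper; `isThreadLocalAlloca` is a hypothetical name, and the snippet assumes the headers and using-declarations already present in LowerWorkshare.cpp:

// Hypothetical helper: walk through fir.declare / hlfir.declare wrappers and
// report whether the underlying storage is a fir.alloca created in the same
// omp.parallel region that encloses the accessing operation.
static bool isThreadLocalAlloca(mlir::Operation *op, mlir::Value mem) {
  while (auto declare = mem.getDefiningOp<fir::DeclareOp>())
    mem = declare.getMemref();
  while (auto declare = mem.getDefiningOp<hlfir::DeclareOp>())
    mem = declare.getMemref();

  if (auto alloca = mem.getDefiningOp<fir::AllocaOp>())
    if (auto parallelOp = alloca->getParentOfType<mlir::omp::ParallelOp>())
      return op->getParentOfType<mlir::omp::ParallelOp>() == parallelOp;
  return false;
}

// The two branches in isSafeToParallelize could then collapse to:
//   if (auto store = dyn_cast<fir::StoreOp>(op))
//     return isThreadLocalAlloca(op, store.getMemref());
//   if (auto load = dyn_cast<fir::LoadOp>(op))
//     return isThreadLocalAlloca(op, load.getMemref());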
@@ -335,6 +375,11 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
 
   for (auto [i, opOrSingle] : llvm::enumerate(regions)) {
     bool isLast = i + 1 == regions.size();
+    // Make sure shared runtime calls are synchronized: disable `nowait`
+    // insertion, and rely on the implicit barrier at the end of the
+    // omp.workshare block.
+    if (isa<fir::DoLoopOp>(block.getParentOp()))
+      isLast = false;
     if (std::holds_alternative<SingleRegion>(opOrSingle)) {
       OpBuilder singleBuilder(sourceRegion.getContext());
       Block *singleBlock = new Block();
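
Equivalently, the new check could be phrased as a small predicate so the fir.do_loop special case reads as a policy decision. This is a hedged sketch with a hypothetical name (`allowNowaitOnLastSingle`), not code from the commit, and it assumes the same surroundings as the loop above:

// Hypothetical predicate: the last single region in a block may normally drop
// its trailing barrier via `nowait`, but not when the block sits inside a
// fir.do_loop, where each iteration's single regions can call into shared
// runtime state (e.g. _FortranAPushValue) and must stay separated by the
// implicit barrier.
static bool allowNowaitOnLastSingle(mlir::Block &block, bool isLastRegion) {
  if (mlir::isa<fir::DoLoopOp>(block.getParentOp()))
    return false;
  return isLastRegion;
}

// Usage inside the loop over regions (sketch):
//   bool isLast = allowNowaitOnLastSingle(block, i + 1 == regions.size());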
