Skip to content

Commit 5ce4d4c

Browse files
ftynsefabianmcg
andauthored
[mlir] fix memory effects in GPU barrier elimination (#117432)
Existing implementation may trigger infinite cycles when collecting effects above or below the current block after wrapping around a loop-like construct. Limit this case to only looking at the immediate block (loop body). This is correct because wrap around is intended to consider effects of different iterations of the same loop and shouldn't be existing the loop block. Reported-by: Fabian Mora <[email protected]> Co-authored-by: Fabian Mora <[email protected]>
1 parent 590f451 commit 5ce4d4c

File tree

2 files changed

+68
-28
lines changed

2 files changed

+68
-28
lines changed

mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,29 @@ collectEffects(Operation *op,
132132
return false;
133133
}
134134

135+
/// Get all effects before the given operation caused by other operations in the
136+
/// same block. That is, this will not consider operations beyond the block.
137+
static bool
138+
getEffectsBeforeInBlock(Operation *op,
139+
SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
140+
bool stopAtBarrier) {
141+
if (op == &op->getBlock()->front())
142+
return true;
143+
144+
for (Operation *it = op->getPrevNode(); it != nullptr;
145+
it = it->getPrevNode()) {
146+
if (isa<BarrierOp>(it)) {
147+
if (stopAtBarrier)
148+
return true;
149+
continue;
150+
}
151+
152+
if (!collectEffects(it, effects))
153+
return false;
154+
}
155+
return true;
156+
}
157+
135158
/// Collects memory effects from operations that may be executed before `op` in
136159
/// a trivial structured control flow, e.g., without branches. Stops at the
137160
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
@@ -153,19 +176,7 @@ getEffectsBefore(Operation *op,
153176
}
154177

155178
// Collect all effects before the op.
156-
if (op != &op->getBlock()->front()) {
157-
for (Operation *it = op->getPrevNode(); it != nullptr;
158-
it = it->getPrevNode()) {
159-
if (isa<BarrierOp>(it)) {
160-
if (stopAtBarrier)
161-
return true;
162-
else
163-
continue;
164-
}
165-
if (!collectEffects(it, effects))
166-
return false;
167-
}
168-
}
179+
getEffectsBeforeInBlock(op, effects, stopAtBarrier);
169180

170181
// Stop if reached the parallel region boundary.
171182
if (isParallelRegionBoundary(op->getParentOp()))
@@ -191,8 +202,8 @@ getEffectsBefore(Operation *op,
191202
// appropriately.
192203
if (isSequentialLoopLike(op->getParentOp())) {
193204
// Assuming loop terminators have no side effects.
194-
return getEffectsBefore(op->getBlock()->getTerminator(), effects,
195-
/*stopAtBarrier=*/true);
205+
return getEffectsBeforeInBlock(op->getBlock()->getTerminator(), effects,
206+
/*stopAtBarrier=*/true);
196207
}
197208

198209
// If the parent operation is not guaranteed to execute its (single-block)
@@ -212,6 +223,28 @@ getEffectsBefore(Operation *op,
212223
return !conservative;
213224
}
214225

226+
/// Get all effects after the given operation caused by other operations in the
227+
/// same block. That is, this will not consider operations beyond the block.
228+
static bool
229+
getEffectsAfterInBlock(Operation *op,
230+
SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
231+
bool stopAtBarrier) {
232+
if (op == &op->getBlock()->back())
233+
return true;
234+
235+
for (Operation *it = op->getNextNode(); it != nullptr;
236+
it = it->getNextNode()) {
237+
if (isa<BarrierOp>(it)) {
238+
if (stopAtBarrier)
239+
return true;
240+
continue;
241+
}
242+
if (!collectEffects(it, effects))
243+
return false;
244+
}
245+
return true;
246+
}
247+
215248
/// Collects memory effects from operations that may be executed after `op` in
216249
/// a trivial structured control flow, e.g., without branches. Stops at the
217250
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
@@ -233,17 +266,7 @@ getEffectsAfter(Operation *op,
233266
}
234267

235268
// Collect all effects after the op.
236-
if (op != &op->getBlock()->back())
237-
for (Operation *it = op->getNextNode(); it != nullptr;
238-
it = it->getNextNode()) {
239-
if (isa<BarrierOp>(it)) {
240-
if (stopAtBarrier)
241-
return true;
242-
continue;
243-
}
244-
if (!collectEffects(it, effects))
245-
return false;
246-
}
269+
getEffectsAfterInBlock(op, effects, stopAtBarrier);
247270

248271
// Stop if reached the parallel region boundary.
249272
if (isParallelRegionBoundary(op->getParentOp()))
@@ -272,8 +295,8 @@ getEffectsAfter(Operation *op,
272295
return true;
273296

274297
bool exact = collectEffects(&op->getBlock()->front(), effects);
275-
return getEffectsAfter(&op->getBlock()->front(), effects,
276-
/*stopAtBarrier=*/true) &&
298+
return getEffectsAfterInBlock(&op->getBlock()->front(), effects,
299+
/*stopAtBarrier=*/true) &&
277300
exact;
278301
}
279302

mlir/test/Dialect/GPU/barrier-elimination.mlir

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,20 @@ attributes {__parallel_region_boundary_for_test} {
182182
%4 = memref.load %C[] : memref<f32>
183183
return %0, %1, %2, %3, %4 : f32, f32, f32, f32, f32
184184
}
185+
186+
// CHECK-LABEL: @nested_loop_barrier_only
187+
func.func @nested_loop_barrier_only() attributes {__parallel_region_boundary_for_test} {
188+
%c0 = arith.constant 0 : index
189+
%c42 = arith.constant 42 : index
190+
%c1 = arith.constant 1 : index
191+
// Note: the barrier can be removed and as consequence the loops get folded
192+
// by the greedy rewriter.
193+
// CHECK-NOT: scf.for
194+
// CHECK-NOT: gpu.barrier
195+
scf.for %j = %c0 to %c42 step %c1 {
196+
scf.for %i = %c0 to %c42 step %c1 {
197+
gpu.barrier
198+
}
199+
}
200+
return
201+
}

0 commit comments

Comments
 (0)