Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 51 additions & 28 deletions mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,29 @@ collectEffects(Operation *op,
return false;
}

/// Gathers into `effects` the memory effects of the operations that precede
/// `op` inside its own block, visiting them from the closest predecessor back
/// towards the start of the block. Operations outside of that block are never
/// inspected.
///
/// Barrier operations contribute no effects themselves: when `stopAtBarrier`
/// is set, the walk ends successfully at the first barrier encountered;
/// otherwise barriers are simply skipped.
///
/// Returns false as soon as `collectEffects` returns false for one of the
/// visited operations, and true otherwise.
static bool
getEffectsBeforeInBlock(Operation *op,
                        SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                        bool stopAtBarrier) {
  // The leading operation of a block has no predecessors to look at.
  Operation *firstInBlock = &op->getBlock()->front();
  if (op == firstInBlock)
    return true;

  Operation *prev = op->getPrevNode();
  while (prev) {
    if (isa<BarrierOp>(prev)) {
      // A barrier either terminates the walk or is stepped over.
      if (stopAtBarrier)
        return true;
    } else if (!collectEffects(prev, effects)) {
      return false;
    }
    prev = prev->getPrevNode();
  }
  return true;
}

/// Collects memory effects from operations that may be executed before `op` in
/// a trivial structured control flow, e.g., without branches. Stops at the
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
Expand All @@ -153,19 +176,7 @@ getEffectsBefore(Operation *op,
}

// Collect all effects before the op.
if (op != &op->getBlock()->front()) {
for (Operation *it = op->getPrevNode(); it != nullptr;
it = it->getPrevNode()) {
if (isa<BarrierOp>(it)) {
if (stopAtBarrier)
return true;
else
continue;
}
if (!collectEffects(it, effects))
return false;
}
}
getEffectsBeforeInBlock(op, effects, stopAtBarrier);

// Stop if reached the parallel region boundary.
if (isParallelRegionBoundary(op->getParentOp()))
Expand All @@ -191,8 +202,8 @@ getEffectsBefore(Operation *op,
// appropriately.
if (isSequentialLoopLike(op->getParentOp())) {
// Assuming loop terminators have no side effects.
return getEffectsBefore(op->getBlock()->getTerminator(), effects,
/*stopAtBarrier=*/true);
return getEffectsBeforeInBlock(op->getBlock()->getTerminator(), effects,
/*stopAtBarrier=*/true);
}

// If the parent operation is not guaranteed to execute its (single-block)
Expand All @@ -212,6 +223,28 @@ getEffectsBefore(Operation *op,
return !conservative;
}

/// Gathers into `effects` the memory effects of the operations that follow
/// `op` inside its own block, visiting them from the closest successor towards
/// the end of the block. Operations outside of that block are never inspected.
///
/// Barrier operations contribute no effects themselves: when `stopAtBarrier`
/// is set, the walk ends successfully at the first barrier encountered;
/// otherwise barriers are simply skipped.
///
/// Returns false as soon as `collectEffects` returns false for one of the
/// visited operations, and true otherwise.
static bool
getEffectsAfterInBlock(Operation *op,
                       SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                       bool stopAtBarrier) {
  // The trailing operation of a block has no successors to look at.
  Operation *lastInBlock = &op->getBlock()->back();
  if (op == lastInBlock)
    return true;

  Operation *next = op->getNextNode();
  while (next) {
    if (isa<BarrierOp>(next)) {
      // A barrier either terminates the walk or is stepped over.
      if (stopAtBarrier)
        return true;
    } else if (!collectEffects(next, effects)) {
      return false;
    }
    next = next->getNextNode();
  }
  return true;
}

/// Collects memory effects from operations that may be executed after `op` in
/// a trivial structured control flow, e.g., without branches. Stops at the
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
Expand All @@ -233,17 +266,7 @@ getEffectsAfter(Operation *op,
}

// Collect all effects after the op.
if (op != &op->getBlock()->back())
for (Operation *it = op->getNextNode(); it != nullptr;
it = it->getNextNode()) {
if (isa<BarrierOp>(it)) {
if (stopAtBarrier)
return true;
continue;
}
if (!collectEffects(it, effects))
return false;
}
getEffectsAfterInBlock(op, effects, stopAtBarrier);

// Stop if reached the parallel region boundary.
if (isParallelRegionBoundary(op->getParentOp()))
Expand Down Expand Up @@ -272,8 +295,8 @@ getEffectsAfter(Operation *op,
return true;

bool exact = collectEffects(&op->getBlock()->front(), effects);
return getEffectsAfter(&op->getBlock()->front(), effects,
/*stopAtBarrier=*/true) &&
return getEffectsAfterInBlock(&op->getBlock()->front(), effects,
/*stopAtBarrier=*/true) &&
exact;
}

Expand Down
17 changes: 17 additions & 0 deletions mlir/test/Dialect/GPU/barrier-elimination.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,20 @@ attributes {__parallel_region_boundary_for_test} {
%4 = memref.load %C[] : memref<f32>
return %0, %1, %2, %3, %4 : f32, f32, f32, f32, f32
}

// CHECK-LABEL: @nested_loop_barrier_only
// Two nested scf.for loops whose innermost body holds nothing except a
// barrier, in a function tagged with the test-only parallel region boundary
// attribute.
func.func @nested_loop_barrier_only() attributes {__parallel_region_boundary_for_test} {
%c0 = arith.constant 0 : index
%c42 = arith.constant 42 : index
%c1 = arith.constant 1 : index
// Note: the barrier can be removed and, as a consequence, the loops get folded
// by the greedy rewriter, so neither is expected to remain in the output.
// CHECK-NOT: scf.for
// CHECK-NOT: gpu.barrier
scf.for %j = %c0 to %c42 step %c1 {
scf.for %i = %c0 to %c42 step %c1 {
gpu.barrier
}
}
return
}
Loading