Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -453,10 +453,24 @@ static LogicalResult processParallelLoop(
1, 2,
rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0) +
rewriter.getAffineSymbolExpr(1));
// Map through cloningMap first so we use values valid at the launch
// scope, then ensure they are launch-independent (or cloned constants).
Value mappedStep = cloningMap.lookupOrDefault(step);
Value mappedLowerBound = cloningMap.lookupOrDefault(lowerBound);

mappedStep = ensureLaunchIndependent(mappedStep);
mappedLowerBound = ensureLaunchIndependent(mappedLowerBound);

// If either cannot be made available above the launch, fail gracefully.
if (!mappedStep || !mappedLowerBound) {
return rewriter.notifyMatchFailure(
parallelOp, "lower bound / step must be constant or defined above "
"the gpu.launch");
}

newIndex = AffineApplyOp::create(
rewriter, loc, annotation.getMap().compose(lowerAndStep),
ValueRange{operand, ensureLaunchIndependent(step),
ensureLaunchIndependent(lowerBound)});
ValueRange{operand, mappedStep, mappedLowerBound});
// If there was also a bound, insert that, too.
// TODO: Check that we do not assign bounds twice.
if (annotation.getBound()) {
Expand Down
48 changes: 48 additions & 0 deletions mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -673,3 +673,51 @@ func.func @nested_parallel_with_side_effect() {

// CHECK: gpu.launch
// CHECK-NOT: scf.parallel

// -----

// Test input: a single 2-D scf.parallel whose two dimensions carry
// gpu.loop_dim_map annotations for block_x and thread_x, each with an
// identity map and an identity bound. The function takes no arguments and
// returns nothing.
func.func @scf2gpu_index_creation_2d() {
// Constants used as the loop's lower bound (0), step (1) and upper bound (32).
// They are defined outside the scf.parallel.
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c32 = arith.constant 32 : index

// Single 2-D scf.parallel mapped to block_x and thread_x.
// Use both IVs so the conversion must compute indices.
scf.parallel (%bx, %tx) = (%c0, %c0) to (%c32, %c32) step (%c1, %c1) {
// One use per induction variable; the results %u and %v are not used
// anywhere else in this function.
%u = arith.addi %bx, %c0 : index
%v = arith.addi %tx, %c0 : index
} {
mapping = [
#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
]
}
return
}

// CHECK-LABEL: func.func @scf2gpu_index_creation_2d
// CHECK: gpu.launch
// CHECK: affine.apply

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// CHECK-LABEL: func.func @scf2gpu_index_creation_2d -> // CHECK-LABEL: func @scf2gpu_index_creation_2d

Also, you should capture the SSA values and match on them, rather than simply matching the operation name.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// -----

// Test input: a 1-D scf.parallel whose only dimension carries a
// gpu.loop_dim_map annotation for thread_x with an identity map and an
// identity bound. The function takes no arguments and returns nothing.
func.func @scf2gpu_index_creation_1d() {
// Constants used as the loop's lower bound (0), step (1) and upper bound (64).
// They are defined outside the scf.parallel.
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c64 = arith.constant 64 : index

scf.parallel (%t) = (%c0) to (%c64) step (%c1) {
// Single use of the induction variable; the result %w is not used anywhere
// else in this function.
%w = arith.addi %t, %c0 : index
} {
mapping = [
#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
]
}
return
}

// CHECK-LABEL: func.func @scf2gpu_index_creation_1d
// CHECK: gpu.launch
// CHECK: affine.apply

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The end of the file should contain only a single blank line, which will not be displayed on GitHub.


Loading