Skip to content

Commit 918244c

Browse files
authored
Properly set the upper bound of gpu.lane_id in rewriteForallToLanes. (iree-org#20513)
If the subgroup size is available, properly set the upper bound of gpu.lane_id when it is created. Fixes: iree-org#20385 Signed-off-by: Lin, Peiyong <[email protected]>
1 parent a4bb8e0 commit 918244c

File tree

3 files changed

+11
-3
lines changed

3 files changed

+11
-3
lines changed

compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func.func @distribute_lane_forall(%out : memref<?xi32>)
6060
}
6161

6262
// CHECK-LABEL: func @distribute_lane_forall
63-
// CHECK: %[[LANEID:.+]] = gpu.lane_id
63+
// CHECK: %[[LANEID:.+]] = gpu.lane_id upper_bound 32
6464
// CHECK: memref.store {{.*}}[%[[LANEID]]]
6565

6666
// -----

compiler/src/iree/compiler/Codegen/Dialect/GPU/TransformExtensions/test/distribute_lane_forall.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ module attributes { transform.with_named_sequence } {
2828
// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0 * 16)>
2929

3030
// CHECK-LABEL: func @distribute_lane_forall
31-
// CHECK: %[[LANE_ID:.+]] = gpu.lane_id
31+
// CHECK: %[[LANE_ID:.+]] = gpu.lane_id upper_bound 64
3232
// CHECK-NOT: scf.forall
3333
// CHECK: affine.delinearize_index %[[LANE_ID]] into (4, 16) : index, index
3434
// CHECK: linalg.copy

compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/Transforms.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1469,7 +1469,15 @@ static void rewriteForallToLanes(RewriterBase &rewriter, scf::ForallOp forallOp,
14691469
Location loc = forallOp->getLoc();
14701470
assert(isLaneMappableForall(forallOp) && "mapping non-lane forall op");
14711471

1472-
Value laneId = rewriter.create<gpu::LaneIdOp>(loc, /*upperBound=*/nullptr);
1472+
auto upperBounds = forallOp.getLoopUpperBounds();
1473+
std::optional<IntegerAttr> upperBound;
1474+
if (upperBounds && upperBounds->size() > 0) {
1475+
if (auto upperBoundAttr = (*upperBounds)[0].dyn_cast<Attribute>()) {
1476+
upperBound = dyn_cast<IntegerAttr>(upperBoundAttr);
1477+
}
1478+
}
1479+
Value laneId = rewriter.create<gpu::LaneIdOp>(
1480+
loc, upperBound ? rewriter.getIndexAttr(upperBound->getInt()) : nullptr);
14731481
rewriter.eraseOp(forallOp.getTerminator());
14741482
rewriter.setInsertionPoint(forallOp);
14751483
rewriter.inlineBlockBefore(forallOp.getBody(), forallOp, {laneId});

0 commit comments

Comments
 (0)