Skip to content

Commit 9617ce4

Browse files
authored
[vector][distribution] Bug fix in moveRegionToNewWarpOpAndAppendReturns (#153656)
1 parent 1371684 commit 9617ce4

File tree

2 files changed

+39
-14
lines changed

2 files changed

+39
-14
lines changed

mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "mlir/Dialect/Affine/IR/AffineOps.h"
1515
#include "mlir/Dialect/Arith/IR/Arith.h"
1616
#include "mlir/IR/Value.h"
17+
#include "llvm/ADT/DenseMap.h"
1718

1819
#include <numeric>
1920

@@ -57,26 +58,29 @@ WarpDistributionPattern::moveRegionToNewWarpOpAndAppendReturns(
5758
warpOp.getResultTypes().end());
5859
auto yield = cast<gpu::YieldOp>(
5960
warpOp.getBodyRegion().getBlocks().begin()->getTerminator());
60-
llvm::SmallSetVector<Value, 32> yieldValues(yield.getOperands().begin(),
61-
yield.getOperands().end());
61+
SmallVector<Value> yieldValues(yield.getOperands().begin(),
62+
yield.getOperands().end());
63+
llvm::SmallDenseMap<Value, unsigned> indexLookup;
64+
// Record the value -> first index mapping for faster lookup.
65+
for (auto [i, v] : llvm::enumerate(yieldValues)) {
66+
if (!indexLookup.count(v))
67+
indexLookup[v] = i;
68+
}
69+
6270
for (auto [value, type] : llvm::zip_equal(newYieldedValues, newReturnTypes)) {
63-
if (yieldValues.insert(value)) {
71+
// If the value already exists in the yield, don't create a new output.
72+
if (indexLookup.count(value)) {
73+
indices.push_back(indexLookup[value]);
74+
} else {
75+
// If the value is new, add it to the yield and to the types.
76+
yieldValues.push_back(value);
6477
types.push_back(type);
6578
indices.push_back(yieldValues.size() - 1);
66-
} else {
67-
// If the value already exit the region don't create a new output.
68-
for (auto [idx, yieldOperand] :
69-
llvm::enumerate(yieldValues.getArrayRef())) {
70-
if (yieldOperand == value) {
71-
indices.push_back(idx);
72-
break;
73-
}
74-
}
7579
}
7680
}
77-
yieldValues.insert_range(newYieldedValues);
81+
7882
WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndReplaceReturns(
79-
rewriter, warpOp, yieldValues.getArrayRef(), types);
83+
rewriter, warpOp, yieldValues, types);
8084
rewriter.replaceOp(warpOp,
8185
newWarpOp.getResults().take_front(warpOp.getNumResults()));
8286
return newWarpOp;

mlir/test/Dialect/Vector/vector-warp-distribute.mlir

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1803,3 +1803,24 @@ func.func @warp_propagate_nd_write(%laneid: index, %dest: memref<4x1024xf32>) {
18031803
// CHECK-DIST-AND-PROP: %[[IDS:.+]]:2 = affine.delinearize_index %{{.*}} into (4, 8) : index, index
18041804
// CHECK-DIST-AND-PROP: %[[INNER_ID:.+]] = affine.apply #map()[%[[IDS]]#1]
18051805
// CHECK-DIST-AND-PROP: vector.transfer_write %[[W]], %{{.*}}[%[[IDS]]#0, %[[INNER_ID]]] {{.*}} : vector<1x128xf32>
1806+
1807+
// -----
1808+
func.func @warp_propagate_duplicated_operands_in_yield(%laneid: index) {
1809+
%r:3 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<1xf32>, vector<1xf32>, vector<1xf32>) {
1810+
%0 = "some_def"() : () -> (vector<32xf32>)
1811+
%1 = "some_other_def"() : () -> (vector<32xf32>)
1812+
%2 = math.exp %1 : vector<32xf32>
1813+
gpu.yield %2, %0, %0 : vector<32xf32>, vector<32xf32>, vector<32xf32>
1814+
}
1815+
"some_use"(%r#0) : (vector<1xf32>) -> ()
1816+
return
1817+
}
1818+
1819+
// CHECK-PROP-LABEL: func.func @warp_propagate_duplicated_operands_in_yield(
1820+
// CHECK-PROP: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[32] -> (vector<1xf32>) {
1821+
// CHECK-PROP: %{{.*}} = "some_def"() : () -> vector<32xf32>
1822+
// CHECK-PROP: %[[T3:.*]] = "some_other_def"() : () -> vector<32xf32>
1823+
// CHECK-PROP: gpu.yield %[[T3]] : vector<32xf32>
1824+
// CHECK-PROP: }
1825+
// CHECK-PROP: %[[T1:.*]] = math.exp %[[W]] : vector<1xf32>
1826+
// CHECK-PROP: "some_use"(%[[T1]]) : (vector<1xf32>) -> ()

0 commit comments

Comments
 (0)