Skip to content

Commit 3a7aa13

Browse files
committed
Fix comesBefore check
1 parent 077f424 commit 3a7aa13

File tree

3 files changed

+115
-20
lines changed

3 files changed

+115
-20
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "AMDGPUTargetTransformInfo.h"
1919
#include "GCNSubtarget.h"
2020
#include "llvm/ADT/FloatingPointMode.h"
21+
#include "llvm/IR/Dominators.h"
2122
#include "llvm/IR/IntrinsicsAMDGPU.h"
2223
#include "llvm/Transforms/InstCombine/InstCombiner.h"
2324
#include <optional>
@@ -498,8 +499,9 @@ Instruction *GCNTTIImpl::hoistReadLaneThroughOperand(InstCombiner &IC,
498499
Value *LaneID = nullptr;
499500
if (IsReadLane) {
500501
LaneID = II.getOperand(1);
501-
if (!isa<Constant>(LaneID) && !(isa<Instruction>(LaneID) &&
502-
cast<Instruction>(LaneID)->comesBefore(Op)))
502+
// Check that LaneID is available at Op, i.e. its definition dominates Op;
503+
// otherwise the readlane cannot be moved higher.
504+
if (!IC.getDominatorTree().dominates(LaneID, Op))
503505
return nullptr;
504506
}
505507

@@ -508,8 +510,13 @@ Instruction *GCNTTIImpl::hoistReadLaneThroughOperand(InstCombiner &IC,
508510
if (IsReadLane)
509511
Ops.push_back(LaneID);
510512

511-
Instruction *NewII =
512-
IC.Builder.CreateIntrinsic(II.getType(), II.getIntrinsicID(), Ops);
513+
// Make sure convergence tokens are preserved.
514+
// TODO: CreateIntrinsic should allow directly copying bundles
515+
SmallVector<OperandBundleDef, 2> OpBundles;
516+
II.getOperandBundlesAsDefs(OpBundles);
517+
518+
CallInst *NewII =
519+
IC.Builder.CreateCall(II.getCalledFunction(), Ops, OpBundles);
513520

514521
Instruction &NewOp = *Op->clone();
515522
NewOp.setOperand(OpIdx, NewII);

llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.readfirstlane.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,3 +459,33 @@ bb:
459459
%rfl = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %val)
460460
ret i32 %rfl
461461
}
462+
463+
; Test that convergence tokens are preserved when the readfirstlane is hoisted through its operand.
464+
465+
define i32 @hoist_preserves_convergence_token(i1 %cond, i32 %arg) convergent {
466+
; CHECK-LABEL: define i32 @hoist_preserves_convergence_token(
467+
; CHECK-SAME: i1 [[COND:%.*]], i32 [[ARG:%.*]]) #[[ATTR1:[0-9]+]] {
468+
; CHECK-NEXT: [[BB:.*]]:
469+
; CHECK-NEXT: [[ENTRY:%.*]] = call token @llvm.experimental.convergence.entry()
470+
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[END:.*]]
471+
; CHECK: [[THEN]]:
472+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG]]) [ "convergencectrl"(token [[ENTRY]]) ]
473+
; CHECK-NEXT: [[RFL:%.*]] = add i32 [[TMP0]], 16777215
474+
; CHECK-NEXT: br label %[[END]]
475+
; CHECK: [[END]]:
476+
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[RFL]], %[[THEN]] ], [ [[ARG]], %[[BB]] ]
477+
; CHECK-NEXT: ret i32 [[RES]]
478+
;
479+
bb:
480+
%entry = call token @llvm.experimental.convergence.entry()
481+
br i1 %cond, label %then, label %end
482+
483+
then:
484+
%val = add i32 %arg, 16777215
485+
%rfl = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %val) [ "convergencectrl"(token %entry)]
486+
br label %end
487+
488+
end:
489+
%res = phi i32 [%rfl, %then], [%arg, %bb]
490+
ret i32 %res
491+
}

llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.readlane.ll

Lines changed: 74 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ define float @hoist_fneg_f32(float %arg, i32 %lane) {
1010
; CHECK-LABEL: define float @hoist_fneg_f32(
1111
; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0:[0-9]+]] {
1212
; CHECK-NEXT: [[BB:.*:]]
13-
; CHECK-NEXT: [[VAL:%.*]] = fneg float [[ARG]]
14-
; CHECK-NEXT: [[RFL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL]], i32 [[LANE]])
13+
; CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
14+
; CHECK-NEXT: [[RFL:%.*]] = fneg float [[TMP0]]
1515
; CHECK-NEXT: ret float [[RFL]]
1616
;
1717
bb:
@@ -24,8 +24,8 @@ define double @hoist_fneg_f64(double %arg, i32 %lane) {
2424
; CHECK-LABEL: define double @hoist_fneg_f64(
2525
; CHECK-SAME: double [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
2626
; CHECK-NEXT: [[BB:.*:]]
27-
; CHECK-NEXT: [[VAL:%.*]] = fneg double [[ARG]]
28-
; CHECK-NEXT: [[RFL:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL]], i32 [[LANE]])
27+
; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
28+
; CHECK-NEXT: [[RFL:%.*]] = fneg double [[TMP0]]
2929
; CHECK-NEXT: ret double [[RFL]]
3030
;
3131
bb:
@@ -40,8 +40,8 @@ define i32 @hoist_add_i32(i32 %arg, i32 %lane) {
4040
; CHECK-LABEL: define i32 @hoist_add_i32(
4141
; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
4242
; CHECK-NEXT: [[BB:.*:]]
43-
; CHECK-NEXT: [[VAL:%.*]] = add i32 [[ARG]], 16777215
44-
; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[VAL]], i32 [[LANE]])
43+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
44+
; CHECK-NEXT: [[RFL:%.*]] = add i32 [[TMP0]], 16777215
4545
; CHECK-NEXT: ret i32 [[RFL]]
4646
;
4747
bb:
@@ -54,8 +54,8 @@ define float @hoist_fadd_f32(float %arg, i32 %lane) {
5454
; CHECK-LABEL: define float @hoist_fadd_f32(
5555
; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
5656
; CHECK-NEXT: [[BB:.*:]]
57-
; CHECK-NEXT: [[VAL:%.*]] = fadd float [[ARG]], 1.280000e+02
58-
; CHECK-NEXT: [[RFL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL]], i32 [[LANE]])
57+
; CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
58+
; CHECK-NEXT: [[RFL:%.*]] = fadd float [[TMP0]], 1.280000e+02
5959
; CHECK-NEXT: ret float [[RFL]]
6060
;
6161
bb:
@@ -70,8 +70,8 @@ define i64 @hoist_and_i64(i64 %arg, i32 %lane) {
7070
; CHECK-LABEL: define i64 @hoist_and_i64(
7171
; CHECK-SAME: i64 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
7272
; CHECK-NEXT: [[BB:.*:]]
73-
; CHECK-NEXT: [[VAL:%.*]] = and i64 [[ARG]], 16777215
74-
; CHECK-NEXT: [[RFL:%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[VAL]], i32 [[LANE]])
73+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[ARG]], i32 [[LANE]])
74+
; CHECK-NEXT: [[RFL:%.*]] = and i64 [[TMP0]], 16777215
7575
; CHECK-NEXT: ret i64 [[RFL]]
7676
;
7777
bb:
@@ -84,8 +84,8 @@ define double @hoist_fadd_f64(double %arg, i32 %lane) {
8484
; CHECK-LABEL: define double @hoist_fadd_f64(
8585
; CHECK-SAME: double [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
8686
; CHECK-NEXT: [[BB:.*:]]
87-
; CHECK-NEXT: [[VAL:%.*]] = fadd double [[ARG]], 1.280000e+02
88-
; CHECK-NEXT: [[RFL:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL]], i32 [[LANE]])
87+
; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
88+
; CHECK-NEXT: [[RFL:%.*]] = fadd double [[TMP0]], 1.280000e+02
8989
; CHECK-NEXT: ret double [[RFL]]
9090
;
9191
bb:
@@ -100,8 +100,8 @@ define i32 @hoist_sub_i32_lhs(i32 %arg, i32 %lane) {
100100
; CHECK-LABEL: define i32 @hoist_sub_i32_lhs(
101101
; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
102102
; CHECK-NEXT: [[BB:.*:]]
103-
; CHECK-NEXT: [[VAL:%.*]] = sub i32 16777215, [[ARG]]
104-
; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[VAL]], i32 [[LANE]])
103+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
104+
; CHECK-NEXT: [[RFL:%.*]] = sub i32 16777215, [[TMP0]]
105105
; CHECK-NEXT: ret i32 [[RFL]]
106106
;
107107
bb:
@@ -114,8 +114,8 @@ define float @hoist_fsub_f32_lhs(float %arg, i32 %lane) {
114114
; CHECK-LABEL: define float @hoist_fsub_f32_lhs(
115115
; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
116116
; CHECK-NEXT: [[BB:.*:]]
117-
; CHECK-NEXT: [[VAL:%.*]] = fsub float 1.280000e+02, [[ARG]]
118-
; CHECK-NEXT: [[RFL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL]], i32 [[LANE]])
117+
; CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
118+
; CHECK-NEXT: [[RFL:%.*]] = fsub float 1.280000e+02, [[TMP0]]
119119
; CHECK-NEXT: ret float [[RFL]]
120120
;
121121
bb:
@@ -141,3 +141,61 @@ bb:
141141
%rfl = call float @llvm.amdgcn.readlane.f32(float %val, i32 %lane)
142142
ret float %rfl
143143
}
144+
145+
define i32 @readlane_lane_op_in_other_block(i1 %cond, i32 %arg, i32 %base) {
146+
; CHECK-LABEL: define i32 @readlane_lane_op_in_other_block(
147+
; CHECK-SAME: i1 [[COND:%.*]], i32 [[ARG:%.*]], i32 [[BASE:%.*]]) #[[ATTR0]] {
148+
; CHECK-NEXT: [[BB:.*]]:
149+
; CHECK-NEXT: [[LANE:%.*]] = add i32 [[BASE]], 2
150+
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[END:.*]]
151+
; CHECK: [[THEN]]:
152+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
153+
; CHECK-NEXT: [[RFL:%.*]] = add i32 [[TMP0]], 16777215
154+
; CHECK-NEXT: br label %[[END]]
155+
; CHECK: [[END]]:
156+
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[RFL]], %[[THEN]] ], [ [[LANE]], %[[BB]] ]
157+
; CHECK-NEXT: ret i32 [[RES]]
158+
;
159+
bb:
160+
%lane = add i32 %base, 2
161+
br i1 %cond, label %then, label %end
162+
163+
then:
164+
%val = add i32 %arg, 16777215
165+
%rfl = call i32 @llvm.amdgcn.readlane.i32(i32 %val, i32 %lane)
166+
br label %end
167+
168+
end:
169+
%res = phi i32 [%rfl, %then], [%lane, %bb]
170+
ret i32 %res
171+
}
172+
173+
; Test that convergence tokens are preserved when the readlane is hoisted through its operand.
174+
175+
define i32 @hoist_preserves_convergence_token(i1 %cond, i32 %arg, i32 %lane) convergent {
176+
; CHECK-LABEL: define i32 @hoist_preserves_convergence_token(
177+
; CHECK-SAME: i1 [[COND:%.*]], i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR1:[0-9]+]] {
178+
; CHECK-NEXT: [[BB:.*]]:
179+
; CHECK-NEXT: [[ENTRY:%.*]] = call token @llvm.experimental.convergence.entry()
180+
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[END:.*]]
181+
; CHECK: [[THEN]]:
182+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]]) [ "convergencectrl"(token [[ENTRY]]) ]
183+
; CHECK-NEXT: [[RFL:%.*]] = add i32 [[TMP0]], 16777215
184+
; CHECK-NEXT: br label %[[END]]
185+
; CHECK: [[END]]:
186+
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[RFL]], %[[THEN]] ], [ [[ARG]], %[[BB]] ]
187+
; CHECK-NEXT: ret i32 [[RES]]
188+
;
189+
bb:
190+
%entry = call token @llvm.experimental.convergence.entry()
191+
br i1 %cond, label %then, label %end
192+
193+
then:
194+
%val = add i32 %arg, 16777215
195+
%rfl = call i32 @llvm.amdgcn.readlane.i32(i32 %val, i32 %lane) [ "convergencectrl"(token %entry)]
196+
br label %end
197+
198+
end:
199+
%res = phi i32 [%rfl, %then], [%arg, %bb]
200+
ret i32 %res
201+
}

0 commit comments

Comments
 (0)