@@ -10,8 +10,8 @@ define float @hoist_fneg_f32(float %arg, i32 %lane) {
1010; CHECK-LABEL: define float @hoist_fneg_f32(
1111; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0:[0-9]+]] {
1212; CHECK-NEXT: [[BB:.*:]]
13- ; CHECK-NEXT: [[TMP0 :%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
14- ; CHECK-NEXT: [[RFL:%.*]] = fneg float [[TMP0 ]]
13+ ; CHECK-NEXT: [[RL :%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
14+ ; CHECK-NEXT: [[RFL:%.*]] = fneg float [[RL ]]
1515; CHECK-NEXT: ret float [[RFL]]
1616;
1717bb:
@@ -24,8 +24,8 @@ define double @hoist_fneg_f64(double %arg, i32 %lane) {
2424; CHECK-LABEL: define double @hoist_fneg_f64(
2525; CHECK-SAME: double [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
2626; CHECK-NEXT: [[BB:.*:]]
27- ; CHECK-NEXT: [[TMP0 :%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
28- ; CHECK-NEXT: [[RFL:%.*]] = fneg double [[TMP0 ]]
27+ ; CHECK-NEXT: [[RL :%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
28+ ; CHECK-NEXT: [[RFL:%.*]] = fneg double [[RL ]]
2929; CHECK-NEXT: ret double [[RFL]]
3030;
3131bb:
@@ -40,8 +40,8 @@ define i32 @hoist_trunc(i64 %arg, i32 %lane) {
4040; CHECK-LABEL: define i32 @hoist_trunc(
4141; CHECK-SAME: i64 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
4242; CHECK-NEXT: [[BB:.*:]]
43- ; CHECK-NEXT: [[RFL :%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[ARG]], i32 [[LANE]])
44- ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[RFL ]] to i32
43+ ; CHECK-NEXT: [[RL :%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[ARG]], i32 [[LANE]])
44+ ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[RL ]] to i32
4545; CHECK-NEXT: ret i32 [[TMP0]]
4646;
4747bb:
@@ -54,8 +54,8 @@ define i64 @hoist_zext(i32 %arg, i32 %lane) {
5454; CHECK-LABEL: define i64 @hoist_zext(
5555; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
5656; CHECK-NEXT: [[BB:.*:]]
57- ; CHECK-NEXT: [[RFL :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
58- ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[RFL ]] to i64
57+ ; CHECK-NEXT: [[RL :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
58+ ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[RL ]] to i64
5959; CHECK-NEXT: ret i64 [[TMP0]]
6060;
6161bb:
@@ -70,8 +70,8 @@ define i32 @hoist_add_i32(i32 %arg, i32 %lane) {
7070; CHECK-LABEL: define i32 @hoist_add_i32(
7171; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
7272; CHECK-NEXT: [[BB:.*:]]
73- ; CHECK-NEXT: [[TMP0 :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
74- ; CHECK-NEXT: [[RFL:%.*]] = add i32 [[TMP0 ]], 16777215
73+ ; CHECK-NEXT: [[RL :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
74+ ; CHECK-NEXT: [[RFL:%.*]] = add i32 [[RL ]], 16777215
7575; CHECK-NEXT: ret i32 [[RFL]]
7676;
7777bb:
@@ -84,8 +84,8 @@ define float @hoist_fadd_f32(float %arg, i32 %lane) {
8484; CHECK-LABEL: define float @hoist_fadd_f32(
8585; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
8686; CHECK-NEXT: [[BB:.*:]]
87- ; CHECK-NEXT: [[TMP0 :%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
88- ; CHECK-NEXT: [[RFL:%.*]] = fadd float [[TMP0 ]], 1.280000e+02
87+ ; CHECK-NEXT: [[RL :%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
88+ ; CHECK-NEXT: [[RFL:%.*]] = fadd float [[RL ]], 1.280000e+02
8989; CHECK-NEXT: ret float [[RFL]]
9090;
9191bb:
@@ -100,8 +100,8 @@ define i64 @hoist_and_i64(i64 %arg, i32 %lane) {
100100; CHECK-LABEL: define i64 @hoist_and_i64(
101101; CHECK-SAME: i64 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
102102; CHECK-NEXT: [[BB:.*:]]
103- ; CHECK-NEXT: [[TMP0 :%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[ARG]], i32 [[LANE]])
104- ; CHECK-NEXT: [[RFL:%.*]] = and i64 [[TMP0 ]], 16777215
103+ ; CHECK-NEXT: [[RL :%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[ARG]], i32 [[LANE]])
104+ ; CHECK-NEXT: [[RFL:%.*]] = and i64 [[RL ]], 16777215
105105; CHECK-NEXT: ret i64 [[RFL]]
106106;
107107bb:
@@ -114,8 +114,8 @@ define double @hoist_fadd_f64(double %arg, i32 %lane) {
114114; CHECK-LABEL: define double @hoist_fadd_f64(
115115; CHECK-SAME: double [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
116116; CHECK-NEXT: [[BB:.*:]]
117- ; CHECK-NEXT: [[TMP0 :%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
118- ; CHECK-NEXT: [[RFL:%.*]] = fadd double [[TMP0 ]], 1.280000e+02
117+ ; CHECK-NEXT: [[RL :%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
118+ ; CHECK-NEXT: [[RFL:%.*]] = fadd double [[RL ]], 1.280000e+02
119119; CHECK-NEXT: ret double [[RFL]]
120120;
121121bb:
@@ -130,8 +130,8 @@ define i32 @hoist_sub_i32_lhs(i32 %arg, i32 %lane) {
130130; CHECK-LABEL: define i32 @hoist_sub_i32_lhs(
131131; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
132132; CHECK-NEXT: [[BB:.*:]]
133- ; CHECK-NEXT: [[TMP0 :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
134- ; CHECK-NEXT: [[RFL:%.*]] = sub i32 16777215, [[TMP0 ]]
133+ ; CHECK-NEXT: [[RL :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
134+ ; CHECK-NEXT: [[RFL:%.*]] = sub i32 16777215, [[RL ]]
135135; CHECK-NEXT: ret i32 [[RFL]]
136136;
137137bb:
@@ -144,8 +144,8 @@ define float @hoist_fsub_f32_lhs(float %arg, i32 %lane) {
144144; CHECK-LABEL: define float @hoist_fsub_f32_lhs(
145145; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
146146; CHECK-NEXT: [[BB:.*:]]
147- ; CHECK-NEXT: [[TMP0 :%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
148- ; CHECK-NEXT: [[RFL:%.*]] = fsub float 1.280000e+02, [[TMP0 ]]
147+ ; CHECK-NEXT: [[RL :%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
148+ ; CHECK-NEXT: [[RFL:%.*]] = fsub float 1.280000e+02, [[RL ]]
149149; CHECK-NEXT: ret float [[RFL]]
150150;
151151bb:
@@ -154,36 +154,18 @@ bb:
154154 ret float %rl
155155}
156156
157- ; Check cases where we can't move the readlane higher
158-
159- define float @cannot_move_readlane (float %arg , i32 %base ) {
160- ; CHECK-LABEL: define float @cannot_move_readlane(
161- ; CHECK-SAME: float [[ARG:%.*]], i32 [[BASE:%.*]]) #[[ATTR0]] {
162- ; CHECK-NEXT: [[BB:.*:]]
163- ; CHECK-NEXT: [[VAL:%.*]] = fsub float 1.280000e+02, [[ARG]]
164- ; CHECK-NEXT: [[LANE:%.*]] = add i32 [[BASE]], 2
165- ; CHECK-NEXT: [[RFL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL]], i32 [[LANE]])
166- ; CHECK-NEXT: ret float [[RFL]]
167- ;
168- bb:
169- %val = fsub float 128 .0 , %arg
170- %lane = add i32 %base , 2
171- %rl = call float @llvm.amdgcn.readlane.f32 (float %val , i32 %lane )
172- ret float %rl
173- }
174-
175157define i32 @readlane_lane_op_in_other_block (i1 %cond , i32 %arg , i32 %base ) {
176158; CHECK-LABEL: define i32 @readlane_lane_op_in_other_block(
177159; CHECK-SAME: i1 [[COND:%.*]], i32 [[ARG:%.*]], i32 [[BASE:%.*]]) #[[ATTR0]] {
178160; CHECK-NEXT: [[BB:.*]]:
179161; CHECK-NEXT: [[LANE:%.*]] = add i32 [[BASE]], 2
180162; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[END:.*]]
181163; CHECK: [[THEN]]:
182- ; CHECK-NEXT: [[TMP0 :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
183- ; CHECK-NEXT: [[RFL :%.*]] = add i32 [[TMP0 ]], 16777215
164+ ; CHECK-NEXT: [[RL :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
165+ ; CHECK-NEXT: [[TMP0 :%.*]] = add i32 [[RL ]], 16777215
184166; CHECK-NEXT: br label %[[END]]
185167; CHECK: [[END]]:
186- ; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[RFL ]], %[[THEN]] ], [ [[LANE]], %[[BB]] ]
168+ ; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0 ]], %[[THEN]] ], [ [[LANE]], %[[BB]] ]
187169; CHECK-NEXT: ret i32 [[RES]]
188170;
189171bb:
@@ -200,6 +182,25 @@ end:
200182 ret i32 %res
201183}
202184
185+ ; Check cases where we can't move the readlane higher
186+
187+ define float @cannot_move_readlane (float %arg , i32 %base ) {
188+ ; CHECK-LABEL: define float @cannot_move_readlane(
189+ ; CHECK-SAME: float [[ARG:%.*]], i32 [[BASE:%.*]]) #[[ATTR0]] {
190+ ; CHECK-NEXT: [[BB:.*:]]
191+ ; CHECK-NEXT: [[VAL:%.*]] = fsub float 1.280000e+02, [[ARG]]
192+ ; CHECK-NEXT: [[LANE:%.*]] = add i32 [[BASE]], 2
193+ ; CHECK-NEXT: [[RFL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL]], i32 [[LANE]])
194+ ; CHECK-NEXT: ret float [[RFL]]
195+ ;
196+ bb:
197+ %val = fsub float 128 .0 , %arg
198+ %lane = add i32 %base , 2
199+ %rl = call float @llvm.amdgcn.readlane.f32 (float %val , i32 %lane )
200+ ret float %rl
201+ }
202+
203+
203204; test that convergence tokens are preserved
204205
205206define i32 @hoist_preserves_convergence_token (i1 %cond , i32 %arg , i32 %lane ) convergent {
@@ -209,11 +210,11 @@ define i32 @hoist_preserves_convergence_token(i1 %cond, i32 %arg, i32 %lane) con
209210; CHECK-NEXT: [[ENTRY:%.*]] = call token @llvm.experimental.convergence.entry()
210211; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[END:.*]]
211212; CHECK: [[THEN]]:
212- ; CHECK-NEXT: [[TMP0 :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]]) [ "convergencectrl"(token [[ENTRY]]) ]
213- ; CHECK-NEXT: [[RFL :%.*]] = add i32 [[TMP0 ]], 16777215
213+ ; CHECK-NEXT: [[RL :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]]) [ "convergencectrl"(token [[ENTRY]]) ]
214+ ; CHECK-NEXT: [[TMP0 :%.*]] = add i32 [[RL ]], 16777215
214215; CHECK-NEXT: br label %[[END]]
215216; CHECK: [[END]]:
216- ; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[RFL ]], %[[THEN]] ], [ [[ARG]], %[[BB]] ]
217+ ; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0 ]], %[[THEN]] ], [ [[ARG]], %[[BB]] ]
217218; CHECK-NEXT: ret i32 [[RES]]
218219;
219220bb:
0 commit comments