@@ -10,8 +10,8 @@ define float @hoist_fneg_f32(float %arg, i32 %lane) {
1010; CHECK-LABEL: define float @hoist_fneg_f32(
1111; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0:[0-9]+]] {
1212; CHECK-NEXT: [[BB:.*:]]
13- ; CHECK-NEXT: [[VAL :%.*]] = fneg float [[ARG]]
14- ; CHECK-NEXT: [[RFL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL]], i32 [[LANE]])
13+ ; CHECK-NEXT: [[TMP0 :%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
14+ ; CHECK-NEXT: [[RFL:%.*]] = fneg float [[TMP0]]
1515; CHECK-NEXT: ret float [[RFL]]
1616;
1717bb:
@@ -24,8 +24,8 @@ define double @hoist_fneg_f64(double %arg, i32 %lane) {
2424; CHECK-LABEL: define double @hoist_fneg_f64(
2525; CHECK-SAME: double [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
2626; CHECK-NEXT: [[BB:.*:]]
27- ; CHECK-NEXT: [[VAL :%.*]] = fneg double [[ARG]]
28- ; CHECK-NEXT: [[RFL:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL]], i32 [[LANE]])
27+ ; CHECK-NEXT: [[TMP0 :%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
28+ ; CHECK-NEXT: [[RFL:%.*]] = fneg double [[TMP0]]
2929; CHECK-NEXT: ret double [[RFL]]
3030;
3131bb:
@@ -40,8 +40,8 @@ define i32 @hoist_add_i32(i32 %arg, i32 %lane) {
4040; CHECK-LABEL: define i32 @hoist_add_i32(
4141; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
4242; CHECK-NEXT: [[BB:.*:]]
43- ; CHECK-NEXT: [[VAL :%.*]] = add i32 [[ARG]], 16777215
44- ; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[VAL ]], i32 [[LANE]])
43+ ; CHECK-NEXT: [[TMP0 :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
44+ ; CHECK-NEXT: [[RFL:%.*]] = add i32 [[TMP0 ]], 16777215
4545; CHECK-NEXT: ret i32 [[RFL]]
4646;
4747bb:
@@ -54,8 +54,8 @@ define float @hoist_fadd_f32(float %arg, i32 %lane) {
5454; CHECK-LABEL: define float @hoist_fadd_f32(
5555; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
5656; CHECK-NEXT: [[BB:.*:]]
57- ; CHECK-NEXT: [[VAL :%.*]] = fadd float [[ARG]], 1.280000e+02
58- ; CHECK-NEXT: [[RFL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL ]], i32 [[LANE]])
57+ ; CHECK-NEXT: [[TMP0 :%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
58+ ; CHECK-NEXT: [[RFL:%.*]] = fadd float [[TMP0 ]], 1.280000e+02
5959; CHECK-NEXT: ret float [[RFL]]
6060;
6161bb:
@@ -70,8 +70,8 @@ define i64 @hoist_and_i64(i64 %arg, i32 %lane) {
7070; CHECK-LABEL: define i64 @hoist_and_i64(
7171; CHECK-SAME: i64 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
7272; CHECK-NEXT: [[BB:.*:]]
73- ; CHECK-NEXT: [[VAL :%.*]] = and i64 [[ARG]], 16777215
74- ; CHECK-NEXT: [[RFL:%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[VAL ]], i32 [[LANE]])
73+ ; CHECK-NEXT: [[TMP0 :%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[ARG]], i32 [[LANE]])
74+ ; CHECK-NEXT: [[RFL:%.*]] = and i64 [[TMP0 ]], 16777215
7575; CHECK-NEXT: ret i64 [[RFL]]
7676;
7777bb:
@@ -84,8 +84,8 @@ define double @hoist_fadd_f64(double %arg, i32 %lane) {
8484; CHECK-LABEL: define double @hoist_fadd_f64(
8585; CHECK-SAME: double [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
8686; CHECK-NEXT: [[BB:.*:]]
87- ; CHECK-NEXT: [[VAL :%.*]] = fadd double [[ARG]], 1.280000e+02
88- ; CHECK-NEXT: [[RFL:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL ]], i32 [[LANE]])
87+ ; CHECK-NEXT: [[TMP0 :%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
88+ ; CHECK-NEXT: [[RFL:%.*]] = fadd double [[TMP0 ]], 1.280000e+02
8989; CHECK-NEXT: ret double [[RFL]]
9090;
9191bb:
@@ -100,8 +100,8 @@ define i32 @hoist_sub_i32_lhs(i32 %arg, i32 %lane) {
100100; CHECK-LABEL: define i32 @hoist_sub_i32_lhs(
101101; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
102102; CHECK-NEXT: [[BB:.*:]]
103- ; CHECK-NEXT: [[VAL :%.*]] = sub i32 16777215, [[ARG]]
104- ; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[VAL]], i32 [[LANE]])
103+ ; CHECK-NEXT: [[TMP0 :%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
104+ ; CHECK-NEXT: [[RFL:%.*]] = sub i32 16777215, [[TMP0]]
105105; CHECK-NEXT: ret i32 [[RFL]]
106106;
107107bb:
@@ -114,8 +114,8 @@ define float @hoist_fsub_f32_lhs(float %arg, i32 %lane) {
114114; CHECK-LABEL: define float @hoist_fsub_f32_lhs(
115115; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
116116; CHECK-NEXT: [[BB:.*:]]
117- ; CHECK-NEXT: [[VAL :%.*]] = fsub float 1.280000e+02, [[ARG]]
118- ; CHECK-NEXT: [[RFL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL]], i32 [[LANE]])
117+ ; CHECK-NEXT: [[TMP0 :%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
118+ ; CHECK-NEXT: [[RFL:%.*]] = fsub float 1.280000e+02, [[TMP0]]
119119; CHECK-NEXT: ret float [[RFL]]
120120;
121121bb:
141141 %rfl = call float @llvm.amdgcn.readlane.f32 (float %val , i32 %lane )
142142 ret float %rfl
143143}
144+
145+ define i32 @readlane_lane_op_in_other_block (i1 %cond , i32 %arg , i32 %base ) { ; lane operand is defined in a different block than the readlane call
146+ ; CHECK-LABEL: define i32 @readlane_lane_op_in_other_block(
147+ ; CHECK-SAME: i1 [[COND:%.*]], i32 [[ARG:%.*]], i32 [[BASE:%.*]]) #[[ATTR0]] {
148+ ; CHECK-NEXT: [[BB:.*]]:
149+ ; CHECK-NEXT: [[LANE:%.*]] = add i32 [[BASE]], 2
150+ ; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[END:.*]]
151+ ; CHECK: [[THEN]]:
152+ ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
153+ ; CHECK-NEXT: [[RFL:%.*]] = add i32 [[TMP0]], 16777215
154+ ; CHECK-NEXT: br label %[[END]]
155+ ; CHECK: [[END]]:
156+ ; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[RFL]], %[[THEN]] ], [ [[LANE]], %[[BB]] ]
157+ ; CHECK-NEXT: ret i32 [[RES]]
158+ ;
159+ bb:
160+ %lane = add i32 %base , 2 ; lane index is computed here, in the entry block
161+ br i1 %cond , label %then , label %end
162+
163+ then:
164+ %val = add i32 %arg , 16777215 ; add of a constant feeding the readlane; the expected output applies it to the readlane result instead
165+ %rfl = call i32 @llvm.amdgcn.readlane.i32 (i32 %val , i32 %lane ) ; uses %lane from %bb
166+ br label %end
167+
168+ end:
169+ %res = phi i32 [%rfl , %then ], [%lane , %bb ] ; %lane is also the incoming value on the edge from %bb
170+ ret i32 %res
171+ }
172+
173+ ; Test that the "convergencectrl" operand bundle on the readlane call is preserved when the add is moved after the call.
174+
175+ define i32 @hoist_preserves_convergence_token (i1 %cond , i32 %arg , i32 %lane ) convergent {
176+ ; CHECK-LABEL: define i32 @hoist_preserves_convergence_token(
177+ ; CHECK-SAME: i1 [[COND:%.*]], i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR1:[0-9]+]] {
178+ ; CHECK-NEXT: [[BB:.*]]:
179+ ; CHECK-NEXT: [[ENTRY:%.*]] = call token @llvm.experimental.convergence.entry()
180+ ; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[END:.*]]
181+ ; CHECK: [[THEN]]:
182+ ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]]) [ "convergencectrl"(token [[ENTRY]]) ]
183+ ; CHECK-NEXT: [[RFL:%.*]] = add i32 [[TMP0]], 16777215
184+ ; CHECK-NEXT: br label %[[END]]
185+ ; CHECK: [[END]]:
186+ ; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[RFL]], %[[THEN]] ], [ [[ARG]], %[[BB]] ]
187+ ; CHECK-NEXT: ret i32 [[RES]]
188+ ;
189+ bb:
190+ %entry = call token @llvm.experimental.convergence.entry () ; token that the readlane call below references in its operand bundle
191+ br i1 %cond , label %then , label %end
192+
193+ then:
194+ %val = add i32 %arg , 16777215 ; add of a constant feeding the readlane; the expected output applies it to the readlane result instead
195+ %rfl = call i32 @llvm.amdgcn.readlane.i32 (i32 %val , i32 %lane ) [ "convergencectrl" (token %entry )] ; the expected output keeps this bundle on the rewritten call
196+ br label %end
197+
198+ end:
199+ %res = phi i32 [%rfl , %then ], [%arg , %bb ]
200+ ret i32 %res
201+ }
0 commit comments