11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 -passes=instcombine -S < %s | FileCheck %s
33
4- ; The readfirstlane version of this test covers all the interesting cases of the
4+ ; The readfirstlane version of this test covers all the interesting cases of the
55; shared logic. This testcase focuses on permlane64 specific pitfalls.
66
77; test unary
@@ -10,27 +10,27 @@ define float @hoist_fneg_f32(float %arg) {
1010; CHECK-LABEL: define float @hoist_fneg_f32(
1111; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
1212; CHECK-NEXT: [[BB:.*:]]
13- ; CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.amdgcn.readfirstlane .f32(float [[ARG]])
13+ ; CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.amdgcn.permlane64 .f32(float [[ARG]])
1414; CHECK-NEXT: [[RFL:%.*]] = fneg float [[TMP0]]
1515; CHECK-NEXT: ret float [[RFL]]
1616;
1717bb:
1818 %val = fneg float %arg
19- %pl = call float @llvm.amdgcn.readfirstlane .f32 (float %val )
19+ %pl = call float @llvm.amdgcn.permlane64 .f32 (float %val )
2020 ret float %pl
2121}
2222
; Unary case on double: the input negates %arg and passes the result to the
; lane intrinsic. The CHECK lines expect the intrinsic to be applied to the
; argument first, with the fneg performed on the intrinsic's result.
; The -/+ pairs switch the intrinsic under test from readfirstlane to permlane64.
2323define double @hoist_fneg_f64 (double %arg ) {
2424; CHECK-LABEL: define double @hoist_fneg_f64(
2525; CHECK-SAME: double [[ARG:%.*]]) #[[ATTR0]] {
2626; CHECK-NEXT: [[BB:.*:]]
27- ; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.amdgcn.readfirstlane .f64(double [[ARG]])
27+ ; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.amdgcn.permlane64 .f64(double [[ARG]])
2828; CHECK-NEXT: [[RFL:%.*]] = fneg double [[TMP0]]
2929; CHECK-NEXT: ret double [[RFL]]
3030;
3131bb:
3232 %val = fneg double %arg
33- %pl = call double @llvm.amdgcn.readfirstlane .f64 (double %val )
33+ %pl = call double @llvm.amdgcn.permlane64 .f64 (double %val )
3434 ret double %pl
3535}
3636
@@ -40,27 +40,27 @@ define i32 @hoist_trunc(i64 %arg) {
4040; CHECK-LABEL: define i32 @hoist_trunc(
4141; CHECK-SAME: i64 [[ARG:%.*]]) #[[ATTR0]] {
4242; CHECK-NEXT: [[BB:.*:]]
43- ; CHECK-NEXT: [[RFL:%.*]] = call i64 @llvm.amdgcn.readfirstlane .i64(i64 [[ARG]])
43+ ; CHECK-NEXT: [[RFL:%.*]] = call i64 @llvm.amdgcn.permlane64 .i64(i64 [[ARG]])
4444; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[RFL]] to i32
4545; CHECK-NEXT: ret i32 [[TMP0]]
4646;
4747bb:
4848 %val = trunc i64 %arg to i32
49- %pl = call i32 @llvm.amdgcn.readfirstlane .i32 (i32 %val )
49+ %pl = call i32 @llvm.amdgcn.permlane64 .i32 (i32 %val )
5050 ret i32 %pl
5151}
5252
; Cast case: the input zero-extends the i32 argument to i64 and passes the i64
; value to the lane intrinsic. The CHECK lines expect the intrinsic to be
; applied to the i32 argument (the .i32 overload) and the zext to be performed
; on its result.
; The -/+ pairs switch the intrinsic under test from readfirstlane to permlane64.
5353define i64 @hoist_zext (i32 %arg ) {
5454; CHECK-LABEL: define i64 @hoist_zext(
5555; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0]] {
5656; CHECK-NEXT: [[BB:.*:]]
57- ; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.readfirstlane .i32(i32 [[ARG]])
57+ ; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.permlane64 .i32(i32 [[ARG]])
5858; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[RFL]] to i64
5959; CHECK-NEXT: ret i64 [[TMP0]]
6060;
6161bb:
6262 %val = zext i32 %arg to i64
63- %pl = call i64 @llvm.amdgcn.readfirstlane .i64 (i64 %val )
63+ %pl = call i64 @llvm.amdgcn.permlane64 .i64 (i64 %val )
6464 ret i64 %pl
6565}
6666
@@ -70,8 +70,8 @@ define i32 @hoist_add_i32(i32 %arg) {
7070; CHECK-LABEL: define i32 @hoist_add_i32(
7171; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0]] {
7272; CHECK-NEXT: [[BB:.*:]]
73- ; CHECK-NEXT: [[VAL :%.*]] = add i32 [[ARG]], 16777215
74- ; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[VAL]])
73+ ; CHECK-NEXT: [[PL :%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[ARG]])
74+ ; CHECK-NEXT: [[RFL:%.*]] = add i32 [[PL]], 16777215
7575; CHECK-NEXT: ret i32 [[RFL]]
7676;
7777bb:
@@ -84,8 +84,8 @@ define float @hoist_fadd_f32(float %arg) {
8484; CHECK-LABEL: define float @hoist_fadd_f32(
8585; CHECK-SAME: float [[ARG:%.*]]) #[[ATTR0]] {
8686; CHECK-NEXT: [[BB:.*:]]
87- ; CHECK-NEXT: [[VAL :%.*]] = fadd float [[ARG]], 1.280000e+02
88- ; CHECK-NEXT: [[RFL:%.*]] = call float @llvm.amdgcn.permlane64.f32(float [[VAL]])
87+ ; CHECK-NEXT: [[PL :%.*]] = call float @llvm.amdgcn.permlane64.f32(float [[ARG]])
88+ ; CHECK-NEXT: [[RFL:%.*]] = fadd float [[PL]], 1.280000e+02
8989; CHECK-NEXT: ret float [[RFL]]
9090;
9191bb:
9494 ret float %pl
9595}
9696
97+ ; test multiple iterations
98+
; Chain case: the input computes shl -> sub -> xor -> add on %arg, each with a
; constant as the other operand, and only then calls permlane64. The CHECK
; lines expect permlane64 to end up applied directly to the argument, followed
; by the same four operations in the same order on the intrinsic's result.
99+ define i32 @hoist_multiple_times (i32 %arg ) {
100+ ; CHECK-LABEL: define i32 @hoist_multiple_times(
101+ ; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0]] {
102+ ; CHECK-NEXT: [[BB:.*:]]
103+ ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[ARG]])
104+ ; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 2
105+ ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 16777215, [[TMP1]]
106+ ; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 4242
107+ ; CHECK-NEXT: [[RFL:%.*]] = add i32 [[TMP3]], 6
108+ ; CHECK-NEXT: ret i32 [[RFL]]
109+ ;
110+ bb:
111+ %val.0 = shl i32 %arg , 2
112+ %val.1 = sub i32 16777215 , %val.0
113+ %val.2 = xor i32 %val.1 , 4242
114+ %val.3 = add i32 %val.2 , 6
115+ %rfl = call i32 @llvm.amdgcn.permlane64.i32 (i32 %val.3 )
116+ ret i32 %rfl
117+ }
118+
97119; test cases where hoisting isn't possible
98120
; Negative case: the second operand of the add is the result of a load from
; %src rather than a constant. The CHECK lines expect the load, the add and the
; permlane64 call to stay in their original order, i.e. no hoisting.
; NOTE(review): presumably the hoist requires the other operand to be a
; constant, as in the positive tests -- confirm against the instcombine code.
121+ define i32 @operand_is_instr (i32 %arg , ptr %src ) {
122+ ; CHECK-LABEL: define i32 @operand_is_instr(
123+ ; CHECK-SAME: i32 [[ARG:%.*]], ptr [[SRC:%.*]]) #[[ATTR0]] {
124+ ; CHECK-NEXT: [[BB:.*:]]
125+ ; CHECK-NEXT: [[OTHER:%.*]] = load i32, ptr [[SRC]], align 4
126+ ; CHECK-NEXT: [[VAL:%.*]] = add i32 [[ARG]], [[OTHER]]
127+ ; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[VAL]])
128+ ; CHECK-NEXT: ret i32 [[RFL]]
129+ ;
130+ bb:
131+ %other = load i32 , ptr %src
132+ %val = add i32 %arg , %other
133+ %rfl = call i32 @llvm.amdgcn.permlane64.i32 (i32 %val )
134+ ret i32 %rfl
135+ }
136+
; Negative case: the second operand of the add is another function argument
; (%other) rather than a constant. The CHECK lines expect the add to remain
; ahead of the permlane64 call, i.e. no hoisting.
; NOTE(review): presumably the hoist requires the other operand to be a
; constant, as in the positive tests -- confirm against the instcombine code.
137+ define i32 @operand_is_arg (i32 %arg , i32 %other ) {
138+ ; CHECK-LABEL: define i32 @operand_is_arg(
139+ ; CHECK-SAME: i32 [[ARG:%.*]], i32 [[OTHER:%.*]]) #[[ATTR0]] {
140+ ; CHECK-NEXT: [[BB:.*:]]
141+ ; CHECK-NEXT: [[VAL:%.*]] = add i32 [[ARG]], [[OTHER]]
142+ ; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[VAL]])
143+ ; CHECK-NEXT: ret i32 [[RFL]]
144+ ;
145+ bb:
146+ %val = add i32 %arg , %other
147+ %rfl = call i32 @llvm.amdgcn.permlane64.i32 (i32 %val )
148+ ret i32 %rfl
149+ }
150+
99151define float @cross_block_hoisting (i1 %cond , float %arg ) {
100152; CHECK-LABEL: define float @cross_block_hoisting(
101153; CHECK-SAME: i1 [[COND:%.*]], float [[ARG:%.*]]) #[[ATTR0]] {
0 commit comments