44define i32 @add_of_reduce_add (<16 x i32 > %v0 , <16 x i32 > %v1 ) {
55; CHECK-LABEL: define i32 @add_of_reduce_add(
66; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
7- ; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
8- ; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
9- ; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
7+ ; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[V0]], [[V1]]
8+ ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
109; CHECK-NEXT: ret i32 [[RES]]
1110;
1211 %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32 (<16 x i32 > %v0 )
@@ -31,9 +30,8 @@ define i32 @sub_of_reduce_add(<16 x i32> %v0, <16 x i32> %v1) {
3130define i32 @mul_of_reduce_mul (<16 x i32 > %v0 , <16 x i32 > %v1 ) {
3231; CHECK-LABEL: define i32 @mul_of_reduce_mul(
3332; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
34- ; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V0]])
35- ; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[V1]])
36- ; CHECK-NEXT: [[RES:%.*]] = mul i32 [[V0_RED]], [[V1_RED]]
33+ ; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i32> [[V0]], [[V1]]
34+ ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP1]])
3735; CHECK-NEXT: ret i32 [[RES]]
3836;
3937 %v0_red = tail call i32 @llvm.vector.reduce.mul.v16i32 (<16 x i32 > %v0 )
@@ -45,9 +43,8 @@ define i32 @mul_of_reduce_mul(<16 x i32> %v0, <16 x i32> %v1) {
4543define i32 @and_of_reduce_and (<16 x i32 > %v0 , <16 x i32 > %v1 ) {
4644; CHECK-LABEL: define i32 @and_of_reduce_and(
4745; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
48- ; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V0]])
49- ; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[V1]])
50- ; CHECK-NEXT: [[RES:%.*]] = and i32 [[V0_RED]], [[V1_RED]]
46+ ; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[V0]], [[V1]]
47+ ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP1]])
5148; CHECK-NEXT: ret i32 [[RES]]
5249;
5350 %v0_red = tail call i32 @llvm.vector.reduce.and.v16i32 (<16 x i32 > %v0 )
@@ -59,9 +56,8 @@ define i32 @and_of_reduce_and(<16 x i32> %v0, <16 x i32> %v1) {
5956define i32 @or_of_reduce_or (<16 x i32 > %v0 , <16 x i32 > %v1 ) {
6057; CHECK-LABEL: define i32 @or_of_reduce_or(
6158; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
62- ; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]])
63- ; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]])
64- ; CHECK-NEXT: [[RES:%.*]] = or i32 [[V0_RED]], [[V1_RED]]
59+ ; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i32> [[V0]], [[V1]]
60+ ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP1]])
6561; CHECK-NEXT: ret i32 [[RES]]
6662;
6763 %v0_red = tail call i32 @llvm.vector.reduce.or.v16i32 (<16 x i32 > %v0 )
@@ -73,9 +69,8 @@ define i32 @or_of_reduce_or(<16 x i32> %v0, <16 x i32> %v1) {
7369define i32 @xor_of_reduce_xor (<16 x i32 > %v0 , <16 x i32 > %v1 ) {
7470; CHECK-LABEL: define i32 @xor_of_reduce_xor(
7571; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
76- ; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V0]])
77- ; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[V1]])
78- ; CHECK-NEXT: [[RES:%.*]] = xor i32 [[V0_RED]], [[V1_RED]]
72+ ; CHECK-NEXT: [[TMP1:%.*]] = xor <16 x i32> [[V0]], [[V1]]
73+ ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> [[TMP1]])
7974; CHECK-NEXT: ret i32 [[RES]]
8075;
8176 %v0_red = tail call i32 @llvm.vector.reduce.xor.v16i32 (<16 x i32 > %v0 )
@@ -161,9 +156,8 @@ define i32 @multiple_use_of_reduction_1(<16 x i32> %v0, <16 x i32> %v1, ptr %p)
161156define i32 @do_not_preserve_overflow_flags (<16 x i32 > %v0 , <16 x i32 > %v1 ) {
162157; CHECK-LABEL: define i32 @do_not_preserve_overflow_flags(
163158; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
164- ; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V0]])
165- ; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[V1]])
166- ; CHECK-NEXT: [[RES:%.*]] = add nuw nsw i32 [[V0_RED]], [[V1_RED]]
159+ ; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[V0]], [[V1]]
160+ ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP1]])
167161; CHECK-NEXT: ret i32 [[RES]]
168162;
169163 %v0_red = tail call i32 @llvm.vector.reduce.add.v16i32 (<16 x i32 > %v0 )
@@ -175,9 +169,8 @@ define i32 @do_not_preserve_overflow_flags(<16 x i32> %v0, <16 x i32> %v1) {
175169define i32 @preserve_disjoint_flags (<16 x i32 > %v0 , <16 x i32 > %v1 ) {
176170; CHECK-LABEL: define i32 @preserve_disjoint_flags(
177171; CHECK-SAME: <16 x i32> [[V0:%.*]], <16 x i32> [[V1:%.*]]) {
178- ; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V0]])
179- ; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[V1]])
180- ; CHECK-NEXT: [[RES:%.*]] = or disjoint i32 [[V0_RED]], [[V1_RED]]
172+ ; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <16 x i32> [[V0]], [[V1]]
173+ ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP1]])
181174; CHECK-NEXT: ret i32 [[RES]]
182175;
183176 %v0_red = tail call i32 @llvm.vector.reduce.or.v16i32 (<16 x i32 > %v0 )
@@ -189,9 +182,8 @@ define i32 @preserve_disjoint_flags(<16 x i32> %v0, <16 x i32> %v1) {
189182define i32 @add_of_reduce_add_vscale (<vscale x 16 x i32 > %v0 , <vscale x 16 x i32 > %v1 ) {
190183; CHECK-LABEL: define i32 @add_of_reduce_add_vscale(
191184; CHECK-SAME: <vscale x 16 x i32> [[V0:%.*]], <vscale x 16 x i32> [[V1:%.*]]) {
192- ; CHECK-NEXT: [[V0_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[V0]])
193- ; CHECK-NEXT: [[V1_RED:%.*]] = tail call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[V1]])
194- ; CHECK-NEXT: [[RES:%.*]] = add i32 [[V0_RED]], [[V1_RED]]
185+ ; CHECK-NEXT: [[TMP1:%.*]] = add <vscale x 16 x i32> [[V0]], [[V1]]
186+ ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[TMP1]])
195187; CHECK-NEXT: ret i32 [[RES]]
196188;
197189 %v0_red = tail call i32 @llvm.vector.reduce.add.nxv16i32 (<vscale x 16 x i32 > %v0 )
0 commit comments