@@ -5,22 +5,17 @@ define void @vec_reduce_and_v32i8(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: vec_reduce_and_v32i8:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
 ; CHECK-NEXT: ret
   %v = load <32 x i8>, ptr %src
   %res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %v)
@@ -32,19 +27,15 @@ define void @vec_reduce_and_v16i16(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: vec_reduce_and_v16i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
 ; CHECK-NEXT: ret
   %v = load <16 x i16>, ptr %src
   %res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %v)
@@ -56,16 +47,13 @@ define void @vec_reduce_and_v8i32(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: vec_reduce_and_v8i32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
 ; CHECK-NEXT: ret
   %v = load <8 x i32>, ptr %src
   %res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %v)
@@ -77,15 +65,11 @@ define void @vec_reduce_and_v4i64(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: vec_reduce_and_v4i64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
 ; CHECK-NEXT: ret
   %v = load <4 x i64>, ptr %src
   %res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
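The updated CHECK lines follow the standard log2 halving reduction: `xvpermi.q` + `vand.v` folds the upper 128-bit half of the LASX register into the lower half, then repeated `vbsrl.v` byte shifts AND the remaining high elements into the low ones until a single element is left for `vstelm`. A minimal IR sketch of that strategy for the `<4 x i64>` case (not part of the commit; the function name is hypothetical and for illustration only):

```llvm
define i64 @reduce_and_v4i64_sketch(<4 x i64> %v) {
  ; fold lanes 2..3 into lanes 0..1 (the xvpermi.q + vand.v step)
  %hi2 = shufflevector <4 x i64> %v, <4 x i64> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %a0 = and <4 x i64> %v, %hi2
  ; fold lane 1 into lane 0 (the vbsrl.v 8 + vand.v step)
  %hi1 = shufflevector <4 x i64> %a0, <4 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %a1 = and <4 x i64> %a0, %hi1
  ; lane 0 now holds the full AND reduction (stored by vstelm.d in the test)
  %r = extractelement <4 x i64> %a1, i32 0
  ret i64 %r
}
```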