Skip to content

Commit bf6d52a

Browse files
authored
[LoongArch] Pre-commit for vecreduce_add. (#154302)
1 parent 7905d5e commit bf6d52a

File tree

2 files changed

+166
-0
lines changed

2 files changed

+166
-0
lines changed
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
4+
5+
; Add-reduction test for a <32 x i8> vector.
; Loads the vector from %src, reduces it to one i8 with
; @llvm.vector.reduce.add.v32i8, and stores that scalar to %dst.
; The CHECK lines pin the llc output for this function under the RUN line of
; this file (--mattr=+lasx). The file header marks them as autogenerated by
; utils/update_llc_test_checks.py, so regenerate them with that script rather
; than editing them by hand.
; The pinned sequence currently contains five xvadd.b steps followed by a
; single xvstelm.b store through $a1.
define void @vec_reduce_add_v32i8(ptr %src, ptr %dst) nounwind {
6+
; CHECK-LABEL: vec_reduce_add_v32i8:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: xvld $xr0, $a0, 0
9+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
10+
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
11+
; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
12+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
13+
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
14+
; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
15+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
16+
; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
17+
; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
18+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
19+
; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
20+
; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
21+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
22+
; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
23+
; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
24+
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
25+
; CHECK-NEXT: ret
26+
%v = load <32 x i8>, ptr %src
27+
%res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %v)
28+
store i8 %res, ptr %dst
29+
ret void
30+
}
31+
32+
; Add-reduction test for a <16 x i16> vector.
; Loads the vector from %src, reduces it to one i16 with
; @llvm.vector.reduce.add.v16i16, and stores that scalar to %dst.
; The CHECK lines pin the llc output under the RUN line of this file
; (--mattr=+lasx). They are marked as autogenerated in the file header, so
; regenerate them with utils/update_llc_test_checks.py rather than by hand.
; The pinned sequence currently contains four xvadd.h steps followed by a
; single xvstelm.h store through $a1.
define void @vec_reduce_add_v16i16(ptr %src, ptr %dst) nounwind {
33+
; CHECK-LABEL: vec_reduce_add_v16i16:
34+
; CHECK: # %bb.0:
35+
; CHECK-NEXT: xvld $xr0, $a0, 0
36+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
37+
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
38+
; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
39+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
40+
; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
41+
; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
42+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
43+
; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
44+
; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
45+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
46+
; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
47+
; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
48+
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
49+
; CHECK-NEXT: ret
50+
%v = load <16 x i16>, ptr %src
51+
%res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %v)
52+
store i16 %res, ptr %dst
53+
ret void
54+
}
55+
56+
; Add-reduction test for an <8 x i32> vector.
; Loads the vector from %src, reduces it to one i32 with
; @llvm.vector.reduce.add.v8i32, and stores that scalar to %dst.
; The CHECK lines pin the llc output under the RUN line of this file
; (--mattr=+lasx). They are marked as autogenerated in the file header, so
; regenerate them with utils/update_llc_test_checks.py rather than by hand.
; The pinned sequence currently contains three xvadd.w steps followed by a
; single xvstelm.w store through $a1.
define void @vec_reduce_add_v8i32(ptr %src, ptr %dst) nounwind {
57+
; CHECK-LABEL: vec_reduce_add_v8i32:
58+
; CHECK: # %bb.0:
59+
; CHECK-NEXT: xvld $xr0, $a0, 0
60+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
61+
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
62+
; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
63+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
64+
; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
65+
; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
66+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
67+
; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
68+
; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
69+
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
70+
; CHECK-NEXT: ret
71+
%v = load <8 x i32>, ptr %src
72+
%res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v)
73+
store i32 %res, ptr %dst
74+
ret void
75+
}
76+
77+
; Add-reduction test for a <4 x i64> vector.
; Loads the vector from %src, reduces it to one i64 with
; @llvm.vector.reduce.add.v4i64, and stores that scalar to %dst.
; The CHECK lines pin the llc output under the RUN line of this file
; (--mattr=+lasx). They are marked as autogenerated in the file header, so
; regenerate them with utils/update_llc_test_checks.py rather than by hand.
; Unlike the narrower element types tested in this file, the pinned sequence
; here loads a constant-pool vector (.LCPI3_0) via pcalau12i + xvld and uses
; it as an operand of xvshuf.d. It then performs two xvadd.d steps and a
; single xvstelm.d store through $a1.
define void @vec_reduce_add_v4i64(ptr %src, ptr %dst) nounwind {
78+
; CHECK-LABEL: vec_reduce_add_v4i64:
79+
; CHECK: # %bb.0:
80+
; CHECK-NEXT: xvld $xr0, $a0, 0
81+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
82+
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
83+
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
84+
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
85+
; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
86+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
87+
; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
88+
; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
89+
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
90+
; CHECK-NEXT: ret
91+
%v = load <4 x i64>, ptr %src
92+
%res = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
93+
store i64 %res, ptr %dst
94+
ret void
95+
}
96+
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
3+
4+
; Add-reduction test for a <16 x i8> vector.
; Loads the vector from %src, reduces it to one i8 with
; @llvm.vector.reduce.add.v16i8, and stores that scalar to %dst.
; The CHECK lines pin the llc output under the RUN line of this file
; (--mattr=+lsx). They are marked as autogenerated in the file header, so
; regenerate them with utils/update_llc_test_checks.py rather than by hand.
; The pinned sequence currently contains four vadd.b steps followed by a
; single vstelm.b store through $a1.
define void @vec_reduce_add_v16i8(ptr %src, ptr %dst) nounwind {
5+
; CHECK-LABEL: vec_reduce_add_v16i8:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vld $vr0, $a0, 0
8+
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
9+
; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
10+
; CHECK-NEXT: vsrli.d $vr1, $vr0, 32
11+
; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
12+
; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14
13+
; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
14+
; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1
15+
; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
16+
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
17+
; CHECK-NEXT: ret
18+
%v = load <16 x i8>, ptr %src
19+
%res = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v)
20+
store i8 %res, ptr %dst
21+
ret void
22+
}
23+
24+
; Add-reduction test for an <8 x i16> vector.
; Loads the vector from %src, reduces it to one i16 with
; @llvm.vector.reduce.add.v8i16, and stores that scalar to %dst.
; The CHECK lines pin the llc output under the RUN line of this file
; (--mattr=+lsx). They are marked as autogenerated in the file header, so
; regenerate them with utils/update_llc_test_checks.py rather than by hand.
; The pinned sequence currently contains three vadd.h steps followed by a
; single vstelm.h store through $a1.
define void @vec_reduce_add_v8i16(ptr %src, ptr %dst) nounwind {
25+
; CHECK-LABEL: vec_reduce_add_v8i16:
26+
; CHECK: # %bb.0:
27+
; CHECK-NEXT: vld $vr0, $a0, 0
28+
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
29+
; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
30+
; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14
31+
; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
32+
; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1
33+
; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
34+
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
35+
; CHECK-NEXT: ret
36+
%v = load <8 x i16>, ptr %src
37+
%res = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v)
38+
store i16 %res, ptr %dst
39+
ret void
40+
}
41+
42+
; Add-reduction test for a <4 x i32> vector.
; Loads the vector from %src, reduces it to one i32 with
; @llvm.vector.reduce.add.v4i32, and stores that scalar to %dst.
; The CHECK lines pin the llc output under the RUN line of this file
; (--mattr=+lsx). They are marked as autogenerated in the file header, so
; regenerate them with utils/update_llc_test_checks.py rather than by hand.
; The pinned sequence currently contains two vadd.w steps followed by a
; single vstelm.w store through $a1.
define void @vec_reduce_add_v4i32(ptr %src, ptr %dst) nounwind {
43+
; CHECK-LABEL: vec_reduce_add_v4i32:
44+
; CHECK: # %bb.0:
45+
; CHECK-NEXT: vld $vr0, $a0, 0
46+
; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14
47+
; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
48+
; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
49+
; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
50+
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
51+
; CHECK-NEXT: ret
52+
%v = load <4 x i32>, ptr %src
53+
%res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
54+
store i32 %res, ptr %dst
55+
ret void
56+
}
57+
58+
; Add-reduction test for a <2 x i64> vector.
; Loads the vector from %src, reduces it to one i64 with
; @llvm.vector.reduce.add.v2i64, and stores that scalar to %dst.
; The CHECK lines pin the llc output under the RUN line of this file
; (--mattr=+lsx). They are marked as autogenerated in the file header, so
; regenerate them with utils/update_llc_test_checks.py rather than by hand.
; The pinned sequence currently contains one vreplvei.d + vadd.d step
; followed by a single vstelm.d store through $a1.
define void @vec_reduce_add_v2i64(ptr %src, ptr %dst) nounwind {
59+
; CHECK-LABEL: vec_reduce_add_v2i64:
60+
; CHECK: # %bb.0:
61+
; CHECK-NEXT: vld $vr0, $a0, 0
62+
; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
63+
; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
64+
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
65+
; CHECK-NEXT: ret
66+
%v = load <2 x i64>, ptr %src
67+
%res = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %v)
68+
store i64 %res, ptr %dst
69+
ret void
70+
}

0 commit comments

Comments
 (0)