1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
-
3
2
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
4
3
5
4
define void @vec_reduce_add_v32i8 (ptr %src , ptr %dst ) nounwind {
6
5
; CHECK-LABEL: vec_reduce_add_v32i8:
7
6
; CHECK: # %bb.0:
8
7
; CHECK-NEXT: xvld $xr0, $a0, 0
9
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
10
- ; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
11
- ; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
12
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
13
- ; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
14
- ; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
15
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
16
- ; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
17
- ; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
18
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
19
- ; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
20
- ; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
21
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
22
- ; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
23
- ; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
24
- ; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
8
+ ; CHECK-NEXT: xvhaddw.h.b $xr0, $xr0, $xr0
9
+ ; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr0
10
+ ; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
11
+ ; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
12
+ ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2
13
+ ; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0
14
+ ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
15
+ ; CHECK-NEXT: st.b $a0, $a1, 0
25
16
; CHECK-NEXT: ret
26
17
%v = load <32 x i8 >, ptr %src
27
18
%res = call i8 @llvm.vector.reduce.add.v32i8 (<32 x i8 > %v )
@@ -33,19 +24,13 @@ define void @vec_reduce_add_v16i16(ptr %src, ptr %dst) nounwind {
33
24
; CHECK-LABEL: vec_reduce_add_v16i16:
34
25
; CHECK: # %bb.0:
35
26
; CHECK-NEXT: xvld $xr0, $a0, 0
36
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
37
- ; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
38
- ; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
39
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
40
- ; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
41
- ; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
42
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
43
- ; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
44
- ; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
45
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
46
- ; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
47
- ; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
48
- ; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
27
+ ; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr0
28
+ ; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
29
+ ; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
30
+ ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2
31
+ ; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0
32
+ ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
33
+ ; CHECK-NEXT: st.h $a0, $a1, 0
49
34
; CHECK-NEXT: ret
50
35
%v = load <16 x i16 >, ptr %src
51
36
%res = call i16 @llvm.vector.reduce.add.v16i16 (<16 x i16 > %v )
@@ -57,16 +42,12 @@ define void @vec_reduce_add_v8i32(ptr %src, ptr %dst) nounwind {
57
42
; CHECK-LABEL: vec_reduce_add_v8i32:
58
43
; CHECK: # %bb.0:
59
44
; CHECK-NEXT: xvld $xr0, $a0, 0
60
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
61
- ; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
62
- ; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
63
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
64
- ; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
65
- ; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
66
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
67
- ; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
68
- ; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
69
- ; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
45
+ ; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
46
+ ; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
47
+ ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2
48
+ ; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0
49
+ ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
50
+ ; CHECK-NEXT: st.w $a0, $a1, 0
70
51
; CHECK-NEXT: ret
71
52
%v = load <8 x i32 >, ptr %src
72
53
%res = call i32 @llvm.vector.reduce.add.v8i32 (<8 x i32 > %v )
@@ -78,19 +59,13 @@ define void @vec_reduce_add_v4i64(ptr %src, ptr %dst) nounwind {
78
59
; CHECK-LABEL: vec_reduce_add_v4i64:
79
60
; CHECK: # %bb.0:
80
61
; CHECK-NEXT: xvld $xr0, $a0, 0
81
- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
82
- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
83
- ; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
84
- ; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
85
- ; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
86
- ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
87
- ; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
88
- ; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
62
+ ; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
63
+ ; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2
64
+ ; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0
89
65
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
90
66
; CHECK-NEXT: ret
91
67
%v = load <4 x i64 >, ptr %src
92
68
%res = call i64 @llvm.vector.reduce.add.v4i64 (<4 x i64 > %v )
93
69
store i64 %res , ptr %dst
94
70
ret void
95
71
}
96
-
0 commit comments