11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2- ; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s
2+ ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
3+ ; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
34
4- ; TODO: Loading an element and splatting it to a vector could be lowered to xvldrepl
55
6- ; A load that has more than one user shouldn't be lowered to xvldrepl
76define <4 x i64 > @should_not_be_optimized (ptr %ptr , ptr %dst ) {
8- ; CHECK-LABEL: should_not_be_optimized:
9- ; CHECK: # %bb.0:
10- ; CHECK-NEXT: ld.d $a0, $a0, 0
11- ; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
12- ; CHECK-NEXT: st.d $a0, $a1, 0
13- ; CHECK-NEXT: ret
7+ ; LA32-LABEL: should_not_be_optimized:
8+ ; LA32: # %bb.0:
9+ ; LA32-NEXT: ld.w $a2, $a0, 0
10+ ; LA32-NEXT: ld.w $a0, $a0, 4
11+ ; LA32-NEXT: st.w $a2, $a1, 0
12+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 0
13+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1
14+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 2
15+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3
16+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 4
17+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5
18+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 6
19+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7
20+ ; LA32-NEXT: st.w $a0, $a1, 4
21+ ; LA32-NEXT: ret
22+ ;
23+ ; LA64-LABEL: should_not_be_optimized:
24+ ; LA64: # %bb.0:
25+ ; LA64-NEXT: ld.d $a0, $a0, 0
26+ ; LA64-NEXT: xvreplgr2vr.d $xr0, $a0
27+ ; LA64-NEXT: st.d $a0, $a1, 0
28+ ; LA64-NEXT: ret
1429 %tmp = load i64 , ptr %ptr
1530 store i64 %tmp , ptr %dst
1631 %tmp1 = insertelement <4 x i64 > zeroinitializer , i64 %tmp , i32 0
@@ -19,11 +34,25 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
1934}
2035
2136define <4 x i64 > @xvldrepl_d_unaligned_offset (ptr %ptr ) {
22- ; CHECK-LABEL: xvldrepl_d_unaligned_offset:
23- ; CHECK: # %bb.0:
24- ; CHECK-NEXT: addi.d $a0, $a0, 4
25- ; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0
26- ; CHECK-NEXT: ret
37+ ; LA32-LABEL: xvldrepl_d_unaligned_offset:
38+ ; LA32: # %bb.0:
39+ ; LA32-NEXT: ld.w $a1, $a0, 4
40+ ; LA32-NEXT: ld.w $a0, $a0, 8
41+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0
42+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1
43+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2
44+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3
45+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4
46+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5
47+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6
48+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7
49+ ; LA32-NEXT: ret
50+ ;
51+ ; LA64-LABEL: xvldrepl_d_unaligned_offset:
52+ ; LA64: # %bb.0:
53+ ; LA64-NEXT: addi.d $a0, $a0, 4
54+ ; LA64-NEXT: xvldrepl.d $xr0, $a0, 0
55+ ; LA64-NEXT: ret
2756 %p = getelementptr i32 , ptr %ptr , i32 1
2857 %tmp = load i64 , ptr %p
2958 %tmp1 = insertelement <4 x i64 > zeroinitializer , i64 %tmp , i32 0
@@ -103,21 +132,49 @@ define <8 x i32> @xvldrepl_w_offset(ptr %ptr) {
103132
104133
; Splat of a 64-bit load: an i64 is loaded from %ptr, inserted into lane 0 of a
; zero vector, and broadcast to all four lanes by a shufflevector with an
; all-zero mask. The LA64 checks expect a single `xvldrepl.d $xr0, $a0, 0`; the
; LA32 checks expect two `ld.w` loads (offsets 0 and 4) followed by eight
; alternating `xvinsgr2vr.w` inserts.
105134define <4 x i64 > @xvldrepl_d (ptr %ptr ) {
106- ; CHECK-LABEL: xvldrepl_d:
107- ; CHECK: # %bb.0:
108- ; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0
109- ; CHECK-NEXT: ret
135+ ; LA32-LABEL: xvldrepl_d:
136+ ; LA32: # %bb.0:
137+ ; LA32-NEXT: ld.w $a1, $a0, 0
138+ ; LA32-NEXT: ld.w $a0, $a0, 4
139+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0
140+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1
141+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2
142+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3
143+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4
144+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5
145+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6
146+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7
147+ ; LA32-NEXT: ret
148+ ;
149+ ; LA64-LABEL: xvldrepl_d:
150+ ; LA64: # %bb.0:
151+ ; LA64-NEXT: xvldrepl.d $xr0, $a0, 0
152+ ; LA64-NEXT: ret
110153 %tmp = load i64 , ptr %ptr
111154 %tmp1 = insertelement <4 x i64 > zeroinitializer , i64 %tmp , i32 0
112155 %tmp2 = shufflevector <4 x i64 > %tmp1 , <4 x i64 > poison, <4 x i32 > zeroinitializer
113156 ret <4 x i64 > %tmp2
114157}
115158
116159define <4 x i64 > @xvldrepl_d_offset (ptr %ptr ) {
117- ; CHECK-LABEL: xvldrepl_d_offset:
118- ; CHECK: # %bb.0:
119- ; CHECK-NEXT: xvldrepl.d $xr0, $a0, 264
120- ; CHECK-NEXT: ret
160+ ; LA32-LABEL: xvldrepl_d_offset:
161+ ; LA32: # %bb.0:
162+ ; LA32-NEXT: ld.w $a1, $a0, 264
163+ ; LA32-NEXT: ld.w $a0, $a0, 268
164+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0
165+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1
166+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2
167+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3
168+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4
169+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5
170+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6
171+ ; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7
172+ ; LA32-NEXT: ret
173+ ;
174+ ; LA64-LABEL: xvldrepl_d_offset:
175+ ; LA64: # %bb.0:
176+ ; LA64-NEXT: xvldrepl.d $xr0, $a0, 264
177+ ; LA64-NEXT: ret
121178 %p = getelementptr i64 , ptr %ptr , i64 33
122179 %tmp = load i64 , ptr %p
123180 %tmp1 = insertelement <4 x i64 > zeroinitializer , i64 %tmp , i32 0
0 commit comments