1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2
- ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
2
+ ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
3
+ ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
3
4
4
5
; Test: load a volatile <32 x i8> from %src, extract the element at constant
; index 1, and store that i8 to %dst.
; Expected code per the assertions below: the LA32 run uses vpickve2gr.b
; followed by st.b, while the LA64 run uses a single xvstelm.b.
; NOTE(review): the `-`/`+` prefixed lines look like rendered-diff markers
; (old shared prefix vs. new per-target prefixes) -- confirm before regenerating.
define void @extract_32xi8 (ptr %src , ptr %dst ) nounwind {
5
- ; CHECK-LABEL: extract_32xi8:
6
- ; CHECK: # %bb.0:
7
- ; CHECK-NEXT: xvld $xr0, $a0, 0
8
- ; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 1
9
- ; CHECK-NEXT: ret
6
+ ; LA32-LABEL: extract_32xi8:
7
+ ; LA32: # %bb.0:
8
+ ; LA32-NEXT: xvld $xr0, $a0, 0
9
+ ; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
10
+ ; LA32-NEXT: st.b $a0, $a1, 0
11
+ ; LA32-NEXT: ret
12
+ ;
13
+ ; LA64-LABEL: extract_32xi8:
14
+ ; LA64: # %bb.0:
15
+ ; LA64-NEXT: xvld $xr0, $a0, 0
16
+ ; LA64-NEXT: xvstelm.b $xr0, $a1, 0, 1
17
+ ; LA64-NEXT: ret
10
18
%v = load volatile <32 x i8 >, ptr %src
11
19
%e = extractelement <32 x i8 > %v , i32 1
12
20
store i8 %e , ptr %dst
13
21
ret void
14
22
}
15
23
16
24
; Test: load a volatile <16 x i16> from %src, extract the element at constant
; index 1, and store that i16 to %dst.
; Expected code per the assertions below: the LA32 run uses vpickve2gr.h
; followed by st.h, while the LA64 run uses a single xvstelm.h.
; NOTE(review): the `-`/`+` prefixed lines look like rendered-diff markers
; (old shared prefix vs. new per-target prefixes) -- confirm before regenerating.
define void @extract_16xi16 (ptr %src , ptr %dst ) nounwind {
17
- ; CHECK-LABEL: extract_16xi16:
18
- ; CHECK: # %bb.0:
19
- ; CHECK-NEXT: xvld $xr0, $a0, 0
20
- ; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 1
21
- ; CHECK-NEXT: ret
25
+ ; LA32-LABEL: extract_16xi16:
26
+ ; LA32: # %bb.0:
27
+ ; LA32-NEXT: xvld $xr0, $a0, 0
28
+ ; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
29
+ ; LA32-NEXT: st.h $a0, $a1, 0
30
+ ; LA32-NEXT: ret
31
+ ;
32
+ ; LA64-LABEL: extract_16xi16:
33
+ ; LA64: # %bb.0:
34
+ ; LA64-NEXT: xvld $xr0, $a0, 0
35
+ ; LA64-NEXT: xvstelm.h $xr0, $a1, 0, 1
36
+ ; LA64-NEXT: ret
22
37
%v = load volatile <16 x i16 >, ptr %src
23
38
%e = extractelement <16 x i16 > %v , i32 1
24
39
store i16 %e , ptr %dst
25
40
ret void
26
41
}
27
42
28
43
; Test: load a volatile <8 x i32> from %src, extract the element at constant
; index 1, and store that i32 to %dst.
; Expected code per the assertions below: the LA32 run uses xvpickve2gr.w
; followed by st.w, while the LA64 run uses a single xvstelm.w.
; NOTE(review): the `-`/`+` prefixed lines look like rendered-diff markers
; (old shared prefix vs. new per-target prefixes) -- confirm before regenerating.
define void @extract_8xi32 (ptr %src , ptr %dst ) nounwind {
29
- ; CHECK-LABEL: extract_8xi32:
30
- ; CHECK: # %bb.0:
31
- ; CHECK-NEXT: xvld $xr0, $a0, 0
32
- ; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 1
33
- ; CHECK-NEXT: ret
44
+ ; LA32-LABEL: extract_8xi32:
45
+ ; LA32: # %bb.0:
46
+ ; LA32-NEXT: xvld $xr0, $a0, 0
47
+ ; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
48
+ ; LA32-NEXT: st.w $a0, $a1, 0
49
+ ; LA32-NEXT: ret
50
+ ;
51
+ ; LA64-LABEL: extract_8xi32:
52
+ ; LA64: # %bb.0:
53
+ ; LA64-NEXT: xvld $xr0, $a0, 0
54
+ ; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 1
55
+ ; LA64-NEXT: ret
34
56
%v = load volatile <8 x i32 >, ptr %src
35
57
%e = extractelement <8 x i32 > %v , i32 1
36
58
store i32 %e , ptr %dst
37
59
ret void
38
60
}
39
61
40
62
define void @extract_4xi64 (ptr %src , ptr %dst ) nounwind {
41
- ; CHECK-LABEL: extract_4xi64:
42
- ; CHECK: # %bb.0:
43
- ; CHECK-NEXT: xvld $xr0, $a0, 0
44
- ; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 1
45
- ; CHECK-NEXT: ret
63
+ ; LA32-LABEL: extract_4xi64:
64
+ ; LA32: # %bb.0:
65
+ ; LA32-NEXT: xvld $xr0, $a0, 0
66
+ ; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 2
67
+ ; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 3
68
+ ; LA32-NEXT: st.w $a2, $a1, 4
69
+ ; LA32-NEXT: st.w $a0, $a1, 0
70
+ ; LA32-NEXT: ret
71
+ ;
72
+ ; LA64-LABEL: extract_4xi64:
73
+ ; LA64: # %bb.0:
74
+ ; LA64-NEXT: xvld $xr0, $a0, 0
75
+ ; LA64-NEXT: xvstelm.d $xr0, $a1, 0, 1
76
+ ; LA64-NEXT: ret
46
77
%v = load volatile <4 x i64 >, ptr %src
47
78
%e = extractelement <4 x i64 > %v , i32 1
48
79
store i64 %e , ptr %dst
@@ -74,58 +105,102 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
74
105
}
75
106
76
107
; Test: load a volatile <32 x i8> from %src, extract the element selected by
; the run-time i32 argument %idx, and store that i8 to %dst.
; Expected code per the assertions below: both runs use movgr2fr.w,
; xvpermi.q and xvshuf.b (in a different order and with different registers);
; the LA32 run then uses vpickve2gr.b plus st.b, the LA64 run uses xvstelm.b.
; NOTE(review): the `-`/`+` prefixed lines look like rendered-diff markers
; (old shared prefix vs. new per-target prefixes) -- confirm before regenerating.
define void @extract_32xi8_idx (ptr %src , ptr %dst , i32 %idx ) nounwind {
77
- ; CHECK-LABEL: extract_32xi8_idx:
78
- ; CHECK: # %bb.0:
79
- ; CHECK-NEXT: xvld $xr0, $a0, 0
80
- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
81
- ; CHECK-NEXT: movgr2fr.w $fa2, $a2
82
- ; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
83
- ; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
84
- ; CHECK-NEXT: ret
108
+ ; LA32-LABEL: extract_32xi8_idx:
109
+ ; LA32: # %bb.0:
110
+ ; LA32-NEXT: xvld $xr0, $a0, 0
111
+ ; LA32-NEXT: movgr2fr.w $fa1, $a2
112
+ ; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
113
+ ; LA32-NEXT: xvshuf.b $xr0, $xr2, $xr0, $xr1
114
+ ; LA32-NEXT: vpickve2gr.b $a0, $vr0, 0
115
+ ; LA32-NEXT: st.b $a0, $a1, 0
116
+ ; LA32-NEXT: ret
117
+ ;
118
+ ; LA64-LABEL: extract_32xi8_idx:
119
+ ; LA64: # %bb.0:
120
+ ; LA64-NEXT: xvld $xr0, $a0, 0
121
+ ; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
122
+ ; LA64-NEXT: movgr2fr.w $fa2, $a2
123
+ ; LA64-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
124
+ ; LA64-NEXT: xvstelm.b $xr0, $a1, 0, 0
125
+ ; LA64-NEXT: ret
85
126
%v = load volatile <32 x i8 >, ptr %src
86
127
%e = extractelement <32 x i8 > %v , i32 %idx
87
128
store i8 %e , ptr %dst
88
129
ret void
89
130
}
90
131
91
132
; Test: load a volatile <16 x i16> from %src, extract the element selected by
; the run-time i32 argument %idx, and store that i16 to %dst.
; Expected code per the assertions below: both runs use movgr2fr.w,
; xvpermi.q and xvshuf.h (in a different order and with different registers);
; the LA32 run then uses vpickve2gr.h plus st.h, the LA64 run uses xvstelm.h.
; NOTE(review): the `-`/`+` prefixed lines look like rendered-diff markers
; (old shared prefix vs. new per-target prefixes) -- confirm before regenerating.
define void @extract_16xi16_idx (ptr %src , ptr %dst , i32 %idx ) nounwind {
92
- ; CHECK-LABEL: extract_16xi16_idx:
93
- ; CHECK: # %bb.0:
94
- ; CHECK-NEXT: xvld $xr0, $a0, 0
95
- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
96
- ; CHECK-NEXT: movgr2fr.w $fa2, $a2
97
- ; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
98
- ; CHECK-NEXT: xvstelm.h $xr2, $a1, 0, 0
99
- ; CHECK-NEXT: ret
133
+ ; LA32-LABEL: extract_16xi16_idx:
134
+ ; LA32: # %bb.0:
135
+ ; LA32-NEXT: xvld $xr0, $a0, 0
136
+ ; LA32-NEXT: movgr2fr.w $fa1, $a2
137
+ ; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
138
+ ; LA32-NEXT: xvshuf.h $xr1, $xr2, $xr0
139
+ ; LA32-NEXT: vpickve2gr.h $a0, $vr1, 0
140
+ ; LA32-NEXT: st.h $a0, $a1, 0
141
+ ; LA32-NEXT: ret
142
+ ;
143
+ ; LA64-LABEL: extract_16xi16_idx:
144
+ ; LA64: # %bb.0:
145
+ ; LA64-NEXT: xvld $xr0, $a0, 0
146
+ ; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
147
+ ; LA64-NEXT: movgr2fr.w $fa2, $a2
148
+ ; LA64-NEXT: xvshuf.h $xr2, $xr1, $xr0
149
+ ; LA64-NEXT: xvstelm.h $xr2, $a1, 0, 0
150
+ ; LA64-NEXT: ret
100
151
%v = load volatile <16 x i16 >, ptr %src
101
152
%e = extractelement <16 x i16 > %v , i32 %idx
102
153
store i16 %e , ptr %dst
103
154
ret void
104
155
}
105
156
106
157
; Test: load a volatile <8 x i32> from %src, extract the element selected by
; the run-time i32 argument %idx, and store that i32 to %dst.
; Expected code per the assertions below: both runs use xvreplgr2vr.w and
; xvperm.w; the LA32 run then uses xvpickve2gr.w plus st.w, the LA64 run uses
; xvstelm.w.
; NOTE(review): the `-`/`+` prefixed lines look like rendered-diff markers
; (old shared prefix vs. new per-target prefixes) -- confirm before regenerating.
define void @extract_8xi32_idx (ptr %src , ptr %dst , i32 %idx ) nounwind {
107
- ; CHECK-LABEL: extract_8xi32_idx:
108
- ; CHECK: # %bb.0:
109
- ; CHECK-NEXT: xvld $xr0, $a0, 0
110
- ; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
111
- ; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
112
- ; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
113
- ; CHECK-NEXT: ret
158
+ ; LA32-LABEL: extract_8xi32_idx:
159
+ ; LA32: # %bb.0:
160
+ ; LA32-NEXT: xvld $xr0, $a0, 0
161
+ ; LA32-NEXT: xvreplgr2vr.w $xr1, $a2
162
+ ; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1
163
+ ; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
164
+ ; LA32-NEXT: st.w $a0, $a1, 0
165
+ ; LA32-NEXT: ret
166
+ ;
167
+ ; LA64-LABEL: extract_8xi32_idx:
168
+ ; LA64: # %bb.0:
169
+ ; LA64-NEXT: xvld $xr0, $a0, 0
170
+ ; LA64-NEXT: xvreplgr2vr.w $xr1, $a2
171
+ ; LA64-NEXT: xvperm.w $xr0, $xr0, $xr1
172
+ ; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 0
173
+ ; LA64-NEXT: ret
114
174
%v = load volatile <8 x i32 >, ptr %src
115
175
%e = extractelement <8 x i32 > %v , i32 %idx
116
176
store i32 %e , ptr %dst
117
177
ret void
118
178
}
119
179
120
180
define void @extract_4xi64_idx (ptr %src , ptr %dst , i32 %idx ) nounwind {
121
- ; CHECK-LABEL: extract_4xi64_idx:
122
- ; CHECK: # %bb.0:
123
- ; CHECK-NEXT: xvld $xr0, $a0, 0
124
- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
125
- ; CHECK-NEXT: movgr2fr.w $fa2, $a2
126
- ; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
127
- ; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0
128
- ; CHECK-NEXT: ret
181
+ ; LA32-LABEL: extract_4xi64_idx:
182
+ ; LA32: # %bb.0:
183
+ ; LA32-NEXT: xvld $xr0, $a0, 0
184
+ ; LA32-NEXT: add.w $a0, $a2, $a2
185
+ ; LA32-NEXT: addi.w $a2, $a0, 1
186
+ ; LA32-NEXT: xvreplgr2vr.w $xr1, $a2
187
+ ; LA32-NEXT: xvperm.w $xr1, $xr0, $xr1
188
+ ; LA32-NEXT: xvpickve2gr.w $a2, $xr1, 0
189
+ ; LA32-NEXT: xvreplgr2vr.w $xr1, $a0
190
+ ; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1
191
+ ; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
192
+ ; LA32-NEXT: st.w $a0, $a1, 0
193
+ ; LA32-NEXT: st.w $a2, $a1, 4
194
+ ; LA32-NEXT: ret
195
+ ;
196
+ ; LA64-LABEL: extract_4xi64_idx:
197
+ ; LA64: # %bb.0:
198
+ ; LA64-NEXT: xvld $xr0, $a0, 0
199
+ ; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
200
+ ; LA64-NEXT: movgr2fr.w $fa2, $a2
201
+ ; LA64-NEXT: xvshuf.d $xr2, $xr1, $xr0
202
+ ; LA64-NEXT: xvstelm.d $xr2, $a1, 0, 0
203
+ ; LA64-NEXT: ret
129
204
%v = load volatile <4 x i64 >, ptr %src
130
205
%e = extractelement <4 x i64 > %v , i32 %idx
131
206
store i64 %e , ptr %dst
@@ -147,28 +222,45 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
147
222
}
148
223
149
224
; Test: load a volatile <4 x double> from %src, extract the element selected
; by the run-time i32 argument %idx, and store that double to %dst.
; Expected code per the assertions below: both runs use the same five
; instructions (xvld, movgr2fr.w, xvpermi.q, xvshuf.d, xvstelm.d); the two
; runs differ only in the order of movgr2fr.w/xvpermi.q and in which
; registers are used.
; NOTE(review): the `-`/`+` prefixed lines look like rendered-diff markers
; (old shared prefix vs. new per-target prefixes) -- confirm before regenerating.
define void @extract_4xdouble_idx (ptr %src , ptr %dst , i32 %idx ) nounwind {
150
- ; CHECK-LABEL: extract_4xdouble_idx:
151
- ; CHECK: # %bb.0:
152
- ; CHECK-NEXT: xvld $xr0, $a0, 0
153
- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
154
- ; CHECK-NEXT: movgr2fr.w $fa2, $a2
155
- ; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
156
- ; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0
157
- ; CHECK-NEXT: ret
225
+ ; LA32-LABEL: extract_4xdouble_idx:
226
+ ; LA32: # %bb.0:
227
+ ; LA32-NEXT: xvld $xr0, $a0, 0
228
+ ; LA32-NEXT: movgr2fr.w $fa1, $a2
229
+ ; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
230
+ ; LA32-NEXT: xvshuf.d $xr1, $xr2, $xr0
231
+ ; LA32-NEXT: xvstelm.d $xr1, $a1, 0, 0
232
+ ; LA32-NEXT: ret
233
+ ;
234
+ ; LA64-LABEL: extract_4xdouble_idx:
235
+ ; LA64: # %bb.0:
236
+ ; LA64-NEXT: xvld $xr0, $a0, 0
237
+ ; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
238
+ ; LA64-NEXT: movgr2fr.w $fa2, $a2
239
+ ; LA64-NEXT: xvshuf.d $xr2, $xr1, $xr0
240
+ ; LA64-NEXT: xvstelm.d $xr2, $a1, 0, 0
241
+ ; LA64-NEXT: ret
158
242
%v = load volatile <4 x double >, ptr %src
159
243
%e = extractelement <4 x double > %v , i32 %idx
160
244
store double %e , ptr %dst
161
245
ret void
162
246
}
163
247
164
248
define void @eliminate_frame_index (<8 x i32 > %a ) nounwind {
165
- ; CHECK-LABEL: eliminate_frame_index:
166
- ; CHECK: # %bb.0:
167
- ; CHECK-NEXT: addi.d $sp, $sp, -1040
168
- ; CHECK-NEXT: addi.d $a0, $sp, 524
169
- ; CHECK-NEXT: xvstelm.w $xr0, $a0, 0, 1
170
- ; CHECK-NEXT: addi.d $sp, $sp, 1040
171
- ; CHECK-NEXT: ret
249
+ ; LA32-LABEL: eliminate_frame_index:
250
+ ; LA32: # %bb.0:
251
+ ; LA32-NEXT: addi.w $sp, $sp, -1040
252
+ ; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
253
+ ; LA32-NEXT: st.w $a0, $sp, 524
254
+ ; LA32-NEXT: addi.w $sp, $sp, 1040
255
+ ; LA32-NEXT: ret
256
+ ;
257
+ ; LA64-LABEL: eliminate_frame_index:
258
+ ; LA64: # %bb.0:
259
+ ; LA64-NEXT: addi.d $sp, $sp, -1040
260
+ ; LA64-NEXT: addi.d $a0, $sp, 524
261
+ ; LA64-NEXT: xvstelm.w $xr0, $a0, 0, 1
262
+ ; LA64-NEXT: addi.d $sp, $sp, 1040
263
+ ; LA64-NEXT: ret
172
264
%1 = alloca [32 x [8 x i32 ]]
173
265
%2 = getelementptr i8 , ptr %1 , i64 508
174
266
%b = extractelement <8 x i32 > %a , i64 1
0 commit comments