Skip to content

Commit a180845

Browse files
committed
[LoongArch] Combine build_vector for half width vector building optimization
1 parent 5796c8d commit a180845

File tree

3 files changed

+114
-194
lines changed

3 files changed

+114
-194
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
466466

467467
// Set DAG combine for 'LASX' feature.
468468

469-
if (Subtarget.hasExtLASX())
469+
if (Subtarget.hasExtLASX()) {
470470
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
471+
setTargetDAGCombine(ISD::BUILD_VECTOR);
472+
}
471473

472474
// Compute derived properties from the register classes.
473475
computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -6679,6 +6681,58 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
66796681
return SDValue();
66806682
}
66816683

6684+
// DAG combine for ISD::BUILD_VECTOR.
//
// Rewrites a legal 128-bit BUILD_VECTOR whose operands are all
// EXTRACT_VECTOR_ELT nodes with constant indices taken from one and the same
// legal 256-bit vector (same element type, exactly twice as many elements)
// into a VECTOR_SHUFFLE of the two halves of that 256-bit vector.
//
// N         - the BUILD_VECTOR node being combined.
// DAG       - the SelectionDAG used to create the replacement nodes.
// DCI       - combiner info; not used in this function body.
// Subtarget - the LoongArch subtarget; not used in this function body.
// TLI       - target lowering, queried for type legality.
//
// Returns the new shuffle node, or an empty SDValue() when the pattern does
// not match.
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
6685+
TargetLowering::DAGCombinerInfo &DCI,
6686+
const LoongArchSubtarget &Subtarget,
6687+
const LoongArchTargetLowering &TLI) {
6688+
SDLoc DL(N);
6689+
EVT VT = N->getValueType(0);
6690+
unsigned NumElts = N->getNumOperands();
6691+
6692+
// Only a legal 128-bit result type is handled.
if (!VT.is128BitVector() || !TLI.isTypeLegal(VT))
6693+
return SDValue();
6694+
6695+
// Combine:
6696+
// t1 = extract_vector_elt t0, imm1
6697+
// t2 = extract_vector_elt t0, imm2
6698+
// t3 = BUILD_VECTOR t1, t2
6699+
// to:
6700+
// t4 = extract_subvector t0, 0
6701+
// t5 = extract_subvector t0, 2
6702+
// t3 = vector_shuffle t4, t5, <imm1, imm2>
// (The example above is for a 2-element result; in general the second
// subvector starts at element NumElts of t0.)
6703+
SDValue OrigVec;
6704+
SmallVector<int, 16> Mask(NumElts, -1);
6705+
for (unsigned i = 0; i < NumElts; ++i) {
6706+
SDValue Op = N->getOperand(i);
6707+
6708+
// Every operand must be an element extract; any other operand kind
// aborts the combine.
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
6709+
return SDValue();
6710+
6711+
// The extract index must be a compile-time constant so it can become a
// shuffle mask entry.
auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6712+
if (!ExtractIdx)
6713+
return SDValue();
// The index into the 256-bit source is used unchanged as the mask entry:
// values below NumElts select from SubVec0, values from NumElts upward
// select from SubVec1.
// NOTE(review): the index is not range-checked against 2 * NumElts here;
// presumably out-of-range constant extracts never reach this combine --
// confirm.
6714+
Mask[i] = ExtractIdx->getZExtValue();
6715+
6716+
SDValue Vec = Op.getOperand(0);
6717+
if (i == 0) {
// The first operand fixes the source vector and validates its type:
// legal, 256 bits wide, same element type as the result, and twice the
// result's element count.
6718+
OrigVec = Vec;
6719+
EVT OrigTy = OrigVec.getValueType();
6720+
if (!OrigTy.is256BitVector() || !TLI.isTypeLegal(OrigTy))
6721+
return SDValue();
6722+
if (OrigTy.getVectorElementType() != VT.getVectorElementType() ||
6723+
OrigTy.getVectorNumElements() != NumElts * 2)
6724+
return SDValue();
6725+
} else {
// All remaining operands must extract from that same source vector.
6726+
if (Vec != OrigVec)
6727+
return SDValue();
6728+
}
6729+
}
6730+
6731+
// Split the 256-bit source into two VT-typed halves (starting at elements
// 0 and NumElts) and select the requested elements with one shuffle.
SDValue SubVec0 = DAG.getExtractSubvector(DL, VT, OrigVec, 0);
6732+
SDValue SubVec1 = DAG.getExtractSubvector(DL, VT, OrigVec, NumElts);
6733+
return DAG.getVectorShuffle(VT, DL, SubVec0, SubVec1, Mask);
6734+
}
6735+
66826736
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
66836737
DAGCombinerInfo &DCI) const {
66846738
SelectionDAG &DAG = DCI.DAG;
@@ -6714,6 +6768,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
67146768
return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
67156769
case ISD::EXTRACT_VECTOR_ELT:
67166770
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6771+
case ISD::BUILD_VECTOR:
6772+
return performBUILD_VECTORCombine(N, DAG, DCI, Subtarget, *this);
67176773
}
67186774
return SDValue();
67196775
}

llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll

Lines changed: 47 additions & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2-
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
3-
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
2+
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
3+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
44

55
define void @buildvector_ext32107654ba98fedc(ptr %dst, ptr %src) nounwind {
66
; CHECK-LABEL: buildvector_ext32107654ba98fedc:
77
; CHECK: # %bb.0: # %entry
8-
; CHECK-NEXT: xvld $xr0, $a1, 0
9-
; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
10-
; CHECK-NEXT: vpickve2gr.b $a2, $vr0, 2
11-
; CHECK-NEXT: vpickve2gr.b $a3, $vr0, 1
12-
; CHECK-NEXT: vpickve2gr.b $a4, $vr0, 0
13-
; CHECK-NEXT: vpickve2gr.b $a5, $vr0, 7
14-
; CHECK-NEXT: vpickve2gr.b $a6, $vr0, 6
15-
; CHECK-NEXT: vpickve2gr.b $a7, $vr0, 5
16-
; CHECK-NEXT: vpickve2gr.b $t0, $vr0, 4
17-
; CHECK-NEXT: vpickve2gr.b $t1, $vr0, 11
18-
; CHECK-NEXT: vpickve2gr.b $t2, $vr0, 10
19-
; CHECK-NEXT: vpickve2gr.b $t3, $vr0, 9
20-
; CHECK-NEXT: vpickve2gr.b $t4, $vr0, 8
21-
; CHECK-NEXT: vpickve2gr.b $t5, $vr0, 15
22-
; CHECK-NEXT: vpickve2gr.b $t6, $vr0, 14
23-
; CHECK-NEXT: vpickve2gr.b $t7, $vr0, 13
24-
; CHECK-NEXT: vpickve2gr.b $t8, $vr0, 12
25-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0
26-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1
27-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2
28-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3
29-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4
30-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5
31-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6
32-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t0, 7
33-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t1, 8
34-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t2, 9
35-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t3, 10
36-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t4, 11
37-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t5, 12
38-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t6, 13
39-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t7, 14
40-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t8, 15
8+
; CHECK-NEXT: vld $vr0, $a1, 0
9+
; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27
4110
; CHECK-NEXT: vst $vr0, $a0, 0
4211
; CHECK-NEXT: ret
4312
entry:
@@ -82,39 +51,8 @@ define void @buildvector_ext13579bdfx13579bdf(ptr %dst, ptr %src) nounwind {
8251
; CHECK-LABEL: buildvector_ext13579bdfx13579bdf:
8352
; CHECK: # %bb.0: # %entry
8453
; CHECK-NEXT: xvld $xr0, $a1, 0
85-
; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
86-
; CHECK-NEXT: vpickve2gr.b $a2, $vr0, 3
87-
; CHECK-NEXT: vpickve2gr.b $a3, $vr0, 5
88-
; CHECK-NEXT: vpickve2gr.b $a4, $vr0, 7
89-
; CHECK-NEXT: vpickve2gr.b $a5, $vr0, 9
90-
; CHECK-NEXT: vpickve2gr.b $a6, $vr0, 11
91-
; CHECK-NEXT: vpickve2gr.b $a7, $vr0, 13
92-
; CHECK-NEXT: vpickve2gr.b $t0, $vr0, 15
93-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 14
94-
; CHECK-NEXT: vpickve2gr.b $t1, $vr0, 1
95-
; CHECK-NEXT: vpickve2gr.b $t2, $vr0, 3
96-
; CHECK-NEXT: vpickve2gr.b $t3, $vr0, 5
97-
; CHECK-NEXT: vpickve2gr.b $t4, $vr0, 7
98-
; CHECK-NEXT: vpickve2gr.b $t5, $vr0, 9
99-
; CHECK-NEXT: vpickve2gr.b $t6, $vr0, 11
100-
; CHECK-NEXT: vpickve2gr.b $t7, $vr0, 13
101-
; CHECK-NEXT: vpickve2gr.b $t8, $vr0, 15
102-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0
103-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1
104-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2
105-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3
106-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4
107-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5
108-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6
109-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t0, 7
110-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t1, 8
111-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t2, 9
112-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t3, 10
113-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t4, 11
114-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t5, 12
115-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t6, 13
116-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t7, 14
117-
; CHECK-NEXT: vinsgr2vr.b $vr0, $t8, 15
54+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
55+
; CHECK-NEXT: vpickod.b $vr0, $vr1, $vr0
11856
; CHECK-NEXT: vst $vr0, $a0, 0
11957
; CHECK-NEXT: ret
12058
entry:
@@ -158,23 +96,11 @@ entry:
15896
define void @buildvector_ext01234560(ptr %dst, ptr %src) nounwind {
15997
; CHECK-LABEL: buildvector_ext01234560:
16098
; CHECK: # %bb.0: # %entry
161-
; CHECK-NEXT: xvld $xr0, $a1, 0
162-
; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 0
163-
; CHECK-NEXT: vpickve2gr.h $a2, $vr0, 1
164-
; CHECK-NEXT: vpickve2gr.h $a3, $vr0, 2
165-
; CHECK-NEXT: vpickve2gr.h $a4, $vr0, 3
166-
; CHECK-NEXT: vpickve2gr.h $a5, $vr0, 4
167-
; CHECK-NEXT: vpickve2gr.h $a6, $vr0, 5
168-
; CHECK-NEXT: vpickve2gr.h $a7, $vr0, 6
169-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
170-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1
171-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2
172-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3
173-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4
174-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5
175-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6
176-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
177-
; CHECK-NEXT: vst $vr0, $a0, 0
99+
; CHECK-NEXT: vld $vr0, $a1, 0
100+
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0)
101+
; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI2_0)
102+
; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
103+
; CHECK-NEXT: vst $vr1, $a0, 0
178104
; CHECK-NEXT: ret
179105
entry:
180106
%v = load <16 x i16>, ptr %src
@@ -202,23 +128,8 @@ define void @buildvector_ext08192a3b(ptr %dst, ptr %src) nounwind {
202128
; CHECK-LABEL: buildvector_ext08192a3b:
203129
; CHECK: # %bb.0: # %entry
204130
; CHECK-NEXT: xvld $xr0, $a1, 0
205-
; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 0
206-
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
207-
; CHECK-NEXT: vpickve2gr.h $a2, $vr1, 0
208-
; CHECK-NEXT: vpickve2gr.h $a3, $vr0, 1
209-
; CHECK-NEXT: vpickve2gr.h $a4, $vr1, 1
210-
; CHECK-NEXT: vpickve2gr.h $a5, $vr0, 2
211-
; CHECK-NEXT: vpickve2gr.h $a6, $vr1, 2
212-
; CHECK-NEXT: vpickve2gr.h $a7, $vr0, 3
213-
; CHECK-NEXT: vpickve2gr.h $t0, $vr1, 3
214-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
215-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1
216-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2
217-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3
218-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4
219-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5
220-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6
221-
; CHECK-NEXT: vinsgr2vr.h $vr0, $t0, 7
131+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
132+
; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
222133
; CHECK-NEXT: vst $vr0, $a0, 0
223134
; CHECK-NEXT: ret
224135
entry:
@@ -244,20 +155,12 @@ entry:
244155
}
245156

246157
define void @buildvector_ext0000(ptr %dst, ptr %src) nounwind {
247-
; LA32-LABEL: buildvector_ext0000:
248-
; LA32: # %bb.0: # %entry
249-
; LA32-NEXT: ld.w $a1, $a1, 0
250-
; LA32-NEXT: vreplgr2vr.w $vr0, $a1
251-
; LA32-NEXT: vst $vr0, $a0, 0
252-
; LA32-NEXT: ret
253-
;
254-
; LA64-LABEL: buildvector_ext0000:
255-
; LA64: # %bb.0: # %entry
256-
; LA64-NEXT: xvld $xr0, $a1, 0
257-
; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
258-
; LA64-NEXT: vreplgr2vr.w $vr0, $a1
259-
; LA64-NEXT: vst $vr0, $a0, 0
260-
; LA64-NEXT: ret
158+
; CHECK-LABEL: buildvector_ext0000:
159+
; CHECK: # %bb.0: # %entry
160+
; CHECK-NEXT: vld $vr0, $a1, 0
161+
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
162+
; CHECK-NEXT: vst $vr0, $a0, 0
163+
; CHECK-NEXT: ret
261164
entry:
262165
%v = load <8 x i32>, ptr %src
263166
%e0 = extractelement <8 x i32> %v, i32 0
@@ -276,15 +179,11 @@ define void @buildvector_ext7610(ptr %dst, ptr %src) nounwind {
276179
; CHECK-LABEL: buildvector_ext7610:
277180
; CHECK: # %bb.0: # %entry
278181
; CHECK-NEXT: xvld $xr0, $a1, 0
279-
; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7
280-
; CHECK-NEXT: xvpickve2gr.w $a2, $xr0, 6
281-
; CHECK-NEXT: xvpickve2gr.w $a3, $xr0, 1
282-
; CHECK-NEXT: xvpickve2gr.w $a4, $xr0, 0
283-
; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0
284-
; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1
285-
; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 2
286-
; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 3
287-
; CHECK-NEXT: vst $vr0, $a0, 0
182+
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI5_0)
183+
; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI5_0)
184+
; CHECK-NEXT: xvpermi.q $xr2, $xr0, 1
185+
; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
186+
; CHECK-NEXT: vst $vr1, $a0, 0
288187
; CHECK-NEXT: ret
289188
entry:
290189
%v = load <8 x i32>, ptr %src
@@ -303,12 +202,8 @@ entry:
303202
define void @buildvector_ext0113(ptr %dst, ptr %src) nounwind {
304203
; CHECK-LABEL: buildvector_ext0113:
305204
; CHECK: # %bb.0: # %entry
306-
; CHECK-NEXT: xvld $xr0, $a1, 0
307-
; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
308-
; CHECK-NEXT: xvpickve.w $xr2, $xr0, 3
309-
; CHECK-NEXT: vextrins.w $vr0, $vr1, 16
310-
; CHECK-NEXT: vextrins.w $vr0, $vr1, 32
311-
; CHECK-NEXT: vextrins.w $vr0, $vr2, 48
205+
; CHECK-NEXT: vld $vr0, $a1, 0
206+
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 212
312207
; CHECK-NEXT: vst $vr0, $a0, 0
313208
; CHECK-NEXT: ret
314209
entry:
@@ -329,12 +224,10 @@ define void @buildvector_ext6060(ptr %dst, ptr %src) nounwind {
329224
; CHECK-LABEL: buildvector_ext6060:
330225
; CHECK: # %bb.0: # %entry
331226
; CHECK-NEXT: xvld $xr0, $a1, 0
332-
; CHECK-NEXT: xvpickve.w $xr1, $xr0, 0
333-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 238
334-
; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 2
335-
; CHECK-NEXT: vextrins.w $vr0, $vr1, 16
336-
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
337-
; CHECK-NEXT: vst $vr0, $a0, 0
227+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
228+
; CHECK-NEXT: vrepli.d $vr2, 6
229+
; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
230+
; CHECK-NEXT: vst $vr2, $a0, 0
338231
; CHECK-NEXT: ret
339232
entry:
340233
%v = load <8 x float>, ptr %src
@@ -351,23 +244,12 @@ entry:
351244
}
352245

353246
define void @buildvector_ext00(ptr %dst, ptr %src) nounwind {
354-
; LA32-LABEL: buildvector_ext00:
355-
; LA32: # %bb.0: # %entry
356-
; LA32-NEXT: xvld $xr0, $a1, 0
357-
; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
358-
; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
359-
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
360-
; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1
361-
; LA32-NEXT: vreplvei.d $vr0, $vr0, 0
362-
; LA32-NEXT: vst $vr0, $a0, 0
363-
; LA32-NEXT: ret
364-
;
365-
; LA64-LABEL: buildvector_ext00:
366-
; LA64: # %bb.0: # %entry
367-
; LA64-NEXT: ld.d $a1, $a1, 0
368-
; LA64-NEXT: vreplgr2vr.d $vr0, $a1
369-
; LA64-NEXT: vst $vr0, $a0, 0
370-
; LA64-NEXT: ret
247+
; CHECK-LABEL: buildvector_ext00:
248+
; CHECK: # %bb.0: # %entry
249+
; CHECK-NEXT: vld $vr0, $a1, 0
250+
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
251+
; CHECK-NEXT: vst $vr0, $a0, 0
252+
; CHECK-NEXT: ret
371253
entry:
372254
%v = load <4 x i64>, ptr %src
373255
%e0 = extractelement <4 x i64> %v, i32 0
@@ -379,29 +261,13 @@ entry:
379261
}
380262

381263
define void @buildvector_ext12(ptr %dst, ptr %src) nounwind {
382-
; LA32-LABEL: buildvector_ext12:
383-
; LA32: # %bb.0: # %entry
384-
; LA32-NEXT: xvld $xr0, $a1, 0
385-
; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
386-
; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2
387-
; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 5
388-
; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 4
389-
; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
390-
; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1
391-
; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 2
392-
; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
393-
; LA32-NEXT: vst $vr0, $a0, 0
394-
; LA32-NEXT: ret
395-
;
396-
; LA64-LABEL: buildvector_ext12:
397-
; LA64: # %bb.0: # %entry
398-
; LA64-NEXT: xvld $xr0, $a1, 0
399-
; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
400-
; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 2
401-
; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0
402-
; LA64-NEXT: vinsgr2vr.d $vr0, $a2, 1
403-
; LA64-NEXT: vst $vr0, $a0, 0
404-
; LA64-NEXT: ret
264+
; CHECK-LABEL: buildvector_ext12:
265+
; CHECK: # %bb.0: # %entry
266+
; CHECK-NEXT: xvld $xr0, $a1, 0
267+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
268+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
269+
; CHECK-NEXT: vst $vr0, $a0, 0
270+
; CHECK-NEXT: ret
405271
entry:
406272
%v = load <4 x i64>, ptr %src
407273
%e0 = extractelement <4 x i64> %v, i32 1
@@ -415,10 +281,8 @@ entry:
415281
define void @buildvector_ext10(ptr %dst, ptr %src) nounwind {
416282
; CHECK-LABEL: buildvector_ext10:
417283
; CHECK: # %bb.0: # %entry
418-
; CHECK-NEXT: xvld $xr0, $a1, 0
419-
; CHECK-NEXT: xvpickve.d $xr1, $xr0, 0
420-
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1
421-
; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
284+
; CHECK-NEXT: vld $vr0, $a1, 0
285+
; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1
422286
; CHECK-NEXT: vst $vr0, $a0, 0
423287
; CHECK-NEXT: ret
424288
entry:
@@ -435,9 +299,8 @@ define void @buildvector_ext31(ptr %dst, ptr %src) nounwind {
435299
; CHECK-LABEL: buildvector_ext31:
436300
; CHECK: # %bb.0: # %entry
437301
; CHECK-NEXT: xvld $xr0, $a1, 0
438-
; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
439-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 3
440-
; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
302+
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
303+
; CHECK-NEXT: vpackod.d $vr0, $vr0, $vr1
441304
; CHECK-NEXT: vst $vr0, $a0, 0
442305
; CHECK-NEXT: ret
443306
entry:

0 commit comments

Comments
 (0)