Skip to content

Commit d218011

Browse files
authored
[LoongArch] Optimize inserting extracted elements (#146018)
1 parent 2194bca commit d218011

File tree

7 files changed

+144
-118
lines changed

7 files changed

+144
-118
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2597,12 +2597,9 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
25972597
SelectionDAG &DAG) const {
25982598
EVT VecTy = Op->getOperand(0)->getValueType(0);
25992599
SDValue Idx = Op->getOperand(1);
2600-
EVT EltTy = VecTy.getVectorElementType();
26012600
unsigned NumElts = VecTy.getVectorNumElements();
26022601

2603-
if (isa<ConstantSDNode>(Idx) &&
2604-
(EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
2605-
EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
2602+
if (isa<ConstantSDNode>(Idx) && Idx->getAsZExtVal() < NumElts)
26062603
return Op;
26072604

26082605
return SDValue();

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,6 +1282,32 @@ multiclass PatCCXrXrF<CondCode CC, string Inst> {
12821282
(!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
12831283
}
12841284

1285+
multiclass PairInsertExtractPatV8<ValueType vecty, ValueType elemty> {
1286+
foreach imm1 = 0...3 in {
1287+
foreach imm2 = 0...3 in {
1288+
defvar Imm = !or(!shl(imm2, 4), imm1);
1289+
def : Pat<(vector_insert (vector_insert vecty:$xd,
1290+
(elemty (vector_extract vecty:$xj, imm1)), imm2),
1291+
(elemty (vector_extract vecty:$xj, !add(imm1, 4))),
1292+
!add(imm2, 4)),
1293+
(XVEXTRINS_W $xd, $xj, Imm)>;
1294+
}
1295+
}
1296+
}
1297+
1298+
multiclass PairInsertExtractPatV4<ValueType vecty, ValueType elemty> {
1299+
foreach imm1 = 0...1 in {
1300+
foreach imm2 = 0...1 in {
1301+
defvar Imm = !or(!shl(imm2, 4), imm1);
1302+
def : Pat<(vector_insert (vector_insert vecty:$xd,
1303+
(elemty (vector_extract vecty:$xj, imm1)), imm2),
1304+
(elemty (vector_extract vecty:$xj, !add(imm1, 2))),
1305+
!add(imm2, 2)),
1306+
(XVEXTRINS_D $xd, $xj, Imm)>;
1307+
}
1308+
}
1309+
}
1310+
12851311
let Predicates = [HasExtLASX] in {
12861312

12871313
// XVADD_{B/H/W/D}
@@ -1582,6 +1608,38 @@ defm : PatCCXrXrF<SETUNE, "XVFCMP_CUNE">;
15821608
defm : PatCCXrXrF<SETO, "XVFCMP_COR">;
15831609
defm : PatCCXrXrF<SETUO, "XVFCMP_CUN">;
15841610

1611+
// Insert two elements extracted from a vector into a vector. (The two
1612+
// elements must sit at the same position within the low and high 128-bit
1613+
// halves, in both the source and the destination vector.)
1614+
// 2*XVPICKVE2GR_{W/D} + 2*XVINSGR2VR_{W/D} -> XVEXTRINS_{W/D}
1615+
// XVPERMI_D + 2*XVPICKVE2GR_{B/H} + 2*PseudoXVINSGR2VR_{B/H} -> XVEXTRINS_{B/H}
1616+
foreach imm1 = 0...15 in {
1617+
foreach imm2 = 0...15 in {
1618+
defvar Imm = !or(!shl(imm2, 4), imm1);
1619+
def : Pat<(vector_insert (vector_insert v32i8:$xd,
1620+
(GRLenVT (vector_extract v32i8:$xj, imm1)), imm2),
1621+
(GRLenVT (vector_extract v32i8:$xj, !add(imm1, 16))),
1622+
!add(imm2, 16)),
1623+
(XVEXTRINS_B $xd, $xj, Imm)>;
1624+
}
1625+
}
1626+
1627+
foreach imm1 = 0...7 in {
1628+
foreach imm2 = 0...7 in {
1629+
defvar Imm = !or(!shl(imm2, 4), imm1);
1630+
def : Pat<(vector_insert (vector_insert v16i16:$xd,
1631+
(GRLenVT (vector_extract v16i16:$xj, imm1)), imm2),
1632+
(GRLenVT (vector_extract v16i16:$xj, !add(imm1, 8))),
1633+
!add(imm2, 8)),
1634+
(XVEXTRINS_H $xd, $xj, Imm)>;
1635+
}
1636+
}
1637+
1638+
defm : PairInsertExtractPatV8<v8i32, GRLenVT>;
1639+
defm : PairInsertExtractPatV8<v8f32, f32>;
1640+
defm : PairInsertExtractPatV4<v4i64, GRLenVT>;
1641+
defm : PairInsertExtractPatV4<v4f64, f64>;
1642+
15851643
// PseudoXVINSGR2VR_{B/H}
15861644
def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
15871645
(PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;
@@ -1593,11 +1651,14 @@ def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm),
15931651
(XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>;
15941652
def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm),
15951653
(XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>;
1596-
1597-
def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
1598-
(XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
1599-
def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
1600-
(XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
1654+
def : Pat<(vector_insert v8f32:$xd, (f32 (vector_extract v8f32:$xj, uimm3:$imm1)), uimm3:$imm2),
1655+
(XVINSGR2VR_W $xd, (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm1), uimm3:$imm2)>;
1656+
def : Pat<(vector_insert v4f64:$xd, (f64 (vector_extract v4f64:$xj, uimm2:$imm1)), uimm2:$imm2),
1657+
(XVINSGR2VR_D $xd, (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm1), uimm2:$imm2)>;
1658+
def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm),
1659+
(XVINSGR2VR_W $xd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
1660+
def : Pat<(vector_insert v4f64:$xd, FPR64:$fj, uimm2:$imm),
1661+
(XVINSGR2VR_D $xd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
16011662

16021663
// scalar_to_vector
16031664
def : Pat<(v8f32 (scalar_to_vector FPR32:$fj)),
@@ -1791,6 +1852,18 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
17911852
}
17921853

17931854
// Vector extraction with constant index.
1855+
foreach imm = 16...31 in {
1856+
defvar Imm = !and(imm, 15);
1857+
def : Pat<(i64 (vector_extract v32i8:$xj, imm)),
1858+
(VPICKVE2GR_B (EXTRACT_SUBREG (XVPERMI_D v32i8:$xj, 14), sub_128),
1859+
Imm)>;
1860+
}
1861+
foreach imm = 8...15 in {
1862+
defvar Imm = !and(imm, 7);
1863+
def : Pat<(i64 (vector_extract v16i16:$xj, imm)),
1864+
(VPICKVE2GR_H (EXTRACT_SUBREG (XVPERMI_D v16i16:$xj, 14), sub_128),
1865+
Imm)>;
1866+
}
17941867
def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
17951868
(VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
17961869
def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1482,6 +1482,28 @@ multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
14821482
(Inst vt:$vd, BaseAddr:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;
14831483
}
14841484

1485+
multiclass InsertExtractPatV4<ValueType vecty, ValueType elemty> {
1486+
foreach imm1 = 0...3 in {
1487+
foreach imm2 = 0...3 in {
1488+
defvar Imm = !or(!shl(imm2, 4), imm1);
1489+
def : Pat<(vector_insert vecty:$vd,
1490+
(elemty (vector_extract vecty:$vj, imm1)), imm2),
1491+
(VEXTRINS_W $vd, $vj, Imm)>;
1492+
}
1493+
}
1494+
}
1495+
1496+
multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
1497+
foreach imm1 = 0...1 in {
1498+
foreach imm2 = 0...1 in {
1499+
defvar Imm = !or(!shl(imm2, 4), imm1);
1500+
def : Pat<(vector_insert vecty:$vd,
1501+
(elemty (vector_extract vecty:$vj, imm1)), imm2),
1502+
(VEXTRINS_D $vd, $vj, Imm)>;
1503+
}
1504+
}
1505+
}
1506+
14851507
let Predicates = [HasExtLSX] in {
14861508

14871509
// VADD_{B/H/W/D}
@@ -1782,6 +1804,31 @@ defm : PatCCVrVrF<SETUNE, "VFCMP_CUNE">;
17821804
defm : PatCCVrVrF<SETO, "VFCMP_COR">;
17831805
defm : PatCCVrVrF<SETUO, "VFCMP_CUN">;
17841806

1807+
// Insert element extracted from vector into vector.
1808+
// VPICKVE2GR_{B/H/W/D} + VINSGR2VR_{B/H/W/D} -> VEXTRINS_{B/H/W/D}
1809+
foreach imm1 = 0...15 in {
1810+
foreach imm2 = 0...15 in {
1811+
defvar Imm = !or(!shl(imm2, 4), imm1);
1812+
def : Pat<(vector_insert v16i8:$vd,
1813+
(GRLenVT (vector_extract v16i8:$vj, imm1)), imm2),
1814+
(VEXTRINS_B $vd, $vj, Imm)>;
1815+
}
1816+
}
1817+
1818+
foreach imm1 = 0...7 in {
1819+
foreach imm2 = 0...7 in {
1820+
defvar Imm = !or(!shl(imm2, 4), imm1);
1821+
def : Pat<(vector_insert v8i16:$vd,
1822+
(GRLenVT (vector_extract v8i16:$vj, imm1)), imm2),
1823+
(VEXTRINS_H $vd, $vj, Imm)>;
1824+
}
1825+
}
1826+
1827+
defm : InsertExtractPatV4<v4i32, GRLenVT>;
1828+
defm : InsertExtractPatV4<v4f32, f32>;
1829+
defm : InsertExtractPatV2<v2i64, GRLenVT>;
1830+
defm : InsertExtractPatV2<v2f64, f64>;
1831+
17851832
// VINSGR2VR_{B/H/W/D}
17861833
def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm),
17871834
(VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>;
@@ -1791,7 +1838,6 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm),
17911838
(VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>;
17921839
def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm),
17931840
(VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>;
1794-
17951841
def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
17961842
(VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>;
17971843
def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,12 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
77
; CHECK-LABEL: shufflevector_v4f64:
88
; CHECK: # %bb.0: # %entry
99
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
10-
; CHECK-NEXT: movgr2fr.d $fa2, $a0
11-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 2
12-
; CHECK-NEXT: movgr2fr.d $fa3, $a0
13-
; CHECK-NEXT: movfr2gr.d $a0, $fa2
1410
; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 0
15-
; CHECK-NEXT: movfr2gr.d $a0, $fa3
11+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 2
1612
; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 1
1713
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
18-
; CHECK-NEXT: movgr2fr.d $fa0, $a0
19-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 3
20-
; CHECK-NEXT: movgr2fr.d $fa1, $a0
21-
; CHECK-NEXT: movfr2gr.d $a0, $fa0
2214
; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 2
23-
; CHECK-NEXT: movfr2gr.d $a0, $fa1
15+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 3
2416
; CHECK-NEXT: xvinsgr2vr.d $xr2, $a0, 3
2517
; CHECK-NEXT: xvori.b $xr0, $xr2, 0
2618
; CHECK-NEXT: ret

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,9 @@
44
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
55
; CHECK-LABEL: insert_extract_v32i8:
66
; CHECK: # %bb.0: # %entry
7-
; CHECK-NEXT: addi.d $sp, $sp, -64
8-
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
9-
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
10-
; CHECK-NEXT: addi.d $fp, $sp, 64
11-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
12-
; CHECK-NEXT: xvst $xr0, $sp, 0
13-
; CHECK-NEXT: ld.b $a0, $sp, 31
7+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
8+
; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 15
149
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
15-
; CHECK-NEXT: addi.d $sp, $fp, -64
16-
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
17-
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
18-
; CHECK-NEXT: addi.d $sp, $sp, 64
1910
; CHECK-NEXT: ret
2011
entry:
2112
%b = extractelement <32 x i8> %a, i32 31
@@ -26,18 +17,9 @@ entry:
2617
define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
2718
; CHECK-LABEL: insert_extract_v16i16:
2819
; CHECK: # %bb.0: # %entry
29-
; CHECK-NEXT: addi.d $sp, $sp, -64
30-
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
31-
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
32-
; CHECK-NEXT: addi.d $fp, $sp, 64
33-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
34-
; CHECK-NEXT: xvst $xr0, $sp, 0
35-
; CHECK-NEXT: ld.h $a0, $sp, 30
20+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
21+
; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 7
3622
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
37-
; CHECK-NEXT: addi.d $sp, $fp, -64
38-
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
39-
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
40-
; CHECK-NEXT: addi.d $sp, $sp, 64
4123
; CHECK-NEXT: ret
4224
entry:
4325
%b = extractelement <16 x i16> %a, i32 15
@@ -61,8 +43,6 @@ define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
6143
; CHECK-LABEL: insert_extract_v8f32:
6244
; CHECK: # %bb.0: # %entry
6345
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
64-
; CHECK-NEXT: movgr2fr.w $fa1, $a0
65-
; CHECK-NEXT: movfr2gr.s $a0, $fa1
6646
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
6747
; CHECK-NEXT: ret
6848
entry:
@@ -87,8 +67,6 @@ define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
8767
; CHECK-LABEL: insert_extract_v4f64:
8868
; CHECK: # %bb.0: # %entry
8969
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
90-
; CHECK-NEXT: movgr2fr.d $fa1, $a0
91-
; CHECK-NEXT: movfr2gr.d $a0, $fa1
9270
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
9371
; CHECK-NEXT: ret
9472
entry:

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll

Lines changed: 6 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,7 @@
44
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
55
; CHECK-LABEL: insert_extract_v32i8:
66
; CHECK: # %bb.0: # %entry
7-
; CHECK-NEXT: addi.d $sp, $sp, -64
8-
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
9-
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
10-
; CHECK-NEXT: addi.d $fp, $sp, 64
11-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
12-
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
13-
; CHECK-NEXT: xvst $xr0, $sp, 0
14-
; CHECK-NEXT: ld.b $a1, $sp, 31
15-
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
16-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
17-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
18-
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1
19-
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
20-
; CHECK-NEXT: addi.d $sp, $fp, -64
21-
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
22-
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
23-
; CHECK-NEXT: addi.d $sp, $sp, 64
7+
; CHECK-NEXT: xvextrins.b $xr0, $xr0, 31
248
; CHECK-NEXT: ret
259
entry:
2610
%b_lo = extractelement <32 x i8> %a, i32 15
@@ -33,23 +17,7 @@ entry:
3317
define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
3418
; CHECK-LABEL: insert_extract_v16i16:
3519
; CHECK: # %bb.0: # %entry
36-
; CHECK-NEXT: addi.d $sp, $sp, -64
37-
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
38-
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
39-
; CHECK-NEXT: addi.d $fp, $sp, 64
40-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
41-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
42-
; CHECK-NEXT: xvst $xr0, $sp, 0
43-
; CHECK-NEXT: ld.h $a1, $sp, 30
44-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
45-
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
46-
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
47-
; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
48-
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
49-
; CHECK-NEXT: addi.d $sp, $fp, -64
50-
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
51-
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
52-
; CHECK-NEXT: addi.d $sp, $sp, 64
20+
; CHECK-NEXT: xvextrins.h $xr0, $xr0, 23
5321
; CHECK-NEXT: ret
5422
entry:
5523
%b_lo = extractelement <16 x i16> %a, i32 7
@@ -62,10 +30,7 @@ entry:
6230
define <8 x i32> @insert_extract_v8i32(<8 x i32> %a) nounwind {
6331
; CHECK-LABEL: insert_extract_v8i32:
6432
; CHECK: # %bb.0: # %entry
65-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
66-
; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7
67-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
68-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5
33+
; CHECK-NEXT: xvextrins.w $xr0, $xr0, 19
6934
; CHECK-NEXT: ret
7035
entry:
7136
%b_lo = extractelement <8 x i32> %a, i32 3
@@ -78,14 +43,7 @@ entry:
7843
define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
7944
; CHECK-LABEL: insert_extract_v8f32:
8045
; CHECK: # %bb.0: # %entry
81-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
82-
; CHECK-NEXT: movgr2fr.w $fa1, $a0
83-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
84-
; CHECK-NEXT: movgr2fr.w $fa2, $a0
85-
; CHECK-NEXT: movfr2gr.s $a0, $fa1
86-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
87-
; CHECK-NEXT: movfr2gr.s $a0, $fa2
88-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
46+
; CHECK-NEXT: xvextrins.w $xr0, $xr0, 19
8947
; CHECK-NEXT: ret
9048
entry:
9149
%b_lo = extractelement <8 x float> %a, i32 3
@@ -98,10 +56,7 @@ entry:
9856
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
9957
; CHECK-LABEL: insert_extract_v4i64:
10058
; CHECK: # %bb.0: # %entry
101-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
102-
; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3
103-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
104-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2
59+
; CHECK-NEXT: xvextrins.d $xr0, $xr0, 1
10560
; CHECK-NEXT: ret
10661
entry:
10762
%b_lo = extractelement <4 x i64> %a, i32 1
@@ -114,14 +69,7 @@ entry:
11469
define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
11570
; CHECK-LABEL: insert_extract_v4f64:
11671
; CHECK: # %bb.0: # %entry
117-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
118-
; CHECK-NEXT: movgr2fr.d $fa1, $a0
119-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
120-
; CHECK-NEXT: movgr2fr.d $fa2, $a0
121-
; CHECK-NEXT: movfr2gr.d $a0, $fa1
122-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
123-
; CHECK-NEXT: movfr2gr.d $a0, $fa2
124-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
72+
; CHECK-NEXT: xvextrins.d $xr0, $xr0, 1
12573
; CHECK-NEXT: ret
12674
entry:
12775
%b_lo = extractelement <4 x double> %a, i32 1

0 commit comments

Comments
 (0)