Skip to content

Commit 7132dbf

Browse files
committed
Optimize inserting extracted element for v4i64/v8i32
1 parent 1458eb2 commit 7132dbf

File tree

2 files changed: 20 additions and 4 deletions.

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1640,6 +1640,22 @@ defm : PairInsertExtractPatV8<v8f32, f32>;
16401640
defm : PairInsertExtractPatV4<v4i64, GRLenVT>;
16411641
defm : PairInsertExtractPatV4<v4f64, f64>;
16421642

1643+
// XVINSVE0_W + XVPICKVE_W
// Matches a vector_insert into v8i32 $xd at index imm2 whose scalar operand is
// a vector_extract from v8i32 $xj at index imm1; one pattern is emitted for
// every imm1/imm2 combination in 0...7. The selected code feeds
// XVPICKVE_W($xj, imm1) into XVINSVE0_W($xd, ..., imm2).
// NOTE(review): for imm1 == 0 the XVPICKVE_W step looks redundant if
// XVINSVE0_W already reads element 0 of its source operand -- confirm against
// the ISA manual before simplifying.
foreach imm1 = 0...7 in {
1644+
foreach imm2 = 0...7 in {
1645+
def : Pat<(vector_insert v8i32:$xd,
1646+
(GRLenVT(vector_extract v8i32:$xj, imm1)), imm2),
1647+
(XVINSVE0_W v8i32:$xd, (XVPICKVE_W v8i32:$xj, imm1), imm2)>;
1648+
}
1649+
}
1650+
1651+
// XVINSVE0_D + XVPICKVE_D
// Matches a vector_insert into v4i64 $xd at index imm2 whose scalar operand is
// a vector_extract from v4i64 $xj at index imm1; one pattern is emitted for
// every imm1/imm2 combination in 0...3. The selected code feeds
// XVPICKVE_D($xj, imm1) into XVINSVE0_D($xd, ..., imm2).
// NOTE(review): the extracted scalar is typed GRLenVT, which presumably
// assumes a 64-bit GRLen for the i64 element -- confirm behaviour on LA32.
foreach imm1 = 0...3 in {
1652+
foreach imm2 = 0...3 in {
1653+
def : Pat<(vector_insert v4i64:$xd,
1654+
(GRLenVT(vector_extract v4i64:$xj, imm1)), imm2),
1655+
(XVINSVE0_D v4i64:$xd, (XVPICKVE_D v4i64:$xj, imm1), imm2)>;
1656+
}
1657+
}
1658+
16431659
// PseudoXVINSGR2VR_{B/H}
16441660
def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
16451661
(PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ entry:
3030
define <8 x i32> @insert_extract_v8i32(<8 x i32> %a) nounwind {
3131
; CHECK-LABEL: insert_extract_v8i32:
3232
; CHECK: # %bb.0: # %entry
33-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
34-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
33+
; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
34+
; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1
3535
; CHECK-NEXT: ret
3636
entry:
3737
%b = extractelement <8 x i32> %a, i32 7
@@ -54,8 +54,8 @@ entry:
5454
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
5555
; CHECK-LABEL: insert_extract_v4i64:
5656
; CHECK: # %bb.0: # %entry
57-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
58-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
57+
; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
58+
; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
5959
; CHECK-NEXT: ret
6060
entry:
6161
%b = extractelement <4 x i64> %a, i32 3

0 commit comments

Comments
 (0)