Skip to content

Commit 9315d70

Browse files
authored
[LoongArch] Optimize inserting extracted element for v4i64/v8i32 (#152629)
1 parent 5e7924a commit 9315d70

File tree

2 files changed

+45
-4
lines changed

2 files changed

+45
-4
lines changed

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1640,6 +1640,24 @@ defm : PairInsertExtractPatV8<v8f32, f32>;
16401640
defm : PairInsertExtractPatV4<v4i64, GRLenVT>;
16411641
defm : PairInsertExtractPatV4<v4f64, f64>;
16421642

1643+
def : Pat<(vector_insert v8i32:$xd, (GRLenVT(vector_extract v8i32:$xj, 0)),
1644+
uimm3:$imm),
1645+
(XVINSVE0_W v8i32:$xd, v8i32:$xj, uimm3:$imm)>;
1646+
1647+
def : Pat<(vector_insert v4i64:$xd, (GRLenVT(vector_extract v4i64:$xj, 0)),
1648+
uimm2:$imm),
1649+
(XVINSVE0_D v4i64:$xd, v4i64:$xj, uimm2:$imm)>;
1650+
1651+
def : Pat<(vector_insert v8i32:$xd,
1652+
(GRLenVT(vector_extract v8i32:$xj, uimm3:$imm1)), uimm3:$imm2),
1653+
(XVINSVE0_W v8i32:$xd, (XVPICKVE_W v8i32:$xj, uimm3:$imm1),
1654+
uimm3:$imm2)>;
1655+
1656+
def : Pat<(vector_insert v4i64:$xd,
1657+
(GRLenVT(vector_extract v4i64:$xj, uimm2:$imm1)), uimm2:$imm2),
1658+
(XVINSVE0_D v4i64:$xd, (XVPICKVE_D v4i64:$xj, uimm2:$imm1),
1659+
uimm2:$imm2)>;
1660+
16431661
// PseudoXVINSGR2VR_{B/H}
16441662
def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
16451663
(PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll

Lines changed: 27 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,15 +30,27 @@ entry:
3030
define <8 x i32> @insert_extract_v8i32(<8 x i32> %a) nounwind {
3131
; CHECK-LABEL: insert_extract_v8i32:
3232
; CHECK: # %bb.0: # %entry
33-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
34-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
33+
; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
34+
; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1
3535
; CHECK-NEXT: ret
3636
entry:
3737
%b = extractelement <8 x i32> %a, i32 7
3838
%c = insertelement <8 x i32> %a, i32 %b, i32 1
3939
ret <8 x i32> %c
4040
}
4141

42+
43+
define <8 x i32> @insert_extract0_v8i32(<8 x i32> %a) nounwind {
44+
; CHECK-LABEL: insert_extract0_v8i32:
45+
; CHECK: # %bb.0: # %entry
46+
; CHECK-NEXT: xvinsve0.w $xr0, $xr0, 1
47+
; CHECK-NEXT: ret
48+
entry:
49+
%b = extractelement <8 x i32> %a, i32 0
50+
%c = insertelement <8 x i32> %a, i32 %b, i32 1
51+
ret <8 x i32> %c
52+
}
53+
4254
define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
4355
; CHECK-LABEL: insert_extract_v8f32:
4456
; CHECK: # %bb.0: # %entry
@@ -54,15 +66,26 @@ entry:
5466
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
5567
; CHECK-LABEL: insert_extract_v4i64:
5668
; CHECK: # %bb.0: # %entry
57-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
58-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
69+
; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
70+
; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
5971
; CHECK-NEXT: ret
6072
entry:
6173
%b = extractelement <4 x i64> %a, i32 3
6274
%c = insertelement <4 x i64> %a, i64 %b, i32 1
6375
ret <4 x i64> %c
6476
}
6577

78+
define <4 x i64> @insert_extract0_v4i64(<4 x i64> %a) nounwind {
79+
; CHECK-LABEL: insert_extract0_v4i64:
80+
; CHECK: # %bb.0: # %entry
81+
; CHECK-NEXT: xvinsve0.d $xr0, $xr0, 1
82+
; CHECK-NEXT: ret
83+
entry:
84+
%b = extractelement <4 x i64> %a, i32 0
85+
%c = insertelement <4 x i64> %a, i64 %b, i32 1
86+
ret <4 x i64> %c
87+
}
88+
6689
define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
6790
; CHECK-LABEL: insert_extract_v4f64:
6891
; CHECK: # %bb.0: # %entry

0 commit comments

Comments
 (0)