Skip to content

Commit b05e26b

Browse files
authored
[LoongArch] Optimize extracting f32/f64 from 256-bit vector by using XVPICKVE. (#151914)
1 parent fe0948c commit b05e26b

File tree

4 files changed: 47 additions and 48 deletions.

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 13 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1651,20 +1651,19 @@ def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm),
16511651
(XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>;
16521652
def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm),
16531653
(XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>;
1654-
def : Pat<(vector_insert v8f32:$xd, (loongarch_movgr2fr_w_la64 GPR:$rj), uimm3:$imm),
1655-
(XVINSGR2VR_W $xd, $rj, uimm3:$imm)>;
1656-
def : Pat<(vector_insert v4f64:$xd, (f64 (bitconvert i64:$rj)), uimm2:$imm),
1657-
(XVINSGR2VR_D $xd, $rj, uimm2:$imm)>;
1658-
def : Pat<(vector_insert v8f32:$xd, (f32 (vector_extract v8f32:$xj, uimm3:$imm1)), uimm3:$imm2),
1659-
(XVINSGR2VR_W $xd, (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm1), uimm3:$imm2)>;
1660-
def : Pat<(vector_insert v4f64:$xd, (f64 (vector_extract v4f64:$xj, uimm2:$imm1)), uimm2:$imm2),
1661-
(XVINSGR2VR_D $xd, (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm1), uimm2:$imm2)>;
1654+
def : Pat<(vector_insert v8f32:$xd, (loongarch_movgr2fr_w_la64 GPR:$rj),
1655+
uimm3:$imm),
1656+
(XVINSGR2VR_W v8f32:$xd, GPR:$rj, uimm3:$imm)>;
1657+
def : Pat<(vector_insert v4f64:$xd, (f64(bitconvert i64:$rj)), uimm2:$imm),
1658+
(XVINSGR2VR_D v4f64:$xd, GPR:$rj, uimm2:$imm)>;
16621659

16631660
// XVINSVE0_{W/D}
16641661
def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm),
1665-
(XVINSVE0_W $xd, (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), uimm3:$imm)>;
1662+
(XVINSVE0_W v8f32:$xd, (SUBREG_TO_REG(i64 0), FPR32:$fj, sub_32),
1663+
uimm3:$imm)>;
16661664
def : Pat<(vector_insert v4f64:$xd, FPR64:$fj, uimm2:$imm),
1667-
(XVINSVE0_D $xd, (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), uimm2:$imm)>;
1665+
(XVINSVE0_D v4f64:$xd, (SUBREG_TO_REG(i64 0), FPR64:$fj, sub_64),
1666+
uimm2:$imm)>;
16681667

16691668
// scalar_to_vector
16701669
def : Pat<(v8f32 (scalar_to_vector FPR32:$fj)),
@@ -1884,10 +1883,10 @@ def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)),
18841883
(XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>;
18851884
def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)),
18861885
(XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>;
1887-
def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)),
1888-
(MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>;
1889-
def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)),
1890-
(MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>;
1886+
def : Pat<(f32(vector_extract v8f32:$xj, uimm3:$imm)),
1887+
(EXTRACT_SUBREG(XVPICKVE_W v8f32:$xj, uimm3:$imm), sub_32)>;
1888+
def : Pat<(f64(vector_extract v4f64:$xj, uimm2:$imm)),
1889+
(EXTRACT_SUBREG(XVPICKVE_D v4f64:$xj, uimm2:$imm), sub_64)>;
18911890

18921891
// vselect
18931892
def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)),

llvm/test/CodeGen/LoongArch/lasx/fpowi.ll

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -11,16 +11,16 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
1111
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
1212
; CHECK-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
1313
; CHECK-NEXT: addi.w $fp, $a0, 0
14-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
15-
; CHECK-NEXT: movgr2fr.w $fa0, $a0
14+
; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1
15+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
1616
; CHECK-NEXT: move $a0, $fp
1717
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
1818
; CHECK-NEXT: jirl $ra, $ra, 0
1919
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
2020
; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
2121
; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
22-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
23-
; CHECK-NEXT: movgr2fr.w $fa0, $a0
22+
; CHECK-NEXT: xvpickve.w $xr0, $xr0, 0
23+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
2424
; CHECK-NEXT: move $a0, $fp
2525
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
2626
; CHECK-NEXT: jirl $ra, $ra, 0
@@ -29,8 +29,8 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
2929
; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1
3030
; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
3131
; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
32-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
33-
; CHECK-NEXT: movgr2fr.w $fa0, $a0
32+
; CHECK-NEXT: xvpickve.w $xr0, $xr0, 2
33+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
3434
; CHECK-NEXT: move $a0, $fp
3535
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
3636
; CHECK-NEXT: jirl $ra, $ra, 0
@@ -39,8 +39,8 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
3939
; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 2
4040
; CHECK-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
4141
; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
42-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
43-
; CHECK-NEXT: movgr2fr.w $fa0, $a0
42+
; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
43+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
4444
; CHECK-NEXT: move $a0, $fp
4545
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
4646
; CHECK-NEXT: jirl $ra, $ra, 0
@@ -49,8 +49,8 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
4949
; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 3
5050
; CHECK-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
5151
; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
52-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4
53-
; CHECK-NEXT: movgr2fr.w $fa0, $a0
52+
; CHECK-NEXT: xvpickve.w $xr0, $xr0, 4
53+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
5454
; CHECK-NEXT: move $a0, $fp
5555
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
5656
; CHECK-NEXT: jirl $ra, $ra, 0
@@ -59,8 +59,8 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
5959
; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 4
6060
; CHECK-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
6161
; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
62-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5
63-
; CHECK-NEXT: movgr2fr.w $fa0, $a0
62+
; CHECK-NEXT: xvpickve.w $xr0, $xr0, 5
63+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
6464
; CHECK-NEXT: move $a0, $fp
6565
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
6666
; CHECK-NEXT: jirl $ra, $ra, 0
@@ -69,8 +69,8 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
6969
; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 5
7070
; CHECK-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
7171
; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
72-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6
73-
; CHECK-NEXT: movgr2fr.w $fa0, $a0
72+
; CHECK-NEXT: xvpickve.w $xr0, $xr0, 6
73+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
7474
; CHECK-NEXT: move $a0, $fp
7575
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
7676
; CHECK-NEXT: jirl $ra, $ra, 0
@@ -79,8 +79,8 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
7979
; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 6
8080
; CHECK-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
8181
; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
82-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
83-
; CHECK-NEXT: movgr2fr.w $fa0, $a0
82+
; CHECK-NEXT: xvpickve.w $xr0, $xr0, 7
83+
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
8484
; CHECK-NEXT: move $a0, $fp
8585
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
8686
; CHECK-NEXT: jirl $ra, $ra, 0
@@ -107,16 +107,16 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
107107
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
108108
; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
109109
; CHECK-NEXT: addi.w $fp, $a0, 0
110-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
111-
; CHECK-NEXT: movgr2fr.d $fa0, $a0
110+
; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1
111+
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
112112
; CHECK-NEXT: move $a0, $fp
113113
; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
114114
; CHECK-NEXT: jirl $ra, $ra, 0
115115
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
116116
; CHECK-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
117117
; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
118-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
119-
; CHECK-NEXT: movgr2fr.d $fa0, $a0
118+
; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
119+
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
120120
; CHECK-NEXT: move $a0, $fp
121121
; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
122122
; CHECK-NEXT: jirl $ra, $ra, 0
@@ -125,8 +125,8 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
125125
; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
126126
; CHECK-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
127127
; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
128-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
129-
; CHECK-NEXT: movgr2fr.d $fa0, $a0
128+
; CHECK-NEXT: xvpickve.d $xr0, $xr0, 2
129+
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
130130
; CHECK-NEXT: move $a0, $fp
131131
; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
132132
; CHECK-NEXT: jirl $ra, $ra, 0
@@ -135,8 +135,8 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
135135
; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 2
136136
; CHECK-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
137137
; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
138-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
139-
; CHECK-NEXT: movgr2fr.d $fa0, $a0
138+
; CHECK-NEXT: xvpickve.d $xr0, $xr0, 3
139+
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
140140
; CHECK-NEXT: move $a0, $fp
141141
; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
142142
; CHECK-NEXT: jirl $ra, $ra, 0

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,12 @@
66
define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
77
; CHECK-LABEL: shufflevector_v4f64:
88
; CHECK: # %bb.0: # %entry
9-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 2
10-
; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3
11-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
12-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2
13-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr1, 3
14-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3
9+
; CHECK-NEXT: xvpickve.d $xr2, $xr1, 2
10+
; CHECK-NEXT: xvpickve.d $xr3, $xr0, 3
11+
; CHECK-NEXT: xvinsve0.d $xr0, $xr2, 1
12+
; CHECK-NEXT: xvinsve0.d $xr0, $xr3, 2
13+
; CHECK-NEXT: xvpickve.d $xr1, $xr1, 3
14+
; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 3
1515
; CHECK-NEXT: ret
1616
entry:
1717
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 6, i32 3, i32 7>

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,8 @@ entry:
4242
define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
4343
; CHECK-LABEL: insert_extract_v8f32:
4444
; CHECK: # %bb.0: # %entry
45-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
46-
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
45+
; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
46+
; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1
4747
; CHECK-NEXT: ret
4848
entry:
4949
%b = extractelement <8 x float> %a, i32 7
@@ -66,8 +66,8 @@ entry:
6666
define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
6767
; CHECK-LABEL: insert_extract_v4f64:
6868
; CHECK: # %bb.0: # %entry
69-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
70-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
69+
; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
70+
; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
7171
; CHECK-NEXT: ret
7272
entry:
7373
%b = extractelement <4 x double> %a, i32 3

0 commit comments

Comments
 (0)