Commit 8c17ed1

[RISCV] Generalize RISCVDAGToDAGISel::selectFPImm to handle bitcasts from int to FP. (#108284)
selectFPImm previously handled cases where an FPImm could be materialized in an integer register. We can generalize this to cases where a value was in an integer register and then copied to a scalar FP register to be used by a vector instruction. In the affected test, the call lowering code used up all of the FP argument registers and started using GPRs. Now we use integer vector instructions to consume those GPRs instead of moving them to scalar FP first.
Parent: 08740a6
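As a rough, hypothetical illustration of the kind of IR this change affects (the function name and vector shape below are made up, not taken from the commit's tests, and assume RV64 with the V extension): a double produced by bitcasting an i64 already lives in a GPR, and the generalized matcher lets the splat be selected on the integer side (e.g. vmv.v.x) instead of first moving the value into a scalar FP register.

; Hypothetical reduced example (not from this commit). On RV64 the i64
; argument arrives in a GPR; with selectScalarFPAsInt looking through the
; bitcast, the splat can typically be selected as an integer splat such as
; vmv.v.x rather than fmv.d.x followed by vfmv.v.f.
define <2 x double> @splat_i64_as_double(i64 %x) {
  %f = bitcast i64 %x to double
  %head = insertelement <2 x double> poison, double %f, i64 0
  %splat = shufflevector <2 x double> %head, <2 x double> poison, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}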

File tree: 6 files changed, +79 −80 lines

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 15 additions & 1 deletion
@@ -3535,7 +3535,21 @@ bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
   return selectVSplat(N, SplatVal);
 }
 
-bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
+bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
+  // Allow bitcasts from XLenVT -> FP.
+  if (N.getOpcode() == ISD::BITCAST &&
+      N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
+    Imm = N.getOperand(0);
+    return true;
+  }
+  // Allow moves from XLenVT to FP.
+  if (N.getOpcode() == RISCVISD::FMV_H_X ||
+      N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
+    Imm = N.getOperand(0);
+    return true;
+  }
+
+  // Otherwise, look for FP constants that can be materialized with scalar int.
   ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
   if (!CFP)
     return false;

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 1 addition & 1 deletion
@@ -140,7 +140,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   // Matches the splat of a value which can be extended or truncated, such that
   // only the bottom 8 bits are preserved.
   bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal);
-  bool selectFPImm(SDValue N, SDValue &Imm);
+  bool selectScalarFPAsInt(SDValue N, SDValue &Imm);
 
   bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
   template <unsigned Width> bool selectRVVSimm5(SDValue N, SDValue &Imm) {

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 2 additions & 1 deletion
@@ -236,7 +236,8 @@ def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
 // This must be kept in sync with RISCV::VLMaxSentinel.
 def VLMax : OutPatFrag<(ops), (XLenVT -1)>;
 
-def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 1>;
+def SelectScalarFPAsInt : ComplexPattern<fAny, 1, "selectScalarFPAsInt", [], [],
+                                         1>;
 
 // List of EEW.
 defvar EEWList = [8, 16, 32, 64];

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 1 addition & 1 deletion
@@ -1374,7 +1374,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
                            fvti.AVL, fvti.Log2SEW)>;
 
   def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
-                                  (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
+                                  (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
                                   fvti.RegClass:$rs2)),
             (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
                  (fvti.Vector (IMPLICIT_DEF)),

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 3 additions & 3 deletions
@@ -2575,7 +2575,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
                              GPR:$vl, fvti.Log2SEW)>;
 
   def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0),
-                                          (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
+                                          (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
                                           fvti.RegClass:$rs2,
                                           fvti.RegClass:$passthru,
                                           VLOpFrag)),
@@ -2619,7 +2619,7 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in {
             (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
                  $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>;
   def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
-                fvti.Vector:$passthru, (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)),
+                fvti.Vector:$passthru, (fvti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))), VLOpFrag)),
             (!cast<Instruction>("PseudoVMV_V_X_"#fvti.LMul.MX)
                  $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>;
 }
@@ -2940,7 +2940,7 @@ foreach vti = NoGroupFloatVectors in {
                                              VLOpFrag)),
             (PseudoVMV_S_X $passthru, (XLenVT X0), GPR:$vl, vti.Log2SEW)>;
   def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),
-                                           (vti.Scalar (SelectFPImm (XLenVT GPR:$imm))),
+                                           (vti.Scalar (SelectScalarFPAsInt (XLenVT GPR:$imm))),
                                            VLOpFrag)),
             (PseudoVMV_S_X $passthru, GPR:$imm, GPR:$vl, vti.Log2SEW)>;
   def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$passthru),

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 57 additions & 73 deletions
@@ -1348,20 +1348,16 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ;
 ; RV64-LABEL: buildvec_v32f64_exact_vlen:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -96
-; RV64-NEXT:    .cfi_def_cfa_offset 96
-; RV64-NEXT:    fsd fs0, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs1, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs2, 72(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs3, 64(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs4, 56(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs5, 48(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs6, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs7, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs8, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs9, 16(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs10, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    fsd fs11, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    addi sp, sp, -64
+; RV64-NEXT:    .cfi_def_cfa_offset 64
+; RV64-NEXT:    fsd fs0, 56(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs1, 48(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs2, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs3, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs4, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs5, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs6, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    fsd fs7, 0(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset fs0, -8
 ; RV64-NEXT:    .cfi_offset fs1, -16
 ; RV64-NEXT:    .cfi_offset fs2, -24
@@ -1370,34 +1366,26 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV64-NEXT:    .cfi_offset fs5, -48
 ; RV64-NEXT:    .cfi_offset fs6, -56
 ; RV64-NEXT:    .cfi_offset fs7, -64
-; RV64-NEXT:    .cfi_offset fs8, -72
-; RV64-NEXT:    .cfi_offset fs9, -80
-; RV64-NEXT:    .cfi_offset fs10, -88
-; RV64-NEXT:    .cfi_offset fs11, -96
 ; RV64-NEXT:    fmv.d.x ft4, a7
-; RV64-NEXT:    fmv.d.x ft5, a6
-; RV64-NEXT:    fmv.d.x ft6, a5
-; RV64-NEXT:    fmv.d.x ft7, a4
-; RV64-NEXT:    fmv.d.x ft8, a3
-; RV64-NEXT:    fmv.d.x ft9, a2
-; RV64-NEXT:    fmv.d.x ft10, a1
-; RV64-NEXT:    fmv.d.x ft11, a0
-; RV64-NEXT:    fld ft0, 216(sp)
-; RV64-NEXT:    fld ft1, 208(sp)
-; RV64-NEXT:    fld ft2, 200(sp)
-; RV64-NEXT:    fld ft3, 192(sp)
-; RV64-NEXT:    fld fs0, 184(sp)
-; RV64-NEXT:    fld fs1, 176(sp)
-; RV64-NEXT:    fld fs2, 168(sp)
-; RV64-NEXT:    fld fs3, 160(sp)
-; RV64-NEXT:    fld fs4, 152(sp)
-; RV64-NEXT:    fld fs5, 144(sp)
-; RV64-NEXT:    fld fs6, 136(sp)
-; RV64-NEXT:    fld fs7, 128(sp)
-; RV64-NEXT:    fld fs8, 104(sp)
-; RV64-NEXT:    fld fs9, 96(sp)
-; RV64-NEXT:    fld fs10, 120(sp)
-; RV64-NEXT:    fld fs11, 112(sp)
+; RV64-NEXT:    fmv.d.x ft5, a5
+; RV64-NEXT:    fmv.d.x ft6, a3
+; RV64-NEXT:    fmv.d.x ft7, a1
+; RV64-NEXT:    fld ft0, 184(sp)
+; RV64-NEXT:    fld ft1, 176(sp)
+; RV64-NEXT:    fld ft2, 168(sp)
+; RV64-NEXT:    fld ft3, 160(sp)
+; RV64-NEXT:    fld ft8, 152(sp)
+; RV64-NEXT:    fld ft9, 144(sp)
+; RV64-NEXT:    fld ft10, 136(sp)
+; RV64-NEXT:    fld ft11, 128(sp)
+; RV64-NEXT:    fld fs0, 120(sp)
+; RV64-NEXT:    fld fs1, 112(sp)
+; RV64-NEXT:    fld fs2, 104(sp)
+; RV64-NEXT:    fld fs3, 96(sp)
+; RV64-NEXT:    fld fs4, 72(sp)
+; RV64-NEXT:    fld fs5, 64(sp)
+; RV64-NEXT:    fld fs6, 88(sp)
+; RV64-NEXT:    fld fs7, 80(sp)
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vfmv.v.f v8, fa2
 ; RV64-NEXT:    vfslide1down.vf v9, v8, fa3
@@ -1407,43 +1395,39 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
 ; RV64-NEXT:    vfslide1down.vf v10, v10, fa5
 ; RV64-NEXT:    vfmv.v.f v11, fa6
 ; RV64-NEXT:    vfslide1down.vf v11, v11, fa7
-; RV64-NEXT:    vfmv.v.f v12, ft11
-; RV64-NEXT:    vfslide1down.vf v12, v12, ft10
-; RV64-NEXT:    vfmv.v.f v13, ft9
-; RV64-NEXT:    vfslide1down.vf v13, v13, ft8
-; RV64-NEXT:    vfmv.v.f v14, ft7
-; RV64-NEXT:    vfslide1down.vf v14, v14, ft6
-; RV64-NEXT:    vfmv.v.f v15, ft5
+; RV64-NEXT:    vmv.v.x v12, a0
+; RV64-NEXT:    vfslide1down.vf v12, v12, ft7
+; RV64-NEXT:    vmv.v.x v13, a2
+; RV64-NEXT:    vfslide1down.vf v13, v13, ft6
+; RV64-NEXT:    vmv.v.x v14, a4
+; RV64-NEXT:    vfslide1down.vf v14, v14, ft5
+; RV64-NEXT:    vmv.v.x v15, a6
 ; RV64-NEXT:    vfslide1down.vf v15, v15, ft4
-; RV64-NEXT:    vfmv.v.f v16, fs11
-; RV64-NEXT:    vfslide1down.vf v17, v16, fs10
-; RV64-NEXT:    vfmv.v.f v16, fs9
-; RV64-NEXT:    vfslide1down.vf v16, v16, fs8
-; RV64-NEXT:    vfmv.v.f v18, fs7
-; RV64-NEXT:    vfslide1down.vf v18, v18, fs6
-; RV64-NEXT:    vfmv.v.f v19, fs5
-; RV64-NEXT:    vfslide1down.vf v19, v19, fs4
-; RV64-NEXT:    vfmv.v.f v20, fs3
-; RV64-NEXT:    vfslide1down.vf v20, v20, fs2
-; RV64-NEXT:    vfmv.v.f v21, fs1
-; RV64-NEXT:    vfslide1down.vf v21, v21, fs0
+; RV64-NEXT:    vfmv.v.f v16, fs7
+; RV64-NEXT:    vfslide1down.vf v17, v16, fs6
+; RV64-NEXT:    vfmv.v.f v16, fs5
+; RV64-NEXT:    vfslide1down.vf v16, v16, fs4
+; RV64-NEXT:    vfmv.v.f v18, fs3
+; RV64-NEXT:    vfslide1down.vf v18, v18, fs2
+; RV64-NEXT:    vfmv.v.f v19, fs1
+; RV64-NEXT:    vfslide1down.vf v19, v19, fs0
+; RV64-NEXT:    vfmv.v.f v20, ft11
+; RV64-NEXT:    vfslide1down.vf v20, v20, ft10
+; RV64-NEXT:    vfmv.v.f v21, ft9
+; RV64-NEXT:    vfslide1down.vf v21, v21, ft8
 ; RV64-NEXT:    vfmv.v.f v22, ft3
 ; RV64-NEXT:    vfslide1down.vf v22, v22, ft2
 ; RV64-NEXT:    vfmv.v.f v23, ft1
 ; RV64-NEXT:    vfslide1down.vf v23, v23, ft0
-; RV64-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 96
+; RV64-NEXT:    fld fs0, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs1, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs2, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs3, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs4, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs5, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs6, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    fld fs7, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 64
 ; RV64-NEXT:    ret
   %v0 = insertelement <32 x double> poison, double %e0, i64 0
   %v1 = insertelement <32 x double> %v0, double %e1, i64 1