Skip to content

Commit bc7fd5f

Browse files
paperchalice authored and Lukacma committed
[PowerPC] Remove UnsafeFPMath uses (llvm#154901)
Try to remove `UnsafeFPMath` uses in the PowerPC backend. These global flags block some improvements like https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast/80797, so remove them incrementally. FP operations that may raise exceptions are replaced by constrained intrinsics. However, vector types are not supported by these intrinsics.
1 parent a135a13 commit bc7fd5f

File tree

6 files changed

+369
-3186
lines changed

6 files changed

+369
-3186
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -442,14 +442,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
442442
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
443443

444444
// If we're enabling GP optimizations, use hardware square root
445-
if (!Subtarget.hasFSQRT() &&
446-
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
447-
Subtarget.hasFRE()))
445+
if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
448446
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
449447

450448
if (!Subtarget.hasFSQRT() &&
451-
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
452-
Subtarget.hasFRES()))
449+
!(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
453450
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
454451

455452
if (Subtarget.hasFCPSGN()) {
@@ -565,16 +562,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
565562
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
566563
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
567564
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
568-
if (TM.Options.UnsafeFPMath) {
569-
setOperationAction(ISD::LRINT, MVT::f64, Legal);
570-
setOperationAction(ISD::LRINT, MVT::f32, Legal);
571-
setOperationAction(ISD::LLRINT, MVT::f64, Legal);
572-
setOperationAction(ISD::LLRINT, MVT::f32, Legal);
573-
setOperationAction(ISD::LROUND, MVT::f64, Legal);
574-
setOperationAction(ISD::LROUND, MVT::f32, Legal);
575-
setOperationAction(ISD::LLROUND, MVT::f64, Legal);
576-
setOperationAction(ISD::LLROUND, MVT::f32, Legal);
577-
}
565+
566+
setOperationAction(ISD::STRICT_LRINT, MVT::f64, Custom);
567+
setOperationAction(ISD::STRICT_LRINT, MVT::f32, Custom);
568+
setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Custom);
569+
setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Custom);
570+
setOperationAction(ISD::STRICT_LROUND, MVT::f64, Custom);
571+
setOperationAction(ISD::STRICT_LROUND, MVT::f32, Custom);
572+
setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Custom);
573+
setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Custom);
578574
} else {
579575
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
580576
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -1034,11 +1030,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
10341030
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
10351031

10361032
// The nearbyint variants are not allowed to raise the inexact exception
1037-
// so we can only code-gen them with unsafe math.
1038-
if (TM.Options.UnsafeFPMath) {
1039-
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1040-
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1041-
}
1033+
// so we can only code-gen them with fpexcept.ignore.
1034+
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Custom);
1035+
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f32, Custom);
10421036

10431037
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
10441038
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
@@ -8911,11 +8905,13 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
89118905
// be lost at this stage, but is below the single-precision rounding
89128906
// position.
89138907
//
8914-
// However, if -enable-unsafe-fp-math is in effect, accept double
8908+
// However, if afn is in effect, accept double
89158909
// rounding to avoid the extra overhead.
8916-
if (Op.getValueType() == MVT::f32 &&
8917-
!Subtarget.hasFPCVT() &&
8918-
!DAG.getTarget().Options.UnsafeFPMath) {
8910+
// FIXME: Currently INT_TO_FP can't support fast math flags because
8911+
// of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8912+
// false.
8913+
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8914+
!Op->getFlags().hasApproximateFuncs()) {
89198915

89208916
// Twiddle input to make sure the low 11 bits are zero. (If this
89218917
// is the case, we are guaranteed the value will fit into the 53 bit
@@ -12759,6 +12755,14 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1275912755
return LowerADDSUBO_CARRY(Op, DAG);
1276012756
case ISD::UCMP:
1276112757
return LowerUCMP(Op, DAG);
12758+
case ISD::STRICT_LRINT:
12759+
case ISD::STRICT_LLRINT:
12760+
case ISD::STRICT_LROUND:
12761+
case ISD::STRICT_LLROUND:
12762+
case ISD::STRICT_FNEARBYINT:
12763+
if (Op->getFlags().hasNoFPExcept())
12764+
return Op;
12765+
return SDValue();
1276212766
}
1276312767
}
1276412768

@@ -18707,11 +18711,12 @@ bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
1870718711
const Function *F = I->getFunction();
1870818712
const DataLayout &DL = F->getDataLayout();
1870918713
Type *Ty = User->getOperand(0)->getType();
18714+
bool AllowContract = I->getFastMathFlags().allowContract() &&
18715+
User->getFastMathFlags().allowContract();
1871018716

18711-
return !(
18712-
isFMAFasterThanFMulAndFAdd(*F, Ty) &&
18713-
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
18714-
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
18717+
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
18718+
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
18719+
(AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
1871518720
}
1871618721
case Instruction::Load: {
1871718722
// Don't break "store (load float*)" pattern, this pattern will be combined

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2786,14 +2786,16 @@ def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
27862786
// Rounding without exceptions (nearbyint). Due to strange tblgen behaviour,
27872787
// these need to be defined after the any_frint versions so ISEL will correctly
27882788
// add the chain to the strict versions.
2789-
def : Pat<(f32 (fnearbyint f32:$S)),
2789+
// TODO: Match strict fp rounding intrinsics with instructions like xsrdpiz when
2790+
// rounding mode is propagated to CodeGen part.
2791+
def : Pat<(f32 (strict_fnearbyint f32:$S)),
27902792
(f32 (COPY_TO_REGCLASS (XSRDPIC
27912793
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
2792-
def : Pat<(f64 (fnearbyint f64:$S)),
2794+
def : Pat<(f64 (strict_fnearbyint f64:$S)),
27932795
(f64 (XSRDPIC $S))>;
2794-
def : Pat<(v2f64 (fnearbyint v2f64:$S)),
2796+
def : Pat<(v2f64 (strict_fnearbyint v2f64:$S)),
27952797
(v2f64 (XVRDPIC $S))>;
2796-
def : Pat<(v4f32 (fnearbyint v4f32:$S)),
2798+
def : Pat<(v4f32 (strict_fnearbyint v4f32:$S)),
27972799
(v4f32 (XVRSPIC $S))>;
27982800

27992801
// Materialize a zero-vector of long long
@@ -3578,25 +3580,25 @@ def : Pat<(f64 (bitconvert i64:$S)),
35783580
(f64 (MTVSRD $S))>;
35793581

35803582
// Rounding to integer.
3581-
def : Pat<(i64 (lrint f64:$S)),
3583+
def : Pat<(i64 (strict_lrint f64:$S)),
35823584
(i64 (MFVSRD (FCTID $S)))>;
3583-
def : Pat<(i64 (lrint f32:$S)),
3585+
def : Pat<(i64 (strict_lrint f32:$S)),
35843586
(i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
3585-
def : Pat<(i64 (llrint f64:$S)),
3587+
def : Pat<(i64 (strict_llrint f64:$S)),
35863588
(i64 (MFVSRD (FCTID $S)))>;
3587-
def : Pat<(i64 (llrint f32:$S)),
3589+
def : Pat<(i64 (strict_llrint f32:$S)),
35883590
(i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
3589-
def : Pat<(i64 (lround f64:$S)),
3591+
def : Pat<(i64 (strict_lround f64:$S)),
35903592
(i64 (MFVSRD (FCTID (XSRDPI $S))))>;
3591-
def : Pat<(i64 (lround f32:$S)),
3593+
def : Pat<(i64 (strict_lround f32:$S)),
35923594
(i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
3593-
def : Pat<(i32 (lround f64:$S)),
3595+
def : Pat<(i32 (strict_lround f64:$S)),
35943596
(i32 (MFVSRWZ (FCTIW (XSRDPI $S))))>;
3595-
def : Pat<(i32 (lround f32:$S)),
3597+
def : Pat<(i32 (strict_lround f32:$S)),
35963598
(i32 (MFVSRWZ (FCTIW (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
3597-
def : Pat<(i64 (llround f64:$S)),
3599+
def : Pat<(i64 (strict_llround f64:$S)),
35983600
(i64 (MFVSRD (FCTID (XSRDPI $S))))>;
3599-
def : Pat<(i64 (llround f32:$S)),
3601+
def : Pat<(i64 (strict_llround f32:$S)),
36003602
(i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
36013603

36023604
// Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead

llvm/test/CodeGen/PowerPC/i64_fp_round.ll

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,18 @@
44
; for minor code generation differences.
55
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s
66
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -mattr=-isel < %s | FileCheck %s --check-prefix=CHECK-NO-ISEL
7-
; Also check that with -enable-unsafe-fp-math we do not get that extra
7+
; Also check that with afn we do not get that extra
88
; code sequence. Simply verify that there is no "isel" present.
9-
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE
10-
; CHECK-UNSAFE-NOT: isel
9+
1110
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
1211
target triple = "powerpc64-unknown-linux-gnu"
1312

1413
define float @test(i64 %x) nounwind readnone {
1514
; Verify that we get the code sequence needed to avoid double-rounding.
1615
; Note that only parts of the sequence are checked for here, to allow
1716
; for minor code generation differences.
18-
; Also check that with -enable-unsafe-fp-math we do not get that extra
17+
; Also check that with afn we do not get that extra
1918
; code sequence. Simply verify that there is no "isel" present.
20-
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE
2119
; CHECK-LABEL: test:
2220
; CHECK: # %bb.0: # %entry
2321
; CHECK-NEXT: clrldi 4, 3, 53
@@ -51,18 +49,10 @@ define float @test(i64 %x) nounwind readnone {
5149
; CHECK-NO-ISEL-NEXT: xscvsxddp 0, 0
5250
; CHECK-NO-ISEL-NEXT: frsp 1, 0
5351
; CHECK-NO-ISEL-NEXT: blr
54-
;
55-
; CHECK-UNSAFE-LABEL: test:
56-
; CHECK-UNSAFE: # %bb.0: # %entry
57-
; CHECK-UNSAFE-NEXT: std 3, -8(1)
58-
; CHECK-UNSAFE-NEXT: lfd 0, -8(1)
59-
; CHECK-UNSAFE-NEXT: xscvsxddp 0, 0
60-
; CHECK-UNSAFE-NEXT: frsp 1, 0
61-
; CHECK-UNSAFE-NEXT: blr
6252

6353
entry:
6454
%conv = sitofp i64 %x to float
6555
ret float %conv
6656
}
6757

68-
58+
; TODO: Add sitofp afn test.

0 commit comments

Comments
 (0)