[PowerPC] Remove UnsafeFPMath uses
#154901
Conversation
@llvm/pr-subscribers-backend-powerpc

Author: None (paperchalice)

Changes: Try to remove UnsafeFPMath uses in the PowerPC backend. FP operations that may raise exceptions are replaced by constrained intrinsics.

Patch is 140.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154901.diff

6 Files Affected:
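To make the mechanics concrete before the diff: the STRICT_* rounding nodes are now marked Custom, and the custom lowering keeps them as-is only when the node is known not to raise FP exceptions. A minimal IR sketch (function name is illustrative; the intrinsic usage mirrors the updated tests below):

; With fpexcept.ignore the node carries the no-FP-except flag, so the
; custom lowering keeps it and ISel can pick the inline fctid + mffprd
; sequence. With fpexcept.strict the lowering returns SDValue() and the
; operation is expanded (e.g. to the lrint libcall) to preserve
; exception behavior.
define i64 @lrint_ignore(double %d) {
entry:
  %r = tail call i64 @llvm.experimental.constrained.lrint(double %d, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  ret i64 %r
}

declare i64 @llvm.experimental.constrained.lrint(double, metadata, metadata)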
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 74ae8502dccea..5013b76a33136 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -446,14 +446,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
- Subtarget.hasFRE()))
+ if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
- Subtarget.hasFRES()))
+ !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
if (Subtarget.hasFCPSGN()) {
@@ -569,16 +566,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::LRINT, MVT::f64, Legal);
- setOperationAction(ISD::LRINT, MVT::f32, Legal);
- setOperationAction(ISD::LLRINT, MVT::f64, Legal);
- setOperationAction(ISD::LLRINT, MVT::f32, Legal);
- setOperationAction(ISD::LROUND, MVT::f64, Legal);
- setOperationAction(ISD::LROUND, MVT::f32, Legal);
- setOperationAction(ISD::LLROUND, MVT::f64, Legal);
- setOperationAction(ISD::LLROUND, MVT::f32, Legal);
- }
+
+ setOperationAction(ISD::STRICT_LRINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LRINT, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_LROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LROUND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Custom);
} else {
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -1034,11 +1030,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
// The nearbyint variants are not allowed to raise the inexact exception
- // so we can only code-gen them with unsafe math.
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- }
+ // so we can only code-gen them with fpexcept.ignore.
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f32, Custom);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
@@ -8925,9 +8919,8 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
//
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
- if (Op.getValueType() == MVT::f32 &&
- !Subtarget.hasFPCVT() &&
- !DAG.getTarget().Options.UnsafeFPMath) {
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
+ !Op->getFlags().hasNoFPExcept()) {
// Twiddle input to make sure the low 11 bits are zero. (If this
// is the case, we are guaranteed the value will fit into the 53 bit
@@ -12694,6 +12687,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UADDO_CARRY:
case ISD::USUBO_CARRY:
return LowerADDSUBO_CARRY(Op, DAG);
+
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_FNEARBYINT:
+ if (Op->getFlags().hasNoFPExcept())
+ return Op;
+ return SDValue();
}
}
@@ -18504,11 +18506,12 @@ bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
const Function *F = I->getFunction();
const DataLayout &DL = F->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
+ bool AllowContract = I->getFastMathFlags().allowContract() &&
+ User->getFastMathFlags().allowContract();
- return !(
- isFMAFasterThanFMulAndFAdd(*F, Ty) &&
- isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+ return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+ (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
}
case Instruction::Load: {
// Don't break "store (load float*)" pattern, this pattern will be combined
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 19448210f5db1..598fc57ff65f2 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2800,14 +2800,14 @@ def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
// Rounding without exceptions (nearbyint). Due to strange tblgen behaviour,
// these need to be defined after the any_frint versions so ISEL will correctly
// add the chain to the strict versions.
-def : Pat<(f32 (fnearbyint f32:$S)),
+def : Pat<(f32 (strict_fnearbyint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f64 (fnearbyint f64:$S)),
+def : Pat<(f64 (strict_fnearbyint f64:$S)),
(f64 (XSRDPIC $S))>;
-def : Pat<(v2f64 (fnearbyint v2f64:$S)),
+def : Pat<(v2f64 (strict_fnearbyint v2f64:$S)),
(v2f64 (XVRDPIC $S))>;
-def : Pat<(v4f32 (fnearbyint v4f32:$S)),
+def : Pat<(v4f32 (strict_fnearbyint v4f32:$S)),
(v4f32 (XVRSPIC $S))>;
// Materialize a zero-vector of long long
@@ -3592,25 +3592,25 @@ def : Pat<(f64 (bitconvert i64:$S)),
(f64 (MTVSRD $S))>;
// Rounding to integer.
-def : Pat<(i64 (lrint f64:$S)),
+def : Pat<(i64 (strict_lrint f64:$S)),
(i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (lrint f32:$S)),
+def : Pat<(i64 (strict_lrint f32:$S)),
(i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (llrint f64:$S)),
+def : Pat<(i64 (strict_llrint f64:$S)),
(i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (llrint f32:$S)),
+def : Pat<(i64 (strict_llrint f32:$S)),
(i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (lround f64:$S)),
+def : Pat<(i64 (strict_lround f64:$S)),
(i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (lround f32:$S)),
+def : Pat<(i64 (strict_lround f32:$S)),
(i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-def : Pat<(i32 (lround f64:$S)),
+def : Pat<(i32 (strict_lround f64:$S)),
(i32 (MFVSRWZ (FCTIW (XSRDPI $S))))>;
-def : Pat<(i32 (lround f32:$S)),
+def : Pat<(i32 (strict_lround f32:$S)),
(i32 (MFVSRWZ (FCTIW (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-def : Pat<(i64 (llround f64:$S)),
+def : Pat<(i64 (strict_llround f64:$S)),
(i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (llround f32:$S)),
+def : Pat<(i64 (strict_llround f32:$S)),
(i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
// Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
diff --git a/llvm/test/CodeGen/PowerPC/i64_fp_round.ll b/llvm/test/CodeGen/PowerPC/i64_fp_round.ll
index f7df003fcc3f8..ad22591478814 100644
--- a/llvm/test/CodeGen/PowerPC/i64_fp_round.ll
+++ b/llvm/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -4,10 +4,9 @@
; for minor code generation differences.
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -mattr=-isel < %s | FileCheck %s --check-prefix=CHECK-NO-ISEL
-; Also check that with -enable-unsafe-fp-math we do not get that extra
+; Also check that with fpexcept.ignore we do not get that extra
; code sequence. Simply verify that there is no "isel" present.
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE
-; CHECK-UNSAFE-NOT: isel
+
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -15,9 +14,8 @@ define float @test(i64 %x) nounwind readnone {
; Verify that we get the code sequence needed to avoid double-rounding.
; Note that only parts of the sequence are checked for here, to allow
; for minor code generation differences.
-; Also check that with -enable-unsafe-fp-math we do not get that extra
+; Also check that with fpexcept.ignore we do not get that extra
; code sequence. Simply verify that there is no "isel" present.
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: clrldi 4, 3, 53
@@ -51,18 +49,33 @@ define float @test(i64 %x) nounwind readnone {
; CHECK-NO-ISEL-NEXT: xscvsxddp 0, 0
; CHECK-NO-ISEL-NEXT: frsp 1, 0
; CHECK-NO-ISEL-NEXT: blr
-;
-; CHECK-UNSAFE-LABEL: test:
-; CHECK-UNSAFE: # %bb.0: # %entry
-; CHECK-UNSAFE-NEXT: std 3, -8(1)
-; CHECK-UNSAFE-NEXT: lfd 0, -8(1)
-; CHECK-UNSAFE-NEXT: xscvsxddp 0, 0
-; CHECK-UNSAFE-NEXT: frsp 1, 0
-; CHECK-UNSAFE-NEXT: blr
entry:
%conv = sitofp i64 %x to float
ret float %conv
}
+define float @test_constrained(i64 %x) nounwind readnone {
+; Also check that with fpexcept.ignore we do not get that extra
+; code sequence. Simply verify that there is no "isel" present.
+; CHECK-LABEL: test_constrained:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: std 3, -8(1)
+; CHECK-NEXT: lfd 0, -8(1)
+; CHECK-NEXT: xscvsxddp 0, 0
+; CHECK-NEXT: frsp 1, 0
+; CHECK-NEXT: blr
+;
+; CHECK-NO-ISEL-LABEL: test_constrained:
+; CHECK-NO-ISEL: # %bb.0: # %entry
+; CHECK-NO-ISEL-NEXT: std 3, -8(1)
+; CHECK-NO-ISEL-NEXT: lfd 0, -8(1)
+; CHECK-NO-ISEL-NEXT: xscvsxddp 0, 0
+; CHECK-NO-ISEL-NEXT: frsp 1, 0
+; CHECK-NO-ISEL-NEXT: blr
+entry:
+ %conv = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret float %conv
+}
+declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
diff --git a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
index 2be370f638d5b..af48bf22a7669 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
@@ -5,9 +5,6 @@
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \
; RUN: FileCheck %s
-; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \
-; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST
define dso_local i64 @test_lrint(double %d) local_unnamed_addr {
; BE-LABEL: test_lrint:
; BE: # %bb.0: # %entry
@@ -36,17 +33,36 @@ define dso_local i64 @test_lrint(double %d) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_lrint:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: fctid f0, f1
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.lrint.i64.f64(double %d)
ret i64 %0
}
+define dso_local i64 @test_constrained_lrint(double %d) local_unnamed_addr {
+; BE-LABEL: test_constrained_lrint:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl lrint
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_lrint:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fctid f0, f1
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.lrint(double %d, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret i64 %0
+}
+
declare i64 @llvm.lrint.i64.f64(double)
define dso_local i64 @test_lrintf(float %f) local_unnamed_addr {
@@ -77,17 +93,36 @@ define dso_local i64 @test_lrintf(float %f) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_lrintf:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: fctid f0, f1
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.lrint.i64.f32(float %f)
ret i64 %0
}
+define dso_local i64 @test_constrained_lrintf(float %f) local_unnamed_addr {
+; BE-LABEL: test_constrained_lrintf:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl lrintf
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_lrintf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fctid f0, f1
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.lrint(float %f, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret i64 %0
+}
+
declare i64 @llvm.lrint.i64.f32(float)
define dso_local i64 @test_llrint(double %d) local_unnamed_addr {
@@ -118,17 +153,36 @@ define dso_local i64 @test_llrint(double %d) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_llrint:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: fctid f0, f1
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.llrint.i64.f64(double %d)
ret i64 %0
}
+define dso_local i64 @test_constrained_llrint(double %d) local_unnamed_addr {
+; BE-LABEL: test_constrained_llrint:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl llrint
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_llrint:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fctid f0, f1
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.llrint(double %d, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret i64 %0
+}
+
declare i64 @llvm.llrint.i64.f64(double)
define dso_local i64 @test_llrintf(float %f) local_unnamed_addr {
@@ -159,17 +213,36 @@ define dso_local i64 @test_llrintf(float %f) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_llrintf:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: fctid f0, f1
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.llrint.i64.f32(float %f)
ret i64 %0
}
+define dso_local i64 @test_constrained_llrintf(float %f) local_unnamed_addr {
+; BE-LABEL: test_constrained_llrintf:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl llrintf
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_llrintf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fctid f0, f1
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.llrint(float %f, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret i64 %0
+}
+
declare i64 @llvm.llrint.i64.f32(float)
define dso_local i64 @test_lround(double %d) local_unnamed_addr {
@@ -200,18 +273,37 @@ define dso_local i64 @test_lround(double %d) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_lround:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: xsrdpi f0, f1
-; FAST-NEXT: fctid f0, f0
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.lround.i64.f64(double %d)
ret i64 %0
}
+define dso_local i64 @test_constrained_lround(double %d) local_unnamed_addr {
+; BE-LABEL: test_constrained_lround:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl lround
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_lround:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xsrdpi f0, f1
+; CHECK-NEXT: fctid f0, f0
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.lround(double %d, metadata !"fpexcept.ignore")
+ ret i64 %0
+}
+
declare i64 @llvm.lround.i64.f64(double)
define dso_local i32 @test_lroundi32f64(double %d) local_unnamed_addr {
@@ -242,18 +334,37 @@ define dso_local i32 @test_lroundi32f64(double %d) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_lroundi32f64:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: xsrdpi f0, f1
-; FAST-NEXT: fctiw f0, f0
-; FAST-NEXT: mffprwz r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i32 @llvm.lround.i32.f64(double %d)
ret i32 %0
}
+define dso_local i32 @test_constrained_lroundi32f64(double %d) local_unnamed_addr {
+; BE-LABEL: test_constrained_lroundi32f64:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl lround
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_lroundi32f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xsrdpi f0, f1
+; CHECK-NEXT: fctiw f0, f0
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.experimental.constrained.lround(double %d, metadata !"fpexcept.ignore")
+ ret i32 %0
+}
+
declare i32 @llvm.lround.i32.f64(double)
define dso_local i64 @test_lroundf(float %f) local_unnamed_addr {
@@ -284,18 +395,37 @@ define dso_local i64 @test_lroundf(float %f) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_lroundf:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: xsrdpi f0, f1
-; FAST-NEXT: fctid f0, f0
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.lround.i64.f32(float %f)
ret i64 %0
}
+define dso_local i64 @test_constrained_lroundf(float %f) local_unnamed_addr {
+; BE-LABEL: test_constrained_lroundf:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl lroundf
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_lroundf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xsrdpi f0, f1
+; CHECK-NEXT: fctid f0, f0
+; CHECK-NEXT: mffprd r3, f0
+; CH...
[truncated]
LGTM.
Hi! While reviewing again, I realized I have a question regarding this PR; it is possible I am misunderstanding something. I would be happy to re-approve once my question is clarified.
We produce different code now for -O3 -ffast-math for the cases that were updated. For example, for this test:
$ cat near.c
#include <math.h>
double test_constrained_nearbyint(double x) {
return nearbyint(x);
}
We previously generated:
$ clang -O3 -target powerpc64le-unknown-unknown near.c -ffast-math -S -o -
.abiversion 2
.file "near.c"
.text
.globl test_constrained_nearbyint # -- Begin function test_constrained_nearbyint
.p2align 4
.type test_constrained_nearbyint,@function
test_constrained_nearbyint: # @test_constrained_nearbyint
.Lfunc_begin0:
.cfi_startproc
# %bb.0: # %entry
xsrdpic 1, 1
blr
.long 0
.quad 0
.Lfunc_end0:
.size test_constrained_nearbyint, .Lfunc_end0-.Lfunc_begin0
.cfi_endproc
# -- End function
.ident "clang version 22.0.0git"
.section ".note.GNU-stack","",@progbits
.addrsig
With this change, we no longer generate this because fnearbyint is no longer made legal.
If we want to generate the same instructions with this patch, we would need -frounding-math to ensure we generate the correct constrained intrinsic:
$ clang -O3 -target powerpc64le-unknown-unknown near.c -ffast-math -frounding-math -S -o -
.abiversion 2
.file "near.c"
.text
.globl test_constrained_nearbyint # -- Begin function test_constrained_nearbyint
.p2align 4
.type test_constrained_nearbyint,@function
test_constrained_nearbyint: # @test_constrained_nearbyint
.Lfunc_begin0:
.cfi_startproc
# %bb.0: # %entry
xsrdpic 1, 1
blr
.long 0
.quad 0
.Lfunc_end0:
.size test_constrained_nearbyint, .Lfunc_end0-.Lfunc_begin0
.cfi_endproc
# -- End function
.ident "clang version 22.0.0git"
.section ".note.GNU-stack","",@progbits
.addrsig
I am aware that -Ofast/-O3 -ffast-math implies -fno-rounding-math, but I am wondering whether the previous behaviour of generating the fast-math instructions may have been a bug, if they are intended for -frounding-math. Unless there is another way to generate these constrained intrinsics?
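For reference, the constrained form that -frounding-math produces (and that this patch's custom lowering then accepts) should look roughly like the IR below; the exact metadata operands are my reading of the updated tests, not output captured from clang:

define double @test_constrained_nearbyint(double %x) {
entry:
  ; round.dynamic + fpexcept.ignore is the combination the new
  ; STRICT_FNEARBYINT handling keeps legal, allowing xsrdpic.
  %r = tail call double @llvm.experimental.constrained.nearbyint.f64(double %x, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  ret double %r
}

declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)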
@RolandF77 Do you have any opinions on this?
@@ -8925,9 +8919,8 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
//
// However, if -enable-unsafe-fp-math is in effect, accept double
Nit: It would be good to update this comment if we are removing the check for the option.
Initially, I wanted to check the instruction behavior and the doc string (sorry, I couldn't find the doc about xsrdpic). So I think the constrained intrinsic is more appropriate for this situation. I also wonder why the vector FP types are not wrapped in constrained intrinsics.
The PPC ISA should be available here: https://openpowerfoundation.org/specifications/isa/
xsrdpic checks the rounding mode, but may cause an inexact exception. There are other instructions that use a static rounding mode and don't raise an exception. So maybe we can use xsrdpiz with -fno-rounding-math?
paperchalice force-pushed from 6aac61c to 86a60e1.
INT_TO_FP lowering needs afn, but the nneg flag is incompatible with fast-math flags.
Ping @RolandF77 @amy-kwan |
Sorry for the delay in getting back to the patch.
I personally think the patch is fine currently and I am OK with addressing the TODOs/FIXMEs after the patch/in the future. Thank you for answering my initial question.
// Rounding without exceptions (nearbyint). Due to strange tblgen behaviour,
// these need to be defined after the any_frint versions so ISEL will correctly
// add the chain to the strict versions.
Maybe we can add a TODO to investigate whether or not we can use Roland's suggestion of xsrdpiz in these cases?
It would be possible when SDNodeFlags and MIFlag support rounding modes.
paperchalice force-pushed from 86a60e1 to 54a89c4.
Try to remove UnsafeFPMath uses in the PowerPC backend. These global flags block some improvements like https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast/80797. Remove them incrementally. FP operations that may raise exceptions are replaced by constrained intrinsics. However, vector types are not supported by these intrinsics.
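Side note on the isProfitableToHoist hunk: FMA formation is now gated on per-instruction contract flags rather than the global option. In IR terms, both the multiply and the add must carry contract for the pair to stay together and later fuse (a sketch, not taken from the test suite):

; Both operations carry 'contract', so isProfitableToHoist declines to
; split them and the backend may fuse the pair into a single FMA
; (e.g. xsmaddadp on VSX targets).
define double @mul_add(double %a, double %b, double %c) {
entry:
  %m = fmul contract double %a, %b
  %s = fadd contract double %m, %c
  ret double %s
}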