diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 737610b73b081..0fd5de3b8ea42 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -112,7 +112,6 @@ inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
     return false;
   }
   llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic");
-  return false;
 }
 
 inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
@@ -179,7 +178,6 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
   }
   llvm_unreachable(
       "Checking invalid f2i/d2i intrinsic for signed int conversion");
-  return false;
 }
 
 inline APFloat::roundingMode
@@ -250,7 +248,6 @@ GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
     return APFloat::rmTowardZero;
   }
   llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic");
-  return APFloat::roundingMode::Invalid;
 }
 
 inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
@@ -280,7 +277,6 @@ inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) {
     return false;
   }
   llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic");
-  return false;
 }
 
 inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
@@ -310,7 +306,6 @@ inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) {
     return false;
   }
   llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic");
-  return false;
 }
 
 inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
@@ -340,7 +335,100 @@ inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) {
     return false;
   }
   llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic");
-  return false;
+}
+
+/// Returns true if \p IntrinsicID is one of the ".ftz" NVVM unary math
+/// intrinsics (ceil, fabs, floor, round, saturate, sqrt) and false for the
+/// non-FTZ variants. Any other intrinsic ID is unreachable.
+inline bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) {
+  case Intrinsic::nvvm_ceil_ftz_f:
+  case Intrinsic::nvvm_fabs_ftz:
+  case Intrinsic::nvvm_floor_ftz_f:
+  case Intrinsic::nvvm_round_ftz_f:
+  case Intrinsic::nvvm_saturate_ftz_f:
+  case Intrinsic::nvvm_sqrt_rn_ftz_f:
+    return true;
+  case Intrinsic::nvvm_ceil_f:
+  case Intrinsic::nvvm_ceil_d:
+  case Intrinsic::nvvm_fabs:
+  case Intrinsic::nvvm_floor_f:
+  case Intrinsic::nvvm_floor_d:
+  case Intrinsic::nvvm_round_f:
+  case Intrinsic::nvvm_round_d:
+  case Intrinsic::nvvm_saturate_d:
+  case Intrinsic::nvvm_saturate_f:
+  case Intrinsic::nvvm_sqrt_f:
+  case Intrinsic::nvvm_sqrt_rn_d:
+  case Intrinsic::nvvm_sqrt_rn_f:
+    return false;
+  }
+  llvm_unreachable("Checking FTZ flag for invalid unary intrinsic");
+}
+
+/// Returns true for the ".ftz" NVVM rcp intrinsics and false for the other
+/// rcp variants. Any other intrinsic ID is unreachable.
+inline bool RCPShouldFTZ(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) {
+  case Intrinsic::nvvm_rcp_rm_ftz_f:
+  case Intrinsic::nvvm_rcp_rn_ftz_f:
+  case Intrinsic::nvvm_rcp_rp_ftz_f:
+  case Intrinsic::nvvm_rcp_rz_ftz_f:
+    return true;
+  case Intrinsic::nvvm_rcp_rm_d:
+  case Intrinsic::nvvm_rcp_rm_f:
+  case Intrinsic::nvvm_rcp_rn_d:
+  case Intrinsic::nvvm_rcp_rn_f:
+  case Intrinsic::nvvm_rcp_rp_d:
+  case Intrinsic::nvvm_rcp_rp_f:
+  case Intrinsic::nvvm_rcp_rz_d:
+  case Intrinsic::nvvm_rcp_rz_f:
+    return false;
+  }
+  llvm_unreachable("Checking FTZ flag for invalid rcp intrinsic");
+}
+
+/// Maps the rounding suffix of an NVVM rcp intrinsic to an APFloat rounding
+/// mode: rm -> rmTowardNegative, rn -> rmNearestTiesToEven,
+/// rp -> rmTowardPositive, rz -> rmTowardZero.
+inline APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) {
+  case Intrinsic::nvvm_rcp_rm_f:
+  case Intrinsic::nvvm_rcp_rm_d:
+  case Intrinsic::nvvm_rcp_rm_ftz_f:
+    return APFloat::rmTowardNegative;
+
+  case Intrinsic::nvvm_rcp_rn_f:
+  case Intrinsic::nvvm_rcp_rn_d:
+  case Intrinsic::nvvm_rcp_rn_ftz_f:
+    return APFloat::rmNearestTiesToEven;
+
+  case Intrinsic::nvvm_rcp_rp_f:
+  case Intrinsic::nvvm_rcp_rp_d:
+  case Intrinsic::nvvm_rcp_rp_ftz_f:
+    return APFloat::rmTowardPositive;
+
+  case Intrinsic::nvvm_rcp_rz_f:
+  case Intrinsic::nvvm_rcp_rz_d:
+  case Intrinsic::nvvm_rcp_rz_ftz_f:
+    return APFloat::rmTowardZero;
+  }
+  llvm_unreachable("Checking rounding mode for invalid rcp intrinsic");
+}
+
+/// Returns the DenormalMode matching an intrinsic's FTZ flag:
+/// DenormalMode::getPreserveSign() when \p ShouldFTZ is true and
+/// DenormalMode::getIEEE() otherwise.
+inline DenormalMode GetNVVMDenormMode(bool ShouldFTZ) {
+  if (ShouldFTZ)
+    return DenormalMode::getPreserveSign();
+  return DenormalMode::getIEEE();
+}
+
+/// Misspelled ("Denrom") name of GetNVVMDenormMode, kept as a forwarding
+/// wrapper so that existing callers continue to compile.
+inline DenormalMode GetNVVMDenromMode(bool ShouldFTZ) {
+  return GetNVVMDenormMode(ShouldFTZ);
 }
 
 } // namespace nvvm
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 6e469c034d9c8..95f5e6f0f986d 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1799,6 +1799,44 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::nvvm_d2ull_rn:
   case Intrinsic::nvvm_d2ull_rp:
   case Intrinsic::nvvm_d2ull_rz:
+
+  // NVVM math intrinsics:
+  case Intrinsic::nvvm_ceil_d:
+  case Intrinsic::nvvm_ceil_f:
+  case Intrinsic::nvvm_ceil_ftz_f:
+
+  case Intrinsic::nvvm_fabs:
+  case Intrinsic::nvvm_fabs_ftz:
+
+  case Intrinsic::nvvm_floor_d:
+  case Intrinsic::nvvm_floor_f:
+  case Intrinsic::nvvm_floor_ftz_f:
+
+  case Intrinsic::nvvm_rcp_rm_d:
+  case Intrinsic::nvvm_rcp_rm_f:
+  case Intrinsic::nvvm_rcp_rm_ftz_f:
+  case Intrinsic::nvvm_rcp_rn_d:
+  case Intrinsic::nvvm_rcp_rn_f:
+  case Intrinsic::nvvm_rcp_rn_ftz_f:
+  case Intrinsic::nvvm_rcp_rp_d:
+  case Intrinsic::nvvm_rcp_rp_f:
+  case Intrinsic::nvvm_rcp_rp_ftz_f:
+  case Intrinsic::nvvm_rcp_rz_d:
+  case Intrinsic::nvvm_rcp_rz_f:
+  case Intrinsic::nvvm_rcp_rz_ftz_f:
+
+  case Intrinsic::nvvm_round_d:
+  case Intrinsic::nvvm_round_f:
+  case Intrinsic::nvvm_round_ftz_f:
+
+  case Intrinsic::nvvm_saturate_d:
+  case Intrinsic::nvvm_saturate_f:
+  case Intrinsic::nvvm_saturate_ftz_f:
+
+  case Intrinsic::nvvm_sqrt_f:
+  case Intrinsic::nvvm_sqrt_rn_d:
+  case Intrinsic::nvvm_sqrt_rn_f:
+  case Intrinsic::nvvm_sqrt_rn_ftz_f:
     return !Call->isStrictFP();
 
   // Sign operations are actually bitwise operations, they do not raise
@@ -1816,6 +1854,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::nearbyint:
   case Intrinsic::rint:
   case Intrinsic::canonicalize:
+
   // Constrained intrinsics can be folded if FP environment is known
   // to compiler.
case Intrinsic::experimental_constrained_fma: @@ -1963,22 +2002,56 @@ inline bool llvm_fenv_testexcept() { return false; } -static APFloat FTZPreserveSign(const APFloat &V) { +static const APFloat FTZPreserveSign(const APFloat &V) { if (V.isDenormal()) return APFloat::getZero(V.getSemantics(), V.isNegative()); return V; } -Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, - Type *Ty) { +static const APFloat FlushToPositiveZero(const APFloat &V) { + if (V.isDenormal()) + return APFloat::getZero(V.getSemantics(), false); + return V; +} + +static const APFloat +FlushWithDenormKind(const APFloat &V, + DenormalMode::DenormalModeKind DenormKind) { + assert(DenormKind != DenormalMode::DenormalModeKind::Invalid && + DenormKind != DenormalMode::DenormalModeKind::Dynamic); + switch (DenormKind) { + case DenormalMode::DenormalModeKind::IEEE: + return V; + case DenormalMode::DenormalModeKind::PreserveSign: + return FTZPreserveSign(V); + case DenormalMode::DenormalModeKind::PositiveZero: + return FlushToPositiveZero(V); + default: + llvm_unreachable("Invalid denormal mode!"); + } +} + +Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty, + DenormalMode DenormMode = DenormalMode::getIEEE()) { + if (!DenormMode.isValid() || + DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic || + DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic) + return nullptr; + llvm_fenv_clearexcept(); - double Result = NativeFP(V.convertToDouble()); + auto Input = FlushWithDenormKind(V, DenormMode.Input); + double Result = NativeFP(Input.convertToDouble()); if (llvm_fenv_testexcept()) { llvm_fenv_clearexcept(); return nullptr; } - return GetConstantFoldFPValue(Result, Ty); + Constant *Output = GetConstantFoldFPValue(Result, Ty); + if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE) + return Output; + const auto *CFP = static_cast(Output); + const auto Res = FlushWithDenormKind(CFP->getValueAPF(), DenormMode.Output); + return 
ConstantFP::get(Ty->getContext(), Res); } #if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128) @@ -2548,6 +2621,91 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantFoldFP(atan, APF, Ty); case Intrinsic::sqrt: return ConstantFoldFP(sqrt, APF, Ty); + + // NVVM Intrinsics: + case Intrinsic::nvvm_ceil_ftz_f: + case Intrinsic::nvvm_ceil_f: + case Intrinsic::nvvm_ceil_d: + return ConstantFoldFP( + ceil, APF, Ty, + nvvm::GetNVVMDenromMode( + nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID))); + + case Intrinsic::nvvm_fabs_ftz: + case Intrinsic::nvvm_fabs: + return ConstantFoldFP( + fabs, APF, Ty, + nvvm::GetNVVMDenromMode( + nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID))); + + case Intrinsic::nvvm_floor_ftz_f: + case Intrinsic::nvvm_floor_f: + case Intrinsic::nvvm_floor_d: + return ConstantFoldFP( + floor, APF, Ty, + nvvm::GetNVVMDenromMode( + nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID))); + + case Intrinsic::nvvm_rcp_rm_ftz_f: + case Intrinsic::nvvm_rcp_rn_ftz_f: + case Intrinsic::nvvm_rcp_rp_ftz_f: + case Intrinsic::nvvm_rcp_rz_ftz_f: + case Intrinsic::nvvm_rcp_rm_d: + case Intrinsic::nvvm_rcp_rm_f: + case Intrinsic::nvvm_rcp_rn_d: + case Intrinsic::nvvm_rcp_rn_f: + case Intrinsic::nvvm_rcp_rp_d: + case Intrinsic::nvvm_rcp_rp_f: + case Intrinsic::nvvm_rcp_rz_d: + case Intrinsic::nvvm_rcp_rz_f: { + APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID); + bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID); + + auto Denominator = IsFTZ ? 
FTZPreserveSign(APF) : APF; + APFloat Res = APFloat::getOne(APF.getSemantics()); + APFloat::opStatus Status = Res.divide(Denominator, RoundMode); + + if (Status == APFloat::opOK || Status == APFloat::opInexact) { + if (IsFTZ) + Res = FTZPreserveSign(Res); + return ConstantFP::get(Ty->getContext(), Res); + } + return nullptr; + } + + case Intrinsic::nvvm_round_ftz_f: + case Intrinsic::nvvm_round_f: + case Intrinsic::nvvm_round_d: + return ConstantFoldFP( + round, APF, Ty, + nvvm::GetNVVMDenromMode( + nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID))); + + case Intrinsic::nvvm_saturate_ftz_f: + case Intrinsic::nvvm_saturate_d: + case Intrinsic::nvvm_saturate_f: { + bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID); + auto V = IsFTZ ? FTZPreserveSign(APF) : APF; + if (V.isNegative() || V.isZero() || V.isNaN()) + return ConstantFP::getZero(Ty); + APFloat One = APFloat::getOne(APF.getSemantics()); + if (V > One) + return ConstantFP::get(Ty->getContext(), One); + return ConstantFP::get(Ty->getContext(), APF); + } + + case Intrinsic::nvvm_sqrt_rn_ftz_f: + case Intrinsic::nvvm_sqrt_f: + case Intrinsic::nvvm_sqrt_rn_d: + case Intrinsic::nvvm_sqrt_rn_f: + if (APF.isNegative()) + return nullptr; + return ConstantFoldFP( + sqrt, APF, Ty, + nvvm::GetNVVMDenromMode( + nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID))); + + // AMDGCN Intrinsics: case Intrinsic::amdgcn_cos: case Intrinsic::amdgcn_sin: { double V = getValueAsDouble(Op); diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll new file mode 100644 index 0000000000000..75b850978b75a --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll @@ -0,0 +1,646 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=instsimplify -march=nvptx64 -S | FileCheck %s + +; Test constant-folding for various NVVM unary arithmetic 
intrinsics. + +;############################################################### +;# Ceil # +;############################################################### + +define double @test_ceil_d_1_25() { +; CHECK-LABEL: define double @test_ceil_d_1_25() { +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = call double @llvm.nvvm.ceil.d(double 1.25) + ret double %res +} + +define float @test_ceil_f_1_25() { +; CHECK-LABEL: define float @test_ceil_f_1_25() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.ceil.f(float 1.25) + ret float %res +} + +define float @test_ceil_ftz_f_1_25() { +; CHECK-LABEL: define float @test_ceil_ftz_f_1_25() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.ceil.ftz.f(float 1.25) + ret float %res +} + +define double @test_ceil_d_pos_subnorm() { +; CHECK-LABEL: define double @test_ceil_d_pos_subnorm() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.ceil.d(double 0x380FFFFFC0000000) + ret double %res +} + +define float @test_ceil_f_pos_subnorm() { +; CHECK-LABEL: define float @test_ceil_f_pos_subnorm() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.ceil.f(float 0x380FFFFFC0000000) + ret float %res +} + +define float @test_ceil_ftz_f_pos_subnorm() { +; CHECK-LABEL: define float @test_ceil_ftz_f_pos_subnorm() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.ceil.ftz.f(float 0x380FFFFFC0000000) + ret float %res +} + +;############################################################### +;# FAbs # +;############################################################### + +define float @test_fabs_neg_1_5() { +; CHECK-LABEL: define float @test_fabs_neg_1_5() { +; CHECK-NEXT: ret float 1.500000e+00 +; + %res = call float @llvm.nvvm.fabs(float -1.5) + ret float %res +} + +define float @test_fabs_ftz_neg_1_5() { +; CHECK-LABEL: define float @test_fabs_ftz_neg_1_5() { +; CHECK-NEXT: ret float 1.500000e+00 +; + %res = call float 
@llvm.nvvm.fabs.ftz(float -1.5) + ret float %res +} + +define float @test_fabs_1_25() { +; CHECK-LABEL: define float @test_fabs_1_25() { +; CHECK-NEXT: ret float 1.250000e+00 +; + %res = call float @llvm.nvvm.fabs(float 1.25) + ret float %res +} + +define float @test_fabs_ftz_1_25() { +; CHECK-LABEL: define float @test_fabs_ftz_1_25() { +; CHECK-NEXT: ret float 1.250000e+00 +; + %res = call float @llvm.nvvm.fabs.ftz(float 1.25) + ret float %res +} + +define float @test_fabs_neg_subnorm() { +; CHECK-LABEL: define float @test_fabs_neg_subnorm() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fabs(float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fabs_ftz_neg_subnorm() { +; CHECK-LABEL: define float @test_fabs_ftz_neg_subnorm() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fabs.ftz(float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fabs_pos_subnorm() { +; CHECK-LABEL: define float @test_fabs_pos_subnorm() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fabs(float 0x380FFFFFC0000000) + ret float %res +} + +define float @test_fabs_ftz_pos_subnorm() { +; CHECK-LABEL: define float @test_fabs_ftz_pos_subnorm() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fabs.ftz(float 0x380FFFFFC0000000) + ret float %res +} + + +;############################################################### +;# Floor # +;############################################################### + +define double @test_floor_d_1_25() { +; CHECK-LABEL: define double @test_floor_d_1_25() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.floor.d(double 1.25) + ret double %res +} + +define float @test_floor_f_1_25() { +; CHECK-LABEL: define float @test_floor_f_1_25() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.floor.f(float 1.25) + ret float %res +} + +define float @test_floor_ftz_f_1_25() { +; CHECK-LABEL: 
define float @test_floor_ftz_f_1_25() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.floor.ftz.f(float 1.25) + ret float %res +} + +define double @test_floor_d_neg_subnorm() { +; CHECK-LABEL: define double @test_floor_d_neg_subnorm() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.floor.d(double 0xB80FFFFFC0000000) + ret double %res +} + +define float @test_floor_f_neg_subnorm() { +; CHECK-LABEL: define float @test_floor_f_neg_subnorm() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.floor.f(float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_floor_ftz_f_neg_subnorm() { +; CHECK-LABEL: define float @test_floor_ftz_f_neg_subnorm() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.floor.ftz.f(float 0xB80FFFFFC0000000) + ret float %res +} + +;############################################################### +;# Rcp # +;############################################################### + +;+-------------------------------------------------------------+ +;| rcp_rm | +;+-------------------------------------------------------------+ +define double @test_rcp_rm_d_0_5() { +; CHECK-LABEL: define double @test_rcp_rm_d_0_5() { +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = call double @llvm.nvvm.rcp.rm.d(double 0.5) + ret double %res +} + +define float @test_rcp_rm_f_0_5() { +; CHECK-LABEL: define float @test_rcp_rm_f_0_5() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.rcp.rm.f(float 0.5) + ret float %res +} + +define float @test_rcp_rm_ftz_f_0_5() { +; CHECK-LABEL: define float @test_rcp_rm_ftz_f_0_5() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.rcp.rm.ftz.f(float 0.5) + ret float %res +} + +define double @test_rcp_rm_d_neg_subnorm() { +; CHECK-LABEL: define double @test_rcp_rm_d_neg_subnorm() { +; CHECK-NEXT: ret double 0xC7D0000020000041 +; + %res = call double @llvm.nvvm.rcp.rm.d(double 
0xB80FFFFFC0000000) + ret double %res +} + +define float @test_rcp_rm_f_neg_subnorm() { +; CHECK-LABEL: define float @test_rcp_rm_f_neg_subnorm() { +; CHECK-NEXT: ret float 0xC7D0000040000000 +; + %res = call float @llvm.nvvm.rcp.rm.f(float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_rcp_rm_ftz_f_neg_subnorm() { +; CHECK-LABEL: define float @test_rcp_rm_ftz_f_neg_subnorm() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.rcp.rm.ftz.f(float 0xB80FFFFFC0000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.rcp.rm.ftz.f(float 0xB80FFFFFC0000000) + ret float %res +} + +;+-------------------------------------------------------------+ +;| rcp_rn | +;+-------------------------------------------------------------+ +define double @test_rcp_rn_d_0_5() { +; CHECK-LABEL: define double @test_rcp_rn_d_0_5() { +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = call double @llvm.nvvm.rcp.rn.d(double 0.5) + ret double %res +} + +define float @test_rcp_rn_f_0_5() { +; CHECK-LABEL: define float @test_rcp_rn_f_0_5() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.rcp.rn.f(float 0.5) + ret float %res +} + +define float @test_rcp_rn_ftz_f_0_5() { +; CHECK-LABEL: define float @test_rcp_rn_ftz_f_0_5() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.rcp.rn.ftz.f(float 0.5) + ret float %res +} + +define double @test_rcp_rn_d_neg_subnorm() { +; CHECK-LABEL: define double @test_rcp_rn_d_neg_subnorm() { +; CHECK-NEXT: ret double 0xC7D0000020000040 +; + %res = call double @llvm.nvvm.rcp.rn.d(double 0xB80FFFFFC0000000) + ret double %res +} + +define float @test_rcp_rn_f_neg_subnorm() { +; CHECK-LABEL: define float @test_rcp_rn_f_neg_subnorm() { +; CHECK-NEXT: ret float 0xC7D0000020000000 +; + %res = call float @llvm.nvvm.rcp.rn.f(float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_rcp_rn_ftz_f_neg_subnorm() { +; CHECK-LABEL: define float @test_rcp_rn_ftz_f_neg_subnorm() { +; CHECK-NEXT: 
[[RES:%.*]] = call float @llvm.nvvm.rcp.rn.ftz.f(float 0xB80FFFFFC0000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.rcp.rn.ftz.f(float 0xB80FFFFFC0000000) + ret float %res +} + +;+-------------------------------------------------------------+ +;| rcp_rp | +;+-------------------------------------------------------------+ +define double @test_rcp_rp_d_0_5() { +; CHECK-LABEL: define double @test_rcp_rp_d_0_5() { +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = call double @llvm.nvvm.rcp.rp.d(double 0.5) + ret double %res +} + +define float @test_rcp_rp_f_0_5() { +; CHECK-LABEL: define float @test_rcp_rp_f_0_5() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.rcp.rp.f(float 0.5) + ret float %res +} + +define float @test_rcp_rp_ftz_f_0_5() { +; CHECK-LABEL: define float @test_rcp_rp_ftz_f_0_5() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.rcp.rp.ftz.f(float 0.5) + ret float %res +} + +define double @test_rcp_rp_d_neg_subnorm() { +; CHECK-LABEL: define double @test_rcp_rp_d_neg_subnorm() { +; CHECK-NEXT: ret double 0xC7D0000020000040 +; + %res = call double @llvm.nvvm.rcp.rp.d(double 0xB80FFFFFC0000000) + ret double %res +} + +define float @test_rcp_rp_f_neg_subnorm() { +; CHECK-LABEL: define float @test_rcp_rp_f_neg_subnorm() { +; CHECK-NEXT: ret float 0xC7D0000020000000 +; + %res = call float @llvm.nvvm.rcp.rp.f(float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_rcp_rp_ftz_f_neg_subnorm() { +; CHECK-LABEL: define float @test_rcp_rp_ftz_f_neg_subnorm() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.rcp.rp.ftz.f(float 0xB80FFFFFC0000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.rcp.rp.ftz.f(float 0xB80FFFFFC0000000) + ret float %res +} + +;+-------------------------------------------------------------+ +;| rcp_rz | +;+-------------------------------------------------------------+ +define double @test_rcp_rz_d_0_5() { +; CHECK-LABEL: define 
double @test_rcp_rz_d_0_5() { +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = call double @llvm.nvvm.rcp.rz.d(double 0.5) + ret double %res +} + +define float @test_rcp_rz_f_0_5() { +; CHECK-LABEL: define float @test_rcp_rz_f_0_5() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.rcp.rz.f(float 0.5) + ret float %res +} + +define float @test_rcp_rz_ftz_f_0_5() { +; CHECK-LABEL: define float @test_rcp_rz_ftz_f_0_5() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.rcp.rz.ftz.f(float 0.5) + ret float %res +} + +define double @test_rcp_rz_d_neg_subnorm() { +; CHECK-LABEL: define double @test_rcp_rz_d_neg_subnorm() { +; CHECK-NEXT: ret double 0xC7D0000020000040 +; + %res = call double @llvm.nvvm.rcp.rz.d(double 0xB80FFFFFC0000000) + ret double %res +} + +define float @test_rcp_rz_f_neg_subnorm() { +; CHECK-LABEL: define float @test_rcp_rz_f_neg_subnorm() { +; CHECK-NEXT: ret float 0xC7D0000020000000 +; + %res = call float @llvm.nvvm.rcp.rz.f(float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_rcp_rz_ftz_f_neg_subnorm() { +; CHECK-LABEL: define float @test_rcp_rz_ftz_f_neg_subnorm() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.rcp.rz.ftz.f(float 0xB80FFFFFC0000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.rcp.rz.ftz.f(float 0xB80FFFFFC0000000) + ret float %res +} + +;############################################################### +;# Round # +;############################################################### + +define double @test_round_d_neg_1_5() { +; CHECK-LABEL: define double @test_round_d_neg_1_5() { +; CHECK-NEXT: ret double -2.000000e+00 +; + %res = call double @llvm.nvvm.round.d(double -1.5) + ret double %res +} + +define float @test_round_f_neg_1_5() { +; CHECK-LABEL: define float @test_round_f_neg_1_5() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.round.f(float -1.5) + ret float %res +} + +define float @test_round_ftz_f_neg_1_5() { 
+; CHECK-LABEL: define float @test_round_ftz_f_neg_1_5() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.round.ftz.f(float -1.5) + ret float %res +} + +define double @test_round_d_neg_subnorm() { +; CHECK-LABEL: define double @test_round_d_neg_subnorm() { +; CHECK-NEXT: ret double -0.000000e+00 +; + %res = call double @llvm.nvvm.round.d(double 0xB80FFFFFC0000000) + ret double %res +} + +define float @test_round_f_neg_subnorm() { +; CHECK-LABEL: define float @test_round_f_neg_subnorm() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.round.f(float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_round_ftz_f_neg_subnorm() { +; CHECK-LABEL: define float @test_round_ftz_f_neg_subnorm() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.round.ftz.f(float 0xB80FFFFFC0000000) + ret float %res +} + +;############################################################### +;# Saturate # +;############################################################### + +define double @test_saturate_d_1_25() { +; CHECK-LABEL: define double @test_saturate_d_1_25() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.saturate.d(double 1.25) + ret double %res +} + +define float @test_saturate_f_1_25() { +; CHECK-LABEL: define float @test_saturate_f_1_25() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.saturate.f(float 1.25) + ret float %res +} + +define float @test_saturate_ftz_f_1_25() { +; CHECK-LABEL: define float @test_saturate_ftz_f_1_25() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.saturate.ftz.f(float 1.25) + ret float %res +} + +define double @test_saturate_d_neg_1_25() { +; CHECK-LABEL: define double @test_saturate_d_neg_1_25() { +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = call double @llvm.nvvm.saturate.d(double -1.25) + ret double %res +} + +define float @test_saturate_f_neg_1_25() { +; CHECK-LABEL: define float 
@test_saturate_f_neg_1_25() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.saturate.f(float -1.25) + ret float %res +} + +define float @test_saturate_ftz_f_neg_1_25() { +; CHECK-LABEL: define float @test_saturate_ftz_f_neg_1_25() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.saturate.ftz.f(float -1.25) + ret float %res +} + +define double @test_saturate_d_0_5() { +; CHECK-LABEL: define double @test_saturate_d_0_5() { +; CHECK-NEXT: ret double 5.000000e-01 +; + %res = call double @llvm.nvvm.saturate.d(double 0.5) + ret double %res +} + +define float @test_saturate_f_0_5() { +; CHECK-LABEL: define float @test_saturate_f_0_5() { +; CHECK-NEXT: ret float 5.000000e-01 +; + %res = call float @llvm.nvvm.saturate.f(float 0.5) + ret float %res +} + +define float @test_saturate_ftz_f_0_5() { +; CHECK-LABEL: define float @test_saturate_ftz_f_0_5() { +; CHECK-NEXT: ret float 5.000000e-01 +; + %res = call float @llvm.nvvm.saturate.ftz.f(float 0.5) + ret float %res +} + +define double @test_saturate_d_pos_subnorm() { +; CHECK-LABEL: define double @test_saturate_d_pos_subnorm() { +; CHECK-NEXT: ret double 0x380FFFFFC0000000 +; + %res = call double @llvm.nvvm.saturate.d(double 0x380FFFFFC0000000) + ret double %res +} + +define float @test_saturate_f_pos_subnorm() { +; CHECK-LABEL: define float @test_saturate_f_pos_subnorm() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.saturate.f(float 0x380FFFFFC0000000) + ret float %res +} + +define float @test_saturate_ftz_f_pos_subnorm() { +; CHECK-LABEL: define float @test_saturate_ftz_f_pos_subnorm() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.saturate.ftz.f(float 0x380FFFFFC0000000) + ret float %res +} + +;############################################################### +;# Sqrt # +;############################################################### + +define float @test_sqrt_f_4() { +; CHECK-LABEL: define float @test_sqrt_f_4() 
{ +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.sqrt.f(float 4.0) + ret float %res +} + +define float @test_sqrt_rn_f_4() { +; CHECK-LABEL: define float @test_sqrt_rn_f_4() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.sqrt.rn.f(float 4.0) + ret float %res +} + +define double @test_sqrt_rn_d_4() { +; CHECK-LABEL: define double @test_sqrt_rn_d_4() { +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = call double @llvm.nvvm.sqrt.rn.d(double 4.0) + ret double %res +} + +define float @test_sqrt_rn_ftz_f_4() { +; CHECK-LABEL: define float @test_sqrt_rn_ftz_f_4() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.sqrt.rn.ftz.f(float 4.0) + ret float %res +} + +define float @test_sqrt_f_pos_subnorm() { +; CHECK-LABEL: define float @test_sqrt_f_pos_subnorm() { +; CHECK-NEXT: ret float 0x3BFFFFFFE0000000 +; + %res = call float @llvm.nvvm.sqrt.f(float 0x380FFFFFC0000000) + ret float %res +} + +define float @test_sqrt_rn_f_pos_subnorm() { +; CHECK-LABEL: define float @test_sqrt_rn_f_pos_subnorm() { +; CHECK-NEXT: ret float 0x3BFFFFFFE0000000 +; + %res = call float @llvm.nvvm.sqrt.rn.f(float 0x380FFFFFC0000000) + ret float %res +} + +define double @test_sqrt_rn_d_pos_subnorm() { +; CHECK-LABEL: define double @test_sqrt_rn_d_pos_subnorm() { +; CHECK-NEXT: ret double 0x3BFFFFFFDFFFFFF0 +; + %res = call double @llvm.nvvm.sqrt.rn.d(double 0x380FFFFFC0000000) + ret double %res +} + +define float @test_sqrt_rn_ftz_f_pos_subnorm() { +; CHECK-LABEL: define float @test_sqrt_rn_ftz_f_pos_subnorm() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.sqrt.rn.ftz.f(float 0x380FFFFFC0000000) + ret float %res +} + +declare double @llvm.nvvm.ceil.d(double) +declare float @llvm.nvvm.ceil.f(float) +declare float @llvm.nvvm.ceil.ftz.f(float) + +declare float @llvm.nvvm.fabs(float) +declare float @llvm.nvvm.fabs.ftz(float) + +declare double @llvm.nvvm.floor.d(double) +declare float 
@llvm.nvvm.floor.f(float) +declare float @llvm.nvvm.floor.ftz.f(float) + +declare double @llvm.nvvm.rcp.rm.d(double) +declare float @llvm.nvvm.rcp.rm.f(float) +declare float @llvm.nvvm.rcp.rm.ftz.f(float) +declare double @llvm.nvvm.rcp.rn.d(double) +declare float @llvm.nvvm.rcp.rn.f(float) +declare float @llvm.nvvm.rcp.rn.ftz.f(float) +declare double @llvm.nvvm.rcp.rp.d(double) +declare float @llvm.nvvm.rcp.rp.f(float) +declare float @llvm.nvvm.rcp.rp.ftz.f(float) +declare double @llvm.nvvm.rcp.rz.d(double) +declare float @llvm.nvvm.rcp.rz.f(float) +declare float @llvm.nvvm.rcp.rz.ftz.f(float) + +declare double @llvm.nvvm.round.d(double) +declare float @llvm.nvvm.round.f(float) +declare float @llvm.nvvm.round.ftz.f(float) + +declare double @llvm.nvvm.saturate.d(double) +declare float @llvm.nvvm.saturate.f(float) +declare float @llvm.nvvm.saturate.ftz.f(float) + +declare float @llvm.nvvm.sqrt.f(float) +declare double @llvm.nvvm.sqrt.rn.d(double) +declare float @llvm.nvvm.sqrt.rn.f(float) +declare float @llvm.nvvm.sqrt.rn.ftz.f(float)