From e520c47ef2066eaf697eb78b65c2c2eb41398a03 Mon Sep 17 00:00:00 2001 From: Lewis Crawford Date: Fri, 15 Nov 2024 17:09:38 +0000 Subject: [PATCH 1/6] [NVPTX] Constant-folding for f2i, d2ui, f2ll etc. Add constant-folding support for the NVVM intrinsics for converting float/double to signed/unsigned int32/int64 types, including all rounding-modes and ftz modifiers. --- llvm/lib/Analysis/ConstantFolding.cpp | 265 ++++ .../InstSimplify/const-fold-nvvm-f2i-d2i.ll | 1129 +++++++++++++++++ .../InstSimplify/const-fold-nvvm-f2ll-d2ll.ll | 1129 +++++++++++++++++ 3 files changed, 2523 insertions(+) create mode 100644 llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll create mode 100644 llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 3d5022e5502e2..dbc8ff2cfca34 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsARM.h" +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/Operator.h" @@ -1687,6 +1688,58 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::x86_avx512_cvttsd2usi64: return !Call->isStrictFP(); + // NVVM float/double to int32/uint32 conversion intrinsics + case Intrinsic::nvvm_f2i_rm: + case Intrinsic::nvvm_f2i_rn: + case Intrinsic::nvvm_f2i_rp: + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2i_rz_ftz: + case Intrinsic::nvvm_f2ui_rm: + case Intrinsic::nvvm_f2ui_rn: + case Intrinsic::nvvm_f2ui_rp: + case Intrinsic::nvvm_f2ui_rz: + case Intrinsic::nvvm_f2ui_rm_ftz: + case Intrinsic::nvvm_f2ui_rn_ftz: + case Intrinsic::nvvm_f2ui_rp_ftz: + case Intrinsic::nvvm_f2ui_rz_ftz: + case Intrinsic::nvvm_d2i_rm: + case Intrinsic::nvvm_d2i_rn: + case Intrinsic::nvvm_d2i_rp: + case Intrinsic::nvvm_d2i_rz: + case Intrinsic::nvvm_d2ui_rm: + case Intrinsic::nvvm_d2ui_rn: + case Intrinsic::nvvm_d2ui_rp: + case Intrinsic::nvvm_d2ui_rz: + + // NVVM float/double to int64/uint64 conversion intrinsics + case Intrinsic::nvvm_f2ll_rm: + case Intrinsic::nvvm_f2ll_rn: + case Intrinsic::nvvm_f2ll_rp: + case Intrinsic::nvvm_f2ll_rz: + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ll_rz_ftz: + case Intrinsic::nvvm_f2ull_rm: + case Intrinsic::nvvm_f2ull_rn: + case Intrinsic::nvvm_f2ull_rp: + case Intrinsic::nvvm_f2ull_rz: + case Intrinsic::nvvm_f2ull_rm_ftz: + case Intrinsic::nvvm_f2ull_rn_ftz: + case Intrinsic::nvvm_f2ull_rp_ftz: + case Intrinsic::nvvm_f2ull_rz_ftz: + case Intrinsic::nvvm_d2ll_rm: + case Intrinsic::nvvm_d2ll_rn: + case Intrinsic::nvvm_d2ll_rp: + case Intrinsic::nvvm_d2ll_rz: + case Intrinsic::nvvm_d2ull_rm: + case Intrinsic::nvvm_d2ull_rn: + case Intrinsic::nvvm_d2ull_rp: + case Intrinsic::nvvm_d2ull_rz: + // Sign operations are actually bitwise operations, they do not raise // exceptions even for SNANs. 
case Intrinsic::fabs: @@ -1849,6 +1902,13 @@ inline bool llvm_fenv_testexcept() { return false; } +static const APFloat FTZPreserveSign(Type *Ty, const APFloat &V) { + if (V.isDenormal()) + return APFloat::getZero(Ty->getFltSemantics(), V.isNegative()); + + return V; +} + Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty) { llvm_fenv_clearexcept(); @@ -2309,6 +2369,211 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantFP::get(Ty->getContext(), U); } + // NVVM float/double to signed/unsigned int32/int64 conversions: + switch (IntrinsicID) { + // f2i + case Intrinsic::nvvm_f2i_rm: + case Intrinsic::nvvm_f2i_rn: + case Intrinsic::nvvm_f2i_rp: + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2i_rz_ftz: + // f2ui + case Intrinsic::nvvm_f2ui_rm: + case Intrinsic::nvvm_f2ui_rn: + case Intrinsic::nvvm_f2ui_rp: + case Intrinsic::nvvm_f2ui_rz: + case Intrinsic::nvvm_f2ui_rm_ftz: + case Intrinsic::nvvm_f2ui_rn_ftz: + case Intrinsic::nvvm_f2ui_rp_ftz: + case Intrinsic::nvvm_f2ui_rz_ftz: + // d2i + case Intrinsic::nvvm_d2i_rm: + case Intrinsic::nvvm_d2i_rn: + case Intrinsic::nvvm_d2i_rp: + case Intrinsic::nvvm_d2i_rz: + // d2ui + case Intrinsic::nvvm_d2ui_rm: + case Intrinsic::nvvm_d2ui_rn: + case Intrinsic::nvvm_d2ui_rp: + case Intrinsic::nvvm_d2ui_rz: + // f2ll + case Intrinsic::nvvm_f2ll_rm: + case Intrinsic::nvvm_f2ll_rn: + case Intrinsic::nvvm_f2ll_rp: + case Intrinsic::nvvm_f2ll_rz: + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ll_rz_ftz: + // f2ull + case Intrinsic::nvvm_f2ull_rm: + case Intrinsic::nvvm_f2ull_rn: + case Intrinsic::nvvm_f2ull_rp: + case Intrinsic::nvvm_f2ull_rz: + case Intrinsic::nvvm_f2ull_rm_ftz: + case Intrinsic::nvvm_f2ull_rn_ftz: + case Intrinsic::nvvm_f2ull_rp_ftz: + case Intrinsic::nvvm_f2ull_rz_ftz: + // d2ll + case Intrinsic::nvvm_d2ll_rm: + case Intrinsic::nvvm_d2ll_rn: + case Intrinsic::nvvm_d2ll_rp: + case Intrinsic::nvvm_d2ll_rz: + // d2ull + case Intrinsic::nvvm_d2ull_rm: + case Intrinsic::nvvm_d2ull_rn: + case Intrinsic::nvvm_d2ull_rp: + case Intrinsic::nvvm_d2ull_rz: { + // In float-to-integer conversion, NaN inputs are converted to 0. 
+ if (U.isNaN()) + return ConstantInt::get(Ty, 0); + + APFloat::roundingMode RMode = APFloat::roundingMode::Invalid; + switch (IntrinsicID) { + // i_rm + case Intrinsic::nvvm_f2i_rm: + case Intrinsic::nvvm_f2ui_rm: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2ui_rm_ftz: + case Intrinsic::nvvm_d2i_rm: + case Intrinsic::nvvm_d2ui_rm: + // ll_rm + case Intrinsic::nvvm_f2ll_rm: + case Intrinsic::nvvm_f2ull_rm: + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ull_rm_ftz: + case Intrinsic::nvvm_d2ll_rm: + case Intrinsic::nvvm_d2ull_rm: + RMode = APFloat::rmTowardNegative; + break; + + // i_rn + case Intrinsic::nvvm_f2i_rn: + case Intrinsic::nvvm_f2ui_rn: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2ui_rn_ftz: + case Intrinsic::nvvm_d2i_rn: + case Intrinsic::nvvm_d2ui_rn: + // ll_rn + case Intrinsic::nvvm_f2ll_rn: + case Intrinsic::nvvm_f2ull_rn: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ull_rn_ftz: + case Intrinsic::nvvm_d2ll_rn: + case Intrinsic::nvvm_d2ull_rn: + RMode = APFloat::rmNearestTiesToEven; + break; + + // i_rp + case Intrinsic::nvvm_f2i_rp: + case Intrinsic::nvvm_f2ui_rp: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2ui_rp_ftz: + case Intrinsic::nvvm_d2i_rp: + case Intrinsic::nvvm_d2ui_rp: + // ll_rp + case Intrinsic::nvvm_f2ll_rp: + case Intrinsic::nvvm_f2ull_rp: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ull_rp_ftz: + case Intrinsic::nvvm_d2ll_rp: + case Intrinsic::nvvm_d2ull_rp: + RMode = APFloat::rmTowardPositive; + break; + + // i_rz + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_f2ui_rz: + case Intrinsic::nvvm_f2i_rz_ftz: + case Intrinsic::nvvm_f2ui_rz_ftz: + case Intrinsic::nvvm_d2i_rz: + case Intrinsic::nvvm_d2ui_rz: + // ll_rz + case Intrinsic::nvvm_f2ll_rz: + case Intrinsic::nvvm_f2ull_rz: + case Intrinsic::nvvm_f2ll_rz_ftz: + case Intrinsic::nvvm_f2ull_rz_ftz: + case Intrinsic::nvvm_d2ll_rz: + case Intrinsic::nvvm_d2ull_rz: + RMode = APFloat::rmTowardZero; + break; + default: + llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic"); + } + assert(RMode != APFloat::roundingMode::Invalid); + + bool IsFTZ = false; + switch (IntrinsicID) { + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2i_rz_ftz: + case Intrinsic::nvvm_f2ui_rm_ftz: + case Intrinsic::nvvm_f2ui_rn_ftz: + case Intrinsic::nvvm_f2ui_rp_ftz: + case Intrinsic::nvvm_f2ui_rz_ftz: + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ll_rz_ftz: + case Intrinsic::nvvm_f2ull_rm_ftz: + case Intrinsic::nvvm_f2ull_rn_ftz: + case Intrinsic::nvvm_f2ull_rp_ftz: + case Intrinsic::nvvm_f2ull_rz_ftz: + IsFTZ = true; + break; + } + + bool IsSigned = false; + switch (IntrinsicID) { + // f2i + case Intrinsic::nvvm_f2i_rm: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2i_rn: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2i_rp: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_f2i_rz_ftz: + // d2i + case Intrinsic::nvvm_d2i_rm: + case Intrinsic::nvvm_d2i_rn: + case Intrinsic::nvvm_d2i_rp: + case Intrinsic::nvvm_d2i_rz: + // f2ll + case Intrinsic::nvvm_f2ll_rm: + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ll_rn: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ll_rp: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ll_rz: + case Intrinsic::nvvm_f2ll_rz_ftz: + // d2ll + case 
Intrinsic::nvvm_d2ll_rm: + case Intrinsic::nvvm_d2ll_rn: + case Intrinsic::nvvm_d2ll_rp: + case Intrinsic::nvvm_d2ll_rz: + IsSigned = true; + break; + } + + APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned); + auto FloatToRound = IsFTZ ? FTZPreserveSign(Op->getType(), U) : U; + + bool IsExact = false; + APFloat::opStatus Status = + FloatToRound.convertToInteger(ResInt, RMode, &IsExact); + + if (Status != APFloat::opInvalidOp) + return ConstantInt::get(Ty, ResInt); + return nullptr; + } + } + /// We only fold functions with finite arguments. Folding NaN and inf is /// likely to be aborted with an exception anyway, and some host libms /// have known errors raising exceptions. diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll new file mode 100644 index 0000000000000..543c73137c1b6 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll @@ -0,0 +1,1129 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=instsimplify -march=nvptx64 -S | FileCheck %s + +; f2i/f2ui and d2i/d2ui - double/float to i32 tests + +;############################################################### +;# Tests with Positive 1.5 # +;############################################################### + +;+-------------------------------------------------------------+ +;| f2i | +;+-------------------------------------------------------------+ +define i32 @test_pos_1_5_f2i_rm() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rm() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.f2i.rm(float 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_f2i_rn() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rn() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.f2i.rn(float 1.5) + ret i32 %res +} + + +define i32 @test_pos_1_5_f2i_rp() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rp() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.f2i.rp(float 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_f2i_rz() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rz() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.f2i.rz(float 1.5) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2i_ftz | +;+-------------------------------------------------------------+ +define i32 @test_pos_1_5_f2i_rm_ftz() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rm_ftz() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_f2i_rn_ftz() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rn_ftz() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_f2i_rp_ftz() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rp_ftz() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_f2i_rz_ftz() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rz_ftz() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 1.5) + ret i32 %res +} +;+-------------------------------------------------------------+ +;| d2i | +;+-------------------------------------------------------------+ +define i32 @test_pos_1_5_d2i_rm() { +; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rm() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.d2i.rm(double 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_d2i_rn() { +; CHECK-LABEL: 
define i32 @test_pos_1_5_d2i_rn() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.d2i.rn(double 1.5) + ret i32 %res +} + + +define i32 @test_pos_1_5_d2i_rp() { +; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rp() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.d2i.rp(double 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_d2i_rz() { +; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rz() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.d2i.rz(double 1.5) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2ui | +;+-------------------------------------------------------------+ +define i32 @test_pos_1_5_f2ui_rm() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rm() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.f2ui.rm(float 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_f2ui_rn() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rn() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.f2ui.rn(float 1.5) + ret i32 %res +} + + +define i32 @test_pos_1_5_f2ui_rp() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rp() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.f2ui.rp(float 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_f2ui_rz() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rz() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.f2ui.rz(float 1.5) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2ui_ftz | +;+-------------------------------------------------------------+ +define i32 @test_pos_1_5_f2ui_rm_ftz() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rm_ftz() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_f2ui_rn_ftz() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rn_ftz() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_f2ui_rp_ftz() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rp_ftz() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_f2ui_rz_ftz() { +; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rz_ftz() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 1.5) + ret i32 %res +} +;+-------------------------------------------------------------+ +;| d2ui | +;+-------------------------------------------------------------+ +define i32 @test_pos_1_5_d2ui_rm() { +; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rm() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.d2ui.rm(double 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_d2ui_rn() { +; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rn() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.d2ui.rn(double 1.5) + ret i32 %res +} + + +define i32 @test_pos_1_5_d2ui_rp() { +; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rp() { +; CHECK-NEXT: ret i32 2 +; + %res = call i32 @llvm.nvvm.d2ui.rp(double 1.5) + ret i32 %res +} + +define i32 @test_pos_1_5_d2ui_rz() { +; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rz() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.d2ui.rz(double 1.5) + ret i32 %res +} + +;############################################################### +;# Tests with Negative 1.5 # +;############################################################### + +;+-------------------------------------------------------------+ +;| f2i | +;+-------------------------------------------------------------+ +define i32 
@test_neg_1_5_f2i_rm() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rm() { +; CHECK-NEXT: ret i32 -2 +; + %res = call i32 @llvm.nvvm.f2i.rm(float -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_f2i_rn() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rn() { +; CHECK-NEXT: ret i32 -2 +; + %res = call i32 @llvm.nvvm.f2i.rn(float -1.5) + ret i32 %res +} + + +define i32 @test_neg_1_5_f2i_rp() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rp() { +; CHECK-NEXT: ret i32 -1 +; + %res = call i32 @llvm.nvvm.f2i.rp(float -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_f2i_rz() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rz() { +; CHECK-NEXT: ret i32 -1 +; + %res = call i32 @llvm.nvvm.f2i.rz(float -1.5) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2i_ftz | +;+-------------------------------------------------------------+ +define i32 @test_neg_1_5_f2i_rm_ftz() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rm_ftz() { +; CHECK-NEXT: ret i32 -2 +; + %res = call i32 @llvm.nvvm.f2i.rm.ftz(float -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_f2i_rn_ftz() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rn_ftz() { +; CHECK-NEXT: ret i32 -2 +; + %res = call i32 @llvm.nvvm.f2i.rn.ftz(float -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_f2i_rp_ftz() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rp_ftz() { +; CHECK-NEXT: ret i32 -1 +; + %res = call i32 @llvm.nvvm.f2i.rp.ftz(float -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_f2i_rz_ftz() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rz_ftz() { +; CHECK-NEXT: ret i32 -1 +; + %res = call i32 @llvm.nvvm.f2i.rz.ftz(float -1.5) + ret i32 %res +} +;+-------------------------------------------------------------+ +;| d2i | +;+-------------------------------------------------------------+ +define i32 @test_neg_1_5_d2i_rm() { +; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rm() { +; CHECK-NEXT: ret i32 -2 +; + %res = call i32 @llvm.nvvm.d2i.rm(double -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_d2i_rn() { +; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rn() { +; CHECK-NEXT: ret i32 -2 +; + %res = call i32 @llvm.nvvm.d2i.rn(double -1.5) + ret i32 %res +} + + +define i32 @test_neg_1_5_d2i_rp() { +; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rp() { +; CHECK-NEXT: ret i32 -1 +; + %res = call i32 @llvm.nvvm.d2i.rp(double -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_d2i_rz() { +; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rz() { +; CHECK-NEXT: ret i32 -1 +; + %res = call i32 @llvm.nvvm.d2i.rz(double -1.5) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2ui | +;+-------------------------------------------------------------+ +define i32 @test_neg_1_5_f2ui_rm() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rm() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm(float -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.f2ui.rm(float -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_f2ui_rn() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rn() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rn(float -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.f2ui.rn(float -1.5) + ret i32 %res +} + + +define i32 @test_neg_1_5_f2ui_rp() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rp() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rp(float -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.f2ui.rp(float -1.5) + ret i32 %res +} + +define i32 
@test_neg_1_5_f2ui_rz() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rz() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rz(float -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.f2ui.rz(float -1.5) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2ui_ftz | +;+-------------------------------------------------------------+ +define i32 @test_neg_1_5_f2ui_rm_ftz() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rm_ftz() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm.ftz(float -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_f2ui_rn_ftz() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rn_ftz() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rn.ftz(float -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_f2ui_rp_ftz() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rp_ftz() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rp.ftz(float -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_f2ui_rz_ftz() { +; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rz_ftz() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rz.ftz(float -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float -1.5) + ret i32 %res +} +;+-------------------------------------------------------------+ +;| d2ui | +;+-------------------------------------------------------------+ +define i32 @test_neg_1_5_d2ui_rm() { +; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rm() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rm(double -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.d2ui.rm(double -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_d2ui_rn() { +; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rn() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rn(double -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.d2ui.rn(double -1.5) + ret i32 %res +} + + +define i32 @test_neg_1_5_d2ui_rp() { +; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rp() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rp(double -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.d2ui.rp(double -1.5) + ret i32 %res +} + +define i32 @test_neg_1_5_d2ui_rz() { +; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rz() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rz(double -1.500000e+00) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.d2ui.rz(double -1.5) + ret i32 %res +} + +;############################################################### +;# Tests with NaN # +;############################################################### + +;+-------------------------------------------------------------+ +;| f2i | +;+-------------------------------------------------------------+ +define i32 @test_nan_f2i_rm() { +; CHECK-LABEL: define i32 @test_nan_f2i_rm() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rm(float 0x7FFFFF0000000000) + ret i32 %res +} + +define i32 @test_nan_f2i_rn() { +; CHECK-LABEL: define i32 @test_nan_f2i_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rn(float 0x7FFFFF0000000000) + ret i32 %res +} + + +define i32 @test_nan_f2i_rp() { +; CHECK-LABEL: define i32 
@test_nan_f2i_rp() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rp(float 0x7FFFFF0000000000) + ret i32 %res +} + +define i32 @test_nan_f2i_rz() { +; CHECK-LABEL: define i32 @test_nan_f2i_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rz(float 0x7FFFFF0000000000) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2i_ftz | +;+-------------------------------------------------------------+ +define i32 @test_nan_f2i_rm_ftz() { +; CHECK-LABEL: define i32 @test_nan_f2i_rm_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 0x7FFFFF0000000000) + ret i32 %res +} + +define i32 @test_nan_f2i_rn_ftz() { +; CHECK-LABEL: define i32 @test_nan_f2i_rn_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 0x7FFFFF0000000000) + ret i32 %res +} + +define i32 @test_nan_f2i_rp_ftz() { +; CHECK-LABEL: define i32 @test_nan_f2i_rp_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 0x7FFFFF0000000000) + ret i32 %res +} + +define i32 @test_nan_f2i_rz_ftz() { +; CHECK-LABEL: define i32 @test_nan_f2i_rz_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 0x7FFFFF0000000000) + ret i32 %res +} +;+-------------------------------------------------------------+ +;| d2i | +;+-------------------------------------------------------------+ +define i32 @test_nan_d2i_rm() { +; CHECK-LABEL: define i32 @test_nan_d2i_rm() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2i.rm(double 0xFFF8000000000000) + ret i32 %res +} + +define i32 @test_nan_d2i_rn() { +; CHECK-LABEL: define i32 @test_nan_d2i_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2i.rn(double 0xFFF8000000000000) + ret i32 %res +} + + +define i32 @test_nan_d2i_rp() { +; CHECK-LABEL: define i32 @test_nan_d2i_rp() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2i.rp(double 0xFFF8000000000000) + ret i32 %res +} + +define i32 @test_nan_d2i_rz() { +; CHECK-LABEL: define i32 @test_nan_d2i_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2i.rz(double 0xFFF8000000000000) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2ui | +;+-------------------------------------------------------------+ +define i32 @test_nan_f2ui_rm() { +; CHECK-LABEL: define i32 @test_nan_f2ui_rm() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rm(float 0x7FFFFF0000000000) + ret i32 %res +} + +define i32 @test_nan_f2ui_rn() { +; CHECK-LABEL: define i32 @test_nan_f2ui_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rn(float 0x7FFFFF0000000000) + ret i32 %res +} + + +define i32 @test_nan_f2ui_rp() { +; CHECK-LABEL: define i32 @test_nan_f2ui_rp() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rp(float 0x7FFFFF0000000000) + ret i32 %res +} + +define i32 @test_nan_f2ui_rz() { +; CHECK-LABEL: define i32 @test_nan_f2ui_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rz(float 0x7FFFFF0000000000) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2ui_ftz | +;+-------------------------------------------------------------+ +define i32 @test_nan_f2ui_rm_ftz() { +; CHECK-LABEL: define i32 @test_nan_f2ui_rm_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 0x7FFFFF0000000000) + ret i32 %res +} + +define i32 @test_nan_f2ui_rn_ftz() { +; CHECK-LABEL: define i32 @test_nan_f2ui_rn_ftz() { 
+; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 0x7FFFFF0000000000) + ret i32 %res +} + +define i32 @test_nan_f2ui_rp_ftz() { +; CHECK-LABEL: define i32 @test_nan_f2ui_rp_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 0x7FFFFF0000000000) + ret i32 %res +} + +define i32 @test_nan_f2ui_rz_ftz() { +; CHECK-LABEL: define i32 @test_nan_f2ui_rz_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 0x7FFFFF0000000000) + ret i32 %res +} +;+-------------------------------------------------------------+ +;| d2ui | +;+-------------------------------------------------------------+ +define i32 @test_nan_d2ui_rm() { +; CHECK-LABEL: define i32 @test_nan_d2ui_rm() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2ui.rm(double 0xFFF8000000000000) + ret i32 %res +} + +define i32 @test_nan_d2ui_rn() { +; CHECK-LABEL: define i32 @test_nan_d2ui_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2ui.rn(double 0xFFF8000000000000) + ret i32 %res +} + + +define i32 @test_nan_d2ui_rp() { +; CHECK-LABEL: define i32 @test_nan_d2ui_rp() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2ui.rp(double 0xFFF8000000000000) + ret i32 %res +} + +define i32 @test_nan_d2ui_rz() { +; CHECK-LABEL: define i32 @test_nan_d2ui_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2ui.rz(double 0xFFF8000000000000) + ret i32 %res +} + +;############################################################### +;# Tests with Positive Subnormal # +;############################################################### + +;+-------------------------------------------------------------+ +;| f2i | +;+-------------------------------------------------------------+ +define i32 @test_pos_subnormal_f2i_rm() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rm() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rm(float 0x380FFFFFC0000000) + ret i32 %res +} + +define i32 @test_pos_subnormal_f2i_rn() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rn(float 0x380FFFFFC0000000) + ret i32 %res +} + + +define i32 @test_pos_subnormal_f2i_rp() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rp() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.f2i.rp(float 0x380FFFFFC0000000) + ret i32 %res +} + +define i32 @test_pos_subnormal_f2i_rz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rz(float 0x380FFFFFC0000000) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2i_ftz | +;+-------------------------------------------------------------+ +define i32 @test_pos_subnormal_f2i_rm_ftz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rm_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 0x380FFFFFC0000000) + ret i32 %res +} + +define i32 @test_pos_subnormal_f2i_rn_ftz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rn_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 0x380FFFFFC0000000) + ret i32 %res +} + +define i32 @test_pos_subnormal_f2i_rp_ftz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rp_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 0x380FFFFFC0000000) + ret i32 %res +} + +define i32 @test_pos_subnormal_f2i_rz_ftz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rz_ftz() { +; CHECK-NEXT: ret i32 
0 +; + %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 0x380FFFFFC0000000) + ret i32 %res +} +;+-------------------------------------------------------------+ +;| d2i | +;+-------------------------------------------------------------+ +define i32 @test_pos_subnormal_d2i_rm() { +; CHECK-LABEL: define i32 @test_pos_subnormal_d2i_rm() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2i.rm(double 0x000fffffffffffff) + ret i32 %res +} + +define i32 @test_pos_subnormal_d2i_rn() { +; CHECK-LABEL: define i32 @test_pos_subnormal_d2i_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2i.rn(double 0x000fffffffffffff) + ret i32 %res +} + + +define i32 @test_pos_subnormal_d2i_rp() { +; CHECK-LABEL: define i32 @test_pos_subnormal_d2i_rp() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.d2i.rp(double 0x000fffffffffffff) + ret i32 %res +} + +define i32 @test_pos_subnormal_d2i_rz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_d2i_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2i.rz(double 0x000fffffffffffff) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2ui | +;+-------------------------------------------------------------+ +define i32 @test_pos_subnormal_f2ui_rm() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rm() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rm(float 0x380FFFFFC0000000) + ret i32 %res +} + +define i32 @test_pos_subnormal_f2ui_rn() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rn(float 0x380FFFFFC0000000) + ret i32 %res +} + + +define i32 @test_pos_subnormal_f2ui_rp() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rp() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.f2ui.rp(float 0x380FFFFFC0000000) + ret i32 %res +} + +define i32 @test_pos_subnormal_f2ui_rz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rz(float 0x380FFFFFC0000000) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2ui_ftz | +;+-------------------------------------------------------------+ +define i32 @test_pos_subnormal_f2ui_rm_ftz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rm_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 0x380FFFFFC0000000) + ret i32 %res +} + +define i32 @test_pos_subnormal_f2ui_rn_ftz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rn_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 0x380FFFFFC0000000) + ret i32 %res +} + +define i32 @test_pos_subnormal_f2ui_rp_ftz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rp_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 0x380FFFFFC0000000) + ret i32 %res +} + +define i32 @test_pos_subnormal_f2ui_rz_ftz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rz_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 0x380FFFFFC0000000) + ret i32 %res +} +;+-------------------------------------------------------------+ +;| d2ui | +;+-------------------------------------------------------------+ +define i32 @test_pos_subnormal_d2ui_rm() { +; CHECK-LABEL: define i32 @test_pos_subnormal_d2ui_rm() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2ui.rm(double 0x000fffffffffffff) + ret i32 %res +} + +define i32 @test_pos_subnormal_d2ui_rn() { +; CHECK-LABEL: 
define i32 @test_pos_subnormal_d2ui_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2ui.rn(double 0x000fffffffffffff) + ret i32 %res +} + + +define i32 @test_pos_subnormal_d2ui_rp() { +; CHECK-LABEL: define i32 @test_pos_subnormal_d2ui_rp() { +; CHECK-NEXT: ret i32 1 +; + %res = call i32 @llvm.nvvm.d2ui.rp(double 0x000fffffffffffff) + ret i32 %res +} + +define i32 @test_pos_subnormal_d2ui_rz() { +; CHECK-LABEL: define i32 @test_pos_subnormal_d2ui_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2ui.rz(double 0x000fffffffffffff) + ret i32 %res +} + +;############################################################### +;# Tests with Negative Subnormal # +;############################################################### + +;+-------------------------------------------------------------+ +;| f2i | +;+-------------------------------------------------------------+ +define i32 @test_neg_subnormal_f2i_rm() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rm() { +; CHECK-NEXT: ret i32 -1 +; + %res = call i32 @llvm.nvvm.f2i.rm(float 0xB80FFFFFC0000000) + ret i32 %res +} + +define i32 @test_neg_subnormal_f2i_rn() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rn(float 0xB80FFFFFC0000000) + ret i32 %res +} + + +define i32 @test_neg_subnormal_f2i_rp() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rp() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rp(float 0xB80FFFFFC0000000) + ret i32 %res +} + +define i32 @test_neg_subnormal_f2i_rz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rz(float 0xB80FFFFFC0000000) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2i_ftz | +;+-------------------------------------------------------------+ +define i32 @test_neg_subnormal_f2i_rm_ftz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rm_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 0xB80FFFFFC0000000) + ret i32 %res +} + +define i32 @test_neg_subnormal_f2i_rn_ftz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rn_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 0xB80FFFFFC0000000) + ret i32 %res +} + +define i32 @test_neg_subnormal_f2i_rp_ftz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rp_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 0xB80FFFFFC0000000) + ret i32 %res +} + +define i32 @test_neg_subnormal_f2i_rz_ftz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rz_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 0xB80FFFFFC0000000) + ret i32 %res +} +;+-------------------------------------------------------------+ +;| d2i | +;+-------------------------------------------------------------+ +define i32 @test_neg_subnormal_d2i_rm() { +; CHECK-LABEL: define i32 @test_neg_subnormal_d2i_rm() { +; CHECK-NEXT: ret i32 -1 +; + %res = call i32 @llvm.nvvm.d2i.rm(double 0x800fffffffffffff) + ret i32 %res +} + +define i32 @test_neg_subnormal_d2i_rn() { +; CHECK-LABEL: define i32 @test_neg_subnormal_d2i_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2i.rn(double 0x800fffffffffffff) + ret i32 %res +} + + +define i32 @test_neg_subnormal_d2i_rp() { +; CHECK-LABEL: define i32 @test_neg_subnormal_d2i_rp() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2i.rp(double 0x800fffffffffffff) + ret i32 
%res +} + +define i32 @test_neg_subnormal_d2i_rz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_d2i_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2i.rz(double 0x800fffffffffffff) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2ui | +;+-------------------------------------------------------------+ +define i32 @test_neg_subnormal_f2ui_rm() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rm() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm(float 0xB80FFFFFC0000000) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.f2ui.rm(float 0xB80FFFFFC0000000) + ret i32 %res +} + +define i32 @test_neg_subnormal_f2ui_rn() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rn(float 0xB80FFFFFC0000000) + ret i32 %res +} + + +define i32 @test_neg_subnormal_f2ui_rp() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rp() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rp(float 0xB80FFFFFC0000000) + ret i32 %res +} + +define i32 @test_neg_subnormal_f2ui_rz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rz(float 0xB80FFFFFC0000000) + ret i32 %res +} + +;+-------------------------------------------------------------+ +;| f2ui_ftz | +;+-------------------------------------------------------------+ +define i32 @test_neg_subnormal_f2ui_rm_ftz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rm_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 0xB80FFFFFC0000000) + ret i32 %res +} + +define i32 @test_neg_subnormal_f2ui_rn_ftz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rn_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 0xB80FFFFFC0000000) + ret i32 %res +} + +define i32 @test_neg_subnormal_f2ui_rp_ftz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rp_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 0xB80FFFFFC0000000) + ret i32 %res +} + +define i32 @test_neg_subnormal_f2ui_rz_ftz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rz_ftz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 0xB80FFFFFC0000000) + ret i32 %res +} +;+-------------------------------------------------------------+ +;| d2ui | +;+-------------------------------------------------------------+ +define i32 @test_neg_subnormal_d2ui_rm() { +; CHECK-LABEL: define i32 @test_neg_subnormal_d2ui_rm() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rm(double 0x800FFFFFFFFFFFFF) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.nvvm.d2ui.rm(double 0x800fffffffffffff) + ret i32 %res +} + +define i32 @test_neg_subnormal_d2ui_rn() { +; CHECK-LABEL: define i32 @test_neg_subnormal_d2ui_rn() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2ui.rn(double 0x800fffffffffffff) + ret i32 %res +} + + +define i32 @test_neg_subnormal_d2ui_rp() { +; CHECK-LABEL: define i32 @test_neg_subnormal_d2ui_rp() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2ui.rp(double 0x800fffffffffffff) + ret i32 %res +} + +define i32 @test_neg_subnormal_d2ui_rz() { +; CHECK-LABEL: define i32 @test_neg_subnormal_d2ui_rz() { +; CHECK-NEXT: ret i32 0 +; + %res = call i32 @llvm.nvvm.d2ui.rz(double 0x800fffffffffffff) + ret i32 %res +} + +declare i32 @llvm.nvvm.f2i.rm(float) +declare i32 @llvm.nvvm.f2i.rn(float) +declare i32 
@llvm.nvvm.f2i.rp(float) +declare i32 @llvm.nvvm.f2i.rz(float) + +declare i32 @llvm.nvvm.f2i.rm.ftz(float) +declare i32 @llvm.nvvm.f2i.rn.ftz(float) +declare i32 @llvm.nvvm.f2i.rp.ftz(float) +declare i32 @llvm.nvvm.f2i.rz.ftz(float) + +declare i32 @llvm.nvvm.d2i.rm(double) +declare i32 @llvm.nvvm.d2i.rn(double) +declare i32 @llvm.nvvm.d2i.rp(double) +declare i32 @llvm.nvvm.d2i.rz(double) + + +declare i32 @llvm.nvvm.f2ui.rm(float) +declare i32 @llvm.nvvm.f2ui.rn(float) +declare i32 @llvm.nvvm.f2ui.rp(float) +declare i32 @llvm.nvvm.f2ui.rz(float) + +declare i32 @llvm.nvvm.f2ui.rm.ftz(float) +declare i32 @llvm.nvvm.f2ui.rn.ftz(float) +declare i32 @llvm.nvvm.f2ui.rp.ftz(float) +declare i32 @llvm.nvvm.f2ui.rz.ftz(float) + +declare i32 @llvm.nvvm.d2ui.rm(double) +declare i32 @llvm.nvvm.d2ui.rn(double) +declare i32 @llvm.nvvm.d2ui.rp(double) +declare i32 @llvm.nvvm.d2ui.rz(double) diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll new file mode 100644 index 0000000000000..be38177dce2c3 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll @@ -0,0 +1,1129 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=instsimplify -march=nvptx64 -S | FileCheck %s + +; f2ll/f2ull and d2ll/d2ull - double/float to i64 tests + +;############################################################### +;# Tests with Positive 1.5 # +;############################################################### + +;+-------------------------------------------------------------+ +;| f2ll | +;+-------------------------------------------------------------+ +define i64 @test_pos_1_5_f2ll_rm() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rm() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.f2ll.rm(float 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_f2ll_rn() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rn() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.f2ll.rn(float 1.5) + ret i64 %res +} + + +define i64 @test_pos_1_5_f2ll_rp() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rp() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.f2ll.rp(float 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_f2ll_rz() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rz() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.f2ll.rz(float 1.5) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ll_ftz | +;+-------------------------------------------------------------+ +define i64 @test_pos_1_5_f2ll_rm_ftz() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rm_ftz() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_f2ll_rn_ftz() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rn_ftz() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_f2ll_rp_ftz() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rp_ftz() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_f2ll_rz_ftz() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rz_ftz() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.f2ll.rz.ftz(float 1.5) + ret i64 %res +} +;+-------------------------------------------------------------+ +;| d2ll | 
+;+-------------------------------------------------------------+ +define i64 @test_pos_1_5_d2ll_rm() { +; CHECK-LABEL: define i64 @test_pos_1_5_d2ll_rm() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.d2ll.rm(double 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_d2ll_rn() { +; CHECK-LABEL: define i64 @test_pos_1_5_d2ll_rn() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.d2ll.rn(double 1.5) + ret i64 %res +} + + +define i64 @test_pos_1_5_d2ll_rp() { +; CHECK-LABEL: define i64 @test_pos_1_5_d2ll_rp() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.d2ll.rp(double 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_d2ll_rz() { +; CHECK-LABEL: define i64 @test_pos_1_5_d2ll_rz() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.d2ll.rz(double 1.5) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ull | +;+-------------------------------------------------------------+ +define i64 @test_pos_1_5_f2ull_rm() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rm() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.f2ull.rm(float 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_f2ull_rn() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rn() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.f2ull.rn(float 1.5) + ret i64 %res +} + + +define i64 @test_pos_1_5_f2ull_rp() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rp() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.f2ull.rp(float 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_f2ull_rz() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rz() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.f2ull.rz(float 1.5) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ull_ftz | +;+-------------------------------------------------------------+ +define i64 @test_pos_1_5_f2ull_rm_ftz() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rm_ftz() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_f2ull_rn_ftz() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rn_ftz() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_f2ull_rp_ftz() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rp_ftz() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_f2ull_rz_ftz() { +; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rz_ftz() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float 1.5) + ret i64 %res +} +;+-------------------------------------------------------------+ +;| d2ull | +;+-------------------------------------------------------------+ +define i64 @test_pos_1_5_d2ull_rm() { +; CHECK-LABEL: define i64 @test_pos_1_5_d2ull_rm() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.d2ull.rm(double 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_d2ull_rn() { +; CHECK-LABEL: define i64 @test_pos_1_5_d2ull_rn() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.d2ull.rn(double 1.5) + ret i64 %res +} + + +define i64 @test_pos_1_5_d2ull_rp() { +; CHECK-LABEL: define i64 @test_pos_1_5_d2ull_rp() { +; CHECK-NEXT: ret i64 2 +; + %res = call i64 @llvm.nvvm.d2ull.rp(double 1.5) + ret i64 %res +} + +define i64 @test_pos_1_5_d2ull_rz() { +; CHECK-LABEL: define i64 @test_pos_1_5_d2ull_rz() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 
@llvm.nvvm.d2ull.rz(double 1.5) + ret i64 %res +} + +;############################################################### +;# Tests with Negative 1.5 # +;############################################################### + +;+-------------------------------------------------------------+ +;| f2ll | +;+-------------------------------------------------------------+ +define i64 @test_neg_1_5_f2ll_rm() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rm() { +; CHECK-NEXT: ret i64 -2 +; + %res = call i64 @llvm.nvvm.f2ll.rm(float -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_f2ll_rn() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rn() { +; CHECK-NEXT: ret i64 -2 +; + %res = call i64 @llvm.nvvm.f2ll.rn(float -1.5) + ret i64 %res +} + + +define i64 @test_neg_1_5_f2ll_rp() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rp() { +; CHECK-NEXT: ret i64 -1 +; + %res = call i64 @llvm.nvvm.f2ll.rp(float -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_f2ll_rz() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rz() { +; CHECK-NEXT: ret i64 -1 +; + %res = call i64 @llvm.nvvm.f2ll.rz(float -1.5) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ll_ftz | +;+-------------------------------------------------------------+ +define i64 @test_neg_1_5_f2ll_rm_ftz() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rm_ftz() { +; CHECK-NEXT: ret i64 -2 +; + %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_f2ll_rn_ftz() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rn_ftz() { +; CHECK-NEXT: ret i64 -2 +; + %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_f2ll_rp_ftz() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rp_ftz() { +; CHECK-NEXT: ret i64 -1 +; + %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_f2ll_rz_ftz() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rz_ftz() { +; CHECK-NEXT: ret i64 -1 +; + %res = call i64 @llvm.nvvm.f2ll.rz.ftz(float -1.5) + ret i64 %res +} +;+-------------------------------------------------------------+ +;| d2ll | +;+-------------------------------------------------------------+ +define i64 @test_neg_1_5_d2ll_rm() { +; CHECK-LABEL: define i64 @test_neg_1_5_d2ll_rm() { +; CHECK-NEXT: ret i64 -2 +; + %res = call i64 @llvm.nvvm.d2ll.rm(double -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_d2ll_rn() { +; CHECK-LABEL: define i64 @test_neg_1_5_d2ll_rn() { +; CHECK-NEXT: ret i64 -2 +; + %res = call i64 @llvm.nvvm.d2ll.rn(double -1.5) + ret i64 %res +} + + +define i64 @test_neg_1_5_d2ll_rp() { +; CHECK-LABEL: define i64 @test_neg_1_5_d2ll_rp() { +; CHECK-NEXT: ret i64 -1 +; + %res = call i64 @llvm.nvvm.d2ll.rp(double -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_d2ll_rz() { +; CHECK-LABEL: define i64 @test_neg_1_5_d2ll_rz() { +; CHECK-NEXT: ret i64 -1 +; + %res = call i64 @llvm.nvvm.d2ll.rz(double -1.5) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ull | +;+-------------------------------------------------------------+ +define i64 @test_neg_1_5_f2ull_rm() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rm() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rm(float -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.f2ull.rm(float -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_f2ull_rn() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rn() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rn(float 
-1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.f2ull.rn(float -1.5) + ret i64 %res +} + + +define i64 @test_neg_1_5_f2ull_rp() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rp() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rp(float -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.f2ull.rp(float -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_f2ull_rz() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rz() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rz(float -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.f2ull.rz(float -1.5) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ull_ftz | +;+-------------------------------------------------------------+ +define i64 @test_neg_1_5_f2ull_rm_ftz() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rm_ftz() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rm.ftz(float -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_f2ull_rn_ftz() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rn_ftz() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rn.ftz(float -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_f2ull_rp_ftz() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rp_ftz() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rp.ftz(float -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_f2ull_rz_ftz() { +; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rz_ftz() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rz.ftz(float -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float -1.5) + ret i64 %res +} +;+-------------------------------------------------------------+ +;| d2ull | +;+-------------------------------------------------------------+ +define i64 @test_neg_1_5_d2ull_rm() { +; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rm() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rm(double -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.d2ull.rm(double -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_d2ull_rn() { +; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rn() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rn(double -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.d2ull.rn(double -1.5) + ret i64 %res +} + + +define i64 @test_neg_1_5_d2ull_rp() { +; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rp() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rp(double -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.d2ull.rp(double -1.5) + ret i64 %res +} + +define i64 @test_neg_1_5_d2ull_rz() { +; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rz() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rz(double -1.500000e+00) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.d2ull.rz(double -1.5) + ret i64 %res +} + +;############################################################### +;# Tests with NaN # +;############################################################### + +;+-------------------------------------------------------------+ +;| f2ll | +;+-------------------------------------------------------------+ +define i64 
@test_nan_f2ll_rm() { +; CHECK-LABEL: define i64 @test_nan_f2ll_rm() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rm(float 0x7FFFFF0000000000) + ret i64 %res +} + +define i64 @test_nan_f2ll_rn() { +; CHECK-LABEL: define i64 @test_nan_f2ll_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rn(float 0x7FFFFF0000000000) + ret i64 %res +} + + +define i64 @test_nan_f2ll_rp() { +; CHECK-LABEL: define i64 @test_nan_f2ll_rp() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rp(float 0x7FFFFF0000000000) + ret i64 %res +} + +define i64 @test_nan_f2ll_rz() { +; CHECK-LABEL: define i64 @test_nan_f2ll_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rz(float 0x7FFFFF0000000000) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ll_ftz | +;+-------------------------------------------------------------+ +define i64 @test_nan_f2ll_rm_ftz() { +; CHECK-LABEL: define i64 @test_nan_f2ll_rm_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float 0x7FFFFF0000000000) + ret i64 %res +} + +define i64 @test_nan_f2ll_rn_ftz() { +; CHECK-LABEL: define i64 @test_nan_f2ll_rn_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float 0x7FFFFF0000000000) + ret i64 %res +} + +define i64 @test_nan_f2ll_rp_ftz() { +; CHECK-LABEL: define i64 @test_nan_f2ll_rp_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float 0x7FFFFF0000000000) + ret i64 %res +} + +define i64 @test_nan_f2ll_rz_ftz() { +; CHECK-LABEL: define i64 @test_nan_f2ll_rz_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rz.ftz(float 0x7FFFFF0000000000) + ret i64 %res +} +;+-------------------------------------------------------------+ +;| d2ll | +;+-------------------------------------------------------------+ +define i64 @test_nan_d2ll_rm() { +; CHECK-LABEL: define i64 @test_nan_d2ll_rm() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ll.rm(double 0xFFF8000000000000) + ret i64 %res +} + +define i64 @test_nan_d2ll_rn() { +; CHECK-LABEL: define i64 @test_nan_d2ll_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ll.rn(double 0xFFF8000000000000) + ret i64 %res +} + + +define i64 @test_nan_d2ll_rp() { +; CHECK-LABEL: define i64 @test_nan_d2ll_rp() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ll.rp(double 0xFFF8000000000000) + ret i64 %res +} + +define i64 @test_nan_d2ll_rz() { +; CHECK-LABEL: define i64 @test_nan_d2ll_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ll.rz(double 0xFFF8000000000000) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ull | +;+-------------------------------------------------------------+ +define i64 @test_nan_f2ull_rm() { +; CHECK-LABEL: define i64 @test_nan_f2ull_rm() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rm(float 0x7FFFFF0000000000) + ret i64 %res +} + +define i64 @test_nan_f2ull_rn() { +; CHECK-LABEL: define i64 @test_nan_f2ull_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rn(float 0x7FFFFF0000000000) + ret i64 %res +} + + +define i64 @test_nan_f2ull_rp() { +; CHECK-LABEL: define i64 @test_nan_f2ull_rp() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rp(float 0x7FFFFF0000000000) + ret i64 %res +} + +define i64 @test_nan_f2ull_rz() { +; CHECK-LABEL: define i64 @test_nan_f2ull_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rz(float 
0x7FFFFF0000000000) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ull_ftz | +;+-------------------------------------------------------------+ +define i64 @test_nan_f2ull_rm_ftz() { +; CHECK-LABEL: define i64 @test_nan_f2ull_rm_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float 0x7FFFFF0000000000) + ret i64 %res +} + +define i64 @test_nan_f2ull_rn_ftz() { +; CHECK-LABEL: define i64 @test_nan_f2ull_rn_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float 0x7FFFFF0000000000) + ret i64 %res +} + +define i64 @test_nan_f2ull_rp_ftz() { +; CHECK-LABEL: define i64 @test_nan_f2ull_rp_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float 0x7FFFFF0000000000) + ret i64 %res +} + +define i64 @test_nan_f2ull_rz_ftz() { +; CHECK-LABEL: define i64 @test_nan_f2ull_rz_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float 0x7FFFFF0000000000) + ret i64 %res +} +;+-------------------------------------------------------------+ +;| d2ull | +;+-------------------------------------------------------------+ +define i64 @test_nan_d2ull_rm() { +; CHECK-LABEL: define i64 @test_nan_d2ull_rm() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ull.rm(double 0xFFF8000000000000) + ret i64 %res +} + +define i64 @test_nan_d2ull_rn() { +; CHECK-LABEL: define i64 @test_nan_d2ull_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ull.rn(double 0xFFF8000000000000) + ret i64 %res +} + + +define i64 @test_nan_d2ull_rp() { +; CHECK-LABEL: define i64 @test_nan_d2ull_rp() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ull.rp(double 0xFFF8000000000000) + ret i64 %res +} + +define i64 @test_nan_d2ull_rz() { +; CHECK-LABEL: define i64 @test_nan_d2ull_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ull.rz(double 0xFFF8000000000000) + ret i64 %res +} + +;############################################################### +;# Tests with Positive Subnormal # +;############################################################### + +;+-------------------------------------------------------------+ +;| f2ll | +;+-------------------------------------------------------------+ +define i64 @test_pos_subnormal_f2ll_rm() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rm() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rm(float 0x380FFFFFC0000000) + ret i64 %res +} + +define i64 @test_pos_subnormal_f2ll_rn() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rn(float 0x380FFFFFC0000000) + ret i64 %res +} + + +define i64 @test_pos_subnormal_f2ll_rp() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rp() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.f2ll.rp(float 0x380FFFFFC0000000) + ret i64 %res +} + +define i64 @test_pos_subnormal_f2ll_rz() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rz(float 0x380FFFFFC0000000) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ll_ftz | +;+-------------------------------------------------------------+ +define i64 @test_pos_subnormal_f2ll_rm_ftz() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rm_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float 0x380FFFFFC0000000) + ret i64 %res +} + +define i64 @test_pos_subnormal_f2ll_rn_ftz() { +; 
CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rn_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float 0x380FFFFFC0000000) + ret i64 %res +} + +define i64 @test_pos_subnormal_f2ll_rp_ftz() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rp_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float 0x380FFFFFC0000000) + ret i64 %res +} + +define i64 @test_pos_subnormal_f2ll_rz_ftz() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rz_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rz.ftz(float 0x380FFFFFC0000000) + ret i64 %res +} +;+-------------------------------------------------------------+ +;| d2ll | +;+-------------------------------------------------------------+ +define i64 @test_pos_subnormal_d2ll_rm() { +; CHECK-LABEL: define i64 @test_pos_subnormal_d2ll_rm() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ll.rm(double 0x000fffffffffffff) + ret i64 %res +} + +define i64 @test_pos_subnormal_d2ll_rn() { +; CHECK-LABEL: define i64 @test_pos_subnormal_d2ll_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ll.rn(double 0x000fffffffffffff) + ret i64 %res +} + + +define i64 @test_pos_subnormal_d2ll_rp() { +; CHECK-LABEL: define i64 @test_pos_subnormal_d2ll_rp() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.d2ll.rp(double 0x000fffffffffffff) + ret i64 %res +} + +define i64 @test_pos_subnormal_d2ll_rz() { +; CHECK-LABEL: define i64 @test_pos_subnormal_d2ll_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ll.rz(double 0x000fffffffffffff) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ull | +;+-------------------------------------------------------------+ +define i64 @test_pos_subnormal_f2ull_rm() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rm() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rm(float 0x380FFFFFC0000000) + ret i64 %res +} + +define i64 @test_pos_subnormal_f2ull_rn() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rn(float 0x380FFFFFC0000000) + ret i64 %res +} + + +define i64 @test_pos_subnormal_f2ull_rp() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rp() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.f2ull.rp(float 0x380FFFFFC0000000) + ret i64 %res +} + +define i64 @test_pos_subnormal_f2ull_rz() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rz(float 0x380FFFFFC0000000) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ull_ftz | +;+-------------------------------------------------------------+ +define i64 @test_pos_subnormal_f2ull_rm_ftz() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rm_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float 0x380FFFFFC0000000) + ret i64 %res +} + +define i64 @test_pos_subnormal_f2ull_rn_ftz() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rn_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float 0x380FFFFFC0000000) + ret i64 %res +} + +define i64 @test_pos_subnormal_f2ull_rp_ftz() { +; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rp_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float 0x380FFFFFC0000000) + ret i64 %res +} + +define i64 @test_pos_subnormal_f2ull_rz_ftz() { +; CHECK-LABEL: define i64 
@test_pos_subnormal_f2ull_rz_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float 0x380FFFFFC0000000) + ret i64 %res +} +;+-------------------------------------------------------------+ +;| d2ull | +;+-------------------------------------------------------------+ +define i64 @test_pos_subnormal_d2ull_rm() { +; CHECK-LABEL: define i64 @test_pos_subnormal_d2ull_rm() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ull.rm(double 0x000fffffffffffff) + ret i64 %res +} + +define i64 @test_pos_subnormal_d2ull_rn() { +; CHECK-LABEL: define i64 @test_pos_subnormal_d2ull_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ull.rn(double 0x000fffffffffffff) + ret i64 %res +} + + +define i64 @test_pos_subnormal_d2ull_rp() { +; CHECK-LABEL: define i64 @test_pos_subnormal_d2ull_rp() { +; CHECK-NEXT: ret i64 1 +; + %res = call i64 @llvm.nvvm.d2ull.rp(double 0x000fffffffffffff) + ret i64 %res +} + +define i64 @test_pos_subnormal_d2ull_rz() { +; CHECK-LABEL: define i64 @test_pos_subnormal_d2ull_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ull.rz(double 0x000fffffffffffff) + ret i64 %res +} + +;############################################################### +;# Tests with Negative Subnormal # +;############################################################### + +;+-------------------------------------------------------------+ +;| f2ll | +;+-------------------------------------------------------------+ +define i64 @test_neg_subnormal_f2ll_rm() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rm() { +; CHECK-NEXT: ret i64 -1 +; + %res = call i64 @llvm.nvvm.f2ll.rm(float 0xB80FFFFFC0000000) + ret i64 %res +} + +define i64 @test_neg_subnormal_f2ll_rn() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rn(float 0xB80FFFFFC0000000) + ret i64 %res +} + + +define i64 @test_neg_subnormal_f2ll_rp() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rp() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rp(float 0xB80FFFFFC0000000) + ret i64 %res +} + +define i64 @test_neg_subnormal_f2ll_rz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rz(float 0xB80FFFFFC0000000) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ll_ftz | +;+-------------------------------------------------------------+ +define i64 @test_neg_subnormal_f2ll_rm_ftz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rm_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float 0xB80FFFFFC0000000) + ret i64 %res +} + +define i64 @test_neg_subnormal_f2ll_rn_ftz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rn_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float 0xB80FFFFFC0000000) + ret i64 %res +} + +define i64 @test_neg_subnormal_f2ll_rp_ftz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rp_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float 0xB80FFFFFC0000000) + ret i64 %res +} + +define i64 @test_neg_subnormal_f2ll_rz_ftz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rz_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ll.rz.ftz(float 0xB80FFFFFC0000000) + ret i64 %res +} +;+-------------------------------------------------------------+ +;| d2ll | +;+-------------------------------------------------------------+ +define i64 
@test_neg_subnormal_d2ll_rm() { +; CHECK-LABEL: define i64 @test_neg_subnormal_d2ll_rm() { +; CHECK-NEXT: ret i64 -1 +; + %res = call i64 @llvm.nvvm.d2ll.rm(double 0x800fffffffffffff) + ret i64 %res +} + +define i64 @test_neg_subnormal_d2ll_rn() { +; CHECK-LABEL: define i64 @test_neg_subnormal_d2ll_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ll.rn(double 0x800fffffffffffff) + ret i64 %res +} + + +define i64 @test_neg_subnormal_d2ll_rp() { +; CHECK-LABEL: define i64 @test_neg_subnormal_d2ll_rp() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ll.rp(double 0x800fffffffffffff) + ret i64 %res +} + +define i64 @test_neg_subnormal_d2ll_rz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_d2ll_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ll.rz(double 0x800fffffffffffff) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ull | +;+-------------------------------------------------------------+ +define i64 @test_neg_subnormal_f2ull_rm() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rm() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rm(float 0xB80FFFFFC0000000) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.f2ull.rm(float 0xB80FFFFFC0000000) + ret i64 %res +} + +define i64 @test_neg_subnormal_f2ull_rn() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rn(float 0xB80FFFFFC0000000) + ret i64 %res +} + + +define i64 @test_neg_subnormal_f2ull_rp() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rp() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rp(float 0xB80FFFFFC0000000) + ret i64 %res +} + +define i64 @test_neg_subnormal_f2ull_rz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rz(float 0xB80FFFFFC0000000) + ret i64 %res +} + +;+-------------------------------------------------------------+ +;| f2ull_ftz | +;+-------------------------------------------------------------+ +define i64 @test_neg_subnormal_f2ull_rm_ftz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rm_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float 0xB80FFFFFC0000000) + ret i64 %res +} + +define i64 @test_neg_subnormal_f2ull_rn_ftz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rn_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float 0xB80FFFFFC0000000) + ret i64 %res +} + +define i64 @test_neg_subnormal_f2ull_rp_ftz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rp_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float 0xB80FFFFFC0000000) + ret i64 %res +} + +define i64 @test_neg_subnormal_f2ull_rz_ftz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rz_ftz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float 0xB80FFFFFC0000000) + ret i64 %res +} +;+-------------------------------------------------------------+ +;| d2ull | +;+-------------------------------------------------------------+ +define i64 @test_neg_subnormal_d2ull_rm() { +; CHECK-LABEL: define i64 @test_neg_subnormal_d2ull_rm() { +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rm(double 0x800FFFFFFFFFFFFF) +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.nvvm.d2ull.rm(double 0x800fffffffffffff) + ret i64 %res +} + +define i64 @test_neg_subnormal_d2ull_rn() { +; CHECK-LABEL: define i64 
@test_neg_subnormal_d2ull_rn() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ull.rn(double 0x800fffffffffffff) + ret i64 %res +} + + +define i64 @test_neg_subnormal_d2ull_rp() { +; CHECK-LABEL: define i64 @test_neg_subnormal_d2ull_rp() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ull.rp(double 0x800fffffffffffff) + ret i64 %res +} + +define i64 @test_neg_subnormal_d2ull_rz() { +; CHECK-LABEL: define i64 @test_neg_subnormal_d2ull_rz() { +; CHECK-NEXT: ret i64 0 +; + %res = call i64 @llvm.nvvm.d2ull.rz(double 0x800fffffffffffff) + ret i64 %res +} + +declare i64 @llvm.nvvm.f2ll.rm(float) +declare i64 @llvm.nvvm.f2ll.rn(float) +declare i64 @llvm.nvvm.f2ll.rp(float) +declare i64 @llvm.nvvm.f2ll.rz(float) + +declare i64 @llvm.nvvm.f2ll.rm.ftz(float) +declare i64 @llvm.nvvm.f2ll.rn.ftz(float) +declare i64 @llvm.nvvm.f2ll.rp.ftz(float) +declare i64 @llvm.nvvm.f2ll.rz.ftz(float) + +declare i64 @llvm.nvvm.d2ll.rm(double) +declare i64 @llvm.nvvm.d2ll.rn(double) +declare i64 @llvm.nvvm.d2ll.rp(double) +declare i64 @llvm.nvvm.d2ll.rz(double) + + +declare i64 @llvm.nvvm.f2ull.rm(float) +declare i64 @llvm.nvvm.f2ull.rn(float) +declare i64 @llvm.nvvm.f2ull.rp(float) +declare i64 @llvm.nvvm.f2ull.rz(float) + +declare i64 @llvm.nvvm.f2ull.rm.ftz(float) +declare i64 @llvm.nvvm.f2ull.rn.ftz(float) +declare i64 @llvm.nvvm.f2ull.rp.ftz(float) +declare i64 @llvm.nvvm.f2ull.rz.ftz(float) + +declare i64 @llvm.nvvm.d2ull.rm(double) +declare i64 @llvm.nvvm.d2ull.rn(double) +declare i64 @llvm.nvvm.d2ull.rp(double) +declare i64 @llvm.nvvm.d2ull.rz(double) From 54524d334c41d2eca93b4a026cb1eed586db7f31 Mon Sep 17 00:00:00 2001 From: Lewis Crawford Date: Fri, 13 Dec 2024 13:17:27 +0000 Subject: [PATCH 2/6] Move internal case statements into helper funcs Also remove unnecessary parameter for FTZPreserveSign. 
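As context for reviewers, a standalone sketch (not part of this patch) of why the Type parameter can be dropped: an APFloat already carries its own fltSemantics, so the helper can query it directly. The wrapper exampleFoldF2llRmFtz below is a hypothetical illustration of the same APFloat calls the folding path uses for f2ll.rm.ftz, not code from this change.

  // Sketch only: flush a denormal input to +/-0.0, preserving its sign.
  #include "llvm/ADT/APFloat.h"
  #include "llvm/ADT/APSInt.h"

  using namespace llvm;

  static APFloat flushDenormPreserveSign(const APFloat &V) {
    if (V.isDenormal())
      return APFloat::getZero(V.getSemantics(), V.isNegative());
    return V;
  }

  // Hypothetical wrapper: flush, then round toward negative infinity into
  // a signed 64-bit integer, as the f2ll.rm.ftz folding does.
  static int64_t exampleFoldF2llRmFtz(const APFloat &In) {
    APSInt Res(/*BitWidth=*/64, /*isUnsigned=*/false);
    bool IsExact = false;
    flushDenormPreserveSign(In).convertToInteger(
        Res, APFloat::rmTowardNegative, &IsExact);
    return Res.getSExtValue();
  }

With the negative single-precision subnormal used in the tests from the previous patch, the flush to -0.0 means the ftz variants fold to 0, while the plain rm variant rounds the unflushed value down to -1.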
--- llvm/lib/Analysis/ConstantFolding.cpp | 274 ++++++++++++++------------ 1 file changed, 143 insertions(+), 131 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index dbc8ff2cfca34..9ad5207cc52ce 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -291,6 +291,143 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { return ConstantVector::get(Result); } +//===----------------------------------------------------------------------===// +// NVVM-specific internal helper functions +//===----------------------------------------------------------------------===// + +static bool NVVMIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + // Float to i32 / i64 conversion intrinsics: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2i_rz_ftz: + + case Intrinsic::nvvm_f2ui_rm_ftz: + case Intrinsic::nvvm_f2ui_rn_ftz: + case Intrinsic::nvvm_f2ui_rp_ftz: + case Intrinsic::nvvm_f2ui_rz_ftz: + + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ll_rz_ftz: + + case Intrinsic::nvvm_f2ull_rm_ftz: + case Intrinsic::nvvm_f2ull_rn_ftz: + case Intrinsic::nvvm_f2ull_rp_ftz: + case Intrinsic::nvvm_f2ull_rz_ftz: + return true; + } + return false; +} + +static bool NVVMIntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + // f2i + case Intrinsic::nvvm_f2i_rm: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2i_rn: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2i_rp: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_f2i_rz_ftz: + // d2i + case Intrinsic::nvvm_d2i_rm: + case Intrinsic::nvvm_d2i_rn: + case Intrinsic::nvvm_d2i_rp: + case Intrinsic::nvvm_d2i_rz: + // f2ll + case Intrinsic::nvvm_f2ll_rm: + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ll_rn: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ll_rp: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ll_rz: + case Intrinsic::nvvm_f2ll_rz_ftz: + // d2ll + case Intrinsic::nvvm_d2ll_rm: + case Intrinsic::nvvm_d2ll_rn: + case Intrinsic::nvvm_d2ll_rp: + case Intrinsic::nvvm_d2ll_rz: + return true; + } + return false; +} + +static APFloat::roundingMode +NVVMIntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + // RM: + case Intrinsic::nvvm_f2i_rm: + case Intrinsic::nvvm_f2ui_rm: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2ui_rm_ftz: + case Intrinsic::nvvm_d2i_rm: + case Intrinsic::nvvm_d2ui_rm: + + case Intrinsic::nvvm_f2ll_rm: + case Intrinsic::nvvm_f2ull_rm: + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ull_rm_ftz: + case Intrinsic::nvvm_d2ll_rm: + case Intrinsic::nvvm_d2ull_rm: + return APFloat::rmTowardNegative; + + // RN: + case Intrinsic::nvvm_f2i_rn: + case Intrinsic::nvvm_f2ui_rn: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2ui_rn_ftz: + case Intrinsic::nvvm_d2i_rn: + case Intrinsic::nvvm_d2ui_rn: + + case Intrinsic::nvvm_f2ll_rn: + case Intrinsic::nvvm_f2ull_rn: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ull_rn_ftz: + case Intrinsic::nvvm_d2ll_rn: + case Intrinsic::nvvm_d2ull_rn: + return APFloat::rmNearestTiesToEven; + + // RP: + case Intrinsic::nvvm_f2i_rp: + case Intrinsic::nvvm_f2ui_rp: + case Intrinsic::nvvm_f2i_rp_ftz: + case 
Intrinsic::nvvm_f2ui_rp_ftz: + case Intrinsic::nvvm_d2i_rp: + case Intrinsic::nvvm_d2ui_rp: + + case Intrinsic::nvvm_f2ll_rp: + case Intrinsic::nvvm_f2ull_rp: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ull_rp_ftz: + case Intrinsic::nvvm_d2ll_rp: + case Intrinsic::nvvm_d2ull_rp: + return APFloat::rmTowardPositive; + + // RZ: + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_f2ui_rz: + case Intrinsic::nvvm_f2i_rz_ftz: + case Intrinsic::nvvm_f2ui_rz_ftz: + case Intrinsic::nvvm_d2i_rz: + case Intrinsic::nvvm_d2ui_rz: + + case Intrinsic::nvvm_f2ll_rz: + case Intrinsic::nvvm_f2ull_rz: + case Intrinsic::nvvm_f2ll_rz_ftz: + case Intrinsic::nvvm_f2ull_rz_ftz: + case Intrinsic::nvvm_d2ll_rz: + case Intrinsic::nvvm_d2ull_rz: + return APFloat::rmTowardZero; + } + llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic"); + return APFloat::roundingMode::Invalid; +} + } // end anonymous namespace /// If this constant is a constant offset from a global, return the global and @@ -1902,10 +2039,9 @@ inline bool llvm_fenv_testexcept() { return false; } -static const APFloat FTZPreserveSign(Type *Ty, const APFloat &V) { +static const APFloat FTZPreserveSign(const APFloat &V) { if (V.isDenormal()) - return APFloat::getZero(Ty->getFltSemantics(), V.isNegative()); - + return APFloat::getZero(V.getSemantics(), V.isNegative()); return V; } @@ -2431,138 +2567,14 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (U.isNaN()) return ConstantInt::get(Ty, 0); - APFloat::roundingMode RMode = APFloat::roundingMode::Invalid; - switch (IntrinsicID) { - // i_rm - case Intrinsic::nvvm_f2i_rm: - case Intrinsic::nvvm_f2ui_rm: - case Intrinsic::nvvm_f2i_rm_ftz: - case Intrinsic::nvvm_f2ui_rm_ftz: - case Intrinsic::nvvm_d2i_rm: - case Intrinsic::nvvm_d2ui_rm: - // ll_rm - case Intrinsic::nvvm_f2ll_rm: - case Intrinsic::nvvm_f2ull_rm: - case Intrinsic::nvvm_f2ll_rm_ftz: - case Intrinsic::nvvm_f2ull_rm_ftz: - case Intrinsic::nvvm_d2ll_rm: - case Intrinsic::nvvm_d2ull_rm: - RMode = APFloat::rmTowardNegative; - break; - - // i_rn - case Intrinsic::nvvm_f2i_rn: - case Intrinsic::nvvm_f2ui_rn: - case Intrinsic::nvvm_f2i_rn_ftz: - case Intrinsic::nvvm_f2ui_rn_ftz: - case Intrinsic::nvvm_d2i_rn: - case Intrinsic::nvvm_d2ui_rn: - // ll_rn - case Intrinsic::nvvm_f2ll_rn: - case Intrinsic::nvvm_f2ull_rn: - case Intrinsic::nvvm_f2ll_rn_ftz: - case Intrinsic::nvvm_f2ull_rn_ftz: - case Intrinsic::nvvm_d2ll_rn: - case Intrinsic::nvvm_d2ull_rn: - RMode = APFloat::rmNearestTiesToEven; - break; - - // i_rp - case Intrinsic::nvvm_f2i_rp: - case Intrinsic::nvvm_f2ui_rp: - case Intrinsic::nvvm_f2i_rp_ftz: - case Intrinsic::nvvm_f2ui_rp_ftz: - case Intrinsic::nvvm_d2i_rp: - case Intrinsic::nvvm_d2ui_rp: - // ll_rp - case Intrinsic::nvvm_f2ll_rp: - case Intrinsic::nvvm_f2ull_rp: - case Intrinsic::nvvm_f2ll_rp_ftz: - case Intrinsic::nvvm_f2ull_rp_ftz: - case Intrinsic::nvvm_d2ll_rp: - case Intrinsic::nvvm_d2ull_rp: - RMode = APFloat::rmTowardPositive; - break; - - // i_rz - case Intrinsic::nvvm_f2i_rz: - case Intrinsic::nvvm_f2ui_rz: - case Intrinsic::nvvm_f2i_rz_ftz: - case Intrinsic::nvvm_f2ui_rz_ftz: - case Intrinsic::nvvm_d2i_rz: - case Intrinsic::nvvm_d2ui_rz: - // ll_rz - case Intrinsic::nvvm_f2ll_rz: - case Intrinsic::nvvm_f2ull_rz: - case Intrinsic::nvvm_f2ll_rz_ftz: - case Intrinsic::nvvm_f2ull_rz_ftz: - case Intrinsic::nvvm_d2ll_rz: - case Intrinsic::nvvm_d2ull_rz: - RMode = APFloat::rmTowardZero; - break; - default: - llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic"); - } + APFloat::roundingMode 
RMode = NVVMIntrinsicGetRoundingMode(IntrinsicID); assert(RM != APFloat::roundingMode::Invalid); - bool IsFTZ = false; - switch (IntrinsicID) { - case Intrinsic::nvvm_f2i_rm_ftz: - case Intrinsic::nvvm_f2i_rn_ftz: - case Intrinsic::nvvm_f2i_rp_ftz: - case Intrinsic::nvvm_f2i_rz_ftz: - case Intrinsic::nvvm_f2ui_rm_ftz: - case Intrinsic::nvvm_f2ui_rn_ftz: - case Intrinsic::nvvm_f2ui_rp_ftz: - case Intrinsic::nvvm_f2ui_rz_ftz: - case Intrinsic::nvvm_f2ll_rm_ftz: - case Intrinsic::nvvm_f2ll_rn_ftz: - case Intrinsic::nvvm_f2ll_rp_ftz: - case Intrinsic::nvvm_f2ll_rz_ftz: - case Intrinsic::nvvm_f2ull_rm_ftz: - case Intrinsic::nvvm_f2ull_rn_ftz: - case Intrinsic::nvvm_f2ull_rp_ftz: - case Intrinsic::nvvm_f2ull_rz_ftz: - IsFTZ = true; - break; - } - - bool IsSigned = false; - switch (IntrinsicID) { - // f2i - case Intrinsic::nvvm_f2i_rm: - case Intrinsic::nvvm_f2i_rm_ftz: - case Intrinsic::nvvm_f2i_rn: - case Intrinsic::nvvm_f2i_rn_ftz: - case Intrinsic::nvvm_f2i_rp: - case Intrinsic::nvvm_f2i_rp_ftz: - case Intrinsic::nvvm_f2i_rz: - case Intrinsic::nvvm_f2i_rz_ftz: - // d2i - case Intrinsic::nvvm_d2i_rm: - case Intrinsic::nvvm_d2i_rn: - case Intrinsic::nvvm_d2i_rp: - case Intrinsic::nvvm_d2i_rz: - // f2ll - case Intrinsic::nvvm_f2ll_rm: - case Intrinsic::nvvm_f2ll_rm_ftz: - case Intrinsic::nvvm_f2ll_rn: - case Intrinsic::nvvm_f2ll_rn_ftz: - case Intrinsic::nvvm_f2ll_rp: - case Intrinsic::nvvm_f2ll_rp_ftz: - case Intrinsic::nvvm_f2ll_rz: - case Intrinsic::nvvm_f2ll_rz_ftz: - // d2ll - case Intrinsic::nvvm_d2ll_rm: - case Intrinsic::nvvm_d2ll_rn: - case Intrinsic::nvvm_d2ll_rp: - case Intrinsic::nvvm_d2ll_rz: - IsSigned = true; - break; - } + bool IsFTZ = NVVMIntrinsicShouldFTZ(IntrinsicID); + bool IsSigned = NVVMIntrinsicConvertsToSignedInteger(IntrinsicID); APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned); - auto FloatToRound = IsFTZ ? FTZPreserveSign(Op->getType(), U) : U; + auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U; bool IsExact = false; APFloat::opStatus Status = From 108265f807ec6e9c13e98e8cc0ffd5f80665c498 Mon Sep 17 00:00:00 2001 From: Lewis Crawford Date: Fri, 13 Dec 2024 16:50:58 +0000 Subject: [PATCH 3/6] Move helper functions into separate file Move NVVM intrinsic helper functions into NVVMIntrinsicFlags.h and then rename it NVVMIntrinsicUtils.h. --- llvm/include/llvm/IR/NVVMIntrinsicFlags.h | 39 ---- llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 173 ++++++++++++++++++ llvm/lib/Analysis/ConstantFolding.cpp | 145 +-------------- .../NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 2 +- 5 files changed, 179 insertions(+), 182 deletions(-) delete mode 100644 llvm/include/llvm/IR/NVVMIntrinsicFlags.h create mode 100644 llvm/include/llvm/IR/NVVMIntrinsicUtils.h diff --git a/llvm/include/llvm/IR/NVVMIntrinsicFlags.h b/llvm/include/llvm/IR/NVVMIntrinsicFlags.h deleted file mode 100644 index dfb6e857b3a6a..0000000000000 --- a/llvm/include/llvm/IR/NVVMIntrinsicFlags.h +++ /dev/null @@ -1,39 +0,0 @@ -//===--- NVVMIntrinsicFlags.h -----------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This file contains the definitions of the enumerations and flags -/// associated with NVVM Intrinsics. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_IR_NVVMINTRINSICFLAGS_H -#define LLVM_IR_NVVMINTRINSICFLAGS_H - -#include - -namespace llvm { -namespace nvvm { - -// Reduction Ops supported with TMA Copy from Shared -// to Global Memory for the "cp.reduce.async.bulk.tensor.*" -// family of PTX instructions. -enum class TMAReductionOp : uint8_t { - ADD = 0, - MIN = 1, - MAX = 2, - INC = 3, - DEC = 4, - AND = 5, - OR = 6, - XOR = 7, -}; - -} // namespace nvvm -} // namespace llvm -#endif // LLVM_IR_NVVMINTRINSICFLAGS_H diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h new file mode 100644 index 0000000000000..a463da688cb16 --- /dev/null +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -0,0 +1,173 @@ +//===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file contains the definitions of the enumerations and flags +/// associated with NVVM Intrinsics. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_NVVMINTRINSICUTILS_H +#define LLVM_IR_NVVMINTRINSICUTILS_H + +#include "llvm/ADT/APFloat.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsNVPTX.h" + +namespace llvm { +namespace nvvm { + +// Reduction Ops supported with TMA Copy from Shared +// to Global Memory for the "cp.reduce.async.bulk.tensor.*" +// family of PTX instructions. +enum class TMAReductionOp : uint8_t { + ADD = 0, + MIN = 1, + MAX = 2, + INC = 3, + DEC = 4, + AND = 5, + OR = 6, + XOR = 7, +}; + +bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + // Float to i32 / i64 conversion intrinsics: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2i_rz_ftz: + + case Intrinsic::nvvm_f2ui_rm_ftz: + case Intrinsic::nvvm_f2ui_rn_ftz: + case Intrinsic::nvvm_f2ui_rp_ftz: + case Intrinsic::nvvm_f2ui_rz_ftz: + + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ll_rz_ftz: + + case Intrinsic::nvvm_f2ull_rm_ftz: + case Intrinsic::nvvm_f2ull_rn_ftz: + case Intrinsic::nvvm_f2ull_rp_ftz: + case Intrinsic::nvvm_f2ull_rz_ftz: + return true; + } + return false; +} + +bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + // f2i + case Intrinsic::nvvm_f2i_rm: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2i_rn: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2i_rp: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_f2i_rz_ftz: + // d2i + case Intrinsic::nvvm_d2i_rm: + case Intrinsic::nvvm_d2i_rn: + case Intrinsic::nvvm_d2i_rp: + case Intrinsic::nvvm_d2i_rz: + // f2ll + case Intrinsic::nvvm_f2ll_rm: + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ll_rn: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ll_rp: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ll_rz: + case Intrinsic::nvvm_f2ll_rz_ftz: + // d2ll + case Intrinsic::nvvm_d2ll_rm: + case Intrinsic::nvvm_d2ll_rn: + case Intrinsic::nvvm_d2ll_rp: + case 
Intrinsic::nvvm_d2ll_rz: + return true; + } + return false; +} + +APFloat::roundingMode IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + // RM: + case Intrinsic::nvvm_f2i_rm: + case Intrinsic::nvvm_f2ui_rm: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2ui_rm_ftz: + case Intrinsic::nvvm_d2i_rm: + case Intrinsic::nvvm_d2ui_rm: + + case Intrinsic::nvvm_f2ll_rm: + case Intrinsic::nvvm_f2ull_rm: + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ull_rm_ftz: + case Intrinsic::nvvm_d2ll_rm: + case Intrinsic::nvvm_d2ull_rm: + return APFloat::rmTowardNegative; + + // RN: + case Intrinsic::nvvm_f2i_rn: + case Intrinsic::nvvm_f2ui_rn: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2ui_rn_ftz: + case Intrinsic::nvvm_d2i_rn: + case Intrinsic::nvvm_d2ui_rn: + + case Intrinsic::nvvm_f2ll_rn: + case Intrinsic::nvvm_f2ull_rn: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ull_rn_ftz: + case Intrinsic::nvvm_d2ll_rn: + case Intrinsic::nvvm_d2ull_rn: + return APFloat::rmNearestTiesToEven; + + // RP: + case Intrinsic::nvvm_f2i_rp: + case Intrinsic::nvvm_f2ui_rp: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2ui_rp_ftz: + case Intrinsic::nvvm_d2i_rp: + case Intrinsic::nvvm_d2ui_rp: + + case Intrinsic::nvvm_f2ll_rp: + case Intrinsic::nvvm_f2ull_rp: + case Intrinsic::nvvm_f2ll_rp_ftz: + case Intrinsic::nvvm_f2ull_rp_ftz: + case Intrinsic::nvvm_d2ll_rp: + case Intrinsic::nvvm_d2ull_rp: + return APFloat::rmTowardPositive; + + // RZ: + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_f2ui_rz: + case Intrinsic::nvvm_f2i_rz_ftz: + case Intrinsic::nvvm_f2ui_rz_ftz: + case Intrinsic::nvvm_d2i_rz: + case Intrinsic::nvvm_d2ui_rz: + + case Intrinsic::nvvm_f2ll_rz: + case Intrinsic::nvvm_f2ull_rz: + case Intrinsic::nvvm_f2ll_rz_ftz: + case Intrinsic::nvvm_f2ull_rz_ftz: + case Intrinsic::nvvm_d2ll_rz: + case Intrinsic::nvvm_d2ull_rz: + return APFloat::rmTowardZero; + } + llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic"); + return APFloat::roundingMode::Invalid; +} + +} // namespace nvvm +} // namespace llvm +#endif // LLVM_IR_NVVMINTRINSICUTILS_H diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 9ad5207cc52ce..dca3359132609 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -48,6 +48,7 @@ #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" +#include "llvm/IR/NVVMIntrinsicUtils.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -290,144 +291,6 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { return ConstantVector::get(Result); } - -//===----------------------------------------------------------------------===// -// NVVM-specific internal helper functions -//===----------------------------------------------------------------------===// - -static bool NVVMIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { - switch (IntrinsicID) { - // Float to i32 / i64 conversion intrinsics: - case Intrinsic::nvvm_f2i_rm_ftz: - case Intrinsic::nvvm_f2i_rn_ftz: - case Intrinsic::nvvm_f2i_rp_ftz: - case Intrinsic::nvvm_f2i_rz_ftz: - - case Intrinsic::nvvm_f2ui_rm_ftz: - case Intrinsic::nvvm_f2ui_rn_ftz: - case Intrinsic::nvvm_f2ui_rp_ftz: - case Intrinsic::nvvm_f2ui_rz_ftz: - - case Intrinsic::nvvm_f2ll_rm_ftz: - case Intrinsic::nvvm_f2ll_rn_ftz: - case Intrinsic::nvvm_f2ll_rp_ftz: - case Intrinsic::nvvm_f2ll_rz_ftz: - - 
case Intrinsic::nvvm_f2ull_rm_ftz: - case Intrinsic::nvvm_f2ull_rn_ftz: - case Intrinsic::nvvm_f2ull_rp_ftz: - case Intrinsic::nvvm_f2ull_rz_ftz: - return true; - } - return false; -} - -static bool NVVMIntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) { - switch (IntrinsicID) { - // f2i - case Intrinsic::nvvm_f2i_rm: - case Intrinsic::nvvm_f2i_rm_ftz: - case Intrinsic::nvvm_f2i_rn: - case Intrinsic::nvvm_f2i_rn_ftz: - case Intrinsic::nvvm_f2i_rp: - case Intrinsic::nvvm_f2i_rp_ftz: - case Intrinsic::nvvm_f2i_rz: - case Intrinsic::nvvm_f2i_rz_ftz: - // d2i - case Intrinsic::nvvm_d2i_rm: - case Intrinsic::nvvm_d2i_rn: - case Intrinsic::nvvm_d2i_rp: - case Intrinsic::nvvm_d2i_rz: - // f2ll - case Intrinsic::nvvm_f2ll_rm: - case Intrinsic::nvvm_f2ll_rm_ftz: - case Intrinsic::nvvm_f2ll_rn: - case Intrinsic::nvvm_f2ll_rn_ftz: - case Intrinsic::nvvm_f2ll_rp: - case Intrinsic::nvvm_f2ll_rp_ftz: - case Intrinsic::nvvm_f2ll_rz: - case Intrinsic::nvvm_f2ll_rz_ftz: - // d2ll - case Intrinsic::nvvm_d2ll_rm: - case Intrinsic::nvvm_d2ll_rn: - case Intrinsic::nvvm_d2ll_rp: - case Intrinsic::nvvm_d2ll_rz: - return true; - } - return false; -} - -static APFloat::roundingMode -NVVMIntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) { - switch (IntrinsicID) { - // RM: - case Intrinsic::nvvm_f2i_rm: - case Intrinsic::nvvm_f2ui_rm: - case Intrinsic::nvvm_f2i_rm_ftz: - case Intrinsic::nvvm_f2ui_rm_ftz: - case Intrinsic::nvvm_d2i_rm: - case Intrinsic::nvvm_d2ui_rm: - - case Intrinsic::nvvm_f2ll_rm: - case Intrinsic::nvvm_f2ull_rm: - case Intrinsic::nvvm_f2ll_rm_ftz: - case Intrinsic::nvvm_f2ull_rm_ftz: - case Intrinsic::nvvm_d2ll_rm: - case Intrinsic::nvvm_d2ull_rm: - return APFloat::rmTowardNegative; - - // RN: - case Intrinsic::nvvm_f2i_rn: - case Intrinsic::nvvm_f2ui_rn: - case Intrinsic::nvvm_f2i_rn_ftz: - case Intrinsic::nvvm_f2ui_rn_ftz: - case Intrinsic::nvvm_d2i_rn: - case Intrinsic::nvvm_d2ui_rn: - - case Intrinsic::nvvm_f2ll_rn: - case Intrinsic::nvvm_f2ull_rn: - case Intrinsic::nvvm_f2ll_rn_ftz: - case Intrinsic::nvvm_f2ull_rn_ftz: - case Intrinsic::nvvm_d2ll_rn: - case Intrinsic::nvvm_d2ull_rn: - return APFloat::rmNearestTiesToEven; - - // RP: - case Intrinsic::nvvm_f2i_rp: - case Intrinsic::nvvm_f2ui_rp: - case Intrinsic::nvvm_f2i_rp_ftz: - case Intrinsic::nvvm_f2ui_rp_ftz: - case Intrinsic::nvvm_d2i_rp: - case Intrinsic::nvvm_d2ui_rp: - - case Intrinsic::nvvm_f2ll_rp: - case Intrinsic::nvvm_f2ull_rp: - case Intrinsic::nvvm_f2ll_rp_ftz: - case Intrinsic::nvvm_f2ull_rp_ftz: - case Intrinsic::nvvm_d2ll_rp: - case Intrinsic::nvvm_d2ull_rp: - return APFloat::rmTowardPositive; - - // RZ: - case Intrinsic::nvvm_f2i_rz: - case Intrinsic::nvvm_f2ui_rz: - case Intrinsic::nvvm_f2i_rz_ftz: - case Intrinsic::nvvm_f2ui_rz_ftz: - case Intrinsic::nvvm_d2i_rz: - case Intrinsic::nvvm_d2ui_rz: - - case Intrinsic::nvvm_f2ll_rz: - case Intrinsic::nvvm_f2ull_rz: - case Intrinsic::nvvm_f2ll_rz_ftz: - case Intrinsic::nvvm_f2ull_rz_ftz: - case Intrinsic::nvvm_d2ll_rz: - case Intrinsic::nvvm_d2ull_rz: - return APFloat::rmTowardZero; - } - llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic"); - return APFloat::roundingMode::Invalid; -} - } // end anonymous namespace /// If this constant is a constant offset from a global, return the global and @@ -2567,11 +2430,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (U.isNaN()) return ConstantInt::get(Ty, 0); - APFloat::roundingMode RMode = NVVMIntrinsicGetRoundingMode(IntrinsicID); + APFloat::roundingMode RMode = 
nvvm::IntrinsicGetRoundingMode(IntrinsicID); assert(RM != APFloat::roundingMode::Invalid); - bool IsFTZ = NVVMIntrinsicShouldFTZ(IntrinsicID); - bool IsSigned = NVVMIntrinsicConvertsToSignedInteger(IntrinsicID); + bool IsFTZ = nvvm::IntrinsicShouldFTZ(IntrinsicID); + bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID); APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned); auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U; diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp index 65e1893d3f3bd..d34f45fcac008 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp @@ -14,7 +14,7 @@ #include "NVPTX.h" #include "NVPTXUtilities.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/NVVMIntrinsicFlags.h" +#include "llvm/IR/NVVMIntrinsicUtils.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index e1fb2d7fcee03..bcb35c972b70a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -17,7 +17,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsNVPTX.h" -#include "llvm/IR/NVVMIntrinsicFlags.h" +#include "llvm/IR/NVVMIntrinsicUtils.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" From 623215d12aa0c944e4ff65b6b7a894d184450be5 Mon Sep 17 00:00:00 2001 From: Lewis Crawford Date: Fri, 13 Dec 2024 17:38:15 +0000 Subject: [PATCH 4/6] Minor tidying --- llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 2 +- llvm/lib/Analysis/ConstantFolding.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h index a463da688cb16..19c403a171325 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -8,7 +8,7 @@ // /// \file /// This file contains the definitions of the enumerations and flags -/// associated with NVVM Intrinsics. +/// associated with NVVM Intrinsics, along with some helper functions. 
// //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index dca3359132609..1801831e3bb9b 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -291,6 +291,7 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { return ConstantVector::get(Result); } + } // end anonymous namespace /// If this constant is a constant offset from a global, return the global and @@ -2431,8 +2432,6 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantInt::get(Ty, 0); APFloat::roundingMode RMode = nvvm::IntrinsicGetRoundingMode(IntrinsicID); - assert(RM != APFloat::roundingMode::Invalid); - bool IsFTZ = nvvm::IntrinsicShouldFTZ(IntrinsicID); bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID); From 216709f203049a35b8456f81ea9959fab3bed507 Mon Sep 17 00:00:00 2001 From: Lewis Crawford Date: Fri, 13 Dec 2024 20:39:59 +0000 Subject: [PATCH 5/6] Add back include to NVVMIntrinsicUtils Add the header include back into NVVMIntrinsicUtils.h after rebasing the commit which renamed it to a point after the original NVVMIntrinsicFlags.h version was edited to include <cstdint>. This extra include was originally added by: f33e2369051 [clang][Modules] Fixing Build Breaks When -DLLVM_ENABLE_MODULES=ON (#119473) --- llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h index 19c403a171325..eec162ed5c182 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -15,6 +15,8 @@ #ifndef LLVM_IR_NVVMINTRINSICUTILS_H #define LLVM_IR_NVVMINTRINSICUTILS_H +#include <cstdint> + +#include "llvm/ADT/APFloat.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsNVPTX.h" From 0a51bdde3fcd240096065bb05030edb4f5d2849a Mon Sep 17 00:00:00 2001 From: Lewis Crawford Date: Fri, 13 Dec 2024 21:33:40 +0000 Subject: [PATCH 6/6] Mark helper functions as inline Mark the new intrinsic helper functions as inline to avoid linker issues. --- llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h index eec162ed5c182..8ca073ba82253 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -38,7 +38,7 @@ enum class TMAReductionOp : uint8_t { XOR = 7, }; -bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { +inline bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { switch (IntrinsicID) { // Float to i32 / i64 conversion intrinsics: case Intrinsic::nvvm_f2i_rm_ftz: @@ -65,7 +65,7 @@ bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { return false; } -bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) { +inline bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) { switch (IntrinsicID) { // f2i case Intrinsic::nvvm_f2i_rm: @@ -100,7 +100,8 @@ bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) { return false; } -APFloat::roundingMode IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) { +inline APFloat::roundingMode +IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) { switch (IntrinsicID) { // RM: case Intrinsic::nvvm_f2i_rm: