Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
Expand Down Expand Up @@ -932,6 +933,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(Op, MVT::bf16, Promote);
AddPromotedToType(Op, MVT::bf16, MVT::f32);
}
setOperationAction(ISD::FREM, {MVT::f32, MVT::f64}, Custom);

setOperationAction(ISD::FABS, {MVT::f32, MVT::f64}, Legal);
if (STI.getPTXVersion() >= 65) {
Expand Down Expand Up @@ -2819,6 +2821,34 @@ static SDValue lowerROT(SDValue Op, SelectionDAG &DAG) {
SDLoc(Op), Opcode, DAG);
}

/// Custom lowering for ISD::FREM.
///
/// Expands (frem x, y) to x - trunc(x / y) * y, the "poor man's fmod()".
/// The multiply and subtract nodes are tagged AllowContract. Unless
/// \p AllowUnsafeFPMath is set or the frem node carries the no-infs flag, the
/// result is additionally wrapped in a select that yields x whenever |y| is
/// +infinity, which matches the semantics of LLVM's frem.
///
/// \param Op                The FREM node to lower.
/// \param DAG               The DAG in which the replacement nodes are built.
/// \param AllowUnsafeFPMath When true, the infinite-divisor guard is omitted.
/// \returns The value that replaces \p Op.
static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
                         bool AllowUnsafeFPMath) {
  const SDLoc Loc(Op);
  const SDValue Dividend = Op.getOperand(0);
  const SDValue Divisor = Op.getOperand(1);
  const EVT VT = Op.getValueType();

  // Remainder = Dividend - trunc(Dividend / Divisor) * Divisor.
  const SDValue Quotient = DAG.getNode(ISD::FDIV, Loc, VT, Dividend, Divisor);
  const SDValue WholePart = DAG.getNode(ISD::FTRUNC, Loc, VT, Quotient);
  const SDValue Product = DAG.getNode(ISD::FMUL, Loc, VT, WholePart, Divisor,
                                      SDNodeFlags::AllowContract);
  const SDValue Remainder = DAG.getNode(ISD::FSUB, Loc, VT, Dividend, Product,
                                        SDNodeFlags::AllowContract);

  // The guard below is only needed when the divisor may be infinite.
  const bool NeedsInfGuard =
      !AllowUnsafeFPMath && !Op->getFlags().hasNoInfs();
  if (!NeedsInfGuard)
    return Remainder;

  // select(|Divisor| == +inf, Dividend, Remainder)
  const SDValue DivisorMagnitude = DAG.getNode(ISD::FABS, Loc, VT, Divisor);
  const SDValue PosInf =
      DAG.getConstantFP(APFloat::getInf(VT.getFltSemantics()), Loc, VT);
  const SDValue DivisorIsInf =
      DAG.getSetCC(Loc, MVT::i1, DivisorMagnitude, PosInf, ISD::SETEQ);
  return DAG.getSelect(Loc, VT, DivisorIsInf, Dividend, Remainder);
}

SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
Expand Down Expand Up @@ -2913,6 +2943,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTPOP:
case ISD::CTLZ:
return lowerCTLZCTPOP(Op, DAG);
case ISD::FREM:
return lowerFREM(Op, DAG, allowUnsafeFPMath(DAG.getMachineFunction()));

default:
llvm_unreachable("Custom lowering not defined for operation");
Expand Down
126 changes: 21 additions & 105 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,6 @@ def doRsqrtOpt : Predicate<"doRsqrtOpt()">;

def doMulWide : Predicate<"doMulWide">;

def allowUnsafeFPMath : Predicate<"allowUnsafeFPMath()">;
def noUnsafeFPMath : Predicate<"!allowUnsafeFPMath()">;

def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;

Expand Down Expand Up @@ -211,6 +208,12 @@ class ValueToRegClass<ValueType T> {
// Some Common Instruction Class Templates
//===----------------------------------------------------------------------===//

// Pattern fragment that matches the single-operand `operator` applied to $A,
// but only when the matched node has exactly one use (N->hasOneUse()).
class OneUse1<SDPatternOperator operator>
: PatFrag<(ops node:$A), (operator node:$A), [{ return N->hasOneUse(); }]>;

// Floating-point immediate leaf of type `vt` that matches only when the
// immediate is positive infinity (Imm.isPosInfinity()).
class fpimm_pos_inf<ValueType vt>
: FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;

// Utility class to wrap up information about a register and DAG type for more
// convenient iteration and parameterization
class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm> {
Expand Down Expand Up @@ -442,7 +445,7 @@ multiclass F3<string op_str, SDPatternOperator op_pat> {
// Pattern fragment that matches the two-operand `operator` applied to $A and
// $B only when contraction is permitted: either allowFMA() holds or the
// matched node carries the allow-contract flag.
class BinOpAllowsFMA<SDPatternOperator operator>
  : PatFrag<(ops node:$A, node:$B),
            (operator node:$A, node:$B), [{
  return allowFMA() || N->getFlags().hasAllowContract();
}]>;

multiclass F3_fma_component<string op_str, SDNode op_node> {
Expand Down Expand Up @@ -693,10 +696,7 @@ let hasSideEffects = false in {
defm CVT_to_tf32_rz_relu_satf : CVT_TO_TF32<"rz.relu.satfinite", [hasPTX<86>, hasSM<100>]>;
}

def fpround_oneuse : PatFrag<(ops node:$a), (fpround node:$a), [{
return N->hasOneUse();
}]>;

def fpround_oneuse : OneUse1<fpround>;
def : Pat<(v2bf16 (build_vector (bf16 (fpround_oneuse f32:$lo)),
(bf16 (fpround_oneuse f32:$hi)))),
(CVT_bf16x2_f32 $hi, $lo, CvtRN)>,
Expand Down Expand Up @@ -786,18 +786,14 @@ def : Pat<(vt (select i1:$p, vt:$a, vt:$b)),
// Test Instructions
//-----------------------------------

def fabs_oneuse : OneUse1<fabs>;

def TESTINF_f32r : NVPTXInst<(outs Int1Regs:$p), (ins Float32Regs:$a),
"testp.infinite.f32 \t$p, $a;",
[]>;
def TESTINF_f32i : NVPTXInst<(outs Int1Regs:$p), (ins f32imm:$a),
"testp.infinite.f32 \t$p, $a;",
[]>;
[(set i1:$p, (seteq (fabs_oneuse f32:$a), fpimm_pos_inf<f32>))]>;
def TESTINF_f64r : NVPTXInst<(outs Int1Regs:$p), (ins Float64Regs:$a),
"testp.infinite.f64 \t$p, $a;",
[]>;
def TESTINF_f64i : NVPTXInst<(outs Int1Regs:$p), (ins f64imm:$a),
"testp.infinite.f64 \t$p, $a;",
[]>;
[(set i1:$p, (seteq (fabs_oneuse f64:$a), fpimm_pos_inf<f64>))]>;

//-----------------------------------
// Integer Arithmetic
Expand Down Expand Up @@ -1362,99 +1358,19 @@ defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>;
defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>;

// sin/cos

// Pattern fragment that matches the single-operand `operator` applied to $A
// only when an approximate result is acceptable: either allowUnsafeFPMath()
// holds or the matched node carries the approximate-functions flag.
class UnaryOpAllowsApproxFn<SDPatternOperator operator>
: PatFrag<(ops node:$A),
(operator node:$A), [{
return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs();
}]>;

def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"sin.approx.f32 \t$dst, $src;",
[(set f32:$dst, (fsin f32:$src))]>,
Requires<[allowUnsafeFPMath]>;
[(set f32:$dst, (UnaryOpAllowsApproxFn<fsin> f32:$src))]>;
def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"cos.approx.f32 \t$dst, $src;",
[(set f32:$dst, (fcos f32:$src))]>,
Requires<[allowUnsafeFPMath]>;

// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
// i.e. "poor man's fmod()". When y is infinite, x is returned. This matches the
// semantics of LLVM's frem.

// frem - f32 FTZ
def : Pat<(frem f32:$x, f32:$y),
(FSUBf32rr_ftz $x, (FMULf32rr_ftz (CVT_f32_f32
(FDIV32rr_prec_ftz $x, $y), CvtRZI_FTZ),
$y))>,
Requires<[doF32FTZ, allowUnsafeFPMath]>;
def : Pat<(frem f32:$x, fpimm:$y),
(FSUBf32rr_ftz $x, (FMULf32ri_ftz (CVT_f32_f32
(FDIV32ri_prec_ftz $x, fpimm:$y), CvtRZI_FTZ),
fpimm:$y))>,
Requires<[doF32FTZ, allowUnsafeFPMath]>;

def : Pat<(frem f32:$x, f32:$y),
(SELP_f32rr $x,
(FSUBf32rr_ftz $x, (FMULf32rr_ftz (CVT_f32_f32
(FDIV32rr_prec_ftz $x, $y), CvtRZI_FTZ),
$y)),
(TESTINF_f32r $y))>,
Requires<[doF32FTZ, noUnsafeFPMath]>;
def : Pat<(frem f32:$x, fpimm:$y),
(SELP_f32rr $x,
(FSUBf32rr_ftz $x, (FMULf32ri_ftz (CVT_f32_f32
(FDIV32ri_prec_ftz $x, fpimm:$y), CvtRZI_FTZ),
fpimm:$y)),
(TESTINF_f32i fpimm:$y))>,
Requires<[doF32FTZ, noUnsafeFPMath]>;

// frem - f32
def : Pat<(frem f32:$x, f32:$y),
(FSUBf32rr $x, (FMULf32rr (CVT_f32_f32
(FDIV32rr_prec $x, $y), CvtRZI),
$y))>,
Requires<[allowUnsafeFPMath]>;
def : Pat<(frem f32:$x, fpimm:$y),
(FSUBf32rr $x, (FMULf32ri (CVT_f32_f32
(FDIV32ri_prec $x, fpimm:$y), CvtRZI),
fpimm:$y))>,
Requires<[allowUnsafeFPMath]>;

def : Pat<(frem f32:$x, f32:$y),
(SELP_f32rr $x,
(FSUBf32rr $x, (FMULf32rr (CVT_f32_f32
(FDIV32rr_prec $x, $y), CvtRZI),
$y)),
(TESTINF_f32r Float32Regs:$y))>,
Requires<[noUnsafeFPMath]>;
def : Pat<(frem f32:$x, fpimm:$y),
(SELP_f32rr $x,
(FSUBf32rr $x, (FMULf32ri (CVT_f32_f32
(FDIV32ri_prec $x, fpimm:$y), CvtRZI),
fpimm:$y)),
(TESTINF_f32i fpimm:$y))>,
Requires<[noUnsafeFPMath]>;

// frem - f64
def : Pat<(frem f64:$x, f64:$y),
(FSUBf64rr $x, (FMULf64rr (CVT_f64_f64
(FDIV64rr $x, $y), CvtRZI),
$y))>,
Requires<[allowUnsafeFPMath]>;
def : Pat<(frem f64:$x, fpimm:$y),
(FSUBf64rr $x, (FMULf64ri (CVT_f64_f64
(FDIV64ri $x, fpimm:$y), CvtRZI),
fpimm:$y))>,
Requires<[allowUnsafeFPMath]>;

def : Pat<(frem f64:$x, f64:$y),
(SELP_f64rr $x,
(FSUBf64rr $x, (FMULf64rr (CVT_f64_f64
(FDIV64rr $x, $y), CvtRZI),
$y)),
(TESTINF_f64r Float64Regs:$y))>,
Requires<[noUnsafeFPMath]>;
def : Pat<(frem f64:$x, fpimm:$y),
(SELP_f64rr $x,
(FSUBf64rr $x, (FMULf64ri (CVT_f64_f64
(FDIV64ri $x, fpimm:$y), CvtRZI),
fpimm:$y)),
(TESTINF_f64r $y))>,
Requires<[noUnsafeFPMath]>;
[(set f32:$dst, (UnaryOpAllowsApproxFn<fcos> f32:$src))]>;

//-----------------------------------
// Bitwise operations
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/f16-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,14 @@ define half @test_fdiv(half %a, half %b) #0 {
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]];
; CHECK-NOFTZ-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
; CHECK-NOFTZ-NEXT: cvt.rzi.f32.f32 [[DI:%f[0-9]+]], [[D]];
; CHECK-NOFTZ-NEXT: mul.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
; CHECK-NOFTZ-NEXT: sub.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
; CHECK-NOFTZ-NEXT: neg.f32 [[DNEG:%f[0-9]+]], [[DI]];
; CHECK-NOFTZ-NEXT: fma.rn.f32 [[RF:%f[0-9]+]], [[DNEG]], [[FB]], [[FA]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[FA:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[FB:%f[0-9]+]], [[B]];
; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
; CHECK-F16-FTZ-NEXT: cvt.rzi.ftz.f32.f32 [[DI:%f[0-9]+]], [[D]];
; CHECK-F16-FTZ-NEXT: mul.ftz.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
; CHECK-F16-FTZ-NEXT: sub.ftz.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
; CHECK-F16-FTZ-NEXT: neg.ftz.f32 [[DNEG:%f[0-9]+]], [[DI]];
; CHECK-F16-FTZ-NEXT: fma.rn.ftz.f32 [[RF:%f[0-9]+]], [[DNEG]], [[FB]], [[FA]];
; CHECK-NEXT: testp.infinite.f32 [[ISBINF:%p[0-9]+]], [[FB]];
; CHECK-NEXT: selp.f32 [[RESULT:%f[0-9]+]], [[FA]], [[RF]], [[ISBINF]];
; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RESULT]];
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -362,17 +362,17 @@ define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: cvt.f32.f16 %f2, %rs4;
; CHECK-NEXT: div.rn.f32 %f3, %f2, %f1;
; CHECK-NEXT: cvt.rzi.f32.f32 %f4, %f3;
; CHECK-NEXT: mul.f32 %f5, %f4, %f1;
; CHECK-NEXT: sub.f32 %f6, %f2, %f5;
; CHECK-NEXT: neg.f32 %f5, %f4;
; CHECK-NEXT: fma.rn.f32 %f6, %f5, %f1, %f2;
; CHECK-NEXT: testp.infinite.f32 %p1, %f1;
; CHECK-NEXT: selp.f32 %f7, %f2, %f6, %p1;
; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %f7;
; CHECK-NEXT: cvt.f32.f16 %f8, %rs1;
; CHECK-NEXT: cvt.f32.f16 %f9, %rs3;
; CHECK-NEXT: div.rn.f32 %f10, %f9, %f8;
; CHECK-NEXT: cvt.rzi.f32.f32 %f11, %f10;
; CHECK-NEXT: mul.f32 %f12, %f11, %f8;
; CHECK-NEXT: sub.f32 %f13, %f9, %f12;
; CHECK-NEXT: neg.f32 %f12, %f11;
; CHECK-NEXT: fma.rn.f32 %f13, %f12, %f8, %f9;
; CHECK-NEXT: testp.infinite.f32 %p2, %f8;
; CHECK-NEXT: selp.f32 %f14, %f9, %f13, %p2;
; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %f14;
Expand Down
14 changes: 14 additions & 0 deletions llvm/test/CodeGen/NVPTX/fast-math.ll
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,20 @@ define float @fadd_ftz(float %a, float %b) #1 {
declare float @llvm.sin.f32(float)
declare float @llvm.cos.f32(float)

; The function has no attribute group; only the call itself carries the `afn`
; flag. The expectation below is that the approximate sine instruction is
; still emitted.
; CHECK-LABEL: fsin_approx_afn
; CHECK: sin.approx.f32
define float @fsin_approx_afn(float %a) {
%r = tail call afn float @llvm.sin.f32(float %a)
ret float %r
}

; The function has no attribute group; only the call itself carries the `afn`
; flag. The expectation below is that the approximate cosine instruction is
; still emitted.
; CHECK-LABEL: fcos_approx_afn
; CHECK: cos.approx.f32
define float @fcos_approx_afn(float %a) {
%r = tail call afn float @llvm.cos.f32(float %a)
ret float %r
}

; CHECK-LABEL: fsin_approx
; CHECK: sin.approx.f32
define float @fsin_approx(float %a) #0 {
Expand Down
Loading