# Patch summary (free text before the first "diff --git" header; patch tools
# skip it, and the hunk line counts below are unchanged).
#
# Purpose: expose two new NVVM intrinsics, int_nvvm_div_full and
# int_nvvm_div_full_ftz, and teach the NVPTX backend to select them.
#
# 1. llvm/include/llvm/IR/IntrinsicsNVVM.td
#    Declares both intrinsics as (float, float) -> float with [IntrNoMem],
#    and attaches the ClangBuiltin names __nvvm_div_full and
#    __nvvm_div_full_ftz.
#
# 2. llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
#    Adds four anonymous selection patterns, two per intrinsic:
#      - register / register divisor   -> FDIV32rr   / FDIV32rr_ftz
#      - register / fpimm divisor      -> FDIV32ri   / FDIV32ri_ftz
#    The FDIV32* instructions are defined elsewhere (not visible in this
#    patch); the new test expects them to print as div.full.f32 and
#    div.full.ftz.f32 respectively.
#
# 3. llvm/test/CodeGen/NVPTX/div.ll (new file)
#    One function, div_full, chains four calls: each intrinsic once with a
#    register divisor (%b) and once with a constant divisor (3.0, 4.0).
#    The first RUN line checks llc output with FileCheck; the second feeds
#    the llc output to %ptxas-verify and is guarded by "%if ptxas".
#
# NOTE(review): in the expected output the constants 3.0 and 4.0 are first
# materialized with mov.f32 (0f40400000, 0f40800000) and the divisions then
# use register operands. The fpimm (FDIV32ri / FDIV32ri_ftz) patterns
# therefore do not appear to be exercised by these CHECK lines; confirm
# whether that is intended or whether an immediate-form check is missing.
#
# NOTE(review): the test file contains no explicit "declare" lines for the
# two intrinsics; presumably the IR parser declares them implicitly. Verify
# against the LLVM version this patch targets.

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 115fcee0b04f2..8802ca2534355 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -820,6 +820,13 @@ let TargetPrefix = "nvvm" in {
       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
         [IntrNoMem]>;
 
+  def int_nvvm_div_full : ClangBuiltin<"__nvvm_div_full">,
+      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+        [IntrNoMem]>;
+  def int_nvvm_div_full_ftz : ClangBuiltin<"__nvvm_div_full_ftz">,
+      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+        [IntrNoMem]>;
+
 //
 // Sad
 //
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 5878940812f62..5528e7b9fe0dd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1096,6 +1096,18 @@ def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
 
+def : Pat<(int_nvvm_div_full Float32Regs:$a, Float32Regs:$b),
+          (FDIV32rr Float32Regs:$a, Float32Regs:$b)>;
+
+def : Pat<(int_nvvm_div_full Float32Regs:$a, fpimm:$b),
+          (FDIV32ri Float32Regs:$a, f32imm:$b)>;
+
+def : Pat<(int_nvvm_div_full_ftz Float32Regs:$a, Float32Regs:$b),
+          (FDIV32rr_ftz Float32Regs:$a, Float32Regs:$b)>;
+
+def : Pat<(int_nvvm_div_full_ftz Float32Regs:$a, fpimm:$b),
+          (FDIV32ri_ftz Float32Regs:$a, f32imm:$b)>;
+
 //
 // Sad
 //
diff --git a/llvm/test/CodeGen/NVPTX/div.ll b/llvm/test/CodeGen/NVPTX/div.ll
new file mode 100644
index 0000000000000..1df3010384917
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/div.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
+
+define float @div_full(float %a, float %b) {
+; CHECK-LABEL: div_full(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<9>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [div_full_param_0];
+; CHECK-NEXT:    ld.param.f32 %f2, [div_full_param_1];
+; CHECK-NEXT:    div.full.f32 %f3, %f1, %f2;
+; CHECK-NEXT:    mov.f32 %f4, 0f40400000;
+; CHECK-NEXT:    div.full.f32 %f5, %f3, %f4;
+; CHECK-NEXT:    div.full.ftz.f32 %f6, %f5, %f2;
+; CHECK-NEXT:    mov.f32 %f7, 0f40800000;
+; CHECK-NEXT:    div.full.ftz.f32 %f8, %f6, %f7;
+; CHECK-NEXT:    st.param.f32 [func_retval0], %f8;
+; CHECK-NEXT:    ret;
+  %1 = call float @llvm.nvvm.div.full(float %a, float %b)
+  %2 = call float @llvm.nvvm.div.full(float %1, float 3.0)
+  %3 = call float @llvm.nvvm.div.full.ftz(float %2, float %b)
+  %4 = call float @llvm.nvvm.div.full.ftz(float %3, float 4.0)
+  ret float %4
+}