diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index a2b6db69b9284..9f15612355e6c 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -946,6 +946,20 @@ void CudaToolChain::addClangTargetOptions(
 
     if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt))
       CC1Args.push_back("-fcuda-prec-sqrt");
+
+    // Fast-math style flags allow approximate results, so request the NVPTX
+    // backend's non-precise f32 division and square root lowering.
+    // NOTE(review): the two flag pairs are evaluated independently, so
+    // -ffast-math followed by -fno-unsafe-math-optimizations still enables
+    // this; confirm that is intended.
+    bool FastRelaxedMath = DriverArgs.hasFlag(
+        options::OPT_ffast_math, options::OPT_fno_fast_math, false);
+    bool UnsafeMathOpt =
+        DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
+                           options::OPT_fno_unsafe_math_optimizations, false);
+    if (FastRelaxedMath || UnsafeMathOpt)
+      CC1Args.append({"-mllvm", "--nvptx-prec-divf32=0", "-mllvm",
+                      "--nvptx-prec-sqrtf32=0"});
   } else {
     CC1Args.append(
         {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
diff --git a/clang/test/Driver/sycl-nvptx-fast-math.cpp b/clang/test/Driver/sycl-nvptx-fast-math.cpp
new file mode 100644
index 0000000000000..b4593a3700148
--- /dev/null
+++ b/clang/test/Driver/sycl-nvptx-fast-math.cpp
@@ -0,0 +1,26 @@
+// REQUIRES: nvptx-registered-target
+
+// Verify that -ffast-math and -funsafe-math-optimizations make the SYCL CUDA
+// device compilation pass the f32 div/sqrt precision options to the NVPTX
+// backend, and that those options are absent otherwise.
+
+// RUN: %clang -### -nocudalib \
+// RUN:    -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-DEFAULT %s
+
+// RUN: %clang -### -nocudalib \
+// RUN:    -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ffast-math %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FAST %s
+
+// RUN: %clang -### -nocudalib \
+// RUN:    -fsycl -fsycl-targets=nvptx64-nvidia-cuda -funsafe-math-optimizations %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FAST %s
+
+// RUN: %clang -### -nocudalib \
+// RUN:    -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ffast-math -fno-fast-math %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-DEFAULT %s
+
+// CHECK-FAST: "-mllvm" "--nvptx-prec-divf32=0" "-mllvm" "--nvptx-prec-sqrtf32=0"
+
+// CHECK-DEFAULT-NOT: "--nvptx-prec-divf32=0"
+// CHECK-DEFAULT-NOT: "--nvptx-prec-sqrtf32=0"