Skip to content

Commit 523ad69

Browse files
committed
[Clang] Add -f[no-]cuda-prec-sqrt flag
NVCC provides the `-prec-sqrt` flag to control whether a precise or approximate square root function is used. However, LLVM previously always used the approximate version. With this change, Clang introduces the `-f[no-]cuda-prec-sqrt` flag, allowing users to specify the precision behavior. The default is set to false (approximate square root) to maintain existing behavior.
1 parent 739fe98 commit 523ad69

File tree

4 files changed

+20
-0
lines changed

4 files changed

+20
-0
lines changed

clang/include/clang/Basic/CodeGenOptions.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,10 @@ class CodeGenOptions : public CodeGenOptionsBase {
317317
/// CUDA runtime back-end for incorporating them into host-side object file.
318318
std::string CudaGpuBinaryFileName;
319319

320+
/// Whether to use a precise (true) rather than approximate (false) square root for CUDA
321+
/// device code.
322+
bool CudaPreciseSqrt;
323+
320324
/// List of filenames passed in using the -fembed-offload-object option. These
321325
/// are offloading binaries containing device images and metadata.
322326
std::vector<std::string> OffloadObjects;

clang/include/clang/Driver/Options.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1279,6 +1279,11 @@ def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">
12791279
Alias<fgpu_flush_denormals_to_zero>;
12801280
def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">,
12811281
Alias<fno_gpu_flush_denormals_to_zero>;
1282+
defm cuda_prec_sqrt : BoolFOption<"cuda-prec-sqrt",
1283+
CodeGenOpts<"CudaPreciseSqrt">, DefaultFalse,
1284+
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Enable">,
1285+
NegFlag<SetFalse, [], [ClangOption], "Disable">,
1286+
BothFlags<[], [ClangOption], " precise square root for CUDA device code.">>;
12821287
def : Flag<["-"], "fcuda-rdc">, Alias<fgpu_rdc>;
12831288
def : Flag<["-"], "fno-cuda-rdc">, Alias<fno_gpu_rdc>;
12841289
defm cuda_short_ptr : BoolFOption<"cuda-short-ptr",

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/StringExtras.h"
2020
#include "llvm/Config/llvm-config.h" // for LLVM_HOST_TRIPLE
2121
#include "llvm/Option/ArgList.h"
22+
#include "llvm/Option/Option.h"
2223
#include "llvm/Support/FileSystem.h"
2324
#include "llvm/Support/FormatAdapters.h"
2425
#include "llvm/Support/FormatVariadic.h"
@@ -862,6 +863,10 @@ void CudaToolChain::addClangTargetOptions(
862863
if (CudaInstallation.version() >= CudaVersion::CUDA_90)
863864
CC1Args.push_back("-fcuda-allow-variadic-functions");
864865

866+
if (DriverArgs.hasFlag(options::OPT_fcuda_prec_sqrt,
867+
options::OPT_fno_cuda_prec_sqrt, false))
868+
CC1Args.append({"-fcuda-prec-sqrt"});
869+
865870
if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
866871
options::OPT_fno_cuda_short_ptr, false))
867872
CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Checks that the -fcuda-prec-sqrt flag is passed to the cc1 frontend.
2+
3+
// RUN: %clang -### --target=x86_64-linux-gnu -c -fcuda-prec-sqrt -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck %s
4+
5+
// CHECK: "-triple" "nvptx64-nvidia-cuda"
6+
// CHECK-SAME: "-fcuda-prec-sqrt"

0 commit comments

Comments
 (0)