Restrict the -f[no-]offload-fp32-prec-div and -f[no-]offload-fp32-prec-sqrt options to SYCL device offloading only.

zahiraam · zahiraam · commit ce002961198d · 2024-11-15T09:53:33.000-08:00
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3020,7 +3020,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
   LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None;
   std::string ComplexRangeStr = "";
   std::string GccRangeComplexOption = "";
-  bool IsDeviceOffloading = JA.isDeviceOffloading(Action::OFK_SYCL);
+  bool IsFp32PrecDivSqrtAllowed = JA.isDeviceOffloading(Action::OFK_SYCL) &&
+                                  !JA.isDeviceOffloading(Action::OFK_Cuda) &&
+                                  !JA.isOffloading(Action::OFK_HIP);
 
   // Lambda to set fast-math options. This is also used by -ffp-model=fast
   auto applyFastMath = [&]() {
@@ -3050,7 +3052,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
               : ComplexArithmeticStr(LangOptions::ComplexRangeKind::CX_Basic));
     Range = LangOptions::ComplexRangeKind::CX_Basic;
     SeenUnsafeMathModeOption = true;
-    if (IsDeviceOffloading) {
+    if (IsFp32PrecDivSqrtAllowed) {
       // when fp-model=fast is used the default precision for division and
       // sqrt is not precise.
       NoOffloadFP32PrecDiv = true;
@@ -3085,7 +3087,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
   }
 
   auto addSPIRVArgs = [&](StringRef SPIRVArg) {
-    if (IsDeviceOffloading) {
+    if (IsFp32PrecDivSqrtAllowed) {
       if (!FPAccuracy.empty())
         EmitAccuracyDiag(D, JA, FPAccuracy, SPIRVArg);
       if (SPIRVArg == "-fno-offload-fp32-prec-div")
@@ -3631,7 +3633,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
     CmdArgs.push_back("-fno-cx-limited-range");
   if (Args.hasArg(options::OPT_fno_cx_fortran_rules))
     CmdArgs.push_back("-fno-cx-fortran-rules");
-  if (IsDeviceOffloading) {
+  if (IsFp32PrecDivSqrtAllowed) {
     if (NoOffloadFP32PrecDiv)
       CmdArgs.push_back("-fno-offload-fp32-prec-div");
     if (NoOffloadFP32PrecSqrt)
diff --git a/clang/test/Driver/offload-fp32-div-sqrt.cpp b/clang/test/Driver/offload-fp32-div-sqrt.cpp
@@ -91,8 +91,8 @@
 
 
 // CHECK: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}}
-
-// CHECK-NOT: "-triple{{.*}}" "-fsycl-is-host"{{.*}} "-foffload-fp32-prec-div" "-foffload-fp32-prec-sqrt"s
+// CHECK-NOT: "-foffload-fp32-prec-div"
+// CHECK-NOT: "-foffload-fp32-prec-sqrt"
 
 // NO_PREC_DIV: "-triple" "spir64{{.*}}"{{.*}} "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div"
 
diff --git a/clang/test/Driver/offload-fp32-div-sqrt.cu b/clang/test/Driver/offload-fp32-div-sqrt.cu
@@ -0,0 +1,39 @@
+// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
+// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
+// RUN: -foffload-fp32-prec-div -### %s 2>&1 | FileCheck %s
+
+// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
+// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
+// RUN: -foffload-fp32-prec-sqrt -### %s 2>&1 | FileCheck %s
+
+// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
+// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
+// RUN: -fno-offload-fp32-prec-div -### %s 2>&1 | FileCheck %s
+
+// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
+// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
+// RUN: -fno-offload-fp32-prec-sqrt -### %s 2>&1 | FileCheck %s
+
+// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
+// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
+// RUN: -ffp-accuracy=high -fno-offload-fp32-prec-div -### %s 2>&1 \
+// RUN: | FileCheck --check-prefix=FPACC %s
+
+// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
+// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
+// RUN: -ffp-accuracy=high -fno-offload-fp32-prec-sqrt -### %s 2>&1 \
+// RUN: | FileCheck --check-prefix=FPACC %s
+
+// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
+// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
+// RUN: -fno-offload-fp32-prec-div -ffp-accuracy=high -### %s 2>&1 \
+// RUN: | FileCheck --check-prefix=FPACC %s
+
+// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
+// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
+// RUN: -fno-offload-fp32-prec-sqrt -ffp-accuracy=high  -### %s 2>&1 \
+// RUN: | FileCheck --check-prefix=FPACC %s
+
+// CHECK-NOT: "-foffload-fp32-prec-div"
+// CHECK-NOT: "-foffload-fp32-prec-sqrt"
+// FPACC: "-ffp-builtin-accuracy=high"