Skip to content

Commit ce00296

Browse files
committed
Restrict the -f[no-]offload-fp32-prec-div and -f[no-]offload-fp32-prec-sqrt options to SYCL device offloading only.
1 parent b25e5ac commit ce00296

File tree

3 files changed

+47
-6
lines changed

3 files changed

+47
-6
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3020,7 +3020,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
30203020
LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None;
30213021
std::string ComplexRangeStr = "";
30223022
std::string GccRangeComplexOption = "";
3023-
bool IsDeviceOffloading = JA.isDeviceOffloading(Action::OFK_SYCL);
3023+
bool IsFp32PrecDivSqrtAllowed = JA.isDeviceOffloading(Action::OFK_SYCL) &&
3024+
!JA.isDeviceOffloading(Action::OFK_Cuda) &&
3025+
!JA.isOffloading(Action::OFK_HIP);
30243026

30253027
// Lambda to set fast-math options. This is also used by -ffp-model=fast
30263028
auto applyFastMath = [&]() {
@@ -3050,7 +3052,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
30503052
: ComplexArithmeticStr(LangOptions::ComplexRangeKind::CX_Basic));
30513053
Range = LangOptions::ComplexRangeKind::CX_Basic;
30523054
SeenUnsafeMathModeOption = true;
3053-
if (IsDeviceOffloading) {
3055+
if (IsFp32PrecDivSqrtAllowed) {
30543056
// when fp-model=fast is used the default precision for division and
30553057
// sqrt is not precise.
30563058
NoOffloadFP32PrecDiv = true;
@@ -3085,7 +3087,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
30853087
}
30863088

30873089
auto addSPIRVArgs = [&](StringRef SPIRVArg) {
3088-
if (IsDeviceOffloading) {
3090+
if (IsFp32PrecDivSqrtAllowed) {
30893091
if (!FPAccuracy.empty())
30903092
EmitAccuracyDiag(D, JA, FPAccuracy, SPIRVArg);
30913093
if (SPIRVArg == "-fno-offload-fp32-prec-div")
@@ -3631,7 +3633,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
36313633
CmdArgs.push_back("-fno-cx-limited-range");
36323634
if (Args.hasArg(options::OPT_fno_cx_fortran_rules))
36333635
CmdArgs.push_back("-fno-cx-fortran-rules");
3634-
if (IsDeviceOffloading) {
3636+
if (IsFp32PrecDivSqrtAllowed) {
36353637
if (NoOffloadFP32PrecDiv)
36363638
CmdArgs.push_back("-fno-offload-fp32-prec-div");
36373639
if (NoOffloadFP32PrecSqrt)

clang/test/Driver/offload-fp32-div-sqrt.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@
9191

9292

9393
// CHECK: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}}
94-
95-
// CHECK-NOT: "-triple{{.*}}" "-fsycl-is-host"{{.*}} "-foffload-fp32-prec-div" "-foffload-fp32-prec-sqrt"s
94+
// CHECK-NOT: "-foffload-fp32-prec-div"
95+
// CHECK-NOT: "-foffload-fp32-prec-sqrt"
9696

9797
// NO_PREC_DIV: "-triple" "spir64{{.*}}"{{.*}} "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div"
9898

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
2+
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
3+
// RUN: -foffload-fp32-prec-div -### %s 2>&1 | FileCheck %s
4+
5+
// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
6+
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
7+
// RUN: -foffload-fp32-prec-sqrt -### %s 2>&1 | FileCheck %s
8+
9+
// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
10+
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
11+
// RUN: -fno-offload-fp32-prec-div -### %s 2>&1 | FileCheck %s
12+
13+
// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
14+
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
15+
// RUN: -fno-offload-fp32-prec-sqrt -### %s 2>&1 | FileCheck %s
16+
17+
// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
18+
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
19+
// RUN: -ffp-accuracy=high -fno-offload-fp32-prec-div -### %s 2>&1 \
20+
// RUN: | FileCheck --check-prefix=FPACC %s
21+
22+
// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
23+
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
24+
// RUN: -ffp-accuracy=high -fno-offload-fp32-prec-sqrt -### %s 2>&1 \
25+
// RUN: | FileCheck --check-prefix=FPACC %s
26+
27+
// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
28+
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
29+
// RUN: -fno-offload-fp32-prec-div -ffp-accuracy=high -### %s 2>&1 \
30+
// RUN: | FileCheck --check-prefix=FPACC %s
31+
32+
// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \
33+
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \
34+
// RUN: -fno-offload-fp32-prec-sqrt -ffp-accuracy=high -### %s 2>&1 \
35+
// RUN: | FileCheck --check-prefix=FPACC %s
36+
37+
// CHECK-NOT: "-foffload-fp32-prec-div"
38+
// CHECK-NOT: "-foffload-fp32-prec-sqrt"
39+
// FPACC: "-ffp-builtin-accuracy=high"

0 commit comments

Comments
 (0)