Skip to content

Commit f8caf83

Browse files
committed
Add support for -ftarget-prec-div/sqrt options.
1 parent 3b0be29 commit f8caf83

File tree

9 files changed

+187
-7
lines changed

9 files changed

+187
-7
lines changed

clang/include/clang/Basic/DiagnosticCommonKinds.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,11 @@ def err_ppc_impossible_musttail: Error<
379379
def err_aix_musttail_unsupported: Error<
380380
"'musttail' attribute is not supported on AIX">;
381381

382+
def warn_acuracy_conflicts_with_explicit_target_prec_option : Warning<
383+
"floating point accuracy control '%0' conflicts with explicit target "
384+
"precision option '%1'">,
385+
InGroup<DiagGroup<"accuracy-conflicts-with-explicit-target-prec-option">>;
386+
382387
// Source manager
383388
def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
384389
def err_file_modified : Error<

clang/include/clang/Basic/FPOptions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,6 @@ OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16Exce
3030
OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision)
3131
OPTION(MathErrno, bool, 1, FPAccuracy)
3232
OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno)
33+
OPTION(TargetPrecDiv, bool, 1, ComplexRange)
34+
OPTION(TargetPrecSqrt, bool, 1, TargetPrecDiv)
3335
#undef OPTION

clang/include/clang/Basic/LangOptions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "
372372
ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic")
373373
ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic")
374374
BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions")
375+
LANGOPT(TargetPrecDiv, 1, 1, "Return correctly rounded results of fdiv")
376+
LANGOPT(TargetPrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
375377
LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
376378
LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
377379
LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")

clang/include/clang/Driver/Options.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,6 +1157,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules",
11571157
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Range reduction is disabled "
11581158
"for complex arithmetic operations">>;
11591159

1160+
defm target_prec_div: BoolOption<"f", "target-prec-div",
1161+
LangOpts<"TargetPrecDiv">, DefaultTrue,
1162+
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
1163+
"code are required to return correctly rounded results.">,
1164+
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
1165+
"code are not required to return correctly rounded results.">>,
1166+
Group<f_Group>;
1167+
1168+
defm target_prec_sqrt: BoolOption<"f", "target-prec-sqrt",
1169+
LangOpts<"TargetPrecSqrt">, DefaultTrue,
1170+
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
1171+
"code are required to return correctly rounded results.">,
1172+
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
1173+
"code are not required to return correctly rounded results.">>,
1174+
Group<f_Group>;
1175+
11601176
// OpenCL-only Options
11611177
def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>,
11621178
Visibility<[ClangOption, CC1Option]>,

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24099,6 +24099,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
2409924099
.Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
2410024100
.Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
2410124101
.Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
24102+
.Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt)
2410224103
.Default(0);
2410324104
} else {
2410424105
// The function has a clang builtin. Create an attribute for it
@@ -24200,7 +24201,8 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
2420024201
// a TU fp-accuracy requested.
2420124202
const LangOptions &LangOpts = getLangOpts();
2420224203
if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) ||
24203-
!LangOpts.FPAccuracyVal.empty()) {
24204+
!LangOpts.FPAccuracyVal.empty() || !LangOpts.TargetPrecDiv ||
24205+
!LangOpts.TargetPrecSqrt) {
2420424206
llvm::Function *Func =
2420524207
CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
2420624208
return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),

clang/lib/CodeGen/CGCall.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1889,15 +1889,23 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
18891889
Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
18901890
}
18911891
}
1892-
if (FuncAttrs.attrs().size() == 0)
1892+
if (FuncAttrs.attrs().size() == 0) {
1893+
StringRef FPAccuracyVal;
18931894
if (!getLangOpts().FPAccuracyVal.empty()) {
1894-
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
1895+
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
18951896
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
18961897
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
18971898
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
18981899
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
18991900
Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
19001901
}
1902+
if (Name == "sqrt" && !getLangOpts().TargetPrecSqrt)
1903+
FPAccuracyVal = "3.0";
1904+
if (Name == "fdiv" && !getLangOpts().TargetPrecDiv)
1905+
FPAccuracyVal = "2.5";
1906+
if (!FPAccuracyVal.empty())
1907+
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
1908+
}
19011909
}
19021910

19031911
/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
@@ -5790,10 +5798,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
57905798
// Emit the actual call/invoke instruction.
57915799
llvm::CallBase *CI;
57925800
if (!InvokeDest) {
5793-
if (!getLangOpts().FPAccuracyFuncMap.empty() ||
5794-
!getLangOpts().FPAccuracyVal.empty()) {
5795-
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
5796-
if (FD && FD->getNameInfo().getName().isIdentifier()) {
5801+
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
5802+
if (FD && FD->getNameInfo().getName().isIdentifier()) {
5803+
StringRef FuncName = FD->getName();
5804+
const bool IsFloat32Type = FD->getReturnType()->isFloat32Type();
5805+
if (!getLangOpts().FPAccuracyFuncMap.empty() ||
5806+
!getLangOpts().FPAccuracyVal.empty() ||
5807+
(FuncName == "sqrt" && !getLangOpts().TargetPrecSqrt &&
5808+
IsFloat32Type) ||
5809+
(FuncName == "fdiv" && !getLangOpts().TargetPrecDiv &&
5810+
IsFloat32Type)) {
57975811
CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
57985812
FD->getName(), FD->getBuiltinID());
57995813
if (CI)

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2942,6 +2942,14 @@ static void EmitComplexRangeDiag(const Driver &D, std::string str1,
29422942
}
29432943
}
29442944

2945+
static void EmitAccuracyDiag(const Driver &D, const JobAction &JA,
2946+
StringRef AccuracValStr, StringRef TargetPrecStr) {
2947+
if (JA.isDeviceOffloading(Action::OFK_SYCL)) {
2948+
D.Diag(clang::diag::warn_acuracy_conflicts_with_explicit_target_prec_option)
2949+
<< AccuracValStr << TargetPrecStr;
2950+
}
2951+
}
2952+
29452953
static std::string
29462954
RenderComplexRangeOption(LangOptions::ComplexRangeKind Range) {
29472955
std::string ComplexRangeStr = ComplexRangeKindToStr(Range);
@@ -2950,6 +2958,14 @@ RenderComplexRangeOption(LangOptions::ComplexRangeKind Range) {
29502958
return ComplexRangeStr;
29512959
}
29522960

2961+
static bool shouldUsePreciseDivision(const ArgList &Args) {
2962+
return Args.hasArg(options::OPT_ftarget_prec_div);
2963+
}
2964+
2965+
static bool shouldUsePreciseSqrt(const ArgList &Args) {
2966+
return Args.hasArg(options::OPT_ftarget_prec_sqrt);
2967+
}
2968+
29532969
static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
29542970
bool OFastEnabled, const ArgList &Args,
29552971
ArgStringList &CmdArgs,
@@ -2998,6 +3014,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
29983014
LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None;
29993015
std::string ComplexRangeStr = "";
30003016
std::string GccRangeComplexOption = "";
3017+
bool NoTargetPrecDiv = false;
3018+
bool NoTargetPrecSqrt = false;
30013019

30023020
// Lambda to set fast-math options. This is also used by -ffp-model=fast
30033021
auto applyFastMath = [&]() {
@@ -3060,6 +3078,19 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
30603078
// If this isn't an FP option skip the claim below
30613079
default: continue;
30623080

3081+
case options::OPT_ftarget_prec_div:
3082+
case options::OPT_ftarget_prec_sqrt:
3083+
break;
3084+
case options::OPT_fno_target_prec_sqrt:
3085+
if (!FPAccuracy.empty())
3086+
EmitAccuracyDiag(D, JA, FPAccuracy, "-fno-target-prec-sqrt");
3087+
NoTargetPrecSqrt = true;
3088+
break;
3089+
case options::OPT_fno_target_prec_div:
3090+
if (!FPAccuracy.empty())
3091+
EmitAccuracyDiag(D, JA, FPAccuracy, "-fno-target-prec-div");
3092+
NoTargetPrecDiv = true;
3093+
break;
30633094
case options::OPT_fcx_limited_range:
30643095
if (GccRangeComplexOption.empty()) {
30653096
if (Range != LangOptions::ComplexRangeKind::CX_Basic)
@@ -3144,6 +3175,10 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
31443175
case options::OPT_ffp_accuracy_EQ: {
31453176
StringRef Val = A->getValue();
31463177
FPAccuracy = Val;
3178+
if (NoTargetPrecDiv)
3179+
EmitAccuracyDiag(D, JA, FPAccuracy, "-fno-target-prec-div");
3180+
if (NoTargetPrecSqrt)
3181+
EmitAccuracyDiag(D, JA, FPAccuracy, "-fno-target-prec-sqrt");
31473182
break;
31483183
}
31493184
case options::OPT_ffp_model_EQ: {
@@ -3176,6 +3211,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
31763211
applyFastMath();
31773212
// applyFastMath sets fp-contract="fast"
31783213
LastFpContractOverrideOption = "-ffp-model=fast";
3214+
if (JA.isDeviceOffloading(Action::OFK_SYCL)) {
3215+
// when fp-model=fast is used the default precision for division and
3216+
// sqrt is not precise.
3217+
NoTargetPrecDiv = shouldUsePreciseDivision(Args);
3218+
NoTargetPrecSqrt = shouldUsePreciseSqrt(Args);
3219+
}
31793220
} else if (Val == "precise") {
31803221
FPModel = Val;
31813222
FPContract = "on";
@@ -3557,6 +3598,16 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
35573598
CmdArgs.push_back("-fno-cx-limited-range");
35583599
if (Args.hasArg(options::OPT_fno_cx_fortran_rules))
35593600
CmdArgs.push_back("-fno-cx-fortran-rules");
3601+
if (JA.isDeviceOffloading(Action::OFK_SYCL)) {
3602+
if (NoTargetPrecDiv)
3603+
CmdArgs.push_back("-fno-target-prec-div");
3604+
else
3605+
CmdArgs.push_back("-ftarget-prec-div");
3606+
if (NoTargetPrecSqrt)
3607+
CmdArgs.push_back("-fno-target-prec-sqrt");
3608+
else
3609+
CmdArgs.push_back("-ftarget-prec-sqrt");
3610+
}
35603611
}
35613612

35623613
static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs,
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// DEFINE: %{common_opts} = -internal-isystem %S/Inputs -fsycl-is-device \
2+
// DEFINE: -emit-llvm -triple spir64-unknown-unknown
3+
4+
// RUN: %clang_cc1 %{common_opts} %s -o - \
5+
// RUN: | FileCheck --check-prefix PREC-SQRT %s
6+
7+
// RUN: %clang_cc1 %{common_opts} -ftarget-prec-sqrt %s -o - \
8+
// RUN: | FileCheck --check-prefix PREC-SQRT %s
9+
10+
// RUN: %clang_cc1 %{common_opts} -fno-target-prec-sqrt %s -o - \
11+
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s
12+
13+
// RUN: %clang_cc1 %{common_opts} -ftarget-prec-div %s -o - \
14+
// RUN: | FileCheck --check-prefix PREC-DIV %s
15+
16+
// RUN: %clang_cc1 %{common_opts} -fno-target-prec-div %s -o - \
17+
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s
18+
19+
20+
#include "sycl.hpp"
21+
22+
extern "C" SYCL_EXTERNAL float sqrt(float);
23+
extern "C" SYCL_EXTERNAL float fdiv(float, float);
24+
25+
using namespace sycl;
26+
27+
int main() {
28+
const unsigned array_size = 4;
29+
range<1> numOfItems{array_size};
30+
float Value1 = .5f;
31+
float Value2 = .9f;
32+
queue deviceQueue;
33+
34+
deviceQueue.submit([&](handler& cgh) {
35+
cgh.parallel_for<class KernelSqrt>(numOfItems,
36+
[=](id<1> wiID) {
37+
// PREC-SQRT: call spir_func float @sqrt(float noundef {{.*}})
38+
// ROUNDED-SQRT: call float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]]
39+
// ROUNDED-DIV: call spir_func float @sqrt(float noundef {{.*}})
40+
(void)sqrt(Value1);
41+
});
42+
});
43+
44+
deviceQueue.submit([&](handler& cgh) {
45+
cgh.parallel_for<class KernelFdiv>(numOfItems,
46+
[=](id<1> wiID) {
47+
// PREC-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
48+
// ROUNDED-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
49+
// PREC-DIV: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
50+
// ROUNDED-DIV: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
51+
52+
(void)fdiv(Value1, Value1);
53+
});
54+
});
55+
56+
return 0;
57+
}
58+
59+
// ROUNDED-SQRT: attributes #[[ATTR_SQRT]] = {{.*}}"fpbuiltin-max-error"="3.0"
60+
// ROUNDED-DIV: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5"
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: %clang -c -fsycl -### %s 2>&1 | FileCheck %s
2+
// RUN: %clang -c -fsycl -ftarget-prec-div -### %s 2>&1 | FileCheck %s
3+
// RUN: %clang -c -fsycl -ftarget-prec-sqrt -### %s 2>&1 | FileCheck %s
4+
// RUN: %clang -c -fsycl -ftarget-prec-div -ftarget-prec-sqrt -### %s 2>&1 | FileCheck %s
5+
// RUN: %clang -c -fsycl -ftarget-prec-sqrt -ftarget-prec-div -### %s 2>&1 | FileCheck %s
6+
// RUN: %clang -c -fsycl -fno-target-prec-div -### %s 2>&1 | FileCheck --check-prefix=NO_PREC_DIV %s
7+
// RUN: %clang -c -fsycl -fno-target-prec-sqrt -### %s 2>&1 | FileCheck --check-prefix=NO_PREC_SQRT %s
8+
// RUN: %clang -c -fsycl -fno-target-prec-div -fno-target-prec-sqrt -### %s 2>&1 | FileCheck --check-prefix=NO_PREC_DIV_SQRT %s
9+
// RUN: %clang -c -fsycl -fno-target-prec-sqrt -fno-target-prec-div -### %s 2>&1 | FileCheck --check-prefix=NO_PREC_DIV_SQRT %s
10+
// RUN: %clang -c -fsycl -ffp-accuracy=high -fno-math-errno -fno-target-prec-div -### %s 2>&1 | FileCheck %s --check-prefix=WARN-HIGH-DIV
11+
// RUN: %clang -c -fsycl -fno-target-prec-div -ffp-accuracy=high -fno-math-errno -### %s 2>&1 | FileCheck %s --check-prefix=WARN-HIGH-DIV
12+
// RUN: %clang -c -fsycl -fno-target-prec-sqrt -ffp-accuracy=high -fno-math-errno -### %s 2>&1 | FileCheck %s --check-prefix=WARN-HIGH-SQRT
13+
// RUN: %clang -c -fsycl -ffp-accuracy=high -fno-math-errno -fno-target-prec-sqrt -### %s 2>&1 | FileCheck %s --check-prefix=WARN-HIGH-SQRT
14+
// RUN: %clang -c -fsycl -ffp-accuracy=low -fno-math-errno -fno-target-prec-div -### %s 2>&1 | FileCheck %s --check-prefix=WARN-LOW-DIV
15+
// RUN: %clang -c -fsycl -ffp-accuracy=low -fno-math-errno -fno-target-prec-sqrt -### %s 2>&1 | FileCheck %s --check-prefix=WARN-LOW-SQRT
16+
17+
// CHECK: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-ftarget-prec-div" "-ftarget-prec-sqrt"
18+
// CHECK-NOT: "-triple{{.*}}" "-fsycl-is-host"{{.*}} "-ftarget-prec-div" "-ftarget-prec-sqrt"
19+
// NO_PREC_DIV: "-triple" "spir64{{.*}}"{{.*}} "-fsycl-is-device"{{.*}} "-fno-target-prec-div" "-ftarget-prec-sqrt"
20+
// NO_PREC_SQRT: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-ftarget-prec-div" "-fno-target-prec-sqrt"
21+
// NO_PREC_DIV_SQRT: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-target-prec-div" "-fno-target-prec-sqrt"
22+
23+
// WARN-HIGH-DIV: floating point accuracy control 'high' conflicts with explicit target precision option '-fno-target-prec-div'
24+
// WARN-HIGH-SQRT: floating point accuracy control 'high' conflicts with explicit target precision option '-fno-target-prec-sqrt'
25+
// WARN-LOW-DIV: floating point accuracy control 'low' conflicts with explicit target precision option '-fno-target-prec-div'
26+
// WARN-LOW-SQRT: floating point accuracy control 'low' conflicts with explicit target precision option '-fno-target-prec-sqrt'
27+
28+

0 commit comments

Comments
 (0)