Skip to content

Commit bc01759

Browse files
committed
Remove restriction on Cuda/Hip and changed the code so that the div
instruction gets the precision set instead of the fdiv function.
1 parent ce00296 commit bc01759

File tree

7 files changed

+59
-52
lines changed

7 files changed

+59
-52
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 5 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -490,29 +490,6 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
490490
return Store;
491491
}
492492

493-
static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name,
494-
llvm::Function *FPBuiltinF,
495-
ArrayRef<Value *> Args,
496-
unsigned ID) {
497-
llvm::CallInst *CI = CGF.Builder.CreateCall(FPBuiltinF, Args);
498-
// TODO: Replace AttrList with a single attribute. The call can only have a
499-
// single FPAccuracy attribute.
500-
llvm::AttributeList AttrList;
501-
// "sycl_used_aspects" metadata associated with the call.
502-
llvm::Metadata *AspectMD = nullptr;
503-
// sincos() doesn't return a value, but it still has a type associated with
504-
// it that corresponds to the operand type.
505-
CGF.CGM.getFPAccuracyFuncAttributes(
506-
Name, AttrList, AspectMD, ID,
507-
Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
508-
CI->setAttributes(AttrList);
509-
510-
if (CGF.getLangOpts().SYCLIsDevice && AspectMD)
511-
CI->setMetadata("sycl_used_aspects",
512-
llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectMD));
513-
return CI;
514-
}
515-
516493
static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
517494
unsigned FPIntrinsicID, unsigned IntrinsicID,
518495
bool HasAccuracyRequirement) {
@@ -558,8 +535,8 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(
558535
Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
559536
IntrinsicID, Src0, Name);
560537
if (Func)
561-
return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0},
562-
FPAccuracyIntrinsicID);
538+
return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0},
539+
FPAccuracyIntrinsicID);
563540

564541
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
565542
if (CGF.Builder.getIsFPConstrained()) {
@@ -583,8 +560,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(
583560
Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
584561
IntrinsicID, Src0, Name);
585562
if (Func)
586-
return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0, Src1},
587-
FPAccuracyIntrinsicID);
563+
return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0, Src1},
564+
FPAccuracyIntrinsicID);
588565

589566
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
590567
if (CGF.Builder.getIsFPConstrained()) {
@@ -24198,7 +24175,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
2419824175
!LangOpts.OffloadFP32PrecSqrt) {
2419924176
llvm::Function *Func =
2420024177
CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
24201-
return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
24178+
return CreateBuiltinCallWithAttr(Name, Func, ArrayRef(IRArgs),
2420224179
FPAccuracyIntrinsicID);
2420324180
}
2420424181
return nullptr;

clang/lib/CodeGen/CGCall.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,7 +1882,7 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
18821882
StringRef FPAccuracyVal;
18831883
auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
18841884
if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) {
1885-
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
1885+
if (!getLangOpts().OffloadFP32PrecDiv && Name == "div")
18861886
FPAccuracyVal = "2.5";
18871887
else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
18881888
FPAccuracyVal = "3.0";
@@ -1898,7 +1898,7 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
18981898
if (FuncAttrs.attrs().size() == 0) {
18991899
if (!getLangOpts().FPAccuracyVal.empty()) {
19001900
StringRef FPAccuracyVal;
1901-
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
1901+
if (!getLangOpts().OffloadFP32PrecDiv && Name == "div")
19021902
FPAccuracyVal = "2.5";
19031903
else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
19041904
FPAccuracyVal = "3.0";
@@ -1910,7 +1910,7 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
19101910
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
19111911
Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
19121912
} else {
1913-
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") {
1913+
if (!getLangOpts().OffloadFP32PrecDiv && Name == "div") {
19141914
FuncAttrs.addAttribute("fpbuiltin-max-error", "2.5");
19151915
} else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") {
19161916
FuncAttrs.addAttribute("fpbuiltin-max-error", "3.0");
@@ -5818,11 +5818,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
58185818
bool isFp32SqrtFunction =
58195819
(FuncName == "sqrt" && !getLangOpts().OffloadFP32PrecSqrt &&
58205820
IsFloat32Type);
5821-
bool isFP32FdivFunction =
5822-
(FuncName == "fdiv" && !getLangOpts().OffloadFP32PrecDiv &&
5823-
IsFloat32Type);
5824-
if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction ||
5825-
isFP32FdivFunction) {
5821+
if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction) {
58265822
CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
58275823
FD->getName(), FD->getBuiltinID());
58285824
if (CI)

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3783,6 +3783,16 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
37833783
if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
37843784
llvm::Value *Val;
37853785
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
3786+
if (Ops.LHS->getType()->isFloatTy()) {
3787+
if (!CGF.getLangOpts().OffloadFP32PrecDiv) {
3788+
unsigned FPAccuracyIntrinsicID = llvm::Intrinsic::fpbuiltin_fdiv;
3789+
llvm::Function *Func =
3790+
CGF.CGM.getIntrinsic(FPAccuracyIntrinsicID, Ops.LHS->getType());
3791+
llvm::Value *Val = CGF.CreateBuiltinCallWithAttr(
3792+
"div", Func, {Ops.LHS, Ops.RHS}, FPAccuracyIntrinsicID);
3793+
return Val;
3794+
}
3795+
}
37863796
Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
37873797
CGF.SetDivFPAccuracy(Val);
37883798
return Val;

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,28 @@ bool CodeGenFunction::hasAccuracyRequirement(StringRef Name) {
129129
return FuncMapIt != getLangOpts().FPAccuracyFuncMap.end();
130130
}
131131

132+
llvm::CallInst *CodeGenFunction::CreateBuiltinCallWithAttr(
133+
StringRef Name, llvm::Function *FPBuiltinF, ArrayRef<llvm::Value *> Args,
134+
unsigned ID) {
135+
llvm::CallInst *CI = Builder.CreateCall(FPBuiltinF, Args);
136+
// TODO: Replace AttrList with a single attribute. The call can only have a
137+
// single FPAccuracy attribute.
138+
llvm::AttributeList AttrList;
139+
// "sycl_used_aspects" metadata associated with the call.
140+
llvm::Metadata *AspectMD = nullptr;
141+
// sincos() doesn't return a value, but it still has a type associated with
142+
// it that corresponds to the operand type.
143+
CGM.getFPAccuracyFuncAttributes(
144+
Name, AttrList, AspectMD, ID,
145+
Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
146+
CI->setAttributes(AttrList);
147+
148+
if (getLangOpts().SYCLIsDevice && AspectMD)
149+
CI->setMetadata("sycl_used_aspects",
150+
llvm::MDNode::get(CGM.getLLVMContext(), AspectMD));
151+
return CI;
152+
}
153+
132154
void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) {
133155
llvm::FastMathFlags FMF;
134156
FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate());

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5215,6 +5215,11 @@ class CodeGenFunction : public CodeGenTypeCache {
52155215

52165216
bool hasAccuracyRequirement(StringRef Name);
52175217

5218+
llvm::CallInst *CreateBuiltinCallWithAttr(StringRef Name,
5219+
llvm::Function *FPBuiltinF,
5220+
ArrayRef<llvm::Value *> Args,
5221+
unsigned ID);
5222+
52185223
/// Set the codegen fast-math flags.
52195224
void SetFastMathFlags(FPOptions FPFeatures);
52205225

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3020,9 +3020,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
30203020
LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None;
30213021
std::string ComplexRangeStr = "";
30223022
std::string GccRangeComplexOption = "";
3023-
bool IsFp32PrecDivSqrtAllowed = JA.isDeviceOffloading(Action::OFK_SYCL) &&
3024-
!JA.isDeviceOffloading(Action::OFK_Cuda) &&
3025-
!JA.isOffloading(Action::OFK_HIP);
3023+
bool IsFp32PrecDivSqrtAllowed = JA.isDeviceOffloading(Action::OFK_SYCL);
30263024

30273025
// Lambda to set fast-math options. This is also used by -ffp-model=fast
30283026
auto applyFastMath = [&]() {

clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
// RUN: -ffp-builtin-accuracy=high %s -o - \
6464
// RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
6565

66-
// RUN: %clang_cc1 %{common_opts_spirv32} -ffp-builtin-accuracy=high:fdiv \
66+
// RUN: %clang_cc1 %{common_opts_spirv32} -ffp-builtin-accuracy=high:div \
6767
// RUN: -fno-offload-fp32-prec-div %s -o - \
6868
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s
6969

@@ -131,7 +131,7 @@
131131
// RUN: -ffp-builtin-accuracy=high %s -o - \
132132
// RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
133133

134-
// RUN: %clang_cc1 %{common_opts_spirv64} -ffp-builtin-accuracy=high:fdiv \
134+
// RUN: %clang_cc1 %{common_opts_spirv64} -ffp-builtin-accuracy=high:div \
135135
// RUN: -fno-offload-fp32-prec-div %s -o - \
136136
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s
137137

@@ -200,7 +200,7 @@
200200
// RUN: -ffp-builtin-accuracy=high %s -o - \
201201
// RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
202202

203-
// RUN: %clang_cc1 %{common_opts_spir} -ffp-builtin-accuracy=high:fdiv \
203+
// RUN: %clang_cc1 %{common_opts_spir} -ffp-builtin-accuracy=high:div \
204204
// RUN: -fno-offload-fp32-prec-div %s -o - \
205205
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s
206206

@@ -268,7 +268,7 @@
268268
// RUN: -ffp-builtin-accuracy=high %s -o - \
269269
// RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
270270

271-
// RUN: %clang_cc1 %{common_opts_spir64} -ffp-builtin-accuracy=high:fdiv \
271+
// RUN: %clang_cc1 %{common_opts_spir64} -ffp-builtin-accuracy=high:div \
272272
// RUN: -fno-offload-fp32-prec-div %s -o - \
273273
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s
274274

@@ -287,7 +287,6 @@
287287
#include "sycl.hpp"
288288

289289
extern "C" SYCL_EXTERNAL float sqrt(float);
290-
extern "C" SYCL_EXTERNAL float fdiv(float, float);
291290

292291
using namespace sycl;
293292

@@ -297,6 +296,7 @@ int main() {
297296
float Value1 = .5f;
298297
float Value2 = .9f;
299298
queue deviceQueue;
299+
float *a;
300300

301301
deviceQueue.submit([&](handler& cgh) {
302302
cgh.parallel_for<class KernelSqrt>(numOfItems,
@@ -322,22 +322,22 @@ int main() {
322322
deviceQueue.submit([&](handler& cgh) {
323323
cgh.parallel_for<class KernelFdiv>(numOfItems,
324324
[=](id<1> wiID) {
325-
// PREC-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
326-
// ROUNDED-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
325+
// PREC-SQRT: fdiv float {{.*}}, {{.*}}
326+
// ROUNDED-SQRT: fdiv float {{.*}}, {{.*}}
327327
// ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}) #[[ATTR_DIV:[0-9]+]]
328-
// PREC-DIV: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
328+
// PREC-DIV: fdiv float {{.*}}, {{.*}}
329329
// ROUNDED-DIV: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
330330
// ROUNDED-DIV-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
331-
// PREC-FAST: call reassoc nnan ninf nsz arcp afn spir_func nofpclass(nan inf) float @fdiv(float noundef nofpclass(nan inf) {{.*}}, float noundef nofpclass(nan inf) {{.*}})
331+
// PREC-FAST: fdiv reassoc nnan ninf nsz arcp afn float {{.*}}, {{.*}}
332332
// ROUNDED-DIV-ROUNDED-SQRT: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
333333
// PREC-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
334-
// ROUNDED-SQRT-PREC-DIV: call reassoc nnan ninf nsz arcp afn spir_func nofpclass(nan inf) float @fdiv(float noundef nofpclass(nan inf) {{.*}}, float noundef nofpclass(nan inf) {{.*}})
334+
// ROUNDED-SQRT-PREC-DIV: fdiv reassoc nnan ninf nsz arcp afn float {{.*}}, {{.*}}
335335
// ROUNDED-DIV-PREC-SQRT: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
336336
// ROUNDED-DIV-ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
337337
// LOW-PREC-DIV: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_FDIV_LOW:[0-9]+]]
338338
// HIGH-PREC: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_FDIV_HIGH:[0-9]+]]
339-
// LOW-PREC-SQRT: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_FDIV_LOW:[0-9]+]]
340-
(void)fdiv(Value1, Value1);
339+
// LOW-PREC-SQRT: fdiv float {{.*}}, {{.*}}
340+
a[0] = Value1 / Value2;
341341
});
342342
});
343343

@@ -355,4 +355,3 @@ return 0;
355355
// LOW-PREC-DIV: attributes #[[ATTR_FDIV_LOW]] = {{.*}}"fpbuiltin-max-error"="2.5"
356356
// HIGH-PREC: attributes #[[ATTR_FDIV_HIGH]] = {{.*}}"fpbuiltin-max-error"="1.0"
357357
// LOW-PREC-SQRT: attributes #[[ATTR_SQRT_LOW]] = {{.*}}"fpbuiltin-max-error"="3.0"
358-
// LOW-PREC-SQRT: attributes #[[ATTR_FDIV_LOW]] = {{.*}}"fpbuiltin-max-error"="1.0"

0 commit comments

Comments
 (0)