Remove the restriction on CUDA/HIP and change the code so that the div

zahiraam · zahiraam · commit bc0175966993 · 2024-11-18T12:46:54.000-08:00
instruction, rather than the fdiv function, gets the precision set.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -490,29 +490,6 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
   return Store;
 }
 
-static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name,
-                                           llvm::Function *FPBuiltinF,
-                                           ArrayRef<Value *> Args,
-                                           unsigned ID) {
-  llvm::CallInst *CI = CGF.Builder.CreateCall(FPBuiltinF, Args);
-  // TODO: Replace AttrList with a single attribute. The call can only have a
-  // single FPAccuracy attribute.
-  llvm::AttributeList AttrList;
-  // "sycl_used_aspects" metadata associated with the call.
-  llvm::Metadata *AspectMD = nullptr;
-  // sincos() doesn't return a value, but it still has a type associated with
-  // it that corresponds to the operand type.
-  CGF.CGM.getFPAccuracyFuncAttributes(
-      Name, AttrList, AspectMD, ID,
-      Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
-  CI->setAttributes(AttrList);
-
-  if (CGF.getLangOpts().SYCLIsDevice && AspectMD)
-    CI->setMetadata("sycl_used_aspects",
-                    llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectMD));
-  return CI;
-}
-
 static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
                               unsigned FPIntrinsicID, unsigned IntrinsicID,
                               bool HasAccuracyRequirement) {
@@ -558,8 +535,8 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(
   Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
                                       IntrinsicID, Src0, Name);
   if (Func)
-    return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0},
-                                     FPAccuracyIntrinsicID);
+    return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0},
+                                         FPAccuracyIntrinsicID);
 
   CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
@@ -583,8 +560,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(
   Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
                                       IntrinsicID, Src0, Name);
   if (Func)
-    return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0, Src1},
-                                     FPAccuracyIntrinsicID);
+    return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0, Src1},
+                                         FPAccuracyIntrinsicID);
 
   CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
@@ -24198,7 +24175,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
       !LangOpts.OffloadFP32PrecSqrt) {
     llvm::Function *Func =
         CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
-    return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
+    return CreateBuiltinCallWithAttr(Name, Func, ArrayRef(IRArgs),
                                      FPAccuracyIntrinsicID);
   }
   return nullptr;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
@@ -1882,7 +1882,7 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
     StringRef FPAccuracyVal;
     auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
     if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) {
-      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "div")
         FPAccuracyVal = "2.5";
       else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
         FPAccuracyVal = "3.0";
@@ -1898,7 +1898,7 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
   if (FuncAttrs.attrs().size() == 0) {
     if (!getLangOpts().FPAccuracyVal.empty()) {
       StringRef FPAccuracyVal;
-      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "div")
         FPAccuracyVal = "2.5";
       else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
         FPAccuracyVal = "3.0";
@@ -1910,7 +1910,7 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
       MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
           Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
     } else {
-      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") {
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "div") {
         FuncAttrs.addAttribute("fpbuiltin-max-error", "2.5");
       } else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") {
         FuncAttrs.addAttribute("fpbuiltin-max-error", "3.0");
@@ -5818,11 +5818,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
       bool isFp32SqrtFunction =
           (FuncName == "sqrt" && !getLangOpts().OffloadFP32PrecSqrt &&
            IsFloat32Type);
-      bool isFP32FdivFunction =
-          (FuncName == "fdiv" && !getLangOpts().OffloadFP32PrecDiv &&
-           IsFloat32Type);
-      if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction ||
-          isFP32FdivFunction) {
+      if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction) {
         CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
                                     FD->getName(), FD->getBuiltinID());
         if (CI)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3783,6 +3783,16 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
   if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
     llvm::Value *Val;
     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
+    if (Ops.LHS->getType()->isFloatTy()) {
+      if (!CGF.getLangOpts().OffloadFP32PrecDiv) {
+        unsigned FPAccuracyIntrinsicID = llvm::Intrinsic::fpbuiltin_fdiv;
+        llvm::Function *Func =
+            CGF.CGM.getIntrinsic(FPAccuracyIntrinsicID, Ops.LHS->getType());
+        llvm::Value *Val = CGF.CreateBuiltinCallWithAttr(
+            "div", Func, {Ops.LHS, Ops.RHS}, FPAccuracyIntrinsicID);
+        return Val;
+      }
+    }
     Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
     CGF.SetDivFPAccuracy(Val);
     return Val;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -129,6 +129,28 @@ bool CodeGenFunction::hasAccuracyRequirement(StringRef Name) {
   return FuncMapIt != getLangOpts().FPAccuracyFuncMap.end();
 }
 
+llvm::CallInst *CodeGenFunction::CreateBuiltinCallWithAttr(
+    StringRef Name, llvm::Function *FPBuiltinF, ArrayRef<llvm::Value *> Args,
+    unsigned ID) {
+  llvm::CallInst *CI = Builder.CreateCall(FPBuiltinF, Args);
+  // TODO: Replace AttrList with a single attribute. The call can only have a
+  // single FPAccuracy attribute.
+  llvm::AttributeList AttrList;
+  // "sycl_used_aspects" metadata associated with the call.
+  llvm::Metadata *AspectMD = nullptr;
+  // sincos() doesn't return a value, but it still has a type associated with
+  // it that corresponds to the operand type.
+  CGM.getFPAccuracyFuncAttributes(
+      Name, AttrList, AspectMD, ID,
+      Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
+  CI->setAttributes(AttrList);
+
+  if (getLangOpts().SYCLIsDevice && AspectMD)
+    CI->setMetadata("sycl_used_aspects",
+                    llvm::MDNode::get(CGM.getLLVMContext(), AspectMD));
+  return CI;
+}
+
 void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) {
   llvm::FastMathFlags FMF;
   FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate());
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5215,6 +5215,11 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   bool hasAccuracyRequirement(StringRef Name);
 
+  llvm::CallInst *CreateBuiltinCallWithAttr(StringRef Name,
+                                            llvm::Function *FPBuiltinF,
+                                            ArrayRef<llvm::Value *> Args,
+                                            unsigned ID);
+
   /// Set the codegen fast-math flags.
   void SetFastMathFlags(FPOptions FPFeatures);
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3020,9 +3020,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
   LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None;
   std::string ComplexRangeStr = "";
   std::string GccRangeComplexOption = "";
-  bool IsFp32PrecDivSqrtAllowed = JA.isDeviceOffloading(Action::OFK_SYCL) &&
-                                  !JA.isDeviceOffloading(Action::OFK_Cuda) &&
-                                  !JA.isOffloading(Action::OFK_HIP);
+  bool IsFp32PrecDivSqrtAllowed = JA.isDeviceOffloading(Action::OFK_SYCL);
 
   // Lambda to set fast-math options. This is also used by -ffp-model=fast
   auto applyFastMath = [&]() {
diff --git a/clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp b/clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp
@@ -63,7 +63,7 @@
 // RUN: -ffp-builtin-accuracy=high %s -o - \
 // RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
 
-// RUN: %clang_cc1 %{common_opts_spirv32} -ffp-builtin-accuracy=high:fdiv \
+// RUN: %clang_cc1 %{common_opts_spirv32} -ffp-builtin-accuracy=high:div \
 // RUN: -fno-offload-fp32-prec-div %s -o - \
 // RUN: | FileCheck --check-prefix ROUNDED-DIV %s
 
@@ -131,7 +131,7 @@
 // RUN: -ffp-builtin-accuracy=high %s -o - \
 // RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
 
-// RUN: %clang_cc1 %{common_opts_spirv64} -ffp-builtin-accuracy=high:fdiv  \
+// RUN: %clang_cc1 %{common_opts_spirv64} -ffp-builtin-accuracy=high:div  \
 // RUN: -fno-offload-fp32-prec-div %s -o - \
 // RUN: | FileCheck --check-prefix ROUNDED-DIV %s
 
@@ -200,7 +200,7 @@
 // RUN: -ffp-builtin-accuracy=high %s -o - \
 // RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
 
-// RUN: %clang_cc1 %{common_opts_spir} -ffp-builtin-accuracy=high:fdiv \
+// RUN: %clang_cc1 %{common_opts_spir} -ffp-builtin-accuracy=high:div \
 // RUN: -fno-offload-fp32-prec-div %s -o - \
 // RUN: | FileCheck --check-prefix ROUNDED-DIV %s
 
@@ -268,7 +268,7 @@
 // RUN: -ffp-builtin-accuracy=high %s -o - \
 // RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
 
-// RUN: %clang_cc1 %{common_opts_spir64} -ffp-builtin-accuracy=high:fdiv  \
+// RUN: %clang_cc1 %{common_opts_spir64} -ffp-builtin-accuracy=high:div  \
 // RUN: -fno-offload-fp32-prec-div %s -o - \
 // RUN: | FileCheck --check-prefix ROUNDED-DIV %s
 
@@ -287,7 +287,6 @@
 #include "sycl.hpp"
 
 extern "C" SYCL_EXTERNAL float sqrt(float);
-extern "C" SYCL_EXTERNAL float fdiv(float, float);
 
 using namespace sycl;
 
@@ -297,6 +296,7 @@ int main() {
   float Value1 = .5f;
   float Value2 = .9f;
   queue deviceQueue;
+  float *a;
 
   deviceQueue.submit([&](handler& cgh) {
     cgh.parallel_for<class KernelSqrt>(numOfItems,
@@ -322,22 +322,22 @@ int main() {
   deviceQueue.submit([&](handler& cgh) {
     cgh.parallel_for<class KernelFdiv>(numOfItems,
     [=](id<1> wiID) {
-      // PREC-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
-      // ROUNDED-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
+      // PREC-SQRT: fdiv float {{.*}}, {{.*}}
+      // ROUNDED-SQRT: fdiv float {{.*}}, {{.*}}
       // ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}) #[[ATTR_DIV:[0-9]+]]
-      // PREC-DIV: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
+      // PREC-DIV: fdiv float {{.*}}, {{.*}}
       // ROUNDED-DIV: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
       // ROUNDED-DIV-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
-      // PREC-FAST: call reassoc nnan ninf nsz arcp afn spir_func nofpclass(nan inf) float @fdiv(float noundef nofpclass(nan inf) {{.*}}, float noundef nofpclass(nan inf) {{.*}})
+      // PREC-FAST: fdiv reassoc nnan ninf nsz arcp afn float {{.*}}, {{.*}}
       // ROUNDED-DIV-ROUNDED-SQRT: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
       // PREC-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
-      // ROUNDED-SQRT-PREC-DIV: call reassoc nnan ninf nsz arcp afn spir_func nofpclass(nan inf) float @fdiv(float noundef nofpclass(nan inf) {{.*}}, float noundef nofpclass(nan inf) {{.*}})
+      // ROUNDED-SQRT-PREC-DIV: fdiv reassoc nnan ninf nsz arcp afn float {{.*}}, {{.*}}
       // ROUNDED-DIV-PREC-SQRT: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
       // ROUNDED-DIV-ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
       // LOW-PREC-DIV: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_FDIV_LOW:[0-9]+]]
       // HIGH-PREC: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_FDIV_HIGH:[0-9]+]]
-      // LOW-PREC-SQRT: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_FDIV_LOW:[0-9]+]]
-      (void)fdiv(Value1, Value1);
+      // LOW-PREC-SQRT: fdiv float {{.*}}, {{.*}}
+      a[0] = Value1 / Value2;
     });
   });
 
@@ -355,4 +355,3 @@ return 0;
 // LOW-PREC-DIV: attributes #[[ATTR_FDIV_LOW]] = {{.*}}"fpbuiltin-max-error"="2.5"
 // HIGH-PREC: attributes #[[ATTR_FDIV_HIGH]] = {{.*}}"fpbuiltin-max-error"="1.0"
 // LOW-PREC-SQRT: attributes #[[ATTR_SQRT_LOW]] = {{.*}}"fpbuiltin-max-error"="3.0"
-// LOW-PREC-SQRT: attributes #[[ATTR_FDIV_LOW]] = {{.*}}"fpbuiltin-max-error"="1.0"