Addressed review comments.

zahiraam · zahiraam · commit e64302720f19 · 2024-11-13T12:37:51.000-08:00
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
@@ -1882,30 +1882,40 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
     StringRef FPAccuracyVal;
     auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
     if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) {
-      FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
-          ID, FuncType, convertFPAccuracy(FuncMapIt->second));
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
+        FPAccuracyVal = "2.5";
+      else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
+        FPAccuracyVal = "3.0";
+      else
+        FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
+            ID, FuncType, convertFPAccuracy(FuncMapIt->second));
       assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
       FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
       MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
           Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
     }
   }
   if (FuncAttrs.attrs().size() == 0) {
-    StringRef FPAccuracyVal =
-        llvm::StringSwitch<StringRef>(Name)
-            .Case("sqrt", getLangOpts().OffloadFP32PrecSqrt ? "" : "3.0")
-            .Case("fdiv", getLangOpts().OffloadFP32PrecDiv ? "" : "2.5")
-            .Default("");
-    if (FPAccuracyVal.empty()) {
-      if (!getLangOpts().FPAccuracyVal.empty()) {
+    if (!getLangOpts().FPAccuracyVal.empty()) {
+      StringRef FPAccuracyVal;
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
+        FPAccuracyVal = "2.5";
+      else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
+        FPAccuracyVal = "3.0";
+      else
         FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
             ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
-        assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
-        MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
-            Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
+      assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
+      FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
+      MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+          Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
+    } else {
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") {
+        FuncAttrs.addAttribute("fpbuiltin-max-error", "2.5");
+      } else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") {
+        FuncAttrs.addAttribute("fpbuiltin-max-error", "3.0");
       }
     }
-    FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
   }
 }
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2959,10 +2959,23 @@ static void EmitAccuracyDiag(const Driver &D, const JobAction &JA,
   }
 }
 
+// Splits Val on ':' and returns the ','-separated entries of its second
+// field (the function list); the result is empty when Val contains no ':'.
+static SmallVector<StringRef, 8> SplitFPAccuracyVal(StringRef Val) {
+  SmallVector<StringRef, 8> ValuesArr;
+  SmallVector<StringRef, 8> FuncsArr;
+  Val.split(ValuesArr, ":");
+  if (ValuesArr.size() > 1)
+    ValuesArr[1].split(FuncsArr, ",");
+  return FuncsArr;
+}
+
 static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
                                        bool OFastEnabled, const ArgList &Args,
                                        ArgStringList &CmdArgs,
-                                       const JobAction &JA) {
+                                       const JobAction &JA,
+                                       bool &NoOffloadFP32PrecDiv,
+                                       bool &NoOffloadFP32PrecSqrt) {
   // Handle various floating point optimization flags, mapping them to the
   // appropriate LLVM code generation flags. This is complicated by several
   // "umbrella" flags, so we do this by stepping through the flags incrementally
@@ -3007,8 +3020,6 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
   LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None;
   std::string ComplexRangeStr = "";
   std::string GccRangeComplexOption = "";
-  bool NoOffloadFP32PrecDiv = false;
-  bool NoOffloadFP32PrecSqrt = false;
   bool IsDeviceOffloading = JA.isDeviceOffloading(Action::OFK_SYCL);
 
   // Lambda to set fast-math options. This is also used by -ffp-model=fast
@@ -3077,7 +3088,6 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
     if (IsDeviceOffloading) {
       if (!FPAccuracy.empty())
         EmitAccuracyDiag(D, JA, FPAccuracy, SPIRVArg);
-
       if (SPIRVArg == "-fno-offload-fp32-prec-div")
         NoOffloadFP32PrecDiv = true;
       else if (SPIRVArg == "-fno-offload-fp32-prec-sqrt")
@@ -3090,20 +3100,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
   };
 
   auto parseFPAccOption = [&](StringRef Val, bool &NoOffloadFlag) {
-    SmallVector<StringRef, 8> ValuesArr;
-    Val.split(ValuesArr, ":");
-    if (ValuesArr.size() == 1)
-      NoOffloadFlag = false;
-    if (ValuesArr.size() > 1) {
-      StringRef x = ValuesArr[1];
-      SmallVector<StringRef, 8> FuncsArr;
-      x.split(FuncsArr, ",");
-      for (const auto &V : FuncsArr) {
-        if (V == "fdiv")
-          NoOffloadFlag = false;
-        else if (V == "sqrt")
-          NoOffloadFlag = false;
-      }
+    SmallVector<StringRef, 8> FuncsArr = SplitFPAccuracyVal(Val);
+    for (const auto &V : FuncsArr) {
+      if (V == "fdiv")
+        NoOffloadFlag = false;
+      else if (V == "sqrt")
+        NoOffloadFlag = false;
     }
   };
 
@@ -5389,6 +5391,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
   const Driver &D = TC.getDriver();
   ArgStringList CmdArgs;
+  bool NoOffloadFP32PrecDiv = false;
+  bool NoOffloadFP32PrecSqrt = false;
 
   assert(Inputs.size() >= 1 && "Must have at least one input.");
   // CUDA/HIP compilation may have multiple inputs (source file + results of
@@ -6197,7 +6201,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                        options::OPT_fno_optimize_sibling_calls);
 
     RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args,
-                               CmdArgs, JA);
+                               CmdArgs, JA, NoOffloadFP32PrecDiv,
+                               NoOffloadFP32PrecSqrt);
 
     // Render ABI arguments
     switch (TC.getArch()) {
@@ -6671,7 +6676,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                    options::OPT_fno_protect_parens, false))
     CmdArgs.push_back("-fprotect-parens");
 
-  RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA);
+  RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA,
+                             NoOffloadFP32PrecDiv, NoOffloadFP32PrecSqrt);
 
   if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) {
     const llvm::Triple::ArchType Arch = TC.getArch();
@@ -6722,8 +6728,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       FpAccuracyAttr += OptStr.str();
     }
   };
-  for (StringRef A : Args.getAllArgValues(options::OPT_ffp_accuracy_EQ))
-    RenderFPAccuracyOptions(A);
+  // Returns true when Func is any entry of Val's function list, not just the
+  // first one.
+  auto shouldAddFpAccuracyOption = [&](StringRef Val, StringRef Func) {
+    for (StringRef V : SplitFPAccuracyVal(Val))
+      if (V == Func)
+        return true;
+    return false;
+  };
+  for (StringRef A : Args.getAllArgValues(options::OPT_ffp_accuracy_EQ)) {
+    if (!(NoOffloadFP32PrecDiv && shouldAddFpAccuracyOption(A, "fdiv")) &&
+        !(NoOffloadFP32PrecSqrt && shouldAddFpAccuracyOption(A, "sqrt")))
+      RenderFPAccuracyOptions(A);
+  }
   if (!FpAccuracyAttr.empty())
     CmdArgs.push_back(Args.MakeArgString(FpAccuracyAttr));
 
diff --git a/clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp b/clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp
@@ -63,13 +63,9 @@
 // RUN: -ffp-builtin-accuracy=high %s -o - \
 // RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
 
-// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-div \
-// RUN: -ffp-builtin-accuracy=high:fdiv %s -o - \
-// RUN: | FileCheck --check-prefix HIGH-PREC %s
-
 // RUN: %clang_cc1 %{common_opts_spirv32} -ffp-builtin-accuracy=high:fdiv \
 // RUN: -fno-offload-fp32-prec-div %s -o - \
-// RUN: | FileCheck --check-prefix HIGH-PREC %s
+// RUN: | FileCheck --check-prefix ROUNDED-DIV %s
 
 // RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-sqrt \
 // RUN: -ffp-builtin-accuracy=high %s -o - \
@@ -135,13 +131,9 @@
 // RUN: -ffp-builtin-accuracy=high %s -o - \
 // RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
 
-// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-div \
-// RUN: -ffp-builtin-accuracy=high:fdiv %s -o - \
-// RUN: | FileCheck --check-prefix HIGH-PREC %s
-
 // RUN: %clang_cc1 %{common_opts_spirv64} -ffp-builtin-accuracy=high:fdiv  \
 // RUN: -fno-offload-fp32-prec-div %s -o - \
-// RUN: | FileCheck --check-prefix HIGH-PREC %s
+// RUN: | FileCheck --check-prefix ROUNDED-DIV %s
 
 // RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-sqrt \
 // RUN: -ffp-builtin-accuracy=high %s -o - \
@@ -208,13 +200,9 @@
 // RUN: -ffp-builtin-accuracy=high %s -o - \
 // RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
 
-// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-div \
-// RUN: -ffp-builtin-accuracy=high:fdiv %s -o - \
-// RUN: | FileCheck --check-prefix HIGH-PREC %s
-
 // RUN: %clang_cc1 %{common_opts_spir} -ffp-builtin-accuracy=high:fdiv \
 // RUN: -fno-offload-fp32-prec-div %s -o - \
-// RUN: | FileCheck --check-prefix HIGH-PREC %s
+// RUN: | FileCheck --check-prefix ROUNDED-DIV %s
 
 // RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-sqrt \
 // RUN: -ffp-builtin-accuracy=high %s -o - \
@@ -280,13 +268,9 @@
 // RUN: -ffp-builtin-accuracy=high %s -o - \
 // RUN: | FileCheck --check-prefix LOW-PREC-DIV %s
 
-// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-div \
-// RUN: -ffp-builtin-accuracy=high:fdiv %s -o - \
-// RUN: | FileCheck --check-prefix HIGH-PREC %s
-
 // RUN: %clang_cc1 %{common_opts_spir64} -ffp-builtin-accuracy=high:fdiv  \
 // RUN: -fno-offload-fp32-prec-div %s -o - \
-// RUN: | FileCheck --check-prefix HIGH-PREC %s
+// RUN: | FileCheck --check-prefix ROUNDED-DIV %s
 
 // RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-sqrt \
 // RUN: -ffp-builtin-accuracy=high %s -o - \
diff --git a/clang/test/Driver/offload-fp32-div-sqrt.cpp b/clang/test/Driver/offload-fp32-div-sqrt.cpp
@@ -24,27 +24,43 @@
 
 // RUN: %clang -c -fsycl -ffp-accuracy=high -fno-math-errno \
 // RUN: -fno-offload-fp32-prec-div -### %s 2>&1 \
-// RUN: | FileCheck %s --check-prefix=WARN-HIGH-DIV
+// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-DIV,NO_PREC_DIV_FP_ACC_HIGH
 
 // RUN: %clang -c -fsycl -fno-offload-fp32-prec-div -ffp-accuracy=high \
 // RUN: -fno-math-errno -### %s 2>&1 \
-// RUN: | FileCheck %s --check-prefix=WARN-HIGH-DIV
+// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-DIV,NO_PREC_DIV_FP_ACC_HIGH
+
+// RUN: %clang -c -fsycl -fno-offload-fp32-prec-div -ffp-accuracy=high:fdiv \
+// RUN: -fno-math-errno -### %s 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-DIV-ONLY,FP_ACC_HIGH_DIV
+
+// RUN: %clang -c -fsycl -ffp-accuracy=high:fdiv \
+// RUN: -fno-math-errno -fno-offload-fp32-prec-div  -### %s 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-DIV-ONLY,NO_PREC_DIV
 
 // RUN: %clang -c -fsycl -fno-offload-fp32-prec-sqrt -ffp-accuracy=high \
 // RUN: -fno-math-errno -### %s 2>&1 \
-// RUN: | FileCheck %s --check-prefix=WARN-HIGH-SQRT
+// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-SQRT,NO_PREC_SQRT_FP_ACC_HIGH
+
+// RUN: %clang -c -fsycl -fno-offload-fp32-prec-sqrt -ffp-accuracy=high:sqrt \
+// RUN: -fno-math-errno -### %s 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-SQRT-ONLY,FP_ACC_HIGH_SQRT
+
+// RUN: %clang -c -fsycl -ffp-accuracy=high:sqrt \
+// RUN: -fno-math-errno -fno-offload-fp32-prec-sqrt -### %s 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-SQRT-ONLY,NO_PREC_SQRT
 
 // RUN: %clang -c -fsycl -ffp-accuracy=high -fno-math-errno \
 // RUN: -fno-offload-fp32-prec-sqrt -### %s 2>&1 \
-// RUN: | FileCheck %s --check-prefix=WARN-HIGH-SQRT
+// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-SQRT,NO_PREC_SQRT_FP_ACC_HIGH
 
 // RUN: %clang -c -fsycl -ffp-accuracy=low -fno-math-errno \
 // RUN: -fno-offload-fp32-prec-div -### %s 2>&1 \
-// RUN: | FileCheck %s --check-prefix=WARN-LOW-DIV
+// RUN: | FileCheck %s --check-prefixes=WARN-LOW-DIV,NO_PREC_DIV_FP_ACC_LOW
 
 // RUN: %clang -c -fsycl -ffp-accuracy=low -fno-math-errno \
 // RUN: -fno-offload-fp32-prec-sqrt -### %s 2>&1 \
-// RUN: | FileCheck %s --check-prefix=WARN-LOW-SQRT
+// RUN: | FileCheck %s --check-prefixes=WARN-LOW-SQRT,NO_PREC_SQRT_FP_ACC_LOW
 
 // RUN: %clang -c -fsycl -ffp-model=fast  -### %s 2>&1 \
 // RUN: | FileCheck --check-prefix=FAST %s
@@ -61,23 +77,39 @@
 // RUN: %clang -c -fsycl -ffp-model=fast -foffload-fp32-prec-sqrt -### %s 2>&1 \
 // RUN: | FileCheck --check-prefix=NO_PREC_DIV %s
 
+// WARN-HIGH-DIV: floating point accuracy control 'high' conflicts with explicit target precision option '-fno-offload-fp32-prec-div'
+
+// WARN-HIGH-DIV-ONLY: floating point accuracy control 'high:fdiv' conflicts with explicit target precision option '-fno-offload-fp32-prec-div'
+
+// WARN-HIGH-SQRT: floating point accuracy control 'high' conflicts with explicit target precision option '-fno-offload-fp32-prec-sqrt'
+
+// WARN-HIGH-SQRT-ONLY: floating point accuracy control 'high:sqrt' conflicts with explicit target precision option '-fno-offload-fp32-prec-sqrt'
+
+// WARN-LOW-DIV: floating point accuracy control 'low' conflicts with explicit target precision option '-fno-offload-fp32-prec-div'
+
+// WARN-LOW-SQRT: floating point accuracy control 'low' conflicts with explicit target precision option '-fno-offload-fp32-prec-sqrt'
+
+
 // CHECK: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}}
 
-// CHECK-NOT: "-triple{{.*}}" "-fsycl-is-host"{{.*}} "-foffload-fp32-prec-div" "-foffload-fp32-prec-sqrt"
+// CHECK-NOT: "-triple{{.*}}" "-fsycl-is-host"{{.*}} "-foffload-fp32-prec-div" "-foffload-fp32-prec-sqrt"
 
 // NO_PREC_DIV: "-triple" "spir64{{.*}}"{{.*}} "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div"
 
 // NO_PREC_SQRT: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-sqrt"
 
 // NO_PREC_DIV_SQRT: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div" "-fno-offload-fp32-prec-sqrt"
 
-// WARN-HIGH-DIV: floating point accuracy control 'high' conflicts with explicit target precision option '-fno-offload-fp32-prec-div'
+// FAST: "-triple" "spir64{{.*}}"{{.*}} "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div" "-fno-offload-fp32-prec-sqrt"
 
-// WARN-HIGH-SQRT: floating point accuracy control 'high' conflicts with explicit target precision option '-fno-offload-fp32-prec-sqrt'
+// FP_ACC_HIGH_DIV: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-ffp-builtin-accuracy=high:fdiv"
 
-// WARN-LOW-DIV: floating point accuracy control 'low' conflicts with explicit target precision option '-fno-offload-fp32-prec-div'
+// FP_ACC_HIGH_SQRT: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-ffp-builtin-accuracy=high:sqrt"
 
-// WARN-LOW-SQRT: floating point accuracy control 'low' conflicts with explicit target precision option '-fno-offload-fp32-prec-sqrt'
+// NO_PREC_DIV_FP_ACC_HIGH: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div" "-ffp-builtin-accuracy=high"
 
-// FAST: "-triple" "spir64{{.*}}"{{.*}} "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div" "-fno-offload-fp32-prec-sqrt"
+// NO_PREC_SQRT_FP_ACC_HIGH: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}}  "-fno-offload-fp32-prec-sqrt" "-ffp-builtin-accuracy=high"
+
+// NO_PREC_DIV_FP_ACC_LOW: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div" "-ffp-builtin-accuracy=low"
 
+// NO_PREC_SQRT_FP_ACC_LOW: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-sqrt" "-ffp-builtin-accuracy=low"