diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index 5d145d0e5d861..3895436addce0 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -374,6 +374,11 @@ def err_ppc_impossible_musttail: Error< def err_aix_musttail_unsupported: Error< "'musttail' attribute is not supported on AIX">; +def warn_acuracy_conflicts_with_explicit_offload_fp32_prec_option : Warning< + "floating point accuracy control '%0' conflicts with explicit target " + "precision option '%1'">, + InGroup>; + // Source manager def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal; def err_file_modified : Error< diff --git a/clang/include/clang/Basic/FPOptions.def b/clang/include/clang/Basic/FPOptions.def index 81719a910e775..1afddf46f6a9d 100644 --- a/clang/include/clang/Basic/FPOptions.def +++ b/clang/include/clang/Basic/FPOptions.def @@ -30,4 +30,6 @@ OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16Exce OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision) OPTION(MathErrno, bool, 1, FPAccuracy) OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno) +OPTION(OffloadFP32PrecDiv, bool, 1, ComplexRange) +OPTION(OffloadFP32PrecSqrt, bool, 1, OffloadFP32PrecDiv) #undef OPTION diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 8132089f9736b..1e43975584b7e 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -377,6 +377,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, " ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic") ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic") 
BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions") +LANGOPT(OffloadFP32PrecDiv, 1, 1, "Return correctly rounded results of fdiv") +LANGOPT(OffloadFP32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt") LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment") LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility") LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9af48188776dd..7fd47e579130b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1167,6 +1167,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules", NegFlag>; + defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div", + LangOpts<"OffloadFP32PrecDiv">, DefaultTrue, + PosFlag, + NegFlag>, + Group; + + defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt", + LangOpts<"OffloadFP32PrecSqrt">, DefaultTrue, + PosFlag, + NegFlag>, + Group; + // OpenCL-only Options def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group, Visibility<[ClangOption, CC1Option]>, diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2f264d7508c5d..a1dfab51b9559 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -666,29 +666,6 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { return Store; } -static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name, - llvm::Function *FPBuiltinF, - ArrayRef Args, - unsigned ID) { - llvm::CallInst *CI = CGF.Builder.CreateCall(FPBuiltinF, Args); - // TODO: Replace AttrList with a single attribute. The call can only have a - // single FPAccuracy attribute. - llvm::AttributeList AttrList; - // "sycl_used_aspects" metadata associated with the call. 
- llvm::Metadata *AspectMD = nullptr; - // sincos() doesn't return a value, but it still has a type associated with - // it that corresponds to the operand type. - CGF.CGM.getFPAccuracyFuncAttributes( - Name, AttrList, AspectMD, ID, - Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType()); - CI->setAttributes(AttrList); - - if (CGF.getLangOpts().SYCLIsDevice && AspectMD) - CI->setMetadata("sycl_used_aspects", - llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectMD)); - return CI; -} - static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0, unsigned FPIntrinsicID, unsigned IntrinsicID, bool HasAccuracyRequirement) { @@ -697,13 +674,6 @@ static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0, : CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); } -static bool hasAccuracyRequirement(CodeGenFunction &CGF, StringRef Name) { - if (!CGF.getLangOpts().FPAccuracyVal.empty()) - return true; - auto FuncMapIt = CGF.getLangOpts().FPAccuracyFuncMap.find(Name.str()); - return FuncMapIt != CGF.getLangOpts().FPAccuracyFuncMap.end(); -} - static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E, unsigned FPAccuracyIntrinsicID, unsigned IntrinsicID, llvm::Value *Src0, @@ -722,7 +692,7 @@ static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E, CGF.CGM.getContext().BuiltinInfo.getName(CGF.getCurrentBuiltinID()); // Use fpbuiltin intrinsic only when needed. 
Func = getIntrinsic(CGF, Src0, FPAccuracyIntrinsicID, IntrinsicID, - hasAccuracyRequirement(CGF, Name)); + CGF.hasAccuracyRequirement(Name)); } } } @@ -741,8 +711,8 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin( Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID, IntrinsicID, Src0, Name); if (Func) - return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0}, - FPAccuracyIntrinsicID); + return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0}, + FPAccuracyIntrinsicID); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { @@ -766,8 +736,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin( Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID, IntrinsicID, Src0, Name); if (Func) - return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0, Src1}, - FPAccuracyIntrinsicID); + return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0, Src1}, + FPAccuracyIntrinsicID); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { @@ -25194,6 +25164,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD( .Case("sincos", llvm::Intrinsic::fpbuiltin_sincos) .Case("exp10", llvm::Intrinsic::fpbuiltin_exp10) .Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt) + .Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt) .Default(0); } else { // The function has a clang builtin. Create an attribute for it @@ -25295,10 +25266,11 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD( // a TU fp-accuracy requested. 
const LangOptions &LangOpts = getLangOpts(); if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) || - !LangOpts.FPAccuracyVal.empty()) { + !LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFP32PrecDiv || + !LangOpts.OffloadFP32PrecSqrt) { llvm::Function *Func = CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType()); - return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs), + return CreateBuiltinCallWithAttr(Name, Func, ArrayRef(IRArgs), FPAccuracyIntrinsicID); } return nullptr; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 79bfd6b31a009..3a1ee134449b2 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1902,25 +1902,44 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes( // the 'FPAccuracyFuncMap'; if no accuracy is mapped to Name (FuncAttrs // is empty), then set its accuracy from the TU's accuracy value. if (!getLangOpts().FPAccuracyFuncMap.empty()) { + StringRef FPAccuracyVal; auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str()); if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) { - StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin( - ID, FuncType, convertFPAccuracy(FuncMapIt->second)); + if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") + FPAccuracyVal = "2.5"; + else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") + FPAccuracyVal = "3.0"; + else + FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin( + ID, FuncType, convertFPAccuracy(FuncMapIt->second)); assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected"); FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal); MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second))); } } - if (FuncAttrs.attrs().size() == 0) + if (FuncAttrs.attrs().size() == 0) { if (!getLangOpts().FPAccuracyVal.empty()) { - StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin( - ID, FuncType, 
convertFPAccuracy(getLangOpts().FPAccuracyVal)); + StringRef FPAccuracyVal; + if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") + FPAccuracyVal = "2.5"; + else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") + FPAccuracyVal = "3.0"; + else + FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin( + ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal)); assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected"); FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal); MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal))); + } else { + if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") { + FuncAttrs.addAttribute("fpbuiltin-max-error", "2.5"); + } else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") { + FuncAttrs.addAttribute("fpbuiltin-max-error", "3.0"); + } } + } } /// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the @@ -5864,10 +5883,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Emit the actual call/invoke instruction. 
llvm::CallBase *CI; if (!InvokeDest) { - if (!getLangOpts().FPAccuracyFuncMap.empty() || - !getLangOpts().FPAccuracyVal.empty()) { - const auto *FD = dyn_cast_if_present(TargetDecl); - if (FD && FD->getNameInfo().getName().isIdentifier()) { + const auto *FD = dyn_cast_if_present(TargetDecl); + if (FD && FD->getNameInfo().getName().isIdentifier()) { + StringRef FuncName = FD->getName(); + const bool IsFloat32Type = FD->getReturnType()->isFloat32Type(); + bool hasFPAccuracyFuncMap = hasAccuracyRequirement(FuncName); + bool hasFPAccuracyVal = !getLangOpts().FPAccuracyVal.empty(); + bool isFp32SqrtFunction = + (FuncName == "sqrt" && !getLangOpts().OffloadFP32PrecSqrt && + IsFloat32Type); + if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction) { CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr, FD->getName(), FD->getBuiltinID()); if (CI) diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 4e2392f93e74b..26a91fb364012 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -3866,6 +3866,16 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { if (Ops.LHS->getType()->isFPOrFPVectorTy()) { llvm::Value *Val; CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); + if (Ops.LHS->getType()->isFloatTy()) { + if (!CGF.getLangOpts().OffloadFP32PrecDiv) { + unsigned FPAccuracyIntrinsicID = llvm::Intrinsic::fpbuiltin_fdiv; + llvm::Function *Func = + CGF.CGM.getIntrinsic(FPAccuracyIntrinsicID, Ops.LHS->getType()); + Val = CGF.CreateBuiltinCallWithAttr( + "fdiv", Func, {Ops.LHS, Ops.RHS}, FPAccuracyIntrinsicID); + return Val; + } + } Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div"); CGF.SetDivFPAccuracy(Val); return Val; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index de237d83b4b67..44f8c2bedf51a 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -121,6 
+121,35 @@ clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) { } } +bool CodeGenFunction::hasAccuracyRequirement(StringRef Name) { + if (!getLangOpts().FPAccuracyVal.empty()) + return true; + auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str()); + return FuncMapIt != getLangOpts().FPAccuracyFuncMap.end(); +} + +llvm::CallInst *CodeGenFunction::CreateBuiltinCallWithAttr( + StringRef Name, llvm::Function *FPBuiltinF, ArrayRef Args, + unsigned ID) { + llvm::CallInst *CI = Builder.CreateCall(FPBuiltinF, Args); + // TODO: Replace AttrList with a single attribute. The call can only have a + // single FPAccuracy attribute. + llvm::AttributeList AttrList; + // "sycl_used_aspects" metadata associated with the call. + llvm::Metadata *AspectMD = nullptr; + // sincos() doesn't return a value, but it still has a type associated with + // it that corresponds to the operand type. + CGM.getFPAccuracyFuncAttributes( + Name, AttrList, AspectMD, ID, + Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType()); + CI->setAttributes(AttrList); + + if (getLangOpts().SYCLIsDevice && AspectMD) + CI->setMetadata("sycl_used_aspects", + llvm::MDNode::get(CGM.getLLVMContext(), AspectMD)); + return CI; +} + void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) { llvm::FastMathFlags FMF; FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate()); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index f472dfc8748dc..5f3e19454384b 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5285,6 +5285,13 @@ class CodeGenFunction : public CodeGenTypeCache { /// CodeGenOpts. void SetDivFPAccuracy(llvm::Value *Val); + bool hasAccuracyRequirement(StringRef Name); + + llvm::CallInst *CreateBuiltinCallWithAttr(StringRef Name, + llvm::Function *FPBuiltinF, + ArrayRef Args, + unsigned ID); + /// Set the codegen fast-math flags. 
void SetFastMathFlags(FPOptions FPFeatures); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 16ceb800cdf2c..5f6cd793758ae 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2987,10 +2987,32 @@ RenderComplexRangeOption(LangOptions::ComplexRangeKind Range) { return ComplexRangeStr; } +static void EmitAccuracyDiag(const Driver &D, const JobAction &JA, + StringRef AccuracyValStr, StringRef TargetPrecStr) { + if (JA.isDeviceOffloading(Action::OFK_SYCL)) { + D.Diag(clang::diag:: + warn_acuracy_conflicts_with_explicit_offload_fp32_prec_option) + << AccuracyValStr << TargetPrecStr; + } +} + +static SmallVector SplitFPAccuracyVal(StringRef Val) { + SmallVector ValuesArr; + SmallVector FuncsArr; + Val.split(ValuesArr, ":"); + if (ValuesArr.size() > 1) { + StringRef FuncList = ValuesArr[1]; + FuncList.split(FuncsArr, ","); + } + return FuncsArr; +} + static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, bool OFastEnabled, const ArgList &Args, ArgStringList &CmdArgs, - const JobAction &JA) { + const JobAction &JA, + bool &NoOffloadFP32PrecDiv, + bool &NoOffloadFP32PrecSqrt) { // List of veclibs which when used with -fveclib imply -fno-math-errno. 
constexpr std::array VecLibImpliesNoMathErrno{llvm::StringLiteral("ArmPL"), llvm::StringLiteral("SLEEF")}; @@ -3043,6 +3065,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None; std::string ComplexRangeStr = ""; std::string GccRangeComplexOption = ""; + bool IsFp32PrecDivSqrtAllowed = JA.isDeviceOffloading(Action::OFK_SYCL); auto setComplexRange = [&](LangOptions::ComplexRangeKind NewRange) { // Warn if user expects to perform full implementation of complex @@ -3077,6 +3100,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, FPExceptionBehavior = ""; FPContract = "fast"; SeenUnsafeMathModeOption = true; + if (IsFp32PrecDivSqrtAllowed) { + // when fp-model=fast is used the default precision for division and + // sqrt is not precise. + NoOffloadFP32PrecDiv = true; + NoOffloadFP32PrecSqrt = true; + } }; // Lambda to consolidate common handling for fp-contract @@ -3105,6 +3134,31 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, CmdArgs.push_back(A->getValue()); } + auto addSPIRVArgs = [&](StringRef SPIRVArg) { + if (IsFp32PrecDivSqrtAllowed) { + if (!FPAccuracy.empty()) + EmitAccuracyDiag(D, JA, FPAccuracy, SPIRVArg); + if (SPIRVArg == "-fno-offload-fp32-prec-div") + NoOffloadFP32PrecDiv = true; + else if (SPIRVArg == "-fno-offload-fp32-prec-sqrt") + NoOffloadFP32PrecSqrt = true; + else if (SPIRVArg == "-foffload-fp32-prec-sqrt") + NoOffloadFP32PrecSqrt = false; + else if (SPIRVArg == "-foffload-fp32-prec-div") + NoOffloadFP32PrecDiv = false; + } + }; + + auto parseFPAccOption = [&](StringRef Val, bool &NoOffloadFlag) { + SmallVector FuncsArr = SplitFPAccuracyVal(Val); + for (const auto &V : FuncsArr) { + if (V == "fdiv") + NoOffloadFlag = false; + else if (V == "sqrt") + NoOffloadFlag = false; + } + }; + for (const Arg *A : Args) { auto CheckMathErrnoForVecLib = llvm::make_scope_exit([&, 
MathErrnoBeforeArg = MathErrno] { @@ -3116,6 +3170,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // If this isn't an FP option skip the claim below default: continue; + case options::OPT_foffload_fp32_prec_div: + addSPIRVArgs("-foffload-fp32-prec-div"); + break; + case options::OPT_foffload_fp32_prec_sqrt: + addSPIRVArgs("-foffload-fp32-prec-sqrt"); + break; + case options::OPT_fno_offload_fp32_prec_div: + addSPIRVArgs("-fno-offload-fp32-prec-div"); + break; + case options::OPT_fno_offload_fp32_prec_sqrt: + addSPIRVArgs("-fno-offload-fp32-prec-sqrt"); + break; case options::OPT_fcx_limited_range: if (GccRangeComplexOption.empty()) { if (Range != LangOptions::ComplexRangeKind::CX_Basic) @@ -3200,6 +3266,14 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, case options::OPT_ffp_accuracy_EQ: { StringRef Val = A->getValue(); FPAccuracy = Val; + if (NoOffloadFP32PrecDiv) { + EmitAccuracyDiag(D, JA, FPAccuracy, "-fno-offload-fp32-prec-div"); + parseFPAccOption(Val, NoOffloadFP32PrecDiv); + } + if (NoOffloadFP32PrecSqrt) { + EmitAccuracyDiag(D, JA, FPAccuracy, "-fno-offload-fp32-prec-sqrt"); + parseFPAccOption(Val, NoOffloadFP32PrecSqrt); + } break; } case options::OPT_ffp_model_EQ: { @@ -3632,6 +3706,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, CmdArgs.push_back("-fno-cx-limited-range"); if (Args.hasArg(options::OPT_fno_cx_fortran_rules)) CmdArgs.push_back("-fno-cx-fortran-rules"); + if (IsFp32PrecDivSqrtAllowed) { + if (NoOffloadFP32PrecDiv) + CmdArgs.push_back("-fno-offload-fp32-prec-div"); + if (NoOffloadFP32PrecSqrt) + CmdArgs.push_back("-fno-offload-fp32-prec-sqrt"); + } } static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs, @@ -5409,6 +5489,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); const Driver &D = TC.getDriver(); ArgStringList CmdArgs; + bool 
NoOffloadFP32PrecDiv = false; + bool NoOffloadFP32PrecSqrt = false; assert(Inputs.size() >= 1 && "Must have at least one input."); // CUDA/HIP compilation may have multiple inputs (source file + results of @@ -6245,7 +6327,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_optimize_sibling_calls); RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args, - CmdArgs, JA); + CmdArgs, JA, NoOffloadFP32PrecDiv, + NoOffloadFP32PrecSqrt); // Render ABI arguments switch (TC.getArch()) { @@ -6720,7 +6803,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_protect_parens, false)) CmdArgs.push_back("-fprotect-parens"); - RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA); + RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA, + NoOffloadFP32PrecDiv, NoOffloadFP32PrecSqrt); if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) { const llvm::Triple::ArchType Arch = TC.getArch(); @@ -6776,8 +6860,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, FpAccuracyAttr += OptStr.str(); } }; - for (StringRef A : Args.getAllArgValues(options::OPT_ffp_accuracy_EQ)) - RenderFPAccuracyOptions(A); + auto shouldAddFpAccuracyOption = [&](StringRef Val, StringRef Func) { + // True if Func is listed anywhere in Val, not only as its first entry. + for (StringRef V : SplitFPAccuracyVal(Val)) + if (V == Func) + return true; + return false; + }; + + for (StringRef A : Args.getAllArgValues(options::OPT_ffp_accuracy_EQ)) { + if (!(NoOffloadFP32PrecDiv && shouldAddFpAccuracyOption(A, "fdiv")) && + !(NoOffloadFP32PrecSqrt && shouldAddFpAccuracyOption(A, "sqrt"))) + RenderFPAccuracyOptions(A); + } if (!FpAccuracyAttr.empty()) CmdArgs.push_back(Args.MakeArgString(FpAccuracyAttr)); @@ -10817,8 +10912,22 @@ static void getTripleBasedSPIRVTransOpts(Compilation &C, ",+SPV_KHR_non_semantic_info" ",+SPV_KHR_cooperative_matrix" ",+SPV_EXT_shader_atomic_float16_add"; - if (IsCPU) + auto hasNoOffloadFP32PrecOption = [](const 
llvm::opt::ArgList &TCArgs) { + return !TCArgs.hasFlag(options::OPT_foffload_fp32_prec_sqrt, + options::OPT_fno_offload_fp32_prec_sqrt, false) && + !TCArgs.hasFlag(options::OPT_foffload_fp32_prec_div, + options::OPT_fno_offload_fp32_prec_div, false); + }; + auto shouldUseOffloadFP32PrecOption = [](const llvm::opt::ArgList &TCArgs) { + return (TCArgs.hasFlag(options::OPT_fno_offload_fp32_prec_sqrt, + options::OPT_foffload_fp32_prec_sqrt, false) || + TCArgs.hasFlag(options::OPT_fno_offload_fp32_prec_div, + options::OPT_foffload_fp32_prec_div, false)); + }; + if ((IsCPU && hasNoOffloadFP32PrecOption(TCArgs)) || + shouldUseOffloadFP32PrecOption(TCArgs)) { ExtArg += ",+SPV_INTEL_fp_max_error"; + } TranslatorArgs.push_back(TCArgs.MakeArgString(ExtArg)); } diff --git a/clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp b/clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp new file mode 100644 index 0000000000000..0c00d16f0205a --- /dev/null +++ b/clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp @@ -0,0 +1,354 @@ +// DEFINE: %{common_opts_spirv32} = -internal-isystem %S/Inputs \ +// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown + +// DEFINE: %{common_opts_spirv64} = -internal-isystem %S/Inputs \ +// DEFINE: -fsycl-is-device -emit-llvm -triple spirv64-unknown-unknown + +// DEFINE: %{common_opts_spir} = -internal-isystem %S/Inputs \ +// DEFINE: -fsycl-is-device -emit-llvm -triple spir-unknown-unknown + +// DEFINE: %{common_opts_spir64} = -internal-isystem %S/Inputs \ +// DEFINE: -fsycl-is-device -emit-llvm -triple spir64-unknown-unknown + +// RUN: %clang_cc1 %{common_opts_spirv32} %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -foffload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv32} 
-foffload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-div \ +// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV-ROUNDED-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \ +// RUN:-fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math -foffload-fp32-prec-div \ +// RUN: %s -o - | FileCheck --check-prefix PREC-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math -foffload-fp32-prec-sqrt \ +// RUN: %s -o - | FileCheck --check-prefix PREC-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -foffload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \ +// RUN: -fno-offload-fp32-prec-sqrt -foffload-fp32-prec-div \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-SQRT-PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -foffload-fp32-prec-sqrt \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-DIV-PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-DIV-ROUNDED-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-div \ +// RUN: -ffp-builtin-accuracy=high %s -o - \ +// RUN: | FileCheck --check-prefix LOW-PREC-DIV %s 
+ +// RUN: %clang_cc1 %{common_opts_spirv32} -ffp-builtin-accuracy=high:div \ +// RUN: -fno-offload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-sqrt \ +// RUN: -ffp-builtin-accuracy=high %s -o - \ +// RUN: | FileCheck --check-prefix LOW-PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-div \ +// RUN: -ffp-builtin-accuracy=high:sin %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-sqrt \ +// RUN: -ffp-builtin-accuracy=high:sin %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s + +// + +// RUN: %clang_cc1 %{common_opts_spirv64} %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -foffload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -foffload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-div \ +// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV-ROUNDED-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math -fno-offload-fp32-prec-div \ +// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math -fno-offload-fp32-prec-div \ +// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math -foffload-fp32-prec-div \ +// RUN: %s -o - 
| FileCheck --check-prefix PREC-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -foffload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math \ +// RUN: -fno-offload-fp32-prec-sqrt -foffload-fp32-prec-div \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-SQRT-PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -foffload-fp32-prec-sqrt \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-DIV-PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-DIV-ROUNDED-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-div \ +// RUN: -ffp-builtin-accuracy=high %s -o - \ +// RUN: | FileCheck --check-prefix LOW-PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -ffp-builtin-accuracy=high:div \ +// RUN: -fno-offload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-sqrt \ +// RUN: -ffp-builtin-accuracy=high %s -o - \ +// RUN: | FileCheck --check-prefix LOW-PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-div \ +// RUN: -ffp-builtin-accuracy=high:sin %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-sqrt \ +// RUN: -ffp-builtin-accuracy=high:sin %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir} %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir} -foffload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | 
FileCheck --check-prefix ROUNDED-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir} -foffload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-div \ +// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV-ROUNDED-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir} -ffast-math \ +// RUN:-fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir} -ffast-math -foffload-fp32-prec-div \ +// RUN: %s -o - | FileCheck --check-prefix PREC-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir} -ffast-math -foffload-fp32-prec-sqrt \ +// RUN: %s -o - | FileCheck --check-prefix PREC-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -foffload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir} -ffast-math \ +// RUN: -fno-offload-fp32-prec-sqrt -foffload-fp32-prec-div \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-SQRT-PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -foffload-fp32-prec-sqrt \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-DIV-PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-DIV-ROUNDED-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-div \ +// RUN: -ffp-builtin-accuracy=high %s -o - \ 
+// RUN: | FileCheck --check-prefix LOW-PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir} -ffp-builtin-accuracy=high:div \ +// RUN: -fno-offload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-sqrt \ +// RUN: -ffp-builtin-accuracy=high %s -o - \ +// RUN: | FileCheck --check-prefix LOW-PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-div \ +// RUN: -ffp-builtin-accuracy=high:sin %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-sqrt \ +// RUN: -ffp-builtin-accuracy=high:sin %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s + +// + +// RUN: %clang_cc1 %{common_opts_spir64} %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir64} -foffload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir64} -foffload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-div \ +// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV-ROUNDED-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math -fno-offload-fp32-prec-div \ +// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math -fno-offload-fp32-prec-div \ +// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math 
-foffload-fp32-prec-div \ +// RUN: %s -o - | FileCheck --check-prefix PREC-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -foffload-fp32-prec-sqrt %s -o - \ +// RUN: | FileCheck --check-prefix PREC-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math \ +// RUN: -fno-offload-fp32-prec-sqrt -foffload-fp32-prec-div \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-SQRT-PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -foffload-fp32-prec-sqrt \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-DIV-PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math \ +// RUN: -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt \ +// RUN: %s -o - | FileCheck --check-prefix ROUNDED-DIV-ROUNDED-SQRT-FAST %s + +// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-div \ +// RUN: -ffp-builtin-accuracy=high %s -o - \ +// RUN: | FileCheck --check-prefix LOW-PREC-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir64} -ffp-builtin-accuracy=high:div \ +// RUN: -fno-offload-fp32-prec-div %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-sqrt \ +// RUN: -ffp-builtin-accuracy=high %s -o - \ +// RUN: | FileCheck --check-prefix LOW-PREC-SQRT %s + +// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-div \ +// RUN: -ffp-builtin-accuracy=high:sin %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-DIV %s + +// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-sqrt \ +// RUN: -ffp-builtin-accuracy=high:sin %s -o - \ +// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s + +#include "sycl.hpp" + +extern "C" SYCL_EXTERNAL float sqrt(float); + +using namespace sycl; + +int main() { // Test driver: submits two parallel_for lambdas to one queue; the first calls sqrt, the second performs a float division. + const unsigned array_size = 4; + range<1> numOfItems{array_size}; + float Value1 = .5f; + float Value2 = .9f; + queue deviceQueue; + float *a; // NOTE(review): never allocated; every visible RUN line only pipes compiler output to FileCheck, so the store through it is presumably never executed - confirm + +
deviceQueue.submit([&](handler& cgh) { + cgh.parallel_for(numOfItems, + [=](id<1> wiID) { + // PREC-SQRT: call spir_func float @sqrt(float noundef {{.*}}) + // ROUNDED-SQRT: call float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]] + // ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]] + // PREC-DIV: call spir_func float @sqrt(float noundef {{.*}}) + // ROUNDED-DIV: call spir_func float @sqrt(float noundef {{.*}}) + // ROUNDED-DIV-ROUNDED-SQRT: call float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]] + // PREC-FAST: call reassoc nnan ninf nsz arcp afn spir_func nofpclass(nan inf) float @sqrt(float noundef nofpclass(nan inf) {{.*}}) + // PREC-SQRT-FAST: call reassoc nnan ninf nsz arcp afn spir_func nofpclass(nan inf) float @sqrt(float noundef nofpclass(nan inf) {{.*}}) + // ROUNDED-SQRT-PREC-DIV: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]] + // ROUNDED-DIV-PREC-SQRT: call reassoc nnan ninf nsz arcp afn spir_func nofpclass(nan inf) float @sqrt(float noundef nofpclass(nan inf) {{.*}}) + // ROUNDED-DIV-ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]] + // LOW-PREC-DIV: call float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT_LOW:[0-9]+]] + // LOW-PREC-SQRT: call float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT_LOW:[0-9]+]] + (void)sqrt(Value1); // value discarded; only the emitted sqrt call is matched by the patterns above + }); + }); + + deviceQueue.submit([&](handler& cgh) { + cgh.parallel_for(numOfItems, + [=](id<1> wiID) { + // PREC-SQRT: fdiv float {{.*}}, {{.*}} + // ROUNDED-SQRT: fdiv float {{.*}}, {{.*}} + // ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}) #[[ATTR_DIV:[0-9]+]] + // PREC-DIV: fdiv float {{.*}}, {{.*}} + // ROUNDED-DIV: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]] + // ROUNDED-DIV-FAST: call
reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]] + // PREC-FAST: fdiv reassoc nnan ninf nsz arcp afn float {{.*}}, {{.*}} + // ROUNDED-DIV-ROUNDED-SQRT: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]] + // PREC-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]] + // ROUNDED-SQRT-PREC-DIV: fdiv reassoc nnan ninf nsz arcp afn float {{.*}}, {{.*}} + // ROUNDED-DIV-PREC-SQRT: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]] + // ROUNDED-DIV-ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]] + // LOW-PREC-DIV: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_FDIV_LOW:[0-9]+]] + // LOW-PREC-SQRT: fdiv float {{.*}}, {{.*}} + a[0] = Value1 / Value2; // the float division that the fdiv / llvm.fpbuiltin.fdiv patterns above match + }); + }); + +return 0; +} + +// ROUNDED-SQRT: attributes #[[ATTR_SQRT]] = {{.*}}"fpbuiltin-max-error"="3.0" +// ROUNDED-SQRT-FAST: attributes #[[ATTR_SQRT]] = {{.*}}"fpbuiltin-max-error"="3.0" +// ROUNDED-SQRT-FAST: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5" +// ROUNDED-DIV: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5" +// ROUNDED-DIV-FAST: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5" +// ROUNDED-DIV-ROUNDED-SQRT-FAST: attributes #[[ATTR_SQRT]] = {{.*}}"fpbuiltin-max-error"="3.0" +// ROUNDED-DIV-ROUNDED-SQRT-FAST: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5" +// LOW-PREC-DIV: attributes #[[ATTR_SQRT_LOW]] = {{.*}}"fpbuiltin-max-error"="1.0" +// LOW-PREC-DIV: attributes #[[ATTR_FDIV_LOW]] = {{.*}}"fpbuiltin-max-error"="2.5" +// LOW-PREC-SQRT: attributes #[[ATTR_SQRT_LOW]] = {{.*}}"fpbuiltin-max-error"="3.0" diff --git a/clang/test/Driver/offload-fp32-div-sqrt.cpp b/clang/test/Driver/offload-fp32-div-sqrt.cpp new file
mode 100644 index 0000000000000..5ab227ec6a700 --- /dev/null +++ b/clang/test/Driver/offload-fp32-div-sqrt.cpp @@ -0,0 +1,115 @@ +// RUN: %clang -c -fsycl -### %s 2>&1 | FileCheck %s +// RUN: %clang -c -fsycl -foffload-fp32-prec-div -### %s 2>&1 | FileCheck %s +// RUN: %clang -c -fsycl -foffload-fp32-prec-sqrt -### %s 2>&1 | FileCheck %s + +// RUN: %clang -c -fsycl -foffload-fp32-prec-div -foffload-fp32-prec-sqrt \ +// RUN: -### %s 2>&1 | FileCheck %s + +// RUN: %clang -c -fsycl -foffload-fp32-prec-sqrt -foffload-fp32-prec-div \ +// RUN: -### %s 2>&1 | FileCheck %s + +// RUN: %clang -c -fsycl -fno-offload-fp32-prec-div -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=NO_PREC_DIV %s + +// RUN: %clang -c -fsycl -fno-offload-fp32-prec-sqrt -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=NO_PREC_SQRT %s + +// RUN: %clang -c -fsycl -fno-offload-fp32-prec-div \ +// RUN: -fno-offload-fp32-prec-sqrt -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=NO_PREC_DIV_SQRT %s + +// RUN: %clang -c -fsycl -fno-offload-fp32-prec-sqrt \ +// RUN: -fno-offload-fp32-prec-div -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=NO_PREC_DIV_SQRT %s + +// RUN: %clang -c -fsycl -ffp-accuracy=high -fno-math-errno \ +// RUN: -fno-offload-fp32-prec-div -### %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-DIV,NO_PREC_DIV_FP_ACC_HIGH + +// RUN: %clang -c -fsycl -fno-offload-fp32-prec-div -ffp-accuracy=high \ +// RUN: -fno-math-errno -### %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-DIV,NO_PREC_DIV_FP_ACC_HIGH + +// RUN: %clang -c -fsycl -fno-offload-fp32-prec-div -ffp-accuracy=high:fdiv \ +// RUN: -fno-math-errno -### %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-DIV-ONLY,FP_ACC_HIGH_DIV + +// RUN: %clang -c -fsycl -ffp-accuracy=high:fdiv \ +// RUN: -fno-math-errno -fno-offload-fp32-prec-div -### %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-DIV-ONLY,NO_PREC_DIV + +// RUN: %clang -c -fsycl -fno-offload-fp32-prec-sqrt 
-ffp-accuracy=high \ +// RUN: -fno-math-errno -### %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-SQRT,NO_PREC_SQRT_FP_ACC_HIGH + +// RUN: %clang -c -fsycl -fno-offload-fp32-prec-sqrt -ffp-accuracy=high:sqrt \ +// RUN: -fno-math-errno -### %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-SQRT-ONLY,FP_ACC_HIGH_SQRT + +// RUN: %clang -c -fsycl -ffp-accuracy=high:sqrt \ +// RUN: -fno-math-errno -fno-offload-fp32-prec-sqrt -### %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-SQRT-ONLY,NO_PREC_SQRT + +// RUN: %clang -c -fsycl -ffp-accuracy=high -fno-math-errno \ +// RUN: -fno-offload-fp32-prec-sqrt -### %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=WARN-HIGH-SQRT,NO_PREC_SQRT_FP_ACC_HIGH + +// RUN: %clang -c -fsycl -ffp-accuracy=low -fno-math-errno \ +// RUN: -fno-offload-fp32-prec-div -### %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=WARN-LOW-DIV,NO_PREC_DIV_FP_ACC_LOW + +// RUN: %clang -c -fsycl -ffp-accuracy=low -fno-math-errno \ +// RUN: -fno-offload-fp32-prec-sqrt -### %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=WARN-LOW-SQRT,NO_PREC_SQRT_FP_ACC_LOW + +// RUN: %clang -c -fsycl -ffp-model=fast -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FAST %s + +// RUN: %clang -c -fsycl -foffload-fp32-prec-div -ffp-model=fast -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FAST %s + +// RUN: %clang -c -fsycl -foffload-fp32-prec-sqrt -ffp-model=fast -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FAST %s + +// RUN: %clang -c -fsycl -ffp-model=fast -foffload-fp32-prec-div -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=NO_PREC_SQRT %s + +// RUN: %clang -c -fsycl -ffp-model=fast -foffload-fp32-prec-sqrt -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=NO_PREC_DIV %s + +// WARN-HIGH-DIV: floating point accuracy control 'high' conflicts with explicit target precision option '-fno-offload-fp32-prec-div' + +// WARN-HIGH-DIV-ONLY: floating point accuracy control 'high:fdiv' conflicts with explicit target precision 
option '-fno-offload-fp32-prec-div' + +// WARN-HIGH-SQRT: floating point accuracy control 'high' conflicts with explicit target precision option '-fno-offload-fp32-prec-sqrt' + +// WARN-HIGH-SQRT-ONLY: floating point accuracy control 'high:sqrt' conflicts with explicit target precision option '-fno-offload-fp32-prec-sqrt' + +// WARN-LOW-DIV: floating point accuracy control 'low' conflicts with explicit target precision option '-fno-offload-fp32-prec-div' + +// WARN-LOW-SQRT: floating point accuracy control 'low' conflicts with explicit target precision option '-fno-offload-fp32-prec-sqrt' + + +// CHECK: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} +// CHECK-NOT: "-foffload-fp32-prec-div" +// CHECK-NOT: "-foffload-fp32-prec-sqrt" + +// NO_PREC_DIV: "-triple" "spir64{{.*}}"{{.*}} "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div" + +// NO_PREC_SQRT: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-sqrt" + +// NO_PREC_DIV_SQRT: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div" "-fno-offload-fp32-prec-sqrt" + +// FAST: "-triple" "spir64{{.*}}"{{.*}} "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div" "-fno-offload-fp32-prec-sqrt" + +// FP_ACC_HIGH_DIV: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-ffp-builtin-accuracy=high:fdiv" + +// FP_ACC_HIGH_SQRT: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-ffp-builtin-accuracy=high:sqrt" + +// NO_PREC_DIV_FP_ACC_HIGH: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div" "-ffp-builtin-accuracy=high" + +// NO_PREC_SQRT_FP_ACC_HIGH: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-sqrt" "-ffp-builtin-accuracy=high" + +// NO_PREC_DIV_FP_ACC_LOW: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-div" "-ffp-builtin-accuracy=low" + +// NO_PREC_SQRT_FP_ACC_LOW: "-triple" "spir64{{.*}}" "-fsycl-is-device"{{.*}} "-fno-offload-fp32-prec-sqrt" "-ffp-builtin-accuracy=low" diff --git 
a/clang/test/Driver/offload-fp32-div-sqrt.cu b/clang/test/Driver/offload-fp32-div-sqrt.cu new file mode 100644 index 0000000000000..766f87e5c48b2 --- /dev/null +++ b/clang/test/Driver/offload-fp32-div-sqrt.cu @@ -0,0 +1,39 @@ +// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \ +// RUN: -foffload-fp32-prec-div -### %s 2>&1 | FileCheck %s + +// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \ +// RUN: -foffload-fp32-prec-sqrt -### %s 2>&1 | FileCheck %s + +// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \ +// RUN: -fno-offload-fp32-prec-div -### %s 2>&1 | FileCheck %s + +// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \ +// RUN: -fno-offload-fp32-prec-sqrt -### %s 2>&1 | FileCheck %s + +// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \ +// RUN: -ffp-accuracy=high -fno-offload-fp32-prec-div -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FPACC %s + +// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \ +// RUN: -ffp-accuracy=high -fno-offload-fp32-prec-sqrt -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FPACC %s + +// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \ +// RUN: -fno-offload-fp32-prec-div -ffp-accuracy=high -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FPACC %s + +// RUN: %clang --cuda-device-only --cuda-gpu-arch=sm_20 \ +// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -c \ +// RUN: -fno-offload-fp32-prec-sqrt -ffp-accuracy=high -### %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FPACC %s + +// CHECK-NOT: "-foffload-fp32-prec-div" +// CHECK-NOT: "-foffload-fp32-prec-sqrt" +// FPACC: 
"-ffp-builtin-accuracy=high" diff --git a/clang/test/Driver/sycl-spirv-ext-old-model.c b/clang/test/Driver/sycl-spirv-ext-old-model.c index cde451bba65da..831da656f2f01 100644 --- a/clang/test/Driver/sycl-spirv-ext-old-model.c +++ b/clang/test/Driver/sycl-spirv-ext-old-model.c @@ -24,6 +24,63 @@ // RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT // RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHECK-CPU +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -fno-offload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -fno-offload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -fno-offload-fp32-prec-sqrt -fno-offload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -foffload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU,CHECK-CPU-NFPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -foffload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU,CHECK-CPU-NFPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl 
--no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -foffload-fp32-prec-div -foffload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU,CHECK-CPU-NFPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -foffload-fp32-prec-sqrt -foffload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU,CHECK-CPU-NFPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -fno-offload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -fno-offload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_x86_64-unknown-unknown -fno-offload-fp32-prec-sqrt -fno-offload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-CPU +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen-unknown-unknown -fno-offload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT,CHECK-DEFAULT-FPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen-unknown-unknown -fno-offload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT,CHECK-DEFAULT-FPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen-unknown-unknown 
-fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT,CHECK-DEFAULT-FPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen-unknown-unknown -fno-offload-fp32-prec-sqrt -fno-offload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT,CHECK-DEFAULT-FPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen-unknown-unknown -foffload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen-unknown-unknown -foffload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen-unknown-unknown -foffload-fp32-prec-div -foffload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_gen-unknown-unknown -foffload-fp32-prec-sqrt -foffload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-DEFAULT +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware -fno-offload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW,CHECK-FPGA-HW-FPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware -fno-offload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW,CHECK-FPGA-HW-FPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware -fno-offload-fp32-prec-div 
-fno-offload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW,CHECK-FPGA-HW-FPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware -fno-offload-fp32-prec-sqrt -fno-offload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW,CHECK-FPGA-HW-FPME +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware -foffload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware -foffload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware -foffload-fp32-prec-div -foffload-fp32-prec-sqrt %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware -foffload-fp32-prec-sqrt -foffload-fp32-prec-div %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-FPGA-HW + // CHECK-DEFAULT: llvm-spirv{{.*}}"-spirv-ext=-all // CHECK-DEFAULT-SAME:,+SPV_EXT_shader_atomic_float_add @@ -61,7 +118,8 @@ // CHECK-DEFAULT-SAME:,+SPV_INTEL_optnone // CHECK-DEFAULT-SAME:,+SPV_KHR_non_semantic_info // CHECK-DEFAULT-SAME:,+SPV_KHR_cooperative_matrix -// CHECK-DEFAULT-SAME:,+SPV_EXT_shader_atomic_float16_add" +// CHECK-DEFAULT-SAME:,+SPV_EXT_shader_atomic_float16_add +// CHECK-DEFAULT-FPME:,+SPV_INTEL_fp_max_error" // CHECK-FPGA-HW: llvm-spirv{{.*}}"-spirv-ext=-all // CHECK-FPGA-HW-SAME:,+SPV_EXT_shader_atomic_float_add // CHECK-FPGA-HW-SAME:,+SPV_EXT_shader_atomic_float_min_max @@ -90,7 +148,8 @@ // 
CHECK-FPGA-HW-SAME:,+SPV_INTEL_fpga_cluster_attributes,+SPV_INTEL_loop_fuse // CHECK-FPGA-HW-SAME:,+SPV_INTEL_fpga_dsp_control // CHECK-FPGA-HW-SAME:,+SPV_INTEL_fpga_memory_accesses -// CHECK-FPGA-HW-SAME:,+SPV_INTEL_fpga_memory_attributes" +// CHECK-FPGA-HW-SAME:,+SPV_INTEL_fpga_memory_attributes +// CHECK-FPGA-HW-FPME:,+SPV_INTEL_fp_max_error" // CHECK-CPU: llvm-spirv{{.*}}"-spirv-allow-unknown-intrinsics=llvm.genx.,llvm.fpbuiltin" // CHECK-CPU-SAME: {{.*}}"-spirv-ext=-all // CHECK-CPU-SAME:,+SPV_EXT_shader_atomic_float_add @@ -126,4 +185,4 @@ // CHECK-CPU-SAME:,+SPV_INTEL_optnone // CHECK-CPU-SAME:,+SPV_KHR_non_semantic_info // CHECK-CPU-SAME:,+SPV_KHR_cooperative_matrix -// CHECK-CPU-SAME:,+SPV_INTEL_fp_max_error" +// CHECK-CPU-NFPME-NOT:,+SPV_INTEL_fp_max_error"