@@ -31,6 +31,68 @@ using namespace clang::driver::toolchains;
3131using namespace clang ;
3232using namespace llvm ::opt;
3333
34+ RocmInstallationDetector::CommonBitcodeLibsPreferences::
35+ CommonBitcodeLibsPreferences (const Driver &D,
36+ const llvm::opt::ArgList &DriverArgs,
37+ StringRef GPUArch,
38+ const Action::OffloadKind DeviceOffloadingKind,
39+ const bool NeedsASanRT)
40+ : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
41+ tools::getAMDGPUCodeObjectVersion (D, DriverArgs))) {
42+ const auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
43+ const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN (Kind);
44+
45+ IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP;
46+
47+ const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
48+ Wave64 =
49+ !HasWave32 || DriverArgs.hasFlag (options::OPT_mwavefrontsize64,
50+ options::OPT_mno_wavefrontsize64, false );
51+
52+ const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP ||
53+ DeviceOffloadingKind == Action::OFK_HIP;
54+
55+ // Default to enabling f32 denormals on subtargets where fma is fast with
56+ // denormals
57+ const bool DefaultDAZ =
58+ (Kind == llvm::AMDGPU::GK_NONE)
59+ ? false
60+ : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
61+ (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
62+ // TODO: There are way too many flags that change this. Do we need to
63+ // check them all?
64+ DAZ = IsKnownOffloading
65+ ? DriverArgs.hasFlag (options::OPT_fgpu_flush_denormals_to_zero,
66+ options::OPT_fno_gpu_flush_denormals_to_zero,
67+ DefaultDAZ)
68+ : DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) || DefaultDAZ;
69+
70+ FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only) ||
71+ DriverArgs.hasFlag (options::OPT_ffinite_math_only,
72+ options::OPT_fno_finite_math_only, false );
73+
74+ UnsafeMathOpt =
75+ DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations) ||
76+ DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
77+ options::OPT_fno_unsafe_math_optimizations, false );
78+
79+ FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math) ||
80+ DriverArgs.hasFlag (options::OPT_ffast_math,
81+ options::OPT_fno_fast_math, false );
82+
83+ const bool DefaultSqrt = IsKnownOffloading ? true : false ;
84+ CorrectSqrt =
85+ DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
86+ DriverArgs.hasFlag (
87+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
88+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt);
89+ // GPU Sanitizer currently only supports ASan and is enabled through host
90+ // ASan.
91+ GPUSan = (DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
92+ options::OPT_fno_gpu_sanitize, true ) &&
93+ NeedsASanRT);
94+ }
95+
3496void RocmInstallationDetector::scanLibDevicePath (llvm::StringRef Path) {
3597 assert (!Path.empty ());
3698
@@ -944,33 +1006,14 @@ void ROCMToolChain::addClangTargetOptions(
9441006 ABIVer, noGPULib))
9451007 return ;
9461008
947- bool Wave64 = isWave64 (DriverArgs, Kind);
948- // TODO: There are way too many flags that change this. Do we need to check
949- // them all?
950- bool DAZ = DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) ||
951- getDefaultDenormsAreZeroForTarget (Kind);
952- bool FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only);
953-
954- bool UnsafeMathOpt =
955- DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations);
956- bool FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math);
957- bool CorrectSqrt =
958- DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
959-
960- // GPU Sanitizer currently only supports ASan and is enabled through host
961- // ASan.
962- bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
963- options::OPT_fno_gpu_sanitize, true ) &&
964- getSanitizerArgs (DriverArgs).needsAsanRt ();
965-
9661009 // Add the OpenCL specific bitcode library.
9671010 llvm::SmallVector<BitCodeLibraryInfo, 12 > BCLibs;
9681011 BCLibs.emplace_back (RocmInstallation->getOpenCLPath ().str ());
9691012
9701013 // Add the generic set of libraries.
9711014 BCLibs.append (RocmInstallation->getCommonBitcodeLibs (
972- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
973- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false ));
1015+ DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind ,
1016+ getSanitizerArgs (DriverArgs). needsAsanRt () ));
9741017
9751018 for (auto [BCFile, Internalize] : BCLibs) {
9761019 if (Internalize)
@@ -1009,41 +1052,37 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
10091052
10101053llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
10111054RocmInstallationDetector::getCommonBitcodeLibs (
1012- const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
1013- bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
1014- bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
1015- bool isOpenMP) const {
1055+ const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
1056+ StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind,
1057+ const bool NeedsASanRT) const {
10161058 llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 > BCLibs;
10171059
1060+ CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
1061+ DeviceOffloadingKind, NeedsASanRT};
1062+
10181063 auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
10191064 bool Internalize = true ) {
10201065 BCLib.ShouldInternalize = Internalize;
10211066 BCLibs.push_back (BCLib);
10221067 };
10231068 auto AddSanBCLibs = [&]() {
1024- if (GPUSan)
1069+ if (Pref. GPUSan )
10251070 AddBCLib (getAsanRTLPath (), false );
10261071 };
10271072
10281073 AddSanBCLibs ();
10291074 AddBCLib (getOCMLPath ());
1030- // FIXME: OpenMP has ockl and ocml contained in libomptarget.bc. However,
1031- // we cannot exclude ocml here because of the crazy always-compile clang
1032- // headers for cuda, hip, and openmp. A more sane approach is to use libm
1033- // offload-arch-specific bitcode files as is done for FORTRAN. Currently,
1034- // libomptarget-<offload-arch>.bc files is built by compiling headers with
1035- // __BUILD_MATH_BUILTINS_LIB__ turning static libm functions to extern.
1036- if (!isOpenMP)
1075+ if (!Pref.IsOpenMP )
10371076 AddBCLib (getOCKLPath ());
1038- else if (GPUSan && isOpenMP )
1077+ else if (Pref. GPUSan && Pref. IsOpenMP )
10391078 AddBCLib (getOCKLPath (), false );
1040- AddBCLib (getDenormalsAreZeroPath (DAZ));
1041- AddBCLib (getUnsafeMathPath (UnsafeMathOpt || FastRelaxedMath));
1042- AddBCLib (getFiniteOnlyPath (FiniteOnly || FastRelaxedMath));
1043- AddBCLib (getCorrectlyRoundedSqrtPath (CorrectSqrt));
1044- AddBCLib (getWavefrontSize64Path (Wave64));
1079+ AddBCLib (getDenormalsAreZeroPath (Pref. DAZ ));
1080+ AddBCLib (getUnsafeMathPath (Pref. UnsafeMathOpt || Pref. FastRelaxedMath ));
1081+ AddBCLib (getFiniteOnlyPath (Pref. FiniteOnly || Pref. FastRelaxedMath ));
1082+ AddBCLib (getCorrectlyRoundedSqrtPath (Pref. CorrectSqrt ));
1083+ AddBCLib (getWavefrontSize64Path (Pref. Wave64 ));
10451084 AddBCLib (LibDeviceFile);
1046- auto ABIVerPath = getABIVersionPath (ABIVer);
1085+ auto ABIVerPath = getABIVersionPath (Pref. ABIVer );
10471086 if (!ABIVerPath.empty ())
10481087 AddBCLib (ABIVerPath);
10491088
@@ -1058,14 +1097,22 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
10581097}
10591098
10601099llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
1061- ROCMToolChain::getCommonDeviceLibNames (const llvm::opt::ArgList &DriverArgs,
1062- const std::string &GPUArch,
1063- bool isOpenMP) const {
1064- RocmInstallationDetector RocmInstallation (getDriver (), getTriple (),
1065- DriverArgs, true , true );
1066- return amdgpu::dlr::getCommonDeviceLibNames (
1067- DriverArgs, getSanitizerArgs (DriverArgs), getDriver (), GPUArch, isOpenMP,
1068- RocmInstallation);
1100+ ROCMToolChain::getCommonDeviceLibNames (
1101+ const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
1102+ Action::OffloadKind DeviceOffloadingKind) const {
1103+ auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
1104+ const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN (Kind);
1105+
1106+ StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile (CanonArch);
1107+ auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion (
1108+ getAMDGPUCodeObjectVersion (getDriver (), DriverArgs));
1109+ if (!RocmInstallation->checkCommonBitcodeLibs (CanonArch, LibDeviceFile,
1110+ ABIVer))
1111+ return {};
1112+
1113+ return RocmInstallation->getCommonBitcodeLibs (
1114+ DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind,
1115+ getSanitizerArgs (DriverArgs).needsAsanRt ());
10691116}
10701117
10711118bool AMDGPUToolChain::shouldSkipSanitizeOption (
0 commit comments