@@ -31,68 +31,6 @@ using namespace clang::driver::toolchains;
3131using namespace clang ;
3232using namespace llvm ::opt;
3333
// Constructor for CommonBitcodeLibsPreferences (every line of this definition
// carries a '-' marker, i.e. it is removed by this diff hunk).
//
// It derives, from the driver arguments and the target GPU architecture, the
// set of flags that select which common device bitcode libraries are linked:
// ABIVer, IsOpenMP, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, FastRelaxedMath,
// CorrectSqrt and GPUSan.
//
// Parameters:
//   D                    - driver, used to query the AMDGPU code object version.
//   DriverArgs           - parsed command-line arguments inspected for flags.
//   GPUArch              - architecture name, parsed with parseArchAMDGCN.
//   DeviceOffloadingKind - offload kind; OpenMP and HIP are treated as "known
//                          offloading" and change the DAZ / sqrt defaults.
//   NeedsASanRT          - caller-supplied flag that gates GPUSan.
34- RocmInstallationDetector::CommonBitcodeLibsPreferences::
35- CommonBitcodeLibsPreferences (const Driver &D,
36- const llvm::opt::ArgList &DriverArgs,
37- StringRef GPUArch,
38- const Action::OffloadKind DeviceOffloadingKind,
39- const bool NeedsASanRT)
// ABIVer is computed from the code object version reported for these args.
40- : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
41- tools::getAMDGPUCodeObjectVersion (D, DriverArgs))) {
42- const auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
43- const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN (Kind);
44-
45- IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP;
46-
// Wave64 is forced on when the architecture lacks FEATURE_WAVE32; otherwise
// it follows OPT_mwavefrontsize64 / OPT_mno_wavefrontsize64 (default false).
47- const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
48- Wave64 =
49- !HasWave32 || DriverArgs.hasFlag (options::OPT_mwavefrontsize64,
50- options::OPT_mno_wavefrontsize64, false );
51-
52- const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP ||
53- DeviceOffloadingKind == Action::OFK_HIP;
54-
55- // Default to enabling f32 denormals on subtargets where fma is fast with
56- // denormals
// DefaultDAZ is false for an unrecognised architecture (GK_NONE); otherwise
// it is true unless the arch has both FAST_FMA_F32 and FAST_DENORMAL_F32.
57- const bool DefaultDAZ =
58- (Kind == llvm::AMDGPU::GK_NONE)
59- ? false
60- : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
61- (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
62- // TODO: There are way too many flags that change this. Do we need to
63- // check them all?
// For OpenMP/HIP the fgpu_flush_denormals_to_zero flag pair can override
// DefaultDAZ in either direction; for other kinds OPT_cl_denorms_are_zero can
// only turn DAZ on in addition to DefaultDAZ.
64- DAZ = IsKnownOffloading
65- ? DriverArgs.hasFlag (options::OPT_fgpu_flush_denormals_to_zero,
66- options::OPT_fno_gpu_flush_denormals_to_zero,
67- DefaultDAZ)
68- : DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) || DefaultDAZ;
69-
// The next three flags are each set by either the OpenCL-style option or the
// corresponding generic -f/-fno- flag pair (default false).
70- FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only) ||
71- DriverArgs.hasFlag (options::OPT_ffinite_math_only,
72- options::OPT_fno_finite_math_only, false );
73-
74- UnsafeMathOpt =
75- DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations) ||
76- DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
77- options::OPT_fno_unsafe_math_optimizations, false );
78-
79- FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math) ||
80- DriverArgs.hasFlag (options::OPT_ffast_math,
81- options::OPT_fno_fast_math, false );
82-
// Correctly rounded sqrt defaults to on for OpenMP/HIP and off otherwise.
// NOTE(review): `IsKnownOffloading ? true : false` is equivalent to
// `IsKnownOffloading`.
83- const bool DefaultSqrt = IsKnownOffloading ? true : false ;
84- CorrectSqrt =
85- DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
86- DriverArgs.hasFlag (
87- options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
88- options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt);
89- // GPU Sanitizer currently only supports ASan and is enabled through host
90- // ASan.
// GPUSan requires both the fgpu_sanitize flag (default true) and NeedsASanRT.
91- GPUSan = (DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
92- options::OPT_fno_gpu_sanitize, true ) &&
93- NeedsASanRT);
94- }
95-
9634void RocmInstallationDetector::scanLibDevicePath (llvm::StringRef Path) {
9735 assert (!Path.empty ());
9836
@@ -1006,14 +944,33 @@ void ROCMToolChain::addClangTargetOptions(
1006944 ABIVer, noGPULib))
1007945 return ;
1008946
947+ bool Wave64 = isWave64 (DriverArgs, Kind);
948+ // TODO: There are way too many flags that change this. Do we need to check
949+ // them all?
950+ bool DAZ = DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) ||
951+ getDefaultDenormsAreZeroForTarget (Kind);
952+ bool FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only);
953+
954+ bool UnsafeMathOpt =
955+ DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations);
956+ bool FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math);
957+ bool CorrectSqrt =
958+ DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
959+
960+ // GPU Sanitizer currently only supports ASan and is enabled through host
961+ // ASan.
962+ bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
963+ options::OPT_fno_gpu_sanitize, true ) &&
964+ getSanitizerArgs (DriverArgs).needsAsanRt ();
965+
1009966 // Add the OpenCL specific bitcode library.
1010967 llvm::SmallVector<BitCodeLibraryInfo, 12 > BCLibs;
1011968 BCLibs.emplace_back (RocmInstallation->getOpenCLPath ().str ());
1012969
1013970 // Add the generic set of libraries.
1014971 BCLibs.append (RocmInstallation->getCommonBitcodeLibs (
1015- DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind ,
1016- getSanitizerArgs (DriverArgs). needsAsanRt () ));
972+ DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
973+ FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false ));
1017974
1018975 for (auto [BCFile, Internalize] : BCLibs) {
1019976 if (Internalize)
@@ -1052,37 +1009,41 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
10521009
10531010llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
10541011RocmInstallationDetector::getCommonBitcodeLibs (
1055- const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
1056- StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind,
1057- const bool NeedsASanRT) const {
1012+ const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
1013+ bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
1014+ bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
1015+ bool isOpenMP) const {
10581016 llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 > BCLibs;
10591017
1060- CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
1061- DeviceOffloadingKind, NeedsASanRT};
1062-
10631018 auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
10641019 bool Internalize = true ) {
10651020 BCLib.ShouldInternalize = Internalize;
10661021 BCLibs.push_back (BCLib);
10671022 };
10681023 auto AddSanBCLibs = [&]() {
1069- if (Pref. GPUSan )
1024+ if (GPUSan)
10701025 AddBCLib (getAsanRTLPath (), false );
10711026 };
10721027
10731028 AddSanBCLibs ();
10741029 AddBCLib (getOCMLPath ());
1075- if (!Pref.IsOpenMP )
1030+ // FIXME: OpenMP has ockl and ocml contained in libomptarget.bc. However,
1031+ // we cannot exclude ocml here because of the crazy always-compile clang
1032+ // headers for cuda, hip, and openmp. A more sane approach is to use libm
1033+ // offload-arch-specific bitcode files as is done for FORTRAN. Currently,
1034+ // libomptarget-<offload-arch>.bc files are built by compiling headers with
1035+ // __BUILD_MATH_BUILTINS_LIB__ turning static libm functions to extern.
1036+ if (!isOpenMP)
10761037 AddBCLib (getOCKLPath ());
1077- else if (Pref. GPUSan && Pref. IsOpenMP )
1038+ else if (GPUSan && isOpenMP )
10781039 AddBCLib (getOCKLPath (), false );
1079- AddBCLib (getDenormalsAreZeroPath (Pref. DAZ ));
1080- AddBCLib (getUnsafeMathPath (Pref. UnsafeMathOpt || Pref. FastRelaxedMath ));
1081- AddBCLib (getFiniteOnlyPath (Pref. FiniteOnly || Pref. FastRelaxedMath ));
1082- AddBCLib (getCorrectlyRoundedSqrtPath (Pref. CorrectSqrt ));
1083- AddBCLib (getWavefrontSize64Path (Pref. Wave64 ));
1040+ AddBCLib (getDenormalsAreZeroPath (DAZ));
1041+ AddBCLib (getUnsafeMathPath (UnsafeMathOpt || FastRelaxedMath));
1042+ AddBCLib (getFiniteOnlyPath (FiniteOnly || FastRelaxedMath));
1043+ AddBCLib (getCorrectlyRoundedSqrtPath (CorrectSqrt));
1044+ AddBCLib (getWavefrontSize64Path (Wave64));
10841045 AddBCLib (LibDeviceFile);
1085- auto ABIVerPath = getABIVersionPath (Pref. ABIVer );
1046+ auto ABIVerPath = getABIVersionPath (ABIVer);
10861047 if (!ABIVerPath.empty ())
10871048 AddBCLib (ABIVerPath);
10881049
@@ -1097,22 +1058,14 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
10971058}
10981059
// ROCMToolChain::getCommonDeviceLibNames: returns the list of common device
// bitcode libraries for GPUArch. This span is a diff hunk containing both the
// removed ('-') and the added ('+') implementation.
//
// Removed version: took an Action::OffloadKind, canonicalised the arch name,
// looked up the lib device file and ABI version, returned an empty list when
// checkCommonBitcodeLibs failed, and otherwise forwarded to
// RocmInstallation->getCommonBitcodeLibs.
//
// Added version: takes a bool isOpenMP instead of the offload kind, builds a
// local RocmInstallationDetector and delegates to
// amdgpu::dlr::getCommonDeviceLibNames.
10991060llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
1100- ROCMToolChain::getCommonDeviceLibNames (
1101- const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
1102- Action::OffloadKind DeviceOffloadingKind) const {
1103- auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
1104- const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN (Kind);
1105-
1106- StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile (CanonArch);
1107- auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion (
1108- getAMDGPUCodeObjectVersion (getDriver (), DriverArgs));
1109- if (!RocmInstallation->checkCommonBitcodeLibs (CanonArch, LibDeviceFile,
1110- ABIVer))
1111- return {};
1112-
1113- return RocmInstallation->getCommonBitcodeLibs (
1114- DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind,
1115- getSanitizerArgs (DriverArgs).needsAsanRt ());
1061+ ROCMToolChain::getCommonDeviceLibNames (const llvm::opt::ArgList &DriverArgs,
1062+ const std::string &GPUArch,
1063+ bool isOpenMP) const {
// NOTE(review): the removed lines access `RocmInstallation` with `->`, while
// this declares a local object of the same name; presumably it shadows a
// member of the toolchain — confirm this is intended.
// The meaning of the two trailing `true` arguments is not visible here —
// TODO confirm against the RocmInstallationDetector constructor.
1064+ RocmInstallationDetector RocmInstallation (getDriver (), getTriple (),
1065+ DriverArgs, true , true );
1066+ return amdgpu::dlr::getCommonDeviceLibNames (
1067+ DriverArgs, getSanitizerArgs (DriverArgs), getDriver (), GPUArch, isOpenMP,
1068+ RocmInstallation);
11161069}
11171070
11181071bool AMDGPUToolChain::shouldSkipSanitizeOption (
0 commit comments