@@ -31,6 +31,68 @@ using namespace clang::driver::toolchains;
3131using namespace clang ;
3232using namespace llvm ::opt;
3333
34+ RocmInstallationDetector::CommonBitcodeLibsPreferences::
35+ CommonBitcodeLibsPreferences (const Driver &D,
36+ const llvm::opt::ArgList &DriverArgs,
37+ StringRef GPUArch,
38+ const Action::OffloadKind DeviceOffloadingKind,
39+ const bool NeedsASanRT)
40+ : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
41+ tools::getAMDGPUCodeObjectVersion (D, DriverArgs))) {
42+ const auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
43+ const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN (Kind);
44+
45+ IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP;
46+
47+ const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
48+ Wave64 =
49+ !HasWave32 || DriverArgs.hasFlag (options::OPT_mwavefrontsize64,
50+ options::OPT_mno_wavefrontsize64, false );
51+
52+ const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP ||
53+ DeviceOffloadingKind == Action::OFK_HIP;
54+
55+ // Default to enabling f32 denormals on subtargets where fma is fast with
56+ // denormals
57+ const bool DefaultDAZ =
58+ (Kind == llvm::AMDGPU::GK_NONE)
59+ ? false
60+ : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
61+ (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
62+ // TODO: There are way too many flags that change this. Do we need to
63+ // check them all?
64+ DAZ = IsKnownOffloading
65+ ? DriverArgs.hasFlag (options::OPT_fgpu_flush_denormals_to_zero,
66+ options::OPT_fno_gpu_flush_denormals_to_zero,
67+ DefaultDAZ)
68+ : DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) || DefaultDAZ;
69+
70+ FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only) ||
71+ DriverArgs.hasFlag (options::OPT_ffinite_math_only,
72+ options::OPT_fno_finite_math_only, false );
73+
74+ UnsafeMathOpt =
75+ DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations) ||
76+ DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
77+ options::OPT_fno_unsafe_math_optimizations, false );
78+
79+ FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math) ||
80+ DriverArgs.hasFlag (options::OPT_ffast_math,
81+ options::OPT_fno_fast_math, false );
82+
83+ const bool DefaultSqrt = IsKnownOffloading ? true : false ;
84+ CorrectSqrt =
85+ DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
86+ DriverArgs.hasFlag (
87+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
88+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt);
89+ // GPU Sanitizer currently only supports ASan and is enabled through host
90+ // ASan.
91+ GPUSan = (DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
92+ options::OPT_fno_gpu_sanitize, true ) &&
93+ NeedsASanRT);
94+ }
95+
3496void RocmInstallationDetector::scanLibDevicePath (llvm::StringRef Path) {
3597 assert (!Path.empty ());
3698
@@ -658,46 +720,21 @@ llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
658720amdgpu::dlr::getCommonDeviceLibNames (
659721 const llvm::opt::ArgList &DriverArgs, const SanitizerArgs &SanArgs,
660722 const Driver &D, const std::string &GPUArch, bool isOpenMP,
661- const RocmInstallationDetector &RocmInstallation) {
723+ const RocmInstallationDetector &RocmInstallation,
724+ const clang::driver::Action::OffloadKind DeviceOffloadingKind) {
662725 auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
663726 const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN (Kind);
664727
665728 StringRef LibDeviceFile = RocmInstallation.getLibDeviceFile (CanonArch);
666729 auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion (
667730 getAMDGPUCodeObjectVersion (D, DriverArgs));
668- bool noGPULib = DriverArgs.hasArg (options::OPT_offloadlib);
669- if (!RocmInstallation.checkCommonBitcodeLibs (CanonArch, LibDeviceFile, ABIVer,
670- noGPULib))
731+ if (!RocmInstallation.checkCommonBitcodeLibs (CanonArch, LibDeviceFile,
732+ ABIVer))
671733 return {};
672-
673- // If --hip-device-lib is not set, add the default bitcode libraries.
674- // TODO: There are way too many flags that change this. Do we need to check
675- // them all?
676- bool DAZ = DriverArgs.hasFlag (
677- options::OPT_fgpu_flush_denormals_to_zero,
678- options::OPT_fno_gpu_flush_denormals_to_zero,
679- toolchains::AMDGPUToolChain::getDefaultDenormsAreZeroForTarget (Kind));
680- bool FiniteOnly = DriverArgs.hasFlag (
681- options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false );
682- bool UnsafeMathOpt =
683- DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
684- options::OPT_fno_unsafe_math_optimizations, false );
685- bool FastRelaxedMath = DriverArgs.hasFlag (options::OPT_ffast_math,
686- options::OPT_fno_fast_math, false );
687- bool CorrectSqrt = DriverArgs.hasFlag (
688- options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
689- options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true );
690- bool Wave64 = toolchains::AMDGPUToolChain::isWave64 (DriverArgs, Kind);
691-
692- // GPU Sanitizer currently only supports ASan and is enabled through host
693- // ASan.
694- bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
695- options::OPT_fno_gpu_sanitize, true ) &&
696- SanArgs.needsAsanRt ();
697-
734+
698735 return RocmInstallation.getCommonBitcodeLibs (
699- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
700- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP );
736+ DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind ,
737+ SanArgs. needsAsanRt () );
701738}
702739
703740// / AMDGPU Toolchain
@@ -939,38 +976,18 @@ void ROCMToolChain::addClangTargetOptions(
939976 StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile (CanonArch);
940977 auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion (
941978 getAMDGPUCodeObjectVersion (getDriver (), DriverArgs));
942- bool noGPULib = DriverArgs.hasArg (options::OPT_offloadlib);
943979 if (!RocmInstallation->checkCommonBitcodeLibs (CanonArch, LibDeviceFile,
944- ABIVer, noGPULib ))
980+ ABIVer))
945981 return ;
946982
947- bool Wave64 = isWave64 (DriverArgs, Kind);
948- // TODO: There are way too many flags that change this. Do we need to check
949- // them all?
950- bool DAZ = DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) ||
951- getDefaultDenormsAreZeroForTarget (Kind);
952- bool FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only);
953-
954- bool UnsafeMathOpt =
955- DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations);
956- bool FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math);
957- bool CorrectSqrt =
958- DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
959-
960- // GPU Sanitizer currently only supports ASan and is enabled through host
961- // ASan.
962- bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
963- options::OPT_fno_gpu_sanitize, true ) &&
964- getSanitizerArgs (DriverArgs).needsAsanRt ();
965-
966983 // Add the OpenCL specific bitcode library.
967984 llvm::SmallVector<BitCodeLibraryInfo, 12 > BCLibs;
968985 BCLibs.emplace_back (RocmInstallation->getOpenCLPath ().str ());
969986
970987 // Add the generic set of libraries.
971988 BCLibs.append (RocmInstallation->getCommonBitcodeLibs (
972- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
973- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false ));
989+ DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind ,
990+ getSanitizerArgs (DriverArgs). needsAsanRt () ));
974991
975992 for (auto [BCFile, Internalize] : BCLibs) {
976993 if (Internalize)
@@ -983,15 +1000,13 @@ void ROCMToolChain::addClangTargetOptions(
9831000
9841001bool RocmInstallationDetector::checkCommonBitcodeLibs (
9851002 StringRef GPUArch, StringRef LibDeviceFile,
986- DeviceLibABIVersion ABIVer, bool noGPULib ) const {
1003+ DeviceLibABIVersion ABIVer) const {
9871004 if (!hasDeviceLibrary ()) {
988- if (!noGPULib)
989- D.Diag (diag::err_drv_no_rocm_device_lib) << 0 ;
1005+ D.Diag (diag::err_drv_no_rocm_device_lib) << 0 ;
9901006 return false ;
9911007 }
9921008 if (LibDeviceFile.empty ()) {
993- if (!noGPULib)
994- D.Diag (diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
1009+ D.Diag (diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
9951010 return false ;
9961011 }
9971012 if (ABIVer.requiresLibrary () && getABIVersionPath (ABIVer).empty ()) {
@@ -1009,41 +1024,37 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
10091024
10101025llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
10111026RocmInstallationDetector::getCommonBitcodeLibs (
1012- const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
1013- bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
1014- bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
1015- bool isOpenMP) const {
1027+ const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
1028+ StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind,
1029+ const bool NeedsASanRT) const {
10161030 llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 > BCLibs;
10171031
1032+ CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
1033+ DeviceOffloadingKind, NeedsASanRT};
1034+
10181035 auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
10191036 bool Internalize = true ) {
10201037 BCLib.ShouldInternalize = Internalize;
10211038 BCLibs.push_back (BCLib);
10221039 };
10231040 auto AddSanBCLibs = [&]() {
1024- if (GPUSan)
1041+ if (Pref. GPUSan )
10251042 AddBCLib (getAsanRTLPath (), false );
10261043 };
10271044
10281045 AddSanBCLibs ();
10291046 AddBCLib (getOCMLPath ());
1030- // FIXME: OpenMP has ockl and ocml contained in libomptarget.bc. However,
1031- // we cannot exclude ocml here because of the crazy always-compile clang
1032- // headers for cuda, hip, and openmp. A more sane approach is to use libm
1033- // offload-arch-specific bitcode files as is done for FORTRAN. Currently,
1034- // libomptarget-<offload-arch>.bc files is built by compiling headers with
1035- // __BUILD_MATH_BUILTINS_LIB__ turning static libm functions to extern.
1036- if (!isOpenMP)
1047+ if (!Pref.IsOpenMP )
10371048 AddBCLib (getOCKLPath ());
1038- else if (GPUSan && isOpenMP )
1049+ else if (Pref. GPUSan && Pref. IsOpenMP )
10391050 AddBCLib (getOCKLPath (), false );
1040- AddBCLib (getDenormalsAreZeroPath (DAZ));
1041- AddBCLib (getUnsafeMathPath (UnsafeMathOpt || FastRelaxedMath));
1042- AddBCLib (getFiniteOnlyPath (FiniteOnly || FastRelaxedMath));
1043- AddBCLib (getCorrectlyRoundedSqrtPath (CorrectSqrt));
1044- AddBCLib (getWavefrontSize64Path (Wave64));
1051+ AddBCLib (getDenormalsAreZeroPath (Pref. DAZ ));
1052+ AddBCLib (getUnsafeMathPath (Pref. UnsafeMathOpt || Pref. FastRelaxedMath ));
1053+ AddBCLib (getFiniteOnlyPath (Pref. FiniteOnly || Pref. FastRelaxedMath ));
1054+ AddBCLib (getCorrectlyRoundedSqrtPath (Pref. CorrectSqrt ));
1055+ AddBCLib (getWavefrontSize64Path (Pref. Wave64 ));
10451056 AddBCLib (LibDeviceFile);
1046- auto ABIVerPath = getABIVersionPath (ABIVer);
1057+ auto ABIVerPath = getABIVersionPath (Pref. ABIVer );
10471058 if (!ABIVerPath.empty ())
10481059 AddBCLib (ABIVerPath);
10491060
@@ -1058,14 +1069,22 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
10581069}
10591070
10601071llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
1061- ROCMToolChain::getCommonDeviceLibNames (const llvm::opt::ArgList &DriverArgs,
1062- const std::string &GPUArch,
1063- bool isOpenMP) const {
1064- RocmInstallationDetector RocmInstallation (getDriver (), getTriple (),
1065- DriverArgs, true , true );
1066- return amdgpu::dlr::getCommonDeviceLibNames (
1067- DriverArgs, getSanitizerArgs (DriverArgs), getDriver (), GPUArch, isOpenMP,
1068- RocmInstallation);
1072+ ROCMToolChain::getCommonDeviceLibNames (
1073+ const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
1074+ Action::OffloadKind DeviceOffloadingKind) const {
1075+ auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
1076+ const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN (Kind);
1077+
1078+ StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile (CanonArch);
1079+ auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion (
1080+ getAMDGPUCodeObjectVersion (getDriver (), DriverArgs));
1081+ if (!RocmInstallation->checkCommonBitcodeLibs (CanonArch, LibDeviceFile,
1082+ ABIVer))
1083+ return {};
1084+
1085+ return RocmInstallation->getCommonBitcodeLibs (
1086+ DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind,
1087+ getSanitizerArgs (DriverArgs).needsAsanRt ());
10691088}
10701089
10711090bool AMDGPUToolChain::shouldSkipSanitizeOption (
0 commit comments