@@ -31,6 +31,72 @@ using namespace clang::driver::toolchains;
3131using namespace clang ;
3232using namespace llvm ::opt;
3333
34+ RocmInstallationDetector::CommonBitcodeLibsPreferences::
35+ CommonBitcodeLibsPreferences (const Driver &D,
36+ const llvm::opt::ArgList &DriverArgs,
37+ StringRef GPUArch,
38+ const Action::OffloadKind DeviceOffloadingKind,
39+ const bool NeedsASanRT)
40+ : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
41+ tools::getAMDGPUCodeObjectVersion (D, DriverArgs))) {
42+ const auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
43+ const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN (Kind);
44+
45+ IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP;
46+
47+ const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
48+ Wave64 =
49+ !HasWave32 || DriverArgs.hasFlag (options::OPT_mwavefrontsize64,
50+ options::OPT_mno_wavefrontsize64, false );
51+
52+ const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP ||
53+ DeviceOffloadingKind == Action::OFK_HIP;
54+
55+ // Default to enabling f32 denormals on subtargets where fma is fast with
56+ // denormals
57+ const bool DefaultDAZ =
58+ (Kind == llvm::AMDGPU::GK_NONE)
59+ ? false
60+ : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
61+ (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
62+ // TODO: There are way too many flags that change this. Do we need to
63+ // check them all?
64+ DAZ = IsKnownOffloading
65+ ? DriverArgs.hasFlag (options::OPT_fgpu_flush_denormals_to_zero,
66+ options::OPT_fno_gpu_flush_denormals_to_zero,
67+ DefaultDAZ)
68+ : DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) || DefaultDAZ;
69+
70+ FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only) ||
71+ DriverArgs.hasFlag (options::OPT_ffinite_math_only,
72+ options::OPT_fno_finite_math_only, false );
73+
74+ UnsafeMathOpt =
75+ DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations) ||
76+ DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
77+ options::OPT_fno_unsafe_math_optimizations, false );
78+
79+ FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math) ||
80+ DriverArgs.hasFlag (options::OPT_ffast_math,
81+ options::OPT_fno_fast_math, false );
82+
83+ const bool DefaultSqrt = IsKnownOffloading ? true : false ;
84+ if (DeviceOffloadingKind == Action::OFK_SYCL)
85+ // When using SYCL, sqrt is only correctly rounded if the flag is specified.
86+ CorrectSqrt = DriverArgs.hasArg (options::OPT_foffload_fp32_prec_sqrt);
87+ else
88+ CorrectSqrt =
89+ DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
90+ DriverArgs.hasFlag (
91+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
92+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt,
93+ DefaultSqrt);
94+ // GPU Sanitizer currently only supports ASan and is enabled through host
95+ // ASan.
96+ GPUSan = (DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
97+ options::OPT_fno_gpu_sanitize, true ) &&
98+ NeedsASanRT);
99+ }
34100void RocmInstallationDetector::scanLibDevicePath (llvm::StringRef Path) {
35101 assert (!Path.empty ());
36102
@@ -883,33 +949,14 @@ void ROCMToolChain::addClangTargetOptions(
883949 ABIVer))
884950 return ;
885951
886- bool Wave64 = isWave64 (DriverArgs, Kind);
887- // TODO: There are way too many flags that change this. Do we need to check
888- // them all?
889- bool DAZ = DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) ||
890- getDefaultDenormsAreZeroForTarget (Kind);
891- bool FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only);
892-
893- bool UnsafeMathOpt =
894- DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations);
895- bool FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math);
896- bool CorrectSqrt =
897- DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
898-
899- // GPU Sanitizer currently only supports ASan and is enabled through host
900- // ASan.
901- bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
902- options::OPT_fno_gpu_sanitize, true ) &&
903- getSanitizerArgs (DriverArgs).needsAsanRt ();
904-
905952 // Add the OpenCL specific bitcode library.
906953 llvm::SmallVector<BitCodeLibraryInfo, 12 > BCLibs;
907954 BCLibs.emplace_back (RocmInstallation->getOpenCLPath ().str ());
908955
909956 // Add the generic set of libraries.
910957 BCLibs.append (RocmInstallation->getCommonBitcodeLibs (
911- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
912- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false ));
958+ DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind ,
959+ getSanitizerArgs (DriverArgs). needsAsanRt () ));
913960
914961 for (auto [BCFile, Internalize] : BCLibs) {
915962 if (Internalize)
@@ -946,35 +993,38 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
946993
947994llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
948995RocmInstallationDetector::getCommonBitcodeLibs (
949- const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64 ,
950- bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath ,
951- bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan ,
952- bool isOpenMP ) const {
996+ const llvm::opt::ArgList &DriverArgs,
997+ StringRef LibDeviceFile, StringRef GPUArch ,
998+ const Action::OffloadKind DeviceOffloadingKind ,
999+ const bool NeedsASanRT ) const {
9531000 llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 > BCLibs;
9541001
1002+ CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
1003+ DeviceOffloadingKind, NeedsASanRT};
1004+
9551005 auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
9561006 bool Internalize = true ) {
9571007 BCLib.ShouldInternalize = Internalize;
9581008 BCLibs.emplace_back (BCLib);
9591009 };
9601010 auto AddSanBCLibs = [&]() {
961- if (GPUSan)
1011+ if (Pref. GPUSan )
9621012 AddBCLib (getAsanRTLPath (), false );
9631013 };
9641014
9651015 AddSanBCLibs ();
9661016 AddBCLib (getOCMLPath ());
967- if (!isOpenMP )
1017+ if (!Pref. IsOpenMP )
9681018 AddBCLib (getOCKLPath ());
969- else if (GPUSan && isOpenMP )
1019+ else if (Pref. GPUSan && Pref. IsOpenMP )
9701020 AddBCLib (getOCKLPath (), false );
971- AddBCLib (getDenormalsAreZeroPath (DAZ));
972- AddBCLib (getUnsafeMathPath (UnsafeMathOpt || FastRelaxedMath));
973- AddBCLib (getFiniteOnlyPath (FiniteOnly || FastRelaxedMath));
974- AddBCLib (getCorrectlyRoundedSqrtPath (CorrectSqrt));
975- AddBCLib (getWavefrontSize64Path (Wave64));
1021+ AddBCLib (getDenormalsAreZeroPath (Pref. DAZ ));
1022+ AddBCLib (getUnsafeMathPath (Pref. UnsafeMathOpt || Pref. FastRelaxedMath ));
1023+ AddBCLib (getFiniteOnlyPath (Pref. FiniteOnly || Pref. FastRelaxedMath ));
1024+ AddBCLib (getCorrectlyRoundedSqrtPath (Pref. CorrectSqrt ));
1025+ AddBCLib (getWavefrontSize64Path (Pref. Wave64 ));
9761026 AddBCLib (LibDeviceFile);
977- auto ABIVerPath = getABIVersionPath (ABIVer);
1027+ auto ABIVerPath = getABIVersionPath (Pref. ABIVer );
9781028 if (!ABIVerPath.empty ())
9791029 AddBCLib (ABIVerPath);
9801030
@@ -984,7 +1034,7 @@ RocmInstallationDetector::getCommonBitcodeLibs(
9841034llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
9851035ROCMToolChain::getCommonDeviceLibNames (
9861036 const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
987- const Action::OffloadKind DeviceOffloadingKind, bool isOpenMP ) const {
1037+ Action::OffloadKind DeviceOffloadingKind) const {
9881038 auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
9891039 const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN (Kind);
9901040
@@ -995,38 +1045,9 @@ ROCMToolChain::getCommonDeviceLibNames(
9951045 ABIVer))
9961046 return {};
9971047
998- // If --hip-device-lib is not set, add the default bitcode libraries.
999- // TODO: There are way too many flags that change this. Do we need to check
1000- // them all?
1001- bool DAZ = DriverArgs.hasFlag (options::OPT_fgpu_flush_denormals_to_zero,
1002- options::OPT_fno_gpu_flush_denormals_to_zero,
1003- getDefaultDenormsAreZeroForTarget (Kind));
1004- bool FiniteOnly = DriverArgs.hasFlag (
1005- options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false );
1006- bool UnsafeMathOpt =
1007- DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
1008- options::OPT_fno_unsafe_math_optimizations, false );
1009- bool FastRelaxedMath = DriverArgs.hasFlag (options::OPT_ffast_math,
1010- options::OPT_fno_fast_math, false );
1011- bool CorrectSqrt = false ;
1012- if (DeviceOffloadingKind == Action::OFK_SYCL)
1013- // When using SYCL, sqrt is only correctly rounded if the flag is specified
1014- CorrectSqrt = DriverArgs.hasArg (options::OPT_foffload_fp32_prec_sqrt);
1015- else
1016- CorrectSqrt = DriverArgs.hasFlag (
1017- options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
1018- options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true );
1019- bool Wave64 = isWave64 (DriverArgs, Kind);
1020-
1021- // GPU Sanitizer currently only supports ASan and is enabled through host
1022- // ASan.
1023- bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
1024- options::OPT_fno_gpu_sanitize, true ) &&
1025- getSanitizerArgs (DriverArgs).needsAsanRt ();
1026-
10271048 return RocmInstallation->getCommonBitcodeLibs (
1028- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
1029- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP );
1049+ DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind ,
1050+ getSanitizerArgs (DriverArgs). needsAsanRt () );
10301051}
10311052
10321053bool AMDGPUToolChain::shouldSkipSanitizeOption (
0 commit comments