@@ -31,68 +31,6 @@ using namespace clang::driver::toolchains;
31
31
using namespace clang ;
32
32
using namespace llvm ::opt;
33
33
34
- RocmInstallationDetector::CommonBitcodeLibsPreferences::
35
- CommonBitcodeLibsPreferences (const Driver &D,
36
- const llvm::opt::ArgList &DriverArgs,
37
- StringRef GPUArch,
38
- const Action::OffloadKind DeviceOffloadingKind,
39
- const bool NeedsASanRT)
40
- : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
41
- tools::getAMDGPUCodeObjectVersion (D, DriverArgs))) {
42
- const auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
43
- const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN (Kind);
44
-
45
- IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP;
46
-
47
- const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
48
- Wave64 =
49
- !HasWave32 || DriverArgs.hasFlag (options::OPT_mwavefrontsize64,
50
- options::OPT_mno_wavefrontsize64, false );
51
-
52
- const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP ||
53
- DeviceOffloadingKind == Action::OFK_HIP;
54
-
55
- // Default to enabling f32 denormals on subtargets where fma is fast with
56
- // denormals
57
- const bool DefaultDAZ =
58
- (Kind == llvm::AMDGPU::GK_NONE)
59
- ? false
60
- : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
61
- (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
62
- // TODO: There are way too many flags that change this. Do we need to
63
- // check them all?
64
- DAZ = IsKnownOffloading
65
- ? DriverArgs.hasFlag (options::OPT_fgpu_flush_denormals_to_zero,
66
- options::OPT_fno_gpu_flush_denormals_to_zero,
67
- DefaultDAZ)
68
- : DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) || DefaultDAZ;
69
-
70
- FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only) ||
71
- DriverArgs.hasFlag (options::OPT_ffinite_math_only,
72
- options::OPT_fno_finite_math_only, false );
73
-
74
- UnsafeMathOpt =
75
- DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations) ||
76
- DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
77
- options::OPT_fno_unsafe_math_optimizations, false );
78
-
79
- FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math) ||
80
- DriverArgs.hasFlag (options::OPT_ffast_math,
81
- options::OPT_fno_fast_math, false );
82
-
83
- const bool DefaultSqrt = IsKnownOffloading ? true : false ;
84
- CorrectSqrt =
85
- DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
86
- DriverArgs.hasFlag (
87
- options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
88
- options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt);
89
- // GPU Sanitizer currently only supports ASan and is enabled through host
90
- // ASan.
91
- GPUSan = (DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
92
- options::OPT_fno_gpu_sanitize, true ) &&
93
- NeedsASanRT);
94
- }
95
-
96
34
void RocmInstallationDetector::scanLibDevicePath (llvm::StringRef Path) {
97
35
assert (!Path.empty ());
98
36
@@ -1006,14 +944,33 @@ void ROCMToolChain::addClangTargetOptions(
1006
944
ABIVer, noGPULib))
1007
945
return ;
1008
946
947
+ bool Wave64 = isWave64 (DriverArgs, Kind);
948
+ // TODO: There are way too many flags that change this. Do we need to check
949
+ // them all?
950
+ bool DAZ = DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) ||
951
+ getDefaultDenormsAreZeroForTarget (Kind);
952
+ bool FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only);
953
+
954
+ bool UnsafeMathOpt =
955
+ DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations);
956
+ bool FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math);
957
+ bool CorrectSqrt =
958
+ DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
959
+
960
+ // GPU Sanitizer currently only supports ASan and is enabled through host
961
+ // ASan.
962
+ bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
963
+ options::OPT_fno_gpu_sanitize, true ) &&
964
+ getSanitizerArgs (DriverArgs).needsAsanRt ();
965
+
1009
966
// Add the OpenCL specific bitcode library.
1010
967
llvm::SmallVector<BitCodeLibraryInfo, 12 > BCLibs;
1011
968
BCLibs.emplace_back (RocmInstallation->getOpenCLPath ().str ());
1012
969
1013
970
// Add the generic set of libraries.
1014
971
BCLibs.append (RocmInstallation->getCommonBitcodeLibs (
1015
- DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind ,
1016
- getSanitizerArgs (DriverArgs). needsAsanRt () ));
972
+ DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
973
+ FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false ));
1017
974
1018
975
for (auto [BCFile, Internalize] : BCLibs) {
1019
976
if (Internalize)
@@ -1052,37 +1009,41 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
1052
1009
1053
1010
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
1054
1011
RocmInstallationDetector::getCommonBitcodeLibs (
1055
- const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
1056
- StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind,
1057
- const bool NeedsASanRT) const {
1012
+ const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
1013
+ bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
1014
+ bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
1015
+ bool isOpenMP) const {
1058
1016
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 > BCLibs;
1059
1017
1060
- CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
1061
- DeviceOffloadingKind, NeedsASanRT};
1062
-
1063
1018
auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
1064
1019
bool Internalize = true ) {
1065
1020
BCLib.ShouldInternalize = Internalize;
1066
1021
BCLibs.push_back (BCLib);
1067
1022
};
1068
1023
auto AddSanBCLibs = [&]() {
1069
- if (Pref. GPUSan )
1024
+ if (GPUSan)
1070
1025
AddBCLib (getAsanRTLPath (), false );
1071
1026
};
1072
1027
1073
1028
AddSanBCLibs ();
1074
1029
AddBCLib (getOCMLPath ());
1075
- if (!Pref.IsOpenMP )
1030
+ // FIXME: OpenMP has ockl and ocml contained in libomptarget.bc. However,
1031
+ // we cannot exclude ocml here because of the crazy always-compile clang
1032
+ // headers for cuda, hip, and openmp. A more sane approach is to use libm
1033
+ // offload-arch-specific bitcode files as is done for FORTRAN. Currently,
1034
+ // libomptarget-<offload-arch>.bc files are built by compiling headers with
1035
+ // __BUILD_MATH_BUILTINS_LIB__ turning static libm functions to extern.
1036
+ if (!isOpenMP)
1076
1037
AddBCLib (getOCKLPath ());
1077
- else if (Pref. GPUSan && Pref. IsOpenMP )
1038
+ else if (GPUSan && isOpenMP )
1078
1039
AddBCLib (getOCKLPath (), false );
1079
- AddBCLib (getDenormalsAreZeroPath (Pref. DAZ ));
1080
- AddBCLib (getUnsafeMathPath (Pref. UnsafeMathOpt || Pref. FastRelaxedMath ));
1081
- AddBCLib (getFiniteOnlyPath (Pref. FiniteOnly || Pref. FastRelaxedMath ));
1082
- AddBCLib (getCorrectlyRoundedSqrtPath (Pref. CorrectSqrt ));
1083
- AddBCLib (getWavefrontSize64Path (Pref. Wave64 ));
1040
+ AddBCLib (getDenormalsAreZeroPath (DAZ));
1041
+ AddBCLib (getUnsafeMathPath (UnsafeMathOpt || FastRelaxedMath));
1042
+ AddBCLib (getFiniteOnlyPath (FiniteOnly || FastRelaxedMath));
1043
+ AddBCLib (getCorrectlyRoundedSqrtPath (CorrectSqrt));
1044
+ AddBCLib (getWavefrontSize64Path (Wave64));
1084
1045
AddBCLib (LibDeviceFile);
1085
- auto ABIVerPath = getABIVersionPath (Pref. ABIVer );
1046
+ auto ABIVerPath = getABIVersionPath (ABIVer);
1086
1047
if (!ABIVerPath.empty ())
1087
1048
AddBCLib (ABIVerPath);
1088
1049
@@ -1097,22 +1058,14 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
1097
1058
}
1098
1059
1099
1060
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
1100
- ROCMToolChain::getCommonDeviceLibNames (
1101
- const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
1102
- Action::OffloadKind DeviceOffloadingKind) const {
1103
- auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
1104
- const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN (Kind);
1105
-
1106
- StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile (CanonArch);
1107
- auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion (
1108
- getAMDGPUCodeObjectVersion (getDriver (), DriverArgs));
1109
- if (!RocmInstallation->checkCommonBitcodeLibs (CanonArch, LibDeviceFile,
1110
- ABIVer))
1111
- return {};
1112
-
1113
- return RocmInstallation->getCommonBitcodeLibs (
1114
- DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind,
1115
- getSanitizerArgs (DriverArgs).needsAsanRt ());
1061
+ ROCMToolChain::getCommonDeviceLibNames (const llvm::opt::ArgList &DriverArgs,
1062
+ const std::string &GPUArch,
1063
+ bool isOpenMP) const {
1064
+ RocmInstallationDetector RocmInstallation (getDriver (), getTriple (),
1065
+ DriverArgs, true , true );
1066
+ return amdgpu::dlr::getCommonDeviceLibNames (
1067
+ DriverArgs, getSanitizerArgs (DriverArgs), getDriver (), GPUArch, isOpenMP,
1068
+ RocmInstallation);
1116
1069
}
1117
1070
1118
1071
bool AMDGPUToolChain::shouldSkipSanitizeOption (
0 commit comments