@@ -31,6 +31,68 @@ using namespace clang::driver::toolchains;
31
31
using namespace clang ;
32
32
using namespace llvm ::opt;
33
33
34
+ RocmInstallationDetector::CommonBitcodeLibsPreferences::
35
+ CommonBitcodeLibsPreferences (const Driver &D,
36
+ const llvm::opt::ArgList &DriverArgs,
37
+ StringRef GPUArch,
38
+ const Action::OffloadKind DeviceOffloadingKind,
39
+ const bool NeedsASanRT)
40
+ : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
41
+ tools::getAMDGPUCodeObjectVersion (D, DriverArgs))) {
42
+ const auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
43
+ const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN (Kind);
44
+
45
+ IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP;
46
+
47
+ const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
48
+ Wave64 =
49
+ !HasWave32 || DriverArgs.hasFlag (options::OPT_mwavefrontsize64,
50
+ options::OPT_mno_wavefrontsize64, false );
51
+
52
+ const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP ||
53
+ DeviceOffloadingKind == Action::OFK_HIP;
54
+
55
+ // Default to enabling f32 denormals on subtargets where fma is fast with
56
+ // denormals
57
+ const bool DefaultDAZ =
58
+ (Kind == llvm::AMDGPU::GK_NONE)
59
+ ? false
60
+ : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
61
+ (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
62
+ // TODO: There are way too many flags that change this. Do we need to
63
+ // check them all?
64
+ DAZ = IsKnownOffloading
65
+ ? DriverArgs.hasFlag (options::OPT_fgpu_flush_denormals_to_zero,
66
+ options::OPT_fno_gpu_flush_denormals_to_zero,
67
+ DefaultDAZ)
68
+ : DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) || DefaultDAZ;
69
+
70
+ FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only) ||
71
+ DriverArgs.hasFlag (options::OPT_ffinite_math_only,
72
+ options::OPT_fno_finite_math_only, false );
73
+
74
+ UnsafeMathOpt =
75
+ DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations) ||
76
+ DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
77
+ options::OPT_fno_unsafe_math_optimizations, false );
78
+
79
+ FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math) ||
80
+ DriverArgs.hasFlag (options::OPT_ffast_math,
81
+ options::OPT_fno_fast_math, false );
82
+
83
+ const bool DefaultSqrt = IsKnownOffloading ? true : false ;
84
+ CorrectSqrt =
85
+ DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
86
+ DriverArgs.hasFlag (
87
+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
88
+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt);
89
+ // GPU Sanitizer currently only supports ASan and is enabled through host
90
+ // ASan.
91
+ GPUSan = (DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
92
+ options::OPT_fno_gpu_sanitize, true ) &&
93
+ NeedsASanRT);
94
+ }
95
+
34
96
void RocmInstallationDetector::scanLibDevicePath (llvm::StringRef Path) {
35
97
assert (!Path.empty ());
36
98
@@ -884,33 +946,14 @@ void ROCMToolChain::addClangTargetOptions(
884
946
ABIVer))
885
947
return ;
886
948
887
- bool Wave64 = isWave64 (DriverArgs, Kind);
888
- // TODO: There are way too many flags that change this. Do we need to check
889
- // them all?
890
- bool DAZ = DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) ||
891
- getDefaultDenormsAreZeroForTarget (Kind);
892
- bool FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only);
893
-
894
- bool UnsafeMathOpt =
895
- DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations);
896
- bool FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math);
897
- bool CorrectSqrt =
898
- DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
899
-
900
- // GPU Sanitizer currently only supports ASan and is enabled through host
901
- // ASan.
902
- bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
903
- options::OPT_fno_gpu_sanitize, true ) &&
904
- getSanitizerArgs (DriverArgs).needsAsanRt ();
905
-
906
949
// Add the OpenCL specific bitcode library.
907
950
llvm::SmallVector<BitCodeLibraryInfo, 12 > BCLibs;
908
951
BCLibs.emplace_back (RocmInstallation->getOpenCLPath ().str ());
909
952
910
953
// Add the generic set of libraries.
911
954
BCLibs.append (RocmInstallation->getCommonBitcodeLibs (
912
- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
913
- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false ));
955
+ DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind ,
956
+ getSanitizerArgs (DriverArgs). needsAsanRt () ));
914
957
915
958
for (auto [BCFile, Internalize] : BCLibs) {
916
959
if (Internalize)
@@ -947,35 +990,37 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
947
990
948
991
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
949
992
RocmInstallationDetector::getCommonBitcodeLibs (
950
- const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
951
- bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
952
- bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
953
- bool isOpenMP) const {
993
+ const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
994
+ StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind,
995
+ const bool NeedsASanRT) const {
954
996
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 > BCLibs;
955
997
998
+ CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
999
+ DeviceOffloadingKind, NeedsASanRT};
1000
+
956
1001
auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
957
1002
bool Internalize = true ) {
958
1003
BCLib.ShouldInternalize = Internalize;
959
1004
BCLibs.emplace_back (BCLib);
960
1005
};
961
1006
auto AddSanBCLibs = [&]() {
962
- if (GPUSan)
1007
+ if (Pref. GPUSan )
963
1008
AddBCLib (getAsanRTLPath (), false );
964
1009
};
965
1010
966
1011
AddSanBCLibs ();
967
1012
AddBCLib (getOCMLPath ());
968
- if (!isOpenMP )
1013
+ if (!Pref. IsOpenMP )
969
1014
AddBCLib (getOCKLPath ());
970
- else if (GPUSan && isOpenMP )
1015
+ else if (Pref. GPUSan && Pref. IsOpenMP )
971
1016
AddBCLib (getOCKLPath (), false );
972
- AddBCLib (getDenormalsAreZeroPath (DAZ));
973
- AddBCLib (getUnsafeMathPath (UnsafeMathOpt || FastRelaxedMath));
974
- AddBCLib (getFiniteOnlyPath (FiniteOnly || FastRelaxedMath));
975
- AddBCLib (getCorrectlyRoundedSqrtPath (CorrectSqrt));
976
- AddBCLib (getWavefrontSize64Path (Wave64));
1017
+ AddBCLib (getDenormalsAreZeroPath (Pref. DAZ ));
1018
+ AddBCLib (getUnsafeMathPath (Pref. UnsafeMathOpt || Pref. FastRelaxedMath ));
1019
+ AddBCLib (getFiniteOnlyPath (Pref. FiniteOnly || Pref. FastRelaxedMath ));
1020
+ AddBCLib (getCorrectlyRoundedSqrtPath (Pref. CorrectSqrt ));
1021
+ AddBCLib (getWavefrontSize64Path (Pref. Wave64 ));
977
1022
AddBCLib (LibDeviceFile);
978
- auto ABIVerPath = getABIVersionPath (ABIVer);
1023
+ auto ABIVerPath = getABIVersionPath (Pref. ABIVer );
979
1024
if (!ABIVerPath.empty ())
980
1025
AddBCLib (ABIVerPath);
981
1026
@@ -985,7 +1030,7 @@ RocmInstallationDetector::getCommonBitcodeLibs(
985
1030
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
986
1031
ROCMToolChain::getCommonDeviceLibNames (
987
1032
const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
988
- const Action::OffloadKind DeviceOffloadingKind, bool isOpenMP ) const {
1033
+ Action::OffloadKind DeviceOffloadingKind) const {
989
1034
auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
990
1035
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN (Kind);
991
1036
@@ -996,38 +1041,9 @@ ROCMToolChain::getCommonDeviceLibNames(
996
1041
ABIVer))
997
1042
return {};
998
1043
999
- // If --hip-device-lib is not set, add the default bitcode libraries.
1000
- // TODO: There are way too many flags that change this. Do we need to check
1001
- // them all?
1002
- bool DAZ = DriverArgs.hasFlag (options::OPT_fgpu_flush_denormals_to_zero,
1003
- options::OPT_fno_gpu_flush_denormals_to_zero,
1004
- getDefaultDenormsAreZeroForTarget (Kind));
1005
- bool FiniteOnly = DriverArgs.hasFlag (
1006
- options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false );
1007
- bool UnsafeMathOpt =
1008
- DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
1009
- options::OPT_fno_unsafe_math_optimizations, false );
1010
- bool FastRelaxedMath = DriverArgs.hasFlag (options::OPT_ffast_math,
1011
- options::OPT_fno_fast_math, false );
1012
- bool CorrectSqrt = false ;
1013
- if (DeviceOffloadingKind == Action::OFK_SYCL)
1014
- // When using SYCL, sqrt is only correctly rounded if the flag is specified
1015
- CorrectSqrt = DriverArgs.hasArg (options::OPT_foffload_fp32_prec_sqrt);
1016
- else
1017
- CorrectSqrt = DriverArgs.hasFlag (
1018
- options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
1019
- options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true );
1020
- bool Wave64 = isWave64 (DriverArgs, Kind);
1021
-
1022
- // GPU Sanitizer currently only supports ASan and is enabled through host
1023
- // ASan.
1024
- bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
1025
- options::OPT_fno_gpu_sanitize, true ) &&
1026
- getSanitizerArgs (DriverArgs).needsAsanRt ();
1027
-
1028
1044
return RocmInstallation->getCommonBitcodeLibs (
1029
- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
1030
- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP );
1045
+ DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind ,
1046
+ getSanitizerArgs (DriverArgs). needsAsanRt () );
1031
1047
}
1032
1048
1033
1049
bool AMDGPUToolChain::shouldSkipSanitizeOption (
0 commit comments