@@ -31,6 +31,68 @@ using namespace clang::driver::toolchains;
31
31
using namespace clang ;
32
32
using namespace llvm ::opt;
33
33
34
+ RocmInstallationDetector::CommonBitcodeLibsPreferences::
35
+ CommonBitcodeLibsPreferences (const Driver &D,
36
+ const llvm::opt::ArgList &DriverArgs,
37
+ StringRef GPUArch,
38
+ const Action::OffloadKind DeviceOffloadingKind,
39
+ const bool NeedsASanRT)
40
+ : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
41
+ tools::getAMDGPUCodeObjectVersion (D, DriverArgs))) {
42
+ const auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
43
+ const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN (Kind);
44
+
45
+ IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP;
46
+
47
+ const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
48
+ Wave64 =
49
+ !HasWave32 || DriverArgs.hasFlag (options::OPT_mwavefrontsize64,
50
+ options::OPT_mno_wavefrontsize64, false );
51
+
52
+ const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP ||
53
+ DeviceOffloadingKind == Action::OFK_HIP;
54
+
55
+ // Default to enabling f32 denormals on subtargets where fma is fast with
56
+ // denormals
57
+ const bool DefaultDAZ =
58
+ (Kind == llvm::AMDGPU::GK_NONE)
59
+ ? false
60
+ : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
61
+ (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
62
+ // TODO: There are way too many flags that change this. Do we need to
63
+ // check them all?
64
+ DAZ = IsKnownOffloading
65
+ ? DriverArgs.hasFlag (options::OPT_fgpu_flush_denormals_to_zero,
66
+ options::OPT_fno_gpu_flush_denormals_to_zero,
67
+ DefaultDAZ)
68
+ : DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) || DefaultDAZ;
69
+
70
+ FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only) ||
71
+ DriverArgs.hasFlag (options::OPT_ffinite_math_only,
72
+ options::OPT_fno_finite_math_only, false );
73
+
74
+ UnsafeMathOpt =
75
+ DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations) ||
76
+ DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
77
+ options::OPT_fno_unsafe_math_optimizations, false );
78
+
79
+ FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math) ||
80
+ DriverArgs.hasFlag (options::OPT_ffast_math,
81
+ options::OPT_fno_fast_math, false );
82
+
83
+ const bool DefaultSqrt = IsKnownOffloading ? true : false ;
84
+ CorrectSqrt =
85
+ DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
86
+ DriverArgs.hasFlag (
87
+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
88
+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt);
89
+ // GPU Sanitizer currently only supports ASan and is enabled through host
90
+ // ASan.
91
+ GPUSan = (DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
92
+ options::OPT_fno_gpu_sanitize, true ) &&
93
+ NeedsASanRT);
94
+ }
95
+
34
96
void RocmInstallationDetector::scanLibDevicePath (llvm::StringRef Path) {
35
97
assert (!Path.empty ());
36
98
@@ -658,46 +720,21 @@ llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
658
720
amdgpu::dlr::getCommonDeviceLibNames (
659
721
const llvm::opt::ArgList &DriverArgs, const SanitizerArgs &SanArgs,
660
722
const Driver &D, const std::string &GPUArch, bool isOpenMP,
661
- const RocmInstallationDetector &RocmInstallation) {
723
+ const RocmInstallationDetector &RocmInstallation,
724
+ const clang::driver::Action::OffloadKind DeviceOffloadingKind) {
662
725
auto Kind = llvm::AMDGPU::parseArchAMDGCN (GPUArch);
663
726
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN (Kind);
664
727
665
728
StringRef LibDeviceFile = RocmInstallation.getLibDeviceFile (CanonArch);
666
729
auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion (
667
730
getAMDGPUCodeObjectVersion (D, DriverArgs));
668
- bool noGPULib = DriverArgs.hasArg (options::OPT_offloadlib);
669
- if (!RocmInstallation.checkCommonBitcodeLibs (CanonArch, LibDeviceFile, ABIVer,
670
- noGPULib))
731
+ if (!RocmInstallation.checkCommonBitcodeLibs (CanonArch, LibDeviceFile,
732
+ ABIVer))
671
733
return {};
672
-
673
- // If --hip-device-lib is not set, add the default bitcode libraries.
674
- // TODO: There are way too many flags that change this. Do we need to check
675
- // them all?
676
- bool DAZ = DriverArgs.hasFlag (
677
- options::OPT_fgpu_flush_denormals_to_zero,
678
- options::OPT_fno_gpu_flush_denormals_to_zero,
679
- toolchains::AMDGPUToolChain::getDefaultDenormsAreZeroForTarget (Kind));
680
- bool FiniteOnly = DriverArgs.hasFlag (
681
- options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false );
682
- bool UnsafeMathOpt =
683
- DriverArgs.hasFlag (options::OPT_funsafe_math_optimizations,
684
- options::OPT_fno_unsafe_math_optimizations, false );
685
- bool FastRelaxedMath = DriverArgs.hasFlag (options::OPT_ffast_math,
686
- options::OPT_fno_fast_math, false );
687
- bool CorrectSqrt = DriverArgs.hasFlag (
688
- options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
689
- options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true );
690
- bool Wave64 = toolchains::AMDGPUToolChain::isWave64 (DriverArgs, Kind);
691
-
692
- // GPU Sanitizer currently only supports ASan and is enabled through host
693
- // ASan.
694
- bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
695
- options::OPT_fno_gpu_sanitize, true ) &&
696
- SanArgs.needsAsanRt ();
697
-
734
+
698
735
return RocmInstallation.getCommonBitcodeLibs (
699
- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
700
- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP );
736
+ DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind ,
737
+ SanArgs. needsAsanRt () );
701
738
}
702
739
703
740
/// AMDGPU Toolchain
@@ -939,38 +976,18 @@ void ROCMToolChain::addClangTargetOptions(
939
976
StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile (CanonArch);
940
977
auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion (
941
978
getAMDGPUCodeObjectVersion (getDriver (), DriverArgs));
942
- bool noGPULib = DriverArgs.hasArg (options::OPT_offloadlib);
943
979
if (!RocmInstallation->checkCommonBitcodeLibs (CanonArch, LibDeviceFile,
944
- ABIVer, noGPULib ))
980
+ ABIVer))
945
981
return ;
946
982
947
- bool Wave64 = isWave64 (DriverArgs, Kind);
948
- // TODO: There are way too many flags that change this. Do we need to check
949
- // them all?
950
- bool DAZ = DriverArgs.hasArg (options::OPT_cl_denorms_are_zero) ||
951
- getDefaultDenormsAreZeroForTarget (Kind);
952
- bool FiniteOnly = DriverArgs.hasArg (options::OPT_cl_finite_math_only);
953
-
954
- bool UnsafeMathOpt =
955
- DriverArgs.hasArg (options::OPT_cl_unsafe_math_optimizations);
956
- bool FastRelaxedMath = DriverArgs.hasArg (options::OPT_cl_fast_relaxed_math);
957
- bool CorrectSqrt =
958
- DriverArgs.hasArg (options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
959
-
960
- // GPU Sanitizer currently only supports ASan and is enabled through host
961
- // ASan.
962
- bool GPUSan = DriverArgs.hasFlag (options::OPT_fgpu_sanitize,
963
- options::OPT_fno_gpu_sanitize, true ) &&
964
- getSanitizerArgs (DriverArgs).needsAsanRt ();
965
-
966
983
// Add the OpenCL specific bitcode library.
967
984
llvm::SmallVector<BitCodeLibraryInfo, 12 > BCLibs;
968
985
BCLibs.emplace_back (RocmInstallation->getOpenCLPath ().str ());
969
986
970
987
// Add the generic set of libraries.
971
988
BCLibs.append (RocmInstallation->getCommonBitcodeLibs (
972
- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt ,
973
- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false ));
989
+ DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind ,
990
+ getSanitizerArgs (DriverArgs). needsAsanRt () ));
974
991
975
992
for (auto [BCFile, Internalize] : BCLibs) {
976
993
if (Internalize)
@@ -983,15 +1000,13 @@ void ROCMToolChain::addClangTargetOptions(
983
1000
984
1001
bool RocmInstallationDetector::checkCommonBitcodeLibs (
985
1002
StringRef GPUArch, StringRef LibDeviceFile,
986
- DeviceLibABIVersion ABIVer, bool noGPULib ) const {
1003
+ DeviceLibABIVersion ABIVer) const {
987
1004
if (!hasDeviceLibrary ()) {
988
- if (!noGPULib)
989
- D.Diag (diag::err_drv_no_rocm_device_lib) << 0 ;
1005
+ D.Diag (diag::err_drv_no_rocm_device_lib) << 0 ;
990
1006
return false ;
991
1007
}
992
1008
if (LibDeviceFile.empty ()) {
993
- if (!noGPULib)
994
- D.Diag (diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
1009
+ D.Diag (diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
995
1010
return false ;
996
1011
}
997
1012
if (ABIVer.requiresLibrary () && getABIVersionPath (ABIVer).empty ()) {
@@ -1009,41 +1024,37 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
1009
1024
1010
1025
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 >
1011
1026
RocmInstallationDetector::getCommonBitcodeLibs (
1012
- const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
1013
- bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
1014
- bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
1015
- bool isOpenMP) const {
1027
+ const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
1028
+ StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind,
1029
+ const bool NeedsASanRT) const {
1016
1030
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12 > BCLibs;
1017
1031
1032
+ CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
1033
+ DeviceOffloadingKind, NeedsASanRT};
1034
+
1018
1035
auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
1019
1036
bool Internalize = true ) {
1020
1037
BCLib.ShouldInternalize = Internalize;
1021
1038
BCLibs.push_back (BCLib);
1022
1039
};
1023
1040
auto AddSanBCLibs = [&]() {
1024
- if (GPUSan)
1041
+ if (Pref. GPUSan )
1025
1042
AddBCLib (getAsanRTLPath (), false );
1026
1043
};
1027
1044
1028
1045
AddSanBCLibs ();
1029
1046
AddBCLib (getOCMLPath ());
1030
- // FIXME: OpenMP has ockl and ocml contained in libomptarget.bc. However,
1031
- // we cannot exclude ocml here because of the crazy always-compile clang
1032
- // headers for cuda, hip, and openmp. A more sane approach is to use libm
1033
- // offload-arch-specific bitcode files as is done for FORTRAN. Currently,
1034
- // libomptarget-<offload-arch>.bc files is built by compiling headers with
1035
- // __BUILD_MATH_BUILTINS_LIB__ turning static libm functions to extern.
1036
- if (!isOpenMP)
1047
+ if (!Pref.IsOpenMP )
1037
1048
AddBCLib (getOCKLPath ());
1038
- else if (GPUSan && isOpenMP )
1049
+ else if (Pref. GPUSan && Pref. IsOpenMP )
1039
1050
AddBCLib (getOCKLPath (), false );
1040
- AddBCLib (getDenormalsAreZeroPath (DAZ));
1041
- AddBCLib (getUnsafeMathPath (UnsafeMathOpt || FastRelaxedMath));
1042
- AddBCLib (getFiniteOnlyPath (FiniteOnly || FastRelaxedMath));
1043
- AddBCLib (getCorrectlyRoundedSqrtPath (CorrectSqrt));
1044
- AddBCLib (getWavefrontSize64Path (Wave64));
1051
+ AddBCLib (getDenormalsAreZeroPath (Pref. DAZ ));
1052
+ AddBCLib (getUnsafeMathPath (Pref. UnsafeMathOpt || Pref. FastRelaxedMath ));
1053
+ AddBCLib (getFiniteOnlyPath (Pref. FiniteOnly || Pref. FastRelaxedMath ));
1054
+ AddBCLib (getCorrectlyRoundedSqrtPath (Pref. CorrectSqrt ));
1055
+ AddBCLib (getWavefrontSize64Path (Pref. Wave64 ));
1045
1056
AddBCLib (LibDeviceFile);
1046
- auto ABIVerPath = getABIVersionPath (ABIVer);
1057
+ auto ABIVerPath = getABIVersionPath (Pref. ABIVer );
1047
1058
if (!ABIVerPath.empty ())
1048
1059
AddBCLib (ABIVerPath);
1049
1060
@@ -1058,14 +1069,22 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
1058
1069
}
1059
1070
1060
1071
/// Collect the common device bitcode libraries for \p GPUArch.
///
/// The architecture is canonicalized before the device-library file is looked
/// up. An empty list is returned when checkCommonBitcodeLibs rejects the
/// installation for that architecture, device-library file, or ABI version.
///
/// \param DriverArgs arguments used for the code object version, the sanitizer
///        query, and forwarded to getCommonBitcodeLibs.
/// \param GPUArch architecture name as given by the caller.
/// \param DeviceOffloadingKind forwarded unchanged to getCommonBitcodeLibs.
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
ROCMToolChain::getCommonDeviceLibNames(
    const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
    Action::OffloadKind DeviceOffloadingKind) const {
  const StringRef CanonicalName = llvm::AMDGPU::getArchNameAMDGCN(
      llvm::AMDGPU::parseArchAMDGCN(GPUArch));
  const StringRef DeviceLib =
      RocmInstallation->getLibDeviceFile(CanonicalName);

  const auto CodeObjectVer =
      getAMDGPUCodeObjectVersion(getDriver(), DriverArgs);
  const auto ABI = DeviceLibABIVersion::fromCodeObjectVersion(CodeObjectVer);

  const bool LibsUsable =
      RocmInstallation->checkCommonBitcodeLibs(CanonicalName, DeviceLib, ABI);
  if (!LibsUsable)
    return {};

  const bool NeedsASanRT = getSanitizerArgs(DriverArgs).needsAsanRt();
  return RocmInstallation->getCommonBitcodeLibs(
      DriverArgs, DeviceLib, GPUArch, DeviceOffloadingKind, NeedsASanRT);
}
1070
1089
1071
1090
bool AMDGPUToolChain::shouldSkipSanitizeOption (
0 commit comments