Skip to content

Commit 74a46e2

Browse files
nicebertronlieb
authored and committed
[OpenMP][Clang][LIBOMPTARGET] Merging DLR approach with the new driver
This patch sets the new offload driver as the default and ensures that the opaque steps are always called whenever the new driver is in use. Rebased on the latest ASO; passes lits. Need to investigate post-landing: 553.pclvleaf WA, 534.hpgmg_v WA. Change-Id: I82c36f47ba6f4fa81af047e2c6dfed0c2d83fb1a
1 parent 5f0ad79 commit 74a46e2

File tree

19 files changed

+982
-690
lines changed

19 files changed

+982
-690
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2957,6 +2957,10 @@ def offload_new_driver : Flag<["--"], "offload-new-driver">, Flags<[CC1Option]>,
29572957
MarshallingInfoFlag<LangOpts<"OffloadingNewDriver">>, HelpText<"Use the new driver for offloading compilation.">;
29582958
def no_offload_new_driver : Flag<["--"], "no-offload-new-driver">, Flags<[CC1Option]>, Group<f_Group>,
29592959
HelpText<"Don't Use the new driver for offloading compilation.">;
2960+
def opaque_offload_linker : Flag<["--"], "opaque-offload-linker">, Flags<[CC1Option]>, Group<f_Group>,
2961+
HelpText<"Build/link omp offload binary, construct opaque cmd list instead of single clang-linker-wrapper cmd.">;
2962+
def no_opaque_offload_linker : Flag<["--"], "no-opaque-offload-linker">, Flags<[CC1Option]>, Group<f_Group>,
2963+
HelpText<"Build/link omp offload binary, using single clang-linker-wrapper cmd.">;
29602964
def offload_device_only : Flag<["--"], "offload-device-only">, Flags<[FlangOption]>,
29612965
HelpText<"Only compile for the offloading device.">;
29622966
def offload_host_only : Flag<["--"], "offload-host-only">, Flags<[FlangOption]>,

clang/lib/Driver/Driver.cpp

Lines changed: 4 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -965,151 +965,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
965965
*HostTC, OFK);
966966
assert(HIPTC && "Could not create offloading device tool chain.");
967967
C.addOffloadDeviceToolChain(HIPTC, OFK);
968-
} else if (C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
969-
options::OPT_no_offload_new_driver,
970-
false)) {
971-
bool IsOpenMPOffloading =
972-
C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
973-
options::OPT_fno_openmp, false) &&
974-
(C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) ||
975-
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ));
976-
if (IsOpenMPOffloading) {
977-
// We expect that -fopenmp-targets is always used in conjunction with the
978-
// option -fopenmp specifying a valid runtime with offloading support,
979-
// i.e. libomp or libiomp.
980-
OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs());
981-
if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) {
982-
Diag(clang::diag::Err_drv_expecting_fopenmp_with_fopenmp_targets);
983-
return;
984-
}
985-
986-
llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
987-
llvm::StringMap<StringRef> FoundNormalizedTriples;
988-
llvm::SmallVector<StringRef, 4> OpenMPTriples;
989-
990-
// If the user specified -fopenmp-targets= we create a toolchain for each
991-
// valid triple. Otherwise, if only --offload-arch= was specified we
992-
// instead attempt to derive the appropriate toolchains from the
993-
// arguments.
994-
if (Arg *OpenMPTargets =
995-
C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
996-
if (OpenMPTargets && !OpenMPTargets->getNumValues()) {
997-
Diag(clang::diag::warn_drv_empty_joined_argument)
998-
<< OpenMPTargets->getAsString(C.getInputArgs());
999-
return;
1000-
}
1001-
llvm::copy(OpenMPTargets->getValues(),
1002-
std::back_inserter(OpenMPTriples));
1003-
} else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
1004-
!IsHIP && !IsCuda) {
1005-
const ToolChain *HostTC =
1006-
C.getSingleOffloadToolChain<Action::OFK_Host>();
1007-
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
1008-
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
1009-
HostTC->getTriple());
1010-
1011-
// Attempt to deduce the offloading triple from the set of
1012-
// architectures. We can only correctly deduce NVPTX / AMDGPU triples
1013-
// currently. We need to temporarily create these toolchains so that we
1014-
// can access tools for inferring architectures.
1015-
llvm::DenseSet<StringRef> Archs;
1016-
if (NVPTXTriple) {
1017-
auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1018-
*this, *NVPTXTriple, *HostTC, C.getInputArgs());
1019-
for (StringRef Arch : getOffloadArchs(
1020-
C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true))
1021-
Archs.insert(Arch);
1022-
}
1023-
if (AMDTriple) {
1024-
auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1025-
*this, *AMDTriple, *HostTC, C.getInputArgs(), Action::OFK_OpenMP);
1026-
for (StringRef Arch : getOffloadArchs(
1027-
C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true))
1028-
Archs.insert(Arch);
1029-
}
1030-
if (!AMDTriple && !NVPTXTriple) {
1031-
for (StringRef Arch : getOffloadArchs(
1032-
C, C.getArgs(), Action::OFK_OpenMP, nullptr, true))
1033-
Archs.insert(Arch);
1034-
}
1035-
1036-
for (StringRef Arch : Archs) {
1037-
if (NVPTXTriple &&
1038-
IsNVIDIAGpuArch(StringToCudaArch(
1039-
getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
1040-
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
1041-
} else if (AMDTriple &&
1042-
IsAMDGpuArch(StringToCudaArch(
1043-
getProcessorFromTargetID(*AMDTriple, Arch)))) {
1044-
DerivedArchs[AMDTriple->getTriple()].insert(Arch);
1045-
} else {
1046-
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch)
1047-
<< Arch;
1048-
return;
1049-
}
1050-
}
1051-
1052-
// If the set is empty then we failed to find a native architecture.
1053-
if (Archs.empty()) {
1054-
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch)
1055-
<< "native";
1056-
return;
1057-
}
1058-
1059-
for (const auto &TripleAndArchs : DerivedArchs)
1060-
OpenMPTriples.push_back(TripleAndArchs.first());
1061-
}
1062-
1063-
for (StringRef Val : OpenMPTriples) {
1064-
llvm::Triple TT(ToolChain::getOpenMPTriple(Val));
1065-
std::string NormalizedName = TT.normalize();
1066-
1067-
// Make sure we don't have a duplicate triple.
1068-
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
1069-
if (Duplicate != FoundNormalizedTriples.end()) {
1070-
Diag(clang::diag::warn_drv_omp_offload_target_duplicate)
1071-
<< Val << Duplicate->second;
1072-
continue;
1073-
}
1074-
1075-
// Store the current triple so that we can check for duplicates in the
1076-
// following iterations.
1077-
FoundNormalizedTriples[NormalizedName] = Val;
1078-
1079-
// If the specified target is invalid, emit a diagnostic.
1080-
if (TT.getArch() == llvm::Triple::UnknownArch)
1081-
Diag(clang::diag::err_drv_invalid_omp_target) << Val;
1082-
else {
1083-
const ToolChain *TC;
1084-
// Device toolchains have to be selected differently. They pair host
1085-
// and device in their implementation.
1086-
if (TT.isNVPTX() || TT.isAMDGCN()) {
1087-
const ToolChain *HostTC =
1088-
C.getSingleOffloadToolChain<Action::OFK_Host>();
1089-
assert(HostTC && "Host toolchain should be always defined.");
1090-
auto &DeviceTC =
1091-
ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()];
1092-
if (!DeviceTC) {
1093-
if (TT.isNVPTX())
1094-
DeviceTC = std::make_unique<toolchains::CudaToolChain>(
1095-
*this, TT, *HostTC, C.getInputArgs());
1096-
else if (TT.isAMDGCN())
1097-
DeviceTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1098-
*this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP);
1099-
else
1100-
assert(DeviceTC && "Device toolchain not defined.");
1101-
}
1102-
1103-
TC = DeviceTC.get();
1104-
} else
1105-
TC = &getToolChain(C.getInputArgs(), TT);
1106-
C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP);
1107-
if (DerivedArchs.find(TT.getTriple()) != DerivedArchs.end())
1108-
KnownArchs[TC] = DerivedArchs[TT.getTriple()];
1109-
}
1110-
}
1111-
}
1112-
1113968
} else {
1114969
//
1115970
// OpenMP
@@ -1167,7 +1022,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11671022
bool IsHostOffloading =
11681023
(OpenMPTargets->getNumValues() == 1) &&
11691024
StringRef(OpenMPTargets->getValue())
1170-
.startswith_insensitive(
1025+
.starts_with_insensitive(
11711026
C.getSingleOffloadToolChain<Action::OFK_Host>()
11721027
->getTriple()
11731028
.getArchName());
@@ -4676,9 +4531,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
46764531

46774532
handleArguments(C, Args, Inputs, Actions);
46784533

4679-
bool UseNewOffloadingDriver =
4680-
Args.hasFlag(options::OPT_offload_new_driver,
4681-
options::OPT_no_offload_new_driver, false);
4534+
bool UseNewOffloadingDriver = Args.hasFlag(
4535+
options::OPT_offload_new_driver, options::OPT_no_offload_new_driver,
4536+
C.isOffloadingHostKind(Action::OFK_OpenMP));
46824537

46834538
// Builder to be used to build offloading actions.
46844539
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 42 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,44 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
671671
options::OPT_m_amdgpu_Features_Group);
672672
}
673673

674+
llvm::SmallVector<std::string, 12> amdgpu::dlr::getCommonDeviceLibNames(
675+
const llvm::opt::ArgList &DriverArgs, const Driver &D,
676+
const std::string &GPUArch, bool isOpenMP,
677+
const RocmInstallationDetector &RocmInstallation) {
678+
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
679+
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
680+
681+
StringRef LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
682+
auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion(
683+
getAMDGPUCodeObjectVersion(D, DriverArgs));
684+
if (!RocmInstallation.checkCommonBitcodeLibs(CanonArch, LibDeviceFile,
685+
ABIVer))
686+
return {};
687+
688+
// If --hip-device-lib is not set, add the default bitcode libraries.
689+
// TODO: There are way too many flags that change this. Do we need to check
690+
// them all?
691+
bool DAZ = DriverArgs.hasFlag(
692+
options::OPT_fgpu_flush_denormals_to_zero,
693+
options::OPT_fno_gpu_flush_denormals_to_zero,
694+
toolchains::AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(Kind));
695+
bool FiniteOnly = DriverArgs.hasFlag(
696+
options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
697+
bool UnsafeMathOpt =
698+
DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
699+
options::OPT_fno_unsafe_math_optimizations, false);
700+
bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
701+
options::OPT_fno_fast_math, false);
702+
bool CorrectSqrt = DriverArgs.hasFlag(
703+
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
704+
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true);
705+
bool Wave64 = toolchains::AMDGPUToolChain::isWave64(DriverArgs, Kind);
706+
707+
return RocmInstallation.getCommonBitcodeLibs(
708+
DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
709+
FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP);
710+
}
711+
674712
/// AMDGPU Toolchain
675713
AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
676714
const ArgList &Args)
@@ -1031,35 +1069,8 @@ llvm::SmallVector<std::string, 12>
10311069
ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
10321070
const std::string &GPUArch,
10331071
bool isOpenMP) const {
1034-
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
1035-
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
1036-
1037-
StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(CanonArch);
1038-
auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion(
1039-
getAMDGPUCodeObjectVersion(getDriver(), DriverArgs));
1040-
if (!RocmInstallation->checkCommonBitcodeLibs(CanonArch, LibDeviceFile,
1041-
ABIVer))
1042-
return {};
1043-
1044-
// If --hip-device-lib is not set, add the default bitcode libraries.
1045-
// TODO: There are way too many flags that change this. Do we need to check
1046-
// them all?
1047-
bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
1048-
options::OPT_fno_gpu_flush_denormals_to_zero,
1049-
getDefaultDenormsAreZeroForTarget(Kind));
1050-
bool FiniteOnly = DriverArgs.hasFlag(
1051-
options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
1052-
bool UnsafeMathOpt =
1053-
DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
1054-
options::OPT_fno_unsafe_math_optimizations, false);
1055-
bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
1056-
options::OPT_fno_fast_math, false);
1057-
bool CorrectSqrt = DriverArgs.hasFlag(
1058-
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
1059-
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true);
1060-
bool Wave64 = isWave64(DriverArgs, Kind);
1061-
1062-
return RocmInstallation->getCommonBitcodeLibs(
1063-
DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
1064-
FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP);
1072+
RocmInstallationDetector RocmInstallation(getDriver(), getTriple(),
1073+
DriverArgs, true, true);
1074+
return amdgpu::dlr::getCommonDeviceLibNames(DriverArgs, getDriver(), GPUArch,
1075+
isOpenMP, RocmInstallation);
10651076
}

clang/lib/Driver/ToolChains/AMDGPU.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,45 @@ void getAMDGPUTargetFeatures(const Driver &D, const llvm::Triple &Triple,
4242
std::vector<StringRef> &Features,
4343
StringRef TcTargetID = StringRef());
4444

45+
namespace dlr {
46+
llvm::SmallVector<std::string, 12>
47+
getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, const Driver &D,
48+
const std::string &GPUArch, bool isOpenMP,
49+
const RocmInstallationDetector &RocmInstallation);
50+
51+
const char *
52+
getCbslCommandArgs(Compilation &C, const llvm::opt::ArgList &Args,
53+
llvm::opt::ArgStringList &CbslArgs,
54+
const SmallVectorImpl<std::string> &InputFileNames,
55+
llvm::StringRef OutputFilePrefix);
56+
57+
const char *
58+
getLinkCommandArgs(Compilation &C, const llvm::opt::ArgList &Args,
59+
llvm::opt::ArgStringList &LastLinkArgs, const ToolChain &TC,
60+
const llvm::Triple &Triple, llvm::StringRef TargetID,
61+
llvm::StringRef OutputFilePrefix, const char *InputFileName,
62+
const RocmInstallationDetector &RocmInstallation);
63+
64+
const char *getOptCommandArgs(Compilation &C, const llvm::opt::ArgList &Args,
65+
llvm::opt::ArgStringList &OptArgs,
66+
const llvm::Triple &Triple,
67+
llvm::StringRef TargetID,
68+
llvm::StringRef OutputFilePrefix,
69+
const char *InputFileName);
70+
71+
const char *
72+
getLlcCommandArgs(Compilation &C, const llvm::opt::ArgList &Args,
73+
llvm::opt::ArgStringList &LlcArgs, const llvm::Triple &Triple,
74+
llvm::StringRef TargetID, llvm::StringRef OutputFilePrefix,
75+
const char *InputFileName, bool OutputIsAsm = false);
76+
77+
const char *getLldCommandArgs(
78+
Compilation &C, const InputInfo &Output, const llvm::opt::ArgList &Args,
79+
llvm::opt::ArgStringList &LldArgs, const llvm::Triple &Triple,
80+
llvm::StringRef TargetID, const char *InputFileName,
81+
const std::optional<std::string> OutputFilePrefix = std::nullopt);
82+
} // end namespace dlr
83+
4584
} // end namespace amdgpu
4685
} // end namespace tools
4786

0 commit comments

Comments
 (0)