Skip to content

Commit 474ba4a

Browse files
jhuber6 authored and github-actions[bot] committed
Automerge: [Clang] Determine offloading architectures at Toolchain creation (#145799)
Summary: Previously we had this weird disconnect where we would get some offloading architectures beforehand and some later. This patch changes it so that we just generate this information at Toolchain creation. There are a few edge cases that will need to be cleaned up. Namely, we don't handle the strange SPIR-V case that mixes two separate toolchains, and we needed a pre-check to reject errors when inferring the toolchain from `--offload-arch` in OpenMP. It is possible we could also use this information for some host defines if needed.
2 parents 0dce1cb + 48e8937 commit 474ba4a

File tree

3 files changed

+75
-72
lines changed

3 files changed

+75
-72
lines changed

clang/include/clang/Driver/Driver.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -367,10 +367,9 @@ class Driver {
367367
/// stored in it, and will clean them up when torn down.
368368
mutable llvm::StringMap<std::unique_ptr<ToolChain>> ToolChains;
369369

370-
/// Cache of known offloading architectures for the ToolChain already derived.
371-
/// This should only be modified when we first initialize the offloading
372-
/// toolchains.
373-
llvm::DenseMap<const ToolChain *, llvm::DenseSet<llvm::StringRef>> KnownArchs;
370+
/// The associated offloading architectures with each toolchain.
371+
llvm::DenseMap<const ToolChain *, llvm::SmallVector<llvm::StringRef>>
372+
OffloadArchs;
374373

375374
private:
376375
/// TranslateInputArgs - Create a new derived argument list from the input
@@ -535,11 +534,11 @@ class Driver {
535534

536535
/// Returns the set of bound architectures active for this offload kind.
537536
/// If there are no bound architectures we return a set containing only the
538-
/// empty string. The \p SuppressError option is used to suppress errors.
539-
llvm::DenseSet<StringRef>
537+
/// empty string.
538+
llvm::SmallVector<StringRef>
540539
getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
541540
Action::OffloadKind Kind, const ToolChain *TC,
542-
bool SuppressError = false) const;
541+
bool SpecificToolchain = true) const;
543542

544543
/// Check that the file referenced by Value exists. If it doesn't,
545544
/// issue a diagnostic and return false.

clang/lib/Driver/Driver.cpp

Lines changed: 68 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -988,6 +988,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
988988
if (CudaInstallation.isValid())
989989
CudaInstallation.WarnIfUnsupportedVersion();
990990
C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
991+
OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_Cuda, &TC,
992+
/*SpecificToolchain=*/true);
991993
} else if (IsHIP && !UseLLVMOffload) {
992994
if (auto *OMPTargetArg =
993995
C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
@@ -1004,6 +1006,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10041006
getOffloadToolChain(C.getInputArgs(), Action::OFK_HIP, *HIPTriple,
10051007
C.getDefaultToolChain().getTriple());
10061008
C.addOffloadDeviceToolChain(&TC, Action::OFK_HIP);
1009+
1010+
// TODO: Fix 'amdgcnspirv' handling with the new driver.
1011+
if (C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
1012+
options::OPT_no_offload_new_driver, false))
1013+
OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_HIP, &TC,
1014+
/*SpecificToolchain=*/true);
10071015
}
10081016

10091017
if (IsCuda || IsHIP)
@@ -1069,40 +1077,43 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10691077
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
10701078
C.getDefaultToolChain().getTriple());
10711079
C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP);
1080+
OffloadArchs[&TC] =
1081+
getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC,
1082+
/*SpecificToolchain=*/true);
10721083
}
10731084
} else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
10741085
((!IsHIP && !IsCuda) || UseLLVMOffload)) {
10751086
llvm::Triple AMDTriple("amdgcn-amd-amdhsa");
10761087
llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda");
10771088

1078-
// Attempt to deduce the offloading triple from the set of architectures.
1079-
// We can only correctly deduce NVPTX / AMDGPU triples currently.
1080-
for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1081-
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
1082-
C.getDefaultToolChain().getTriple());
1083-
1084-
llvm::DenseSet<StringRef> Archs =
1085-
getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, true);
1086-
llvm::DenseSet<StringRef> ArchsForTarget;
1087-
for (StringRef Arch : Archs) {
1089+
for (StringRef A :
1090+
C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) {
1091+
for (StringRef Arch : llvm::split(A, ",")) {
10881092
bool IsNVPTX = IsNVIDIAOffloadArch(
10891093
StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch)));
10901094
bool IsAMDGPU = IsAMDOffloadArch(
10911095
StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch)));
1092-
if (!IsNVPTX && !IsAMDGPU && !Arch.equals_insensitive("native")) {
1096+
if (!IsNVPTX && !IsAMDGPU && !Arch.empty() &&
1097+
!Arch.equals_insensitive("native")) {
10931098
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch)
10941099
<< Arch;
10951100
return;
10961101
}
1097-
1098-
if (TT.isNVPTX() && IsNVPTX)
1099-
ArchsForTarget.insert(Arch);
1100-
else if (TT.isAMDGPU() && IsAMDGPU)
1101-
ArchsForTarget.insert(Arch);
11021102
}
1103-
if (!ArchsForTarget.empty()) {
1103+
}
1104+
1105+
// Attempt to deduce the offloading triple from the set of architectures.
1106+
// We can only correctly deduce NVPTX / AMDGPU triples currently.
1107+
for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1108+
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
1109+
C.getDefaultToolChain().getTriple());
1110+
1111+
llvm::SmallVector<StringRef> Archs =
1112+
getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC,
1113+
/*SpecificToolchain=*/false);
1114+
if (!Archs.empty()) {
11041115
C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP);
1105-
KnownArchs[&TC] = ArchsForTarget;
1116+
OffloadArchs[&TC] = Archs;
11061117
}
11071118
}
11081119

@@ -1143,9 +1154,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11431154
// going to create will depend on both.
11441155
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
11451156
for (const auto &TT : UniqueSYCLTriplesVec) {
1146-
auto SYCLTC = &getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT,
1147-
HostTC->getTriple());
1148-
C.addOffloadDeviceToolChain(SYCLTC, Action::OFK_SYCL);
1157+
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT,
1158+
HostTC->getTriple());
1159+
C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL);
1160+
OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC,
1161+
/*SpecificToolchain=*/true);
11491162
}
11501163
}
11511164

@@ -4706,20 +4719,22 @@ static StringRef getCanonicalArchString(Compilation &C,
47064719
const llvm::opt::DerivedArgList &Args,
47074720
StringRef ArchStr,
47084721
const llvm::Triple &Triple,
4709-
bool SuppressError = false) {
4722+
bool SpecificToolchain) {
47104723
// Lookup the CUDA / HIP architecture string. Only report an error if we were
47114724
// expecting the triple to be only NVPTX / AMDGPU.
47124725
OffloadArch Arch =
47134726
StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr));
4714-
if (!SuppressError && Triple.isNVPTX() &&
4727+
if (Triple.isNVPTX() &&
47154728
(Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) {
4716-
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4717-
<< "CUDA" << ArchStr;
4729+
if (SpecificToolchain)
4730+
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4731+
<< "CUDA" << ArchStr;
47184732
return StringRef();
4719-
} else if (!SuppressError && Triple.isAMDGPU() &&
4733+
} else if (Triple.isAMDGPU() &&
47204734
(Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) {
4721-
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4722-
<< "HIP" << ArchStr;
4735+
if (SpecificToolchain)
4736+
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4737+
<< "HIP" << ArchStr;
47234738
return StringRef();
47244739
}
47254740

@@ -4728,13 +4743,9 @@ static StringRef getCanonicalArchString(Compilation &C,
47284743

47294744
if (IsAMDOffloadArch(Arch)) {
47304745
llvm::StringMap<bool> Features;
4731-
auto HIPTriple = getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs());
4732-
if (!HIPTriple)
4733-
return StringRef();
4734-
auto Arch = parseTargetID(*HIPTriple, ArchStr, &Features);
4746+
std::optional<StringRef> Arch = parseTargetID(Triple, ArchStr, &Features);
47354747
if (!Arch) {
47364748
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr;
4737-
C.setContainsError();
47384749
return StringRef();
47394750
}
47404751
return Args.MakeArgStringRef(getCanonicalTargetID(*Arch, Features));
@@ -4757,10 +4768,10 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
47574768
return getConflictTargetIDCombination(ArchSet);
47584769
}
47594770

4760-
llvm::DenseSet<StringRef>
4771+
llvm::SmallVector<StringRef>
47614772
Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47624773
Action::OffloadKind Kind, const ToolChain *TC,
4763-
bool SuppressError) const {
4774+
bool SpecificToolchain) const {
47644775
if (!TC)
47654776
TC = &C.getDefaultToolChain();
47664777

@@ -4775,9 +4786,6 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47754786
: "--no-offload-arch");
47764787
}
47774788

4778-
if (KnownArchs.contains(TC))
4779-
return KnownArchs.lookup(TC);
4780-
47814789
llvm::DenseSet<StringRef> Archs;
47824790
for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) {
47834791
// Add or remove the seen architectures in order of appearance. If an
@@ -4787,7 +4795,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47874795
if (Arch == "native" || Arch.empty()) {
47884796
auto GPUsOrErr = TC->getSystemGPUArchs(Args);
47894797
if (!GPUsOrErr) {
4790-
if (SuppressError)
4798+
if (!SpecificToolchain)
47914799
llvm::consumeError(GPUsOrErr.takeError());
47924800
else
47934801
TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
@@ -4797,16 +4805,21 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47974805
}
47984806

47994807
for (auto ArchStr : *GPUsOrErr) {
4800-
Archs.insert(
4808+
StringRef CanonicalStr =
48014809
getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr),
4802-
TC->getTriple(), SuppressError));
4810+
TC->getTriple(), SpecificToolchain);
4811+
if (!CanonicalStr.empty())
4812+
Archs.insert(CanonicalStr);
4813+
else if (SpecificToolchain)
4814+
return llvm::SmallVector<StringRef>();
48034815
}
48044816
} else {
4805-
StringRef ArchStr = getCanonicalArchString(
4806-
C, Args, Arch, TC->getTriple(), SuppressError);
4807-
if (ArchStr.empty())
4808-
return Archs;
4809-
Archs.insert(ArchStr);
4817+
StringRef CanonicalStr = getCanonicalArchString(
4818+
C, Args, Arch, TC->getTriple(), SpecificToolchain);
4819+
if (!CanonicalStr.empty())
4820+
Archs.insert(CanonicalStr);
4821+
else if (SpecificToolchain)
4822+
return llvm::SmallVector<StringRef>();
48104823
}
48114824
}
48124825
} else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) {
@@ -4815,27 +4828,20 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48154828
Archs.clear();
48164829
} else {
48174830
StringRef ArchStr = getCanonicalArchString(
4818-
C, Args, Arch, TC->getTriple(), SuppressError);
4819-
if (ArchStr.empty())
4820-
return Archs;
4831+
C, Args, Arch, TC->getTriple(), SpecificToolchain);
48214832
Archs.erase(ArchStr);
48224833
}
48234834
}
48244835
}
48254836
}
48264837

48274838
if (auto ConflictingArchs =
4828-
getConflictOffloadArchCombination(Archs, TC->getTriple())) {
4839+
getConflictOffloadArchCombination(Archs, TC->getTriple()))
48294840
C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
48304841
<< ConflictingArchs->first << ConflictingArchs->second;
4831-
C.setContainsError();
4832-
}
48334842

48344843
// Skip filling defaults if we're just querying what is available.
4835-
if (SuppressError)
4836-
return Archs;
4837-
4838-
if (Archs.empty()) {
4844+
if (SpecificToolchain && Archs.empty()) {
48394845
if (Kind == Action::OFK_Cuda) {
48404846
Archs.insert(OffloadArchToString(OffloadArch::CudaDefault));
48414847
} else if (Kind == Action::OFK_HIP) {
@@ -4861,12 +4867,13 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48614867
}
48624868
}
48634869
}
4864-
} else {
4865-
Args.ClaimAllArgs(options::OPT_offload_arch_EQ);
4866-
Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ);
48674870
}
4871+
Args.ClaimAllArgs(options::OPT_offload_arch_EQ);
4872+
Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ);
48684873

4869-
return Archs;
4874+
SmallVector<StringRef> Sorted(Archs.begin(), Archs.end());
4875+
llvm::sort(Sorted);
4876+
return Sorted;
48704877
}
48714878

48724879
Action *Driver::BuildOffloadingActions(Compilation &C,
@@ -4930,10 +4937,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
49304937
// Get the product of all bound architectures and toolchains.
49314938
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
49324939
for (const ToolChain *TC : ToolChains) {
4933-
llvm::DenseSet<StringRef> Arches = getOffloadArchs(C, Args, Kind, TC);
4934-
SmallVector<StringRef, 0> Sorted(Arches.begin(), Arches.end());
4935-
llvm::sort(Sorted);
4936-
for (StringRef Arch : Sorted) {
4940+
for (StringRef Arch : OffloadArchs.lookup(TC)) {
49374941
TCAndArchs.push_back(std::make_pair(TC, Arch));
49384942
DeviceActions.push_back(
49394943
C.MakeAction<InputAction>(*InputArg, InputType, CUID));

clang/test/Driver/openmp-offload-gpu.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@
307307
// DRIVER_EMBEDDING: -fembed-offload-object={{.*}}.out
308308

309309
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
310-
// RUN: --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
310+
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
311311
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]"
312312
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
313313

0 commit comments

Comments
 (0)