@@ -988,6 +988,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
988988 if (CudaInstallation.isValid ())
989989 CudaInstallation.WarnIfUnsupportedVersion ();
990990 C.addOffloadDeviceToolChain (&TC, Action::OFK_Cuda);
991+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_Cuda, &TC,
992+ /* SpecificToolchain=*/ true );
991993 } else if (IsHIP && !UseLLVMOffload) {
992994 if (auto *OMPTargetArg =
993995 C.getInputArgs ().getLastArg (options::OPT_fopenmp_targets_EQ)) {
@@ -1004,6 +1006,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10041006 getOffloadToolChain (C.getInputArgs (), Action::OFK_HIP, *HIPTriple,
10051007 C.getDefaultToolChain ().getTriple ());
10061008 C.addOffloadDeviceToolChain (&TC, Action::OFK_HIP);
1009+
1010+ // TODO: Fix 'amdgcnspirv' handling with the new driver.
1011+ if (C.getInputArgs ().hasFlag (options::OPT_offload_new_driver,
1012+ options::OPT_no_offload_new_driver, false ))
1013+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_HIP, &TC,
1014+ /* SpecificToolchain=*/ true );
10071015 }
10081016
10091017 if (IsCuda || IsHIP)
@@ -1069,40 +1077,43 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10691077 auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
10701078 C.getDefaultToolChain ().getTriple ());
10711079 C.addOffloadDeviceToolChain (&TC, Action::OFK_OpenMP);
1080+ OffloadArchs[&TC] =
1081+ getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC,
1082+ /* SpecificToolchain=*/ true );
10721083 }
10731084 } else if (C.getInputArgs ().hasArg (options::OPT_offload_arch_EQ) &&
10741085 ((!IsHIP && !IsCuda) || UseLLVMOffload)) {
10751086 llvm::Triple AMDTriple (" amdgcn-amd-amdhsa" );
10761087 llvm::Triple NVPTXTriple (" nvptx64-nvidia-cuda" );
10771088
1078- // Attempt to deduce the offloading triple from the set of architectures.
1079- // We can only correctly deduce NVPTX / AMDGPU triples currently.
1080- for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1081- auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1082- C.getDefaultToolChain ().getTriple ());
1083-
1084- llvm::DenseSet<StringRef> Archs =
1085- getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC, true );
1086- llvm::DenseSet<StringRef> ArchsForTarget;
1087- for (StringRef Arch : Archs) {
1089+ for (StringRef A :
1090+ C.getInputArgs ().getAllArgValues (options::OPT_offload_arch_EQ)) {
1091+ for (StringRef Arch : llvm::split (A, " ," )) {
10881092 bool IsNVPTX = IsNVIDIAOffloadArch (
10891093 StringToOffloadArch (getProcessorFromTargetID (NVPTXTriple, Arch)));
10901094 bool IsAMDGPU = IsAMDOffloadArch (
10911095 StringToOffloadArch (getProcessorFromTargetID (AMDTriple, Arch)));
1092- if (!IsNVPTX && !IsAMDGPU && !Arch.equals_insensitive (" native" )) {
1096+ if (!IsNVPTX && !IsAMDGPU && !Arch.empty () &&
1097+ !Arch.equals_insensitive (" native" )) {
10931098 Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch)
10941099 << Arch;
10951100 return ;
10961101 }
1097-
1098- if (TT.isNVPTX () && IsNVPTX)
1099- ArchsForTarget.insert (Arch);
1100- else if (TT.isAMDGPU () && IsAMDGPU)
1101- ArchsForTarget.insert (Arch);
11021102 }
1103- if (!ArchsForTarget.empty ()) {
1103+ }
1104+
1105+ // Attempt to deduce the offloading triple from the set of architectures.
1106+ // We can only correctly deduce NVPTX / AMDGPU triples currently.
1107+ for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1108+ auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1109+ C.getDefaultToolChain ().getTriple ());
1110+
1111+ llvm::SmallVector<StringRef> Archs =
1112+ getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC,
1113+ /* SpecificToolchain=*/ false );
1114+ if (!Archs.empty ()) {
11041115 C.addOffloadDeviceToolChain (&TC, Action::OFK_OpenMP);
1105- KnownArchs [&TC] = ArchsForTarget ;
1116+ OffloadArchs [&TC] = Archs ;
11061117 }
11071118 }
11081119
@@ -1143,9 +1154,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11431154 // going to create will depend on both.
11441155 const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
11451156 for (const auto &TT : UniqueSYCLTriplesVec) {
1146- auto SYCLTC = &getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
1147- HostTC->getTriple ());
1148- C.addOffloadDeviceToolChain (SYCLTC, Action::OFK_SYCL);
1157+ auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
1158+ HostTC->getTriple ());
1159+ C.addOffloadDeviceToolChain (&TC, Action::OFK_SYCL);
1160+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &TC,
1161+ /* SpecificToolchain=*/ true );
11491162 }
11501163 }
11511164
@@ -4706,20 +4719,22 @@ static StringRef getCanonicalArchString(Compilation &C,
47064719 const llvm::opt::DerivedArgList &Args,
47074720 StringRef ArchStr,
47084721 const llvm::Triple &Triple,
4709- bool SuppressError = false ) {
4722+ bool SpecificToolchain ) {
47104723 // Lookup the CUDA / HIP architecture string. Only report an error if we were
47114724 // expecting the triple to be only NVPTX / AMDGPU.
47124725 OffloadArch Arch =
47134726 StringToOffloadArch (getProcessorFromTargetID (Triple, ArchStr));
4714- if (!SuppressError && Triple.isNVPTX () &&
4727+ if (Triple.isNVPTX () &&
47154728 (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch (Arch))) {
4716- C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4717- << " CUDA" << ArchStr;
4729+ if (SpecificToolchain)
4730+ C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4731+ << " CUDA" << ArchStr;
47184732 return StringRef ();
4719- } else if (!SuppressError && Triple.isAMDGPU () &&
4733+ } else if (Triple.isAMDGPU () &&
47204734 (Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch (Arch))) {
4721- C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4722- << " HIP" << ArchStr;
4735+ if (SpecificToolchain)
4736+ C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4737+ << " HIP" << ArchStr;
47234738 return StringRef ();
47244739 }
47254740
@@ -4728,13 +4743,9 @@ static StringRef getCanonicalArchString(Compilation &C,
47284743
47294744 if (IsAMDOffloadArch (Arch)) {
47304745 llvm::StringMap<bool > Features;
4731- auto HIPTriple = getHIPOffloadTargetTriple (C.getDriver (), C.getInputArgs ());
4732- if (!HIPTriple)
4733- return StringRef ();
4734- auto Arch = parseTargetID (*HIPTriple, ArchStr, &Features);
4746+ std::optional<StringRef> Arch = parseTargetID (Triple, ArchStr, &Features);
47354747 if (!Arch) {
47364748 C.getDriver ().Diag (clang::diag::err_drv_bad_target_id) << ArchStr;
4737- C.setContainsError ();
47384749 return StringRef ();
47394750 }
47404751 return Args.MakeArgStringRef (getCanonicalTargetID (*Arch, Features));
@@ -4757,10 +4768,10 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
47574768 return getConflictTargetIDCombination (ArchSet);
47584769}
47594770
4760- llvm::DenseSet <StringRef>
4771+ llvm::SmallVector <StringRef>
47614772Driver::getOffloadArchs (Compilation &C, const llvm::opt::DerivedArgList &Args,
47624773 Action::OffloadKind Kind, const ToolChain *TC,
4763- bool SuppressError ) const {
4774+ bool SpecificToolchain ) const {
47644775 if (!TC)
47654776 TC = &C.getDefaultToolChain ();
47664777
@@ -4775,9 +4786,6 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47754786 : " --no-offload-arch" );
47764787 }
47774788
4778- if (KnownArchs.contains (TC))
4779- return KnownArchs.lookup (TC);
4780-
47814789 llvm::DenseSet<StringRef> Archs;
47824790 for (auto *Arg : C.getArgsForToolChain (TC, /* BoundArch=*/ " " , Kind)) {
47834791 // Add or remove the seen architectures in order of appearance. If an
@@ -4787,7 +4795,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47874795 if (Arch == " native" || Arch.empty ()) {
47884796 auto GPUsOrErr = TC->getSystemGPUArchs (Args);
47894797 if (!GPUsOrErr) {
4790- if (SuppressError )
4798+ if (!SpecificToolchain )
47914799 llvm::consumeError (GPUsOrErr.takeError ());
47924800 else
47934801 TC->getDriver ().Diag (diag::err_drv_undetermined_gpu_arch)
@@ -4797,16 +4805,21 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47974805 }
47984806
47994807 for (auto ArchStr : *GPUsOrErr) {
4800- Archs. insert (
4808+ StringRef CanonicalStr =
48014809 getCanonicalArchString (C, Args, Args.MakeArgString (ArchStr),
4802- TC->getTriple (), SuppressError));
4810+ TC->getTriple (), SpecificToolchain);
4811+ if (!CanonicalStr.empty ())
4812+ Archs.insert (CanonicalStr);
4813+ else if (SpecificToolchain)
4814+ return llvm::SmallVector<StringRef>();
48034815 }
48044816 } else {
4805- StringRef ArchStr = getCanonicalArchString (
4806- C, Args, Arch, TC->getTriple (), SuppressError);
4807- if (ArchStr.empty ())
4808- return Archs;
4809- Archs.insert (ArchStr);
4817+ StringRef CanonicalStr = getCanonicalArchString (
4818+ C, Args, Arch, TC->getTriple (), SpecificToolchain);
4819+ if (!CanonicalStr.empty ())
4820+ Archs.insert (CanonicalStr);
4821+ else if (SpecificToolchain)
4822+ return llvm::SmallVector<StringRef>();
48104823 }
48114824 }
48124825 } else if (Arg->getOption ().matches (options::OPT_no_offload_arch_EQ)) {
@@ -4815,27 +4828,20 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48154828 Archs.clear ();
48164829 } else {
48174830 StringRef ArchStr = getCanonicalArchString (
4818- C, Args, Arch, TC->getTriple (), SuppressError);
4819- if (ArchStr.empty ())
4820- return Archs;
4831+ C, Args, Arch, TC->getTriple (), SpecificToolchain);
48214832 Archs.erase (ArchStr);
48224833 }
48234834 }
48244835 }
48254836 }
48264837
48274838 if (auto ConflictingArchs =
4828- getConflictOffloadArchCombination (Archs, TC->getTriple ())) {
4839+ getConflictOffloadArchCombination (Archs, TC->getTriple ()))
48294840 C.getDriver ().Diag (clang::diag::err_drv_bad_offload_arch_combo)
48304841 << ConflictingArchs->first << ConflictingArchs->second ;
4831- C.setContainsError ();
4832- }
48334842
48344843 // Skip filling defaults if we're just querying what is available.
4835- if (SuppressError)
4836- return Archs;
4837-
4838- if (Archs.empty ()) {
4844+ if (SpecificToolchain && Archs.empty ()) {
48394845 if (Kind == Action::OFK_Cuda) {
48404846 Archs.insert (OffloadArchToString (OffloadArch::CudaDefault));
48414847 } else if (Kind == Action::OFK_HIP) {
@@ -4861,12 +4867,13 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48614867 }
48624868 }
48634869 }
4864- } else {
4865- Args.ClaimAllArgs (options::OPT_offload_arch_EQ);
4866- Args.ClaimAllArgs (options::OPT_no_offload_arch_EQ);
48674870 }
4871+ Args.ClaimAllArgs (options::OPT_offload_arch_EQ);
4872+ Args.ClaimAllArgs (options::OPT_no_offload_arch_EQ);
48684873
4869- return Archs;
4874+ SmallVector<StringRef> Sorted (Archs.begin (), Archs.end ());
4875+ llvm::sort (Sorted);
4876+ return Sorted;
48704877}
48714878
48724879Action *Driver::BuildOffloadingActions (Compilation &C,
@@ -4930,10 +4937,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
49304937 // Get the product of all bound architectures and toolchains.
49314938 SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
49324939 for (const ToolChain *TC : ToolChains) {
4933- llvm::DenseSet<StringRef> Arches = getOffloadArchs (C, Args, Kind, TC);
4934- SmallVector<StringRef, 0 > Sorted (Arches.begin (), Arches.end ());
4935- llvm::sort (Sorted);
4936- for (StringRef Arch : Sorted) {
4940+ for (StringRef Arch : OffloadArchs.lookup (TC)) {
49374941 TCAndArchs.push_back (std::make_pair (TC, Arch));
49384942 DeviceActions.push_back (
49394943 C.MakeAction <InputAction>(*InputArg, InputType, CUID));
0 commit comments