@@ -988,6 +988,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
988988 if (CudaInstallation.isValid ())
989989 CudaInstallation.WarnIfUnsupportedVersion ();
990990 C.addOffloadDeviceToolChain (&TC, Action::OFK_Cuda);
991+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_Cuda, &TC,
992+ /* SpecificToolchain=*/ true );
991993 } else if (IsHIP && !UseLLVMOffload) {
992994 if (auto *OMPTargetArg =
993995 C.getInputArgs ().getLastArg (options::OPT_fopenmp_targets_EQ)) {
@@ -1004,6 +1006,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10041006 getOffloadToolChain (C.getInputArgs (), Action::OFK_HIP, *HIPTriple,
10051007 C.getDefaultToolChain ().getTriple ());
10061008 C.addOffloadDeviceToolChain (&TC, Action::OFK_HIP);
1009+
1010+ // TODO: Fix 'amdgcnspirv' handling with the new driver.
1011+ if (C.getInputArgs ().hasFlag (options::OPT_offload_new_driver,
1012+ options::OPT_no_offload_new_driver, false ))
1013+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_HIP, &TC,
1014+ /* SpecificToolchain=*/ true );
10071015 }
10081016
10091017 if (IsCuda || IsHIP)
@@ -1069,40 +1077,43 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10691077 auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
10701078 C.getDefaultToolChain ().getTriple ());
10711079 C.addOffloadDeviceToolChain (&TC, Action::OFK_OpenMP);
1080+ OffloadArchs[&TC] =
1081+ getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC,
1082+ /* SpecificToolchain=*/ true );
10721083 }
10731084 } else if (C.getInputArgs ().hasArg (options::OPT_offload_arch_EQ) &&
10741085 ((!IsHIP && !IsCuda) || UseLLVMOffload)) {
10751086 llvm::Triple AMDTriple (" amdgcn-amd-amdhsa" );
10761087 llvm::Triple NVPTXTriple (" nvptx64-nvidia-cuda" );
10771088
1078- // Attempt to deduce the offloading triple from the set of architectures.
1079- // We can only correctly deduce NVPTX / AMDGPU triples currently.
1080- for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1081- auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1082- C.getDefaultToolChain ().getTriple ());
1083-
1084- llvm::DenseSet<StringRef> Archs =
1085- getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC, true );
1086- llvm::DenseSet<StringRef> ArchsForTarget;
1087- for (StringRef Arch : Archs) {
1089+ for (StringRef A :
1090+ C.getInputArgs ().getAllArgValues (options::OPT_offload_arch_EQ)) {
1091+ for (StringRef Arch : llvm::split (A, " ," )) {
10881092 bool IsNVPTX = IsNVIDIAOffloadArch (
10891093 StringToOffloadArch (getProcessorFromTargetID (NVPTXTriple, Arch)));
10901094 bool IsAMDGPU = IsAMDOffloadArch (
10911095 StringToOffloadArch (getProcessorFromTargetID (AMDTriple, Arch)));
1092- if (!IsNVPTX && !IsAMDGPU && !Arch.equals_insensitive (" native" )) {
1096+ if (!IsNVPTX && !IsAMDGPU && !Arch.empty () &&
1097+ !Arch.equals_insensitive (" native" )) {
10931098 Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch)
10941099 << Arch;
10951100 return ;
10961101 }
1097-
1098- if (TT.isNVPTX () && IsNVPTX)
1099- ArchsForTarget.insert (Arch);
1100- else if (TT.isAMDGPU () && IsAMDGPU)
1101- ArchsForTarget.insert (Arch);
11021102 }
1103- if (!ArchsForTarget.empty ()) {
1103+ }
1104+
1105+ // Attempt to deduce the offloading triple from the set of architectures.
1106+ // We can only correctly deduce NVPTX / AMDGPU triples currently.
1107+ for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1108+ auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1109+ C.getDefaultToolChain ().getTriple ());
1110+
1111+ llvm::SmallVector<StringRef> Archs =
1112+ getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC,
1113+ /* SpecificToolchain=*/ false );
1114+ if (!Archs.empty ()) {
11041115 C.addOffloadDeviceToolChain (&TC, Action::OFK_OpenMP);
1105- KnownArchs [&TC] = ArchsForTarget ;
1116+ OffloadArchs [&TC] = Archs ;
11061117 }
11071118 }
11081119
@@ -1143,9 +1154,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11431154 // going to create will depend on both.
11441155 const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
11451156 for (const auto &TT : UniqueSYCLTriplesVec) {
1146- auto SYCLTC = &getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
1147- HostTC->getTriple ());
1148- C.addOffloadDeviceToolChain (SYCLTC, Action::OFK_SYCL);
1157+ auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
1158+ HostTC->getTriple ());
1159+ C.addOffloadDeviceToolChain (&TC, Action::OFK_SYCL);
1160+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &TC,
1161+ /* SpecificToolchain=*/ true );
11491162 }
11501163 }
11511164
@@ -4703,20 +4716,22 @@ static StringRef getCanonicalArchString(Compilation &C,
47034716 const llvm::opt::DerivedArgList &Args,
47044717 StringRef ArchStr,
47054718 const llvm::Triple &Triple,
4706- bool SuppressError = false ) {
4719+ bool SpecificToolchain ) {
47074720 // Lookup the CUDA / HIP architecture string. Only report an error if we were
47084721 // expecting the triple to be only NVPTX / AMDGPU.
47094722 OffloadArch Arch =
47104723 StringToOffloadArch (getProcessorFromTargetID (Triple, ArchStr));
4711- if (!SuppressError && Triple.isNVPTX () &&
4724+ if (Triple.isNVPTX () &&
47124725 (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch (Arch))) {
4713- C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4714- << " CUDA" << ArchStr;
4726+ if (SpecificToolchain)
4727+ C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4728+ << " CUDA" << ArchStr;
47154729 return StringRef ();
4716- } else if (!SuppressError && Triple.isAMDGPU () &&
4730+ } else if (Triple.isAMDGPU () &&
47174731 (Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch (Arch))) {
4718- C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4719- << " HIP" << ArchStr;
4732+ if (SpecificToolchain)
4733+ C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4734+ << " HIP" << ArchStr;
47204735 return StringRef ();
47214736 }
47224737
@@ -4725,13 +4740,9 @@ static StringRef getCanonicalArchString(Compilation &C,
47254740
47264741 if (IsAMDOffloadArch (Arch)) {
47274742 llvm::StringMap<bool > Features;
4728- auto HIPTriple = getHIPOffloadTargetTriple (C.getDriver (), C.getInputArgs ());
4729- if (!HIPTriple)
4730- return StringRef ();
4731- auto Arch = parseTargetID (*HIPTriple, ArchStr, &Features);
4743+ std::optional<StringRef> Arch = parseTargetID (Triple, ArchStr, &Features);
47324744 if (!Arch) {
47334745 C.getDriver ().Diag (clang::diag::err_drv_bad_target_id) << ArchStr;
4734- C.setContainsError ();
47354746 return StringRef ();
47364747 }
47374748 return Args.MakeArgStringRef (getCanonicalTargetID (*Arch, Features));
@@ -4754,10 +4765,10 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
47544765 return getConflictTargetIDCombination (ArchSet);
47554766}
47564767
4757- llvm::DenseSet <StringRef>
4768+ llvm::SmallVector <StringRef>
47584769Driver::getOffloadArchs (Compilation &C, const llvm::opt::DerivedArgList &Args,
47594770 Action::OffloadKind Kind, const ToolChain *TC,
4760- bool SuppressError ) const {
4771+ bool SpecificToolchain ) const {
47614772 if (!TC)
47624773 TC = &C.getDefaultToolChain ();
47634774
@@ -4772,9 +4783,6 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47724783 : " --no-offload-arch" );
47734784 }
47744785
4775- if (KnownArchs.contains (TC))
4776- return KnownArchs.lookup (TC);
4777-
47784786 llvm::DenseSet<StringRef> Archs;
47794787 for (auto *Arg : C.getArgsForToolChain (TC, /* BoundArch=*/ " " , Kind)) {
47804788 // Add or remove the seen architectures in order of appearance. If an
@@ -4784,7 +4792,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47844792 if (Arch == " native" || Arch.empty ()) {
47854793 auto GPUsOrErr = TC->getSystemGPUArchs (Args);
47864794 if (!GPUsOrErr) {
4787- if (SuppressError )
4795+ if (!SpecificToolchain )
47884796 llvm::consumeError (GPUsOrErr.takeError ());
47894797 else
47904798 TC->getDriver ().Diag (diag::err_drv_undetermined_gpu_arch)
@@ -4794,16 +4802,21 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47944802 }
47954803
47964804 for (auto ArchStr : *GPUsOrErr) {
4797- Archs. insert (
4805+ StringRef CanonicalStr =
47984806 getCanonicalArchString (C, Args, Args.MakeArgString (ArchStr),
4799- TC->getTriple (), SuppressError));
4807+ TC->getTriple (), SpecificToolchain);
4808+ if (!CanonicalStr.empty ())
4809+ Archs.insert (CanonicalStr);
4810+ else if (SpecificToolchain)
4811+ return llvm::SmallVector<StringRef>();
48004812 }
48014813 } else {
4802- StringRef ArchStr = getCanonicalArchString (
4803- C, Args, Arch, TC->getTriple (), SuppressError);
4804- if (ArchStr.empty ())
4805- return Archs;
4806- Archs.insert (ArchStr);
4814+ StringRef CanonicalStr = getCanonicalArchString (
4815+ C, Args, Arch, TC->getTriple (), SpecificToolchain);
4816+ if (!CanonicalStr.empty ())
4817+ Archs.insert (CanonicalStr);
4818+ else if (SpecificToolchain)
4819+ return llvm::SmallVector<StringRef>();
48074820 }
48084821 }
48094822 } else if (Arg->getOption ().matches (options::OPT_no_offload_arch_EQ)) {
@@ -4812,27 +4825,20 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48124825 Archs.clear ();
48134826 } else {
48144827 StringRef ArchStr = getCanonicalArchString (
4815- C, Args, Arch, TC->getTriple (), SuppressError);
4816- if (ArchStr.empty ())
4817- return Archs;
4828+ C, Args, Arch, TC->getTriple (), SpecificToolchain);
48184829 Archs.erase (ArchStr);
48194830 }
48204831 }
48214832 }
48224833 }
48234834
48244835 if (auto ConflictingArchs =
4825- getConflictOffloadArchCombination (Archs, TC->getTriple ())) {
4836+ getConflictOffloadArchCombination (Archs, TC->getTriple ()))
48264837 C.getDriver ().Diag (clang::diag::err_drv_bad_offload_arch_combo)
48274838 << ConflictingArchs->first << ConflictingArchs->second ;
4828- C.setContainsError ();
4829- }
48304839
48314840 // Skip filling defaults if we're just querying what is available.
4832- if (SuppressError)
4833- return Archs;
4834-
4835- if (Archs.empty ()) {
4841+ if (SpecificToolchain && Archs.empty ()) {
48364842 if (Kind == Action::OFK_Cuda) {
48374843 Archs.insert (OffloadArchToString (OffloadArch::CudaDefault));
48384844 } else if (Kind == Action::OFK_HIP) {
@@ -4858,12 +4864,13 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48584864 }
48594865 }
48604866 }
4861- } else {
4862- Args.ClaimAllArgs (options::OPT_offload_arch_EQ);
4863- Args.ClaimAllArgs (options::OPT_no_offload_arch_EQ);
48644867 }
4868+ Args.ClaimAllArgs (options::OPT_offload_arch_EQ);
4869+ Args.ClaimAllArgs (options::OPT_no_offload_arch_EQ);
48654870
4866- return Archs;
4871+ SmallVector<StringRef> Sorted (Archs.begin (), Archs.end ());
4872+ llvm::sort (Sorted);
4873+ return Sorted;
48674874}
48684875
48694876Action *Driver::BuildOffloadingActions (Compilation &C,
@@ -4927,10 +4934,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
49274934 // Get the product of all bound architectures and toolchains.
49284935 SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
49294936 for (const ToolChain *TC : ToolChains) {
4930- llvm::DenseSet<StringRef> Arches = getOffloadArchs (C, Args, Kind, TC);
4931- SmallVector<StringRef, 0 > Sorted (Arches.begin (), Arches.end ());
4932- llvm::sort (Sorted);
4933- for (StringRef Arch : Sorted) {
4937+ for (StringRef Arch : OffloadArchs.lookup (TC)) {
49344938 TCAndArchs.push_back (std::make_pair (TC, Arch));
49354939 DeviceActions.push_back (
49364940 C.MakeAction <InputAction>(*InputArg, InputType, CUID));
0 commit comments