@@ -991,6 +991,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
991991 if (CudaInstallation.isValid ())
992992 CudaInstallation.WarnIfUnsupportedVersion ();
993993 C.addOffloadDeviceToolChain (&TC, Action::OFK_Cuda);
994+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_Cuda, &TC,
995+ /* SpecificToolchain=*/ true );
994996 } else if (IsHIP && !UseLLVMOffload) {
995997 if (auto *OMPTargetArg =
996998 C.getInputArgs ().getLastArg (options::OPT_fopenmp_targets_EQ)) {
@@ -1007,6 +1009,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10071009 getOffloadToolChain (C.getInputArgs (), Action::OFK_HIP, *HIPTriple,
10081010 C.getDefaultToolChain ().getTriple ());
10091011 C.addOffloadDeviceToolChain (&TC, Action::OFK_HIP);
1012+
1013+ // TODO: Fix 'amdgcnspirv' handling with the new driver.
1014+ if (C.getInputArgs ().hasFlag (options::OPT_offload_new_driver,
1015+ options::OPT_no_offload_new_driver, false ))
1016+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_HIP, &TC,
1017+ /* SpecificToolchain=*/ true );
10101018 }
10111019
10121020 if (IsCuda || IsHIP)
@@ -1072,40 +1080,43 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10721080 auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
10731081 C.getDefaultToolChain ().getTriple ());
10741082 C.addOffloadDeviceToolChain (&TC, Action::OFK_OpenMP);
1083+ OffloadArchs[&TC] =
1084+ getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC,
1085+ /* SpecificToolchain=*/ true );
10751086 }
10761087 } else if (C.getInputArgs ().hasArg (options::OPT_offload_arch_EQ) &&
10771088 ((!IsHIP && !IsCuda) || UseLLVMOffload)) {
10781089 llvm::Triple AMDTriple (" amdgcn-amd-amdhsa" );
10791090 llvm::Triple NVPTXTriple (" nvptx64-nvidia-cuda" );
10801091
1081- // Attempt to deduce the offloading triple from the set of architectures.
1082- // We can only correctly deduce NVPTX / AMDGPU triples currently.
1083- for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1084- auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1085- C.getDefaultToolChain ().getTriple ());
1086-
1087- llvm::DenseSet<StringRef> Archs =
1088- getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC, true );
1089- llvm::DenseSet<StringRef> ArchsForTarget;
1090- for (StringRef Arch : Archs) {
1092+ for (StringRef A :
1093+ C.getInputArgs ().getAllArgValues (options::OPT_offload_arch_EQ)) {
1094+ for (StringRef Arch : llvm::split (A, " ," )) {
10911095 bool IsNVPTX = IsNVIDIAOffloadArch (
10921096 StringToOffloadArch (getProcessorFromTargetID (NVPTXTriple, Arch)));
10931097 bool IsAMDGPU = IsAMDOffloadArch (
10941098 StringToOffloadArch (getProcessorFromTargetID (AMDTriple, Arch)));
1095- if (!IsNVPTX && !IsAMDGPU && !Arch.equals_insensitive (" native" )) {
1099+ if (!IsNVPTX && !IsAMDGPU && !Arch.empty () &&
1100+ !Arch.equals_insensitive (" native" )) {
10961101 Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch)
10971102 << Arch;
10981103 return ;
10991104 }
1100-
1101- if (TT.isNVPTX () && IsNVPTX)
1102- ArchsForTarget.insert (Arch);
1103- else if (TT.isAMDGPU () && IsAMDGPU)
1104- ArchsForTarget.insert (Arch);
11051105 }
1106- if (!ArchsForTarget.empty ()) {
1106+ }
1107+
1108+ // Attempt to deduce the offloading triple from the set of architectures.
1109+ // We can only correctly deduce NVPTX / AMDGPU triples currently.
1110+ for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1111+ auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1112+ C.getDefaultToolChain ().getTriple ());
1113+
1114+ llvm::SmallVector<StringRef> Archs =
1115+ getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC,
1116+ /* SpecificToolchain=*/ false );
1117+ if (!Archs.empty ()) {
11071118 C.addOffloadDeviceToolChain (&TC, Action::OFK_OpenMP);
1108- KnownArchs [&TC] = ArchsForTarget ;
1119+ OffloadArchs [&TC] = Archs ;
11091120 }
11101121 }
11111122
@@ -1146,9 +1157,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11461157 // going to create will depend on both.
11471158 const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
11481159 for (const auto &TT : UniqueSYCLTriplesVec) {
1149- auto SYCLTC = &getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
1150- HostTC->getTriple ());
1151- C.addOffloadDeviceToolChain (SYCLTC, Action::OFK_SYCL);
1160+ auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
1161+ HostTC->getTriple ());
1162+ C.addOffloadDeviceToolChain (&TC, Action::OFK_SYCL);
1163+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &TC,
1164+ /* SpecificToolchain=*/ true );
11521165 }
11531166 }
11541167
@@ -4729,20 +4742,22 @@ static StringRef getCanonicalArchString(Compilation &C,
47294742 const llvm::opt::DerivedArgList &Args,
47304743 StringRef ArchStr,
47314744 const llvm::Triple &Triple,
4732- bool SuppressError = false ) {
4745+ bool SpecificToolchain ) {
47334746 // Lookup the CUDA / HIP architecture string. Only report an error if we were
47344747 // expecting the triple to be only NVPTX / AMDGPU.
47354748 OffloadArch Arch =
47364749 StringToOffloadArch (getProcessorFromTargetID (Triple, ArchStr));
4737- if (!SuppressError && Triple.isNVPTX () &&
4750+ if (Triple.isNVPTX () &&
47384751 (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch (Arch))) {
4739- C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4740- << " CUDA" << ArchStr;
4752+ if (SpecificToolchain)
4753+ C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4754+ << " CUDA" << ArchStr;
47414755 return StringRef ();
4742- } else if (!SuppressError && Triple.isAMDGPU () &&
4756+ } else if (Triple.isAMDGPU () &&
47434757 (Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch (Arch))) {
4744- C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4745- << " HIP" << ArchStr;
4758+ if (SpecificToolchain)
4759+ C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4760+ << " HIP" << ArchStr;
47464761 return StringRef ();
47474762 }
47484763
@@ -4751,13 +4766,9 @@ static StringRef getCanonicalArchString(Compilation &C,
47514766
47524767 if (IsAMDOffloadArch (Arch)) {
47534768 llvm::StringMap<bool > Features;
4754- auto HIPTriple = getHIPOffloadTargetTriple (C.getDriver (), C.getInputArgs ());
4755- if (!HIPTriple)
4756- return StringRef ();
4757- auto Arch = parseTargetID (*HIPTriple, ArchStr, &Features);
4769+ std::optional<StringRef> Arch = parseTargetID (Triple, ArchStr, &Features);
47584770 if (!Arch) {
47594771 C.getDriver ().Diag (clang::diag::err_drv_bad_target_id) << ArchStr;
4760- C.setContainsError ();
47614772 return StringRef ();
47624773 }
47634774 return Args.MakeArgStringRef (getCanonicalTargetID (*Arch, Features));
@@ -4780,10 +4791,10 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
47804791 return getConflictTargetIDCombination (ArchSet);
47814792}
47824793
4783- llvm::DenseSet <StringRef>
4794+ llvm::SmallVector <StringRef>
47844795Driver::getOffloadArchs (Compilation &C, const llvm::opt::DerivedArgList &Args,
47854796 Action::OffloadKind Kind, const ToolChain *TC,
4786- bool SuppressError ) const {
4797+ bool SpecificToolchain ) const {
47874798 if (!TC)
47884799 TC = &C.getDefaultToolChain ();
47894800
@@ -4798,14 +4809,11 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47984809 : " --no-offload-arch" );
47994810 }
48004811
4801- if (KnownArchs.contains (TC))
4802- return KnownArchs.lookup (TC);
4803-
48044812 llvm::DenseSet<StringRef> Archs;
48054813
48064814 if (!TC->getTargetID ().empty ()) {
48074815 Archs.insert (TC->getTargetID ());
4808- return Archs ;
4816+ return llvm::SmallVector<StringRef>() ;
48094817 }
48104818
48114819 for (auto *Arg : C.getArgsForToolChain (TC, /* BoundArch=*/ " " , Kind)) {
@@ -4816,7 +4824,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48164824 if (Arch == " native" || Arch.empty ()) {
48174825 auto GPUsOrErr = TC->getSystemGPUArchs (Args);
48184826 if (!GPUsOrErr) {
4819- if (SuppressError )
4827+ if (!SpecificToolchain )
48204828 llvm::consumeError (GPUsOrErr.takeError ());
48214829 else
48224830 TC->getDriver ().Diag (diag::err_drv_undetermined_gpu_arch)
@@ -4826,16 +4834,21 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48264834 }
48274835
48284836 for (auto ArchStr : *GPUsOrErr) {
4829- Archs. insert (
4837+ StringRef CanonicalStr =
48304838 getCanonicalArchString (C, Args, Args.MakeArgString (ArchStr),
4831- TC->getTriple (), SuppressError));
4839+ TC->getTriple (), SpecificToolchain);
4840+ if (!CanonicalStr.empty ())
4841+ Archs.insert (CanonicalStr);
4842+ else if (SpecificToolchain)
4843+ return llvm::SmallVector<StringRef>();
48324844 }
48334845 } else {
4834- StringRef ArchStr = getCanonicalArchString (
4835- C, Args, Arch, TC->getTriple (), SuppressError);
4836- if (ArchStr.empty ())
4837- return Archs;
4838- Archs.insert (ArchStr);
4846+ StringRef CanonicalStr = getCanonicalArchString (
4847+ C, Args, Arch, TC->getTriple (), SpecificToolchain);
4848+ if (!CanonicalStr.empty ())
4849+ Archs.insert (CanonicalStr);
4850+ else if (SpecificToolchain)
4851+ return llvm::SmallVector<StringRef>();
48394852 }
48404853 }
48414854 } else if (Arg->getOption ().matches (options::OPT_no_offload_arch_EQ)) {
@@ -4844,27 +4857,20 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48444857 Archs.clear ();
48454858 } else {
48464859 StringRef ArchStr = getCanonicalArchString (
4847- C, Args, Arch, TC->getTriple (), SuppressError);
4848- if (ArchStr.empty ())
4849- return Archs;
4860+ C, Args, Arch, TC->getTriple (), SpecificToolchain);
48504861 Archs.erase (ArchStr);
48514862 }
48524863 }
48534864 }
48544865 }
48554866
48564867 if (auto ConflictingArchs =
4857- getConflictOffloadArchCombination (Archs, TC->getTriple ())) {
4868+ getConflictOffloadArchCombination (Archs, TC->getTriple ()))
48584869 C.getDriver ().Diag (clang::diag::err_drv_bad_offload_arch_combo)
48594870 << ConflictingArchs->first << ConflictingArchs->second ;
4860- C.setContainsError ();
4861- }
48624871
48634872 // Skip filling defaults if we're just querying what is available.
4864- if (SuppressError)
4865- return Archs;
4866-
4867- if (Archs.empty ()) {
4873+ if (SpecificToolchain && Archs.empty ()) {
48684874 if (Kind == Action::OFK_Cuda) {
48694875 Archs.insert (OffloadArchToString (OffloadArch::CudaDefault));
48704876 } else if (Kind == Action::OFK_HIP) {
@@ -4890,12 +4896,13 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48904896 }
48914897 }
48924898 }
4893- } else {
4894- Args.ClaimAllArgs (options::OPT_offload_arch_EQ);
4895- Args.ClaimAllArgs (options::OPT_no_offload_arch_EQ);
48964899 }
4900+ Args.ClaimAllArgs (options::OPT_offload_arch_EQ);
4901+ Args.ClaimAllArgs (options::OPT_no_offload_arch_EQ);
48974902
4898- return Archs;
4903+ SmallVector<StringRef> Sorted (Archs.begin (), Archs.end ());
4904+ llvm::sort (Sorted);
4905+ return Sorted;
48994906}
49004907
49014908Action *Driver::BuildOffloadingActions (Compilation &C,
@@ -4959,10 +4966,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
49594966 // Get the product of all bound architectures and toolchains.
49604967 SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
49614968 for (const ToolChain *TC : ToolChains) {
4962- llvm::DenseSet<StringRef> Arches = getOffloadArchs (C, Args, Kind, TC);
4963- SmallVector<StringRef, 0 > Sorted (Arches.begin (), Arches.end ());
4964- llvm::sort (Sorted);
4965- for (StringRef Arch : Sorted) {
4969+ for (StringRef Arch : OffloadArchs.lookup (TC)) {
49664970 TCAndArchs.push_back (std::make_pair (TC, Arch));
49674971 DeviceActions.push_back (
49684972 C.MakeAction <InputAction>(*InputArg, InputType, CUID));
0 commit comments