@@ -833,10 +833,14 @@ Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const {
833833
834834static llvm::Triple getSYCLDeviceTriple (StringRef TargetArch) {
835835 SmallVector<StringRef, 5 > SYCLAlias = {" spir" , " spir64" , " spirv" , " spirv32" ,
836- " spirv64" };
836+ " spirv64" , " spir64_x86_64" ,
837+ " spir64_gen" , " nvptx64" };
837838 if (llvm::is_contained (SYCLAlias, TargetArch)) {
838839 llvm::Triple TargetTriple;
839840 TargetTriple.setArchName (TargetArch);
841+ // Return the full SYCL target triple string for NVidia GPU targets.
842+ if (TargetTriple.getArch () == llvm::Triple::nvptx64)
843+ return llvm::Triple (" nvptx64-nvidia-cuda" );
840844 TargetTriple.setVendor (llvm::Triple::UnknownVendor);
841845 TargetTriple.setOS (llvm::Triple::UnknownOS);
842846 return TargetTriple;
@@ -846,16 +850,25 @@ static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) {
846850
847851static bool addSYCLDefaultTriple (Compilation &C,
848852 SmallVectorImpl<llvm::Triple> &SYCLTriples) {
853+
854+ llvm::Triple DefaultTriple = getSYCLDeviceTriple (
855+ C.getDefaultToolChain ().getTriple ().isArch32Bit () ? " spirv32"
856+ : " spirv64" );
857+ for (const auto &SYCLTriple : SYCLTriples) {
858+ if (SYCLTriple == DefaultTriple)
859+ return false ;
860+ // If we encounter a known non-spir* target, do not add the default triple.
861+ if (SYCLTriple.isNVPTX () || SYCLTriple.isAMDGCN ())
862+ return false ;
863+ if (SYCLTriple.isSPIRAOT ())
864+ return false ;
865+ }
849866 // Check current set of triples to see if the default has already been set.
850867 for (const auto &SYCLTriple : SYCLTriples) {
851868 if (SYCLTriple.getSubArch () == llvm::Triple::NoSubArch &&
852869 SYCLTriple.isSPIROrSPIRV ())
853870 return false ;
854871 }
855- // Add the default triple as it was not found.
856- llvm::Triple DefaultTriple = getSYCLDeviceTriple (
857- C.getDefaultToolChain ().getTriple ().isArch32Bit () ? " spirv32"
858- : " spirv64" );
859872 SYCLTriples.insert (SYCLTriples.begin (), DefaultTriple);
860873 return true ;
861874}
@@ -1066,19 +1079,119 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10661079 // -ffreestanding cannot be used with -fsycl
10671080 argSYCLIncompatible (options::OPT_ffreestanding);
10681081
1082+ // Map of SYCL target triple strings to their corresponding target archs.
1083+ // Example: spir64_x86_64 --> SKYLAKEAVX512
1084+ llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
1085+ llvm::StringMap<StringRef> FoundNormalizedTriples;
10691086 llvm::SmallVector<llvm::Triple, 4 > UniqueSYCLTriplesVec;
1070-
1087+ // StringSet to contain SYCL target triples.
1088+ llvm::StringSet<> SYCLTriples;
1089+ // If the user specified --offload-arch, deduce the offloading
1090+ // target triple(s) from the set of architecture(s).
1091+ // Create a toolchain for each valid triple.
1092+ // We do not support SYCL offloading if any of the inputs is a
1093+ // .cu (for CUDA type) or .hip (for HIP type) file.
10711094 if (IsSYCL) {
1072- addSYCLDefaultTriple (C, UniqueSYCLTriplesVec);
1095+ if (C.getInputArgs ().hasArg (options::OPT_offload_arch_EQ) && !IsHIP &&
1096+ !IsCuda) {
1097+
1098+ const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
1099+ auto AMDTriple = getHIPOffloadTargetTriple (*this , C.getInputArgs ());
1100+ auto NVPTXTriple = getNVIDIAOffloadTargetTriple (*this , C.getInputArgs (),
1101+ HostTC->getTriple ());
1102+
1103+ // Attempt to deduce the offloading triple from the set of architectures.
1104+ // We need to temporarily create these toolchains so that we can access
1105+ // tools for inferring architectures.
1106+ llvm::DenseSet<StringRef> Archs;
1107+ if (NVPTXTriple) {
1108+ auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1109+ *this , *NVPTXTriple, *HostTC, C.getInputArgs ());
1110+ for (StringRef Arch :
1111+ getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &*TempTC, true ))
1112+ Archs.insert (Arch);
1113+ }
1114+ if (AMDTriple) {
1115+ auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1116+ *this , *AMDTriple, *HostTC, C.getInputArgs ());
1117+ for (StringRef Arch :
1118+ getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &*TempTC, true ))
1119+ Archs.insert (Arch);
1120+ }
1121+ if (!AMDTriple && !NVPTXTriple) {
1122+ for (StringRef Arch :
1123+ getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, nullptr , true ))
1124+ Archs.insert (Arch);
1125+ }
1126+ for (StringRef Arch : Archs) {
1127+ if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch (StringToOffloadArch (
1128+ getProcessorFromTargetID (*NVPTXTriple, Arch)))) {
1129+ DerivedArchs[NVPTXTriple->getTriple ()].insert (Arch);
1130+ } else if (AMDTriple &&
1131+ IsSYCLSupportedAMDGPUArch (StringToOffloadArch (
1132+ getProcessorFromTargetID (*AMDTriple, Arch)))) {
1133+ DerivedArchs[AMDTriple->getTriple ()].insert (Arch);
1134+ } else if (IsSYCLSupportedIntelCPUArch (StringToOffloadArchSYCL (Arch))) {
1135+ DerivedArchs[getSYCLDeviceTriple (" spir64_x86_64" ).getTriple ()].insert (
1136+ Arch);
1137+ } else if (IsSYCLSupportedIntelGPUArch (StringToOffloadArchSYCL (Arch))) {
1138+ StringRef IntelGPUArch;
1139+ // For Intel Graphics AOT target, valid values for '--offload-arch'
1140+ // are mapped to valid device names accepted by OCLOC (the Intel GPU AOT
1141+ // compiler) via the '-device' option. The mapIntelGPUArchName
1142+ // function maps the accepted values for '--offload-arch' to enable SYCL
1143+ // offloading to Intel GPUs and the corresponding '-device' value passed
1144+ // to OCLOC.
1145+ IntelGPUArch = mapIntelGPUArchName (Arch).data ();
1146+ DerivedArchs[getSYCLDeviceTriple (" spir64_gen" ).getTriple ()].insert (
1147+ IntelGPUArch);
1148+ } else {
1149+ Diag (clang::diag::err_drv_invalid_sycl_target) << Arch;
1150+ return ;
1151+ }
1152+ }
1153+ // Emit an error if architecture value is not provided
1154+ // to --offload-arch.
1155+ if (Archs.empty ()) {
1156+ Diag (clang::diag::err_drv_sycl_offload_arch_missing_value);
1157+ return ;
1158+ }
1159+
1160+ for (const auto &TripleAndArchs : DerivedArchs)
1161+ SYCLTriples.insert (TripleAndArchs.first ());
1162+
1163+ for (const auto &Val : SYCLTriples) {
1164+ llvm::Triple SYCLTargetTriple (getSYCLDeviceTriple (Val.getKey ()));
1165+ std::string NormalizedName = SYCLTargetTriple.normalize ();
1166+
1167+ // Make sure we don't have a duplicate triple.
1168+ auto Duplicate = FoundNormalizedTriples.find (NormalizedName);
1169+ if (Duplicate != FoundNormalizedTriples.end ()) {
1170+ Diag (clang::diag::warn_drv_sycl_offload_target_duplicate)
1171+ << Val.getKey () << Duplicate->second ;
1172+ continue ;
1173+ }
1174+
1175+ // Store the current triple so that we can check for duplicates in the
1176+ // following iterations.
1177+ FoundNormalizedTriples[NormalizedName] = Val.getKey ();
1178+ UniqueSYCLTriplesVec.push_back (SYCLTargetTriple);
1179+ }
1180+
1181+ addSYCLDefaultTriple (C, UniqueSYCLTriplesVec);
1182+ } else
1183+ addSYCLDefaultTriple (C, UniqueSYCLTriplesVec);
10731184
10741185 // We'll need to use the SYCL and host triples as the key into
1075- // getOffloadingDeviceToolChain , because the device toolchains we're
1186+ // getOffloadToolChain , because the device toolchains we're
10761187 // going to create will depend on both.
10771188 const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
10781189 for (const auto &TT : UniqueSYCLTriplesVec) {
10791190 auto SYCLTC = &getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
10801191 HostTC->getTriple ());
10811192 C.addOffloadDeviceToolChain (SYCLTC, Action::OFK_SYCL);
1193+ if (DerivedArchs.contains (TT.getTriple ()))
1194+ KnownArchs[SYCLTC] = DerivedArchs[TT.getTriple ()];
10821195 }
10831196 }
10841197
@@ -6596,7 +6709,7 @@ const ToolChain &Driver::getOffloadToolChain(
65966709 if (Kind == Action::OFK_HIP)
65976710 TC = std::make_unique<toolchains::HIPAMDToolChain>(*this , Target,
65986711 *HostTC, Args);
6599- else if (Kind == Action::OFK_OpenMP)
6712+ else if (( Kind == Action::OFK_OpenMP) || (Kind == Action::OFK_SYCL) )
66006713 TC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(*this , Target,
66016714 *HostTC, Args);
66026715 break ;
0 commit comments