@@ -1022,7 +1022,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10221022
10231023 for (const auto &TripleAndArchs : DerivedArchs)
10241024 OpenMPTriples.insert (TripleAndArchs.first ());
1025- }
1025+ } // end of offload-arch
10261026
10271027 for (StringRef Val : OpenMPTriples) {
10281028 llvm::Triple TT (ToolChain::getOpenMPTriple (Val));
@@ -1109,6 +1109,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11091109 };
11101110
11111111 Arg *SYCLTargets = getArgRequiringSYCLRuntime (options::OPT_fsycl_targets_EQ);
1112+ Arg *SYCLOffloadArch = getArgRequiringSYCLRuntime (options::OPT_offload_arch_EQ);
11121113
11131114 // Check if -fsycl-host-compiler is used in conjunction with -fsycl.
11141115 Arg *SYCLHostCompiler =
@@ -1182,12 +1183,14 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11821183 llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
11831184 llvm::StringMap<StringRef> FoundNormalizedTriples;
11841185 llvm::SmallVector<llvm::Triple, 4 > UniqueSYCLTriplesVec;
1186+ // llvm::StringSet<> SYCLTriples;
1187+ std::multiset<StringRef> SYCLTriples;
11851188 if (HasSYCLTargetsOption) {
11861189 // At this point, we know we have a valid combination
11871190 // of -fsycl*target options passed
11881191 Arg *SYCLTargetsValues = SYCLTargets;
11891192 if (SYCLTargetsValues) {
1190- llvm::StringSet<> SYCLTriples;
1193+
11911194 if (SYCLTargetsValues->getNumValues ()) {
11921195
11931196 // Multiple targets are currently not supported when using
@@ -1275,10 +1278,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12751278 SYCLTriples.insert (DeviceTriple.normalize ());
12761279 if (!Arch.empty ())
12771280 DerivedArchs[DeviceTriple.getTriple ()].insert (Arch);
1278- }
1281+ } // end of SYCLTargetsValues for loop
1282+
12791283 if (!SYCLTriples.empty ()) {
12801284 for (const auto &SYCLTriple : SYCLTriples) {
1281- llvm::Triple Triple (SYCLTriple.getKey ());
1285+ // llvm::Triple TT(MakeSYCLDeviceTriple(Val));
1286+ llvm::Triple Triple (MakeSYCLDeviceTriple (SYCLTriple));
12821287 UniqueSYCLTriplesVec.push_back (Triple);
12831288 }
12841289 }
@@ -1287,7 +1292,93 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12871292 Diag (clang::diag::warn_drv_empty_joined_argument)
12881293 << SYCLTargetsValues->getAsString (C.getInputArgs ());
12891294 }
1290- } else {
1295+ } else if (SYCLOffloadArch) {
1296+ const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
1297+ auto AMDTriple = getHIPOffloadTargetTriple (*this , C.getInputArgs ());
1298+ auto NVPTXTriple = getNVIDIAOffloadTargetTriple (*this , C.getInputArgs (),
1299+ HostTC->getTriple ());
1300+
1301+ // Attempt to deduce the offloading triple from the set of architectures.
1302+ // We can only correctly deduce NVPTX / AMDGPU triples currently. We need
1303+ // to temporarily create these toolchains so that we can access tools for
1304+ // inferring architectures.
1305+ llvm::DenseSet<StringRef> Archs;
1306+ if (NVPTXTriple) {
1307+ auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1308+ *this , *NVPTXTriple, *HostTC, C.getInputArgs (), Action::OFK_None);
1309+ for (StringRef Arch : getOffloadArchs (
1310+ C, C.getArgs (), Action::OFK_SYCL, &*TempTC, true ))
1311+ Archs.insert (Arch);
1312+ }
1313+ if (AMDTriple) {
1314+ auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1315+ *this , *AMDTriple, *HostTC, C.getInputArgs ());
1316+ for (StringRef Arch : getOffloadArchs (
1317+ C, C.getArgs (), Action::OFK_SYCL, &*TempTC, true ))
1318+ Archs.insert (Arch);
1319+ }
1320+ if (!AMDTriple && !NVPTXTriple) {
1321+ for (StringRef Arch :
1322+ getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, nullptr , true ))
1323+ Archs.insert (Arch);
1324+ }
1325+ for (StringRef Arch : Archs) {
1326+ if (NVPTXTriple && IsNVIDIAOffloadArch (StringToOffloadArch (
1327+ getProcessorFromTargetID (*NVPTXTriple, Arch)))) {
1328+ DerivedArchs[NVPTXTriple->getTriple ()].insert (Arch);
1329+ } else if (AMDTriple &&
1330+ IsAMDOffloadArch (StringToOffloadArch (
1331+ getProcessorFromTargetID (*AMDTriple, Arch)))) {
1332+ DerivedArchs[AMDTriple->getTriple ()].insert (Arch);
1333+ }
1334+ else if (IsIntelCPUOffloadArch (StringToOffloadArchIntel (Arch))) {
1335+ DerivedArchs[" spir64_x86_64" ].insert (Arch);
1336+ } else if (IsIntelGPUOffloadArch (StringToOffloadArchIntel (Arch))) {
1337+ DerivedArchs[" spir64_gen" ].insert (Arch);
1338+ }
1339+ else {
1340+ Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch;
1341+ return ;
1342+ }
1343+ }
1344+ // If the set is empty then we failed to find a native architecture.
1345+ if (Archs.empty ()) {
1346+ Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch)
1347+ << " native" ;
1348+ return ;
1349+ }
1350+
1351+ for (const auto &TripleAndArchs : DerivedArchs)
1352+ SYCLTriples.insert (TripleAndArchs.first ()); // ["triple -> arch"]
1353+
1354+
1355+ for (StringRef Val : SYCLTriples) {
1356+ llvm::Triple TT (MakeSYCLDeviceTriple (Val));
1357+ std::string NormalizedName = TT.normalize ();
1358+
1359+ // Make sure we don't have a duplicate triple.
1360+ auto Duplicate = FoundNormalizedTriples.find (NormalizedName);
1361+ if (Duplicate != FoundNormalizedTriples.end ()) {
1362+ Diag (clang::diag::warn_drv_omp_offload_target_duplicate)
1363+ << Val << Duplicate->second ;
1364+ continue ;
1365+ }
1366+
1367+ // Store the current triple so that we can check for duplicates in the
1368+ // following iterations.
1369+ FoundNormalizedTriples[NormalizedName] = Val;
1370+ }
1371+
1372+ if (!SYCLTriples.empty ()) {
1373+ for (const auto &SYCLTriple : SYCLTriples) {
1374+ llvm::Triple Triple (MakeSYCLDeviceTriple (SYCLTriple));
1375+ UniqueSYCLTriplesVec.push_back (Triple);
1376+ }
1377+ }
1378+ addSYCLDefaultTriple (C, UniqueSYCLTriplesVec);
1379+
1380+ } // end of --offload-arch
1381+ else {
12911382 // If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
12921383 // For -fsycl-device-only, we also setup the implied triple as needed.
12931384 if (HasValidSYCLRuntime) {
0 commit comments