@@ -1109,7 +1109,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11091109 };
11101110
11111111 Arg *SYCLTargets = getArgRequiringSYCLRuntime (options::OPT_fsycl_targets_EQ);
1112- Arg *SYCLOffloadArch = getArgRequiringSYCLRuntime (options::OPT_offload_arch_EQ);
11131112
11141113 // Check if -fsycl-host-compiler is used in conjunction with -fsycl.
11151114 Arg *SYCLHostCompiler =
@@ -1183,8 +1182,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11831182 llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
11841183 llvm::StringMap<StringRef> FoundNormalizedTriples;
11851184 llvm::SmallVector<llvm::Triple, 4 > UniqueSYCLTriplesVec;
1186- // llvm::StringSet<> SYCLTriples;
1187- std::multiset<StringRef> SYCLTriples;
1185+ llvm::StringSet<> SYCLTriples;
11881186 if (HasSYCLTargetsOption) {
11891187 // At this point, we know we have a valid combination
11901188 // of -fsycl*target options passed
@@ -1278,12 +1276,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12781276 SYCLTriples.insert (DeviceTriple.normalize ());
12791277 if (!Arch.empty ())
12801278 DerivedArchs[DeviceTriple.getTriple ()].insert (Arch);
1281- } // end of SYCLTargetsValues for loop
1279+ }
12821280
12831281 if (!SYCLTriples.empty ()) {
12841282 for (const auto &SYCLTriple : SYCLTriples) {
1285- // llvm::Triple TT(MakeSYCLDeviceTriple(Val));
1286- llvm::Triple Triple (MakeSYCLDeviceTriple (SYCLTriple));
1283+ llvm::Triple Triple (MakeSYCLDeviceTriple (SYCLTriple.getKey ()));
12871284 UniqueSYCLTriplesVec.push_back (Triple);
12881285 }
12891286 }
@@ -1292,92 +1289,93 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12921289 Diag (clang::diag::warn_drv_empty_joined_argument)
12931290 << SYCLTargetsValues->getAsString (C.getInputArgs ());
12941291 }
1295- } else if (SYCLOffloadArch) {
1296- const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
1297- auto AMDTriple = getHIPOffloadTargetTriple (*this , C.getInputArgs ());
1298- auto NVPTXTriple = getNVIDIAOffloadTargetTriple (*this , C.getInputArgs (),
1299- HostTC->getTriple ());
1300-
1301- // Attempt to deduce the offloading triple from the set of architectures.
1302- // We can only correctly deduce NVPTX / AMDGPU triples currently. We need
1303- // to temporarily create these toolchains so that we can access tools for
1304- // inferring architectures.
1305- llvm::DenseSet<StringRef> Archs;
1306- if (NVPTXTriple) {
1307- auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1308- *this , *NVPTXTriple, *HostTC, C.getInputArgs (), Action::OFK_None);
1309- for (StringRef Arch : getOffloadArchs (
1310- C, C.getArgs (), Action::OFK_SYCL, &*TempTC, true ))
1311- Archs.insert (Arch);
1312- }
1313- if (AMDTriple) {
1314- auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1315- *this , *AMDTriple, *HostTC, C.getInputArgs ());
1316- for (StringRef Arch : getOffloadArchs (
1317- C, C.getArgs (), Action::OFK_SYCL, &*TempTC, true ))
1318- Archs.insert (Arch);
1319- }
1320- if (!AMDTriple && !NVPTXTriple) {
1321- for (StringRef Arch :
1322- getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, nullptr , true ))
1323- Archs.insert (Arch);
1324- }
1325- for (StringRef Arch : Archs) {
1326- if (NVPTXTriple && IsNVIDIAOffloadArch (StringToOffloadArch (
1327- getProcessorFromTargetID (*NVPTXTriple, Arch)))) {
1328- DerivedArchs[NVPTXTriple->getTriple ()].insert (Arch);
1329- } else if (AMDTriple &&
1330- IsAMDOffloadArch (StringToOffloadArch (
1331- getProcessorFromTargetID (*AMDTriple, Arch)))) {
1332- DerivedArchs[AMDTriple->getTriple ()].insert (Arch);
1333- }
1334- else if (IsIntelCPUOffloadArch (StringToOffloadArchIntel (Arch))) {
1335- DerivedArchs[" spir64_x86_64" ].insert (Arch);
1336- } else if (IsIntelGPUOffloadArch (StringToOffloadArchIntel (Arch))) {
1337- DerivedArchs[" spir64_gen" ].insert (Arch);
1338- }
1339- else {
1340- Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch;
1341- return ;
1342- }
1343- }
1344- // If the set is empty then we failed to find a native architecture.
1345- if (Archs.empty ()) {
1346- Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch)
1347- << " native" ;
1292+ }
1293+ // If the user specified --offload-arch, deduce the offloading
1294+ // target triple(s) from the set of architecture(s).
1295+ // Create a toolchain for each valid triple.
1296+ else if (C.getInputArgs ().hasArg (options::OPT_offload_arch_EQ) && !IsHIP &&
1297+ !IsCuda) {
1298+ const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
1299+ auto AMDTriple = getHIPOffloadTargetTriple (*this , C.getInputArgs ());
1300+ auto NVPTXTriple = getNVIDIAOffloadTargetTriple (*this , C.getInputArgs (),
1301+ HostTC->getTriple ());
1302+
1303+ // Attempt to deduce the offloading triple from the set of architectures.
1304+ // We need to temporarily create these toolchains so that we can access
1305+ // tools for inferring architectures.
1306+ llvm::DenseSet<StringRef> Archs;
1307+ if (NVPTXTriple) {
1308+ auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1309+ *this , *NVPTXTriple, *HostTC, C.getInputArgs (), Action::OFK_None);
1310+ for (StringRef Arch :
1311+ getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &*TempTC, true ))
1312+ Archs.insert (Arch);
1313+ }
1314+ if (AMDTriple) {
1315+ auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1316+ *this , *AMDTriple, *HostTC, C.getInputArgs ());
1317+ for (StringRef Arch :
1318+ getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &*TempTC, true ))
1319+ Archs.insert (Arch);
1320+ }
1321+ if (!AMDTriple && !NVPTXTriple) {
1322+ for (StringRef Arch :
1323+ getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, nullptr , true ))
1324+ Archs.insert (Arch);
1325+ }
1326+ for (StringRef Arch : Archs) {
1327+ if (NVPTXTriple && IsNVIDIAOffloadArch (StringToOffloadArch (
1328+ getProcessorFromTargetID (*NVPTXTriple, Arch)))) {
1329+ DerivedArchs[NVPTXTriple->getTriple ()].insert (Arch);
1330+ } else if (AMDTriple &&
1331+ IsAMDOffloadArch (StringToOffloadArch (
1332+ getProcessorFromTargetID (*AMDTriple, Arch)))) {
1333+ DerivedArchs[AMDTriple->getTriple ()].insert (Arch);
1334+ } else if (IsIntelCPUOffloadArch (StringToOffloadArchIntel (Arch))) {
1335+ DerivedArchs[" spir64_x86_64" ].insert (Arch);
1336+ } else if (IsIntelGPUOffloadArch (StringToOffloadArchIntel (Arch))) {
1337+ DerivedArchs[" spir64_gen" ].insert (Arch);
1338+ } else {
1339+ Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch;
13481340 return ;
13491341 }
1342+ }
1343+ // If the set is empty then we failed to find a native architecture.
1344+ if (Archs.empty ()) {
1345+ Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch) << " native" ;
1346+ return ;
1347+ }
13501348
1351- for (const auto &TripleAndArchs : DerivedArchs)
1352- SYCLTriples.insert (TripleAndArchs.first ()); // ["triple -> arch"]
1353-
1349+ for (const auto &TripleAndArchs : DerivedArchs)
1350+ SYCLTriples.insert (TripleAndArchs.first ());
13541351
1355- for (StringRef Val : SYCLTriples) {
1356- llvm::Triple TT (MakeSYCLDeviceTriple (Val));
1352+ for (const auto & Val : SYCLTriples) {
1353+ llvm::Triple TT (MakeSYCLDeviceTriple (Val. getKey () ));
13571354 std::string NormalizedName = TT.normalize ();
13581355
13591356 // Make sure we don't have a duplicate triple.
13601357 auto Duplicate = FoundNormalizedTriples.find (NormalizedName);
13611358 if (Duplicate != FoundNormalizedTriples.end ()) {
13621359 Diag (clang::diag::warn_drv_omp_offload_target_duplicate)
1363- << Val << Duplicate->second ;
1360+ << Val. getKey () << Duplicate->second ;
13641361 continue ;
13651362 }
13661363
13671364 // Store the current triple so that we can check for duplicates in the
13681365 // following iterations.
1369- FoundNormalizedTriples[NormalizedName] = Val;
1370- }
1366+ FoundNormalizedTriples[NormalizedName] = Val. getKey () ;
1367+ }
13711368
1372- if (!SYCLTriples.empty ()) {
1373- for (const auto &SYCLTriple : SYCLTriples) {
1374- llvm::Triple Triple (MakeSYCLDeviceTriple (SYCLTriple));
1375- UniqueSYCLTriplesVec.push_back (Triple);
1376- }
1377- }
1378- addSYCLDefaultTriple (C, UniqueSYCLTriplesVec);
1379-
1380- } // end of --offload-arch
1369+ if (!SYCLTriples.empty ()) {
1370+ for (const auto &SYCLTriple : SYCLTriples) {
1371+ llvm::Triple Triple (MakeSYCLDeviceTriple (SYCLTriple.getKey ()));
1372+ UniqueSYCLTriplesVec.push_back (Triple);
1373+ }
1374+ }
1375+
1376+ addSYCLDefaultTriple (C, UniqueSYCLTriplesVec);
1377+
1378+ } // end of --offload-arch
13811379 else {
13821380 // If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
13831381 // For -fsycl-device-only, we also setup the implied triple as needed.
@@ -7335,6 +7333,21 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
73357333
73367334 handleArguments (C, Args, Inputs, Actions);
73377335
7336+ bool HasValidSYCLRuntime =
7337+ C.getInputArgs ().hasFlag (options::OPT_fsycl, options::OPT_fno_sycl,
7338+ false ) ||
7339+ hasSYCLDeviceOnly (C.getInputArgs ());
7340+ bool IsSYCLOffloadArchEnabled =
7341+ HasValidSYCLRuntime &&
7342+ C.getInputArgs ().hasArg (options::OPT_offload_arch_EQ);
7343+
7344+ if (IsSYCLOffloadArchEnabled &&
7345+ !C.getInputArgs ().hasFlag (options::OPT_offload_new_driver,
7346+ options::OPT_no_offload_new_driver, false )) {
7347+ Diag (clang::diag::err_drv_sycl_offload_arch_new_driver);
7348+ return ;
7349+ }
7350+
73387351 // If '-fintelfpga' is passed, add '-fsycl' to the list of arguments
73397352 const llvm::opt::OptTable &Opts = getOpts ();
73407353 Arg *SYCLFpgaArg = C.getInputArgs ().getLastArg (options::OPT_fintelfpga);
0 commit comments