@@ -1203,12 +1203,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
12031203 llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
12041204 llvm::StringMap<StringRef> FoundNormalizedTriples;
12051205 llvm::SmallVector<llvm::Triple, 4 > UniqueSYCLTriplesVec;
1206+ // StringSet to contain SYCL target triples.
1207+ llvm::StringSet<> SYCLTriples;
12061208 if (HasSYCLTargetsOption) {
12071209 // At this point, we know we have a valid combination
12081210 // of -fsycl*target options passed
12091211 Arg *SYCLTargetsValues = SYCLTargets;
12101212 if (SYCLTargetsValues) {
1211- llvm::StringSet<> SYCLTriples;
12121213 if (SYCLTargetsValues->getNumValues ()) {
12131214
12141215 // Multiple targets are currently not supported when using
@@ -1308,6 +1309,109 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
13081309 Diag (clang::diag::warn_drv_empty_joined_argument)
13091310 << SYCLTargetsValues->getAsString (C.getInputArgs ());
13101311 }
1312+ }
1313+ // If the user specified --offload-arch, deduce the offloading
1314+ // target triple(s) from the set of architecture(s).
1315+ // Create a toolchain for each valid triple.
1316+ // We do not support SYCL offloading if any of the inputs is a
1317+ // .cu (for CUDA type) or .hip (for HIP type) file.
1318+ else if (HasValidSYCLRuntime &&
1319+ C.getInputArgs ().hasArg (options::OPT_offload_arch_EQ) && !IsHIP &&
1320+ !IsCuda) {
1321+ // SYCL offloading to AOT Targets with '--offload-arch'
1322+ // is currently enabled only with '--offload-new-driver' option.
1323+ // Emit a diagnostic if '--offload-arch' is invoked without
1324+ // '--offload-new-driver' option.
1325+ if (!C.getInputArgs ().hasFlag (options::OPT_offload_new_driver,
1326+ options::OPT_no_offload_new_driver, false )) {
1327+ Diag (clang::diag::err_drv_sycl_offload_arch_new_driver);
1328+ return ;
1329+ }
1330+ const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
1331+ auto AMDTriple = getHIPOffloadTargetTriple (*this , C.getInputArgs ());
1332+ auto NVPTXTriple = getNVIDIAOffloadTargetTriple (*this , C.getInputArgs (),
1333+ HostTC->getTriple ());
1334+
1335+ // Attempt to deduce the offloading triple from the set of architectures.
1336+ // We need to temporarily create these toolchains so that we can access
1337+ // tools for inferring architectures.
1338+ llvm::DenseSet<StringRef> Archs;
1339+ if (NVPTXTriple) {
1340+ auto TempTC = std::make_unique<toolchains::CudaToolChain>(
1341+ *this , *NVPTXTriple, *HostTC, C.getInputArgs (), Action::OFK_None);
1342+ for (StringRef Arch :
1343+ getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &*TempTC, true ))
1344+ Archs.insert (Arch);
1345+ }
1346+ if (AMDTriple) {
1347+ auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
1348+ *this , *AMDTriple, *HostTC, C.getInputArgs ());
1349+ for (StringRef Arch :
1350+ getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &*TempTC, true ))
1351+ Archs.insert (Arch);
1352+ }
1353+ if (!AMDTriple && !NVPTXTriple) {
1354+ for (StringRef Arch :
1355+ getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, nullptr , true ))
1356+ Archs.insert (Arch);
1357+ }
1358+ for (StringRef Arch : Archs) {
1359+ if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch (StringToOffloadArch (
1360+ getProcessorFromTargetID (*NVPTXTriple, Arch)))) {
1361+ DerivedArchs[NVPTXTriple->getTriple ()].insert (Arch);
1362+ } else if (AMDTriple &&
1363+ IsSYCLSupportedAMDGPUArch (StringToOffloadArch (
1364+ getProcessorFromTargetID (*AMDTriple, Arch)))) {
1365+ DerivedArchs[AMDTriple->getTriple ()].insert (Arch);
1366+ } else if (IsSYCLSupportedIntelCPUArch (StringToOffloadArchSYCL (Arch))) {
1367+ DerivedArchs[MakeSYCLDeviceTriple (" spir64_x86_64" ).getTriple ()].insert (
1368+ Arch);
1369+ } else if (IsSYCLSupportedIntelGPUArch (StringToOffloadArchSYCL (Arch))) {
1370+ StringRef IntelGPUArch;
1371+ // For Intel Graphics AOT target, valid values for '--offload-arch'
1372+ // are mapped to valid device names accepted by OCLOC (the Intel GPU AOT
1373+ // compiler) via the '-device' option. The mapIntelGPUArchName
1374+ // function maps the accepted values for '--offload-arch' to enable SYCL
1375+ // offloading to Intel GPUs and the corresponding '-device' value passed
1376+ // to OCLOC.
1377+ IntelGPUArch = mapIntelGPUArchName (Arch).data ();
1378+ DerivedArchs[MakeSYCLDeviceTriple (" spir64_gen" ).getTriple ()].insert (
1379+ IntelGPUArch);
1380+ } else {
1381+ Diag (clang::diag::err_drv_invalid_sycl_target) << Arch;
1382+ return ;
1383+ }
1384+ }
1385+ // Emit an error if no architecture value is provided
1386+ // to --offload-arch.
1387+ if (Archs.empty ()) {
1388+ Diag (clang::diag::err_drv_sycl_offload_arch_missing_value);
1389+ return ;
1390+ }
1391+
1392+ for (const auto &TripleAndArchs : DerivedArchs)
1393+ SYCLTriples.insert (TripleAndArchs.first ());
1394+
1395+ for (const auto &Val : SYCLTriples) {
1396+ llvm::Triple SYCLTargetTriple (MakeSYCLDeviceTriple (Val.getKey ()));
1397+ std::string NormalizedName = SYCLTargetTriple.normalize ();
1398+
1399+ // Make sure we don't have a duplicate triple.
1400+ auto Duplicate = FoundNormalizedTriples.find (NormalizedName);
1401+ if (Duplicate != FoundNormalizedTriples.end ()) {
1402+ Diag (clang::diag::warn_drv_sycl_offload_target_duplicate)
1403+ << Val.getKey () << Duplicate->second ;
1404+ continue ;
1405+ }
1406+
1407+ // Store the current triple so that we can check for duplicates in the
1408+ // following iterations.
1409+ FoundNormalizedTriples[NormalizedName] = Val.getKey ();
1410+ UniqueSYCLTriplesVec.push_back (SYCLTargetTriple);
1411+ }
1412+
1413+ addSYCLDefaultTriple (C, UniqueSYCLTriplesVec);
1414+
13111415 } else {
13121416 // If -fsycl is supplied without -fsycl-targets we will assume SPIR-V.
13131417 // For -fsycl-device-only, we also setup the implied triple as needed.
@@ -5455,9 +5559,58 @@ class OffloadingActionBuilder final {
54555559 BundlingActions, types::TY_Object);
54565560 if (auto *OWA = dyn_cast<OffloadWrapperJobAction>(DeviceAction))
54575561 OWA->setOffloadKind (Action::OFK_Host);
5562+ // The Backend compilation step performed here is being done for
5563+ // creating FPGA archives. The possible split binaries after
5564+ // sycl-post-link need to be individually wrapped as opposed to
5565+ // being passed into the clang-offload-wrapper via a table and
5566+ // using the -batch option - effectively creating a single
5567+ // binary. The resulting archive created from -fsycl-link should
5568+ // not contain the singular binary, but should be individual
5569+ // binaries to be consumed later by either the -fsycl-link=image
5570+ // device compilation step or being linked into the final exe.
5571+ //
5572+ // Typical compile flow:
5573+ // .bc
5574+ // |
5575+ // sycl-post-link -split=kernel
5576+ // |
5577+ // +--------+--------+
5578+ // | | |
5579+ // split1 split2 split3
5580+ // | | |
5581+ // llvm-spirv llvm-spirv llvm-spirv
5582+ // | | |
5583+ // ocloc ocloc ocloc
5584+ // | | |
5585+ // +--------+--------+
5586+ // |
5587+ // clang-offload-wrapper -batch
5588+ // |
5589+ // .o
5590+ //
5591+ // Individual wrap compile flow:
5592+ // .bc
5593+ // |
5594+ // sycl-post-link -split=kernel
5595+ // |
5596+ // +--------+--------+
5597+ // | | |
5598+ // split1 split2 split3
5599+ // | | |
5600+ // llvm-spirv llvm-spirv llvm-spirv
5601+ // | | |
5602+ // ocloc ocloc ocloc
5603+ // | | |
5604+ // wrap wrap wrap
5605+ // | | |
5606+ // .o .o .o
5607+ //
54585608 Action *CompiledDeviceAction =
5459- C.MakeAction <OffloadWrapperJobAction>(WrapperItems,
5460- types::TY_Object);
5609+ C.MakeAction <OffloadWrapperJobAction>(FPGAAOTAction,
5610+ types::TY_Tempfilelist);
5611+ if (auto *OWA =
5612+ dyn_cast<OffloadWrapperJobAction>(CompiledDeviceAction))
5613+ OWA->setWrapIndividualFiles ();
54615614 addDeps (CompiledDeviceAction, TC, BoundArch);
54625615 }
54635616 addDeps (DeviceAction, TC, BoundArch);
@@ -5731,6 +5884,9 @@ class OffloadingActionBuilder final {
57315884 };
57325885
57335886 Action *ExtractIRFilesAction = createExtractIRFilesAction ();
5887+ // Device binaries that are individually wrapped when creating an
5888+ // FPGA Archive.
5889+ ActionList FPGAArchiveWrapperInputs;
57345890
57355891 if (IsNVPTX || IsAMDGCN) {
57365892 JobAction *FinAction =
@@ -5816,6 +5972,7 @@ class OffloadingActionBuilder final {
58165972 FileTableTformJobAction::COL_CODE,
58175973 FileTableTformJobAction::COL_CODE);
58185974 WrapperInputs.push_back (ReplaceFilesAction);
5975+ FPGAArchiveWrapperInputs.push_back (BuildCodeAction);
58195976 }
58205977 if (SkipWrapper) {
58215978 // Wrapper step not requested.
@@ -5850,8 +6007,11 @@ class OffloadingActionBuilder final {
58506007 if (auto *OWA = dyn_cast<OffloadWrapperJobAction>(DeviceAction))
58516008 OWA->setOffloadKind (Action::OFK_Host);
58526009 Action *CompiledDeviceAction =
5853- C.MakeAction <OffloadWrapperJobAction>(WrapperInputs,
5854- types::TY_Object);
6010+ C.MakeAction <OffloadWrapperJobAction>(
6011+ FPGAArchiveWrapperInputs, types::TY_Tempfilelist);
6012+ if (auto *OWA =
6013+ dyn_cast<OffloadWrapperJobAction>(CompiledDeviceAction))
6014+ OWA->setWrapIndividualFiles ();
58556015 addDeps (CompiledDeviceAction, TC, nullptr );
58566016 }
58576017 addDeps (DeviceAction, TC, nullptr );
0 commit comments