@@ -674,9 +674,8 @@ getTripleBasedSYCLPostLinkOpts(const ArgList &Args,
674674 // because it only increases amount of code for device compiler to handle,
675675 // without any actual benefits.
676676 // TODO: Try to extend this feature for non-Intel GPUs.
677- if ((!Args.hasFlag (OPT_no_sycl_remove_unused_external_funcs,
678- OPT_sycl_remove_unused_external_funcs, false ) &&
679- !Triple.isNativeCPU ()) &&
677+ if (!Args.hasFlag (OPT_no_sycl_remove_unused_external_funcs,
678+ OPT_sycl_remove_unused_external_funcs, false ) &&
680679 !Args.hasArg (OPT_sycl_allow_device_image_dependencies) &&
681680 !Triple.isNVPTX () && !Triple.isAMDGPU ())
682681 PostLinkArgs.push_back (" -emit-only-kernels-as-entry-points" );
@@ -1567,8 +1566,7 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
15671566 return ClangPath.takeError ();
15681567
15691568 llvm::Triple Triple (Args.getLastArgValue (OPT_triple_EQ));
1570- if (Triple.isNativeCPU ())
1571- Triple = llvm::Triple (Args.getLastArgValue (OPT_host_triple_EQ));
1569+ llvm::Triple HostTriple (Args.getLastArgValue (OPT_host_triple_EQ));
15721570
15731571 StringRef Arch = Args.getLastArgValue (OPT_arch_EQ);
15741572 // Create a new file to write the linked device image to. Assume that the
@@ -1585,7 +1583,9 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
15851583 " --no-default-config" ,
15861584 " -o" ,
15871585 *TempFileOrErr,
1588- Args.MakeArgString (" --target=" + Triple.getTriple ()),
1586+ Args.MakeArgString (
1587+ " --target=" +
1588+ (Triple.isNativeCPU () ? HostTriple : Triple).getTriple ()),
15891589 };
15901590
15911591 if (!Arch.empty ())
@@ -1602,16 +1602,24 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
16021602 {" -Xlinker" ,
16031603 Args.MakeArgString (" --plugin-opt=" + StringRef (Arg->getValue ()))});
16041604
1605- if (!Triple.isNVPTX () && !Triple.isSPIRV ())
1605+ if (!Triple.isNVPTX () && !Triple.isSPIRV () && !Triple. isNativeCPU () )
16061606 CmdArgs.push_back (" -Wl,--no-undefined" );
16071607
16081608 if (IsSYCLKind && Triple.isNVPTX ())
16091609 CmdArgs.push_back (" -S" );
1610+
1611+ if (IsSYCLKind && Triple.isNativeCPU ()) {
1612+ CmdArgs.push_back (" -Wno-override-module" );
1613+ CmdArgs.push_back (" -mllvm" );
1614+ CmdArgs.push_back (" -sycl-native-cpu-backend" );
1615+ CmdArgs.push_back (" -c" );
1616+ }
1617+
16101618 for (StringRef InputFile : InputFiles)
16111619 CmdArgs.push_back (InputFile);
16121620
16131621 // If this is CPU offloading we copy the input libraries.
1614- if (!Triple.isGPU ()) {
1622+ if (!Triple.isGPU () && !Triple. isNativeCPU () ) {
16151623 CmdArgs.push_back (" -Wl,-Bsymbolic" );
16161624 CmdArgs.push_back (" -shared" );
16171625 ArgStringList LinkerArgs;
@@ -1664,6 +1672,38 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
16641672 Args.MakeArgString (Arg.split (' =' ).second )});
16651673 }
16661674
1675+ // link NativeCPU utils lib if needed
1676+ if (Triple.isNativeCPU ()) {
1677+ if (auto *A = Args.getLastArg (OPT_sycl_device_library_location_EQ)) {
1678+ std::string NativeCPUUtilsLib = " " ;
1679+
1680+ SmallVector<std::string, 8 > LibraryPaths;
1681+ for (const auto &Path : A->getValues ()) {
1682+ SmallString<128 > LPath (Path);
1683+ if (llvm::sys::fs::exists (LPath)) {
1684+ LibraryPaths.emplace_back (LPath);
1685+ }
1686+ }
1687+
1688+ for (auto &LPath : LibraryPaths) {
1689+ // Check whether this directory contains the nativecpu_utils bitcode
1690+ // library; the first match is passed to clang via -mlink-bitcode-file.
1691+ const char LibNativeCPUUtilsName[] = " libsycl-nativecpu_utils.bc" ;
1692+ SmallString<128 > LibNativeCPUUtilsPath (LPath);
1693+ llvm::sys::path::append (LibNativeCPUUtilsPath, LibNativeCPUUtilsName);
1694+ if (llvm::sys::fs::exists (LibNativeCPUUtilsPath)) {
1695+ NativeCPUUtilsLib = LibNativeCPUUtilsPath.str ();
1696+ break ;
1697+ }
1698+ }
1699+
1700+ if (NativeCPUUtilsLib != " " ) {
1701+ CmdArgs.append ({" -Xclang" , " -mlink-bitcode-file" , " -Xclang" ,
1702+ Args.MakeArgString (NativeCPUUtilsLib)});
1703+ }
1704+ }
1705+ }
1706+
16671707 // The OpenMPOpt pass can introduce new calls and is expensive, we do
16681708 // not want this when running CodeGen through clang.
16691709 if (Args.hasArg (OPT_clang_backend) || Args.hasArg (OPT_builtin_bitcode_EQ))
@@ -2137,6 +2177,13 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
21372177 SplitModules[I].ModuleFilePath = *BundledFileOrErr;
21382178 } else {
21392179 SplitModules[I].ModuleFilePath = *ClangOutputOrErr;
2180+ if (Triple.isNativeCPU ()) {
2181+ // Add to WrappedOutput directly rather than combining this with the
2182+ // below because WrappedOutput holds references and
2183+ // SplitModules[I].ModuleFilePath will go out of scope too soon.
2184+ std::scoped_lock Guard (ImageMtx);
2185+ WrappedOutput.push_back (*ClangOutputOrErr);
2186+ }
21402187 }
21412188 }
21422189
0 commit comments