diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c789a8253a32a..96deb4cbfb1f3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -11556,13 +11556,13 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, } } } - // -sycl-device-libraries= provides a comma separate list of + // -device-libraries= provides a comma separate list of // libraries to add to the device linking step. if (LibList.size()) - CmdArgs.push_back( - Args.MakeArgString(Twine("-sycl-device-libraries=") + LibList)); + CmdArgs.push_back(Args.MakeArgString( + Twine("--linker-arg=\"-device-libraries=") + LibList + Twine("\""))); - // -sycl-device-library-location= provides the location in which the + // -device-library-location= provides the location in which the // SYCL device libraries can be found. SmallString<128> DeviceLibDir(D.Dir); llvm::sys::path::append(DeviceLibDir, "..", "lib"); @@ -11586,15 +11586,16 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, break; } } - CmdArgs.push_back(Args.MakeArgString( - Twine("-sycl-device-library-location=") + DeviceLibDir)); + CmdArgs.push_back( + Args.MakeArgString(Twine("--linker-arg=\"-device-library-location=") + + DeviceLibDir + Twine("\""))); if (C.getDriver().isDumpDeviceCodeEnabled()) { SmallString<128> DumpDir; Arg *A = C.getArgs().getLastArg(options::OPT_fsycl_dump_device_code_EQ); DumpDir = A ? A->getValue() : ""; - CmdArgs.push_back( - Args.MakeArgString(Twine("-sycl-dump-device-code=") + DumpDir)); + CmdArgs.push_back(Args.MakeArgString( + Twine("--linker-arg=\"-dump-device-code=") + DumpDir + Twine("\""))); } auto appendOption = [](SmallString<128> &OptString, StringRef AddOpt) { @@ -11615,7 +11616,8 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, } if (!PostLinkOptString.empty()) CmdArgs.push_back( - Args.MakeArgString("--sycl-post-link-options=" + PostLinkOptString)); + Args.MakeArgString(Twine("--linker-arg=\"-post-link-options=") + + PostLinkOptString + Twine("\""))); // --llvm-spirv-options="options" provides a string of options to be passed // along to the llvm-spirv (translation) step during device link. @@ -11628,24 +11630,27 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, getNonTripleBasedSPIRVTransOpts(C, Args, TranslatorArgs); for (const auto &A : TranslatorArgs) appendOption(OptString, A); - CmdArgs.push_back(Args.MakeArgString("--llvm-spirv-options=" + OptString)); - + CmdArgs.push_back( + Args.MakeArgString(Twine("--linker-arg=\"-llvm-spirv-options=") + + OptString + Twine("\""))); if (C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment()) CmdArgs.push_back("-sycl-is-windows-msvc-env"); bool IsUsingLTO = D.isUsingOffloadLTO(); auto LTOMode = D.getOffloadLTOMode(); if (IsUsingLTO && LTOMode == LTOK_Thin) - CmdArgs.push_back(Args.MakeArgString("-sycl-thin-lto")); + CmdArgs.push_back( + Args.MakeArgString(Twine("--linker-arg=\"-thin-lto\""))); if (Args.hasArg(options::OPT_fsycl_embed_ir)) - CmdArgs.push_back(Args.MakeArgString("-sycl-embed-ir")); + CmdArgs.push_back( + Args.MakeArgString(Twine("--linker-arg=\"-embed-ir\""))); if (Args.hasFlag(options::OPT_fsycl_allow_device_image_dependencies, options::OPT_fno_sycl_allow_device_image_dependencies, false)) - CmdArgs.push_back( - Args.MakeArgString("-sycl-allow-device-image-dependencies")); + CmdArgs.push_back(Args.MakeArgString( + Twine("--linker-arg=\"-allow-device-image-dependencies\""))); // Formulate and add any offload-wrapper and AOT specific options. These // are additional options passed in via -Xsycl-target-linker and diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index ded1e3d6a4d5b..218ea18734a2a 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -14,7 +14,6 @@ // //===---------------------------------------------------------------------===// -#include "clang/Basic/Cuda.h" #include "clang/Basic/TargetID.h" #include "clang/Basic/Version.h" #include "llvm/ADT/MapVector.h" @@ -22,7 +21,6 @@ #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/CodeGen/CommandFlags.h" #include "llvm/Frontend/Offloading/OffloadWrapper.h" -#include "llvm/Frontend/Offloading/SYCLOffloadWrapper.h" #include "llvm/Frontend/Offloading/Utility.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticPrinter.h" @@ -42,13 +40,11 @@ #include "llvm/Option/Option.h" #include "llvm/Passes/PassPlugin.h" #include "llvm/Remarks/HotnessThresholdParser.h" -#include "llvm/SYCLPostLink/ModuleSplitter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" -#include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" @@ -65,9 +61,6 @@ #include #include -#define COMPILE_OPTS "compile-opts" -#define LINK_OPTS "link-opts" - using namespace llvm; using namespace llvm::opt; using namespace llvm::object; @@ -147,12 +140,6 @@ static std::list> TempFiles; /// Codegen flags for LTO backend. static codegen::RegisterCodeGenFlags CodeGenFlags; -static std::optional SYCLModuleSplitMode; - -static bool UseSYCLPostLinkTool; - -static SmallString<128> OffloadImageDumpDir; - using OffloadingImage = OffloadBinary::OffloadingImage; namespace llvm { @@ -231,33 +218,6 @@ void printCommands(ArrayRef CmdArgs) { exit(EXIT_FAILURE); } -/// Create an extra user-specified \p OffloadFile. -/// TODO: We should find a way to wrap these as libraries instead. -Expected getInputBitcodeLibrary(StringRef Input) { - auto [Device, Path] = StringRef(Input).split('='); - auto [String, Arch] = Device.rsplit('-'); - auto [Kind, Triple] = String.split('-'); - - llvm::ErrorOr> ImageOrError = - llvm::MemoryBuffer::getFileOrSTDIN(Path); - if (std::error_code EC = ImageOrError.getError()) - return createFileError(Path, EC); - - OffloadingImage Image{}; - Image.TheImageKind = IMG_Bitcode; - Image.TheOffloadKind = getOffloadKind(Kind); - Image.StringData["triple"] = Triple; - Image.StringData["arch"] = Arch; - Image.Image = std::move(*ImageOrError); - - std::unique_ptr Binary = MemoryBuffer::getMemBufferCopy( - OffloadBinary::write(Image), Image.Image->getBufferIdentifier()); - auto NewBinaryOrErr = OffloadBinary::create(*Binary); - if (!NewBinaryOrErr) - return NewBinaryOrErr.takeError(); - return OffloadFile(std::move(*NewBinaryOrErr), std::move(Binary)); -} - std::string getMainExecutable(const char *Name) { void *Ptr = (void *)(intptr_t)&getMainExecutable; auto COWPath = sys::fs::getMainExecutable(Name, Ptr); @@ -269,9 +229,7 @@ Expected createOutputFile(const Twine &Prefix, StringRef Extension) { std::scoped_lock Lock(TempFilesMutex); SmallString<128> OutputFile; if (SaveTemps) { - // Generate a unique path name without creating a file - sys::fs::createUniquePath(Prefix + "-%%%%%%." + Extension, OutputFile, - /*MakeAbsolute=*/false); + (Prefix + "." + Extension).toNullTerminatedStringRef(OutputFile); } else { if (std::error_code EC = sys::fs::createTemporaryFile(Prefix, Extension, OutputFile)) @@ -282,18 +240,16 @@ Expected createOutputFile(const Twine &Prefix, StringRef Extension) { return TempFiles.back(); } -// TODO: Remove HasSYCLOffloadKind dependence when aligning with community code. -Expected writeOffloadFile(const OffloadFile &File, - bool HasSYCLOffloadKind = false) { +Expected writeOffloadFile(const OffloadFile &File) { const OffloadBinary &Binary = *File.getBinary(); StringRef Prefix = sys::path::stem(Binary.getMemoryBufferRef().getBufferIdentifier()); - StringRef Suffix = getImageKindName(Binary.getImageKind()); - - auto TempFileOrErr = createOutputFile( - Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), - HasSYCLOffloadKind ? getImageKindName(Binary.getImageKind()) : "o"); + SmallString<128> Filename; + (Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch()) + .toVector(Filename); + llvm::replace(Filename, ':', '-'); + auto TempFileOrErr = createOutputFile(Filename, "o"); if (!TempFileOrErr) return TempFileOrErr.takeError(); @@ -473,46 +429,6 @@ fatbinary(ArrayRef> InputFiles, return *TempFileOrErr; } - -// ptxas binary -Expected ptxas(StringRef InputFile, const ArgList &Args, - StringRef Arch) { - llvm::TimeTraceScope TimeScope("NVPTX ptxas"); - // NVPTX uses the ptxas program to process assembly files. - Expected PtxasPath = - findProgram("ptxas", {CudaBinaryPath + "/bin"}); - if (!PtxasPath) - return PtxasPath.takeError(); - - llvm::Triple Triple( - Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); - - // Create a new file to write the output to. - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName), "cubin"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - - SmallVector CmdArgs; - CmdArgs.push_back(*PtxasPath); - CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32"); - // Pass -v to ptxas if it was passed to the driver. - if (Args.hasArg(OPT_verbose)) - CmdArgs.push_back("-v"); - StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); - if (Args.hasArg(OPT_debug)) - CmdArgs.push_back("-g"); - else - CmdArgs.push_back(Args.MakeArgString("-" + OptLevel)); - CmdArgs.push_back("--gpu-name"); - CmdArgs.push_back(Arch); - CmdArgs.push_back("--output-file"); - CmdArgs.push_back(*TempFileOrErr); - CmdArgs.push_back(InputFile); - if (Error Err = executeCommands(*PtxasPath, CmdArgs)) - return std::move(Err); - return *TempFileOrErr; -} } // namespace nvptx namespace amdgcn { @@ -572,931 +488,9 @@ fatbinary(ArrayRef> InputFiles, } } // namespace amdgcn -namespace sycl { -// This utility function is used to gather all SYCL device library files that -// will be linked with input device files. -// The list of files and its location are passed from driver. -static Error getSYCLDeviceLibs(SmallVector &DeviceLibFiles, - const ArgList &Args) { - StringRef SYCLDeviceLibLoc(""); - if (Arg *A = Args.getLastArg(OPT_sycl_device_library_location_EQ)) - SYCLDeviceLibLoc = A->getValue(); - if (Arg *A = Args.getLastArg(OPT_sycl_device_lib_EQ)) { - if (A->getValues().size() == 0) - return createStringError( - inconvertibleErrorCode(), - "Number of device library files cannot be zero."); - for (StringRef Val : A->getValues()) { - SmallString<128> LibName(SYCLDeviceLibLoc); - llvm::sys::path::append(LibName, Val); - if (llvm::sys::fs::exists(LibName)) - DeviceLibFiles.push_back(std::string(LibName)); - else - return createStringError(inconvertibleErrorCode(), - std::string(LibName) + - " SYCL device library file is not found."); - } - } - return Error::success(); -} - -/// This routine is used to convert SPIR-V input files into LLVM IR files. -/// 'llvm-spirv -r' command is used for this purpose. -/// If input is not a SPIR-V file, then the original file is returned. -/// TODO: Add a check to identify SPIR-V files and exit early if the input is -/// not a SPIR-V file. -/// 'Filename' is the input file that could be a SPIR-V file. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate options required to be passed into the -/// llvm-spirv tool. -static Expected convertSPIRVToIR(StringRef Filename, - const ArgList &Args) { - Expected SPIRVToIRWrapperPath = findProgram( - "spirv-to-ir-wrapper", {getMainExecutable("spirv-to-ir-wrapper")}); - if (!SPIRVToIRWrapperPath) - return SPIRVToIRWrapperPath.takeError(); - - // Create a new file to write the converted file to. - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName), "bc"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - - SmallVector CmdArgs; - CmdArgs.push_back(*SPIRVToIRWrapperPath); - CmdArgs.push_back(Filename); - CmdArgs.push_back("-o"); - CmdArgs.push_back(*TempFileOrErr); - CmdArgs.push_back("--llvm-spirv-opts"); - CmdArgs.push_back("--spirv-preserve-auxdata --spirv-target-env=SPV-IR " - "--spirv-builtin-format=global"); - if (Error Err = executeCommands(*SPIRVToIRWrapperPath, CmdArgs)) - return std::move(Err); - return *TempFileOrErr; -} - -/// Add any sycl-post-link options that rely on a specific Triple in addition -/// to user supplied options. -/// NOTE: Any changes made here should be reflected in the similarly named -/// function in clang/lib/Driver/ToolChains/Clang.cpp. -static void -getTripleBasedSYCLPostLinkOpts(const ArgList &Args, - SmallVector &PostLinkArgs, - const llvm::Triple Triple) { - const llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ)); - bool SYCLNativeCPU = (HostTriple == Triple); - bool SpecConstsSupported = (!Triple.isNVPTX() && !Triple.isAMDGCN() && - !Triple.isSPIRAOT() && !SYCLNativeCPU); - if (SpecConstsSupported) - PostLinkArgs.push_back("-spec-const=native"); - else - PostLinkArgs.push_back("-spec-const=emulation"); - - // TODO: If we ever pass -ir-output-only based on the triple, - // make sure we don't pass -properties. - PostLinkArgs.push_back("-properties"); - - // See if device code splitting is already requested. If not requested, then - // set -split=auto for non-FPGA targets. - bool NoSplit = true; - for (auto Arg : PostLinkArgs) - if (Arg.contains("-split=")) { - NoSplit = false; - break; - } - if (NoSplit && (Triple.getSubArch() != llvm::Triple::SPIRSubArch_fpga)) - PostLinkArgs.push_back("-split=auto"); - - // On Intel targets we don't need non-kernel functions as entry points, - // because it only increases amount of code for device compiler to handle, - // without any actual benefits. - // TODO: Try to extend this feature for non-Intel GPUs. - if ((!Args.hasFlag(OPT_no_sycl_remove_unused_external_funcs, - OPT_sycl_remove_unused_external_funcs, false) && - !SYCLNativeCPU) && - !Args.hasArg(OPT_sycl_allow_device_image_dependencies) && - !Triple.isNVPTX() && !Triple.isAMDGPU()) - PostLinkArgs.push_back("-emit-only-kernels-as-entry-points"); - - if (!Triple.isAMDGCN()) - PostLinkArgs.push_back("-emit-param-info"); - // Enable program metadata - if (Triple.isNVPTX() || Triple.isAMDGCN() || SYCLNativeCPU) - PostLinkArgs.push_back("-emit-program-metadata"); - - bool SplitEsimdByDefault = Triple.isSPIROrSPIRV(); - bool SplitEsimd = - Args.hasFlag(OPT_sycl_device_code_split_esimd, - OPT_no_sycl_device_code_split_esimd, SplitEsimdByDefault); - if (!Args.hasArg(OPT_sycl_thin_lto)) - PostLinkArgs.push_back("-symbols"); - // Specialization constant info generation is mandatory - - // add options unconditionally - PostLinkArgs.push_back("-emit-exported-symbols"); - PostLinkArgs.push_back("-emit-imported-symbols"); - if (SplitEsimd) - PostLinkArgs.push_back("-split-esimd"); - PostLinkArgs.push_back("-lower-esimd"); - - bool IsAOT = Triple.isNVPTX() || Triple.isAMDGCN() || Triple.isSPIRAOT(); - if (Args.hasFlag(OPT_sycl_add_default_spec_consts_image, - OPT_no_sycl_add_default_spec_consts_image, false) && - IsAOT) - PostLinkArgs.push_back("-generate-device-image-default-spec-consts"); -} - -/// Run sycl-post-link tool for SYCL offloading. -/// 'InputFiles' is the list of input LLVM IR files. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate options required to be passed into the -/// sycl-post-link tool. -static Expected> -runSYCLPostLinkTool(ArrayRef InputFiles, const ArgList &Args) { - Expected SYCLPostLinkPath = - findProgram("sycl-post-link", {getMainExecutable("sycl-post-link")}); - if (!SYCLPostLinkPath) - return SYCLPostLinkPath.takeError(); - - // Create a new file to write the output of sycl-post-link to. - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName), "table"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - - SmallVector CmdArgs; - CmdArgs.push_back(*SYCLPostLinkPath); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - Arg *SYCLDeviceLibLoc = Args.getLastArg(OPT_sycl_device_library_location_EQ); - if (SYCLDeviceLibLoc && !Triple.isSPIRAOT()) { - std::string SYCLDeviceLibLocParam = SYCLDeviceLibLoc->getValue(); - std::string BF16DeviceLibLoc = - SYCLDeviceLibLocParam + "/libsycl-native-bfloat16.bc"; - if (llvm::sys::fs::exists(BF16DeviceLibLoc)) { - SYCLDeviceLibLocParam = "--device-lib-dir=" + SYCLDeviceLibLocParam; - CmdArgs.push_back(Args.MakeArgString(StringRef(SYCLDeviceLibLocParam))); - } - } - getTripleBasedSYCLPostLinkOpts(Args, CmdArgs, Triple); - StringRef SYCLPostLinkOptions; - if (Arg *A = Args.getLastArg(OPT_sycl_post_link_options_EQ)) - SYCLPostLinkOptions = A->getValue(); - SYCLPostLinkOptions.split(CmdArgs, " ", /* MaxSplit = */ -1, - /* KeepEmpty = */ false); - CmdArgs.push_back("-o"); - CmdArgs.push_back(*TempFileOrErr); - for (auto &File : InputFiles) - CmdArgs.push_back(File); - if (Error Err = executeCommands(*SYCLPostLinkPath, CmdArgs)) - return std::move(Err); - - if (DryRun) { - // In DryRun we need a dummy entry in order to continue the whole pipeline. - auto ImageFileOrErr = createOutputFile( - sys::path::filename(ExecutableName) + ".sycl.split.image", "bc"); - if (!ImageFileOrErr) - return ImageFileOrErr.takeError(); - - std::vector Modules = {module_split::SplitModule( - *ImageFileOrErr, util::PropertySetRegistry(), "")}; - return Modules; - } - - return llvm::module_split::parseSplitModulesFromFile(*TempFileOrErr); -} - -/// Invokes SYCL Split library for SYCL offloading. -/// -/// \param InputFiles the list of input LLVM IR files. -/// \param Args Encompasses all arguments for linking and wrapping device code. -/// It will be parsed to generate options required to be passed to SYCL split -/// library. -/// \param Mode The splitting mode. -/// \returns The vector of split modules. -static Expected> -runSYCLSplitLibrary(ArrayRef InputFiles, const ArgList &Args, - module_split::IRSplitMode Mode) { - std::vector SplitModules; - if (DryRun) { - auto OutputFileOrErr = createOutputFile( - sys::path::filename(ExecutableName) + ".sycl.split.image", "bc"); - if (!OutputFileOrErr) - return OutputFileOrErr.takeError(); - - StringRef OutputFilePath = *OutputFileOrErr; - auto InputFilesStr = llvm::join(InputFiles.begin(), InputFiles.end(), ","); - errs() << formatv("sycl-module-split: input: {0}, output: {1}\n", - InputFilesStr, OutputFilePath); - SplitModules.emplace_back(OutputFilePath, util::PropertySetRegistry(), ""); - return SplitModules; - } - - llvm::module_split::ModuleSplitterSettings Settings; - Settings.Mode = Mode; - Settings.OutputPrefix = ""; - - for (StringRef InputFile : InputFiles) { - SMDiagnostic Err; - LLVMContext C; - std::unique_ptr M = parseIRFile(InputFile, Err, C); - if (!M) - return createStringError(inconvertibleErrorCode(), Err.getMessage()); - - auto SplitModulesOrErr = - module_split::splitSYCLModule(std::move(M), Settings); - if (!SplitModulesOrErr) - return SplitModulesOrErr.takeError(); - - auto &NewSplitModules = *SplitModulesOrErr; - SplitModules.insert(SplitModules.end(), NewSplitModules.begin(), - NewSplitModules.end()); - } - - if (Verbose) { - auto InputFilesStr = llvm::join(InputFiles.begin(), InputFiles.end(), ","); - std::string SplitOutputFilesStr; - for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { - if (I > 0) - SplitOutputFilesStr += ','; - - SplitOutputFilesStr += SplitModules[I].ModuleFilePath; - } - - errs() << formatv("sycl-module-split: input: {0}, output: {1}\n", - InputFilesStr, SplitOutputFilesStr); - } - - return SplitModules; -} - -/// Add any llvm-spirv option that relies on a specific Triple in addition -/// to user supplied options. -/// NOTE: Any changes made here should be reflected in the similarly named -/// function in clang/lib/Driver/ToolChains/Clang.cpp. -static void -getTripleBasedSPIRVTransOpts(const ArgList &Args, - SmallVector &TranslatorArgs, - const llvm::Triple Triple) { - bool IsCPU = Triple.isSPIR() && - Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64; - TranslatorArgs.push_back("-spirv-debug-info-version=nonsemantic-shader-200"); - std::string UnknownIntrinsics("-spirv-allow-unknown-intrinsics=llvm.genx."); - if (IsCPU) - UnknownIntrinsics += ",llvm.fpbuiltin"; - TranslatorArgs.push_back(Args.MakeArgString(UnknownIntrinsics)); - - // Disable all the extensions by default - std::string ExtArg("-spirv-ext=-all"); - std::string DefaultExtArg = - ",+SPV_EXT_shader_atomic_float_add,+SPV_EXT_shader_atomic_float_min_max" - ",+SPV_KHR_no_integer_wrap_decoration,+SPV_KHR_float_controls" - ",+SPV_KHR_expect_assume,+SPV_KHR_linkonce_odr"; - std::string INTELExtArg = - ",+SPV_INTEL_subgroups,+SPV_INTEL_media_block_io" - ",+SPV_INTEL_device_side_avc_motion_estimation" - ",+SPV_INTEL_fpga_loop_controls,+SPV_INTEL_unstructured_loop_controls" - ",+SPV_INTEL_fpga_reg,+SPV_INTEL_blocking_pipes" - ",+SPV_INTEL_function_pointers,+SPV_INTEL_kernel_attributes" - ",+SPV_INTEL_io_pipes,+SPV_INTEL_inline_assembly" - ",+SPV_INTEL_arbitrary_precision_integers" - ",+SPV_INTEL_float_controls2,+SPV_INTEL_vector_compute" - ",+SPV_INTEL_fast_composite" - ",+SPV_INTEL_arbitrary_precision_fixed_point" - ",+SPV_INTEL_arbitrary_precision_floating_point" - ",+SPV_INTEL_variable_length_array,+SPV_INTEL_fp_fast_math_mode" - ",+SPV_INTEL_long_composites" - ",+SPV_INTEL_arithmetic_fence" - ",+SPV_INTEL_global_variable_decorations" - ",+SPV_INTEL_cache_controls" - ",+SPV_INTEL_fpga_buffer_location" - ",+SPV_INTEL_fpga_argument_interfaces" - ",+SPV_INTEL_fpga_invocation_pipelining_attributes" - ",+SPV_INTEL_fpga_latency_control" - ",+SPV_KHR_shader_clock" - ",+SPV_INTEL_bindless_images" - ",+SPV_INTEL_task_sequence"; - ExtArg = ExtArg + DefaultExtArg + INTELExtArg; - ExtArg += ",+SPV_INTEL_bfloat16_conversion" - ",+SPV_INTEL_joint_matrix" - ",+SPV_INTEL_hw_thread_queries" - ",+SPV_KHR_uniform_group_instructions" - ",+SPV_INTEL_masked_gather_scatter" - ",+SPV_INTEL_tensor_float32_conversion" - ",+SPV_INTEL_optnone" - ",+SPV_KHR_non_semantic_info" - ",+SPV_KHR_cooperative_matrix" - ",+SPV_EXT_shader_atomic_float16_add" - ",+SPV_INTEL_fp_max_error"; - TranslatorArgs.push_back(Args.MakeArgString(ExtArg)); -} - -/// Run LLVM to SPIR-V translation. -/// Converts 'File' from LLVM bitcode to SPIR-V format using llvm-spirv tool. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate options required to be passed into the -/// llvm-spirv tool. -static Expected runLLVMToSPIRVTranslation(StringRef File, - const ArgList &Args) { - Expected LLVMToSPIRVPath = - findProgram("llvm-spirv", {getMainExecutable("llvm-spirv")}); - if (!LLVMToSPIRVPath) - return LLVMToSPIRVPath.takeError(); - - SmallVector CmdArgs; - CmdArgs.push_back(*LLVMToSPIRVPath); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - getTripleBasedSPIRVTransOpts(Args, CmdArgs, Triple); - StringRef LLVMToSPIRVOptions; - if (Arg *A = Args.getLastArg(OPT_llvm_spirv_options_EQ)) - LLVMToSPIRVOptions = A->getValue(); - LLVMToSPIRVOptions.split(CmdArgs, " ", /* MaxSplit = */ -1, - /* KeepEmpty = */ false); - CmdArgs.push_back("-o"); - - // Create a new file to write the translated file to. - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName), "spv"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - - CmdArgs.push_back(*TempFileOrErr); - CmdArgs.push_back(File); - if (Error Err = executeCommands(*LLVMToSPIRVPath, CmdArgs)) - return std::move(Err); - - return *TempFileOrErr; -} - -/// Adds all AOT backend options required for SYCL AOT compilation step to -/// 'CmdArgs'. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate backend options required to be passed -/// into the SYCL AOT compilation step. -/// IsCPU is a bool used to direct option generation. If IsCPU is false, then -/// options are generated for AOT compilation targeting Intel GPUs. -static void addBackendOptions(const ArgList &Args, - SmallVector &CmdArgs, bool IsCPU) { - StringRef OptC = - Args.getLastArgValue(OPT_sycl_backend_compile_options_from_image_EQ); - OptC.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); - StringRef OptL = - Args.getLastArgValue(OPT_sycl_backend_link_options_from_image_EQ); - OptL.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); - StringRef OptTool = (IsCPU) ? Args.getLastArgValue(OPT_cpu_tool_arg_EQ) - : Args.getLastArgValue(OPT_gpu_tool_arg_EQ); - OptTool.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); - return; -} - -/// Run AOT compilation for Intel CPU. -/// Calls opencl-aot tool to generate device code for Intel CPU backend. -/// 'InputFile' is the input SPIR-V file. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate options required to be passed into the -/// SYCL AOT compilation step. -static Expected runAOTCompileIntelCPU(StringRef InputFile, - const ArgList &Args) { - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - SmallVector CmdArgs; - Expected OpenCLAOTPath = - findProgram("opencl-aot", {getMainExecutable("opencl-aot")}); - if (!OpenCLAOTPath) - return OpenCLAOTPath.takeError(); - - CmdArgs.push_back(*OpenCLAOTPath); - CmdArgs.push_back("--device=cpu"); - addBackendOptions(Args, CmdArgs, /* IsCPU */ true); - // Create a new file to write the translated file to. - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName), "out"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - CmdArgs.push_back("-o"); - CmdArgs.push_back(*TempFileOrErr); - CmdArgs.push_back(InputFile); - if (Error Err = executeCommands(*OpenCLAOTPath, CmdArgs)) - return std::move(Err); - return *TempFileOrErr; -} - -/// Run AOT compilation for Intel GPU -/// Calls ocloc tool to generate device code for Intel GPU backend. -/// 'InputFile' is the input SPIR-V file. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate options required to be passed into the -/// SYCL AOT compilation step. -static Expected runAOTCompileIntelGPU(StringRef InputFile, - const ArgList &Args) { - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - StringRef Arch(Args.getLastArgValue(OPT_arch_EQ)); - SmallVector CmdArgs; - Expected OclocPath = - findProgram("ocloc", {getMainExecutable("ocloc")}); - if (!OclocPath) - return OclocPath.takeError(); - - CmdArgs.push_back(*OclocPath); - // The next line prevents ocloc from modifying the image name - CmdArgs.push_back("-output_no_suffix"); - CmdArgs.push_back("-spirv_input"); - if (!Arch.empty()) { - CmdArgs.push_back("-device"); - CmdArgs.push_back(Arch); - } - addBackendOptions(Args, CmdArgs, /* IsCPU */ false); - // Create a new file to write the translated file to. - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName), "out"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - CmdArgs.push_back("-output"); - CmdArgs.push_back(*TempFileOrErr); - CmdArgs.push_back("-file"); - CmdArgs.push_back(InputFile); - if (Error Err = executeCommands(*OclocPath, CmdArgs)) - return std::move(Err); - return *TempFileOrErr; -} - -/// Run AOT compilation for Intel CPU/GPU. -/// 'InputFile' is the input SPIR-V file. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate options required to be passed into the -/// SYCL AOT compilation step. -static Expected runAOTCompile(StringRef InputFile, - const ArgList &Args) { - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - if (Triple.isSPIRAOT()) { - if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) - return runAOTCompileIntelGPU(InputFile, Args); - if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64) - return runAOTCompileIntelCPU(InputFile, Args); - } - return createStringError(inconvertibleErrorCode(), - "Unsupported SYCL Triple and Arch"); -} - -/// Reads device images from the given \p InputFile and wraps them -/// in one LLVM IR Module as a constant data. -/// -/// \returns A path to the LLVM Module that contains wrapped images. -Expected -wrapSYCLBinariesFromFile(std::vector &SplitModules, - const ArgList &Args, bool IsEmbeddedIR) { - auto OutputFileOrErr = createOutputFile( - sys::path::filename(ExecutableName) + ".sycl.image.wrapper", "bc"); - if (!OutputFileOrErr) - return OutputFileOrErr.takeError(); - - StringRef OutputFilePath = *OutputFileOrErr; - if (Verbose || DryRun) { - std::string InputFiles; - for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { - InputFiles += SplitModules[I].ModuleFilePath; - if (I + 1 < E) - InputFiles += ','; - } - - errs() << formatv(" offload-wrapper: input: {0}, output: {1}\n", InputFiles, - OutputFilePath); - if (DryRun) - return OutputFilePath; - } - - StringRef Target = Args.getLastArgValue(OPT_triple_EQ); - if (Target.empty()) - return createStringError( - inconvertibleErrorCode(), - "can't wrap SYCL image. -triple argument is missed."); - - SmallVector Images; - // SYCL runtime currently works for spir64 target triple and not for - // spir64-unknown-unknown/spirv64-unknown-unknown/spirv64. - // TODO: Fix SYCL runtime to accept other triples - llvm::Triple T(Target); - std::string EmbeddedIRTarget("llvm_"); - EmbeddedIRTarget.append(T.getArchName()); - StringRef RegularTarget(T.getArchName()); - if (RegularTarget == "spirv64") - RegularTarget = "spir64"; - - for (auto &SI : SplitModules) { - if (!OffloadImageDumpDir.empty()) { - StringRef CopyFrom = SI.ModuleFilePath; - SmallString<128> CopyTo = OffloadImageDumpDir; - StringRef Filename = sys::path::filename(CopyFrom); - CopyTo.append(Filename); - std::error_code EC = sys::fs::copy_file(CopyFrom, CopyTo); - if (EC) - return createStringError(EC, formatv("failed to copy file. From: " - "{0} to: {1}, error_code: {2}", - CopyFrom, CopyTo, EC.value())); - } - - auto MBOrDesc = MemoryBuffer::getFile(SI.ModuleFilePath); - if (!MBOrDesc) - return createFileError(SI.ModuleFilePath, MBOrDesc.getError()); - - StringRef ImageTarget = - IsEmbeddedIR ? StringRef(EmbeddedIRTarget) : StringRef(RegularTarget); - Images.emplace_back(std::move(*MBOrDesc), SI.Properties, SI.Symbols, - ImageTarget); - } - - LLVMContext C; - Module M("offload.wrapper.object", C); - M.setTargetTriple( - Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); - - auto CompileOptionsFromSYCLBackendCompileOptions = - Args.getLastArgValue(OPT_sycl_backend_compile_options_EQ); - auto LinkOptionsFromSYCLTargetLinkOptions = - Args.getLastArgValue(OPT_sycl_target_link_options_EQ); - - StringRef CompileOptions( - Args.MakeArgString(CompileOptionsFromSYCLBackendCompileOptions.str())); - StringRef LinkOptions( - Args.MakeArgString(LinkOptionsFromSYCLTargetLinkOptions.str())); - offloading::SYCLWrappingOptions WrappingOptions; - WrappingOptions.CompileOptions = CompileOptions; - WrappingOptions.LinkOptions = LinkOptions; - if (Verbose) { - errs() << formatv(" offload-wrapper: compile-opts: {0}, link-opts: {1}\n", - CompileOptions, LinkOptions); - } - if (Error E = offloading::wrapSYCLBinaries(M, Images, WrappingOptions)) - return E; - - if (Args.hasArg(OPT_print_wrapped_module)) - errs() << "Wrapped Module\n" << M; - - // TODO: Once "clang tool->runCompile" migration is finished we need to remove - // this scope and use community flow. - int FD = -1; - if (std::error_code EC = sys::fs::openFileForWrite(OutputFilePath, FD)) - return errorCodeToError(EC); - - raw_fd_ostream OS(FD, true); - WriteBitcodeToFile(M, OS); - return OutputFilePath; -} - -/// Run clang tool for SYCL offloading. -/// 'InputFile' is the wrapped input file. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate options required to be passed into the -/// clang tool. -static Expected runCompile(StringRef &InputFile, - const ArgList &Args) { - // Create a new file to write the output of clang to. - auto OutputFileOrErr = - createOutputFile(sys::path::filename(ExecutableName), "o"); - if (!OutputFileOrErr) - return OutputFileOrErr.takeError(); - - Expected ClangPath = - findProgram("clang", {getMainExecutable("clang")}); - if (!ClangPath) - return ClangPath.takeError(); - - const llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ)); - - SmallVector CmdArgs; - CmdArgs.push_back(*ClangPath); - - const std::string TargetStr = "--target=" + HostTriple.getTriple(); - CmdArgs.push_back(TargetStr); - - // Checking for '-shared' linker option - if (Args.hasArg(OPT_shared)) { - if (!HostTriple.isOSWindows()) - CmdArgs.push_back("-fPIC"); - } - CmdArgs.push_back("-c"); - CmdArgs.push_back("-o"); - CmdArgs.push_back(*OutputFileOrErr); - CmdArgs.push_back(InputFile); - if (Error Err = executeCommands(*ClangPath, CmdArgs)) - return std::move(Err); - return *OutputFileOrErr; -} - -// Run wrapping library and clang -static Expected -runWrapperAndCompile(std::vector &SplitModules, - const ArgList &Args, bool IsEmbeddedIR = false) { - auto OutputFile = - sycl::wrapSYCLBinariesFromFile(SplitModules, Args, IsEmbeddedIR); - if (!OutputFile) - return OutputFile.takeError(); - // call to clang - auto OutputFileOrErr = sycl::runCompile(*OutputFile, Args); - if (!OutputFileOrErr) - return OutputFileOrErr.takeError(); - return *OutputFileOrErr; -} - -/// Link all SYCL device input files into one before adding device library -/// files. Device linking is performed using llvm-link tool. -/// 'InputFiles' is the list of all LLVM IR device input files. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate options required to be passed into the -/// llvm-link tool. -Expected linkDeviceInputFiles(SmallVectorImpl &InputFiles, - const ArgList &Args) { - llvm::TimeTraceScope TimeScope("SYCL LinkDeviceInputFiles"); - - Expected LLVMLinkPath = - findProgram("llvm-link", {getMainExecutable("llvm-link")}); - if (!LLVMLinkPath) - return LLVMLinkPath.takeError(); - - // Create a new file to write the linked device file to. - auto OutFileOrErr = - createOutputFile(sys::path::filename(ExecutableName), "bc"); - if (!OutFileOrErr) - return OutFileOrErr.takeError(); - - SmallVector CmdArgs; - CmdArgs.push_back(*LLVMLinkPath); - for (auto &File : InputFiles) { - auto IRFile = sycl::convertSPIRVToIR(File, Args); - if (!IRFile) - return IRFile.takeError(); - CmdArgs.push_back(*IRFile); - } - CmdArgs.push_back("-o"); - CmdArgs.push_back(*OutFileOrErr); - CmdArgs.push_back("--suppress-warnings"); - if (Error Err = executeCommands(*LLVMLinkPath, CmdArgs)) - return std::move(Err); - return *OutFileOrErr; -} - -/// Link all device library files and input file into one LLVM IR file. This -/// linking is performed using llvm-link tool. -/// 'InputFiles' is the list of all LLVM IR device input files. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate options required to be passed into the -/// llvm-link tool. -static Expected -linkDeviceLibFiles(SmallVectorImpl &InputFiles, - const ArgList &Args) { - llvm::TimeTraceScope TimeScope("LinkDeviceLibraryFiles"); - - Expected LLVMLinkPath = - findProgram("llvm-link", {getMainExecutable("llvm-link")}); - if (!LLVMLinkPath) - return LLVMLinkPath.takeError(); - - // Create a new file to write the linked device file to. - auto OutFileOrErr = - createOutputFile(sys::path::filename(ExecutableName), "bc"); - if (!OutFileOrErr) - return OutFileOrErr.takeError(); - - SmallVector CmdArgs; - CmdArgs.push_back(*LLVMLinkPath); - CmdArgs.push_back("-only-needed"); - for (auto &File : InputFiles) - CmdArgs.push_back(File); - CmdArgs.push_back("-o"); - CmdArgs.push_back(*OutFileOrErr); - CmdArgs.push_back("--suppress-warnings"); - if (Error Err = executeCommands(*LLVMLinkPath, CmdArgs)) - return std::move(Err); - return *OutFileOrErr; -} - -/// This function is used to link all SYCL device input files into a single -/// LLVM IR file. This file is in turn linked with all SYCL device library -/// files. -/// 'InputFiles' is the list of all LLVM IR device input files. -/// 'Args' encompasses all arguments required for linking and wrapping device -/// code and will be parsed to generate options required to be passed into the -/// llvm-link tool. -static Expected linkDevice(ArrayRef InputFiles, - const ArgList &Args) { - SmallVector InputFilesVec; - for (StringRef InputFile : InputFiles) - InputFilesVec.emplace_back(InputFile); - // First llvm-link step. - auto LinkedFile = sycl::linkDeviceInputFiles(InputFilesVec, Args); - if (!LinkedFile) - reportError(LinkedFile.takeError()); - - InputFilesVec.clear(); - InputFilesVec.emplace_back(*LinkedFile); - - // Gathering device library files - SmallVector DeviceLibFiles; - if (Error Err = sycl::getSYCLDeviceLibs(DeviceLibFiles, Args)) - reportError(std::move(Err)); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - SmallVector ExtractedDeviceLibFiles; - for (auto &File : DeviceLibFiles) { - auto BufferOrErr = MemoryBuffer::getFile(File); - if (!BufferOrErr) - return createFileError(File, BufferOrErr.getError()); - auto Buffer = std::move(*BufferOrErr); - SmallVector Binaries; - if (Error Err = extractOffloadBinaries(Buffer->getMemBufferRef(), Binaries)) - return std::move(Err); - bool CompatibleBinaryFound = false; - for (auto &Binary : Binaries) { - auto BinTriple = Binary.getBinary()->getTriple(); - if (BinTriple == Triple.getTriple()) { - auto FileNameOrErr = - writeOffloadFile(Binary, true /* HasSYCLOffloadKind */); - if (!FileNameOrErr) - return FileNameOrErr.takeError(); - ExtractedDeviceLibFiles.emplace_back(*FileNameOrErr); - CompatibleBinaryFound = true; - } - } - if (!CompatibleBinaryFound) - WithColor::warning(errs(), LinkerExecutable) - << "Compatible SYCL device library binary not found\n"; - } - - // For NVPTX backend we need to also link libclc and CUDA libdevice. - if (Triple.isNVPTX()) { - if (Arg *A = Args.getLastArg(OPT_sycl_nvptx_device_lib_EQ)) { - if (A->getValues().size() == 0) - return createStringError( - inconvertibleErrorCode(), - "Number of device library files cannot be zero."); - for (StringRef Val : A->getValues()) { - SmallString<128> LibName(Val); - if (llvm::sys::fs::exists(LibName)) - ExtractedDeviceLibFiles.emplace_back(std::string(LibName)); - else - return createStringError( - inconvertibleErrorCode(), - std::string(LibName) + - " SYCL device library file for NVPTX is not found."); - } - } - } - - // Make sure that SYCL device library files are available. - // Note: For AMD targets, we do not pass any SYCL device libraries. - if (ExtractedDeviceLibFiles.empty()) { - // TODO: Add NVPTX when ready - if (Triple.isSPIROrSPIRV()) - return createStringError( - inconvertibleErrorCode(), - " SYCL device library file list cannot be empty."); - return *LinkedFile; - } - - for (auto &File : ExtractedDeviceLibFiles) - InputFilesVec.emplace_back(File); - // second llvm-link step - auto DeviceLinkedFile = sycl::linkDeviceLibFiles(InputFilesVec, Args); - if (!DeviceLinkedFile) - reportError(DeviceLinkedFile.takeError()); - - return *DeviceLinkedFile; -} - -static bool isStaticArchiveFile(const StringRef Filename) { - if (!llvm::sys::path::has_extension(Filename)) - // Any file with no extension should not be considered an Archive. - return false; - llvm::file_magic Magic; - llvm::identify_magic(Filename, Magic); - // Only archive files are to be considered. - // TODO: .lib check to be added - return (Magic == llvm::file_magic::archive); -} - -static Expected listSection(StringRef Filename, - const ArgList &Args) { - Expected OffloadBundlerPath = findProgram( - "clang-offload-bundler", {getMainExecutable("clang-offload-bundler")}); - if (!OffloadBundlerPath) - return OffloadBundlerPath.takeError(); - BumpPtrAllocator Alloc; - StringSaver Saver(Alloc); - - SmallVector CmdArgs; - CmdArgs.push_back(*OffloadBundlerPath); - bool IsArchive = isStaticArchiveFile(Filename); - CmdArgs.push_back(IsArchive ? "-type=aoo" : "-type=o"); - CmdArgs.push_back(Saver.save("-input=" + Filename)); - CmdArgs.push_back("-list"); - auto Output = createOutputFile("bundled-targets", "list"); - if (!Output) - return Output.takeError(); - SmallVector> Redirects{std::nullopt, *Output, - std::nullopt}; - int ErrCode = llvm::sys::ExecuteAndWait(*OffloadBundlerPath, CmdArgs, - std::nullopt, Redirects); - if (ErrCode != 0) - return createStringError(inconvertibleErrorCode(), - "Failed to list targets"); - return *Output; -} - -// This routine is used to run the clang-offload-bundler tool and unbundle -// device inputs that have been created with an older compiler where the -// device object is bundled into a host object. -static Expected unbundle(StringRef Filename, const ArgList &Args, - llvm::Triple Triple) { - Expected OffloadBundlerPath = findProgram( - "clang-offload-bundler", {getMainExecutable("clang-offload-bundler")}); - if (!OffloadBundlerPath) - return OffloadBundlerPath.takeError(); - - // Create a new file to write the unbundled file to. - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName), "ir"); - if (!TempFileOrErr) - return TempFileOrErr.takeError(); - - BumpPtrAllocator Alloc; - StringSaver Saver(Alloc); - - SmallVector CmdArgs; - CmdArgs.push_back(*OffloadBundlerPath); - bool IsArchive = isStaticArchiveFile(Filename); - CmdArgs.push_back(IsArchive ? "-type=aoo" : "-type=o"); - auto *Target = Args.MakeArgString(Twine("-targets=sycl-") + Triple.str()); - CmdArgs.push_back(Target); - CmdArgs.push_back(Saver.save("-input=" + Filename)); - CmdArgs.push_back(Saver.save("-output=" + *TempFileOrErr)); - CmdArgs.push_back("-unbundle"); - CmdArgs.push_back("-allow-missing-bundles"); - if (Error Err = executeCommands(*OffloadBundlerPath, CmdArgs)) - return std::move(Err); - return *TempFileOrErr; -} - -Error extractBundledObjects(StringRef Filename, const ArgList &Args, - SmallVector &Binaries) { - auto List = listSection(Filename, Args); - if (!List) - return List.takeError(); - SmallVector TriplesInFile; - llvm::ErrorOr> TripleList = - llvm::MemoryBuffer::getFileOrSTDIN(*List, /*isText=*/true); - if (std::error_code EC = TripleList.getError()) - return createFileError(*List, EC); - (*TripleList) - ->getBuffer() - .split(TriplesInFile, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); - for (StringRef TripleStr : TriplesInFile) { - StringRef SYCLPrefix = "sycl-"; - if (!TripleStr.starts_with(SYCLPrefix)) - continue; - llvm::Triple Triple(TripleStr.substr(SYCLPrefix.size())); - auto UnbundledFile = unbundle(Filename, Args, Triple); - if (!UnbundledFile) - return UnbundledFile.takeError(); - if (*UnbundledFile == Filename) - continue; - - SmallVector ObjectFilePaths; - if (sycl::isStaticArchiveFile(Filename)) { - llvm::ErrorOr> ObjList = - llvm::MemoryBuffer::getFileOrSTDIN(*UnbundledFile, /*isText=*/true); - if (std::error_code EC = ObjList.getError()) - return createFileError(*UnbundledFile, EC); - // Create a copy of the list we can reference even after we close - // the file. - StringRef UnbundledArchiveList = - Args.MakeArgString((*ObjList)->getBuffer()); - UnbundledArchiveList.split(ObjectFilePaths, '\n', /*MaxSplit=*/-1, - /*KeepEmpty=*/false); - } else { - ObjectFilePaths.push_back(*UnbundledFile); - } - for (StringRef ObjectFilePath : ObjectFilePaths) { - llvm::file_magic Magic; - llvm::identify_magic(ObjectFilePath, Magic); - if (Magic == file_magic::spirv_object) - return createStringError( - "SPIR-V fat objects must be generated with --offload-new-driver"); - const auto *Arg = Args.MakeArgString( - "sycl-" + - (Triple.isSPIROrSPIRV() ? Triple.str() + "-" : Triple.str()) + "=" + - ObjectFilePath); - auto Binary = getInputBitcodeLibrary(Arg); - - if (!Binary) - return Binary.takeError(); - - Binaries.push_back(std::move(*Binary)); - } - } - return Error::success(); -} - -} // namespace sycl - namespace generic { Expected clang(ArrayRef InputFiles, const ArgList &Args, - bool IsSYCLKind = false) { + bool HasSYCLOffloadKind = false) { llvm::TimeTraceScope TimeScope("Clang"); // Use `clang` to invoke the appropriate device tools. Expected ClangPath = @@ -1506,8 +500,6 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args, const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); - if (Arch.empty()) - Arch = "native"; // Create a new file to write the linked device image to. Assume that the // input filename already has the device and architecture. auto TempFileOrErr = @@ -1517,20 +509,23 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args, if (!TempFileOrErr) return TempFileOrErr.takeError(); - StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); SmallVector CmdArgs{ *ClangPath, "--no-default-config", "-o", *TempFileOrErr, Args.MakeArgString("--target=" + Triple.getTriple()), - Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch) - : Args.MakeArgString("-march=" + Arch), - Args.MakeArgString("-" + OptLevel), }; + if (!Arch.empty()) + Triple.isAMDGPU() ? CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch)) + : CmdArgs.push_back(Args.MakeArgString("-march=" + Arch)); + + // AMDGPU is always in LTO mode currently. + if (Triple.isAMDGPU()) + CmdArgs.push_back("-flto"); + // Forward all of the `--offload-opt` and similar options to the device. - CmdArgs.push_back("-flto"); for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) CmdArgs.append( {"-Xlinker", @@ -1539,13 +534,11 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args, if (!Triple.isNVPTX() && !Triple.isSPIRV()) CmdArgs.push_back("-Wl,--no-undefined"); - if (IsSYCLKind && Triple.isNVPTX()) - CmdArgs.push_back("-Wl,--lto-emit-asm"); for (StringRef InputFile : InputFiles) CmdArgs.push_back(InputFile); // If this is CPU offloading we copy the input libraries. - if (!Triple.isAMDGPU() && !Triple.isNVPTX() && !Triple.isSPIRV()) { + if (!Triple.isGPU()) { CmdArgs.push_back("-Wl,-Bsymbolic"); CmdArgs.push_back("-shared"); ArgStringList LinkerArgs; @@ -1581,45 +574,41 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args, CmdArgs.append({"-Xlinker", Args.MakeArgString( "-mllvm=" + StringRef(Arg->getValue()))}); - if (Args.hasArg(OPT_debug)) - CmdArgs.push_back("-g"); - - if (SaveTemps) - CmdArgs.push_back("-save-temps"); - if (SaveTemps && linkerSupportsLTO(Args)) CmdArgs.push_back("-Wl,--save-temps"); if (Args.hasArg(OPT_embed_bitcode)) CmdArgs.push_back("-Wl,--lto-emit-llvm"); - + if (Verbose) CmdArgs.push_back("-v"); - if (!CudaBinaryPath.empty()) - CmdArgs.push_back(Args.MakeArgString("--cuda-path=" + CudaBinaryPath)); - - for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg)) - llvm::copy( - SmallVector({"-Xcuda-ptxas", Args.MakeArgString(Arg)}), - std::back_inserter(CmdArgs)); + if (HasSYCLOffloadKind) { + CmdArgs.push_back("--sycl-link"); + CmdArgs.append( + {"-Xlinker", Args.MakeArgString("-triple=" + Triple.getTriple())}); + CmdArgs.append({"-Xlinker", Args.MakeArgString("-arch=" + Arch)}); + if (Verbose) + CmdArgs.append({"-Xlinker", Args.MakeArgString("--verbose")}); + if (SaveTemps) + CmdArgs.append({"-Xlinker", Args.MakeArgString("-save-temps")}); + if (DryRun) + CmdArgs.append({"-Xlinker", Args.MakeArgString("--dry-run")}); + StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); + CmdArgs.append({"-Xlinker", Args.MakeArgString("-" + OptLevel)}); + StringRef GPUArgs = Args.getLastArgValue(OPT_gpu_tool_arg_EQ); + CmdArgs.append( + {"-Xlinker", Args.MakeArgString("-gpu-tool-arg=" + GPUArgs)}); + StringRef CPUArgs = Args.getLastArgValue(OPT_cpu_tool_arg_EQ); + CmdArgs.append( + {"-Xlinker", Args.MakeArgString("-cpu-tool-arg=" + CPUArgs)}); + } for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ)) CmdArgs.append({"-Xlinker", Args.MakeArgString(Arg)}); for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ)) CmdArgs.push_back(Args.MakeArgString(Arg)); - for (StringRef Arg : Args.getAllArgValues(OPT_builtin_bitcode_EQ)) { - if (llvm::Triple(Arg.split('=').first) == Triple) - CmdArgs.append({"-Xclang", "-mlink-builtin-bitcode", "-Xclang", - Args.MakeArgString(Arg.split('=').second)}); - } - - // The OpenMPOpt pass can introduce new calls and is expensive, we do - // not want this when running CodeGen through clang. - if (Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ)) - CmdArgs.append({"-mllvm", "-openmp-opt-disable"}); - if (Error Err = executeCommands(*ClangPath, CmdArgs)) return std::move(Err); @@ -1628,7 +617,8 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args, } // namespace generic Expected linkDevice(ArrayRef InputFiles, - const ArgList &Args, bool IsSYCLKind = false) { + const ArgList &Args, + bool HasSYCLOffloadKind = false) { const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); switch (Triple.getArch()) { case Triple::nvptx: @@ -1640,38 +630,10 @@ Expected linkDevice(ArrayRef InputFiles, case Triple::aarch64_be: case Triple::ppc64: case Triple::ppc64le: - case Triple::systemz: - return generic::clang(InputFiles, Args, IsSYCLKind); - case Triple::spirv32: case Triple::spirv64: - case Triple::spir: - case Triple::spir64: { - if (Triple.getSubArch() != llvm::Triple::NoSubArch && - Triple.getSubArch() != llvm::Triple::SPIRSubArch_gen && - Triple.getSubArch() != llvm::Triple::SPIRSubArch_x86_64) - return createStringError( - inconvertibleErrorCode(), - "For SPIR targets, Linking is supported only for JIT compilations " - "and AOT compilations for Intel CPUs/GPUs"); - if (IsSYCLKind) { - auto SPVFile = sycl::runLLVMToSPIRVTranslation(InputFiles[0], Args); - if (!SPVFile) - return SPVFile.takeError(); - // TODO(NOM6): Add AOT support for other targets - bool NeedAOTCompile = - (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen || - Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64); - auto AOTFile = - (NeedAOTCompile) ? sycl::runAOTCompile(*SPVFile, Args) : *SPVFile; - if (!AOTFile) - return AOTFile.takeError(); - return NeedAOTCompile ? *AOTFile : *SPVFile; - } - // Return empty file - return StringRef(""); - } + case Triple::systemz: case Triple::loongarch64: - return generic::clang(InputFiles, Args); + return generic::clang(InputFiles, Args, HasSYCLOffloadKind); default: return createStringError(Triple.getArchName() + " linking is not supported"); @@ -1683,16 +645,16 @@ Expected linkDevice(ArrayRef InputFiles, Expected compileModule(Module &M, OffloadKind Kind) { llvm::TimeTraceScope TimeScope("Compile module"); std::string Msg; - const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg); + llvm::Triple Triple(M.getTargetTriple()); + const Target *T = TargetRegistry::lookupTarget(Triple.getTriple(), Msg); if (!T) return createStringError(Msg); - auto Options = - codegen::InitTargetOptionsFromCodeGenFlags(Triple(M.getTargetTriple())); + auto Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple); StringRef CPU = ""; StringRef Features = ""; std::unique_ptr TM( - T->createTargetMachine(M.getTargetTriple(), CPU, Features, Options, + T->createTargetMachine(Triple.getTriple(), CPU, Features, Options, Reloc::PIC_, M.getCodeModel())); if (M.getDataLayout().isDefault()) @@ -1711,7 +673,7 @@ Expected compileModule(Module &M, OffloadKind Kind) { auto OS = std::make_unique(FD, true); legacy::PassManager CodeGenPasses; - TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple())); + TargetLibraryInfoImpl TLII(Triple); CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII)); if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr, CodeGenFileType::ObjectFile)) @@ -1741,22 +703,19 @@ wrapDeviceImages(ArrayRef> Buffers, switch (Kind) { case OFK_OpenMP: if (Error Err = offloading::wrapOpenMPBinaries( - M, BuffersToWrap, - offloading::getOffloadEntryArray(M, "omp_offloading_entries"), + M, BuffersToWrap, offloading::getOffloadEntryArray(M), /*Suffix=*/"", /*Relocatable=*/Args.hasArg(OPT_relocatable))) return std::move(Err); break; case OFK_Cuda: if (Error Err = offloading::wrapCudaBinary( - M, BuffersToWrap.front(), - offloading::getOffloadEntryArray(M, "cuda_offloading_entries"), + M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M), /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false)) return std::move(Err); break; case OFK_HIP: if (Error Err = offloading::wrapHIPBinary( - M, BuffersToWrap.front(), - offloading::getOffloadEntryArray(M, "hip_offloading_entries"))) + M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M))) return std::move(Err); break; default: @@ -1870,19 +829,12 @@ DerivedArgList getLinkerArgs(ArrayRef Input, // Set the subarchitecture and target triple for this compilation. const OptTable &Tbl = getOptTable(); + StringRef Arch = Args.MakeArgString(Input.front().getBinary()->getArch()); DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ), - Args.MakeArgString(Input.front().getBinary()->getArch())); + Arch == "generic" ? "" : Arch); DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ), Args.MakeArgString(Input.front().getBinary()->getTriple())); - const auto *Bin = Input.front().getBinary(); - DAL.AddJoinedArg( - nullptr, Tbl.getOption(OPT_sycl_backend_compile_options_from_image_EQ), - Args.MakeArgString(Bin->getString(COMPILE_OPTS))); - DAL.AddJoinedArg(nullptr, - Tbl.getOption(OPT_sycl_backend_link_options_from_image_EQ), - Args.MakeArgString(Bin->getString(LINK_OPTS))); - // If every input file is bitcode we have whole program visibility as we // do only support static linking with bitcode. auto ContainsBitcode = [](const OffloadFile &F) { @@ -1968,9 +920,6 @@ Expected> linkAndWrapDeviceFiles( std::mutex ImageMtx; MapVector> Images; - // Create a binary image of each offloading image and embed it into a new - // object file. - SmallVector WrappedOutput; // Initialize the images with any overriding inputs. if (Args.hasArg(OPT_override_image)) @@ -1990,116 +939,60 @@ Expected> linkAndWrapDeviceFiles( reportError(createStringError(Err)); }); auto LinkerArgs = getLinkerArgs(Input, BaseArgs); + DenseSet ActiveOffloadKinds; + // Currently, SYCL device code linking process differs from generic device + // code linking. + // TODO: Remove check for offload kind, once SYCL device code linking is + // aligned with generic linking. bool HasSYCLOffloadKind = false; - bool HasNonSYCLOffloadKinds = false; + bool HasNonSYCLOffloadKind = false; for (const auto &File : Input) { if (File.getBinary()->getOffloadKind() != OFK_None) ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind()); if (File.getBinary()->getOffloadKind() == OFK_SYCL) HasSYCLOffloadKind = true; else - HasNonSYCLOffloadKinds = true; + HasNonSYCLOffloadKind = true; + } + + // Write any remaining device inputs to an output file. + SmallVector InputFiles; + for (const OffloadFile &File : Input) { + auto FileNameOrErr = writeOffloadFile(File); + if (!FileNameOrErr) + return FileNameOrErr.takeError(); + InputFiles.emplace_back(*FileNameOrErr); } if (HasSYCLOffloadKind) { - SmallVector InputFiles; - // Write device inputs to an output file for the linker. - for (const OffloadFile &File : Input) { - auto FileNameOrErr = writeOffloadFile(File, HasSYCLOffloadKind); - if (!FileNameOrErr) - return FileNameOrErr.takeError(); - InputFiles.emplace_back(*FileNameOrErr); - } - // Link the input device files using the device linker for SYCL - // offload. - auto TmpOutputOrErr = sycl::linkDevice(InputFiles, LinkerArgs); - if (!TmpOutputOrErr) - return TmpOutputOrErr.takeError(); - SmallVector InputFilesSYCL; - InputFilesSYCL.emplace_back(*TmpOutputOrErr); - auto SplitModulesOrErr = - UseSYCLPostLinkTool - ? sycl::runSYCLPostLinkTool(InputFilesSYCL, LinkerArgs) - : sycl::runSYCLSplitLibrary(InputFilesSYCL, LinkerArgs, - *SYCLModuleSplitMode); - if (!SplitModulesOrErr) - return SplitModulesOrErr.takeError(); - - auto &SplitModules = *SplitModulesOrErr; - const llvm::Triple Triple(LinkerArgs.getLastArgValue(OPT_triple_EQ)); - if ((Triple.isNVPTX() || Triple.isAMDGCN()) && - LinkerArgs.hasArg(OPT_sycl_embed_ir)) { - // When compiling for Nvidia/AMD devices and the user requested the - // IR to be embedded in the application (via option), run the output - // of sycl-post-link (filetable referencing LLVM Bitcode + symbols) - // through the offload wrapper and link the resulting object to the - // application. - auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs, - /* IsEmbeddedIR */ true); - if (!OutputFile) - return OutputFile.takeError(); - WrappedOutput.push_back(*OutputFile); - } - for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { - SmallVector Files = {SplitModules[I].ModuleFilePath}; - StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ); - if (Arch.empty()) - Arch = "native"; - SmallVector, 4> BundlerInputFiles; - auto ClangOutputOrErr = - linkDevice(Files, LinkerArgs, true /* IsSYCLKind */); - if (!ClangOutputOrErr) - return ClangOutputOrErr.takeError(); - if (Triple.isNVPTX()) { - auto VirtualArch = StringRef(clang::OffloadArchToVirtualArchString( - clang::StringToOffloadArch(Arch))); - auto PtxasOutputOrErr = - nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch); - if (!PtxasOutputOrErr) - return PtxasOutputOrErr.takeError(); - BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch); - BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch); - auto BundledFileOrErr = - nvptx::fatbinary(BundlerInputFiles, LinkerArgs); - if (!BundledFileOrErr) - return BundledFileOrErr.takeError(); - SplitModules[I].ModuleFilePath = *BundledFileOrErr; - } else if (Triple.isAMDGCN()) { - BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch); - auto BundledFileOrErr = - amdgcn::fatbinary(BundlerInputFiles, LinkerArgs); - if (!BundledFileOrErr) - return BundledFileOrErr.takeError(); - SplitModules[I].ModuleFilePath = *BundledFileOrErr; - } else { - SplitModules[I].ModuleFilePath = *ClangOutputOrErr; - } + // Link the remaining device files using the device linker. + auto OutputOrErr = linkDevice(InputFiles, LinkerArgs, HasSYCLOffloadKind); + if (!OutputOrErr) + return OutputOrErr.takeError(); + // Output is a packaged object of device images. Unpackage the images and + // copy them to Images[Kind] + ErrorOr> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(*OutputOrErr); + if (std::error_code EC = BufferOrErr.getError()) + return createFileError(*OutputOrErr, EC); + + MemoryBufferRef Buffer = **BufferOrErr; + SmallVector Binaries; + if (Error Err = extractOffloadBinaries(Buffer, Binaries)) + return std::move(Err); + for (auto &OffloadFile : Binaries) { + auto TheBinary = OffloadFile.getBinary(); + OffloadingImage TheImage{}; + TheImage.TheImageKind = TheBinary->getImageKind(); + TheImage.TheOffloadKind = TheBinary->getOffloadKind(); + TheImage.StringData["triple"] = TheBinary->getTriple(); + TheImage.StringData["arch"] = TheBinary->getArch(); + TheImage.Image = MemoryBuffer::getMemBufferCopy(TheBinary->getImage()); + Images[OFK_SYCL].emplace_back(std::move(TheImage)); } - - // TODO(NOM7): Remove this call and use community flow for bundle/wrap - auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs); - if (!OutputFile) - return OutputFile.takeError(); - - // SYCL offload kind images are all ready to be sent to host linker. - // TODO: Currently, device code wrapping for SYCL offload happens in a - // separate path inside 'linkDevice' call seen above. - // This will eventually be refactored to use the 'common' wrapping logic - // that is used for other offload kinds. - std::scoped_lock Guard(ImageMtx); - WrappedOutput.push_back(*OutputFile); } - if (HasNonSYCLOffloadKinds) { - // Write any remaining device inputs to an output file. - SmallVector InputFiles; - for (const OffloadFile &File : Input) { - auto FileNameOrErr = writeOffloadFile(File); - if (!FileNameOrErr) - return FileNameOrErr.takeError(); - InputFiles.emplace_back(*FileNameOrErr); - } - + if (HasNonSYCLOffloadKind) { // Link the remaining device files using the device linker. auto OutputOrErr = linkDevice(InputFiles, LinkerArgs); if (!OutputOrErr) @@ -2107,6 +1000,8 @@ Expected> linkAndWrapDeviceFiles( // Store the offloading image for each linked output file. for (OffloadKind Kind : ActiveOffloadKinds) { + if (Kind == OFK_SYCL) + continue; llvm::ErrorOr> FileOrErr = llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr); if (std::error_code EC = FileOrErr.getError()) { @@ -2135,9 +1030,10 @@ Expected> linkAndWrapDeviceFiles( if (Err) return std::move(Err); + // Create a binary image of each offloading image and embed it into a new + // object file. + SmallVector WrappedOutput; for (auto &[Kind, Input] : Images) { - if (Kind == OFK_SYCL) - continue; // We sort the entries before bundling so they appear in a deterministic // order in the final binary. llvm::sort(Input, [](OffloadingImage &A, OffloadingImage &B) { @@ -2145,14 +1041,20 @@ Expected> linkAndWrapDeviceFiles( A.StringData["arch"] > B.StringData["arch"] || A.TheOffloadKind < B.TheOffloadKind; }); - auto BundledImagesOrErr = bundleLinkedOutput(Input, Args, Kind); - if (!BundledImagesOrErr) - return BundledImagesOrErr.takeError(); - auto OutputOrErr = wrapDeviceImages(*BundledImagesOrErr, Args, Kind); - if (!OutputOrErr) - return OutputOrErr.takeError(); - WrappedOutput.push_back(*OutputOrErr); + if (Kind == OFK_SYCL) { + /* Do SYCL specific stuff */ + WrappedOutput.push_back("dummy"); + } else { + auto BundledImagesOrErr = bundleLinkedOutput(Input, Args, Kind); + if (!BundledImagesOrErr) + return BundledImagesOrErr.takeError(); + auto OutputOrErr = wrapDeviceImages(*BundledImagesOrErr, Args, Kind); + if (!OutputOrErr) + return OutputOrErr.takeError(); + WrappedOutput.push_back(*OutputOrErr); + } } + return WrappedOutput; } @@ -2224,8 +1126,9 @@ Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind, if (Sym.isFormatSpecific() || !Sym.isGlobal()) continue; - bool NewSymbol = Syms.count(Sym.getName()) == 0; - auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()]; + auto It = Syms.find(Sym.getName()); + bool NewSymbol = It == Syms.end(); + auto OldSym = NewSymbol ? Sym_None : It->second; // We will extract if it defines a currenlty undefined non-weak // symbol. @@ -2257,6 +1160,7 @@ Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind, // If the file gets extracted we update the table with the new symbols. if (ShouldExtract) Syms.insert(std::begin(TmpSyms), std::end(TmpSyms)); + // Syms.insert_range(TmpSyms); return ShouldExtract; } @@ -2311,6 +1215,7 @@ Expected getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind, // If the file gets extracted we update the table with the new symbols. if (ShouldExtract) + // Syms.insert_range(TmpSyms); Syms.insert(std::begin(TmpSyms), std::end(TmpSyms)); return ShouldExtract; @@ -2387,21 +1292,17 @@ getDeviceInput(const ArgList &Args) { continue; ErrorOr> BufferOrErr = - MemoryBuffer::getFile(*Filename); + MemoryBuffer::getFileOrSTDIN(*Filename); if (std::error_code EC = BufferOrErr.getError()) return createFileError(*Filename, EC); MemoryBufferRef Buffer = **BufferOrErr; if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object) continue; + SmallVector Binaries; - size_t OldSize = Binaries.size(); if (Error Err = extractOffloadBinaries(Buffer, Binaries)) return std::move(Err); - if (Binaries.size() == OldSize) { - if (Error Err = sycl::extractBundledObjects(*Filename, Args, Binaries)) - return std::move(Err); - } for (auto &OffloadFile : Binaries) { if (identify_magic(Buffer.getBuffer()) == file_magic::archive && @@ -2487,13 +1388,6 @@ getDeviceInput(const ArgList &Args) { } } - for (StringRef Library : Args.getAllArgValues(OPT_bitcode_library_EQ)) { - auto FileOrErr = getInputBitcodeLibrary(Library); - if (!FileOrErr) - return FileOrErr.takeError(); - InputFiles[*FileOrErr].push_back(std::move(*FileOrErr)); - } - SmallVector> InputsForTarget; for (auto &[ID, Input] : InputFiles) InputsForTarget.emplace_back(std::move(Input)); @@ -2583,37 +1477,6 @@ int main(int Argc, char **Argv) { timeTraceProfilerInitialize(Granularity, Argv[0]); } - UseSYCLPostLinkTool = Args.hasFlag(OPT_use_sycl_post_link_tool, - OPT_no_use_sycl_post_link_tool, true); - if (!UseSYCLPostLinkTool && Args.hasArg(OPT_use_sycl_post_link_tool)) - reportError(createStringError("-use-sycl-post-link-tool and " - "-no-use-sycl-post-link-tool options can't " - "be used together.")); - - if (Args.hasArg(OPT_sycl_module_split_mode_EQ)) { - if (UseSYCLPostLinkTool) - reportError(createStringError( - "-sycl-module-split-mode should be used with " - "the -no-use-sycl-post-link-tool command line option.")); - - StringRef StrMode = Args.getLastArgValue(OPT_sycl_module_split_mode_EQ); - SYCLModuleSplitMode = module_split::convertStringToSplitMode(StrMode); - if (!SYCLModuleSplitMode) - reportError(createStringError( - inconvertibleErrorCode(), - formatv("sycl-module-split-mode value isn't recognized: {0}", - StrMode))); - } - - if (Args.hasArg(OPT_sycl_dump_device_code_EQ)) { - Arg *A = Args.getLastArg(OPT_sycl_dump_device_code_EQ); - OffloadImageDumpDir = A->getValue(); - if (OffloadImageDumpDir.empty()) - sys::path::native(OffloadImageDumpDir = "./"); - else - OffloadImageDumpDir.append(sys::path::get_separator()); - } - { llvm::TimeTraceScope TimeScope("Execute linker wrapper"); @@ -2644,5 +1507,6 @@ int main(int Argc, char **Argv) { for (const auto &TempFile : TempFiles) if (std::error_code EC = sys::fs::remove(TempFile)) reportError(createFileError(TempFile, EC)); + return EXIT_SUCCESS; } diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index d7a1aa02e8978..62f99c66d7d25 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -92,8 +92,8 @@ def whole_program : Flag<["--"], "whole-program">, Flags<[DeviceOnlyOption, HelpHidden]>, HelpText<"LTO has visibility of all input files">; def linker_arg_EQ : Joined<["--"], "linker-arg=">, - Flags<[DeviceOnlyOption, HelpHidden]>, - HelpText<"An extra argument to be passed to the linker">; + Flags<[WrapperOnlyOption, DeviceOnlyOption, HelpHidden]>, + HelpText<"An extra argument to be passed to the linker">; def compiler_arg_EQ : Joined<["--"], "compiler-arg=">, Flags<[DeviceOnlyOption, HelpHidden]>, HelpText<"An extra argument to be passed to the compiler">; diff --git a/clang/tools/clang-sycl-linker/CMakeLists.txt b/clang/tools/clang-sycl-linker/CMakeLists.txt index 5665ad7d7186e..f6cd1e84e20a6 100644 --- a/clang/tools/clang-sycl-linker/CMakeLists.txt +++ b/clang/tools/clang-sycl-linker/CMakeLists.txt @@ -5,6 +5,8 @@ set(LLVM_LINK_COMPONENTS Object TargetParser Support + SYCLLowerIR + SYCLPostLink ) set(LLVM_TARGET_DEFINITIONS SYCLLinkOpts.td) diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 3d1fa65da7750..69ed01dd7f599 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -14,6 +14,7 @@ // target-specific device code. //===---------------------------------------------------------------------===// +#include "clang/Basic/Cuda.h" #include "clang/Basic/Version.h" #include "llvm/ADT/StringExtras.h" @@ -34,6 +35,7 @@ #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" #include "llvm/Remarks/HotnessThresholdParser.h" +#include "llvm/SYCLPostLink/ModuleSplitter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/FileSystem.h" @@ -46,11 +48,15 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/WithColor.h" +#include "llvm/TargetParser/Host.h" using namespace llvm; using namespace llvm::opt; using namespace llvm::object; +/// Binary path for the CUDA installation. +static std::string CudaBinaryPath; + /// Save intermediary results. static bool SaveTemps = false; @@ -66,6 +72,14 @@ static StringRef OutputFile; /// Directory to dump SPIR-V IR if requested by user. static SmallString<128> SPIRVDumpDir; +static bool UseSYCLPostLinkTool; + +static std::optional SYCLModuleSplitMode; + +static SmallString<128> OffloadImageDumpDir; + +using OffloadingImage = OffloadBinary::OffloadingImage; + static void printVersion(raw_ostream &OS) { OS << clang::getClangToolFullVersion("clang-sycl-linker") << '\n'; } @@ -73,6 +87,9 @@ static void printVersion(raw_ostream &OS) { /// The value of `argv[0]` when run. static const char *Executable; +/// Mutex lock to protect writes to shared TempFiles in parallel. +static std::mutex TempFilesMutex; + /// Temporary files to be cleaned up. static SmallVector> TempFiles; @@ -128,6 +145,24 @@ std::string getMainExecutable(const char *Name) { return sys::path::parent_path(COWPath).str(); } +/// Get a temporary filename suitable for output. +Expected createOutputFile(const Twine &Prefix, StringRef Extension) { + std::scoped_lock Lock(TempFilesMutex); + SmallString<128> OutputFile; + if (SaveTemps) { + // Generate a unique path name without creating a file + sys::fs::createUniquePath(Prefix + "-%%%%%%." + Extension, OutputFile, + /*MakeAbsolute=*/false); + } else { + if (std::error_code EC = + sys::fs::createTemporaryFile(Prefix, Extension, OutputFile)) + return createFileError(OutputFile, EC); + } + + TempFiles.emplace_back(std::move(OutputFile)); + return TempFiles.back(); +} + Expected createTempFile(const ArgList &Args, const Twine &Prefix, StringRef Extension) { SmallString<128> OutputFile; @@ -158,6 +193,12 @@ Expected findProgram(const ArgList &Args, StringRef Name, return *Path; } +bool linkerSupportsLTO(const ArgList &Args) { + llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + return Triple.isNVPTX() || Triple.isAMDGPU() || + Args.getLastArgValue(OPT_linker_path_EQ).ends_with("lld"); +} + void printCommands(ArrayRef CmdArgs) { if (CmdArgs.empty()) return; @@ -178,123 +219,431 @@ Error executeCommands(StringRef ExecutablePath, ArrayRef Args) { "'%s' failed", sys::path::filename(ExecutablePath).str().c_str()); return Error::success(); } +} // end namespace -Expected> getInput(const ArgList &Args) { - // Collect all input bitcode files to be passed to llvm-link. - SmallVector BitcodeFiles; - for (const opt::Arg *Arg : Args.filtered(OPT_INPUT)) { - std::optional Filename = std::string(Arg->getValue()); - if (!Filename || !sys::fs::exists(*Filename) || - sys::fs::is_directory(*Filename)) - continue; - file_magic Magic; - if (auto EC = identify_magic(*Filename, Magic)) - return createStringError("Failed to open file " + *Filename); - // TODO: Current use case involves LLVM IR bitcode files as input. - // This will be extended to support objects and SPIR-V IR files. - if (Magic != file_magic::bitcode) - return createStringError("Unsupported file type"); - BitcodeFiles.push_back(*Filename); +namespace nvptx { +Expected +fatbinary(ArrayRef> InputFiles, + const ArgList &Args) { + llvm::TimeTraceScope TimeScope("NVPTX fatbinary"); + // NVPTX uses the fatbinary program to bundle the linked images. + Expected FatBinaryPath = + findProgram(Args, "fatbinary", {CudaBinaryPath + "/bin"}); + if (!FatBinaryPath) + return FatBinaryPath.takeError(); + + llvm::Triple Triple( + Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); + + // Create a new file to write the linked device image to. + auto TempFileOrErr = + createOutputFile(sys::path::filename(OutputFile), "fatbin"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + SmallVector CmdArgs; + CmdArgs.push_back(*FatBinaryPath); + CmdArgs.push_back(Triple.isArch64Bit() ? "-64" : "-32"); + CmdArgs.push_back("--create"); + CmdArgs.push_back(*TempFileOrErr); + for (const auto &[File, Arch] : InputFiles) + CmdArgs.push_back( + Args.MakeArgString("--image=profile=" + Arch + ",file=" + File)); + + if (Error Err = executeCommands(*FatBinaryPath, CmdArgs)) + return std::move(Err); + + return *TempFileOrErr; +} + +// ptxas binary +Expected ptxas(StringRef InputFile, const ArgList &Args, + StringRef Arch) { + llvm::TimeTraceScope TimeScope("NVPTX ptxas"); + // NVPTX uses the ptxas program to process assembly files. + Expected PtxasPath = + findProgram(Args, "ptxas", {CudaBinaryPath + "/bin"}); + if (!PtxasPath) + return PtxasPath.takeError(); + + llvm::Triple Triple( + Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); + + // Create a new file to write the output to. + auto TempFileOrErr = + createOutputFile(sys::path::filename(OutputFile), "cubin"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + SmallVector CmdArgs; + CmdArgs.push_back(*PtxasPath); + CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32"); + // Pass -v to ptxas if it was passed to the driver. + if (Args.hasArg(OPT_verbose)) + CmdArgs.push_back("-v"); + StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); + if (Args.hasArg(OPT_debug)) + CmdArgs.push_back("-g"); + else + CmdArgs.push_back(Args.MakeArgString("-" + OptLevel)); + CmdArgs.push_back("--gpu-name"); + CmdArgs.push_back(Arch); + CmdArgs.push_back("--output-file"); + CmdArgs.push_back(*TempFileOrErr); + CmdArgs.push_back(InputFile); + if (Error Err = executeCommands(*PtxasPath, CmdArgs)) + return std::move(Err); + return *TempFileOrErr; +} +} // namespace nvptx + +namespace amdgcn { +Expected +fatbinary(ArrayRef> InputFiles, + const ArgList &Args) { + llvm::TimeTraceScope TimeScope("AMDGPU Fatbinary"); + + // AMDGPU uses the clang-offload-bundler to bundle the linked images. + Expected OffloadBundlerPath = + findProgram(Args, "clang-offload-bundler", + {getMainExecutable("clang-offload-bundler")}); + if (!OffloadBundlerPath) + return OffloadBundlerPath.takeError(); + + llvm::Triple Triple( + Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); + + // Create a new file to write the linked device image to. + auto TempFileOrErr = + createOutputFile(sys::path::filename(OutputFile), "hipfb"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + BumpPtrAllocator Alloc; + StringSaver Saver(Alloc); + + SmallVector CmdArgs; + CmdArgs.push_back(*OffloadBundlerPath); + CmdArgs.push_back("-type=o"); + CmdArgs.push_back("-bundle-align=4096"); + + if (Args.hasArg(OPT_compress)) + CmdArgs.push_back("-compress"); + if (auto *Arg = Args.getLastArg(OPT_compression_level_eq)) + CmdArgs.push_back( + Args.MakeArgString(Twine("-compression-level=") + Arg->getValue())); + + SmallVector Targets = {"-targets=host-x86_64-unknown-linux-gnu"}; + for (const auto &[File, Arch] : InputFiles) + Targets.push_back(Saver.save("hip-amdgcn-amd-amdhsa--" + Arch)); + CmdArgs.push_back(Saver.save(llvm::join(Targets, ","))); + +#ifdef _WIN32 + CmdArgs.push_back("-input=NUL"); +#else + CmdArgs.push_back("-input=/dev/null"); +#endif + for (const auto &[File, Arch] : InputFiles) + CmdArgs.push_back(Saver.save("-input=" + File)); + + CmdArgs.push_back(Saver.save("-output=" + *TempFileOrErr)); + + if (Error Err = executeCommands(*OffloadBundlerPath, CmdArgs)) + return std::move(Err); + + return *TempFileOrErr; +} +} // namespace amdgcn + +namespace generic { +Expected clang(ArrayRef InputFiles, const ArgList &Args) { + llvm::TimeTraceScope TimeScope("Clang"); + // Use `clang` to invoke the appropriate device tools. + Expected ClangPath = + findProgram(Args, "clang", {getMainExecutable("clang")}); + if (!ClangPath) + return ClangPath.takeError(); + + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); + if (Arch.empty()) + Arch = "native"; + // Create a new file to write the linked device image to. Assume that the + // input filename already has the device and architecture. + auto TempFileOrErr = createOutputFile(sys::path::filename(OutputFile) + "." + + Triple.getArchName() + "." + Arch, + "img"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); + SmallVector CmdArgs{ + *ClangPath, + "--no-default-config", + "-o", + *TempFileOrErr, + Args.MakeArgString("--target=" + Triple.getTriple()), + Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch) + : Args.MakeArgString("-march=" + Arch), + Args.MakeArgString("-" + OptLevel), + }; + + // Forward all of the `--offload-opt` and similar options to the device. + CmdArgs.push_back("-flto"); + for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) + CmdArgs.append( + {"-Xlinker", + Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); + + if (!Triple.isNVPTX() && !Triple.isSPIRV()) + CmdArgs.push_back("-Wl,--no-undefined"); + + if (Triple.isNVPTX()) + CmdArgs.push_back("-Wl,--lto-emit-asm"); + for (StringRef InputFile : InputFiles) + CmdArgs.push_back(InputFile); + + // If this is CPU offloading we copy the input libraries. + if (!Triple.isAMDGPU() && !Triple.isNVPTX() && !Triple.isSPIRV()) { + CmdArgs.push_back("-Wl,-Bsymbolic"); + CmdArgs.push_back("-shared"); + ArgStringList LinkerArgs; + for (const opt::Arg *Arg : + Args.filtered(OPT_INPUT, OPT_library_path_EQ, OPT_rpath, + OPT_whole_archive, OPT_no_whole_archive)) { + // Sometimes needed libraries are passed by name, such as when using + // sanitizers. We need to check the file magic for any libraries. + if (Arg->getOption().matches(OPT_INPUT)) { + if (!sys::fs::exists(Arg->getValue()) || + sys::fs::is_directory(Arg->getValue())) + continue; + + file_magic Magic; + if (auto EC = identify_magic(Arg->getValue(), Magic)) + return createStringError("Failed to open %s", Arg->getValue()); + if (Magic != file_magic::archive && + Magic != file_magic::elf_shared_object) + continue; + } + if (Arg->getOption().matches(OPT_whole_archive)) + LinkerArgs.push_back(Args.MakeArgString("-Wl,--whole-archive")); + else if (Arg->getOption().matches(OPT_no_whole_archive)) + LinkerArgs.push_back(Args.MakeArgString("-Wl,--no-whole-archive")); + else + Arg->render(Args, LinkerArgs); + } + llvm::copy(LinkerArgs, std::back_inserter(CmdArgs)); } - return BitcodeFiles; + + // Pass on -mllvm options to the linker invocation. + for (const opt::Arg *Arg : Args.filtered(OPT_mllvm)) + CmdArgs.append({"-Xlinker", Args.MakeArgString( + "-mllvm=" + StringRef(Arg->getValue()))}); + + if (Args.hasArg(OPT_debug)) + CmdArgs.push_back("-g"); + + if (SaveTemps) + CmdArgs.push_back("-save-temps"); + + if (SaveTemps && linkerSupportsLTO(Args)) + CmdArgs.push_back("-Wl,--save-temps"); + + if (Args.hasArg(OPT_embed_bitcode)) + CmdArgs.push_back("-Wl,--lto-emit-llvm"); + + if (Verbose) + CmdArgs.push_back("-v"); + + if (!CudaBinaryPath.empty()) + CmdArgs.push_back(Args.MakeArgString("--cuda-path=" + CudaBinaryPath)); + + for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg)) + llvm::copy( + SmallVector({"-Xcuda-ptxas", Args.MakeArgString(Arg)}), + std::back_inserter(CmdArgs)); + + for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ)) + CmdArgs.append({"-Xlinker", Args.MakeArgString(Arg)}); + for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ)) + CmdArgs.push_back(Args.MakeArgString(Arg)); + + for (StringRef Arg : Args.getAllArgValues(OPT_builtin_bitcode_EQ)) { + if (llvm::Triple(Arg.split('=').first) == Triple) + CmdArgs.append({"-Xclang", "-mlink-builtin-bitcode", "-Xclang", + Args.MakeArgString(Arg.split('=').second)}); + } + + // The OpenMPOpt pass can introduce new calls and is expensive, we do + // not want this when running CodeGen through clang. + if (Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ)) + CmdArgs.append({"-mllvm", "-openmp-opt-disable"}); + + if (Error Err = executeCommands(*ClangPath, CmdArgs)) + return std::move(Err); + + return *TempFileOrErr; +} +} // namespace generic + +static Error writeFile(StringRef Filename, StringRef Data) { + Expected> OutputOrErr = + FileOutputBuffer::create(Filename, Data.size()); + if (!OutputOrErr) + return OutputOrErr.takeError(); + std::unique_ptr Output = std::move(*OutputOrErr); + llvm::copy(Data, Output->getBufferStart()); + if (Error E = Output->commit()) + return E; + return Error::success(); +} + +Expected writeOffloadFile(const OffloadFile &File) { + const OffloadBinary &Binary = *File.getBinary(); + + StringRef Prefix = + sys::path::stem(Binary.getMemoryBufferRef().getBufferIdentifier()); + StringRef Suffix = getImageKindName(Binary.getImageKind()); + + auto TempFileOrErr = createOutputFile( + Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), Suffix); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + Expected> OutputOrErr = + FileOutputBuffer::create(*TempFileOrErr, Binary.getImage().size()); + if (!OutputOrErr) + return OutputOrErr.takeError(); + std::unique_ptr Output = std::move(*OutputOrErr); + llvm::copy(Binary.getImage(), Output->getBufferStart()); + if (Error E = Output->commit()) + return std::move(E); + + return *TempFileOrErr; +} + +/// This routine is used to convert SPIR-V input files into LLVM IR files. +/// 'llvm-spirv -r' command is used for this purpose. +/// If input is not a SPIR-V file, then the original file is returned. +/// TODO: Add a check to identify SPIR-V files and exit early if the input is +/// not a SPIR-V file. +/// 'Filename' is the input file that could be a SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// llvm-spirv tool. +static Expected convertSPIRVToIR(StringRef Filename, + const ArgList &Args) { + Expected SPIRVToIRWrapperPath = findProgram( + Args, "spirv-to-ir-wrapper", {getMainExecutable("spirv-to-ir-wrapper")}); + if (!SPIRVToIRWrapperPath) + return SPIRVToIRWrapperPath.takeError(); + + // Create a new file to write the converted file to. + auto TempFileOrErr = createOutputFile(sys::path::filename(OutputFile), "bc"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + SmallVector CmdArgs; + CmdArgs.push_back(*SPIRVToIRWrapperPath); + CmdArgs.push_back(Filename); + CmdArgs.push_back("-o"); + CmdArgs.push_back(*TempFileOrErr); + CmdArgs.push_back("--llvm-spirv-opts"); + CmdArgs.push_back("--spirv-preserve-auxdata --spirv-target-env=SPV-IR " + "--spirv-builtin-format=global"); + if (Error Err = executeCommands(*SPIRVToIRWrapperPath, CmdArgs)) + return std::move(Err); + return *TempFileOrErr; } /// Link all SYCL device input files into one before adding device library /// files. Device linking is performed using llvm-link tool. /// 'InputFiles' is the list of all LLVM IR device input files. -/// 'Args' encompasses all arguments required for linking device code and will -/// be parsed to generate options required to be passed into llvm-link. -Expected linkDeviceInputFiles(ArrayRef InputFiles, +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// llvm-link tool. +Expected linkDeviceInputFiles(SmallVectorImpl &InputFiles, const ArgList &Args) { llvm::TimeTraceScope TimeScope("SYCL LinkDeviceInputFiles"); - assert(InputFiles.size() && "No inputs to llvm-link"); - // Early check to see if there is only one input. - if (InputFiles.size() < 2) - return InputFiles[0]; - Expected LLVMLinkPath = findProgram(Args, "llvm-link", {getMainExecutable("llvm-link")}); if (!LLVMLinkPath) return LLVMLinkPath.takeError(); - SmallVector CmdArgs; - CmdArgs.push_back(*LLVMLinkPath); - for (auto &File : InputFiles) - CmdArgs.push_back(File); // Create a new file to write the linked device file to. - auto OutFileOrErr = - createTempFile(Args, sys::path::filename(OutputFile), "bc"); + auto OutFileOrErr = createOutputFile(sys::path::filename(OutputFile), "bc"); if (!OutFileOrErr) return OutFileOrErr.takeError(); + + SmallVector CmdArgs; + CmdArgs.push_back(*LLVMLinkPath); + for (auto &File : InputFiles) { + auto IRFile = convertSPIRVToIR(File, Args); + if (!IRFile) + return IRFile.takeError(); + CmdArgs.push_back(*IRFile); + } CmdArgs.push_back("-o"); CmdArgs.push_back(*OutFileOrErr); CmdArgs.push_back("--suppress-warnings"); if (Error Err = executeCommands(*LLVMLinkPath, CmdArgs)) return std::move(Err); - return Args.MakeArgString(*OutFileOrErr); + return *OutFileOrErr; } // This utility function is used to gather all SYCL device library files that // will be linked with input device files. // The list of files and its location are passed from driver. -Expected> getSYCLDeviceLibs(const ArgList &Args) { - SmallVector DeviceLibFiles; - StringRef LibraryPath; - if (Arg *A = Args.getLastArg(OPT_library_path_EQ)) - LibraryPath = A->getValue(); - if (LibraryPath.empty()) - return DeviceLibFiles; - if (Arg *A = Args.getLastArg(OPT_device_libs_EQ)) { +static Error getSYCLDeviceLibs(SmallVector &DeviceLibFiles, + const ArgList &Args) { + StringRef SYCLDeviceLibLoc(""); + if (Arg *A = Args.getLastArg(OPT_device_library_location_EQ)) + SYCLDeviceLibLoc = A->getValue(); + if (Arg *A = Args.getLastArg(OPT_device_lib_EQ)) { if (A->getValues().size() == 0) return createStringError( inconvertibleErrorCode(), "Number of device library files cannot be zero."); for (StringRef Val : A->getValues()) { - SmallString<128> LibName(LibraryPath); + SmallString<128> LibName(SYCLDeviceLibLoc); llvm::sys::path::append(LibName, Val); if (llvm::sys::fs::exists(LibName)) DeviceLibFiles.push_back(std::string(LibName)); else return createStringError(inconvertibleErrorCode(), - "\'" + std::string(LibName) + "\'" + + std::string(LibName) + " SYCL device library file is not found."); } } - return DeviceLibFiles; + return Error::success(); } /// Link all device library files and input file into one LLVM IR file. This /// linking is performed using llvm-link tool. /// 'InputFiles' is the list of all LLVM IR device input files. -/// 'Args' encompasses all arguments required for linking device code and will -/// be parsed to generate options required to be passed into llvm-link tool. -static Expected linkDeviceLibFiles(StringRef InputFile, - const ArgList &Args) { +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// llvm-link tool. +static Expected +linkDeviceLibFiles(SmallVectorImpl &InputFiles, + const ArgList &Args) { llvm::TimeTraceScope TimeScope("LinkDeviceLibraryFiles"); - auto SYCLDeviceLibFiles = getSYCLDeviceLibs(Args); - if (!SYCLDeviceLibFiles) - return SYCLDeviceLibFiles.takeError(); - if ((*SYCLDeviceLibFiles).empty()) - return InputFile; - Expected LLVMLinkPath = findProgram(Args, "llvm-link", {getMainExecutable("llvm-link")}); if (!LLVMLinkPath) return LLVMLinkPath.takeError(); // Create a new file to write the linked device file to. - auto OutFileOrErr = - createTempFile(Args, sys::path::filename(OutputFile), "bc"); + auto OutFileOrErr = createOutputFile(sys::path::filename(OutputFile), "bc"); if (!OutFileOrErr) return OutFileOrErr.takeError(); SmallVector CmdArgs; CmdArgs.push_back(*LLVMLinkPath); CmdArgs.push_back("-only-needed"); - CmdArgs.push_back(InputFile); - for (auto &File : *SYCLDeviceLibFiles) + for (auto &File : InputFiles) CmdArgs.push_back(File); CmdArgs.push_back("-o"); CmdArgs.push_back(*OutFileOrErr); @@ -304,26 +653,297 @@ static Expected linkDeviceLibFiles(StringRef InputFile, return *OutFileOrErr; } -/// Add any llvm-spirv option that relies on a specific Triple in addition +/// This function is used to link all SYCL device input files into a single +/// LLVM IR file. This file is in turn linked with all SYCL device library +/// files. +/// 'InputFiles' is the list of all LLVM IR device input files. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// llvm-link tool. +static Expected linkDeviceBitcode(ArrayRef InputFiles, + const ArgList &Args) { + SmallVector InputFilesVec; + for (StringRef InputFile : InputFiles) + InputFilesVec.emplace_back(InputFile); + // First llvm-link step. + auto LinkedFile = linkDeviceInputFiles(InputFilesVec, Args); + if (!LinkedFile) + reportError(LinkedFile.takeError()); + + InputFilesVec.clear(); + InputFilesVec.emplace_back(*LinkedFile); + + // Gathering device library files + SmallVector DeviceLibFiles; + if (Error Err = getSYCLDeviceLibs(DeviceLibFiles, Args)) + reportError(std::move(Err)); + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + SmallVector ExtractedDeviceLibFiles; + for (auto &File : DeviceLibFiles) { + auto BufferOrErr = MemoryBuffer::getFile(File); + if (!BufferOrErr) + return createFileError(File, BufferOrErr.getError()); + auto Buffer = std::move(*BufferOrErr); + SmallVector Binaries; + if (Error Err = extractOffloadBinaries(Buffer->getMemBufferRef(), Binaries)) + return std::move(Err); + bool CompatibleBinaryFound = false; + for (auto &Binary : Binaries) { + auto BinTriple = Binary.getBinary()->getTriple(); + if (BinTriple == Triple.getTriple()) { + auto FileNameOrErr = + writeOffloadFile(Binary); + if (!FileNameOrErr) + return FileNameOrErr.takeError(); + ExtractedDeviceLibFiles.emplace_back(*FileNameOrErr); + CompatibleBinaryFound = true; + } + } + if (!CompatibleBinaryFound) + WithColor::warning(errs(), Executable) + << "Compatible SYCL device library binary not found\n"; + } + + // For NVPTX backend we need to also link libclc and CUDA libdevice. + if (Triple.isNVPTX()) { + if (Arg *A = Args.getLastArg(OPT_nvptx_device_lib_EQ)) { + if (A->getValues().size() == 0) + return createStringError( + inconvertibleErrorCode(), + "Number of device library files cannot be zero."); + for (StringRef Val : A->getValues()) { + SmallString<128> LibName(Val); + if (llvm::sys::fs::exists(LibName)) + ExtractedDeviceLibFiles.emplace_back(std::string(LibName)); + else + return createStringError( + inconvertibleErrorCode(), + std::string(LibName) + + " SYCL device library file for NVPTX is not found."); + } + } + } + + if (ExtractedDeviceLibFiles.empty()) + return *LinkedFile; + + for (auto &File : ExtractedDeviceLibFiles) + InputFilesVec.emplace_back(File); + // second llvm-link step + auto DeviceLinkedFile = linkDeviceLibFiles(InputFilesVec, Args); + if (!DeviceLinkedFile) + reportError(DeviceLinkedFile.takeError()); + + return *DeviceLinkedFile; +} + +/// Add any sycl-post-link options that rely on a specific Triple in addition /// to user supplied options. -static void getSPIRVTransOpts(const ArgList &Args, - SmallVector &TranslatorArgs, - const llvm::Triple Triple) { - // Enable NonSemanticShaderDebugInfo.200 for non-Windows - const bool IsWindowsMSVC = - Triple.isWindowsMSVCEnvironment() || Args.hasArg(OPT_is_windows_msvc_env); - const bool EnableNonSemanticDebug = !IsWindowsMSVC; - if (EnableNonSemanticDebug) { - TranslatorArgs.push_back( - "-spirv-debug-info-version=nonsemantic-shader-200"); - } else { - TranslatorArgs.push_back("-spirv-debug-info-version=ocl-100"); - // Prevent crash in the translator if input IR contains DIExpression - // operations which don't have mapping to OpenCL.DebugInfo.100 spec. - TranslatorArgs.push_back("-spirv-allow-extra-diexpressions"); +/// NOTE: Any changes made here should be reflected in the similarly named +/// function in clang/lib/Driver/ToolChains/Clang.cpp. +static void +getTripleBasedSYCLPostLinkOpts(const ArgList &Args, + SmallVector &PostLinkArgs, + const llvm::Triple Triple) { + const llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ)); + bool SYCLNativeCPU = (HostTriple == Triple); + bool SpecConstsSupported = (!Triple.isNVPTX() && !Triple.isAMDGCN() && + !Triple.isSPIRAOT() && !SYCLNativeCPU); + if (SpecConstsSupported) + PostLinkArgs.push_back("-spec-const=native"); + else + PostLinkArgs.push_back("-spec-const=emulation"); + + // TODO: If we ever pass -ir-output-only based on the triple, + // make sure we don't pass -properties. + PostLinkArgs.push_back("-properties"); + + // See if device code splitting is already requested. If not requested, then + // set -split=auto for non-FPGA targets. + bool NoSplit = true; + for (auto Arg : PostLinkArgs) + if (Arg.contains("-split=")) { + NoSplit = false; + break; + } + if (NoSplit && (Triple.getSubArch() != llvm::Triple::SPIRSubArch_fpga)) + PostLinkArgs.push_back("-split=auto"); + + // On Intel targets we don't need non-kernel functions as entry points, + // because it only increases amount of code for device compiler to handle, + // without any actual benefits. + // TODO: Try to extend this feature for non-Intel GPUs. + if ((!Args.hasFlag(OPT_no_remove_unused_external_funcs, + OPT_remove_unused_external_funcs, false) && + !SYCLNativeCPU) && + !Args.hasArg(OPT_allow_device_image_dependencies) && !Triple.isNVPTX() && + !Triple.isAMDGPU()) + PostLinkArgs.push_back("-emit-only-kernels-as-entry-points"); + + if (!Triple.isAMDGCN()) + PostLinkArgs.push_back("-emit-param-info"); + // Enable program metadata + if (Triple.isNVPTX() || Triple.isAMDGCN() || SYCLNativeCPU) + PostLinkArgs.push_back("-emit-program-metadata"); + + bool SplitEsimdByDefault = Triple.isSPIROrSPIRV(); + bool SplitEsimd = + Args.hasFlag(OPT_device_code_split_esimd, OPT_no_device_code_split_esimd, + SplitEsimdByDefault); + if (!Args.hasArg(OPT_thin_lto)) + PostLinkArgs.push_back("-symbols"); + // Specialization constant info generation is mandatory - + // add options unconditionally + PostLinkArgs.push_back("-emit-exported-symbols"); + PostLinkArgs.push_back("-emit-imported-symbols"); + if (SplitEsimd) + PostLinkArgs.push_back("-split-esimd"); + PostLinkArgs.push_back("-lower-esimd"); + + bool IsAOT = Triple.isNVPTX() || Triple.isAMDGCN() || Triple.isSPIRAOT(); + if (Args.hasFlag(OPT_add_default_spec_consts_image, + OPT_no_add_default_spec_consts_image, false) && + IsAOT) + PostLinkArgs.push_back("-generate-device-image-default-spec-consts"); +} + +/// Run sycl-post-link tool for SYCL offloading. +/// 'InputFiles' is the list of input LLVM IR files. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// sycl-post-link tool. +static Expected> +runSYCLPostLinkTool(ArrayRef InputFiles, const ArgList &Args) { + Expected SYCLPostLinkPath = findProgram( + Args, "sycl-post-link", {getMainExecutable("sycl-post-link")}); + if (!SYCLPostLinkPath) + return SYCLPostLinkPath.takeError(); + + // Create a new file to write the output of sycl-post-link to. + auto TempFileOrErr = + createOutputFile(sys::path::filename(OutputFile), "table"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + SmallVector CmdArgs; + CmdArgs.push_back(*SYCLPostLinkPath); + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + Arg *SYCLDeviceLibLoc = Args.getLastArg(OPT_device_library_location_EQ); + if (SYCLDeviceLibLoc && !Triple.isSPIRAOT()) { + std::string SYCLDeviceLibLocParam = SYCLDeviceLibLoc->getValue(); + std::string BF16DeviceLibLoc = + SYCLDeviceLibLocParam + "/libsycl-native-bfloat16.bc"; + if (llvm::sys::fs::exists(BF16DeviceLibLoc)) { + SYCLDeviceLibLocParam = "--device-lib-dir=" + SYCLDeviceLibLocParam; + CmdArgs.push_back(Args.MakeArgString(StringRef(SYCLDeviceLibLocParam))); + } } - std::string UnknownIntrinsics("-spirv-allow-unknown-intrinsics=llvm.genx."); + getTripleBasedSYCLPostLinkOpts(Args, CmdArgs, Triple); + StringRef SYCLPostLinkOptions; + if (Arg *A = Args.getLastArg(OPT_post_link_options_EQ)) + SYCLPostLinkOptions = A->getValue(); + SYCLPostLinkOptions.split(CmdArgs, " ", /* MaxSplit = */ -1, + /* KeepEmpty = */ false); + CmdArgs.push_back("-o"); + CmdArgs.push_back(*TempFileOrErr); + for (auto &File : InputFiles) + CmdArgs.push_back(File); + if (Error Err = executeCommands(*SYCLPostLinkPath, CmdArgs)) + return std::move(Err); + + if (DryRun) { + // In DryRun we need a dummy entry in order to continue the whole pipeline. + auto ImageFileOrErr = createOutputFile( + sys::path::filename(OutputFile) + ".sycl.split.image", "bc"); + if (!ImageFileOrErr) + return ImageFileOrErr.takeError(); + + std::vector Modules = {module_split::SplitModule( + *ImageFileOrErr, util::PropertySetRegistry(), "")}; + return Modules; + } + + return llvm::module_split::parseSplitModulesFromFile(*TempFileOrErr); +} + +/// Invokes SYCL Split library for SYCL offloading. +/// +/// \param InputFiles the list of input LLVM IR files. +/// \param Args Encompasses all arguments for linking and wrapping device code. +/// It will be parsed to generate options required to be passed to SYCL split +/// library. +/// \param Mode The splitting mode. +/// \returns The vector of split modules. +static Expected> +runSYCLSplitLibrary(ArrayRef InputFiles, const ArgList &Args, + module_split::IRSplitMode Mode) { + std::vector SplitModules; + if (DryRun) { + auto OutputFileOrErr = createOutputFile( + sys::path::filename(OutputFile) + ".sycl.split.image", "bc"); + if (!OutputFileOrErr) + return OutputFileOrErr.takeError(); + + StringRef OutputFilePath = *OutputFileOrErr; + auto InputFilesStr = llvm::join(InputFiles.begin(), InputFiles.end(), ","); + errs() << formatv("sycl-module-split: input: {0}, output: {1}\n", + InputFilesStr, OutputFilePath); + SplitModules.emplace_back(OutputFilePath, util::PropertySetRegistry(), ""); + return SplitModules; + } + + llvm::module_split::ModuleSplitterSettings Settings; + Settings.Mode = Mode; + Settings.OutputPrefix = ""; + + for (StringRef InputFile : InputFiles) { + SMDiagnostic Err; + LLVMContext C; + std::unique_ptr M = parseIRFile(InputFile, Err, C); + if (!M) + return createStringError(inconvertibleErrorCode(), Err.getMessage()); + + auto SplitModulesOrErr = + module_split::splitSYCLModule(std::move(M), Settings); + if (!SplitModulesOrErr) + return SplitModulesOrErr.takeError(); + auto &NewSplitModules = *SplitModulesOrErr; + SplitModules.insert(SplitModules.end(), NewSplitModules.begin(), + NewSplitModules.end()); + } + + if (Verbose) { + auto InputFilesStr = llvm::join(InputFiles.begin(), InputFiles.end(), ","); + std::string SplitOutputFilesStr; + for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { + if (I > 0) + SplitOutputFilesStr += ','; + + SplitOutputFilesStr += SplitModules[I].ModuleFilePath; + } + + errs() << formatv("sycl-module-split: input: {0}, output: {1}\n", + InputFilesStr, SplitOutputFilesStr); + } + + return SplitModules; +} + +/// Add any llvm-spirv option that relies on a specific Triple in addition +/// to user supplied options. +/// NOTE: Any changes made here should be reflected in the similarly named +/// function in clang/lib/Driver/ToolChains/Clang.cpp. +static void +getTripleBasedSPIRVTransOpts(const ArgList &Args, + SmallVector &TranslatorArgs, + const llvm::Triple Triple) { + bool IsCPU = Triple.isSPIR() && + Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64; + TranslatorArgs.push_back("-spirv-debug-info-version=nonsemantic-shader-200"); + std::string UnknownIntrinsics("-spirv-allow-unknown-intrinsics=llvm.genx."); + if (IsCPU) + UnknownIntrinsics += ",llvm.fpbuiltin"; TranslatorArgs.push_back(Args.MakeArgString(UnknownIntrinsics)); // Disable all the extensions by default @@ -353,12 +973,11 @@ static void getSPIRVTransOpts(const ArgList &Args, ",+SPV_INTEL_fpga_argument_interfaces" ",+SPV_INTEL_fpga_invocation_pipelining_attributes" ",+SPV_INTEL_fpga_latency_control" - ",+SPV_INTEL_task_sequence" ",+SPV_KHR_shader_clock" - ",+SPV_INTEL_bindless_images"; + ",+SPV_INTEL_bindless_images" + ",+SPV_INTEL_task_sequence"; ExtArg = ExtArg + DefaultExtArg + INTELExtArg; - ExtArg += ",+SPV_INTEL_token_type" - ",+SPV_INTEL_bfloat16_conversion" + ExtArg += ",+SPV_INTEL_bfloat16_conversion" ",+SPV_INTEL_joint_matrix" ",+SPV_INTEL_hw_thread_queries" ",+SPV_KHR_uniform_group_instructions" @@ -367,82 +986,321 @@ static void getSPIRVTransOpts(const ArgList &Args, ",+SPV_INTEL_optnone" ",+SPV_KHR_non_semantic_info" ",+SPV_KHR_cooperative_matrix" + ",+SPV_EXT_shader_atomic_float16_add" ",+SPV_INTEL_fp_max_error"; TranslatorArgs.push_back(Args.MakeArgString(ExtArg)); } /// Run LLVM to SPIR-V translation. /// Converts 'File' from LLVM bitcode to SPIR-V format using llvm-spirv tool. -/// 'Args' encompasses all arguments required for linking device code and will -/// be parsed to generate options required to be passed into llvm-spirv tool. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// llvm-spirv tool. static Expected runLLVMToSPIRVTranslation(StringRef File, const ArgList &Args) { - llvm::TimeTraceScope TimeScope("LLVMToSPIRVTranslation"); - StringRef LLVMSPIRVPath = Args.getLastArgValue(OPT_llvm_spirv_path_EQ); - Expected LLVMToSPIRVProg = - findProgram(Args, "llvm-spirv", {LLVMSPIRVPath}); - if (!LLVMToSPIRVProg) - return LLVMToSPIRVProg.takeError(); + Expected LLVMToSPIRVPath = + findProgram(Args, "llvm-spirv", {getMainExecutable("llvm-spirv")}); + if (!LLVMToSPIRVPath) + return LLVMToSPIRVPath.takeError(); SmallVector CmdArgs; - CmdArgs.push_back(*LLVMToSPIRVProg); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple)); - getSPIRVTransOpts(Args, CmdArgs, Triple); + CmdArgs.push_back(*LLVMToSPIRVPath); + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + getTripleBasedSPIRVTransOpts(Args, CmdArgs, Triple); StringRef LLVMToSPIRVOptions; if (Arg *A = Args.getLastArg(OPT_llvm_spirv_options_EQ)) LLVMToSPIRVOptions = A->getValue(); LLVMToSPIRVOptions.split(CmdArgs, " ", /* MaxSplit = */ -1, /* KeepEmpty = */ false); - CmdArgs.append({"-o", OutputFile}); + CmdArgs.push_back("-o"); + + // Create a new file to write the translated file to. + auto TempFileOrErr = createOutputFile(sys::path::filename(OutputFile), "spv"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + + CmdArgs.push_back(*TempFileOrErr); CmdArgs.push_back(File); - if (Error Err = executeCommands(*LLVMToSPIRVProg, CmdArgs)) + if (Error Err = executeCommands(*LLVMToSPIRVPath, CmdArgs)) return std::move(Err); - if (!SPIRVDumpDir.empty()) { - std::error_code EC = - llvm::sys::fs::create_directory(SPIRVDumpDir, /*IgnoreExisting*/ true); - if (EC) - return createStringError( - EC, - formatv("failed to create dump directory. path: {0}, error_code: {1}", - SPIRVDumpDir, EC.value())); - - StringRef Path = OutputFile; - StringRef Filename = llvm::sys::path::filename(Path); - SmallString<128> CopyPath = SPIRVDumpDir; - CopyPath.append(Filename); - EC = llvm::sys::fs::copy_file(Path, CopyPath); - if (EC) + return *TempFileOrErr; +} + +/// Adds all AOT backend options required for SYCL AOT compilation step to +/// 'CmdArgs'. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate backend options required to be passed +/// into the SYCL AOT compilation step. +/// IsCPU is a bool used to direct option generation. If IsCPU is false, then +/// options are generated for AOT compilation targeting Intel GPUs. +static void addBackendOptions(const ArgList &Args, + SmallVector &CmdArgs, bool IsCPU) { + StringRef OptC = + Args.getLastArgValue(OPT_backend_compile_options_from_image_EQ); + OptC.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + StringRef OptL = Args.getLastArgValue(OPT_backend_link_options_from_image_EQ); + OptL.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + StringRef OptTool = (IsCPU) ? Args.getLastArgValue(OPT_cpu_tool_arg_EQ) + : Args.getLastArgValue(OPT_gpu_tool_arg_EQ); + OptTool.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + return; +} + +/// Run AOT compilation for Intel CPU. +/// Calls opencl-aot tool to generate device code for Intel CPU backend. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. +static Expected runAOTCompileIntelCPU(StringRef InputFile, + const ArgList &Args) { + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + SmallVector CmdArgs; + Expected OpenCLAOTPath = + findProgram(Args, "opencl-aot", {getMainExecutable("opencl-aot")}); + if (!OpenCLAOTPath) + return OpenCLAOTPath.takeError(); + + CmdArgs.push_back(*OpenCLAOTPath); + CmdArgs.push_back("--device=cpu"); + addBackendOptions(Args, CmdArgs, /* IsCPU */ true); + // Create a new file to write the translated file to. + auto TempFileOrErr = createOutputFile(sys::path::filename(OutputFile), "out"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + CmdArgs.push_back("-o"); + CmdArgs.push_back(*TempFileOrErr); + CmdArgs.push_back(InputFile); + if (Error Err = executeCommands(*OpenCLAOTPath, CmdArgs)) + return std::move(Err); + return *TempFileOrErr; +} + +/// Run AOT compilation for Intel GPU +/// Calls ocloc tool to generate device code for Intel GPU backend. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. +static Expected runAOTCompileIntelGPU(StringRef InputFile, + const ArgList &Args) { + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + StringRef Arch(Args.getLastArgValue(OPT_arch_EQ)); + SmallVector CmdArgs; + Expected OclocPath = + findProgram(Args, "ocloc", {getMainExecutable("ocloc")}); + if (!OclocPath) + return OclocPath.takeError(); + + CmdArgs.push_back(*OclocPath); + // The next line prevents ocloc from modifying the image name + CmdArgs.push_back("-output_no_suffix"); + CmdArgs.push_back("-spirv_input"); + if (!Arch.empty()) { + CmdArgs.push_back("-device"); + CmdArgs.push_back(Arch); + } + addBackendOptions(Args, CmdArgs, /* IsCPU */ false); + // Create a new file to write the translated file to. + auto TempFileOrErr = createOutputFile(sys::path::filename(OutputFile), "out"); + if (!TempFileOrErr) + return TempFileOrErr.takeError(); + CmdArgs.push_back("-output"); + CmdArgs.push_back(*TempFileOrErr); + CmdArgs.push_back("-file"); + CmdArgs.push_back(InputFile); + if (Error Err = executeCommands(*OclocPath, CmdArgs)) + return std::move(Err); + return *TempFileOrErr; +} + +/// Run AOT compilation for Intel CPU/GPU. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. +static Expected runAOTCompile(StringRef InputFile, + const ArgList &Args) { + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + if (Triple.isSPIRAOT()) { + if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) + return runAOTCompileIntelGPU(InputFile, Args); + if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64) + return runAOTCompileIntelCPU(InputFile, Args); + } + return createStringError(inconvertibleErrorCode(), + "Unsupported SYCL Triple and Arch"); +} + +Expected linkDevice(ArrayRef InputFiles, + const ArgList &Args) { + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + switch (Triple.getArch()) { + case Triple::nvptx: + case Triple::nvptx64: + case Triple::amdgcn: + case Triple::x86: + case Triple::x86_64: + case Triple::aarch64: + case Triple::aarch64_be: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::systemz: + return generic::clang(InputFiles, Args); + case Triple::spirv32: + case Triple::spirv64: + case Triple::spir: + case Triple::spir64: { + if (Triple.getSubArch() != llvm::Triple::NoSubArch && + Triple.getSubArch() != llvm::Triple::SPIRSubArch_gen && + Triple.getSubArch() != llvm::Triple::SPIRSubArch_x86_64) return createStringError( - EC, - formatv( - "failed to copy file. original: {0}, copy: {1}, error_code: {2}", - Path, CopyPath, EC.value())); + inconvertibleErrorCode(), + "For SPIR targets, Linking is supported only for JIT compilations " + "and AOT compilations for Intel CPUs/GPUs"); + auto SPVFile = runLLVMToSPIRVTranslation(InputFiles[0], Args); + if (!SPVFile) + return SPVFile.takeError(); + bool NeedAOTCompile = + (Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen || + Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64); + auto AOTFile = + (NeedAOTCompile) ? runAOTCompile(*SPVFile, Args) : *SPVFile; + if (!AOTFile) + return AOTFile.takeError(); + return NeedAOTCompile ? *AOTFile : *SPVFile; + } + case Triple::loongarch64: + return generic::clang(InputFiles, Args); + default: + return createStringError(Triple.getArchName() + + " linking is not supported"); } +} - return OutputFile; +Error writeSplitModulesToFile(ArrayRef SplitModules, + const ArgList &Args) { + SmallVector BinaryData; + raw_svector_ostream OS(BinaryData); + std::mutex ImageMtx; + for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { + auto File = SplitModules[I].ModuleFilePath; + llvm::ErrorOr> FileOrErr = + llvm::MemoryBuffer::getFileOrSTDIN(File); + if (std::error_code EC = FileOrErr.getError()) { + if (DryRun) + FileOrErr = MemoryBuffer::getMemBuffer(""); + else + return createFileError(File, EC); + } + std::scoped_lock Guard(ImageMtx); + OffloadingImage TheImage{}; + TheImage.TheImageKind = IMG_Object; + TheImage.TheOffloadKind = OFK_SYCL; + TheImage.StringData["triple"] = + Args.MakeArgString(Args.getLastArgValue(OPT_triple_EQ)); + TheImage.StringData["arch"] = + Args.MakeArgString(Args.getLastArgValue(OPT_arch_EQ)); + TheImage.Image = std::move(*FileOrErr); + + llvm::SmallString<0> Buffer = OffloadBinary::write(TheImage); + if (Buffer.size() % OffloadBinary::getAlignment() != 0) + return createStringError(inconvertibleErrorCode(), + "Offload binary has invalid size alignment"); + OS << Buffer; + } + if (Error E = writeFile(OutputFile, + StringRef(BinaryData.begin(), BinaryData.size()))) + return E; + return Error::success(); } -Error runSYCLLink(ArrayRef Files, const ArgList &Args) { +Error runSYCLLink(ArrayRef Files, const ArgList &Args) { llvm::TimeTraceScope TimeScope("SYCLDeviceLink"); - // First llvm-link step - auto LinkedFile = linkDeviceInputFiles(Files, Args); - if (!LinkedFile) - reportError(LinkedFile.takeError()); - - // second llvm-link step - auto DeviceLinkedFile = linkDeviceLibFiles(*LinkedFile, Args); - if (!DeviceLinkedFile) - reportError(DeviceLinkedFile.takeError()); + { + // Link the input device files using the device linker for SYCL + // offload. + auto TmpOutputOrErr = linkDeviceBitcode(Files, Args); + if (!TmpOutputOrErr) + return TmpOutputOrErr.takeError(); + SmallVector InputFilesSYCL; + InputFilesSYCL.emplace_back(*TmpOutputOrErr); + auto SplitModulesOrErr = + UseSYCLPostLinkTool + ? runSYCLPostLinkTool(InputFilesSYCL, Args) + : runSYCLSplitLibrary(InputFilesSYCL, Args, *SYCLModuleSplitMode); + if (!SplitModulesOrErr) + return SplitModulesOrErr.takeError(); - // LLVM to SPIR-V translation step - auto SPVFile = runLLVMToSPIRVTranslation(*DeviceLinkedFile, Args); - if (!SPVFile) - return SPVFile.takeError(); + auto &SplitModules = *SplitModulesOrErr; + const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + if ((Triple.isNVPTX() || Triple.isAMDGCN()) && Args.hasArg(OPT_embed_ir)) { + // When compiling for Nvidia/AMD devices and the user requested the + // IR to be embedded in the application (via option), run the output + // of sycl-post-link (filetable referencing LLVM Bitcode + symbols) + // through the offload wrapper and link the resulting object to the + // application. + if (auto Err = writeSplitModulesToFile(SplitModules, Args)) + return std::move(Err); + return Error::success(); + } + for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { + SmallVector Files = {SplitModules[I].ModuleFilePath}; + StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); + if (Arch.empty()) + Arch = "native"; + SmallVector, 4> BundlerInputFiles; + auto ClangOutputOrErr = linkDevice(Files, Args); + if (!ClangOutputOrErr) + return ClangOutputOrErr.takeError(); + if (Triple.isNVPTX()) { + auto VirtualArch = StringRef(clang::OffloadArchToVirtualArchString( + clang::StringToOffloadArch(Arch))); + auto PtxasOutputOrErr = nvptx::ptxas(*ClangOutputOrErr, Args, Arch); + if (!PtxasOutputOrErr) + return PtxasOutputOrErr.takeError(); + BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch); + BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch); + auto BundledFileOrErr = nvptx::fatbinary(BundlerInputFiles, Args); + if (!BundledFileOrErr) + return BundledFileOrErr.takeError(); + SplitModules[I].ModuleFilePath = *BundledFileOrErr; + } else if (Triple.isAMDGCN()) { + BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch); + auto BundledFileOrErr = amdgcn::fatbinary(BundlerInputFiles, Args); + if (!BundledFileOrErr) + return BundledFileOrErr.takeError(); + SplitModules[I].ModuleFilePath = *BundledFileOrErr; + } else { + SplitModules[I].ModuleFilePath = *ClangOutputOrErr; + } + } + if (auto Err = writeSplitModulesToFile(SplitModules, Args)) + return std::move(Err); + } return Error::success(); } -} // namespace +Expected> getInput(const ArgList &Args) { + // Collect all input bitcode files to be passed to the device linking stage. + SmallVector BitcodeFiles; + for (const opt::Arg *Arg : Args.filtered(OPT_INPUT)) { + std::optional Filename = Arg->getValue(); + if (!Filename || !sys::fs::exists(*Filename) || + sys::fs::is_directory(*Filename)) + continue; + file_magic Magic; + if (auto EC = identify_magic(*Filename, Magic)) + return createStringError("Failed to open file " + *Filename); + // TODO: Current use case involves LLVM IR bitcode files as input. + // This will be extended to support SPIR-V IR files. + if (Magic != file_magic::bitcode) + return createStringError("Unsupported file type"); + BitcodeFiles.push_back(*Filename); + } + return BitcodeFiles; +} int main(int argc, char **argv) { InitLLVM X(argc, argv); @@ -468,6 +1326,8 @@ int main(int argc, char **argv) { return EXIT_SUCCESS; } + llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + llvm::errs() << "ARV: Triple = " << Triple.str() << "\n"; if (Args.hasArg(OPT_version)) printVersion(outs()); @@ -475,19 +1335,39 @@ int main(int argc, char **argv) { DryRun = Args.hasArg(OPT_dry_run); SaveTemps = Args.hasArg(OPT_save_temps); - OutputFile = "a.spv"; + OutputFile = "a.out"; if (Args.hasArg(OPT_o)) OutputFile = Args.getLastArgValue(OPT_o); - if (Args.hasArg(OPT_spirv_dump_device_code_EQ)) { - Arg *A = Args.getLastArg(OPT_spirv_dump_device_code_EQ); - SmallString<128> Dir(A->getValue()); - if (Dir.empty()) - llvm::sys::path::native(Dir = "./"); - else - Dir.append(llvm::sys::path::get_separator()); + UseSYCLPostLinkTool = + Args.hasFlag(OPT_use_post_link_tool, OPT_no_use_post_link_tool, true); + if (!UseSYCLPostLinkTool && Args.hasArg(OPT_use_post_link_tool)) + reportError(createStringError("-use-sycl-post-link-tool and " + "-no-use-sycl-post-link-tool options can't " + "be used together.")); - SPIRVDumpDir = Dir; + if (Args.hasArg(OPT_module_split_mode_EQ)) { + if (UseSYCLPostLinkTool) + reportError(createStringError( + "-sycl-module-split-mode should be used with " + "the -no-use-sycl-post-link-tool command line option.")); + + StringRef StrMode = Args.getLastArgValue(OPT_module_split_mode_EQ); + SYCLModuleSplitMode = module_split::convertStringToSplitMode(StrMode); + if (!SYCLModuleSplitMode) + reportError(createStringError( + inconvertibleErrorCode(), + formatv("sycl-module-split-mode value isn't recognized: {0}", + StrMode))); + } + + if (Args.hasArg(OPT_dump_device_code_EQ)) { + Arg *A = Args.getLastArg(OPT_dump_device_code_EQ); + OffloadImageDumpDir = A->getValue(); + if (OffloadImageDumpDir.empty()) + sys::path::native(OffloadImageDumpDir = "./"); + else + OffloadImageDumpDir.append(sys::path::get_separator()); } // Get the input files to pass to the linking stage. diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td index 959fd6c3e867c..e04d1e28dfd62 100644 --- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td +++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td @@ -17,36 +17,199 @@ def o : JoinedOrSeparate<["-"], "o">, MetaVarName<"">, def output : Separate<["--"], "output-file">, Alias, Flags<[HelpHidden]>, HelpText<"Alias for -o">; +def arch_EQ : Joined<["--", "-"], "arch=">, + Flags<[LinkerOnlyOption]>, + MetaVarName<"">, + HelpText<"The device architecture">; +def triple_EQ : Joined<["--", "-"], "triple=">, + Flags<[LinkerOnlyOption]>, + MetaVarName<"">, + HelpText<"The device target triple">; + +def save_temps : Flag<["--", "-"], "save-temps">, + Flags<[LinkerOnlyOption]>, HelpText<"Save intermediate results">; + +def rpath : Separate<["--", "-"], "rpath">; +def rpath_EQ : Joined<["--", "-"], "rpath=">, Flags<[HelpHidden]>, Alias; + +// Flags for the linker wrapper. +def linker_path_EQ : Joined<["--"], "linker-path=">, + MetaVarName<"">, + HelpText<"The linker executable to invoke">; +def cuda_path_EQ : Joined<["--"], "cuda-path=">, + MetaVarName<"">, + HelpText<"Set the system CUDA path">; +def host_triple_EQ : Joined<["--"], "host-triple=">, + MetaVarName<"">, + HelpText<"Triple to use for the host compilation">; +def opt_level : Joined<["--"], "opt-level=">, + MetaVarName<"">, + HelpText<"Optimization level for LTO">; +def bitcode_library_EQ : Joined<["--"], "bitcode-library=">, + MetaVarName<"--=">, + HelpText<"Extra bitcode library to link">; +def builtin_bitcode_EQ + : Joined<["--"], "builtin-bitcode=">, + MetaVarName<"=">, + HelpText< + "Perform a special internalizing link on the bitcode file. " + "This is necessary for some vendor libraries to be linked correctly">; +def device_linker_args_EQ + : Joined<["--"], "device-linker=">, + MetaVarName<" or =">, + HelpText<"Arguments to pass to the device linker invocation">; +def device_compiler_args_EQ + : Joined<["--"], "device-compiler=">, + MetaVarName<" or =">, + HelpText<"Arguments to pass to the device compiler invocation">; +def clang_backend + : Flag<["--"], "clang-backend">, + HelpText<"Run the backend using clang rather than the LTO backend">; +def dry_run : Flag<["--"], "dry-run">, + HelpText<"Print program arguments without running">; +def embed_bitcode : Flag<["--"], "embed-bitcode">, + HelpText<"Embed linked bitcode in the module">; +def debug : Flag<["--"], "device-debug">, HelpText<"Use debugging">; +def ptxas_arg : Joined<["--"], "ptxas-arg=">, + HelpText<"Argument to pass to the 'ptxas' invocation">; +def compress : Flag<["--"], "compress">, HelpText<"Compress bundled files">; +def compression_level_eq : Joined<["--"], "compression-level=">, + HelpText<"Specify the compression level (integer)">; + +// Flags passed to the device linker. +def whole_program : Flag<["--"], "whole-program">, + Flags<[HelpHidden]>, + HelpText<"LTO has visibility of all input files">; +def linker_arg_EQ : Joined<["--"], "linker-arg=">, + Flags<[HelpHidden]>, + HelpText<"An extra argument to be passed to the linker">; +def compiler_arg_EQ + : Joined<["--"], "compiler-arg=">, + Flags<[HelpHidden]>, + HelpText<"An extra argument to be passed to the compiler">; + +// Arguments for the LLVM backend. +def mllvm + : Separate<["-"], "mllvm">, + MetaVarName<"">, + HelpText< + "Arguments passed to LLVM, including Clang invocations, for which " + "the '-mllvm' prefix is preserved. Use '-mllvm --help' for a list " + "of options.">; +def offload_opt_eq_minus + : Joined<["--", "-"], "offload-opt=">, + Flags<[HelpHidden]>, + HelpText< + "Options passed to LLVM, not including the Clang invocation. Use " + "'--offload-opt=--help' for a list of options.">; + +// Standard linker flags also used by the linker wrapper. +def sysroot_EQ : Joined<["--"], "sysroot=">, HelpText<"Set the system root">; + +def whole_archive : Flag<["--", "-"], "whole-archive">, Flags<[HelpHidden]>; +def no_whole_archive : Flag<["--", "-"], "no-whole-archive">, Flags<[HelpHidden]>; + +def shared : Flag<["--", "-"], "shared">, + HelpText<"Link device code to create a shared library">; + +def library_path : JoinedOrSeparate<["-"], "L">, + MetaVarName<"">, + HelpText<"Add to the library search path">; def library_path_EQ : Joined<["--", "-"], "library-path=">, - Flags<[HelpHidden]>, HelpText<"Add to the library search path">; + Flags<[HelpHidden]>, + Alias; -def device_libs_EQ : CommaJoined<["--", "-"], "device-libs=">, - Flags<[LinkerOnlyOption]>, - HelpText<"A comma separated list of device libraries that are linked during the device link.">; +// Options to specify SYCL device library files +def device_lib_EQ : CommaJoined<["--", "-"], "device-libraries=">, + HelpText<"A comma separated list of device libraries that " + "are linked during the device link.">; +def device_library_location_EQ + : Joined<["--", "-"], "device-library-location=">, + HelpText<"Location of SYCL device library files">; +def nvptx_device_lib_EQ + : CommaJoined<["--", "-"], "nvptx-device-libraries=">, + HelpText<"A comma separated list of nvptx-specific device libraries that " + "are linked during the device link.">; -def triple : Joined<["--"], "triple">, - HelpText<"The device target triple">; -def arch : Separate<["--", "-"], "arch">, - HelpText<"Specify the name of the target architecture.">; +// Special option to pass in sycl-post-link options +def post_link_options_EQ + : Joined<["--", "-"], "post-link-options=">, + HelpText<"Options that will control sycl-post-link step">; -def save_temps : Flag<["--", "-"], "save-temps">, - Flags<[LinkerOnlyOption]>, HelpText<"Save intermediate results">; +def module_split_mode_EQ + : Joined<["--", "-"], "module-split-mode=">, + HelpText<"Option that turns on split library with the given split mode">; + +// TODO: Options will be removed when the sycl-post-link tool becomes removed. +def use_post_link_tool : Flag<["--", "-"], "use-post-link-tool">, + HelpText<"Use the sycl-post-link tool. On by default">; + +def no_use_post_link_tool + : Flag<["--", "-"], "no-use-post-link-tool">, + HelpText< + "Use a SYCL library instead of sycl-post-link tool. (experimental)">; + +// Extra SYCL options to help generate sycl-post-link options that also depend +// on the target triple. +def remove_unused_external_funcs + : Flag<["--", "-"], "remove-unused-external-funcs">, + Flags<[HelpHidden]>; +def no_remove_unused_external_funcs + : Flag<["--", "-"], "no-remove-unused-external-funcs">, + Flags<[HelpHidden]>; +def device_code_split_esimd : Flag<["--", "-"], "device-code-split-esimd">, + Flags<[HelpHidden]>; +def no_device_code_split_esimd + : Flag<["--", "-"], "no-device-code-split-esimd">, + Flags<[HelpHidden]>; +def add_default_spec_consts_image + : Flag<["--", "-"], "add-default-spec-consts-image">, + Flags<[HelpHidden]>; +def no_add_default_spec_consts_image + : Flag<["--", "-"], "no-add-default-spec-consts-image">, + Flags<[HelpHidden]>; + +// Special options to pass backend options required for AOT compilation +def gpu_tool_arg_EQ : + Joined<["--", "-"], "gpu-tool-arg=">, + HelpText<"Options that are passed to the backend of target device compiler for Intel GPU during AOT compilation">; +def cpu_tool_arg_EQ : + Joined<["--", "-"], "cpu-tool-arg=">, + HelpText<"Options that are passed to the backend of target device compiler for Intel CPU during AOT compilation">; + +// Hidden options to store backend compile/link options that are stored in +// device images for SYCL offloading +def backend_compile_options_from_image_EQ + : Joined<["--", "-"], "backend-compile-options-from-image=">, + HelpText<"Compile options that will be transmitted to the SYCL backend " + "compiler">; +def backend_link_options_from_image_EQ + : Joined<["--", "-"], "backend-link-options-from-image=">, + HelpText< + "Link options that will be transmitted to the SYCL backend compiler">; + +def thin_lto : Flag<["--", "-"], "thin-lto">, + HelpText<"Link SYCL device code using thinLTO">; -def dry_run : Flag<["--", "-"], "dry-run">, Flags<[LinkerOnlyOption]>, - HelpText<"Print generated commands without running.">; +def embed_ir : Flag<["--", "-"], "embed-ir">, + HelpText<"Embed LLVM IR for runtime kernel fusion">; -def spirv_dump_device_code_EQ : Joined<["--", "-"], "spirv-dump-device-code=">, - Flags<[LinkerOnlyOption]>, - HelpText<"Path to the folder where the tool dumps SPIR-V device code. Other formats aren't dumped.">; +// Options to enable/disable device dynamic linking. +def allow_device_image_dependencies + : Flag<["--", "-"], "allow-device-image-dependencies">, + Flags<[HelpHidden]>, + HelpText<"Allow dependencies between device code images">; -def is_windows_msvc_env : Flag<["--", "-"], "is-windows-msvc-env">, - Flags<[LinkerOnlyOption, HelpHidden]>; +def dump_device_code_EQ : Joined<["--", "-"], "dump-device-code=">, + HelpText<"Directory to dump offloading images to.">; def llvm_spirv_path_EQ : Joined<["--"], "llvm-spirv-path=">, - Flags<[LinkerOnlyOption]>, MetaVarName<"">, - HelpText<"Set the system llvm-spirv path">; + Flags<[LinkerOnlyOption]>, + MetaVarName<"">, + HelpText<"Set the system llvm-spirv path">; // Options to pass to llvm-spirv tool -def llvm_spirv_options_EQ : Joined<["--", "-"], "llvm-spirv-options=">, - Flags<[LinkerOnlyOption]>, - HelpText<"Options that will control llvm-spirv step">; +def llvm_spirv_options_EQ + : Joined<["--", "-"], "llvm-spirv-options=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Options that will control llvm-spirv step">; diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h index 7932fd5acbe1e..194e2585ac783 100644 --- a/llvm/include/llvm/Frontend/Offloading/Utility.h +++ b/llvm/include/llvm/Frontend/Offloading/Utility.h @@ -96,7 +96,7 @@ getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind, /// Creates a pair of globals used to iterate the array of offloading entries by /// accessing the section variables provided by the linker. std::pair -getOffloadEntryArray(Module &M, StringRef SectionName); +getOffloadEntryArray(Module &M, StringRef SectionName = "llvm_offload_entries"); namespace amdgpu { /// Check if an image is compatible with current system's environment. The diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index 4a87ed066c5eb..998870842fa19 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -1233,6 +1233,9 @@ class Triple { /// Test whether target triples are compatible. bool isCompatibleWith(const Triple &Other) const; + /// Test whether the target triple is for a GPU. + bool isGPU() const { return isSPIRV() || isNVPTX() || isAMDGPU(); } + /// Merge target triples. std::string merge(const Triple &Other) const; diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 78949054ef14c..fbeb71a7a84de 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -380,6 +380,7 @@ add_custom_target(sycl-compiler clang-offload-extract clang-offload-packager clang-linker-wrapper + clang-sycl-linker file-table-tform llc llvm-ar @@ -463,6 +464,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS clang-offload-extract clang-offload-packager clang-linker-wrapper + clang-sycl-linker file-table-tform llc llvm-ar