diff --git a/clang/tools/clang-linker-wrapper/CMakeLists.txt b/clang/tools/clang-linker-wrapper/CMakeLists.txt
index 9dc1f244f2802..8ad197d85d535 100644
--- a/clang/tools/clang-linker-wrapper/CMakeLists.txt
+++ b/clang/tools/clang-linker-wrapper/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(LLVM_LINK_COMPONENTS
   ${LLVM_TARGETS_TO_BUILD}
+  BitReader
   BitWriter
   Core
   BinaryFormat
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 9173ff2ac48bd..e58883a932efe 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -42,7 +42,9 @@
 #include "llvm/Option/Option.h"
 #include "llvm/Passes/PassPlugin.h"
 #include "llvm/Remarks/HotnessThresholdParser.h"
+#include "llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h"
 #include "llvm/SYCLLowerIR/ModuleSplitter.h"
+#include "llvm/SYCLLowerIR/SYCLLinkedModuleProcessor.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileOutputBuffer.h"
@@ -600,6 +602,57 @@ static Error getSYCLDeviceLibs(SmallVector<std::string, 16> &DeviceLibFiles,
   return Error::success();
 }
 
+static Error getDeviceLibsForLTO(SmallVector<OffloadFile> &DeviceLibs,
+                                 const ArgList &Args,
+                                 const llvm::Triple Triple) {
+  // TODO: Fix copy paste
+  SmallVector<std::string, 16> DeviceLibFiles;
+  if (Error Err = sycl::getSYCLDeviceLibs(DeviceLibFiles, Args))
+    return Err;
+
+  auto processFile = [&](StringRef File) {
+    auto BufferOrErr = MemoryBuffer::getFile(File);
+    if (!BufferOrErr)
+      return createFileError(File, BufferOrErr.getError());
+    auto Buffer = std::move(*BufferOrErr);
+    SmallVector<OffloadFile> Candidates;
+    if (Error Err =
+            extractOffloadBinaries(Buffer->getMemBufferRef(), Candidates))
+      return Err;
+    for (OffloadFile &OffF : Candidates)
+      if (llvm::Triple(OffF.getBinary()->getTriple()) == Triple)
+        DeviceLibs.emplace_back(std::move(OffF));
+    return Error(Error::success());
+  };
+
+  for (auto &File : DeviceLibFiles) {
+
+    if (Error Err = processFile(File))
+      return Err;
+  }
+
+  // For NVPTX backend we need to also link libclc and CUDA libdevice.
+  if (Triple.isNVPTX()) {
+    if (Arg *A = Args.getLastArg(OPT_sycl_nvptx_device_lib_EQ)) {
+      if (A->getValues().size() == 0)
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Number of device library files cannot be zero.");
+      for (StringRef Val : A->getValues()) {
+        SmallString<128> LibName(Val);
+        if (!llvm::sys::fs::exists(LibName))
+          return createStringError(
+              inconvertibleErrorCode(),
+              std::string(LibName) +
+                  " SYCL device library file for NVPTX is not found.");
+        if (auto Err = processFile(LibName))
+          return Err;
+      }
+    }
+  }
+  return Error::success();
+}
+
 /// This routine is used to convert SPIR-V input files into LLVM IR files.
 /// 'llvm-spirv -r' command is used for this purpose.
 /// If input is not a SPIR-V file, then the original file is returned.
@@ -635,6 +688,30 @@ static Expected<StringRef> convertSPIRVToIR(StringRef Filename,
   return *TempFileOrErr;
 }
 
+static bool considerOnlyKernelsAsEntryPoints(const ArgList &Args,
+                                             const llvm::Triple Triple) {
+  const llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ));
+  // On Intel targets we don't need non-kernel functions as entry points,
+  // because it only increases amount of code for device compiler to handle,
+  // without any actual benefits.
+  // TODO: Try to extend this feature for non-Intel GPUs.
+  return !Args.hasFlag(OPT_no_sycl_remove_unused_external_funcs,
+                       OPT_sycl_remove_unused_external_funcs, false) &&
+         Triple.isSPIROrSPIRV();
+}
+
+bool isSYCLThinLTO(const ArgList &Args, const llvm::Triple Triple) {
+  // TODO: Support AMDGPU/NVPTX targets
+  return Triple.isSPIROrSPIRV() && Args.hasArg(OPT_sycl_thin_lto);
+}
+
+bool areSpecConstsSupported(const ArgList &Args, const llvm::Triple Triple) {
+  const llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ));
+  bool SYCLNativeCPU = (HostTriple == Triple);
+  return (!Triple.isNVPTX() && !Triple.isAMDGCN() && !Triple.isSPIRAOT() &&
+          !SYCLNativeCPU);
+}
+
 /// Add any sycl-post-link options that rely on a specific Triple in addition
 /// to user supplied options.
 /// NOTE: Any changes made here should be reflected in the similarly named
@@ -667,14 +744,7 @@ getTripleBasedSYCLPostLinkOpts(const ArgList &Args,
   if (NoSplit && (Triple.getSubArch() != llvm::Triple::SPIRSubArch_fpga))
     PostLinkArgs.push_back("-split=auto");
 
-  // On Intel targets we don't need non-kernel functions as entry points,
-  // because it only increases amount of code for device compiler to handle,
-  // without any actual benefits.
-  // TODO: Try to extend this feature for non-Intel GPUs.
-  if ((!Args.hasFlag(OPT_no_sycl_remove_unused_external_funcs,
-                     OPT_sycl_remove_unused_external_funcs, false) &&
-       !SYCLNativeCPU) &&
-      !Triple.isNVPTX() && !Triple.isAMDGPU())
+  if (considerOnlyKernelsAsEntryPoints(Args, Triple))
     PostLinkArgs.push_back("-emit-only-kernels-as-entry-points");
 
   if (!Triple.isAMDGCN())
@@ -687,7 +757,7 @@ getTripleBasedSYCLPostLinkOpts(const ArgList &Args,
   bool SplitEsimd =
       Args.hasFlag(OPT_sycl_device_code_split_esimd,
                    OPT_no_sycl_device_code_split_esimd, SplitEsimdByDefault);
-  if (!Args.hasArg(OPT_sycl_thin_lto))
+  if (!isSYCLThinLTO(Args, Triple))
     PostLinkArgs.push_back("-symbols");
   // Specialization constant info generation is mandatory -
   // add options unconditionally
@@ -891,27 +961,33 @@ getTripleBasedSPIRVTransOpts(const ArgList &Args,
   TranslatorArgs.push_back(Args.MakeArgString(ExtArg));
 }
 
+void computeLLVMToSPIRVTranslationToolArgs(const ArgList &Args,
+                                           SmallVector<StringRef, 8> &CmdArgs) {
+  const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
+  getTripleBasedSPIRVTransOpts(Args, CmdArgs, Triple);
+  StringRef LLVMToSPIRVOptions;
+  if (Arg *A = Args.getLastArg(OPT_llvm_spirv_options_EQ))
+    LLVMToSPIRVOptions = A->getValue();
+  LLVMToSPIRVOptions.split(CmdArgs, " ", /* MaxSplit = */ -1,
+                           /* KeepEmpty = */ false);
+}
+
 /// Run LLVM to SPIR-V translation.
 /// Converts 'File' from LLVM bitcode to SPIR-V format using llvm-spirv tool.
 /// 'Args' encompasses all arguments required for linking and wrapping device
 /// code and will be parsed to generate options required to be passed into the
 /// llvm-spirv tool.
-static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File,
-                                                     const ArgList &Args) {
+
+static Expected<StringRef>
+runLLVMToSPIRVTranslation(StringRef File,
+                          SmallVectorImpl<StringRef> &&CmdArgs) {
   Expected<std::string> LLVMToSPIRVPath =
       findProgram("llvm-spirv", {getMainExecutable("llvm-spirv")});
   if (!LLVMToSPIRVPath)
     return LLVMToSPIRVPath.takeError();
 
-  SmallVector<StringRef, 8> CmdArgs;
-  CmdArgs.push_back(*LLVMToSPIRVPath);
-  const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
-  getTripleBasedSPIRVTransOpts(Args, CmdArgs, Triple);
-  StringRef LLVMToSPIRVOptions;
-  if (Arg *A = Args.getLastArg(OPT_llvm_spirv_options_EQ))
-    LLVMToSPIRVOptions = A->getValue();
-  LLVMToSPIRVOptions.split(CmdArgs, " ", /* MaxSplit = */ -1,
-                           /* KeepEmpty = */ false);
+  CmdArgs.insert(CmdArgs.begin(), (*LLVMToSPIRVPath));
+
   CmdArgs.push_back("-o");
 
   // Create a new file to write the translated file to.
@@ -951,6 +1027,13 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File,
   return *TempFileOrErr;
 }
 
+static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File,
+                                                     const ArgList &Args) {
+  SmallVector<StringRef, 8> ToolArgs;
+  computeLLVMToSPIRVTranslationToolArgs(Args, ToolArgs);
+  return runLLVMToSPIRVTranslation(File, std::move(ToolArgs));
+}
+
 /// Adds all AOT backend options required for SYCL AOT compilation step to
 /// 'CmdArgs'.
 /// 'Args' encompasses all arguments required for linking and wrapping device
@@ -1497,6 +1580,43 @@ Error extractBundledObjects(StringRef Filename, const ArgList &Args,
   return Error::success();
 }
 
+llvm::sycl::GlobalBinImageProps
+computeGlobalBinProps(const ArgList &Args, const llvm::Triple Triple) {
+  auto findParam = [](const SmallVectorImpl<StringRef> &Vec, StringRef Param) {
+    for (auto El : Vec)
+      if (Param == El)
+        return true;
+    return false;
+  };
+  SmallVector<StringRef, 8> CmdArgs;
+  getTripleBasedSYCLPostLinkOpts(Args, CmdArgs, Triple);
+  bool EmitKernelParamInfo = findParam(CmdArgs, "-emit-param-info");
+  bool EmitProgramMetadata = findParam(CmdArgs, "-emit-program-metadata");
+  bool EmitExportedSymbols = findParam(CmdArgs, "-emit-exported-symbols");
+  bool EmitImportedSymbols = findParam(CmdArgs, "-emit-imported-symbols");
+  // DeviceGlobals is not triple-based, so it will be present in Args.
+  bool DeviceGlobals = false;
+  if (Arg *A = Args.getLastArg(OPT_sycl_post_link_options_EQ))
+    DeviceGlobals = StringRef(A->getValue()).contains("-device-globals");
+
+  return {EmitKernelParamInfo, EmitProgramMetadata, EmitExportedSymbols,
+          EmitImportedSymbols, DeviceGlobals};
+}
+
+Error validateThinLTOModule(BitcodeModule &M, const ArgList &Args) {
+  Expected<BitcodeLTOInfo> LTOInfo = M.getLTOInfo();
+  if (!LTOInfo || !(*LTOInfo).IsThinLTO)
+    return createStringError(
+        "All code must be compiled with -foffload-lto=thin");
+
+  // For O0 we don't run function importing so it defeats
+  // the whole point of thinLTO. Maybe we could lift this
+  // restriction by enabling only required passes for importing for O0.
+  if (Args.getLastArgValue(OPT_opt_level, "") == "O0")
+    return createStringError("O0 is not supported");
+  return Error::success();
+}
+
 } // namespace sycl
 
 namespace generic {
@@ -1729,6 +1849,9 @@ std::vector<std::string> getTargetFeatures(ArrayRef<OffloadFile> InputFiles) {
 template <typename ModuleHook = function_ref<bool(size_t, const Module &)>>
 std::unique_ptr<lto::LTO> createLTO(
     const ArgList &Args, const std::vector<std::string> &Features,
+    SmallVectorImpl<OffloadFile> &BitcodeInputFiles,
+    SmallVectorImpl<StringRef> &Files,
+    std::vector<std::string> ModulesToCompile = {},
     ModuleHook Hook = [](size_t, const Module &) { return true; }) {
   const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
   // We need to remove AMD's target-id from the processor if present.
@@ -1758,6 +1881,14 @@ std::unique_ptr<lto::LTO> createLTO(
   Conf.OptLevel = OptLevel[1] - '0';
   Conf.DefaultTriple = Triple.getTriple();
 
+  // We need to set up the backend to use thinLTO
+  // even if we don't actually use it, and there is no
+  // backend for the spir64 triple, so override it to
+  // the SPIR-V backlend.
+  // TODO: Remove once SYCL uses the SPIR-V backend.
+  if (sycl::isSYCLThinLTO(Args, Triple))
+    Conf.OverrideTriple = "spirv64-unknown-unknown";
+
   // TODO: Should we complain about combining --opt-level and -passes, as opt
   // does?  That might be too limiting in clang-linker-wrapper, so for now we
   // just warn in the help entry for -passes that the default<O?> corresponding
@@ -1774,11 +1905,21 @@ std::unique_ptr<lto::LTO> createLTO(
 
   Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
   Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
-
+  std::string TempName = (sys::path::filename(ExecutableName) + "." +
+                          Triple.getTriple() + "." + Arch)
+                             .str();
+  auto PreCodeGenSaveTemps = [=](size_t Task, const Module &M) {
+    std::string File =
+        !Task ? TempName + ".precodegen.bc"
+              : TempName + "." + std::to_string(Task) + ".precodegen.bc";
+    error_code EC;
+    raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None);
+    if (EC)
+      reportError(errorCodeToError(EC));
+    WriteBitcodeToFile(M, LinkedBitcode);
+    return true;
+  };
   if (SaveTemps) {
-    std::string TempName = (sys::path::filename(ExecutableName) + "." +
-                            Triple.getTriple() + "." + TargetID)
-                               .str();
     Conf.PostInternalizeModuleHook = [=](size_t Task, const Module &M) {
       std::string File =
           !Task ? TempName + ".postlink.bc"
@@ -1790,17 +1931,7 @@ std::unique_ptr<lto::LTO> createLTO(
       WriteBitcodeToFile(M, LinkedBitcode);
       return true;
     };
-    Conf.PreCodeGenModuleHook = [=](size_t Task, const Module &M) {
-      std::string File =
-          !Task ? TempName + ".postopt.bc"
-                : TempName + "." + std::to_string(Task) + ".postopt.bc";
-      error_code EC;
-      raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None);
-      if (EC)
-        reportError(errorCodeToError(EC));
-      WriteBitcodeToFile(M, LinkedBitcode);
-      return true;
-    };
+    Conf.PreCodeGenModuleHook = PreCodeGenSaveTemps;
   }
   Conf.PostOptModuleHook = Hook;
   Conf.CGFileType = (Triple.isNVPTX() || SaveTemps)
@@ -1809,6 +1940,90 @@ std::unique_ptr<lto::LTO> createLTO(
 
   // TODO: Handle remark files
   Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program);
+  if (sycl::isSYCLThinLTO(Args, Triple)) {
+    // Passing Args to each thinLTO thread causes crashes, so compute everything
+    // we can here.
+    const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
+    SmallVector<StringRef, 8> SPIRVArgs;
+    sycl::computeLLVMToSPIRVTranslationToolArgs(Args, SPIRVArgs);
+    auto SpecConstArg = sycl::areSpecConstsSupported(Args, Triple)
+                            ? SpecConstantsPass::HandlingMode::native
+                            : SpecConstantsPass::HandlingMode::emulation;
+    Conf.PreCodeGenModuleHook = [=, &BitcodeInputFiles,
+                                 &Files](unsigned Task, const Module &M) {
+      // Here we process the IR from each BC file, save the module for later
+      // use, convert to SPV (using the translator for now) and save the path to
+      // the output file.
+
+      assert(Task != 0 && "Unexpected task");
+      auto &OffloadF = BitcodeInputFiles[Task - 1];
+      if (OffloadF.getBinary()->getOffloadKind() != OFK_SYCL) {
+        if (SaveTemps)
+          PreCodeGenSaveTemps(Task, M);
+        return true;
+      }
+
+      if (SaveTemps)
+        PreCodeGenSaveTemps(Task, M);
+
+      // Use the legacy PM because eventually we will use the
+      // PreCodeGenPassesHook field of LTOConfig which requires the legacy PM.
+      legacy::PassManager PM;
+
+      // LTO does not continue processing the module after this
+      // function finishes, so it's safe to modify the module.
+      PM.add(createSYCLLinkedModuleProcessorPass(SpecConstArg));
+      PM.run(const_cast<Module &>(M));
+
+      auto IRFile = createOutputFile(sys::path::filename(ExecutableName) + "." +
+                                         std::to_string(Task) + ".to.spv",
+                                     "spv");
+      if (!IRFile)
+        reportError(IRFile.takeError());
+      error_code EC;
+      raw_fd_ostream LinkedBitcode(*IRFile, EC, sys::fs::OF_None);
+      if (EC)
+        reportError(errorCodeToError(EC));
+      WriteBitcodeToFile(M, LinkedBitcode);
+      LinkedBitcode.close();
+      {
+        // Overwrite the fully linked module in BitcodeInputFiles
+        // so we can compute the module properties and symbol table.
+        // We need a fully linked module to accurately compute these.
+        llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
+            llvm::MemoryBuffer::getFileOrSTDIN(*IRFile);
+        assert(ImageOrError);
+        OffloadingImage Image{};
+        Image.TheImageKind = IMG_Bitcode;
+        Image.TheOffloadKind = OffloadF.getBinary()->getOffloadKind();
+        Image.StringData["triple"] = OffloadF.getBinary()->getTriple();
+        Image.StringData["arch"] = OffloadF.getBinary()->getArch();
+        Image.Image = std::move(*ImageOrError);
+
+        std::unique_ptr<MemoryBuffer> Binary =
+            MemoryBuffer::getMemBufferCopy(OffloadBinary::write(Image));
+        auto NewBinaryOrErr = OffloadBinary::create(*Binary);
+        assert(NewBinaryOrErr);
+        BitcodeInputFiles[Task - 1] =
+            OffloadFile(std::move(*NewBinaryOrErr), std::move(Binary));
+      }
+      // runLLVMToSPIRVTranslation takes in an r-value ref of arguments
+      // and writes to it, so we need to copy the arguments.
+      SmallVector<StringRef, 8> SPIRVArgsCopy = SPIRVArgs;
+      auto SPVFile =
+          sycl::runLLVMToSPIRVTranslation(*IRFile, std::move(SPIRVArgsCopy));
+      if (!SPVFile)
+        reportError(SPVFile.takeError());
+      Files[Task] = *SPVFile;
+      // Return false so the thinLTO backend doesn't continue to process this
+      // module. We already emitted SPIR-V ourselves, so we don't need to do
+      // anything else. Once the SPIR-V backend is ready, we can remove the
+      // manual SPIR-V translator call and return true here.
+      return false;
+    };
+    // Only compile user modules to SPV, not device libraries.
+    Conf.ThinLTOModulesToCompile = ModulesToCompile;
+  }
 
   return std::make_unique<lto::LTO>(std::move(Conf), Backend);
 }
@@ -1823,16 +2038,16 @@ bool isValidCIdentifier(StringRef S) {
 
 Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
                        SmallVectorImpl<StringRef> &OutputFiles,
+                       SmallVector<OffloadFile, 4> &BitcodeInputFiles,
                        const ArgList &Args) {
   llvm::TimeTraceScope TimeScope("Link bitcode files");
   const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
   StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
 
-  // Early exit for SPIR targets
-  if (Triple.isSPIROrSPIRV())
+  // Early exit for non-thin-LTO SPIR targets
+  if (Triple.isSPIROrSPIRV() && !sycl::isSYCLThinLTO(Args, Triple))
     return Error::success();
 
-  SmallVector<OffloadFile, 4> BitcodeInputFiles;
   DenseSet<StringRef> StrongResolutions;
   DenseSet<StringRef> UsedInRegularObj;
   DenseSet<StringRef> UsedInSharedLib;
@@ -1897,6 +2112,18 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
 
   // LTO Module hook to output bitcode without running the backend.
   SmallVector<StringRef> BitcodeOutput;
+  std::vector<std::string> ModulesToCompile;
+  SmallVector<StringRef> Files;
+  if (sycl::isSYCLThinLTO(Args, Triple)) {
+    for (const OffloadFile &BitcodeInput : BitcodeInputFiles) {
+      auto ModuleName = BitcodeInput.getBinary()->getFileName();
+      // TODO: This is pretty hacky, maybe we could check some module metadata
+      // or something.
+      if (ModuleName.find("libsycl-") == std::string::npos)
+        ModulesToCompile.push_back(ModuleName.str());
+    }
+  }
+
   auto OutputBitcode = [&](size_t, const Module &M) {
     auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) +
                                               "-jit-" + Triple.getTriple(),
@@ -1919,8 +2146,10 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
   auto LTOBackend = Args.hasArg(OPT_embed_bitcode) ||
                             Args.hasArg(OPT_builtin_bitcode_EQ) ||
                             Args.hasArg(OPT_clang_backend)
-                        ? createLTO(Args, Features, OutputBitcode)
-                        : createLTO(Args, Features);
+                        ? createLTO(Args, Features, BitcodeInputFiles, Files,
+                                    {}, OutputBitcode)
+                        : createLTO(Args, Features, BitcodeInputFiles, Files,
+                                    ModulesToCompile);
 
   // We need to resolve the symbols so the LTO backend knows which symbols
   // need to be kept or can be internalized. This is a simplified symbol
@@ -1939,6 +2168,17 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
     if (!BitcodeFileOrErr)
       return BitcodeFileOrErr.takeError();
 
+    if (sycl::isSYCLThinLTO(Args, Triple)) {
+      // Error if any module was not compiled with thinLTO. Other platforms
+      // can fall back to binary linking if thinLTO fails, but we don't have
+      // that for SPIR-V (besides spirv-link). In the future we may be able to
+      // fall back to normal SYCL processing and throw a warning instead of a
+      // fatal error.
+      if (auto Err = sycl::validateThinLTOModule(
+              (*BitcodeFileOrErr)->getSingleBitcodeModule(), Args))
+        return Err;
+    }
+
     // Save the input file and the buffer associated with its memory.
     const auto Symbols = (*BitcodeFileOrErr)->symbols();
     SmallVector<lto::SymbolResolution, 16> Resolutions(Symbols.size());
@@ -1991,7 +2231,7 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
 
   // Run the LTO job to compile the bitcode.
   size_t MaxTasks = LTOBackend->getMaxTasks();
-  SmallVector<StringRef> Files(MaxTasks);
+  Files.resize(MaxTasks);
   auto AddStream =
       [&](size_t Task,
           const Twine &ModuleName) -> std::unique_ptr<CachedFileStream> {
@@ -2353,6 +2593,8 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
     DenseSet<OffloadKind> ActiveOffloadKinds;
     bool HasSYCLOffloadKind = false;
     bool HasNonSYCLOffloadKinds = false;
+    SmallVector<OffloadFile, 4> BitcodeInputFiles;
+
     for (const auto &File : Input) {
       if (File.getBinary()->getOffloadKind() != OFK_None)
         ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind());
@@ -2365,9 +2607,21 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
     // First link and remove all the input files containing bitcode if
     // the target linker does not support it natively.
     SmallVector<StringRef> InputFiles;
-    if (!linkerSupportsLTO(LinkerArgs))
-      if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs))
+    llvm::Triple Triple(LinkerArgs.getLastArgValue(OPT_triple_EQ));
+    if (!linkerSupportsLTO(LinkerArgs)) {
+      if (sycl::isSYCLThinLTO(Args, Triple)) {
+        // For thinLTO, we consider device libs as normal compiler input
+        // and add them to the files to be processed by the LTO backend.
+        // Later we set ModulesToCompile so that we don't
+        // actually emit code for them, we just link in their functions in
+        // modules that use them.
+        if (auto Err = sycl::getDeviceLibsForLTO(Input, LinkerArgs, Triple))
+          return Err;
+      }
+      if (Error Err = linkBitcodeFiles(Input, InputFiles, BitcodeInputFiles,
+                                       LinkerArgs))
         return Err;
+    }
 
     // Write any remaining device inputs to an output file for the
     // linker.
@@ -2378,77 +2632,131 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
       InputFiles.emplace_back(*FileNameOrErr);
     }
     if (HasSYCLOffloadKind) {
-      SmallVector<StringRef> InputFiles;
-      // Write device inputs to an output file for the linker.
-      for (const OffloadFile &File : Input) {
-        auto FileNameOrErr = writeOffloadFile(File);
-        if (!FileNameOrErr)
-          return FileNameOrErr.takeError();
-        InputFiles.emplace_back(*FileNameOrErr);
+      StringRef TmpOutput;
+      if (!sycl::isSYCLThinLTO(Args, Triple)) {
+        // Link the input device files using the device linker for SYCL
+        // offload.
+        auto TmpOutputOrErr = sycl::linkDevice(InputFiles, LinkerArgs);
+        if (!TmpOutputOrErr)
+          return TmpOutputOrErr.takeError();
+        TmpOutput = *TmpOutputOrErr;
       }
-      // Link the input device files using the device linker for SYCL
-      // offload.
-      auto TmpOutputOrErr = sycl::linkDevice(InputFiles, LinkerArgs);
-      if (!TmpOutputOrErr)
-        return TmpOutputOrErr.takeError();
       SmallVector<StringRef> InputFilesSYCL;
-      InputFilesSYCL.emplace_back(*TmpOutputOrErr);
-      auto SplitModulesOrErr =
-          UseSYCLPostLinkTool
-              ? sycl::runSYCLPostLinkTool(InputFilesSYCL, LinkerArgs)
-              : sycl::runSYCLSplitLibrary(InputFilesSYCL, LinkerArgs,
-                                          *SYCLModuleSplitMode);
-      if (!SplitModulesOrErr)
-        return SplitModulesOrErr.takeError();
-
-      auto &SplitModules = *SplitModulesOrErr;
-      const llvm::Triple Triple(LinkerArgs.getLastArgValue(OPT_triple_EQ));
-      if ((Triple.isNVPTX() || Triple.isAMDGCN()) &&
-          LinkerArgs.hasArg(OPT_sycl_embed_ir)) {
-        // When compiling for Nvidia/AMD devices and the user requested the
-        // IR to be embedded in the application (via option), run the output
-        // of sycl-post-link (filetable referencing LLVM Bitcode + symbols)
-        // through the offload wrapper and link the resulting object to the
-        // application.
-        auto OutputFile =
-            sycl::runWrapperAndCompile(SplitModules, LinkerArgs, /* IsEmbeddedIR */ true);
-        if (!OutputFile)
-          return OutputFile.takeError();
-        WrappedOutput.push_back(*OutputFile);
-      }
-      for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
-        SmallVector<StringRef> Files = {SplitModules[I].ModuleFilePath};
-        StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ);
-        if (Arch.empty())
-          Arch = "native";
-        SmallVector<std::pair<StringRef, StringRef>, 4> BundlerInputFiles;
-        auto ClangOutputOrErr =
-            linkDevice(Files, LinkerArgs, true /* IsSYCLKind */);
-        if (!ClangOutputOrErr)
-          return ClangOutputOrErr.takeError();
-        if (Triple.isNVPTX()) {
-          auto VirtualArch = StringRef(clang::OffloadArchToVirtualArchString(
-              clang::StringToOffloadArch(Arch)));
-          auto PtxasOutputOrErr =
-              nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch);
-          if (!PtxasOutputOrErr)
-            return PtxasOutputOrErr.takeError();
-          BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch);
-          BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch);
-          auto BundledFileOrErr =
-              nvptx::fatbinary(BundlerInputFiles, LinkerArgs);
-          if (!BundledFileOrErr)
-            return BundledFileOrErr.takeError();
-          SplitModules[I].ModuleFilePath = *BundledFileOrErr;
-        } else if (Triple.isAMDGCN()) {
-          BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch);
-          auto BundledFileOrErr =
-              amdgcn::fatbinary(BundlerInputFiles, LinkerArgs);
-          if (!BundledFileOrErr)
-            return BundledFileOrErr.takeError();
-          SplitModules[I].ModuleFilePath = *BundledFileOrErr;
-        } else {
-          SplitModules[I].ModuleFilePath = *ClangOutputOrErr;
+      std::vector<module_split::SplitModule> SplitModules;
+      if (sycl::isSYCLThinLTO(Args, Triple)) {
+        size_t LastSPVFilePath = 0;
+        for (size_t FileIdx = 0; FileIdx < BitcodeInputFiles.size();
+             FileIdx++) {
+          // After we have run the LTO backend, compute module props/symbol
+          // table/spv file path and set it up to be used by SYCL image
+          // creation.
+          // TODO: Once SYCL image creation is reconsiled with the non-SYCL
+          // path, we can move all of the thinLTO handling to be more in-line
+          // with community code.
+
+          // This is a bit hacky but not every BitcodeInputFile will end up as a
+          // SPV file in InputFiles, for example if it is a device library file.
+          // If the file name is empty, that means we didn't generate SPV for
+          // it, so just find the next non-empty file name. Should be easy to
+          // clean this up later if we go with this overall design.
+          const OffloadFile &F = BitcodeInputFiles[FileIdx];
+          StringRef CodegenPath;
+          for (size_t OutputNum = LastSPVFilePath;
+               OutputNum < InputFiles.size(); OutputNum++) {
+            auto SPVFilePath = InputFiles[OutputNum];
+            if (!SPVFilePath.empty()) {
+              LastSPVFilePath = OutputNum + 1;
+              CodegenPath = SPVFilePath;
+              break;
+            }
+          }
+          if (CodegenPath.empty())
+            continue;
+          LLVMContext Context;
+          auto Buf = MemoryBuffer::getMemBuffer(F.getBinary()->getImage());
+          auto ModOrErr = parseBitcodeFile(*Buf, Context);
+          if (!ModOrErr)
+            return ModOrErr.takeError();
+          auto &M = **ModOrErr;
+
+          llvm::sycl::EntryPointSet EntryPoints;
+          bool OnlyKernelsAsEntryPoints =
+              sycl::considerOnlyKernelsAsEntryPoints(Args, Triple);
+          auto GlobalBinProps = sycl::computeGlobalBinProps(Args, Triple);
+          for (const Function &F : M.functions()) {
+            if (llvm::module_split::isEntryPoint(F, OnlyKernelsAsEntryPoints))
+              EntryPoints.insert(const_cast<Function *>(&F));
+          }
+          if (EntryPoints.empty())
+            continue;
+
+          auto Properties = llvm::sycl::computeModuleProperties(M, EntryPoints,
+                                                                GlobalBinProps);
+
+          std::string ModuleSyms =
+              llvm::sycl::computeModuleSymbolTable(M, EntryPoints);
+          SplitModules.emplace_back(CodegenPath, std::move(Properties),
+                                    ModuleSyms);
+        }
+        // We don't need the OffloadFiles anymore, so free them from memory.
+        BitcodeInputFiles.clear();
+      } else {
+        InputFilesSYCL.emplace_back(TmpOutput);
+        auto SplitModulesOrErr =
+            UseSYCLPostLinkTool
+                ? sycl::runSYCLPostLinkTool(InputFilesSYCL, LinkerArgs)
+                : sycl::runSYCLSplitLibrary(InputFilesSYCL, LinkerArgs,
+                                            *SYCLModuleSplitMode);
+        if (!SplitModulesOrErr)
+          return SplitModulesOrErr.takeError();
+        SplitModules = std::move(*SplitModulesOrErr);
+        if ((Triple.isNVPTX() || Triple.isAMDGCN()) &&
+            LinkerArgs.hasArg(OPT_sycl_embed_ir)) {
+          // When compiling for Nvidia/AMD devices and the user requested the
+          // IR to be embedded in the application (via option), run the output
+          // of sycl-post-link (filetable referencing LLVM Bitcode + symbols)
+          // through the offload wrapper and link the resulting object to the
+          // application.
+          auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs,
+                                                       /* IsEmbeddedIR */ true);
+          if (!OutputFile)
+            return OutputFile.takeError();
+          WrappedOutput.push_back(*OutputFile);
+        }
+        for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
+          SmallVector<StringRef> Files = {SplitModules[I].ModuleFilePath};
+          StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ);
+          if (Arch.empty())
+            Arch = "native";
+          SmallVector<std::pair<StringRef, StringRef>, 4> BundlerInputFiles;
+          auto ClangOutputOrErr =
+              linkDevice(Files, LinkerArgs, true /* IsSYCLKind */);
+          if (!ClangOutputOrErr)
+            return ClangOutputOrErr.takeError();
+          if (Triple.isNVPTX()) {
+            auto VirtualArch = StringRef(clang::OffloadArchToVirtualArchString(
+                clang::StringToOffloadArch(Arch)));
+            auto PtxasOutputOrErr =
+                nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch);
+            if (!PtxasOutputOrErr)
+              return PtxasOutputOrErr.takeError();
+            BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch);
+            BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch);
+            auto BundledFileOrErr =
+                nvptx::fatbinary(BundlerInputFiles, LinkerArgs);
+            if (!BundledFileOrErr)
+              return BundledFileOrErr.takeError();
+            SplitModules[I].ModuleFilePath = *BundledFileOrErr;
+          } else if (Triple.isAMDGCN()) {
+            BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch);
+            auto BundledFileOrErr =
+                amdgcn::fatbinary(BundlerInputFiles, LinkerArgs);
+            if (!BundledFileOrErr)
+              return BundledFileOrErr.takeError();
+            SplitModules[I].ModuleFilePath = *BundledFileOrErr;
+          } else {
+            SplitModules[I].ModuleFilePath = *ClangOutputOrErr;
+          }
         }
       }
 
@@ -2468,7 +2776,9 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
     if (HasNonSYCLOffloadKinds) {
       // First link and remove all the input files containing bitcode.
       SmallVector<StringRef> InputFiles;
-      if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs))
+      SmallVector<OffloadFile, 4> BitcodeInputFiles;
+      if (Error Err = linkBitcodeFiles(Input, InputFiles, BitcodeInputFiles,
+                                       LinkerArgs))
         return Err;
 
       // Write any remaining device inputs to an output file for the linker.
diff --git a/libdevice/device.h b/libdevice/device.h
index 360af54f9b4c4..e2f0da6e3c082 100644
--- a/libdevice/device.h
+++ b/libdevice/device.h
@@ -17,9 +17,9 @@
 
 #if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
 #ifdef __SYCL_DEVICE_ONLY__
-#define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak))
+#define DEVICE_EXTERNAL SYCL_EXTERNAL
 #else // __SYCL_DEVICE_ONLY__
-#define DEVICE_EXTERNAL __attribute__((weak))
+#define DEVICE_EXTERNAL
 #endif // __SYCL_DEVICE_ONLY__
 
 #define DEVICE_EXTERN_C DEVICE_EXTERNAL EXTERN_C
diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp
index 5d3c99d63c556..1c685737002f4 100644
--- a/libdevice/fallback-cassert.cpp
+++ b/libdevice/fallback-cassert.cpp
@@ -114,9 +114,4 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file,
   __assertfail(expr, file, line, func, 1);
 }
 
-DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File,
-                              unsigned _Line) {
-  __assertfail(_Message, _File, _Line, 0, 1);
-}
-
 #endif
diff --git a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h
index 0da3706ad3626..cb6049910b934 100644
--- a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h
+++ b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h
@@ -321,6 +321,7 @@ splitSYCLModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings);
 
 bool isESIMDFunction(const Function &F);
 bool canBeImportedFunction(const Function &F);
+bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints);
 
 } // namespace module_split
 
diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLLinkedModuleProcessor.h b/llvm/include/llvm/SYCLLowerIR/SYCLLinkedModuleProcessor.h
new file mode 100644
index 0000000000000..171992dfb2586
--- /dev/null
+++ b/llvm/include/llvm/SYCLLowerIR/SYCLLinkedModuleProcessor.h
@@ -0,0 +1,22 @@
+//===-- SYCLLinkedModuleProcessor.h - finalize a fully linked module ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The file contains a number of functions to create a pass that can be called
+// by the LTO backend that will finalize a fully-linked module.
+//===----------------------------------------------------------------------===//
+#pragma once
+#include "SpecConstants.h"
+namespace llvm {
+
+class PassRegistry;
+class ModulePass;
+ModulePass *
+    createSYCLLinkedModuleProcessorPass(llvm::SpecConstantsPass::HandlingMode);
+void initializeSYCLLinkedModuleProcessorPass(PassRegistry &);
+
+} // namespace llvm
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index bb3c9f7acdb8e..924fe1389f311 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1077,8 +1077,8 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
     for (const std::string &Name : Conf.ThinLTOModulesToCompile) {
       if (BM.getModuleIdentifier().contains(Name)) {
         ThinLTO.ModulesToCompile->insert({BM.getModuleIdentifier(), BM});
-        llvm::errs() << "[ThinLTO] Selecting " << BM.getModuleIdentifier()
-                     << " to compile\n";
+        LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BM.getModuleIdentifier()
+                          << " to compile\n");
       }
     }
   }
diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt
index ed578aaeb21c0..bfa3d3f6dba7f 100644
--- a/llvm/lib/SYCLLowerIR/CMakeLists.txt
+++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt
@@ -65,6 +65,7 @@ add_llvm_component_library(LLVMSYCLLowerIR
   SYCLDeviceRequirements.cpp
   SYCLKernelParamOptInfo.cpp
   SYCLJointMatrixTransform.cpp
+  SYCLLinkedModuleProcessor.cpp
   SYCLPropagateAspectsUsage.cpp
   SYCLPropagateJointMatrixUsage.cpp
   SYCLVirtualFunctionsAnalysis.cpp
diff --git a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp
index 6068ce58f414f..6db9aece81569 100644
--- a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp
+++ b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp
@@ -117,32 +117,6 @@ bool isKernel(const Function &F) {
          F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
 }
 
-bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) {
-  // Skip declarations, if any: they should not be included into a vector of
-  // entry points groups or otherwise we will end up with incorrectly generated
-  // list of symbols.
-  if (F.isDeclaration())
-    return false;
-
-  // Kernels are always considered to be entry points
-  if (isKernel(F))
-    return true;
-
-  if (!EmitOnlyKernelsAsEntryPoints) {
-    // If not disabled, SYCL_EXTERNAL functions with sycl-module-id attribute
-    // are also considered as entry points (except __spirv_* and __sycl_*
-    // functions)
-    return llvm::sycl::utils::isSYCLExternalFunction(&F) &&
-           !isSpirvSyclBuiltin(F.getName()) && !isESIMDBuiltin(F.getName()) &&
-           !isGenericBuiltin(F.getName());
-  }
-
-  // Even if we are emitting only kernels as entry points, virtual functions
-  // should still be treated as entry points, because they are going to be
-  // outlined into separate device images and linked in later.
-  return F.hasFnAttribute("indirectly-callable");
-}
-
 // Represents "dependency" or "use" graph of global objects (functions and
 // global variables) in a module. It is used during device code split to
 // understand which global variables and functions (other than entry points)
@@ -445,6 +419,32 @@ class ModuleSplitter : public ModuleSplitterBase {
 namespace llvm {
 namespace module_split {
 
+bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) {
+  // Skip declarations, if any: they should not be included into a vector of
+  // entry points groups or otherwise we will end up with incorrectly generated
+  // list of symbols.
+  if (F.isDeclaration())
+    return false;
+
+  // Kernels are always considered to be entry points
+  if (isKernel(F))
+    return true;
+
+  if (!EmitOnlyKernelsAsEntryPoints) {
+    // If not disabled, SYCL_EXTERNAL functions with sycl-module-id attribute
+    // are also considered as entry points (except __spirv_* and __sycl_*
+    // functions)
+    return llvm::sycl::utils::isSYCLExternalFunction(&F) &&
+           !isSpirvSyclBuiltin(F.getName()) && !isESIMDBuiltin(F.getName()) &&
+           !isGenericBuiltin(F.getName());
+  }
+
+  // Even if we are emitting only kernels as entry points, virtual functions
+  // should still be treated as entry points, because they are going to be
+  // outlined into separate device images and linked in later.
+  return F.hasFnAttribute("indirectly-callable");
+}
+
 std::optional<IRSplitMode> convertStringToSplitMode(StringRef S) {
   static const StringMap<IRSplitMode> Values = {{"kernel", SPLIT_PER_KERNEL},
                                                 {"source", SPLIT_PER_TU},
diff --git a/llvm/lib/SYCLLowerIR/SYCLLinkedModuleProcessor.cpp b/llvm/lib/SYCLLowerIR/SYCLLinkedModuleProcessor.cpp
new file mode 100644
index 0000000000000..672d49d6ad161
--- /dev/null
+++ b/llvm/lib/SYCLLowerIR/SYCLLinkedModuleProcessor.cpp
@@ -0,0 +1,45 @@
+//===-- SYCLLinkedModuleProcessor.cpp - finalize a fully linked module ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// See comments in the header.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/SYCLLowerIR/SYCLLinkedModuleProcessor.h"
+
+#include "llvm/Pass.h"
+
+#define DEBUG_TYPE "sycl-linked-module-processor"
+using namespace llvm;
+
+namespace {
+class SYCLLinkedModuleProcessor : public ModulePass {
+public:
+  static char ID;
+  SYCLLinkedModuleProcessor(SpecConstantsPass::HandlingMode Mode)
+      : ModulePass(ID), Mode(Mode) {
+    initializeSYCLLinkedModuleProcessorPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override {
+    // TODO: determine if we need to run other passes
+    ModuleAnalysisManager MAM;
+    SpecConstantsPass SCP(Mode);
+    auto PA = SCP.run(M, MAM);
+    return !PA.areAllPreserved();
+  }
+
+private:
+  SpecConstantsPass::HandlingMode Mode;
+};
+} // namespace
+char SYCLLinkedModuleProcessor::ID = 0;
+INITIALIZE_PASS(SYCLLinkedModuleProcessor, "SYCLLinkedModuleProcessor",
+                "Finalize a fully linked SYCL module", false, false)
+ModulePass *llvm::createSYCLLinkedModuleProcessorPass(
+    SpecConstantsPass::HandlingMode Mode) {
+  return new SYCLLinkedModuleProcessor(Mode);
+}
diff --git a/sycl/doc/design/CompilerAndRuntimeDesign.md b/sycl/doc/design/CompilerAndRuntimeDesign.md
index 1935356d0a3a8..7843b5d3b88e6 100644
--- a/sycl/doc/design/CompilerAndRuntimeDesign.md
+++ b/sycl/doc/design/CompilerAndRuntimeDesign.md
@@ -550,6 +550,9 @@ unit)
 - `off` - disables device code split. If `-fno-sycl-rdc` is specified, the behavior is
    the same as `per_source`
 
+If ThinLTO is enabled, device code splitting is run during the compilation
+stage. See [here](ThinLTO.md) for more information.
+
 ##### Symbol table generation
 
 TBD
diff --git a/sycl/doc/design/ThinLTO.md b/sycl/doc/design/ThinLTO.md
new file mode 100644
index 0000000000000..58232f0aff157
--- /dev/null
+++ b/sycl/doc/design/ThinLTO.md
@@ -0,0 +1,149 @@
+# ThinLTO for SYCL
+
+This document describes the purpose and design of ThinLTO for SYCL.
+
+**NOTE**: This is not the final version. The document is still in progress.
+
+## Background
+
+With traditional SYCL device code linking, all user code is linked together
+along with device libraries into a single huge module and then split and
+processed by `sycl-post-link`. This requires sequential processing, has a large
+memory footprint, and differs from the linking flow for AMD and NVIDIA devices.
+
+## Summary
+
+SYCL ThinLTO will hook into the existing community mechanism to run LTO as part
+of device linking inside `clang-linker-wrapper`. We split the device images
+early at compilation time, and at link time we use ThinLTO's function importing
+feature to bring in the definitions for referenced functions. Only the new
+offload model is supported.
+
+## Device code compilation time changes
+
+Most of the changes for ThinLTO occur during device link time, however there is
+one major change during compilation (-c) time: we now run device code split
+during compilation instead of linking. The main reason for doing this is
+increased parallelization. Many compilation jobs can be run at the same time,
+but linking happens once total for the application. Device code split is
+currently a common source of performance issues.
+
+Splitting early means that the resulting IR after splitting is not complete, it
+still may contain calls to functions (user code and/or the SYCL device
+libraries) defined in other translation units.
+
+We rely on the assumption that all function definitions matching a declaration
+will be the same and we can let ThinLTO pull in any one.
+
+For example, let's start with user device code that defines a `SYCL_EXTERNAL`
+function `foo` in translation unit `tu_foo`. There is also another translation
+unit `tu_bar` that references `foo`. During the early device code splitting run
+of `tu_foo`, we may find that more than one of the resultant device images
+contain a definition for `foo`.
+
+We assert that any function definition for `foo` that is deemed a match by the
+ThinLTO infrastructure during the processing of `tu_bar` is valid.
+
+As a result of running early device code split, the fat object file generated as
+part of device compilation may contain multiple device code images.
+
+## Device code link time changes
+
+Before we go into the link time changes for SYCL, let's understand the device
+linking flow for AMD/NVIDIA devices:
+
+![Community linking flow](images/ThinLTOCommunityFlow.svg)
+
+SYCL has two differentiating requirements:
+
+1) The SPIR-V backend is not production ready and the SPIR-V translator is used.
+2) The SYCL runtime requires metadata (module properties and module symbol
+table) computed from device images that will be stored along the device images
+in the fat executable.
+
+The effect of requirement 1) is that instead of letting ThinLTO call the SPIR-V
+backend, we add a callback that runs right before CodeGen would run. In that
+callback, we call the SPIR-V translator and store the resultant file path for
+use later, and we instruct the ThinLTO framework to not perform CodeGen.
+
+An interesting additional fact about requirement 2) is that we actually need to
+process fully linked module to accurate compute the module properties. One
+example where we need the full module is to [compute the required devicelib
+mask](https://github.com/intel/llvm/blob/sycl/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp).
+If we only process the device code that was included in the original fat object
+input to `clang-linker-wrapper`, we will miss devicelib calls in referenced
+`SYCL_EXTERNAL` functions.
+
+The effect of requirement 2) is that we store the fully linked device image for
+metadata computation in the SYCL-specific handing code after the ThinLTO
+framework has completed. Another option would be to try to compute the metadata
+inside the ThinLTO framework callbacks, but this would require SYCL-specific
+arguments to many caller functions in the stack and pollute community code.
+
+Here is the current ThinLTO flow for SYCL:
+
+![SYCL linking flow](images/ThinLTOSYCLFlow.svg)
+
+We add a `PreCodeGenModuleHook` function to the `LTOConfig` object so that we
+can process the fully linked module without running the backend.
+
+However, the flow is not ideal for many reasons:
+
+1) We are relying on the external `llvm-spirv` tool instead of the SPIR-V
+backend. We could slightly improve this issue by using a library call to the
+SPIR-V translator instead of the tool, however the library API requires setting
+up an object to represent the arguments while we only have strings, and it's
+non-trivial to parse the strings to figure out how to create the argument
+object. Since we plan to use the SPIR-V backend in the long term, this does not
+seem to be worth the effort.
+
+2) We manually run passes inside `PreCodeGenModuleHook`. This is because we
+don't run CodeGen, so we can't take advantage of the `PreCodeGenPassesHook`
+field of `LTOConfig` to run some custom passes, as those passes are only run
+when we actually are going to run CodeGen.
+
+3) We have to store the fully linked module. This is needed because we need a
+fully linked module to accurately compute metadata, see the above explanation of
+SYCL requirement 2). We could get around storing the module by computing the
+metadata inside the LTO framework and storing it for late use by the SYCL
+bundling code, but doing this would require even more SYCL-only customizations
+including even more new function arguments and modifications of the
+`OffloadFile` class. There are also compilations because the LTO framework is
+multithreaded, and not all LLVM data structures are thread safe.
+
+The proposed long-term SYCL ThinLTO flow is as follows:
+
+![SYCL SPIR-V backend linking flow](images/ThinLTOSYCLSPIRVBackendFlow.svg)
+
+The biggest difference here is that we are running CodeGen using the SPIR-V
+backend.
+
+Also, instead of using a lambda function in the `PreCodeGenModuleHook` callback
+to run SYCL finalization passes, we can take advantage of the
+`PreCodeGenPassesHook` field to add passes to the pass manager that the LTO
+framework will run.
+
+It is possible that the number of device images in the fat executable and which
+device image contains which kernel is different with ThinLTO enabled, but we do
+expect this to have any impact on correctness or performance, nor we do expect
+users to care.
+
+## Current limitations
+
+`-O0`: Compiling with `-O0` prevent clang from generating ThinLTO metadata
+during the compilation phase. In the current implementation, this is an error.
+In the final version, we could either silently fall back to full LTO or generate
+ThinLTO metadata even for `-O0`.
+
+SYCL libdevice: Current all `libdevice` functions are explicitly marked to be
+weak symbols. The ThinLTO framework does not consider a definition of function
+with weak linkage as it cannot be sure that this definition is the correct one.
+Ideally we could remove the weak symbol annotation.
+
+No binary linkage: The SPIR-V target does not currently have a production
+quality binary linker. This means that we must generate a fully linked image as
+part of device linkage. At least for AMD devices, this is not a requirement as
+`lld` is used for the final link which can resolve any unresolved symbols.
+`-fno-gpu-rdc` is default for AMD, so in that case it can call `lld` during
+compile, but if `-fno-gpu-rdc` is passed, the lld call happens as part of
+`clang-linker-wrapper` to resolve any symbols not resolved by ThinLTO.
diff --git a/sycl/doc/design/images/ThinLTOCommunityFlow.svg b/sycl/doc/design/images/ThinLTOCommunityFlow.svg
new file mode 100644
index 0000000000000..f2fe257a07af8
--- /dev/null
+++ b/sycl/doc/design/images/ThinLTOCommunityFlow.svg
@@ -0,0 +1 @@
+<svg width="1280" height="720" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" overflow="hidden"><g><rect x="0" y="0" width="1280" height="720" fill="#FFFFFF"/><rect x="221" y="57" width="752" height="640" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#7F7F7F"/><path d="M478 98.5C478 81.6553 531.502 68 597.5 68 663.498 68 717 81.6553 717 98.5 717 115.345 663.498 129 597.5 129 531.502 129 478 115.345 478 98.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(530.336 95)">Extract device code</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(557.336 114)">from inputs</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(484.018 53)">clang</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(541.018 53)">-</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(549.018 53)">linker</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(605.018 53)">-</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(613.018 53)">wrapper</text><path d="M468 201C468 175.595 525.755 155 597 155 668.245 155 726 175.595 726 201 726 226.405 668.245 247 597 247 525.755 247 468 226.405 468 201Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(523.416 187)">Process symbols and</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(516.416 206)">add device code to LTO</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(559.416 225)">framework</text><rect x="428" y="275" width="339" height="243" stroke="#000000" stroke-width="2" stroke-miterlimit="8" fill="#83CBEB"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(495.438 297)">T</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(506.438 297)">hinLTO</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(582.438 297)">framework</text><path d="M502 327C502 312.641 544.757 301 597.5 301 650.243 301 693 312.641 693 327 693 341.359 650.243 353 597.5 353 544.757 353 502 341.359 502 327Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(543.25 323)">Import function</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(560.25 342)">definitions</text><path d="M502 412C502 397.641 544.757 386 597.5 386 650.243 386 693 397.641 693 412 693 426.359 650.243 438 597.5 438 544.757 438 502 426.359 502 412Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(551.5 417)">Optimization</text><path d="M502 488C502 473.641 544.757 462 597.5 462 650.243 462 693 473.641 693 488 693 502.359 650.243 514 597.5 514 544.757 514 502 502.359 502 488Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(566.25 493)">Codegen</text><path d="M499 564.5C499 551.521 542.876 541 597 541 651.124 541 695 551.521 695 564.5 695 577.479 651.124 588 597 588 542.876 588 499 577.479 499 564.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(566.416 570)">Bundling</text><path d="M488 646C488 627.222 536.801 612 597 612 657.199 612 706 627.222 706 646 706 664.778 657.199 680 597 680 536.801 680 488 664.778 488 646Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(545.836 632)">Call host linker</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(540.836 651)">with bundled fat</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(575.836 670)">object</text><path d="M584 143 590.5 143 590.5 132 603.5 132 603.5 143 610 143 597 154Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M584 260.5 590.5 260.5 590.5 252 603.5 252 603.5 260.5 610 260.5 597 269Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M584 370.5 590.5 370.5 590.5 359 603.5 359 603.5 370.5 610 370.5 597 382Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M584 529.5 590.5 529.5 590.5 519 603.5 519 603.5 529.5 610 529.5 597 540Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M584 451 590.5 451 590.5 443 603.5 443 603.5 451 610 451 597 459Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M584 599.5 590.5 599.5 590.5 590 603.5 590 603.5 599.5 610 599.5 597 609Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/></g></svg>
\ No newline at end of file
diff --git a/sycl/doc/design/images/ThinLTOSYCLFlow.svg b/sycl/doc/design/images/ThinLTOSYCLFlow.svg
new file mode 100644
index 0000000000000..622f4d20d158c
--- /dev/null
+++ b/sycl/doc/design/images/ThinLTOSYCLFlow.svg
@@ -0,0 +1 @@
+<svg width="1280" height="720" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" overflow="hidden"><defs><clipPath id="clip0"><rect x="0" y="0" width="1280" height="720"/></clipPath></defs><g clip-path="url(#clip0)"><rect x="0" y="0" width="1280" height="720" fill="#FFFFFF"/><rect x="220" y="20" width="752" height="683" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#7F7F7F"/><path d="M485 57.5C485 43.4167 534.696 32 596 32 657.304 32 707 43.4167 707 57.5 707 71.5833 657.304 83 596 83 534.696 83 485 71.5833 485 57.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(529.17 53)">Extract</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(580.17 53)">device</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(629.17 53)">code</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(556.17 72)">from</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(592.17 72)">input</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(628.17 72)">s</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(483.018 16)">c</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(496.018 16)">l</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(502.018 16)">ang</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(540.018 16)">-</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(548.018 16)">linker</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(604.018 16)">-</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(612.018 16)">wrapper</text><path d="M476 157C476 136.013 529.726 119 596 119 662.274 119 716 136.013 716 157 716 177.987 662.274 195 596 195 529.726 195 476 177.987 476 157Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(528.753 145)">Process</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(582.753 145)">symbols</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(639.753 145)">and</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(522.753 162)">add</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(549.753 162)">device</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(594.753 162)">code</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(628.753 162)">to</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(644.753 162)">LTO</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(560.753 180)">framewor</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(623.753 180)">k</text><rect x="409" y="213" width="374" height="273" stroke="#000000" stroke-width="2" stroke-miterlimit="8" fill="#83CBEB"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(494.438 234)">T</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(505.438 234)">hinLTO</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(581.438 234)">framework</text><path d="M555 250.5C555 243.044 573.58 237 596.5 237 619.42 237 638 243.044 638 250.5 638 257.956 619.42 264 596.5 264 573.58 264 555 257.956 555 250.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="6" transform="translate(587.586 246)">Import</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="6" transform="translate(585.586 253)">function</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="6" transform="translate(583.586 260)">definitions</text><path d="M559 289.5C559 283.701 575.565 279 596 279 616.435 279 633 283.701 633 289.5 633 295.299 616.435 300 596 300 575.565 300 559 295.299 559 289.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="5" transform="translate(581.836 292)">Optimization</text><path d="M518 594C518 578.536 553.146 566 596.5 566 639.854 566 675 578.536 675 594 675 609.464 639.854 622 596.5 622 553.146 622 518 609.464 518 594Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(565.336 599)">Bundling</text><path d="M534 670C534 654.536 561.982 642 596.5 642 631.018 642 659 654.536 659 670 659 685.464 631.018 698 596.5 698 561.982 698 534 685.464 534 670Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="11" transform="translate(561.67 661)">Call host linker</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="11" transform="translate(564.67 674)">with bundled</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="11" transform="translate(572.67 686)">fat object</text><path d="M583 106.5 589.5 106.5 589.5 95 602.5 95 602.5 106.5 609 106.5 596 118Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M589 204 592.5 204 592.5 197 599.5 197 599.5 204 603 204 596 211Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M591 271.5 593.75 271.5 593.75 266 599.25 266 599.25 271.5 602 271.5 596.5 277Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M586 472.5 591.25 472.5 591.25 463 601.75 463 601.75 472.5 607 472.5 596.5 482Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M589 558 592.5 558 592.5 548 599.5 548 599.5 558 603 558 596 565Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M591 307.5 593.5 307.5 593.5 303 598.5 303 598.5 307.5 601 307.5 596 312Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><rect x="518" y="314" width="156" height="116" stroke="#000000" stroke-width="2" stroke-miterlimit="8" fill="#4E95D9"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="700" font-size="8" transform="translate(543.606 322)">PreCodeGenModuleHook</text><path d="M568 405C568 400.582 580.536 397 596 397 611.464 397 624 400.582 624 405 624 409.418 611.464 413 596 413 580.536 413 568 409.418 568 405Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(587.166 404)">Call SPIR</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(602.166 404)">-</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(603.166 404)">V</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(588.166 408)">translator</text><path d="M591 386.08 593.5 386.08 593.5 382 598.5 382 598.5 386.08 601 386.08 596 393Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M569 371C569 366.582 581.088 363 596 363 610.912 363 623 366.582 623 371 623 375.418 610.912 379 596 379 581.088 379 569 375.418 569 371Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(587.92 368)">Store fully</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(590.92 372)">linked</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(589.92 377)">module</text><path d="M551 446.5C551 439.044 571.371 433 596.5 433 621.629 433 642 439.044 642 446.5 642 453.956 621.629 460 596.5 460 571.371 460 551 453.956 551 446.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="8" transform="translate(579.586 449)">Earl</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="8" transform="translate(592.586 449)">y</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="8" transform="translate(598.586 449)">exi</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="8" transform="translate(608.586 449)">t</text><path d="M591 422.5 593.75 422.5 593.75 417 599.25 417 599.25 422.5 602 422.5 596.5 428Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M518 517C518 501.536 553.146 489 596.5 489 639.854 489 675 501.536 675 517 675 532.464 639.854 545 596.5 545 553.146 545 518 532.464 518 517Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="12" transform="translate(571.333 507)">Compute</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="12" transform="translate(550.333 521)">metadata for fully</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="12" transform="translate(556.333 536)">linked modules</text><path d="M583 631.5 589.5 631.5 589.5 622 602.5 622 602.5 631.5 609 631.5 596 641Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M566 336.5C566 331.253 579.655 327 596.5 327 613.345 327 627 331.253 627 336.5 627 341.747 613.345 346 596.5 346 579.655 346 566 341.747 566 336.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(587.17 331)">Run linked</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(590.17 335)">module</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(587.17 340)">finalization</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="4" transform="translate(590.17 345)">passes</text><path d="M591 352.772 593.5 352.772 593.5 350 598.5 350 598.5 352.772 601 352.772 596 359Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/></g></svg>
\ No newline at end of file
diff --git a/sycl/doc/design/images/ThinLTOSYCLSPIRVBackendFlow.svg b/sycl/doc/design/images/ThinLTOSYCLSPIRVBackendFlow.svg
new file mode 100644
index 0000000000000..2b048f531b8b7
--- /dev/null
+++ b/sycl/doc/design/images/ThinLTOSYCLSPIRVBackendFlow.svg
@@ -0,0 +1 @@
+<svg width="1280" height="720" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" overflow="hidden"><defs><clipPath id="clip0"><rect x="0" y="0" width="1280" height="720"/></clipPath></defs><g clip-path="url(#clip0)"><rect x="0" y="0" width="1280" height="720" fill="#FFFFFF"/><rect x="220" y="20" width="752" height="683" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#7F7F7F"/><path d="M498 59C498 44.0883 542.1 32 596.5 32 650.9 32 695 44.0883 695 59 695 73.9117 650.9 86 596.5 86 542.1 86 498 73.9117 498 59Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(548.166 55)">Extract</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(599.166 55)">device</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(537.166 74)">code</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(575.166 74)">from</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(611.166 74)">input</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(647.166 74)">s</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(483.018 16)">c</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(496.018 16)">l</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(502.018 16)">ang</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(540.018 16)">-</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(548.018 16)">linker</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(604.018 16)">-</text><text font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(612.018 16)">wrapper</text><path d="M489 155.5C489 135.342 537.129 119 596.5 119 655.871 119 704 135.342 704 155.5 704 175.658 655.871 192 596.5 192 537.129 192 489 175.658 489 155.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(541.833 143)">Process</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(595.833 143)">symbols</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(530.833 160)">and</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(557.833 160)">add</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(584.833 160)">device</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(629.833 160)">code</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(538.833 178)">to</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(554.833 178)">LTO</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(582.833 178)">framewor</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="15" transform="translate(645.833 178)">k</text><rect x="409" y="213" width="374" height="273" stroke="#000000" stroke-width="2" stroke-miterlimit="8" fill="#83CBEB"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(494.438 234)">T</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(505.438 234)">hinLTO</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="24" transform="translate(581.438 234)">framework</text><path d="M555 250.5C555 243.044 573.58 237 596.5 237 619.42 237 638 243.044 638 250.5 638 257.956 619.42 264 596.5 264 573.58 264 555 257.956 555 250.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="6" transform="translate(587.586 246)">Import</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="6" transform="translate(585.586 253)">function</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="6" transform="translate(583.586 260)">definitions</text><path d="M559 289.5C559 283.701 575.565 279 596 279 616.435 279 633 283.701 633 289.5 633 295.299 616.435 300 596 300 575.565 300 559 295.299 559 289.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="5" transform="translate(581.836 292)">Optimization</text><path d="M518 593.5C518 579.417 553.146 568 596.5 568 639.854 568 675 579.417 675 593.5 675 607.583 639.854 619 596.5 619 553.146 619 518 607.583 518 593.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="16" transform="translate(565.336 599)">Bundling</text><path d="M534 672C534 656.536 561.982 644 596.5 644 631.018 644 659 656.536 659 672 659 687.464 631.018 700 596.5 700 561.982 700 534 687.464 534 672Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="11" transform="translate(561.67 663)">Call host linker</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="11" transform="translate(564.67 676)">with bundled</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="11" transform="translate(572.67 688)">fat object</text><path d="M583 102 589.5 102 589.5 91 602.5 91 602.5 102 609 102 596 113Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M589 204 592.5 204 592.5 194 599.5 194 599.5 204 603 204 596 211Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M591 271.5 593.75 271.5 593.75 266 599.25 266 599.25 271.5 602 271.5 596.5 277Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M586 473 591.25 473 591.25 463 601.75 463 601.75 473 607 473 596.5 483Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M589 558 592.5 558 592.5 548 599.5 548 599.5 558 603 558 596 565Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M591 307.5 593.5 307.5 593.5 303 598.5 303 598.5 307.5 601 307.5 596 312Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><rect x="518" y="314" width="156" height="116" stroke="#000000" stroke-width="2" stroke-miterlimit="8" fill="#4E95D9"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="7" transform="translate(554 321)">PreCodeGenPassesHook</text><path d="M549 391C549 381.059 570.043 373 596 373 621.957 373 643 381.059 643 391 643 400.941 621.957 409 596 409 570.043 409 549 400.941 549 391Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="9" transform="translate(576.666 383)">Store fully</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="9" transform="translate(584.666 394)">linked</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="9" transform="translate(580.666 405)">module</text><path d="M551 446.5C551 439.044 571.371 433 596.5 433 621.629 433 642 439.044 642 446.5 642 453.956 621.629 460 596.5 460 571.371 460 551 453.956 551 446.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="8" transform="translate(580.583 449)">Codegen</text><path d="M518 517C518 501.536 553.146 489 596.5 489 639.854 489 675 501.536 675 517 675 532.464 639.854 545 596.5 545 553.146 545 518 532.464 518 517Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="12" transform="translate(571.333 507)">Compute</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="12" transform="translate(550.333 521)">metadata for fully</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="12" transform="translate(556.333 536)">linked modules</text><path d="M587 633 591.5 633 591.5 625 600.5 625 600.5 633 605 633 596 641Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M561 338.5C561 330.492 576.67 324 596 324 615.33 324 631 330.492 631 338.5 631 346.508 615.33 353 596 353 576.67 353 561 346.508 561 338.5Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="5" transform="translate(584 331)">Run linked</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="5" transform="translate(587 337)">module</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="5" transform="translate(583 344)">finalization</text><text fill="#FFFFFF" font-family="Aptos,Aptos_MSFontService,sans-serif" font-weight="400" font-size="5" transform="translate(588 350)">passes</text><path d="M590 362.003 593.25 362.003 593.25 356 599.75 356 599.75 362.003 603 362.003 596.5 371Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/><path d="M591 421.08 593.5 421.08 593.5 412 598.5 412 598.5 421.08 601 421.08 596 428Z" stroke="#042433" stroke-width="2" stroke-miterlimit="8" fill="#156082" fill-rule="evenodd"/></g></svg>
\ No newline at end of file