diff --git a/clang/test/Driver/sycl-linker-wrapper-image.cpp b/clang/test/Driver/sycl-linker-wrapper-image.cpp index 8c88047e9e649..57bab9cb7800e 100644 --- a/clang/test/Driver/sycl-linker-wrapper-image.cpp +++ b/clang/test/Driver/sycl-linker-wrapper-image.cpp @@ -37,7 +37,7 @@ int main() { // CHECK-DAG: %_pi_device_binary_property_struct = type { ptr, ptr, i32, i64 } // CHECK-DAG: %_pi_device_binary_property_set_struct = type { ptr, ptr, ptr } -// CHECK-DAG: %struct.__tgt_offload_entry = type { ptr, ptr, i64, i32, i32 } +// CHECK-DAG: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr } // CHECK-DAG: %__sycl.tgt_device_image = type { i16, i8, i8, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr } // CHECK-DAG: %__sycl.tgt_bin_desc = type { i16, i16, ptr, ptr, ptr } @@ -55,7 +55,7 @@ int main() { // CHECK-DAG: @__sycl_offload_prop_sets_arr.5 = internal constant [3 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.3, ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 0, i64 1) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.4, ptr null, ptr null }] // CHECK-DAG: @.sycl_offloading.0.data = internal unnamed_addr constant [912 x i8] // CHECK-DAG: @__sycl_offload_entry_name = internal unnamed_addr constant [25 x i8] c"_ZTSZ4mainE11fake_kernel\00" -// CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { ptr null, ptr @__sycl_offload_entry_name, i64 0, i32 0, i32 0 }] +// CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { i64 0, i16 1, i16 4, i32 0, ptr null, ptr @__sycl_offload_entry_name, i64 0, i64 0, ptr null }] // CHECK-DAG: @.sycl_offloading.0.info = internal local_unnamed_addr constant [2 x i64] [i64 ptrtoint (ptr @.sycl_offloading.0.data to i64), i64 912], section ".tgtimg", align 16 // CHECK-DAG: @llvm.used = appending global [1 x ptr] [ptr @.sycl_offloading.0.info], section "llvm.metadata" // CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr ([912 x i8], ptr @.sycl_offloading.0.data, i64 0, i64 912), ptr @__sycl_offload_entries_arr, ptr getelementptr ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 0, i64 1), ptr @__sycl_offload_prop_sets_arr.5, ptr getelementptr ([3 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.5, i64 0, i64 3) }] diff --git a/llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp index 7ad1c24f27b30..3d227d0c2e050 100644 --- a/llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp +++ b/llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp @@ -62,18 +62,6 @@ int8_t binaryImageFormatToInt8(SYCLBinaryImageFormat Format) { } } -StructType* getLegacyOffloadEntryTy(Module &M) { - LLVMContext &C = M.getContext(); - StructType *EntryTy = - StructType::getTypeByName(C, "struct.__tgt_offload_entry"); - if (!EntryTy) - EntryTy = StructType::create( - "struct.__tgt_offload_entry", PointerType::getUnqual(C), - PointerType::getUnqual(C), M.getDataLayout().getIntPtrType(C), - Type::getInt32Ty(C), Type::getInt32Ty(C)); - return EntryTy; -} - /// Wrapper helper class that creates all LLVM IRs wrapping given images. /// Note: All created structures, "_pi_device_*", "__sycl_*" and "__tgt*" names /// in this implementation are aligned with "sycl/include/sycl/detail/pi.h". @@ -95,7 +83,7 @@ struct Wrapper { SyclPropTy = getSyclPropTy(); SyclPropSetTy = getSyclPropSetTy(); - EntryTy = getLegacyOffloadEntryTy(M); + EntryTy = offloading::getEntryTy(M); SyclDeviceImageTy = getSyclDeviceImageTy(); SyclBinDescTy = getSyclBinDescTy(); } @@ -399,16 +387,26 @@ struct Wrapper { return std::pair(NullPtr, NullPtr); } - auto *Zero = ConstantInt::get(getSizeTTy(), 0); + auto *I64Zero = ConstantInt::get(Type::getInt64Ty(C), 0); auto *I32Zero = ConstantInt::get(Type::getInt32Ty(C), 0); auto *NullPtr = Constant::getNullValue(PointerType::getUnqual(C)); SmallVector EntriesInits; std::unique_ptr MB = MemoryBuffer::getMemBuffer(Entries); - for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI) - EntriesInits.push_back(ConstantStruct::get( - EntryTy, NullPtr, addStringToModule(*LI, "__sycl_offload_entry_name"), - Zero, I32Zero, I32Zero)); + for (line_iterator LI(*MB); !LI.is_at_eof(); ++LI) { + Constant *EntryData[] = { + ConstantExpr::getNullValue(Type::getInt64Ty(C)), + ConstantInt::get(Type::getInt16Ty(C), 1), + ConstantInt::get(Type::getInt16Ty(C), object::OffloadKind::OFK_SYCL), + I32Zero, + NullPtr, + addStringToModule(*LI, "__sycl_offload_entry_name"), + I64Zero, + I64Zero, + NullPtr}; + + EntriesInits.push_back(ConstantStruct::get(EntryTy, EntryData)); + } auto *Arr = ConstantArray::get(ArrayType::get(EntryTy, EntriesInits.size()), EntriesInits); diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index 0e6a9069ce270..50d67800e446d 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -78,6 +78,32 @@ #define __SYCL_PROGRAM_METADATA_TAG_NEED_FINALIZATION "Requires finalization" +// New entry type after +// https://github.com/llvm/llvm-project/pull/124018 +// This is a replica of the EntryTy data structure in +// llvm/include/llvm/Frontend/Offloading/Utility.h. +struct _sycl_offload_entry_struct_new { + /// Reserved bytes used to detect an older version of the struct, always zero. + uint64_t Reserved; + /// The current version of the struct for runtime forward compatibility. + uint16_t Version; + /// The expected consumer of this entry, e.g. CUDA or OpenMP. + uint16_t Kind; + /// Flags associated with the global. + uint32_t Flags; + /// The address of the global to be registered by the runtime. + void *Address; + /// The name of the symbol in the device image. + char *SymbolName; + /// The number of bytes the symbol takes. + uint64_t Size; + /// Extra generic data used to register this entry. + uint64_t Data; + /// An extra pointer, usually null. + void *AuxAddr; +}; +using sycl_offload_entry_new = _sycl_offload_entry_struct_new *; + // Entry type, matches OpenMP for compatibility struct _sycl_offload_entry_struct { void *addr; @@ -85,6 +111,37 @@ struct _sycl_offload_entry_struct { size_t size; int32_t flags; int32_t reserved; + + inline bool IsNewOffloadEntryType() { + // Assume this is the new version of the struct. + auto newStruct = reinterpret_cast(this); + + // Check if first 64 bits is equal to 0, next 16 bits is equal to 1, next 16 + // bits is equal to 4 (OK_SYCL), and check if Flags are zero. If all these + // conditions are met, then this is a newer version of the struct. + // We can not just rely on checking the first 64 bits, because even for the + // older version of the struct, the first 64 bits (void* addr) are zero. + return newStruct->Reserved == 0 && newStruct->Version == 1 && + newStruct->Kind == 4 && newStruct->Flags == 0; + } + + // Name is the only field that's used in SYCL. + inline char *GetName() { + if (IsNewOffloadEntryType()) + return reinterpret_cast(this)->SymbolName; + + return name; + } + + // Increment the pointer to the next entry. A mix of old and new offload entry + // types is not supported. + inline _sycl_offload_entry_struct *Increment() { + if (IsNewOffloadEntryType()) + return reinterpret_cast<_sycl_offload_entry_struct *>( + reinterpret_cast(this) + 1); + + return this + 1; + } }; using sycl_offload_entry = _sycl_offload_entry_struct *; diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index adb55149060f2..89042b10cb48c 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -120,9 +120,10 @@ void RTDeviceBinaryImage::print() const { std::cerr << " Link options : " << (Bin->LinkOptions ? Bin->LinkOptions : "NULL") << "\n"; std::cerr << " Entries : "; + for (sycl_offload_entry EntriesIt = Bin->EntriesBegin; - EntriesIt != Bin->EntriesEnd; ++EntriesIt) - std::cerr << EntriesIt->name << " "; + EntriesIt != Bin->EntriesEnd; EntriesIt = EntriesIt->Increment()) + std::cerr << EntriesIt->GetName() << " "; std::cerr << "\n"; std::cerr << " Properties [" << Bin->PropertySetsBegin << "-" << Bin->PropertySetsEnd << "]:\n"; diff --git a/sycl/source/detail/persistent_device_code_cache.cpp b/sycl/source/detail/persistent_device_code_cache.cpp index 0a6e708543e27..6e728fbe2e554 100644 --- a/sycl/source/detail/persistent_device_code_cache.cpp +++ b/sycl/source/detail/persistent_device_code_cache.cpp @@ -103,8 +103,8 @@ getSortedImages(const std::vector &Imgs) { [](const RTDeviceBinaryImage *A, const RTDeviceBinaryImage *B) { // All entry names are unique among these images, so comparing the // first ones is enough. - return std::strcmp(A->getRawData().EntriesBegin->name, - B->getRawData().EntriesBegin->name) < 0; + return std::strcmp(A->getRawData().EntriesBegin->GetName(), + B->getRawData().EntriesBegin->GetName()) < 0; }); return SortedImgs; } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index f2e98c8b68219..8148b413f0ecc 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1867,33 +1867,34 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { m_BinImg2KernelIDs[Img.get()].reset(new std::vector); for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; - ++EntriesIt) { + EntriesIt = EntriesIt->Increment()) { + + auto name = EntriesIt->GetName(); // Skip creating unique kernel ID if it is a service kernel. // SYCL service kernels are identified by having // __sycl_service_kernel__ in the mangled name, primarily as part of // the namespace of the name type. - if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) { - m_ServiceKernels.insert(std::make_pair(EntriesIt->name, Img.get())); + if (std::strstr(name, "__sycl_service_kernel__")) { + m_ServiceKernels.insert(std::make_pair(name, Img.get())); continue; } // Skip creating unique kernel ID if it is an exported device // function. Exported device functions appear in the offload entries // among kernels, but are identifiable by being listed in properties. - if (m_ExportedSymbolImages.find(EntriesIt->name) != - m_ExportedSymbolImages.end()) + if (m_ExportedSymbolImages.find(name) != m_ExportedSymbolImages.end()) continue; // ... and create a unique kernel ID for the entry - auto It = m_KernelName2KernelIDs.find(EntriesIt->name); + auto It = m_KernelName2KernelIDs.find(name); if (It == m_KernelName2KernelIDs.end()) { std::shared_ptr KernelIDImpl = - std::make_shared(EntriesIt->name); + std::make_shared(name); sycl::kernel_id KernelID = detail::createSyclObjFromImpl(KernelIDImpl); - It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, KernelID); + It = m_KernelName2KernelIDs.emplace_hint(It, name, KernelID); } m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); m_BinImg2KernelIDs[Img.get()]->push_back(It->second); @@ -2020,25 +2021,25 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Unmap the unique kernel IDs for the offload entries for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; - ++EntriesIt) { + EntriesIt = EntriesIt->Increment()) { // Drop entry for service kernel - if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) { - m_ServiceKernels.erase(EntriesIt->name); + if (std::strstr(EntriesIt->GetName(), "__sycl_service_kernel__")) { + m_ServiceKernels.erase(EntriesIt->GetName()); continue; } // Exported device functions won't have a kernel ID - if (m_ExportedSymbolImages.find(EntriesIt->name) != + if (m_ExportedSymbolImages.find(EntriesIt->GetName()) != m_ExportedSymbolImages.end()) { continue; } // remove everything associated with this KernelName - m_KernelUsesAssert.erase(EntriesIt->name); - m_KernelImplicitLocalArgPos.erase(EntriesIt->name); + m_KernelUsesAssert.erase(EntriesIt->GetName()); + m_KernelImplicitLocalArgPos.erase(EntriesIt->GetName()); - if (auto It = m_KernelName2KernelIDs.find(EntriesIt->name); + if (auto It = m_KernelName2KernelIDs.find(EntriesIt->GetName()); It != m_KernelName2KernelIDs.end()) { m_KernelName2KernelIDs.erase(It); m_KernelIDs2BinImage.erase(It->second); diff --git a/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp b/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp index 21fc02c94840a..5d75b63875fda 100644 --- a/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp +++ b/sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp @@ -336,8 +336,8 @@ TEST_P(PersistentDeviceCodeCache, MultipleImages) { std::sort(Imgs.begin(), Imgs.end(), [](const detail::RTDeviceBinaryImage *A, const detail::RTDeviceBinaryImage *B) { - return std::strcmp(A->getRawData().EntriesBegin->name, - B->getRawData().EntriesBegin->name) < 0; + return std::strcmp(A->getRawData().EntriesBegin->GetName(), + B->getRawData().EntriesBegin->GetName()) < 0; }); std::string ItemDir = detail::PersistentDeviceCodeCache::getCacheItemPath( Dev, Imgs, {}, BuildOptions);