@@ -24,18 +24,38 @@ THE SOFTWARE.
2424#include " hip_fatbin.hpp"
2525#include " hip_global.hpp"
2626#include < unordered_map>
27+ #include < mutex>
2728#include " hip_code_object.hpp"
2829#include " hip_platform.hpp"
2930#include " comgrctx.hpp"
3031#include " amd_hsa_elf.hpp"
3132#include " hip_comgr_helper.hpp"
3233
34+ #if ROCM_KPACK_ENABLED
35+ #include < rocm_kpack/kpack.h>
36+ #endif
37+
3338namespace hip {
3439// Use ComgrUniqueHandle and type aliases from hip_comgr_helper.hpp
3540using comgr_helper::ComgrDataSetUniqueHandle;
3641using comgr_helper::ComgrActionInfoUniqueHandle;
3742using comgr_helper::ComgrDataUniqueHandle;
3843
#if ROCM_KPACK_ENABLED
namespace {
// HIP process-global kpack cache, created lazily on first use.
// NOTE(review): no teardown of this cache is visible in this part of the file;
// it appears to live for the remainder of the process - confirm.
std::once_flag g_hipKpackCacheInitFlag;
kpack_cache_t g_hipKpackCache = nullptr;

// One-time initializer, invoked through std::call_once by getHipKpackCache().
// A failed creation is reported here, once, instead of being discovered only
// indirectly when a later load is handed a null cache.
void initHipKpackCache() {
  kpack_cache_create(&g_hipKpackCache);
  if (g_hipKpackCache == nullptr) {
    LogError("Failed to create HIP kpack cache");
  }
}

// Returns the process-global kpack cache, creating it on the first call.
// The result is nullptr if the one-time creation did not produce a cache.
kpack_cache_t getHipKpackCache() {
  std::call_once(g_hipKpackCacheInitFlag, initHipKpackCache);
  return g_hipKpackCache;
}
}  // namespace
#endif
58+
3959FatBinaryInfo::FatBinaryInfo (const char * fname, const void * image)
4060 : foffset_(0 ), image_(image), image_mapped_(false ), uri_(std::string()) {
4161 if (fname != nullptr ) {
@@ -47,6 +67,11 @@ FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image)
4767 dev_programs_.resize (g_devices.size (), nullptr );
4868}
4969
70+ FatBinaryInfo::FatBinaryInfo (KpackParams kpack_params)
71+ : FatBinaryInfo(kpack_params.binary_path.c_str(), nullptr ) {
72+ kpack_params_ = std::move (kpack_params);
73+ }
74+
5075FatBinaryInfo::~FatBinaryInfo () {
5176 // Release per device fat bin info.
5277 for (int dev_id = 0 ; dev_id < dev_programs_.size (); dev_id++) {
@@ -57,7 +82,16 @@ FatBinaryInfo::~FatBinaryInfo() {
5782 }
5883 // Release Code object allocations
5984 for (const auto & i : code_obj_allocations_) {
60- delete[] reinterpret_cast <const char *>(i);
85+ if (kpack_params_.has_value ()) {
86+ // Kpack-allocated code objects must be freed via kpack API
87+ #if ROCM_KPACK_ENABLED
88+ kpack_free_code_object (const_cast <void *>(i));
89+ #else
90+ guarantee (false , " Kpack code object but ROCM_KPACK_ENABLED=OFF" );
91+ #endif
92+ } else {
93+ delete[] reinterpret_cast <const char *>(i);
94+ }
6195 }
6296 ReleaseImageAndFile ();
6397}
@@ -640,6 +674,79 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
640674 return hip_status;
641675}
642676
677+ // This function is always defined but errors if ROCM_KPACK_ENABLED=OFF
678+ // TODO: Extract SPIR-V translation from ExtractFatBinaryUsingCOMGR and call
679+ // it from both of these entry-points once we have enough testing in place
680+ // to ensure this advanced case is functional.
681+ hipError_t FatBinaryInfo::ExtractKpackBinary (const std::vector<hip::Device*>& devices) {
682+ #if !ROCM_KPACK_ENABLED
683+ LogError (" Kpack binary detected but ROCM_KPACK_ENABLED=OFF" );
684+ return hipErrorNotSupported;
685+ #else
686+ if (!kpack_params_.has_value ()) {
687+ LogError (" ExtractKpackBinary called but kpack_params_ not set" );
688+ return hipErrorInvalidValue;
689+ }
690+
691+ const auto & params = kpack_params_.value ();
692+ if (params.metadata == nullptr ) {
693+ LogError (" HIPK metadata is null" );
694+ return hipErrorInvalidValue;
695+ }
696+
697+ // Build architecture priority list from devices
698+ // For each device, add native ISA first, then generic fallback
699+ std::vector<std::string> arch_list;
700+ for (auto device : devices) {
701+ std::string device_name = device->devices ()[0 ]->isa ().isaName ();
702+ arch_list.push_back (device_name);
703+
704+ // Add generic fallback
705+ auto generic_name = TargetToGeneric (device_name);
706+ if (!generic_name.empty ()) {
707+ arch_list.push_back (generic_name);
708+ }
709+ }
710+
711+ // Convert to C-style array for kpack API
712+ std::vector<const char *> arch_ptrs;
713+ for (const auto & arch : arch_list) {
714+ arch_ptrs.push_back (arch.c_str ());
715+ }
716+
717+ // Load code object from kpack archive
718+ void * code_object = nullptr ;
719+ size_t code_object_size = 0 ;
720+
721+ // binary_path is used to resolve relative paths to kpack archives.
722+ // bundle_index identifies which code object to load for multi-TU binaries.
723+ // The kernel_name (used for TOC lookup) is embedded in the HIPK metadata.
724+ kpack_error_t err =
725+ kpack_load_code_object (getHipKpackCache (), params.metadata , fname_.c_str (),
726+ static_cast <uint32_t >(params.bundle_index ),
727+ arch_ptrs.data (), arch_ptrs.size (), &code_object, &code_object_size);
728+
729+ if (err != KPACK_SUCCESS) {
730+ LogPrintfError (" kpack_load_code_object failed with error: %d" , err);
731+ return hipErrorInvalidImage;
732+ }
733+
734+ // Add code object to all devices
735+ for (auto device : devices) {
736+ hipError_t hip_err = AddDevProgram (device, code_object, code_object_size, 0 );
737+ if (hip_err != hipSuccess) {
738+ kpack_free_code_object (code_object);
739+ return hip_err;
740+ }
741+ }
742+
743+ // Track allocation for cleanup in destructor
744+ code_obj_allocations_.insert (code_object);
745+
746+ return hipSuccess;
747+ #endif
748+ }
749+
643750hipError_t FatBinaryInfo::AddDevProgram (hip::Device* device, const void * binary_image,
644751 size_t binary_size, size_t binary_offset) {
645752 int devID = device->deviceId ();
0 commit comments