Skip to content

Commit e0dcba9

Browse files
Add kpack runtime integration for split device code artifacts (#2622)
Integrates rocm-kpack runtime library for loading device code from external kpack archives at HIP initialization time. Changes: - Add kpack_params_ optional to FatBinaryInfo for HIPK metadata - Parse HIPK magic (0x4B504948) in digestFatBinary to detect kpack'd binaries - Add ExtractKpackBinary() to load code objects via kpack_load_code_object() - Wire up kpack cache lifecycle in hip_global.cpp - Track kpack allocations for proper cleanup - Support multi-TU binaries via bundle_index (co_index parameter) The ROCM_KPACK_ENABLED cmake flag controls whether kpack support is compiled in. When disabled, HIPK binaries return hipErrorNotSupported. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude <noreply@anthropic.com>
1 parent dbd26a8 commit e0dcba9

File tree

7 files changed

+214
-1
lines changed

7 files changed

+214
-1
lines changed

projects/clr/hipamd/src/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,19 @@ if(DISABLE_DIRECT_DISPATCH)
180180
target_compile_definitions(amdhip64 PRIVATE DISABLE_DIRECT_DISPATCH)
181181
endif()
182182

183+
# Optional rocm-kpack support for kpack split artifacts
184+
# This option will be enabled permanently at a future point and gates the use
185+
# of the rocm-kpack library for detecting ROCm multi-arch archives in
186+
# distributions of ROCm so that the CLR can load them just as it would normal
187+
# fat binaries. See the WIP repo: https://github.com/ROCm/rocm-kpack (which
188+
# will be migrated to rocm-systems when ready).
189+
option(ROCM_KPACK_ENABLED "Enable kpack runtime loading for split device code" OFF)
190+
if(ROCM_KPACK_ENABLED)
191+
find_package(rocm-kpack REQUIRED)
192+
target_compile_definitions(amdhip64 PRIVATE ROCM_KPACK_ENABLED=1)
193+
target_link_libraries(amdhip64 PRIVATE rocm::rocm_kpack)
194+
endif()
195+
183196
# Short-Term solution for pre-compiled headers for online compilation
184197
# Enable pre compiled header
185198
if(__HIP_ENABLE_PCH)

projects/clr/hipamd/src/hip_code_object.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ THE SOFTWARE.
3232
#include <elf/elf.hpp>
3333
#include "comgrctx.hpp"
3434
#include "hip_comgr_helper.hpp"
35+
#include "hip_platform.hpp"
3536

3637
namespace hip {
3738
hipError_t ihipFree(void* ptr);
@@ -265,6 +266,39 @@ hipError_t StatCO::digestFatBinary(const void* data, FatBinaryInfo*& programs) {
265266
return hipSuccess;
266267
}
267268

269+
// Fat binary wrapper structure (matches hip_platform.cpp definition)
270+
// Defined locally to keep kpack integration as implementation detail
271+
struct __CudaFatBinaryWrapper {
272+
unsigned int magic;
273+
unsigned int version;
274+
void* binary;
275+
void* dummy1; // reserved1: bundle index for multi-TU binaries
276+
};
277+
278+
// Check if this is a kpack'd binary (HIPK magic)
279+
const auto* wrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
280+
if (wrapper->magic == symbols::kHipkMagic && wrapper->version == 1) {
281+
// Discover binary path from the wrapper address using existing CLR utility
282+
std::string binary_path;
283+
size_t file_offset = 0;
284+
if (!amd::Os::FindFileNameFromAddress(data, &binary_path, &file_offset)) {
285+
LogError("Failed to discover binary path for kpack loading");
286+
return hipErrorNoBinaryForGpu;
287+
}
288+
289+
// Get bundle index from wrapper->dummy1 (reserved1 field)
290+
// For multi-TU binaries, this identifies which bundle this wrapper corresponds to
291+
uint64_t bundle_index = reinterpret_cast<uintptr_t>(wrapper->dummy1);
292+
293+
// wrapper->binary points to msgpack metadata
294+
// ExtractKpackBinary will error if ROCM_KPACK_ENABLED=OFF
295+
FatBinaryInfo* fatBinaryInfo = new FatBinaryInfo(
296+
FatBinaryInfo::KpackParams{wrapper->binary, std::move(binary_path), bundle_index});
297+
hipError_t err = fatBinaryInfo->ExtractKpackBinary(g_devices);
298+
programs = fatBinaryInfo;
299+
return err;
300+
}
301+
268302
// Create a new fat binary object and extract the fat binary for all devices.
269303
FatBinaryInfo* fatBinaryInfo = new FatBinaryInfo(nullptr, data);
270304
hipError_t err = fatBinaryInfo->ExtractFatBinaryUsingCOMGR(g_devices);
@@ -287,6 +321,26 @@ FatBinaryInfo** StatCO::addFatBinary(const void* data, bool initialized, bool& s
287321
return &modules_[data];
288322
}
289323

324+
FatBinaryInfo** StatCO::addKpackBinary(const void* hipk_metadata, const void* wrapper_addr,
325+
bool initialized, bool& success) {
326+
amd::ScopedLock lock(sclock_);
327+
328+
// Use wrapper_addr as the key (same as data pointer for normal path)
329+
// This allows digestFatBinary to access the wrapper and detect HIPK magic
330+
module_to_hostModule_.insert(std::make_pair(&modules_[wrapper_addr], wrapper_addr));
331+
332+
if (!initialized) {
333+
// Deferred loading: modules_[wrapper_addr] is nullptr, digestFatBinary will handle it later
334+
success = true;
335+
return &modules_[wrapper_addr];
336+
}
337+
338+
// Immediate loading: call digestFatBinary which handles kpack detection
339+
hipError_t err = digestFatBinary(wrapper_addr, modules_[wrapper_addr]);
340+
success = (err == hipSuccess);
341+
return &modules_[wrapper_addr];
342+
}
343+
290344
hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) {
291345
amd::ScopedLock lock(sclock_);
292346

projects/clr/hipamd/src/hip_code_object.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ constexpr char kHipFatBinName_[] = "hipfatbin-";
5555
constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix
5656
constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-";
5757

58+
// Fat binary wrapper magic values
59+
constexpr uint32_t kHipfMagic = 0x48495046; // "HIPF" little-endian (normal fat binary)
60+
constexpr uint32_t kHipkMagic = 0x4B504948; // "HIPK" little-endian (kpack'd binary)
61+
5862
// Clang Offload bundler description & Header in uncompressed mode.
5963
struct ClangOffloadBundleInfo {
6064
uint64_t offset;
@@ -154,6 +158,8 @@ class StatCO : public CodeObject {
154158

155159
// Add/Remove/Digest Fat Binaries passed to us from "__hipRegisterFatBinary"
156160
FatBinaryInfo** addFatBinary(const void* data, bool initialized, bool& success);
161+
FatBinaryInfo** addKpackBinary(const void* hipk_metadata, const void* wrapper_addr,
162+
bool initialized, bool& success);
157163
hipError_t removeFatBinary(FatBinaryInfo** module);
158164
hipError_t digestFatBinary(const void* data, FatBinaryInfo*& programs);
159165
void RemoveAllFatBinaries();

projects/clr/hipamd/src/hip_fatbin.cpp

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,38 @@ THE SOFTWARE.
2424
#include "hip_fatbin.hpp"
2525
#include "hip_global.hpp"
2626
#include <unordered_map>
27+
#include <mutex>
2728
#include "hip_code_object.hpp"
2829
#include "hip_platform.hpp"
2930
#include "comgrctx.hpp"
3031
#include "amd_hsa_elf.hpp"
3132
#include "hip_comgr_helper.hpp"
3233

34+
#if ROCM_KPACK_ENABLED
35+
#include <rocm_kpack/kpack.h>
36+
#endif
37+
3338
namespace hip {
3439
// Use ComgrUniqueHandle and type aliases from hip_comgr_helper.hpp
3540
using comgr_helper::ComgrDataSetUniqueHandle;
3641
using comgr_helper::ComgrActionInfoUniqueHandle;
3742
using comgr_helper::ComgrDataUniqueHandle;
3843

44+
#if ROCM_KPACK_ENABLED
45+
namespace {
46+
// HIP process-global kpack cache - initialized on first use
47+
std::once_flag g_hipKpackCacheInitFlag;
48+
kpack_cache_t g_hipKpackCache = nullptr;
49+
50+
void initHipKpackCache() { kpack_cache_create(&g_hipKpackCache); }
51+
52+
kpack_cache_t getHipKpackCache() {
53+
std::call_once(g_hipKpackCacheInitFlag, initHipKpackCache);
54+
return g_hipKpackCache;
55+
}
56+
} // namespace
57+
#endif
58+
3959
FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image)
4060
: foffset_(0), image_(image), image_mapped_(false), uri_(std::string()) {
4161
if (fname != nullptr) {
@@ -47,6 +67,11 @@ FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image)
4767
dev_programs_.resize(g_devices.size(), nullptr);
4868
}
4969

70+
FatBinaryInfo::FatBinaryInfo(KpackParams kpack_params)
71+
: FatBinaryInfo(kpack_params.binary_path.c_str(), nullptr) {
72+
kpack_params_ = std::move(kpack_params);
73+
}
74+
5075
FatBinaryInfo::~FatBinaryInfo() {
5176
// Release per device fat bin info.
5277
for (int dev_id = 0; dev_id < dev_programs_.size(); dev_id++) {
@@ -57,7 +82,16 @@ FatBinaryInfo::~FatBinaryInfo() {
5782
}
5883
// Release Code object allocations
5984
for (const auto& i : code_obj_allocations_) {
60-
delete[] reinterpret_cast<const char*>(i);
85+
if (kpack_params_.has_value()) {
86+
// Kpack-allocated code objects must be freed via kpack API
87+
#if ROCM_KPACK_ENABLED
88+
kpack_free_code_object(const_cast<void*>(i));
89+
#else
90+
guarantee(false, "Kpack code object but ROCM_KPACK_ENABLED=OFF");
91+
#endif
92+
} else {
93+
delete[] reinterpret_cast<const char*>(i);
94+
}
6195
}
6296
ReleaseImageAndFile();
6397
}
@@ -640,6 +674,79 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
640674
return hip_status;
641675
}
642676

677+
// This function is always defined but errors if ROCM_KPACK_ENABLED=OFF
678+
// TODO: Extract SPIR-V translation from ExtractFatBinaryUsingCOMGR and call
679+
// it from both of these entry-points once we have enough testing in place
680+
// to ensure this advanced case is functional.
681+
hipError_t FatBinaryInfo::ExtractKpackBinary(const std::vector<hip::Device*>& devices) {
682+
#if !ROCM_KPACK_ENABLED
683+
LogError("Kpack binary detected but ROCM_KPACK_ENABLED=OFF");
684+
return hipErrorNotSupported;
685+
#else
686+
if (!kpack_params_.has_value()) {
687+
LogError("ExtractKpackBinary called but kpack_params_ not set");
688+
return hipErrorInvalidValue;
689+
}
690+
691+
const auto& params = kpack_params_.value();
692+
if (params.metadata == nullptr) {
693+
LogError("HIPK metadata is null");
694+
return hipErrorInvalidValue;
695+
}
696+
697+
// Build architecture priority list from devices
698+
// For each device, add native ISA first, then generic fallback
699+
std::vector<std::string> arch_list;
700+
for (auto device : devices) {
701+
std::string device_name = device->devices()[0]->isa().isaName();
702+
arch_list.push_back(device_name);
703+
704+
// Add generic fallback
705+
auto generic_name = TargetToGeneric(device_name);
706+
if (!generic_name.empty()) {
707+
arch_list.push_back(generic_name);
708+
}
709+
}
710+
711+
// Convert to C-style array for kpack API
712+
std::vector<const char*> arch_ptrs;
713+
for (const auto& arch : arch_list) {
714+
arch_ptrs.push_back(arch.c_str());
715+
}
716+
717+
// Load code object from kpack archive
718+
void* code_object = nullptr;
719+
size_t code_object_size = 0;
720+
721+
// binary_path is used to resolve relative paths to kpack archives.
722+
// bundle_index identifies which code object to load for multi-TU binaries.
723+
// The kernel_name (used for TOC lookup) is embedded in the HIPK metadata.
724+
kpack_error_t err =
725+
kpack_load_code_object(getHipKpackCache(), params.metadata, fname_.c_str(),
726+
static_cast<uint32_t>(params.bundle_index),
727+
arch_ptrs.data(), arch_ptrs.size(), &code_object, &code_object_size);
728+
729+
if (err != KPACK_SUCCESS) {
730+
LogPrintfError("kpack_load_code_object failed with error: %d", err);
731+
return hipErrorInvalidImage;
732+
}
733+
734+
// Add code object to all devices
735+
for (auto device : devices) {
736+
hipError_t hip_err = AddDevProgram(device, code_object, code_object_size, 0);
737+
if (hip_err != hipSuccess) {
738+
kpack_free_code_object(code_object);
739+
return hip_err;
740+
}
741+
}
742+
743+
// Track allocation for cleanup in destructor
744+
code_obj_allocations_.insert(code_object);
745+
746+
return hipSuccess;
747+
#endif
748+
}
749+
643750
hipError_t FatBinaryInfo::AddDevProgram(hip::Device* device, const void* binary_image,
644751
size_t binary_size, size_t binary_offset) {
645752
int devID = device->deviceId();

projects/clr/hipamd/src/hip_fatbin.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ THE SOFTWARE.
2828
#include "hip_internal.hpp"
2929
#include "platform/program.hpp"
3030

31+
#include <optional>
32+
3133
// Forward declaration for Unique FD
3234
struct UniqueFD;
3335

@@ -36,10 +38,20 @@ namespace hip {
3638
// Fat Binary Info
3739
class FatBinaryInfo {
3840
public:
41+
// Parameters for kpack'd (split device code) binaries
42+
struct KpackParams {
43+
const void* metadata; //!< Msgpack metadata from .rocm_kpack_ref section
44+
std::string binary_path; //!< Path to the host binary
45+
uint64_t bundle_index; //!< Bundle index for multi-TU binaries (0-based)
46+
};
47+
3948
FatBinaryInfo(const char* fname, const void* image);
49+
// Constructor for kpack'd (split device code) binaries
50+
explicit FatBinaryInfo(KpackParams kpack_params);
4051
~FatBinaryInfo();
4152

4253
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices);
54+
hipError_t ExtractKpackBinary(const std::vector<hip::Device*>& devices);
4355
hipError_t AddDevProgram(hip::Device* device, const void* binary_image, size_t binary_size,
4456
size_t binary_offset);
4557
hipError_t BuildProgram(const int device_id);
@@ -84,6 +96,9 @@ class FatBinaryInfo {
8496
// Only used for FBs where image is directly passed
8597
std::string uri_; //!< Uniform resource indicator
8698

99+
// Kpack parameters for split device code binaries (nullopt for normal fat binaries)
100+
std::optional<KpackParams> kpack_params_;
101+
87102
std::vector<amd::Program*> dev_programs_; //!< Program info per Device
88103

89104
std::shared_ptr<UniqueFD> ufd_; //!< Unique file descriptor

projects/clr/hipamd/src/hip_platform.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,17 @@ static bool isCompatibleCodeObject(const std::string& codeobj_target_id, const c
7373

7474
void** __hipRegisterFatBinary(const void* data) {
7575
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
76+
77+
// Check for HIPK magic (kpack'd binary with external device code)
78+
if (fbwrapper->magic == symbols::kHipkMagic && fbwrapper->version == 1) {
79+
// For HIPK binaries, fbwrapper->binary points to msgpack metadata
80+
// Route through addKpackBinary which will error if ROCM_KPACK_ENABLED=OFF
81+
bool success = false;
82+
auto fat_binary_info = PlatformState::instance().addKpackBinary(fbwrapper->binary, data, success);
83+
return success ? reinterpret_cast<void**>(fat_binary_info) : nullptr;
84+
}
85+
86+
// Normal HIPF path
7687
if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
7788
LogPrintfError("Cannot Register fat binary. FatMagic: %u version: %u ", fbwrapper->magic,
7889
fbwrapper->version);
@@ -1003,6 +1014,11 @@ hip::FatBinaryInfo** PlatformState::addFatBinary(const void* data, bool& success
10031014
return statCO_.addFatBinary(data, initialized_, success);
10041015
}
10051016

1017+
hip::FatBinaryInfo** PlatformState::addKpackBinary(const void* hipk_metadata,
1018+
const void* wrapper_addr, bool& success) {
1019+
return statCO_.addKpackBinary(hipk_metadata, wrapper_addr, initialized_, success);
1020+
}
1021+
10061022
hipError_t PlatformState::removeFatBinary(hip::FatBinaryInfo** module) {
10071023
return statCO_.removeFatBinary(module);
10081024
}

projects/clr/hipamd/src/hip_platform.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ class PlatformState {
8787

8888
// Static Code Objects functions
8989
hip::FatBinaryInfo** addFatBinary(const void* data, bool& success);
90+
hip::FatBinaryInfo** addKpackBinary(const void* hipk_metadata, const void* wrapper_addr,
91+
bool& success);
9092
hipError_t removeFatBinary(hip::FatBinaryInfo** module);
9193
hipError_t digestFatBinary(const void* data, hip::FatBinaryInfo*& programs);
9294

0 commit comments

Comments
 (0)