Skip to content

Commit 2080657

Browse files
rocr: support multiple driver types in agent initialization
Modify agent initialization to support different driver types, enabling the KFD_VIRTIO driver for CPU and GPU agents. 1. Add driver_type parameter to CpuAgent and GpuAgent constructors 2. Update topology discovery to handle multiple driver types 3. Fix MakeMemoryResident return value check in VirtioDriver 4. Add helper function IsGPUDriver to check driver types 5. Update agent discovery to iterate through all available drivers This change makes the runtime more flexible by removing hardcoded KFD driver assumptions and properly handling different driver backends. Signed-off-by: Honglei Huang <Honglei1.Huang@amd.com>
1 parent d36cb19 commit 2080657

File tree

7 files changed

+65
-42
lines changed

7 files changed

+65
-42
lines changed

runtime/hsa-runtime/core/inc/amd_cpu_agent.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "core/inc/agent.h"
5252
#include "core/inc/queue.h"
5353
#include "core/inc/cache.h"
54+
#include "core/inc/driver.h"
5455

5556
namespace rocr {
5657
namespace AMD {
@@ -62,7 +63,9 @@ class CpuAgent : public core::Agent {
6263
// @param [in] node Node id. Each CPU in different socket will get distinct
6364
// id.
6465
// @param [in] node_props Node property.
65-
CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props);
66+
// @param [in] driver_type Driver type. Default is KFD.
67+
CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props,
68+
core::DriverType driver_type = core::DriverType::KFD);
6669

6770
// @brief CpuAgent destructor.
6871
~CpuAgent();

runtime/hsa-runtime/core/inc/amd_gpu_agent.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,11 @@ typedef ScratchCache::ScratchInfo ScratchInfo;
7373
class GpuAgentInt : public core::Agent {
7474
public:
7575
// @brief Constructor
76-
GpuAgentInt(uint32_t node_id)
77-
: core::Agent(core::Runtime::runtime_singleton_->AgentDriver(
78-
core::DriverType::KFD),
79-
node_id, core::Agent::DeviceType::kAmdGpuDevice) {}
76+
// @param [in] node_id Node id.
77+
// @param [in] driver_type Driver type used to select the agent's driver (required; no default).
78+
GpuAgentInt(uint32_t node_id, core::DriverType driver_type)
79+
: core::Agent(core::Runtime::runtime_singleton_->AgentDriver(driver_type), node_id,
80+
core::Agent::DeviceType::kAmdGpuDevice) {}
8081

8182
// @brief Ensure blits are ready (performance hint).
8283
virtual void PreloadBlits() {}
@@ -231,7 +232,10 @@ class GpuAgent : public GpuAgentInt {
231232
// id.
232233
// @param [in] node_props Node property.
233234
// @param [in] xnack_mode XNACK mode of device.
234-
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode, uint32_t index);
235+
// @param [in] index Index of the GPU device.
236+
// @param [in] driver_type Driver type. Default is KFD.
237+
GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode, uint32_t index,
238+
core::DriverType driver_type = core::DriverType::KFD);
235239

236240
// @brief GPU agent destructor.
237241
~GpuAgent();

runtime/hsa-runtime/core/inc/driver.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,14 @@ class Queue;
5858

5959
enum class DriverQuery { GET_DRIVER_VERSION };
6060

61-
enum class DriverType { XDNA = 0, KFD, KFD_VIRTIO, NUM_DRIVER_TYPES };
61+
enum class DriverType {
62+
XDNA = 0,
63+
KFD,
64+
#ifdef HSAKMT_VIRTIO_ENABLED
65+
KFD_VIRTIO,
66+
#endif
67+
NUM_DRIVER_TYPES
68+
};
6269

6370
/// @brief Handle for exported / imported memory.
6471
struct ShareableHandle {

runtime/hsa-runtime/core/inc/runtime.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,14 @@ class Runtime {
510510

511511
std::vector<std::unique_ptr<Driver>>& AgentDrivers() { return agent_drivers_; }
512512

513+
static bool IsGPUDriver(DriverType driver_type) {
514+
return driver_type == core::DriverType::KFD
515+
#ifdef HSAKMT_VIRTIO_ENABLED
516+
|| driver_type == core::DriverType::KFD_VIRTIO
517+
#endif
518+
;
519+
}
520+
513521
protected:
514522
static void AsyncEventsLoop(void*);
515523
static void AsyncIPCSockServerConnLoop(void*);

runtime/hsa-runtime/core/runtime/amd_cpu_agent.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,9 @@
5454

5555
namespace rocr {
5656
namespace AMD {
57-
CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties &node_props)
58-
: core::Agent(
59-
core::Runtime::runtime_singleton_->AgentDriver(core::DriverType::KFD),
60-
node, kAmdCpuDevice),
57+
CpuAgent::CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props,
58+
core::DriverType driver_type)
59+
: core::Agent(core::Runtime::runtime_singleton_->AgentDriver(driver_type), node, kAmdCpuDevice),
6160
properties_(node_props) {
6261
InitRegionList();
6362

runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,8 @@ namespace AMD {
9393
const uint64_t CP_DMA_DATA_TRANSFER_CNT_MAX = (1 << 26);
9494

9595
GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xnack_mode,
96-
uint32_t index)
97-
: GpuAgentInt(node),
96+
uint32_t index, core::DriverType driver_type)
97+
: GpuAgentInt(node, driver_type),
9898
properties_(node_props),
9999
current_coherency_type_(HSA_AMD_COHERENCY_TYPE_COHERENT),
100100
scratch_used_large_(0),

runtime/hsa-runtime/core/runtime/amd_topology.cpp

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,14 @@ namespace rocr {
7878
namespace AMD {
7979
// Anonymous namespace.
8080
namespace {
81+
82+
const std::array<std::function<hsa_status_t(std::unique_ptr<core::Driver>&)>,
8183
#if _WIN32
82-
constexpr size_t num_drivers = 0;
84+
0
8385
#elif __linux__
84-
constexpr size_t num_drivers = 2
85-
#ifdef HSAKMT_VIRTIO_ENABLED
86-
+ 1
87-
#endif
88-
;
86+
static_cast<size_t>(core::DriverType::NUM_DRIVER_TYPES)
8987
#endif
90-
91-
const std::array<std::function<hsa_status_t(std::unique_ptr<core::Driver>&)>, num_drivers>
88+
>
9289
discover_driver_funcs = {
9390
#ifdef __linux__
9491
KfdDriver::DiscoverDriver,
@@ -121,22 +118,22 @@ bool InitializeDriver(std::unique_ptr<core::Driver>& driver) {
121118
return true;
122119
}
123120

124-
void DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) {
125-
CpuAgent* cpu = new CpuAgent(node_id, node_prop);
121+
void DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop, core::DriverType driver_type) {
122+
CpuAgent* cpu = new CpuAgent(node_id, node_prop, driver_type);
126123
cpu->Enable();
127124
core::Runtime::runtime_singleton_->RegisterAgent(cpu, true);
128125
}
129126

130127
GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnack_mode,
131-
bool enabled) {
128+
bool enabled, core::DriverType driver_type) {
132129
GpuAgent* gpu = nullptr;
133130
if (node_prop.NumFComputeCores == 0) {
134131
// Ignore non GPUs.
135132
return nullptr;
136133
}
137134
try {
138135
gpu = new GpuAgent(node_id, node_prop, xnack_mode,
139-
core::Runtime::runtime_singleton_->gpu_agents().size());
136+
core::Runtime::runtime_singleton_->gpu_agents().size(), driver_type);
140137

141138
const HsaVersionInfo& kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version;
142139

@@ -163,7 +160,7 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac
163160
node_prop.Capability.ui32.SRAM_EDCSupport = 1;
164161
delete gpu;
165162
gpu = new GpuAgent(node_id, node_prop, xnack_mode,
166-
core::Runtime::runtime_singleton_->gpu_agents().size());
163+
core::Runtime::runtime_singleton_->gpu_agents().size(), driver_type);
167164
}
168165
}
169166
} catch (const hsa_exception& e) {
@@ -268,24 +265,29 @@ void SurfaceGpuList(std::vector<int32_t>& gpu_list, bool xnack_mode, bool enable
268265
const int32_t invalidIdx = -1;
269266
int32_t list_sz = gpu_list.size();
270267
HsaNodeProperties node_prop = {0};
271-
const auto& gpu_driver = core::Runtime::runtime_singleton_->AgentDriver(core::DriverType::KFD);
272-
for (int32_t idx = 0; idx < list_sz; idx++) {
273-
if (gpu_list[idx] == invalidIdx) {
274-
break;
268+
for (const auto& gpu_driver : core::Runtime::runtime_singleton_->AgentDrivers()) {
269+
if (!core::Runtime::IsGPUDriver(gpu_driver->kernel_driver_type_)) {
270+
continue;
275271
}
276272

277-
// Obtain properties of the node
278-
hsa_status_t ret = gpu_driver.GetNodeProperties(node_prop, gpu_list[idx]);
279-
assert(ret == HSA_STATUS_SUCCESS && "Error in getting Node Properties");
273+
for (int32_t idx = 0; idx < list_sz; idx++) {
274+
if (gpu_list[idx] == invalidIdx) {
275+
break;
276+
}
280277

281-
// disable interrupt signal for DTIF platform
282-
if (core::Runtime::runtime_singleton_->flag().enable_dtif())
283-
core::g_use_interrupt_wait = false;
278+
// Obtain properties of the node
279+
hsa_status_t ret = gpu_driver->GetNodeProperties(node_prop, gpu_list[idx]);
280+
assert(ret == HSA_STATUS_SUCCESS && "Error in getting Node Properties");
284281

285-
// Instantiate a Gpu device. The IO links
286-
// of this node have already been registered
287-
assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery.");
288-
DiscoverGpu(gpu_list[idx], node_prop, xnack_mode, enabled);
282+
// disable interrupt signal for DTIF platform
283+
if (core::Runtime::runtime_singleton_->flag().enable_dtif())
284+
core::g_use_interrupt_wait = false;
285+
286+
// Instantiate a Gpu device. The IO links
287+
// of this node have already been registered
288+
assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery.");
289+
DiscoverGpu(gpu_list[idx], node_prop, xnack_mode, enabled, gpu_driver->kernel_driver_type_);
290+
}
289291
}
290292
}
291293

@@ -346,7 +348,7 @@ bool BuildTopology() {
346348
/// @todo: Add support for AIEs.
347349
// Query if env ROCR_VISIBLE_DEVICES is defined. If defined
348350
// determine number and order of GPU devices to be surfaced.
349-
if (filter && driver->kernel_driver_type_ == core::DriverType::KFD) {
351+
if (filter && (core::Runtime::IsGPUDriver(driver->kernel_driver_type_))) {
350352
rvdFilter.BuildRvdTokenList();
351353
rvdFilter.BuildDeviceUuidList(node_props_vec);
352354
visibleCnt = rvdFilter.BuildUsrDeviceList();
@@ -361,7 +363,7 @@ bool BuildTopology() {
361363
for (auto& node_props : node_props_vec) {
362364
if (node_props.NumCPUCores) {
363365
// Node has CPU cores so instantiate a CPU agent.
364-
DiscoverCpu(node_id, node_props);
366+
DiscoverCpu(node_id, node_props, driver->kernel_driver_type_);
365367
}
366368

367369
if (node_props.NumNeuralCores) {

0 commit comments

Comments
 (0)