Skip to content

Commit e2071e0

Browse files
authored
rocr: Make IPC Handles Unique (#1422)
Query IPC handles on shared memory export/import for any metadata as a means to uniquely identify handles that happen to be backed by buffers that point to the same memory. Signed-off-by: Sunday Clement <Sunday.Clement@amd.com>
1 parent 58baaac commit e2071e0

File tree

2 files changed

+85
-26
lines changed

2 files changed

+85
-26
lines changed

runtime/hsa-runtime/core/inc/runtime.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -909,9 +909,8 @@ class Runtime {
909909
void InitIPCDmaBufSupport();
910910
bool ipc_dmabuf_supported_;
911911
int IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
912-
amdgpu_bo_import_result *res,
913912
unsigned int numNodes, HSAuint32 *nodes,
914-
void **importAddress, HSAuint64 *importSize);
913+
void **importAddress, HSAuint64 *importSize, bool isdmabufSysmem);
915914
};
916915

917916
} // namespace core

runtime/hsa-runtime/core/runtime/runtime.cpp

Lines changed: 84 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,18 @@ hsa_status_t Runtime::FreeMemory(void* ptr) {
355355

356356
notifiers = std::move(it->second.notifiers);
357357

358+
//track the exporter BO to clear meta data via set_metadata
359+
//clear the set metadata here if possible if there's an existing ldrm_bo
360+
if (it->second.ldrm_bo) {
361+
struct amdgpu_bo_info info = {0};
362+
amdgpu_bo_query_info(it->second.ldrm_bo, &info);
363+
364+
//clear metadata
365+
amdgpu_bo_metadata zero_metadata = {0};
366+
memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t));
367+
amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata);
368+
}
369+
358370
allocation_map_.erase(it);
359371
}
360372

@@ -1361,6 +1373,34 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
13611373
hsa_status_t err = agent->driver().ExportDMABuf(ptr, len, &dmabuf_fd, &dmabufOffset);
13621374
assert(dmabufOffset/pageSize == fragOffset && "DMA Buf inconsistent with pointer offset.");
13631375
if (err != HSA_STATUS_SUCCESS) return err;
1376+
1377+
if (agent->device_type() == Agent::kAmdGpuDevice) {
1378+
AMD::GpuAgent* agent_ = reinterpret_cast<AMD::GpuAgent*>(agent);
1379+
amdgpu_bo_import_result res;
1380+
1381+
srand(static_cast<uint32_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
1382+
handle->handle[7] = rand();
1383+
1384+
//libdrm import for buffer object handle
1385+
if (DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) {
1386+
fprintf(stderr, "Error in amdgpu_bo_import\n");
1387+
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
1388+
}
1389+
1390+
//query buffer object for pre existing metadata
1391+
struct amdgpu_bo_info info = {0};
1392+
if (!amdgpu_bo_query_info(res.buf_handle, &info) && !!info.metadata.size_metadata) {
1393+
handle->handle[7] = info.metadata.umd_metadata[0];
1394+
} else {
1395+
amdgpu_bo_metadata buf_info = {0};
1396+
buf_info.size_metadata = sizeof(uint32_t);
1397+
buf_info.umd_metadata[0] = handle->handle[7];
1398+
1399+
amdgpu_bo_set_metadata(res.buf_handle, &buf_info);
1400+
allocation_map_[ptr].ldrm_bo = res.buf_handle;
1401+
}
1402+
}
1403+
13641404
close(dmabuf_fd);
13651405

13661406
ScopedAcquire<KernelMutex> lock(&ipc_sock_server_lock_);
@@ -1404,9 +1444,8 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
14041444
}
14051445

14061446
int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
1407-
amdgpu_bo_import_result *res,
14081447
unsigned int numNodes, HSAuint32 *nodes,
1409-
void **importAddress, HSAuint64 *importSize) {
1448+
void **importAddress, HSAuint64 *importSize, bool isDmabufSysmem) {
14101449
struct sockaddr_un address;
14111450
int dmabuf_fd = -1, socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
14121451
assert(socket_fd > -1 && "DMA buffer could not be imported for IPC!");
@@ -1450,20 +1489,28 @@ int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
14501489
dmabuf_fd = ReceiveDmaBufFd(socket_fd);
14511490
if (dmabuf_fd == -1) return -1;
14521491

1492+
amdgpu_bo_import_result res = {0};
14531493
HsaGraphicsResourceInfo info;
14541494
HSA_REGISTER_MEM_FLAGS regFlags;
1455-
regFlags.ui32.requiresVAddr = !!res ? 0 : 1;
1495+
regFlags.ui32.requiresVAddr = !isDmabufSysmem;
14561496
int err = HSAKMT_CALL(hsaKmtRegisterGraphicsHandleToNodesExt(dmabuf_fd, &info, numNodes, nodes, regFlags));
14571497
if (err == HSAKMT_STATUS_SUCCESS) {
14581498
*importAddress = info.MemoryAddress;
14591499
*importSize = info.SizeInBytes;
1460-
if (res) {
1500+
1501+
if (isDmabufSysmem)
14611502
HSAKMT_CALL(hsaKmtDeregisterMemory(*importAddress));
14621503

1463-
// Manually libDRM import and GPU map system memory
1464-
AMD::GpuAgent* agent = reinterpret_cast<AMD::GpuAgent*>(agents_by_node_[info.NodeId][0]);
1465-
err = DRM_CALL(amdgpu_bo_import(agent->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd,
1466-
dmabuf_fd, res));
1504+
AMD::GpuAgent* agent = reinterpret_cast<AMD::GpuAgent*>(agents_by_node_[info.NodeId][0]);
1505+
err = DRM_CALL(amdgpu_bo_import(agent->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd,
1506+
dmabuf_fd, &res));
1507+
1508+
// Store the buffer object handle in allocation map for later use
1509+
if (err == HSAKMT_STATUS_SUCCESS) {
1510+
ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
1511+
allocation_map_[*importAddress] =
1512+
AllocationRegion(nullptr, *importSize, *importSize, core::MemoryRegion::AllocateNoFlags);
1513+
allocation_map_[*importAddress].ldrm_bo = res.buf_handle;
14671514
}
14681515
close(dmabuf_fd);
14691516
}
@@ -1496,17 +1543,30 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
14961543
allocation_map_[importAddress].ldrm_bo = ldrm_bo;
14971544
};
14981545

1499-
auto importMemory = [&](unsigned int numNodes, HSAuint32 *nodes,
1500-
amdgpu_bo_import_result *res) {
1501-
int ret = ipc_dmabuf_supported_ ?
1502-
IPCClientImport(importHandle.handle[2], dmaBufFDHandle, res,
1503-
numNodes, nodes, &importAddress, &importSize) :
1504-
HSAKMT_CALL(hsaKmtRegisterSharedHandle(reinterpret_cast<const HsaSharedMemoryHandle*>(&importHandle),
1505-
&importAddress, &importSize));
1506-
if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
1507-
1508-
return HSA_STATUS_SUCCESS;
1509-
};
1546+
auto importMemory = [&](unsigned int numNodes, HSAuint32 *nodes, bool isSysMem) {
1547+
1548+
int ret = ipc_dmabuf_supported_ ? IPCClientImport(importHandle.handle[2], dmaBufFDHandle, numNodes,
1549+
nodes, &importAddress, &importSize, isSysMem) :
1550+
HSAKMT_CALL(hsaKmtRegisterSharedHandle(
1551+
reinterpret_cast<const HsaSharedMemoryHandle*>(&importHandle),
1552+
&importAddress, &importSize
1553+
));
1554+
1555+
if (ret) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
1556+
if (ipc_dmabuf_supported_ && !isSysMem) {
1557+
// use the bo from the allocation map
1558+
// Only check metadata for GPU memory
1559+
struct amdgpu_bo_info info = {0};
1560+
int ret = amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info);
1561+
1562+
// Validate metadata for IPC handle
1563+
if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) {
1564+
fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]);
1565+
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
1566+
}
1567+
}
1568+
return HSA_STATUS_SUCCESS;
1569+
};
15101570

15111571
auto mapMemoryToNodes = [&](unsigned int numNodes, HSAuint32 *nodes) {
15121572
HSAuint64 altAddress;
@@ -1547,10 +1607,9 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
15471607
}
15481608

15491609
if (num_agents == 0) {
1550-
amdgpu_bo_import_result res;
15511610
bool isDmabufSysMem = ipc_dmabuf_supported_ && importHandle.handle[3];
15521611

1553-
hsa_status_t err = importMemory(0, NULL, isDmabufSysMem ? &res : NULL);
1612+
hsa_status_t err = importMemory(0, NULL, isDmabufSysMem);
15541613
if (err != HSA_STATUS_SUCCESS) return err;
15551614
if (!isDmabufSysMem) return mapMemoryToNodes(0, NULL);
15561615

@@ -1563,7 +1622,7 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
15631622

15641623
// Create a shared cpu access pointer for user
15651624
void *cpuPtr;
1566-
amdgpu_bo_handle bo = res.buf_handle;
1625+
amdgpu_bo_handle bo = allocation_map_[importAddress].ldrm_bo;
15671626
int ret = DRM_CALL(amdgpu_bo_cpu_map(bo, &cpuPtr));
15681627
if (ret) return errCleanup(bo);
15691628

@@ -1591,8 +1650,9 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
15911650
for (uint32_t i = 0; i < num_agents; i++)
15921651
agents[i]->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, &nodes[i]);
15931652

1594-
hsa_status_t err = importMemory(num_agents, nodes, NULL);
1653+
hsa_status_t err = importMemory(num_agents, nodes, false);
15951654
if (err != HSA_STATUS_SUCCESS) return err;
1655+
15961656
return mapMemoryToNodes(num_agents, nodes);
15971657
}
15981658

@@ -2184,7 +2244,7 @@ void Runtime::Unload() {
21842244
// Close IPC socket server
21852245
if (ipc_sock_server_conns_.size())
21862246
IPCClientImport(getpid(), IPC_SOCK_SERVER_CONN_CLOSE_HANDLE,
2187-
NULL, 0, NULL, NULL, NULL);
2247+
0, NULL, NULL, NULL, false);
21882248

21892249
svm_profile_.reset(nullptr);
21902250

0 commit comments

Comments
 (0)