Skip to content

Commit 864200e

Browse files
authored
Revert "Changes for RDMA with VMM (#1265)" (#1396)
This reverts commit 915917d.
1 parent 915917d commit 864200e

File tree

8 files changed

+43
-78
lines changed

8 files changed

+43
-78
lines changed

libhsakmt/src/fmm.c

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,9 @@ static HsaMemFlags fmm_translate_ioc_to_hsa_flags(uint32_t ioc_flags)
10611061
}
10621062

10631063
static HSAKMT_STATUS fmm_register_mem_svm_api(void *address,
1064-
uint64_t size, HsaMemFlags flags)
1064+
uint64_t size,
1065+
bool coarse_grain,
1066+
bool ext_coherent)
10651067
{
10661068
struct kfd_ioctl_svm_args *args;
10671069
size_t s_attr;
@@ -1078,11 +1080,10 @@ static HSAKMT_STATUS fmm_register_mem_svm_api(void *address,
10781080
args->size = aligned_size;
10791081
args->op = KFD_IOCTL_SVM_OP_SET_ATTR;
10801082
args->nattr = 2;
1081-
args->attrs[0].type = flags.ui32.CoarseGrain ?
1083+
args->attrs[0].type = coarse_grain ?
10821084
HSA_SVM_ATTR_CLR_FLAGS : HSA_SVM_ATTR_SET_FLAGS;
10831085
args->attrs[0].value = HSA_SVM_FLAG_COHERENT;
1084-
args->attrs[1].type = flags.ui32.ExtendedCoherent ?
1085-
HSA_SVM_ATTR_SET_FLAGS : HSA_SVM_ATTR_CLR_FLAGS;
1086+
args->attrs[1].type = ext_coherent ? HSA_SVM_ATTR_SET_FLAGS : HSA_SVM_ATTR_CLR_FLAGS ;
10861087
args->attrs[1].value = HSA_SVM_FLAG_EXT_COHERENT;
10871088
pr_debug("Registering to SVM %p size: %ld\n", (void*)aligned_addr,
10881089
aligned_size);
@@ -3747,7 +3748,8 @@ bool hsakmt_fmm_get_handle(void *address, uint64_t *handle, uint64_t *size_offse
37473748
static HSAKMT_STATUS fmm_register_user_memory(void *addr,
37483749
HSAuint64 size,
37493750
vm_object_t **obj_ret,
3750-
HsaMemFlags flags)
3751+
bool coarse_grain,
3752+
bool ext_coherent)
37513753
{
37523754
manageable_aperture_t *aperture = svm.dgpu_aperture;
37533755
HSAuint32 page_offset = (HSAuint64)addr & (PAGE_SIZE-1);
@@ -3772,8 +3774,8 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr,
37723774
&aligned_addr, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR |
37733775
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
37743776
KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE |
3775-
(flags.ui32.CoarseGrain ? 0 : KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) |
3776-
(flags.ui32.ExtendedCoherent ? KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT : 0),
3777+
(coarse_grain ? 0 : KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) |
3778+
(ext_coherent ? KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT : 0),
37773779
0,
37783780
&obj);
37793781
if (!svm_addr)
@@ -3811,7 +3813,8 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr,
38113813
HSAKMT_STATUS hsakmt_fmm_register_memory(void *address, uint64_t size_in_bytes,
38123814
uint32_t *gpu_id_array,
38133815
uint32_t gpu_id_array_size,
3814-
HsaMemFlags flags)
3816+
bool coarse_grain,
3817+
bool ext_coherent)
38153818
{
38163819
manageable_aperture_t *aperture = NULL;
38173820
vm_object_t *object = NULL;
@@ -3820,7 +3823,7 @@ HSAKMT_STATUS hsakmt_fmm_register_memory(void *address, uint64_t size_in_bytes,
38203823
if (gpu_id_array_size > 0 && !gpu_id_array)
38213824
return HSAKMT_STATUS_INVALID_PARAMETER;
38223825

3823-
if (flags.ui32.CoarseGrain && flags.ui32.ExtendedCoherent)
3826+
if (coarse_grain && ext_coherent)
38243827
return HSAKMT_STATUS_INVALID_PARAMETER;
38253828

38263829
object = vm_find_object(address, size_in_bytes, &aperture);
@@ -3831,12 +3834,19 @@ HSAKMT_STATUS hsakmt_fmm_register_memory(void *address, uint64_t size_in_bytes,
38313834

38323835
/* Register a new user ptr */
38333836
if (hsakmt_is_svm_api_supported) {
3834-
ret = fmm_register_mem_svm_api(address, size_in_bytes, flags);
3837+
ret = fmm_register_mem_svm_api(address,
3838+
size_in_bytes,
3839+
coarse_grain,
3840+
ext_coherent);
38353841
if (ret == HSAKMT_STATUS_SUCCESS)
38363842
return ret;
38373843
pr_debug("SVM failed, falling back to old registration\n");
38383844
}
3839-
ret = fmm_register_user_memory(address, size_in_bytes, &object, flags);
3845+
ret = fmm_register_user_memory(address,
3846+
size_in_bytes,
3847+
&object,
3848+
coarse_grain,
3849+
ext_coherent);
38403850

38413851
if (ret != HSAKMT_STATUS_SUCCESS)
38423852
return ret;

libhsakmt/src/fmm.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ HSAKMT_STATUS hsakmt_fmm_get_aperture_base_and_limit(aperture_type_e aperture_ty
7575
HSAKMT_STATUS hsakmt_fmm_register_memory(void *address, uint64_t size_in_bytes,
7676
uint32_t *gpu_id_array,
7777
uint32_t gpu_id_array_size,
78-
HsaMemFlags flags);
78+
bool coarse_grain,
79+
bool ext_coherent);
7980
HSAKMT_STATUS hsakmt_fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle,
8081
HsaGraphicsResourceInfo *GraphicsResourceInfo,
8182
uint32_t *gpu_id_array,

libhsakmt/src/memory.c

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -268,11 +268,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress,
268268
/* TODO: support mixed APU and dGPU configurations */
269269
return HSAKMT_STATUS_SUCCESS;
270270

271-
HsaMemFlags flags;
272-
flags.ui32.CoarseGrain = 1;
273-
flags.ui32.ExtendedCoherent = 0;
274271
return hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
275-
NULL, 0, flags);
272+
NULL, 0, true, false);
276273
}
277274

278275
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
@@ -295,14 +292,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
295292
NumberOfNodes, NodeArray);
296293

297294
if (ret == HSAKMT_STATUS_SUCCESS) {
298-
HsaMemFlags flags;
299-
flags.ui32.CoarseGrain = 1;
300-
flags.ui32.ExtendedCoherent = 0;
301-
302295
ret = hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
303296
gpu_id_array,
304297
NumberOfNodes*sizeof(uint32_t),
305-
flags);
298+
true, false);
306299
if (ret != HSAKMT_STATUS_SUCCESS)
307300
free(gpu_id_array);
308301
}
@@ -332,7 +325,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags(void *MemoryAddress,
332325
return HSAKMT_STATUS_NOT_SUPPORTED;
333326

334327
ret = hsakmt_fmm_register_memory(MemoryAddress, MemorySizeInBytes,
335-
NULL, 0, MemFlags);
328+
NULL, 0, MemFlags.ui32.CoarseGrain, MemFlags.ui32.ExtendedCoherent);
336329

337330
return ret;
338331
}

runtime/hsa-runtime/core/inc/amd_memory_region.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,7 @@ class MemoryRegion : public core::MemoryRegion {
105105
hsa_status_t Migrate(uint32_t flag, const void* ptr) const;
106106

107107
hsa_status_t Lock(uint32_t num_agents, const hsa_agent_t* agents,
108-
void* host_ptr, size_t size, uint32_t flags,
109-
void** agent_ptr) const;
108+
void* host_ptr, size_t size, void** agent_ptr) const;
110109

111110
hsa_status_t Unlock(void* host_ptr) const;
112111

runtime/hsa-runtime/core/inc/runtime.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -879,10 +879,12 @@ class Runtime {
879879
};
880880

881881
struct MappedHandle {
882-
MappedHandle(MemoryHandle* mem_handle, AddressHandle* address_handle, void* va,
882+
MappedHandle(MemoryHandle *mem_handle, AddressHandle *address_handle,
883883
uint64_t offset, size_t size, int drm_fd, void *drm_cpu_addr,
884-
hsa_access_permission_t perm, ShareableHandle shareable_handle);
885-
~MappedHandle();
884+
hsa_access_permission_t perm, ShareableHandle shareable_handle)
885+
: mem_handle(mem_handle), address_handle(address_handle),
886+
offset(offset), size(size), drm_fd(drm_fd),
887+
drm_cpu_addr(drm_cpu_addr), shareable_handle(shareable_handle) {}
886888

887889
__forceinline core::Agent* agentOwner() const { return mem_handle->region->owner(); }
888890

runtime/hsa-runtime/core/runtime/amd_memory_region.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,7 @@ hsa_status_t MemoryRegion::Migrate(uint32_t flag, const void* ptr) const {
538538
}
539539

540540
hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents,
541-
void* host_ptr, size_t size, uint32_t flags,
541+
void* host_ptr, size_t size,
542542
void** agent_ptr) const {
543543
if (!IsSystem()) {
544544
return HSA_STATUS_ERROR;
@@ -581,15 +581,9 @@ hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents,
581581
*agent_ptr = host_ptr;
582582
return HSA_STATUS_SUCCESS;
583583
}
584-
HsaMemFlags local_mem_flag = mem_flag_;
585-
if (flags & HSA_AMD_MEMORY_POOL_UNCACHED_FLAG) {
586-
local_mem_flag.ui32.Uncached = 1;
587-
local_mem_flag.ui32.CoarseGrain = 0;
588-
local_mem_flag.ui32.ExtendedCoherent = 0;
589-
}
590584

591585
// Call kernel driver to register and pin the memory.
592-
if (owner()->driver().RegisterMemory(host_ptr, size, local_mem_flag) ==
586+
if (owner()->driver().RegisterMemory(host_ptr, size, const_cast<HsaMemFlags&>(mem_flag_)) ==
593587
HSA_STATUS_SUCCESS) {
594588
uint64_t alternate_va = 0;
595589
if (owner()->driver().MakeMemoryResident(host_ptr, size, &alternate_va, &map_flag_,

runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,7 @@ hsa_status_t hsa_amd_memory_lock(void* host_ptr, size_t size,
758758
const AMD::MemoryRegion* system_region = static_cast<const AMD::MemoryRegion*>(
759759
core::Runtime::runtime_singleton_->system_regions_coarse()[0]);
760760

761-
return system_region->Lock(num_agent, agents, host_ptr, size, 0, agent_ptr);
761+
return system_region->Lock(num_agent, agents, host_ptr, size, agent_ptr);
762762
CATCH;
763763
}
764764

@@ -768,7 +768,7 @@ hsa_status_t hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_
768768
TRY;
769769
IS_OPEN();
770770

771-
if (size == 0 || host_ptr == nullptr || agent_ptr == nullptr) {
771+
if (size == 0 || host_ptr == nullptr || agent_ptr == nullptr || flags != 0) {
772772
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
773773
}
774774

@@ -786,7 +786,7 @@ hsa_status_t hsa_amd_memory_lock_to_pool(void* host_ptr, size_t size, hsa_agent_
786786
if (mem_region->owner()->device_type() != core::Agent::kAmdCpuDevice)
787787
return (hsa_status_t)HSA_STATUS_ERROR_INVALID_MEMORY_POOL;
788788

789-
return mem_region->Lock(num_agent, agents, host_ptr, size, flags, agent_ptr);
789+
return mem_region->Lock(num_agent, agents, host_ptr, size, agent_ptr);
790790
CATCH;
791791
}
792792

runtime/hsa-runtime/core/runtime/runtime.cpp

Lines changed: 6 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,7 @@ hsa_status_t Runtime::CopyMemory(void* dst, const void* src, size_t size) {
531531
const auto& locked_copy = [&](void*& ptr, core::Agent* locking_agent) {
532532
void* tmp;
533533
hsa_agent_t agent = locking_agent->public_handle();
534-
hsa_status_t err = system_region->Lock(1, &agent, ptr, 0, size, &tmp);
534+
hsa_status_t err = system_region->Lock(1, &agent, ptr, size, &tmp);
535535
if (err != HSA_STATUS_SUCCESS) throw AMD::hsa_exception(err, "Lock failed in hsa_memory_copy.");
536536
gpuPtr = ptr;
537537
ptr = tmp;
@@ -965,8 +965,7 @@ hsa_status_t Runtime::VMemoryPtrInfo(const void* ptr, hsa_amd_pointer_info_t* in
965965

966966
for (auto agentPermsIt = mappedHandleIt->second.allowed_agents.begin();
967967
agentPermsIt != mappedHandleIt->second.allowed_agents.end(); agentPermsIt++) {
968-
if ((*agentPermsIt).second.permissions != HSA_ACCESS_PERMISSION_NONE)
969-
allowed_agents.push_back((*agentPermsIt).second.targetAgent->public_handle());
968+
allowed_agents.push_back((*agentPermsIt).second.targetAgent->public_handle());
970969
}
971970

972971
AMD::callback_t<decltype(alloc)> Alloc(alloc);
@@ -3460,7 +3459,7 @@ hsa_status_t Runtime::VMemoryHandleMap(void* va, size_t size, size_t in_offset,
34603459

34613460
mapped_handle_map_.emplace(
34623461
std::piecewise_construct, std::forward_as_tuple(va),
3463-
std::forward_as_tuple(&memoryHandleIt->second, addressHandle, va, offset, size, drm_fd,
3462+
std::forward_as_tuple(&memoryHandleIt->second, addressHandle, offset, size, drm_fd,
34643463
reinterpret_cast<void*>(drm_cpu_addr), HSA_ACCESS_PERMISSION_NONE,
34653464
shareable_handle));
34663465

@@ -3597,49 +3596,16 @@ hsa_status_t Runtime::MappedHandleAllowedAgent::EnableAccess(hsa_access_permissi
35973596
hsa_status_t Runtime::MappedHandleAllowedAgent::RemoveAccess() {
35983597
if (targetAgent->device_type() == core::Agent::DeviceType::kAmdCpuDevice) {
35993598
#if defined(__linux__)
3600-
if (permissions != HSA_ACCESS_PERMISSION_NONE) {
3601-
if (munmap(va, size) != 0) return HSA_STATUS_ERROR;
3602-
3603-
/* We need to keep the CPU mapping. So change it to PROT_NONE */
3604-
void* mapped_ptr = mmap(va, mappedHandle->size, PROT_NONE, MAP_SHARED | MAP_FIXED,
3605-
mappedHandle->drm_fd,
3606-
reinterpret_cast<uint64_t>(mappedHandle->drm_cpu_addr));
3607-
if (mapped_ptr != va)
3608-
return HSA_STATUS_ERROR;
3609-
3610-
permissions = HSA_ACCESS_PERMISSION_NONE;
3611-
}
3599+
if (munmap(va, size) != 0)
3600+
return HSA_STATUS_ERROR;
36123601
#else
36133602
assert(!"Unimplemented!");
36143603
#endif
3604+
return HSA_STATUS_SUCCESS;
36153605
} else {
36163606
return targetAgent->driver().Unmap(
36173607
shareable_handle, va, mappedHandle->offset, mappedHandle->size);
36183608
}
3619-
return HSA_STATUS_SUCCESS;
3620-
}
3621-
3622-
Runtime::MappedHandle::MappedHandle(MemoryHandle *mem_handle, AddressHandle *address_handle,
3623-
void* va, uint64_t offset, size_t size, int drm_fd, void *drm_cpu_addr,
3624-
hsa_access_permission_t perm, ShareableHandle shareable_handle)
3625-
: mem_handle(mem_handle), address_handle(address_handle), offset(offset),
3626-
size(size), drm_fd(drm_fd), drm_cpu_addr(drm_cpu_addr),
3627-
shareable_handle(shareable_handle)
3628-
{
3629-
/* Create a CPU mapping with PROT_NONE */
3630-
auto cpu_agent = static_cast<AMD::GpuAgent*>(agentOwner())->GetNearestCpuAgent();
3631-
auto agentPermsIt = allowed_agents.emplace(std::piecewise_construct,
3632-
std::forward_as_tuple(cpu_agent),
3633-
std::forward_as_tuple(this, cpu_agent, va,
3634-
size, HSA_ACCESS_PERMISSION_NONE))
3635-
.first;
3636-
3637-
auto ret = agentPermsIt->second.EnableAccess(HSA_ACCESS_PERMISSION_NONE);
3638-
if (ret != HSA_STATUS_SUCCESS)
3639-
throw AMD::hsa_exception(ret, "Failed to create default CPU mapping");
3640-
}
3641-
3642-
Runtime::MappedHandle::~MappedHandle() {
36433609
}
36443610

36453611
// Note: VMemorySetAccessPerHandle should be called with &memory_lock_ held

0 commit comments

Comments
 (0)