Skip to content

Commit a7492c5

Browse files
authored
SWDEV-511204 - Mapped virtual memory should use device instead of host context (#213)
Since the sub-buffer(virtual memory that is mapped to device memory) is associated with device memory, it should utilize the device context instead of the host context. The original implementation caused hipMemcpyPeer to not take the P2P path, as the memory object was treated as host memory.
1 parent 6858b0f commit a7492c5

File tree

3 files changed

+22
-10
lines changed

3 files changed

+22
-10
lines changed

hipamd/src/hip_memory.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
539539
hip::Stream* pStream = &stream;
540540
switch (type) {
541541
case hipWriteBuffer:
542-
if (queueDevice != dstMemory->GetDeviceById()) {
542+
if (queueDevice != dstMemory->GetDeviceById() &&
543+
!(dstMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
543544
pStream = hip::getNullStream(dstMemory->GetDeviceById()->context());
544545
amd::Command* cmd = stream.getLastQueuedCommand(true);
545546
if (cmd != nullptr) {
@@ -551,7 +552,8 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
551552
copyMetadata);
552553
break;
553554
case hipReadBuffer:
554-
if (queueDevice != srcMemory->GetDeviceById()) {
555+
if (queueDevice != srcMemory->GetDeviceById() &&
556+
!(srcMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
555557
pStream = hip::getNullStream(srcMemory->GetDeviceById()->context());
556558
amd::Command* cmd = stream.getLastQueuedCommand(true);
557559
if (cmd != nullptr) {
@@ -589,15 +591,17 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
589591
} else if (srcMemory->GetDeviceById() != dstMemory->GetDeviceById()) {
590592
// Scenarios such as DtoH where dst is pinned memory
591593
if ((queueDevice != srcMemory->GetDeviceById()) &&
592-
(dstMemory->getContext().devices().size() != 1)) {
594+
(dstMemory->getContext().devices().size() != 1) &&
595+
!(srcMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
593596
pStream = hip::getNullStream(srcMemory->GetDeviceById()->context());
594597
amd::Command* cmd = stream.getLastQueuedCommand(true);
595598
if (cmd != nullptr) {
596599
waitList.push_back(cmd);
597600
}
598601
// Scenarios such as HtoD where src is pinned memory
599602
} else if ((queueDevice != dstMemory->GetDeviceById()) &&
600-
(srcMemory->getContext().devices().size() != 1)) {
603+
(srcMemory->getContext().devices().size() != 1) &&
604+
!(dstMemory->getMemFlags() & CL_MEM_VA_RANGE_AMD)) {
601605
pStream = hip::getNullStream(dstMemory->GetDeviceById()->context());
602606
amd::Command* cmd = stream.getLastQueuedCommand(true);
603607
if (cmd != nullptr) {

rocclr/device/device.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -537,10 +537,12 @@ amd::Memory* Device::CreateVirtualBuffer(amd::Context& device_context, void* vpt
537537
}
538538
assert(vaddr_base_obj->getMemFlags() & CL_MEM_VA_RANGE_AMD);
539539

540-
size_t offset = (reinterpret_cast<address>(vptr)
541-
- reinterpret_cast<address>(vaddr_base_obj->getSvmPtr()));
542-
Context& ctx = vaddr_base_obj->getContext();
543-
vaddr_sub_obj = new (ctx) amd::Buffer(*vaddr_base_obj,CL_MEM_VA_RANGE_AMD, offset, size);
540+
size_t offset =
541+
(reinterpret_cast<address>(vptr) - reinterpret_cast<address>(vaddr_base_obj->getSvmPtr()));
542+
vaddr_sub_obj =
543+
new (device_context) amd::Buffer(device_context, CL_MEM_VA_RANGE_AMD, size, vptr);
544+
vaddr_sub_obj->SetParent(vaddr_base_obj);
545+
vaddr_sub_obj->setOrigin(offset);
544546

545547
// This curr_mem_obj->create() does not create an actual memory but stores the memory info
546548
// with given vptr on ROCr backend.

rocclr/platform/memory.hpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,11 +329,18 @@ class Memory : public amd::RuntimeObject {
329329

330330
// Accessors
331331
Memory* parent() const { return parent_; }
332-
void SetParent(amd::Memory* parent) { parent_ = parent; }
332+
void SetParent(amd::Memory* parent) {
333+
parent_ = parent;
334+
if (parent != nullptr) {
335+
parent_->isParent_ = true;
336+
parent_->retain();
337+
}
338+
}
333339
bool isParent() const { return isParent_; }
334340
bool ImageView() const { return image_view_; }
335341

336342
size_t getOrigin() const { return origin_; }
343+
void setOrigin(size_t origin) { origin_ = origin; }
337344
size_t getSize() const { return size_; }
338345
Flags getMemFlags() const { return flags_; }
339346
Type getType() const { return type_; }
@@ -721,4 +728,3 @@ class IpcBuffer : public Buffer {
721728
};
722729

723730
} // namespace amd
724-

0 commit comments

Comments
 (0)