Skip to content

Commit 1cc1b82

Browse files
authored
MNNVL fix (#604)
1 parent 542a10f commit 1cc1b82

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

src/registered_memory.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ RegisteredMemory::Impl::Impl(const std::vector<char>::const_iterator& begin,
204204
if (getHostHash() == this->hostHash && getPidHash() == this->pidHash) {
205205
// The memory is local to the process, so originalDataPtr is valid as is
206206
this->data = this->originalDataPtr;
207-
} else if (transports.has(Transport::CudaIpc) && getHostHash() == this->hostHash) {
207+
} else if (transports.has(Transport::CudaIpc)) {
208208
// The memory is not local to this process (same machine, or presumably a remote node reachable via MNNVL -- TODO confirm), so we need to open the CUDA IPC handle
209209
auto entry = getTransportInfo(Transport::CudaIpc);
210210
void* base;

src/semaphore.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Licensed under the MIT license.
33

44
#include <mscclpp/semaphore.hpp>
5+
#include <mscclpp/gpu_utils.hpp>
56

67
#include "api.h"
78
#include "atomic.hpp"
@@ -26,6 +27,11 @@ struct SemaphoreStub::Impl {
2627
};
2728

2829
static std::shared_ptr<uint64_t> gpuCallocToken() {
30+
#if (CUDA_NVLS_API_AVAILABLE)
31+
if (isNvlsSupported()) {
32+
return detail::gpuCallocPhysicalShared<uint64_t>(1, 0);
33+
}
34+
#endif // CUDA_NVLS_API_AVAILABLE
2935
#if defined(MSCCLPP_DEVICE_HIP)
3036
return detail::gpuCallocUncachedShared<uint64_t>();
3137
#else // !defined(MSCCLPP_DEVICE_HIP)

0 commit comments

Comments
 (0)