Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
cf77bdb
first working dispatch and combine primitive for k=1
samnordmann Jan 21, 2026
66e7811
add comments and cleanup
samnordmann Jan 21, 2026
dda9aa7
add kernel based a2av and cuda backend for d/c
samnordmann Jan 22, 2026
7aa2de8
unstable - add nixl backend
x41lakazam Feb 25, 2026
9a8a377
unstable
x41lakazam Feb 26, 2026
0f21528
add python build changes for nixl
x41lakazam Feb 26, 2026
6144827
fix typo
x41lakazam Feb 26, 2026
04a9133
merge main
x41lakazam Feb 26, 2026
b32587a
restore main:
x41lakazam Feb 26, 2026
f8a94fc
fix bug where zero-length buffer was passed to nixl
x41lakazam Feb 26, 2026
a6b6f87
Reduce probe size to 1
x41lakazam Feb 26, 2026
95460af
Address PR comments.
x41lakazam Mar 1, 2026
41ec0ac
typos
x41lakazam Mar 4, 2026
d63ffd7
set getAgentName to inline
x41lakazam Mar 4, 2026
86e5028
fix comments in nixl.cpp
x41lakazam Mar 4, 2026
7283aa8
clean ifdef USE_NIXL statements
x41lakazam Mar 4, 2026
a085c54
inline exchangeMetadata inside registerTensors
x41lakazam Mar 8, 2026
13ae58f
include deviceId (rank) inside TensorDesc
x41lakazam Mar 8, 2026
149c15a
remove useless handleImpl.isPrepared
x41lakazam Mar 8, 2026
1b41788
add thread yield in wait transfer loop
x41lakazam Mar 8, 2026
2eccaa5
remove remote_rank from prepare transfer
x41lakazam Mar 8, 2026
10d010a
add nixlbackend::impl when use_nixl is false
x41lakazam Mar 8, 2026
e9062a4
Move exchangeMetadata to private when USE_NIXL is false
x41lakazam Mar 9, 2026
9047991
fix CI: clang-format, clang-tidy, and trailing newline
x41lakazam Mar 9, 2026
b3b5fbd
fix ci
x41lakazam Mar 9, 2026
b283c2a
fix CI
x41lakazam Mar 9, 2026
c4726d0
Separate device and rank in tensordesc for more clarity
x41lakazam Mar 9, 2026
7ff4aae
fix linter
x41lakazam Mar 10, 2026
0739240
Update cmake config
x41lakazam Mar 10, 2026
5560230
Fix CI
x41lakazam Mar 10, 2026
ec01db9
Fix no-headers in NIXL install instructions (Cmake config)
x41lakazam Mar 10, 2026
67a92f0
Replace NVFUSER_STANDALONE_BUILD_WITH_NIXL by NVFUSER_BUILD_WITH_NIXL
x41lakazam Mar 10, 2026
dae35aa
move nixl linkage from CmakeList to handle_nixl.cmake
x41lakazam Mar 10, 2026
a9fb56d
move TPL locs to handle_nixl.cmake
x41lakazam Mar 10, 2026
e364949
Fix - move nixl linkage to handle_nixl.cmake
x41lakazam Mar 10, 2026
f74078f
fix linter
x41lakazam Mar 10, 2026
9847a11
Add NIXL to CI image
x41lakazam Mar 11, 2026
40758d6
remove import nixl from install-nixl.sh
x41lakazam Mar 11, 2026
bf471ea
Add transitive shared libs deps for nixl
x41lakazam Mar 12, 2026
df6c384
Add nixl*.mesonpy.libs and nixl*.libs as shared lib dirs in CI's inst…
x41lakazam Mar 12, 2026
1c10779
try to make nixl tests work
x41lakazam Mar 16, 2026
5eca79f
remove nixl from clang build
x41lakazam Mar 17, 2026
deddb46
Merge branch 'main' into dispatch_combine/nixl_backend
x41lakazam Mar 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ set(NVFUSER_CUTLASS "${NVFUSER_ROOT}/cutlass")
set(NVFUSER_THIRD_PARTY_DIR "${NVFUSER_ROOT}/third_party")

option(NVFUSER_STANDALONE_BUILD_WITH_UCC "" OFF)
option(NVFUSER_STANDALONE_BUILD_WITH_NIXL "" OFF)
option(NVFUSER_EXPLICIT_ERROR_CHECK "" OFF)
option(NVFUSER_ENABLE_DEPENDENCY_REPORT "Enable Python-based dependency reporting and log capture" ON)

Expand Down Expand Up @@ -248,6 +249,7 @@ list(APPEND NVFUSER_SRCS
${NVFUSER_SRCS_DIR}/multidevice/ipc_utils.cpp
${NVFUSER_SRCS_DIR}/multidevice/device_mesh.cpp
${NVFUSER_SRCS_DIR}/multidevice/executor.cpp
${NVFUSER_SRCS_DIR}/multidevice/nixl.cpp
${NVFUSER_SRCS_DIR}/multidevice/execution_utils.cpp
${NVFUSER_SRCS_DIR}/multidevice/propagation.cpp
${NVFUSER_SRCS_DIR}/multidevice/resharding.cpp
Expand Down Expand Up @@ -583,6 +585,37 @@ if(NVFUSER_STANDALONE_BUILD_WITH_UCC)
target_compile_definitions(codegen_internal PRIVATE NVFUSER_BUILD_WITH_UCC)
endif()

# Optional NIXL support: locate the NIXL headers and libraries, wrap them in an
# INTERFACE target, and link that target into codegen_internal.
if(NVFUSER_STANDALONE_BUILD_WITH_NIXL)
# NIXL_PREFIX is read from the *environment* ($ENV{...}), not from a CMake
# cache variable; passing -DNIXL_PREFIX=... has no effect on the lookups below.
# The header search additionally consults the CPATH environment variable.
find_path(NIXL_INCLUDE_DIR nixl.h
HINTS $ENV{NIXL_PREFIX}/include ENV CPATH
)
# Main NIXL library; searched under lib, lib64 and the x86_64 multiarch
# directory of NIXL_PREFIX.
find_library(NIXL_LIBRARY nixl
HINTS $ENV{NIXL_PREFIX}/lib $ENV{NIXL_PREFIX}/lib64 $ENV{NIXL_PREFIX}/lib/x86_64-linux-gnu
)
# Secondary "nixl_build" library. It is treated as optional: it is not part of
# the FATAL_ERROR check below and is only linked when found.
find_library(NIXL_BUILD_LIBRARY nixl_build
HINTS $ENV{NIXL_PREFIX}/lib $ENV{NIXL_PREFIX}/lib64 $ENV{NIXL_PREFIX}/lib/x86_64-linux-gnu
)

# Abort configuration if either the header directory or the main library is
# missing, since the option was explicitly requested.
if(NOT NIXL_INCLUDE_DIR OR NOT NIXL_LIBRARY)
message(FATAL_ERROR "NIXL not found. Set NIXL_PREFIX to the NIXL install directory.")
endif()

message(STATUS "Found NIXL: ${NIXL_LIBRARY} (include: ${NIXL_INCLUDE_DIR})")
if(NIXL_BUILD_LIBRARY)
message(STATUS "Found NIXL build lib: ${NIXL_BUILD_LIBRARY}")
endif()

# Bundle the include directory and libraries into one INTERFACE target so that
# consumers need a single target_link_libraries() call.
add_library(__nvfuser_nixl INTERFACE)
target_include_directories(__nvfuser_nixl INTERFACE ${NIXL_INCLUDE_DIR})
target_link_libraries(__nvfuser_nixl INTERFACE ${NIXL_LIBRARY})
if(NIXL_BUILD_LIBRARY)
target_link_libraries(__nvfuser_nixl INTERFACE ${NIXL_BUILD_LIBRARY})
endif()
target_link_libraries(codegen_internal PRIVATE __nvfuser_nixl)
# NOTE(review): USE_NIXL is PRIVATE, so only sources compiled as part of
# codegen_internal see it. Any other target that includes a header guarded by
# `#ifdef USE_NIXL` will compile the non-NIXL branch — confirm this is intended.
target_compile_definitions(codegen_internal PRIVATE USE_NIXL)
endif()

add_dependencies(codegen_internal flatc build_flatbuffer_config)

# installing nvfuser headers
Expand Down Expand Up @@ -1031,6 +1064,7 @@ if(BUILD_TEST)
${NVFUSER_ROOT}/tests/cpp/test_multidevice_lower_communication.cpp
${NVFUSER_ROOT}/tests/cpp/test_multidevice_lower_communication_cuda.cpp
${NVFUSER_ROOT}/tests/cpp/test_multidevice_matmul.cpp
${NVFUSER_ROOT}/tests/cpp/test_multidevice_nixl.cpp
${NVFUSER_ROOT}/tests/cpp/test_multidevice_pipeline.cpp
${NVFUSER_ROOT}/tests/cpp/test_multidevice_sharding.cpp
${NVFUSER_ROOT}/tests/cpp/test_multidevice_stream_parallel_type.cpp
Expand Down Expand Up @@ -1332,6 +1366,11 @@ if(NVFUSER_STANDALONE_BUILD_WITH_UCC)
message(STATUS " UCX_DIR : $ENV{UCX_DIR}")
endif()
message(STATUS " NVFUSER_STANDALONE_BUILD_WITH_UCC : ${NVFUSER_STANDALONE_BUILD_WITH_UCC}")
message(STATUS " NVFUSER_STANDALONE_BUILD_WITH_NIXL : ${NVFUSER_STANDALONE_BUILD_WITH_NIXL}")
# When NIXL support is enabled, also print the header directory and main
# library path that the NIXL_INCLUDE_DIR / NIXL_LIBRARY variables hold.
if(NVFUSER_STANDALONE_BUILD_WITH_NIXL)
message(STATUS " NIXL_INCLUDE_DIR: ${NIXL_INCLUDE_DIR}")
message(STATUS " NIXL_LIBRARY : ${NIXL_LIBRARY}")
endif()
message(STATUS " NVFUSER_BUILD_WITH_ASAN : ${NVFUSER_BUILD_WITH_ASAN}")
message(STATUS " NVFUSER_DISTRIBUTED : ${NVFUSER_DISTRIBUTED}")
message(STATUS " NVFUSER_CPP_STANDARD : ${NVFUSER_CPP_STANDARD}")
Expand Down
3 changes: 3 additions & 0 deletions csrc/multidevice/communicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ std::ostream& operator<<(std::ostream& out, const CommunicatorBackend& cb) {
case CommunicatorBackend::kCuda:
out << "CUDA";
break;
case CommunicatorBackend::kNixl:
out << "NIXL";
break;
}
return out;
}
Expand Down
11 changes: 11 additions & 0 deletions csrc/multidevice/communicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include <ATen/core/ivalue.h>
#include <c10/util/intrusive_ptr.h>

#include <cstdint>
#include <cstring>

#ifdef NVFUSER_DISTRIBUTED
#include <torch/csrc/distributed/c10d/Backend.hpp>
#include <torch/csrc/distributed/c10d/TCPStore.hpp>
Expand Down Expand Up @@ -116,6 +119,12 @@ class NVF_API Communicator {
return ucc_available_;
} else if (backend == CommunicatorBackend::kNccl) {
return nccl_available_;
} else if (backend == CommunicatorBackend::kNixl) {
#ifdef USE_NIXL
return true;
#else
return false;
#endif
}
return false;
}
Expand All @@ -124,6 +133,7 @@ class NVF_API Communicator {
return store_.get();
}


private:
Communicator(
CommunicatorBackend backend = comm_backend_default,
Expand Down Expand Up @@ -155,4 +165,5 @@ class NVF_API Communicator {
std::unordered_map<std::string, c10::intrusive_ptr<c10d::Backend>> backends_;
};


} // namespace nvfuser
2 changes: 1 addition & 1 deletion csrc/multidevice/multidevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ using DeviceType = c10::Device;
using Team = std::vector<DeviceIdxType>;

// Supported backends.
enum class CommunicatorBackend { kNccl, kUcc, kCuda };
enum class CommunicatorBackend { kNccl, kUcc, kCuda, kNixl };
} // namespace nvfuser
Loading
Loading