Skip to content

Commit 136391c

Browse files
authored
[TransferEngine] feature: introduce USE_NVMEOF to enable NVMe-oF separately. (kvcache-ai#106)
Signed-off-by: doujiang24 <doujiang24@gmail.com>
1 parent 0e47414 commit 136391c

File tree

9 files changed: 33 additions and 15 deletions.

CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ add_definitions(-DCONFIG_ERDMA)
2727
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
2828

2929
option(USE_CUDA "option for using gpu direct" OFF)
30+
option(USE_NVMEOF "option for using NVMe over Fabric" OFF)
3031
option(USE_CXL "option for using cxl protocol" OFF)
3132
option(USE_ETCD "option for enable etcd as metadata server" ON)
3233
option(USE_REDIS "option for enable redis as metadata server" OFF)
@@ -38,7 +39,16 @@ option(WITH_RUST_EXAMPLE "build the Rust interface and sample code for the trans
3839
if (USE_CUDA)
3940
add_compile_definitions(USE_CUDA)
4041
message(STATUS "CUDA support is enabled")
42+
43+
if (USE_NVMEOF)
44+
add_compile_definitions(USE_NVMEOF)
45+
message(STATUS "NVMe-oF support is enabled")
46+
endif()
47+
4148
include_directories(/usr/local/cuda/include)
49+
link_directories(/usr/local/cuda/lib)
50+
elseif(USE_NVMEOF)
51+
message(FATAL_ERROR "Cannot enable USE_NVMEOF without USE_CUDA")
4252
endif()
4353

4454
if (USE_REDIS)

mooncake-transfer-engine/example/transfer_engine_bench.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,10 @@
3030
#ifdef USE_CUDA
3131
#include <bits/stdint-uintn.h>
3232
#include <cuda_runtime.h>
33+
34+
#ifdef USE_NVMEOF
3335
#include <cufile.h>
36+
#endif
3437

3538
#include <cassert>
3639

@@ -41,7 +44,6 @@ static void checkCudaError(cudaError_t result, const char *message) {
4144
exit(EXIT_FAILURE);
4245
}
4346
}
44-
4547
#endif
4648

4749
const static int NR_SOCKETS =
@@ -258,7 +260,7 @@ std::string loadNicPriorityMatrix() {
258260
" \"cpu:1\": [[" +
259261
device_names +
260262
"], []], "
261-
" \"gpu:0\": [[" +
263+
" \"cuda:0\": [[" +
262264
device_names + "], []]}";
263265
}
264266

@@ -294,7 +296,7 @@ int initiator() {
294296
if (FLAGS_use_vram) LOG(INFO) << "VRAM is used";
295297
for (int i = 0; i < buffer_num; ++i) {
296298
addr[i] = allocateMemoryPool(FLAGS_buffer_size, i, FLAGS_use_vram);
297-
std::string name_prefix = FLAGS_use_vram ? "gpu:" : "cpu:";
299+
std::string name_prefix = FLAGS_use_vram ? "cuda:" : "cpu:";
298300
int rc = engine->registerLocalMemory(addr[i], FLAGS_buffer_size,
299301
name_prefix + std::to_string(i));
300302
LOG_ASSERT(!rc);

mooncake-transfer-engine/src/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,5 +27,8 @@ target_link_libraries(transfer_engine PUBLIC transport rdma_transport ibverbs gl
2727

2828
if (USE_CUDA)
2929
target_include_directories(transfer_engine PRIVATE /usr/local/cuda/include)
30-
target_link_libraries(transfer_engine PUBLIC nvmeof_transport cuda cufile cudart rt)
30+
target_link_libraries(transfer_engine PUBLIC cuda cudart rt)
31+
if (USE_NVMEOF)
32+
target_link_libraries(transfer_engine PUBLIC nvmeof_transport cufile)
33+
endif()
3134
endif()

mooncake-transfer-engine/src/multi_transport.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
#include "transport/rdma_transport/rdma_transport.h"
1818
#include "transport/tcp_transport/tcp_transport.h"
1919
#include "transport/transport.h"
20-
#ifdef USE_CUDA
20+
#ifdef USE_NVMEOF
2121
#include "transport/nvmeof_transport/nvmeof_transport.h"
2222
#endif
2323

@@ -109,8 +109,7 @@ int MultiTransport::getTransferStatus(BatchID batch_id, size_t task_id,
109109
status.transferred_bytes = task.transferred_bytes;
110110
uint64_t success_slice_count = task.success_slice_count;
111111
uint64_t failed_slice_count = task.failed_slice_count;
112-
if (success_slice_count + failed_slice_count ==
113-
task.slice_count) {
112+
if (success_slice_count + failed_slice_count == task.slice_count) {
114113
if (failed_slice_count) {
115114
status.s = Transport::TransferStatusEnum::FAILED;
116115
} else {
@@ -131,7 +130,7 @@ Transport *MultiTransport::installTransport(const std::string &proto,
131130
} else if (std::string(proto) == "tcp") {
132131
transport = new TcpTransport();
133132
}
134-
#ifdef USE_CUDA
133+
#ifdef USE_NVMEOF
135134
else if (std::string(proto) == "nvmeof") {
136135
transport = new NVMeoFTransport();
137136
}

mooncake-transfer-engine/src/transport/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ add_library(transport OBJECT ${XPORT_SOURCES} $<TARGET_OBJECTS:rdma_transport>)
66
add_subdirectory(tcp_transport)
77
target_sources(transport PUBLIC $<TARGET_OBJECTS:tcp_transport>)
88

9-
if (USE_CUDA)
9+
if (USE_NVMEOF)
1010
add_subdirectory(nvmeof_transport)
1111
target_sources(transport PUBLIC $<TARGET_OBJECTS:nvmeof_transport>)
1212
endif()

mooncake-transfer-engine/src/transport/nvmeof_transport/cufile_context.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
#ifdef USE_CUDA
15+
#ifdef USE_NVMEOF
1616

1717
// TBD
1818

mooncake-transfer-engine/tests/rdma_transport_test.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,10 @@
3636
#ifdef USE_CUDA
3737
#include <bits/stdint-uintn.h>
3838
#include <cuda_runtime.h>
39+
40+
#ifdef USE_NVMEOF
3941
#include <cufile.h>
42+
#endif
4043

4144
#include <cassert>
4245

@@ -47,7 +50,6 @@ static void checkCudaError(cudaError_t result, const char *message) {
4750
exit(EXIT_FAILURE);
4851
}
4952
}
50-
5153
#endif
5254

5355
#define NR_SOCKETS (1)
@@ -234,7 +236,7 @@ std::string loadNicPriorityMatrix() {
234236
" \"cpu:1\": [[" +
235237
device_names +
236238
"], []], "
237-
" \"gpu:0\": [[" +
239+
" \"cuda:0\": [[" +
238240
device_names + "], []]}";
239241
}
240242

@@ -268,7 +270,7 @@ int initiator() {
268270
#ifdef USE_CUDA
269271
addr = allocateMemoryPool(ram_buffer_size, 0, FLAGS_use_vram);
270272
int rc = engine->registerLocalMemory(addr, ram_buffer_size,
271-
FLAGS_use_vram ? "gpu:0" : "cpu:0");
273+
FLAGS_use_vram ? "cuda:0" : "cpu:0");
272274
LOG_ASSERT(!rc);
273275
#else
274276
addr = allocateMemoryPool(ram_buffer_size, 0, false);

mooncake-transfer-engine/tests/rdma_transport_test2.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ std::string loadNicPriorityMatrix() {
9090
" \"cpu:1\": [[" +
9191
device_names +
9292
"], []], "
93-
" \"gpu:0\": [[" +
93+
" \"cuda:0\": [[" +
9494
device_names + "], []]}";
9595
}
9696

mooncake-transfer-engine/tests/tcp_transport_test.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,10 @@
2525
#ifdef USE_CUDA
2626
#include <bits/stdint-uintn.h>
2727
#include <cuda_runtime.h>
28+
29+
#ifdef USE_NVMEOF
2830
#include <cufile.h>
31+
#endif
2932

3033
#include <cassert>
3134

@@ -36,7 +39,6 @@ static void checkCudaError(cudaError_t result, const char *message) {
3639
exit(EXIT_FAILURE);
3740
}
3841
}
39-
4042
#endif
4143

4244
#include "transfer_engine.h"

0 commit comments

Comments
 (0)