|
35 | 35 | #include "runtime/etcd/etcd_rt.h" |
36 | 36 | #include "utils/utils.h" |
37 | 37 |
|
| 38 | +// gflags shorthands: each NB_ARG_<TYPE> forwards to the matching DEFINE_<type> |
| 39 | +#define NB_ARG_STRING(name, dflt, help) \ |
| 40 | + DEFINE_string(name, dflt, help); |
| 41 | +#define NB_ARG_BOOL(name, dflt, help) \ |
| 42 | + DEFINE_bool(name, dflt, help); |
| 43 | +#define NB_ARG_UINT64(name, dflt, help) \ |
| 44 | + DEFINE_uint64(name, dflt, help); |
| 45 | +#define NB_ARG_INT32(name, dflt, help) \ |
| 46 | + DEFINE_int32(name, dflt, help); |
| 47 | + |
38 | 48 | /********** |
39 | 49 | * xferBench Config |
40 | 50 | **********/ |
41 | | -DEFINE_string(benchmark_group, "default", |
42 | | - "Name of benchmark group. Use different names to run multiple benchmarks in parallel " |
43 | | - "(Default: default)"); |
44 | | -DEFINE_string(runtime_type, XFERBENCH_RT_ETCD, "Runtime type to use for communication [ETCD]"); |
45 | | -DEFINE_string(worker_type, XFERBENCH_WORKER_NIXL, "Type of worker [nixl, nvshmem]"); |
46 | | -DEFINE_string(backend, XFERBENCH_BACKEND_UCX, |
47 | | - "Name of NIXL backend [UCX, UCX_MO, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ, GUSLI]" |
48 | | - " (only used with nixl worker)"); |
49 | | -DEFINE_string(initiator_seg_type, XFERBENCH_SEG_TYPE_DRAM, "Type of memory segment for initiator" |
50 | | - " [DRAM, VRAM]. Note: Storage backends always use DRAM locally."); |
51 | | -DEFINE_string(target_seg_type, XFERBENCH_SEG_TYPE_DRAM, "Type of memory segment for target" |
52 | | - " [DRAM, VRAM]. Note: Storage backends determine remote type automatically."); |
53 | | -DEFINE_string(scheme, XFERBENCH_SCHEME_PAIRWISE, "Scheme: pairwise, maytoone, onetomany, tp"); |
54 | | -DEFINE_string(mode, XFERBENCH_MODE_SG, "MODE: SG (Single GPU per proc), MG (Multi GPU per proc) [default: SG]"); |
55 | | -DEFINE_string(op_type, XFERBENCH_OP_WRITE, "Op type: READ, WRITE"); |
56 | | -DEFINE_bool(check_consistency, false, "Enable Consistency Check"); |
57 | | -DEFINE_uint64(total_buffer_size, 8LL * 1024 * (1 << 20), "Total buffer" |
58 | | - " size across device for each process (Default: 80 GiB)"); |
59 | | -DEFINE_uint64(start_block_size, 4 * (1 << 10), "Max size of block (Default: 4 KiB)"); |
60 | | -DEFINE_uint64(max_block_size, 64 * (1 << 20), "Max size of block (Default: 64 MiB)"); |
61 | | -DEFINE_uint64(start_batch_size, 1, "Starting size of batch (Default: 1)"); |
62 | | -DEFINE_uint64(max_batch_size, 1, "Max size of batch (starts from 1)"); |
63 | | -DEFINE_int32(num_iter, 1000, "Max iterations"); |
64 | | -DEFINE_int32(large_blk_iter_ftr, 16, "factor to reduce test iteration when testing large block size(>1MB)"); |
65 | | -DEFINE_int32(warmup_iter, 100, "Number of warmup iterations before timing"); |
66 | | -DEFINE_int32 (num_threads, 1, "Number of threads used by benchmark." |
67 | | - " Num_iter must be greater or equal than num_threads and equally divisible by num_threads." |
68 | | - " (Default: 1)"); |
69 | | -DEFINE_int32(num_initiator_dev, 1, "Number of device in initiator process"); |
70 | | -DEFINE_int32(num_target_dev, 1, "Number of device in target process"); |
71 | | -DEFINE_bool(enable_pt, false, "Enable Progress Thread (only used with nixl worker)"); |
72 | | -DEFINE_uint64(progress_threads, 0, "Number of progress threads (default: 0)"); |
73 | | -DEFINE_bool(enable_vmm, false, "Enable VMM memory allocation when DRAM is requested"); |
| 51 | +NB_ARG_STRING(benchmark_group, "default", |
| 52 | + "Name of benchmark group. Use different names to run multiple benchmarks in parallel " |
| 53 | + "(Default: default)") |
| 54 | +NB_ARG_STRING(runtime_type, XFERBENCH_RT_ETCD, "Runtime type to use for communication [ETCD]") |
| 55 | +NB_ARG_STRING(worker_type, XFERBENCH_WORKER_NIXL, "Type of worker [nixl, nvshmem]") |
| 56 | +NB_ARG_STRING(backend, XFERBENCH_BACKEND_UCX, |
| 57 | + "Name of NIXL backend [UCX, UCX_MO, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ, GUSLI]" |
| 58 | + " (only used with nixl worker)") |
| 59 | +NB_ARG_STRING(initiator_seg_type, XFERBENCH_SEG_TYPE_DRAM, "Type of memory segment for initiator" |
| 60 | + " [DRAM, VRAM]. Note: Storage backends always use DRAM locally.") |
| 61 | +NB_ARG_STRING(target_seg_type, XFERBENCH_SEG_TYPE_DRAM, "Type of memory segment for target" |
| 62 | + " [DRAM, VRAM]. Note: Storage backends determine remote type automatically.") |
| 63 | +NB_ARG_STRING(scheme, XFERBENCH_SCHEME_PAIRWISE, "Scheme: pairwise, manytoone, onetomany, tp") |
| 64 | +NB_ARG_STRING(mode, XFERBENCH_MODE_SG, "MODE: SG (Single GPU per proc), MG (Multi GPU per proc) [default: SG]") |
| 65 | +NB_ARG_STRING(op_type, XFERBENCH_OP_WRITE, "Op type: READ, WRITE") |
| 66 | +NB_ARG_BOOL(check_consistency, false, "Enable Consistency Check") |
| 67 | +NB_ARG_UINT64(total_buffer_size, 8LL * 1024 * (1 << 20) /* = 8 GiB */, "Total buffer" |
| 68 | + " size across device for each process (Default: 8 GiB)") |
| 69 | +NB_ARG_UINT64(start_block_size, 4 * (1 << 10), "Starting size of block (Default: 4 KiB)") |
| 70 | +NB_ARG_UINT64(max_block_size, 64 * (1 << 20), "Max size of block (Default: 64 MiB)") |
| 70 | +NB_ARG_UINT64(max_block_size, 64 * (1 << 20), "Max size of block (Default: 64 MiB)") |
| 71 | +NB_ARG_UINT64(start_batch_size, 1, "Starting size of batch (Default: 1)") |
| 72 | +NB_ARG_UINT64(max_batch_size, 1, "Max size of batch (starts from 1)") |
| 73 | +NB_ARG_INT32(num_iter, 1000, "Max iterations") |
| 74 | +NB_ARG_INT32(large_blk_iter_ftr, 16, "factor to reduce test iteration when testing large block size(>1MB)") |
| 75 | +NB_ARG_INT32(warmup_iter, 100, "Number of warmup iterations before timing") |
| 76 | +NB_ARG_INT32(num_threads, 1, "Number of threads used by benchmark." |
| 77 | + " Num_iter must be greater or equal than num_threads and equally divisible by num_threads." |
| 78 | + " (Default: 1)") |
| 79 | +NB_ARG_INT32(num_initiator_dev, 1, "Number of device in initiator process") |
| 80 | +NB_ARG_INT32(num_target_dev, 1, "Number of device in target process") |
| 81 | +NB_ARG_BOOL(enable_pt, false, "Enable Progress Thread (only used with nixl worker)") |
| 82 | +NB_ARG_UINT64(progress_threads, 0, "Number of progress threads (default: 0)") |
| 83 | +NB_ARG_BOOL(enable_vmm, false, "Enable VMM memory allocation when DRAM is requested") |
74 | 84 |
|
75 | 85 | // Storage backend(GDS, GDS_MT, POSIX, HF3FS, OBJ) options |
76 | | -DEFINE_string (filepath, "", "File path for storage operations"); |
77 | | -DEFINE_int32 (num_files, 1, "Number of files used by benchmark"); |
78 | | -DEFINE_bool (storage_enable_direct, false, "Enable direct I/O for storage operations"); |
| 86 | +NB_ARG_STRING(filepath, "", "File path for storage operations") |
| 87 | +NB_ARG_INT32(num_files, 1, "Number of files used by benchmark") |
| 88 | +NB_ARG_BOOL(storage_enable_direct, false, "Enable direct I/O for storage operations") |
79 | 89 |
|
80 | 90 | // GDS options - batch settings apply to the GDS backend; gds_mt_num_threads configures the GDS MT plugin |
81 | | -DEFINE_int32(gds_batch_pool_size, 32, "Batch pool size for GDS operations (default: 32, only used with GDS backend)"); |
82 | | -DEFINE_int32(gds_batch_limit, 128, "Batch limit for GDS operations (default: 128, only used with GDS backend)"); |
83 | | -DEFINE_int32(gds_mt_num_threads, 1, "Number of threads used by GDS MT plugin (Default: 1)"); |
| 91 | +NB_ARG_INT32(gds_batch_pool_size, 32, "Batch pool size for GDS operations (default: 32, only used with GDS backend)") |
| 92 | +NB_ARG_INT32(gds_batch_limit, 128, "Batch limit for GDS operations (default: 128, only used with GDS backend)") |
| 93 | +NB_ARG_INT32(gds_mt_num_threads, 1, "Number of threads used by GDS MT plugin (Default: 1)") |
84 | 94 |
|
85 | 95 | // TODO: We should take rank wise device list as input to extend support |
86 | | -// <rank>:<device_list>, ... |
87 | | -// For example- 0:mlx5_0,mlx5_1,mlx5_2,1:mlx5_3,mlx5_4, ... |
88 | | -DEFINE_string(device_list, "all", "Comma-separated device name to use for" |
89 | | - " communication (only used with nixl worker)"); |
90 | | -DEFINE_string(etcd_endpoints, "", |
91 | | - "ETCD server endpoints for communication (optional for storage backends)"); |
92 | | - |
93 | | -// POSIX options - only used when backend is POSIX |
94 | | -DEFINE_string (posix_api_type, XFERBENCH_POSIX_API_AIO, |
95 | | - "API type for POSIX operations [AIO, URING] (only used with POSIX backend)"); |
96 | | - |
97 | | -// DOCA GPUNetIO options - only used when backend is DOCA GPUNetIO |
98 | | -DEFINE_string(gpunetio_device_list, "0", "Comma-separated GPU CUDA device id to use for" |
99 | | - " communication (only used with nixl worker)"); |
100 | | -// DOCA GPUNetIO options - only used when backend is DOCA GPUNetIO |
101 | | -DEFINE_string(gpunetio_oob_list, "", "Comma-separated OOB network interface name" |
102 | | - " for control path (only used with nixl worker)"); |
103 | | - |
104 | | -// OBJ options - only used when backend is OBJ |
105 | | -DEFINE_string(obj_access_key, "", "Access key for S3 backend"); |
106 | | -DEFINE_string(obj_secret_key, "", "Secret key for S3 backend"); |
107 | | -DEFINE_string(obj_session_token, "", "Session token for S3 backend"); |
108 | | -DEFINE_string(obj_bucket_name, XFERBENCH_OBJ_BUCKET_NAME_DEFAULT, "Bucket name for S3 backend"); |
109 | | -DEFINE_string(obj_scheme, XFERBENCH_OBJ_SCHEME_HTTP, "HTTP scheme for S3 backend [http, https]"); |
110 | | -DEFINE_string(obj_region, XFERBENCH_OBJ_REGION_EU_CENTRAL_1, "Region for S3 backend"); |
111 | | -DEFINE_bool(obj_use_virtual_addressing, false, "Use virtual addressing for S3 backend"); |
112 | | -DEFINE_string(obj_endpoint_override, "", "Endpoint override for S3 backend"); |
113 | | -DEFINE_string(obj_req_checksum, XFERBENCH_OBJ_REQ_CHECKSUM_SUPPORTED, |
114 | | - "Required checksum for S3 backend [supported, required]"); |
115 | | -DEFINE_string(obj_ca_bundle, "", "Path to CA bundle for S3 backend"); |
116 | | - |
117 | | -// HF3FS options - only used when backend is HF3FS |
118 | | -DEFINE_int32(hf3fs_iopool_size, 64, "Size of io memory pool"); |
119 | | - |
120 | | -// GUSLI options - only used when backend is GUSLI |
121 | | -DEFINE_string(gusli_client_name, "NIXLBench", "Client name for GUSLI backend"); |
122 | | -DEFINE_int32(gusli_max_simultaneous_requests, 32, |
123 | | - "Maximum number of simultaneous requests for GUSLI backend"); |
124 | | -DEFINE_string(gusli_config_file, "", |
125 | | - "Configuration file content for GUSLI backend (if empty, auto-generated from device_list)"); |
126 | | -DEFINE_uint64(gusli_bdev_byte_offset, 1048576, |
127 | | - "Byte offset in block device for GUSLI operations (default: 1MB)"); |
128 | | -DEFINE_string(gusli_device_security, "", |
129 | | - "Comma-separated list of security flags per device (e.g. 'sec=0x3,sec=0x71'). " |
130 | | - "If empty or fewer than devices, uses 'sec=0x3' as default. " |
131 | | - "For GUSLI backend, use device_list in format 'id:type:path' where type is F (file) " |
132 | | - "or K (kernel device)."); |
| 96 | +// <rank>:<device_list>, ... |
| 97 | +// For example- 0:mlx5_0,mlx5_1,mlx5_2,1:mlx5_3,mlx5_4, ... |
| 98 | +NB_ARG_STRING(device_list, "all", "Comma-separated device name to use for" |
| 99 | + " communication (only used with nixl worker)") |
| 100 | +NB_ARG_STRING(etcd_endpoints, "", |
| 101 | + "ETCD server endpoints for communication (optional for storage backends)") |
| 102 | +// POSIX options - only used when backend is POSIX |
| 103 | +NB_ARG_STRING(posix_api_type, XFERBENCH_POSIX_API_AIO, |
| 104 | + "API type for POSIX operations [AIO, URING] (only used with POSIX backend)") |
| 105 | +// DOCA GPUNetIO options - only used when backend is DOCA GPUNetIO |
| 106 | +NB_ARG_STRING(gpunetio_device_list, "0", "Comma-separated GPU CUDA device id to use for" |
| 107 | + " communication (only used with nixl worker)") |
| 108 | +// DOCA GPUNetIO options - only used when backend is DOCA GPUNetIO |
| 109 | +NB_ARG_STRING(gpunetio_oob_list, "", "Comma-separated OOB network interface name" |
| 110 | + " for control path (only used with nixl worker)") |
| 111 | +// OBJ options - only used when backend is OBJ |
| 112 | +NB_ARG_STRING(obj_access_key, "", "Access key for S3 backend") |
| 113 | +NB_ARG_STRING(obj_secret_key, "", "Secret key for S3 backend") |
| 114 | +NB_ARG_STRING(obj_session_token, "", "Session token for S3 backend") |
| 115 | +NB_ARG_STRING(obj_bucket_name, XFERBENCH_OBJ_BUCKET_NAME_DEFAULT, "Bucket name for S3 backend") |
| 116 | +NB_ARG_STRING(obj_scheme, XFERBENCH_OBJ_SCHEME_HTTP, "HTTP scheme for S3 backend [http, https]") |
| 117 | +NB_ARG_STRING(obj_region, XFERBENCH_OBJ_REGION_EU_CENTRAL_1, "Region for S3 backend") |
| 118 | +NB_ARG_BOOL(obj_use_virtual_addressing, false, "Use virtual addressing for S3 backend") |
| 119 | +NB_ARG_STRING(obj_endpoint_override, "", "Endpoint override for S3 backend") |
| 120 | +NB_ARG_STRING(obj_req_checksum, XFERBENCH_OBJ_REQ_CHECKSUM_SUPPORTED, |
| 121 | + "Required checksum for S3 backend [supported, required]") |
| 122 | +NB_ARG_STRING(obj_ca_bundle, "", "Path to CA bundle for S3 backend") |
| 123 | +// HF3FS options - only used when backend is HF3FS |
| 124 | +NB_ARG_INT32(hf3fs_iopool_size, 64, "Size of io memory pool") |
| 125 | +// GUSLI options - only used when backend is GUSLI |
| 126 | +NB_ARG_STRING(gusli_client_name, "NIXLBench", "Client name for GUSLI backend") |
| 127 | +NB_ARG_INT32(gusli_max_simultaneous_requests, 32, |
| 128 | + "Maximum number of simultaneous requests for GUSLI backend") |
| 129 | +NB_ARG_STRING(gusli_config_file, "", |
| 130 | + "Configuration file content for GUSLI backend (if empty, auto-generated from device_list)") |
| 131 | +NB_ARG_UINT64(gusli_bdev_byte_offset, 1048576, |
| 132 | + "Byte offset in block device for GUSLI operations (default: 1MB)") |
| 133 | +NB_ARG_STRING(gusli_device_security, "", |
| 134 | + "Comma-separated list of security flags per device (e.g. 'sec=0x3,sec=0x71'). " |
| 135 | + "If empty or fewer than devices, uses 'sec=0x3' as default. " |
| 136 | + "For GUSLI backend, use device_list in format 'id:type:path' where type is F (file) " |
| 137 | + "or K (kernel device).") |
| 138 | + |
| 139 | +#undef NB_ARG_STRING |
| 140 | +#undef NB_ARG_BOOL |
| 141 | +#undef NB_ARG_UINT64 |
| 142 | +#undef NB_ARG_INT32 |
133 | 143 |
|
134 | 144 | std::string xferBenchConfig::runtime_type = ""; |
135 | 145 | std::string xferBenchConfig::worker_type = ""; |
|
0 commit comments