Skip to content

Commit 4a23c68

Browse files
Merge branch 'main' into rust_dl_apis_serialization
2 parents 0a83f62 + 96a288d commit 4a23c68

File tree

7 files changed

+54 -11 lines changed

.gitlab/test_cpp.sh

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -41,10 +41,6 @@ export PATH=${INSTALL_DIR}/bin:$PATH
4141
export PKG_CONFIG_PATH=${INSTALL_DIR}/lib/pkgconfig:$PKG_CONFIG_PATH
4242
export NIXL_PLUGIN_DIR=${INSTALL_DIR}/lib/$ARCH-linux-gnu/plugins
4343

44-
# Set UCX GDA max system latency to allow GDA on SYS topology
45-
# TODO: Remove this once CI setups have better GPU-NIC locality
46-
# export UCX_IB_GDA_MAX_SYS_LATENCY=1us
47-
4844
echo "==== Show system info ===="
4945
env
5046
nvidia-smi topo -m || true
@@ -116,8 +112,7 @@ kill -s INT $telePID
116112
# fi
117113

118114
# shellcheck disable=SC2154
119-
# TODO: enable PrepGpuSignal and ucxDeviceApi tests once the problem in UCX is fixed
120-
gtest-parallel --workers=1 --serialize_test_cases ./bin/gtest -- --min-tcp-port="$min_gtest_port" --max-tcp-port="$max_gtest_port" --gtest_filter=-*PrepGpuSignal*:*ucxDeviceApi*
115+
gtest-parallel --workers=1 --serialize_test_cases ./bin/gtest -- --min-tcp-port="$min_gtest_port" --max-tcp-port="$max_gtest_port"
121116
./bin/test_plugin
122117

123118
# Run NIXL client-server test

meson.build

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -201,9 +201,14 @@ if ucx_dep.found() and cuda_dep.found() and nvcc_prog.found()
201201
''', dependencies : [ucx_dep, doca_gpunetio_dep], args: nvcc_flags)
202202

203203
have_host_side = cpp.compiles('''
204+
#include <ucp/api/ucp_version.h>
204205
#include <ucp/api/device/ucp_host.h>
205-
int main() { return 0; }
206-
''', dependencies: ucx_dep)
206+
#include <ucs/sys/compiler_def.h>
207+
int main() {
208+
UCS_STATIC_ASSERT(UCP_VERSION(UCP_API_MAJOR, UCP_API_MINOR) >= UCP_VERSION(1, 21));
209+
return 0;
210+
}
211+
''', dependencies: ucx_dep, name: 'UCX Device API (host-side, version >= 1.21)')
207212

208213
if have_gpu_side and have_host_side
209214
ucx_gpu_device_api_available = true

src/api/python/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,3 +12,23 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15+
16+
from ._api import (
17+
DEFAULT_COMM_PORT,
18+
nixl_agent,
19+
nixl_agent_config,
20+
nixl_backend_handle,
21+
nixl_prepped_dlist_handle,
22+
nixl_xfer_handle,
23+
)
24+
25+
__all__ = [
26+
# Constants
27+
"DEFAULT_COMM_PORT",
28+
# Main classes
29+
"nixl_agent",
30+
"nixl_agent_config",
31+
"nixl_backend_handle",
32+
"nixl_prepped_dlist_handle",
33+
"nixl_xfer_handle",
34+
]

src/utils/ucx/ucx_utils.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -425,9 +425,7 @@ nixlUcxContext::nixlUcxContext(std::vector<std::string> devs,
425425
ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES | UCP_PARAM_FIELD_MT_WORKERS_SHARED;
426426
ucp_params.features = UCP_FEATURE_RMA | UCP_FEATURE_AMO32 | UCP_FEATURE_AMO64 | UCP_FEATURE_AM;
427427
#ifdef HAVE_UCX_GPU_DEVICE_API
428-
if (ucp_version >= UCP_VERSION(1, 21)) {
429-
ucp_params.features |= UCP_FEATURE_DEVICE;
430-
}
428+
ucp_params.features |= UCP_FEATURE_DEVICE;
431429
#endif
432430

433431
if (prog_thread)

test/gtest/common.h

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,23 @@
2727
#include <mutex>
2828
#include "gtest/gtest.h"
2929

30+
#ifdef HAVE_CUDA
31+
#include <cuda_runtime.h>
32+
#endif
33+
3034
namespace gtest {
35+
36+
inline bool
37+
hasCudaGpu() {
38+
#ifdef HAVE_CUDA
39+
int count = 0;
40+
auto err = cudaGetDeviceCount(&count);
41+
return (err == cudaSuccess && count > 0);
42+
#else
43+
return false;
44+
#endif
45+
}
46+
3147
constexpr const char *
3248
GetMockBackendName() {
3349
return "MOCK_BACKEND";

test/gtest/device_api/single_write_test.cu

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,9 @@
1616
*/
1717

1818
#include "utils.cuh"
19+
#include "common.h"
20+
21+
#include <gtest/gtest.h>
1922

2023
namespace gtest::nixl::gpu::single_write {
2124

@@ -157,6 +160,9 @@ protected:
157160

158161
void
159162
SetUp() override {
163+
if (!hasCudaGpu()) {
164+
GTEST_SKIP() << "No CUDA-capable GPU is available, skipping test.";
165+
}
160166
if (cudaSetDevice(0) != cudaSuccess) {
161167
FAIL() << "Failed to set CUDA device 0";
162168
}

test/gtest/test_transfer.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -696,6 +696,9 @@ TEST_P(TestTransfer, PrepGpuSignal) {
696696
#ifndef HAVE_UCX_GPU_DEVICE_API
697697
GTEST_SKIP() << "UCX GPU device API not available, skipping test";
698698
#else
699+
if (!hasCudaGpu()) {
700+
GTEST_SKIP() << "No CUDA-capable GPU is available, skipping test.";
701+
}
699702
size_t gpu_signal_size = 0;
700703
nixl_opt_args_t extra_params = {.backends = {backend_handles[0]}};
701704
nixl_status_t size_status = getAgent(0).getGpuSignalSize(gpu_signal_size, &extra_params);

0 commit comments

Comments
 (0)