Merge branch 'main' into rust_dl_apis_serialization

evgeny-leksikov · web-flow · commit 4a23c6880d44 · 2025-11-28T09:53:37.000+01:00
diff --git a/.gitlab/test_cpp.sh b/.gitlab/test_cpp.sh
@@ -41,10 +41,6 @@ export PATH=${INSTALL_DIR}/bin:$PATH
 export PKG_CONFIG_PATH=${INSTALL_DIR}/lib/pkgconfig:$PKG_CONFIG_PATH
 export NIXL_PLUGIN_DIR=${INSTALL_DIR}/lib/$ARCH-linux-gnu/plugins
 
-# Set UCX GDA max system latency to allow GDA on SYS topology
-# TODO: Remove this once CI setups have better GPU-NIC locality
-# export UCX_IB_GDA_MAX_SYS_LATENCY=1us
-
 echo "==== Show system info ===="
 env
 nvidia-smi topo -m || true
@@ -116,8 +112,7 @@ kill -s INT $telePID
 # fi
 
 # shellcheck disable=SC2154
-# TODO: enable PrepGpuSignal and ucxDeviceApi tests once the problem in UCX is fixed
-gtest-parallel --workers=1 --serialize_test_cases ./bin/gtest -- --min-tcp-port="$min_gtest_port" --max-tcp-port="$max_gtest_port" --gtest_filter=-*PrepGpuSignal*:*ucxDeviceApi*
+gtest-parallel --workers=1 --serialize_test_cases ./bin/gtest -- --min-tcp-port="$min_gtest_port" --max-tcp-port="$max_gtest_port"
 ./bin/test_plugin
 
 # Run NIXL client-server test
diff --git a/meson.build b/meson.build
@@ -201,9 +201,14 @@ if ucx_dep.found() and cuda_dep.found() and nvcc_prog.found()
         ''', dependencies : [ucx_dep, doca_gpunetio_dep], args: nvcc_flags)
 
     have_host_side = cpp.compiles('''
+            #include <ucp/api/ucp_version.h>
             #include <ucp/api/device/ucp_host.h>
-            int main() { return 0; }
-        ''', dependencies: ucx_dep)
+            #include <ucs/sys/compiler_def.h>
+            int main() {
+                UCS_STATIC_ASSERT(UCP_VERSION(UCP_API_MAJOR, UCP_API_MINOR) >= UCP_VERSION(1, 21));
+                return 0;
+            }
+        ''', dependencies: ucx_dep, name: 'UCX Device API (host-side, version >= 1.21)')
 
     if have_gpu_side and have_host_side
         ucx_gpu_device_api_available = true
diff --git a/src/api/python/__init__.py b/src/api/python/__init__.py
@@ -12,3 +12,23 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from ._api import (
+    DEFAULT_COMM_PORT,
+    nixl_agent,
+    nixl_agent_config,
+    nixl_backend_handle,
+    nixl_prepped_dlist_handle,
+    nixl_xfer_handle,
+)
+
+__all__ = [  # Public API: every name listed here is re-exported from ._api
+    # Constants
+    "DEFAULT_COMM_PORT",
+    # Main classes
+    "nixl_agent",
+    "nixl_agent_config",
+    "nixl_backend_handle",
+    "nixl_prepped_dlist_handle",
+    "nixl_xfer_handle",
+]
diff --git a/src/utils/ucx/ucx_utils.cpp b/src/utils/ucx/ucx_utils.cpp
@@ -425,9 +425,7 @@ nixlUcxContext::nixlUcxContext(std::vector<std::string> devs,
     ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES | UCP_PARAM_FIELD_MT_WORKERS_SHARED;
     ucp_params.features = UCP_FEATURE_RMA | UCP_FEATURE_AMO32 | UCP_FEATURE_AMO64 | UCP_FEATURE_AM;
 #ifdef HAVE_UCX_GPU_DEVICE_API
-    if (ucp_version >= UCP_VERSION(1, 21)) {
-        ucp_params.features |= UCP_FEATURE_DEVICE;
-    }
+    ucp_params.features |= UCP_FEATURE_DEVICE;
 #endif
 
     if (prog_thread)
diff --git a/test/gtest/common.h b/test/gtest/common.h
@@ -27,7 +27,23 @@
 #include <mutex>
 #include "gtest/gtest.h"
 
+#ifdef HAVE_CUDA
+#include <cuda_runtime.h>
+#endif
+
 namespace gtest {
+
+// True when the CUDA device-count query succeeds and reports a device; always false without HAVE_CUDA.
+inline bool
+hasCudaGpu() {
+#ifdef HAVE_CUDA
+    int num_devices = 0;
+    return cudaGetDeviceCount(&num_devices) == cudaSuccess && num_devices > 0;
+#else
+    return false;
+#endif
+}
+
 constexpr const char *
 GetMockBackendName() {
     return "MOCK_BACKEND";
diff --git a/test/gtest/device_api/single_write_test.cu b/test/gtest/device_api/single_write_test.cu
@@ -16,6 +16,9 @@
  */
 
 #include "utils.cuh"
+#include "common.h"
+
+#include <gtest/gtest.h>
 
 namespace gtest::nixl::gpu::single_write {
 
@@ -157,6 +160,9 @@ protected:
 
     void
     SetUp() override {
+        if (!hasCudaGpu()) {
+            GTEST_SKIP() << "No CUDA-capable GPU is available, skipping test.";
+        }
         if (cudaSetDevice(0) != cudaSuccess) {
             FAIL() << "Failed to set CUDA device 0";
         }
diff --git a/test/gtest/test_transfer.cpp b/test/gtest/test_transfer.cpp
@@ -696,6 +696,9 @@ TEST_P(TestTransfer, PrepGpuSignal) {
 #ifndef HAVE_UCX_GPU_DEVICE_API
     GTEST_SKIP() << "UCX GPU device API not available, skipping test";
 #else
+    if (!hasCudaGpu()) {
+        GTEST_SKIP() << "No CUDA-capable GPU is available, skipping test.";
+    }
     size_t gpu_signal_size = 0;
     nixl_opt_args_t extra_params = {.backends = {backend_handles[0]}};
     nixl_status_t size_status = getAgent(0).getGpuSignalSize(gpu_signal_size, &extra_params);