Merged
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-aarch64.yaml
@@ -10,6 +10,7 @@ dependencies:
- c-compiler
- cloudpickle
- cmake>=3.26.4,!=3.30.0
- cuda-core>=0.3.2
- cuda-cudart-dev
- cuda-nvcc
- cuda-version=12.9
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-x86_64.yaml
@@ -10,6 +10,7 @@ dependencies:
- c-compiler
- cloudpickle
- cmake>=3.26.4,!=3.30.0
- cuda-core>=0.3.2
- cuda-cudart-dev
- cuda-nvcc
- cuda-version=12.9
1 change: 1 addition & 0 deletions conda/environments/all_cuda-131_arch-aarch64.yaml
@@ -10,6 +10,7 @@ dependencies:
- c-compiler
- cloudpickle
- cmake>=3.26.4,!=3.30.0
- cuda-core>=0.3.2
- cuda-cudart-dev
- cuda-nvcc
- cuda-version=13.1
1 change: 1 addition & 0 deletions conda/environments/all_cuda-131_arch-x86_64.yaml
@@ -10,6 +10,7 @@ dependencies:
- c-compiler
- cloudpickle
- cmake>=3.26.4,!=3.30.0
- cuda-core>=0.3.2
- cuda-cudart-dev
- cuda-nvcc
- cuda-version=13.1
6 changes: 2 additions & 4 deletions conda/recipes/ucxx/recipe.yaml
@@ -290,8 +290,7 @@ outputs:
- ${{ pin_subpackage("libucxx", exact=True) }}
- cuda-cudart-dev
run:
- numba >=0.60.0,<0.62.0
- numba-cuda >=0.22.1
- cuda-core >=0.3.2
- numpy >=1.23,<3.0
# 'nvidia-ml-py' provides the 'pynvml' module
- nvidia-ml-py>=12
@@ -431,8 +430,7 @@ outputs:
- setuptools>=77.0.0
- wheel
run:
- numba >=0.60.0,<0.62.0
- numba-cuda >=0.22.1
- cuda-core >=0.3.2
- python
- pyyaml >=6
- rapids-dask-dependency ${{ rapids_version }}
44 changes: 4 additions & 40 deletions dependencies.yaml
@@ -297,52 +297,14 @@ dependencies:
- &numpy numpy>=1.23,<3.0
# 'nvidia-ml-py' provides the 'pynvml' module
- nvidia-ml-py>=12
- output_types: [conda]
packages:
- &numba_cuda numba-cuda>=0.22.1
specific:
- output_types: [requirements, pyproject]
matrices:
- matrix:
cuda: "12.*"
cuda_suffixed: "true"
packages:
- &numba_cuda_cu12 numba-cuda[cu12]>=0.22.1
- matrix:
cuda: "13.*"
cuda_suffixed: "true"
packages:
- &numba_cuda_cu13 numba-cuda[cu13]>=0.22.1
# fallback to numba-cuda with no extra CUDA packages if 'cuda_suffixed' isn't true
- matrix:
packages:
- *numba_cuda
- cuda-core>=0.3.2
run_python_distributed_ucxx:
common:
- output_types: [conda, requirements, pyproject]
packages:
- rapids-dask-dependency==26.4.*,>=0.0.0a0
- pyyaml>=6
- output_types: [conda]
packages:
- *numba_cuda
specific:
- output_types: [requirements, pyproject]
matrices:
- matrix:
cuda: "12.*"
cuda_suffixed: "true"
packages:
- *numba_cuda_cu12
- matrix:
cuda: "13.*"
cuda_suffixed: "true"
packages:
- *numba_cuda_cu13
# fallback to numba-cuda with no extra CUDA packages if 'cuda_suffixed' isn't true
- matrix:
packages:
- *numba_cuda
- cuda-core>=0.3.2
test_cpp:
common:
- output_types: conda
@@ -357,13 +319,15 @@ dependencies:
- pytest-asyncio>=1.0.0
- pytest-rerunfailures!=16.0.0 # See https://github.com/pytest-dev/pytest-rerunfailures/issues/302
- rapids-dask-dependency==26.4.*,>=0.0.0a0
- numba-cuda>=0.22.1
Member:
I understand why this is moving to a test-only dependency, but is it now safe to install only numba-cuda instead of numba-cuda[cu12] / numba-cuda[cu13] at test time?

I think we'd still want to preserve the structure that was removed above, like this:

      - output_types: [conda]
        packages:
          - *numba_cuda
    specific:
      - output_types: [requirements, pyproject]
        matrices:
          - matrix:
              cuda: "12.*"
              cuda_suffixed: "true"
            packages:
              - *numba_cuda_cu12
          - matrix:
              cuda: "13.*"
              cuda_suffixed: "true"
            packages:
              - *numba_cuda_cu13
          # fallback to numba-cuda with no extra CUDA packages if 'cuda_suffixed' isn't true
          - matrix:
            packages:
              - *numba_cuda

To be sure we get the correct dependency pins based on major CUDA version.
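The selection the reviewer is describing can be mirrored in a few lines of Python; a minimal sketch (the function name is illustrative, the pin values are taken from the diff):

```python
def numba_cuda_requirement(cuda_version, suffixed):
    """Pick the numba-cuda requirement string the way the
    dependencies.yaml matrices do: add a [cuXY] extra only when the
    build is CUDA-suffixed and the major version is 12 or 13,
    otherwise fall back to plain numba-cuda."""
    base = "numba-cuda"
    if suffixed and cuda_version:
        major = cuda_version.split(".")[0]
        if major in ("12", "13"):
            base = f"numba-cuda[cu{major}]"
    return f"{base}>=0.22.1"
```

For example, `numba_cuda_requirement("12.*", True)` yields `numba-cuda[cu12]>=0.22.1`, while the fallback matrix corresponds to `numba_cuda_requirement(None, False)`.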

Member Author:
You're very much right, that was my mistake, thanks for catching it! Could you please check whether 0316334 correctly resolves it?

Member:
yep that looks perfect, thanks!

@ me for an admin-merge once you get a ucxx-python-codeowners approval, if the check-nightly-ci isn't fixed yet.

test_python_distributed_ucxx:
common:
- output_types: [conda, requirements, pyproject]
packages:
- *numpy
- pytest<9.0.0
- pytest-rerunfailures!=16.0.0 # See https://github.com/pytest-dev/pytest-rerunfailures/issues/302
- numba-cuda>=0.22.1
depends_on_cupy:
common:
- output_types: conda
37 changes: 12 additions & 25 deletions python/distributed-ucxx/distributed_ucxx/ucxx.py
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

"""
@@ -94,13 +94,12 @@ class CudaStream(Enum):


def synchronize_stream(stream: CudaStream = CudaStream.Default):
import numba.cuda
from ucxx._cuda_context import synchronize_default_stream

if stream == CudaStream.Default:
numba_stream = numba.cuda.default_stream()
synchronize_default_stream()
else:
raise ValueError("Unsupported stream")
numba_stream.synchronize()


class gc_disabled:
@@ -246,11 +245,11 @@ def init_once():
or ("cuda" in ucx_tls and "^cuda" not in ucx_tls)
):
try:
import numba.cuda
except ImportError:
from ucxx._cuda_context import ensure_cuda_context
except ImportError as e:
raise ImportError(
"CUDA support with UCX requires Numba for context management"
)
"CUDA support with UCX requires cuda-core for context management."
) from e

cuda_visible_device = get_device_index_and_uuid(
os.environ.get("CUDA_VISIBLE_DEVICES", "0").split(",")[0]
@@ -261,7 +260,7 @@ def init_once():
pre_existing_cuda_context.device_info, os.getpid()
)

numba.cuda.current_context()
ensure_cuda_context(0)

cuda_context_created = has_cuda_context()
if (
@@ -291,7 +290,8 @@ def init_once():

pool_size_str = get_rmm_config("pool-size")

# Find the function, `cuda_array()`, to use when allocating new CUDA arrays
# Find the function, `cuda_array()`, to use when allocating new CUDA arrays.
# RMM is required for CUDA array allocation at runtime (numba is only for tests).
try:
import rmm

@@ -304,22 +304,9 @@ def device_array(n):
pool_allocator=True, managed_memory=False, initial_pool_size=pool_size
)
except ImportError:
try:
import numba.cuda

def numba_device_array(n):
a = numba.cuda.device_array((n,), dtype="u1")
weakref.finalize(a, numba.cuda.current_context)
return a

device_array = numba_device_array

except ImportError:

def device_array(n):
raise RuntimeError(
"In order to send/recv CUDA arrays, Numba or RMM is required"
)
def device_array(n):
raise RuntimeError("In order to send/recv CUDA arrays, RMM is required.")

if pool_size_str is not None:
logger.warning(
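The allocator selection in `init_once()` boils down to a try-import with an erroring stub as the fallback; a minimal sketch of that pattern (simplified from the diff, without the RMM pool setup, and wrapped in a hypothetical factory function for illustration):

```python
def make_device_array_factory():
    """Prefer RMM for device buffers; otherwise return a stub that
    raises at call time, matching the post-change behavior of
    init_once() where numba is no longer a runtime fallback."""
    try:
        import rmm

        def device_array(n):
            # A device buffer of n bytes.
            return rmm.DeviceBuffer(size=n)

    except ImportError:

        def device_array(n):
            raise RuntimeError(
                "In order to send/recv CUDA arrays, RMM is required."
            )

    return device_array
```

The key design point is that the import failure is resolved once, at setup time, while the error (if any) is deferred until a CUDA array is actually requested.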
3 changes: 2 additions & 1 deletion python/distributed-ucxx/pyproject.toml
@@ -20,7 +20,7 @@ license = "BSD-3-Clause"
license-files = ["LICENSE"]
requires-python = ">=3.11"
dependencies = [
"numba-cuda>=0.22.1",
"cuda-core>=0.3.2",
"pyyaml>=6",
"rapids-dask-dependency==26.4.*,>=0.0.0a0",
"ucxx==0.49.*,>=0.0.0a0",
@@ -46,6 +46,7 @@ docs = [
test = [
"cudf==26.4.*,>=0.0.0a0",
"cupy-cuda13x>=13.6.0",
"numba-cuda>=0.22.1",
"numpy>=1.23,<3.0",
"pytest-rerunfailures!=16.0.0",
"pytest<9.0.0",
6 changes: 3 additions & 3 deletions python/ucxx/examples/basic.py
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

import argparse
@@ -12,9 +12,9 @@


def _create_cuda_context():
import numba.cuda
from ucxx._cuda_context import ensure_cuda_context

numba.cuda.current_context()
ensure_cuda_context(0)


async def _progress_coroutine(worker):
3 changes: 2 additions & 1 deletion python/ucxx/pyproject.toml
@@ -19,8 +19,8 @@ authors = [
license = "BSD-3-Clause"
requires-python = ">=3.11"
dependencies = [
"cuda-core>=0.3.2",
"libucxx==0.49.*,>=0.0.0a0",
"numba-cuda>=0.22.1",
"numpy>=1.23,<3.0",
"nvidia-ml-py>=12",
"rmm==26.4.*,>=0.0.0a0",
@@ -44,6 +44,7 @@ test = [
"cloudpickle",
"cudf==26.4.*,>=0.0.0a0",
"cupy-cuda13x>=13.6.0",
"numba-cuda>=0.22.1",
"pytest-asyncio>=1.0.0",
"pytest-rerunfailures!=16.0.0",
"pytest<9.0.0",
54 changes: 54 additions & 0 deletions python/ucxx/ucxx/_cuda_context.py
@@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

"""CUDA context management using cuda.core.

Provides helpers to ensure a CUDA context is created and to synchronize
the default stream.
"""


def _get_device_class():
"""Get the Device class from cuda.core."""
try:
from cuda.core import Device

return Device
except ImportError:
try:
from cuda.core.experimental import Device

return Device
except ImportError as e:
raise ImportError(
"CUDA context management requires cuda-core (cuda-core>=0.3.2)."
) from e


def ensure_cuda_context(device_id: int = 0) -> None:
"""Ensure a CUDA context exists for the given device and set it as current.

Parameters
----------
device_id : int, optional
The CUDA device index (default: 0).
"""
Device = _get_device_class()
Device(device_id).set_current()


def synchronize_default_stream(device_id: int = 0) -> None:
"""Synchronize the default CUDA stream of the current device.

Required when coordinating with UCX CUDA transfers (e.g. before send/recv
of CUDA buffers).

Parameters
----------
device_id : int, optional
The CUDA device index (default: 0).
"""
Device = _get_device_class()
device = Device(device_id)
device.set_current()
device.sync()
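The stable-then-experimental import fallback inside `_get_device_class` generalizes to a small helper; a sketch under the assumption that a missing attribute should count as a miss too (the helper name is illustrative, not part of the ucxx API):

```python
import importlib


def import_first_available(module_paths, attr):
    """Return `attr` from the first module in `module_paths` that both
    imports and exposes it; chain the last failure otherwise."""
    last_error = None
    for path in module_paths:
        try:
            return getattr(importlib.import_module(path), attr)
        except (ImportError, AttributeError) as e:
            last_error = e
    raise ImportError(
        f"{attr!r} not found in any of {list(module_paths)!r}"
    ) from last_error
```

With cuda-core installed, `import_first_available(["cuda.core", "cuda.core.experimental"], "Device")` reproduces what `_get_device_class` does, covering versions that still ship `Device` only under the `experimental` namespace.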
6 changes: 3 additions & 3 deletions python/ucxx/ucxx/_lib_async/continuous_ucx_progress.py
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause


@@ -70,9 +70,9 @@ def __eq__(self, other):


def _create_context():
import numba.cuda
from ucxx._cuda_context import ensure_cuda_context

numba.cuda.current_context()
ensure_cuda_context(0)


class ThreadMode(ProgressTask):
6 changes: 3 additions & 3 deletions python/ucxx/ucxx/benchmarks/backends/ucxx_core.py
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

from argparse import Namespace
@@ -17,9 +17,9 @@


def _create_cuda_context(device):
import numba.cuda
from ucxx._cuda_context import ensure_cuda_context

numba.cuda.current_context(0)
ensure_cuda_context(device)


def _transfer_wireup(ep, server):
6 changes: 3 additions & 3 deletions python/ucxx/ucxx/benchmarks/send_recv.py
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

import argparse
@@ -54,10 +54,10 @@ def _get_backend_implementation(backend):

def _set_cuda_device(object_type, device):
if object_type in ["cupy", "rmm"]:
import numba.cuda
from ucxx._cuda_context import ensure_cuda_context

os.environ["CUDA_VISIBLE_DEVICES"] = str(device)
numba.cuda.current_context()
ensure_cuda_context(0)


def server(queue, args):
6 changes: 3 additions & 3 deletions python/ucxx/ucxx/benchmarks/utils.py
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause

import asyncio
@@ -19,12 +19,12 @@


def _ensure_cuda_device(devs, rank):
import numba.cuda
from ucxx._cuda_context import ensure_cuda_context

dev_id = devs[rank % len(devs)]
os.environ["CUDA_VISIBLE_DEVICES"] = str(dev_id)
logger.debug(f"{dev_id=}, {rank=}")
numba.cuda.current_context()
ensure_cuda_context(0)


def get_allocator(
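The rank-to-device mapping in `_ensure_cuda_device` can be sketched on its own, with the GPU context call omitted so the logic stays testable (the function name is illustrative):

```python
import os


def select_device_for_rank(devs, rank):
    """Round-robin a worker rank onto one device and pin it via
    CUDA_VISIBLE_DEVICES. The pinned device then appears to CUDA as
    device 0, which is why ensure_cuda_context(0) follows in
    _ensure_cuda_device."""
    dev_id = devs[rank % len(devs)]
    os.environ["CUDA_VISIBLE_DEVICES"] = str(dev_id)
    return dev_id
```

For example, with `devs=[2, 5, 7]`, rank 4 wraps around to index 1 and pins device 5.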