Fix CUDA context leak in the `with cuda.gpus[N]:` context manager (#855)

isVoid · web-flow · commit e5ef7e2b9304 · 2026-04-14T15:45:26.000+01:00
## Summary

- **Fix**: `Device.get_primary_context()` calls `self._dev.set_current()` on first invocation, which leaves the primary context active on the thread. The caller, `_activate_context_for`, then calls `push()`, saving that already-active context onto the stack. On exit, `pop()` restores it — so the context remains current after the `with` block (a leak). The fix records the current context before calling `set_current()` and, if `set_current()` changed it, pops the newly activated context right after obtaining the handle, so that `get_primary_context()` upholds its documented contract: *"Note: it is not pushed to the CPU thread."* When a context for the device is already current, `set_current()` is a no-op and nothing is popped.
- **Tests**: Adds two regression tests in `test_context_stack.py` — one verifying that no context remains after `with cuda.gpus[0]: pass` on a clean stack, and another verifying that a previously active context is properly restored on exit.

Made with [Cursor](https://cursor.com)

---------

Co-authored-by: Michael Wang <isVoid@users.noreply.github.com>
diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
@@ -534,11 +534,18 @@ def get_primary_context(self):
                 f"{self} has compute capability < {MIN_REQUIRED_CC}"
             )
 
+        prev = get_cuda_native_handle(driver.cuCtxGetCurrent())
         self._dev.set_current()
         if CUDA_CORE_GT_0_6:
             ctx_handle = self._dev.context.handle
         else:
             ctx_handle = self._dev.context._handle
+        # set_current() may push a context onto the thread's stack.  Undo
+        # that so callers (_activate_context_for) can push/pop symmetrically.
+        # Only pop when set_current() actually changed the current context;
+        # it is a no-op when a context for this device is already active.
+        if get_cuda_native_handle(driver.cuCtxGetCurrent()) != prev:
+            driver.cuCtxPopCurrent()
         self.primary_context = ctx = Context(
             weakref.proxy(self),
             ctx_handle,
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py b/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
@@ -77,6 +77,51 @@ def switch_gpu():
         self.assertEqual(int(devid), 1)
 
 
+@skip_on_cudasim("CUDA HW required")
+class TestContextLeak(CUDATestCase):
+    """Regression tests for context leaks from the gpu context manager."""
+
+    def test_gpus_context_manager_does_not_leak(self):
+        # Regression test: ``with cuda.gpus[N]`` must not leave a CUDA
+        # context on the thread after the block exits.
+        the_driver = driver.driver
+
+        # Drain any pre-existing contexts from the stack.
+        while the_driver.pop_active_context() is not None:
+            pass
+
+        with cuda.gpus[0]:
+            pass
+
+        # After exiting the context manager the current context must be null.
+        with the_driver.get_active_context() as ac:
+            self.assertIsNone(
+                ac.context_handle,
+                "CUDA context leaked after exiting cuda.gpus context manager",
+            )
+
+    def test_gpus_context_manager_restores_previous_context(self):
+        # If a context is already active before entering the context manager,
+        # it must be restored on exit.
+        the_driver = driver.driver
+
+        # Ensure device-0 context exists and is pushed.
+        outer_ctx = cuda.current_context()
+        outer_handle = int(outer_ctx.handle)
+
+        with cuda.gpus[0]:
+            pass
+
+        with the_driver.get_active_context() as ac:
+            self.assertIsNotNone(ac.context_handle)
+            self.assertEqual(
+                int(ac.context_handle),
+                outer_handle,
+                "Previous context was not restored after exiting "
+                "cuda.gpus context manager",
+            )
+
+
 @skip_on_cudasim("CUDA HW required")
 class Test3rdPartyContext(CUDATestCase):
     def tearDown(self):