From 55d484a4dd24c2aa87cea2007155c61cbcdfdc67 Mon Sep 17 00:00:00 2001 From: Chris Cameron Date: Sun, 25 Jan 2026 15:59:39 +1300 Subject: [PATCH 1/7] fix: extract handle value in Event.query() call to match other driver calls It looks as if the `query` method missed out on the update in 20a2e3b4. The method includes a try/except statement to catch CUDA_ERROR_NOT_READY, which was falling through to an `else` statement which returned True. This `else` swallowed the exception raised due to providing a non-integer handle and returned True no matter the progress of the stream being queried. --- numba_cuda/numba/cuda/cudadrv/driver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py index c5073a7bb..a9e2a3c74 100644 --- a/numba_cuda/numba/cuda/cudadrv/driver.py +++ b/numba_cuda/numba/cuda/cudadrv/driver.py @@ -2117,14 +2117,14 @@ def query(self): otherwise, returns False. """ try: - driver.cuEventQuery(self.handle) + handle = self.handle.value + driver.cuEventQuery(handle) + return True except CudaAPIError as e: if e.code == enums.CUDA_ERROR_NOT_READY: return False else: raise - else: - return True def record(self, stream=0): """ From b698a5d30e1c9224c9df61049426fb3be34fc50c Mon Sep 17 00:00:00 2001 From: Chris Cameron Date: Sun, 25 Jan 2026 22:06:35 +1300 Subject: [PATCH 2/7] tests: add regression test for Event.query() --- .../numba/cuda/tests/cudadrv/test_events.py | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py index 6ebe5cf58..b69db327a 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: BSD-2-Clause import numpy as np -from numba import cuda +from numba import cuda, int32 from numba.cuda.testing import unittest, 
CUDATestCase from numba.cuda._compat import Device from numba.cuda.testing import skip_on_cudasim @@ -48,6 +48,40 @@ def event_elapsed_inner(self, stream): # Exercise the code path evtstart.elapsed_time(evtend) + def test_event_query(self): + from time import perf_counter + + @cuda.jit + def spin(ms): + # Sleep for ms + for i in range(ms): + cuda.nanosleep(int32(1_000_000)) # 1 ms + + stream = cuda.stream() + evt = cuda.event() + + # Run once to compile + spin[1, 1, stream](1) + + t0 = perf_counter() + spin_ms = 250 + spin[1, 1, stream](250) + evt.record(stream) + + # Query immediately. + while not evt.query(): + event_time = perf_counter() - t0 + + # Syncronize and capture stream-finished time. + evt.synchronize() + sync_time = perf_counter() - t0 + + assert event_time * 1000 > spin_ms * 0.9 # nanosleep isnt reliable + assert sync_time * 1000 > spin_ms * 0.9 # nanosleep isnt reliable + + # Give a few ms overhead for the synchronize call to complete + assert sync_time - event_time < 2e-3 + if __name__ == "__main__": unittest.main() From 876c90f37e5affcfb53237b40d51a82171a13b3e Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Wed, 28 Jan 2026 15:40:31 -0500 Subject: [PATCH 3/7] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- numba_cuda/numba/cuda/tests/cudadrv/test_events.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py index b69db327a..f334d1bb5 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py @@ -69,15 +69,17 @@ def spin(ms): evt.record(stream) # Query immediately. + # Query immediately. 
+ event_time = perf_counter() - t0 while not evt.query(): event_time = perf_counter() - t0 - # Syncronize and capture stream-finished time. + # Synchronize and capture stream-finished time. evt.synchronize() sync_time = perf_counter() - t0 - assert event_time * 1000 > spin_ms * 0.9 # nanosleep isnt reliable - assert sync_time * 1000 > spin_ms * 0.9 # nanosleep isnt reliable + assert event_time * 1000 > spin_ms * 0.9 # nanosleep isn't reliable + assert sync_time * 1000 > spin_ms * 0.9 # nanosleep isn't reliable # Give a few ms overhead for the synchronize call to complete assert sync_time - event_time < 2e-3 From 0951c7cba692fe60ab38f9a9d5d424e86386574b Mon Sep 17 00:00:00 2001 From: Chris Cameron Date: Fri, 30 Jan 2026 10:23:16 +1300 Subject: [PATCH 4/7] remove unnecessary assertion, clarify failure modes. --- numba_cuda/numba/cuda/tests/cudadrv/test_events.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py index f334d1bb5..12398affb 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py @@ -68,7 +68,6 @@ def spin(ms): spin[1, 1, stream](250) evt.record(stream) - # Query immediately. # Query immediately. 
event_time = perf_counter() - t0 while not evt.query(): @@ -78,11 +77,11 @@ def spin(ms): evt.synchronize() sync_time = perf_counter() - t0 - assert event_time * 1000 > spin_ms * 0.9 # nanosleep isn't reliable - assert sync_time * 1000 > spin_ms * 0.9 # nanosleep isn't reliable + # If this assertion fails, it was nanosleep inaccuracy that caused it + assert sync_time * 1000 > spin_ms * 0.9 - # Give a few ms overhead for the synchronize call to complete - assert sync_time - event_time < 2e-3 + # If this assertion fails, the event query returned early + assert event_time * 1000 > spin_ms * 0.9 if __name__ == "__main__": From ddbb792374d9eb3ae6207cdbc44378a4d76270a0 Mon Sep 17 00:00:00 2001 From: Chris Cameron Date: Mon, 9 Feb 2026 15:52:08 +1300 Subject: [PATCH 5/7] fix: remove unnecessary busy-wait and time-based assertions. --- .../numba/cuda/tests/cudadrv/test_events.py | 29 ++++++------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py index 12398affb..db5ebad9a 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py @@ -48,8 +48,10 @@ def event_elapsed_inner(self, stream): # Exercise the code path evtstart.elapsed_time(evtend) + @skip_on_cudasim("Testing cuda.core events requires driver") def test_event_query(self): - from time import perf_counter + stream = cuda.stream() + evt = cuda.event() @cuda.jit def spin(ms): @@ -57,32 +59,19 @@ def spin(ms): for i in range(ms): cuda.nanosleep(int32(1_000_000)) # 1 ms - stream = cuda.stream() - evt = cuda.event() - # Run once to compile spin[1, 1, stream](1) - t0 = perf_counter() - spin_ms = 250 - spin[1, 1, stream](250) + spin_ms = 200 + spin[1, 1, stream](spin_ms) evt.record(stream) - # Query immediately. 
- event_time = perf_counter() - t0 - while not evt.query(): - event_time = perf_counter() - t0 - - # Synchronize and capture stream-finished time. + immediate_query = evt.query() evt.synchronize() - sync_time = perf_counter() - t0 - - # If this assertion fails, it was nanosleep inaccuracy that caused it - assert sync_time * 1000 > spin_ms * 0.9 - - # If this assertion fails, the event query returned early - assert event_time * 1000 > spin_ms * 0.9 + synced_query = evt.query() + assert immediate_query is False, "Query returned True prematurely" + assert synced_query is True, "Query returned False after sync" if __name__ == "__main__": unittest.main() From 813862238182d12ae0805fd3ff89466cff6a1dc0 Mon Sep 17 00:00:00 2001 From: Chris Cameron Date: Fri, 13 Feb 2026 16:13:18 +1300 Subject: [PATCH 6/7] fix: fix fragile timing in test_event_query. The proposed test would fail given a ~200ms hang between kernel launch and the first query() call, which is possible in Windows/CI environments. Instead, mapped arrays track when the kernel starts and allow the host to release the kernel from an infinite spin. This gives two guarantees - the kernel has started, and the kernel hasn't finished, which are what's needed to verify that query() doesn't return prematurely. A final assertion after synchronisation checks that query() does return True when called after stream sync. --- .../numba/cuda/tests/cudadrv/test_events.py | 42 ++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py index db5ebad9a..09dd9c89b 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py @@ -53,24 +53,46 @@ def test_event_query(self): stream = cuda.stream() evt = cuda.event() + # Mapped arrays: host-side edits visible to device and vice versa. 
+ started = cuda.mapped_array(1, dtype=np.int32) + release = cuda.mapped_array(1, dtype=np.int32) + @cuda.jit - def spin(ms): - # Sleep for ms - for i in range(ms): - cuda.nanosleep(int32(1_000_000)) # 1 ms + def gated_kernel(started_flag, release_flag): + # Signal that kernel has started + started_flag[0] = 1 + # Spin until host releases us + while release_flag[0] == 0: + cuda.nanosleep(int32(1_000)) + + # Compile first + started[0] = 0 + release[0] = 1 # Don't block during warmup + gated_kernel[1, 1, stream](started, release) + stream.synchronize() - # Run once to compile - spin[1, 1, stream](1) + # Reset for actual test + started[0] = 0 + release[0] = 0 - spin_ms = 200 - spin[1, 1, stream](spin_ms) + # Launch - kernel will spin until we release it + gated_kernel[1, 1, stream](started, release) evt.record(stream) + # Wait until kernel confirms it's running + while started[0] == 0: + pass + + # Kernel is running until we release it - if query returns True, fail. immediate_query = evt.query() + assert immediate_query is False, "Query returned True prematurely" + + # Release the kernel and synchronize + release[0] = 1 evt.synchronize() - synced_query = evt.query() - assert immediate_query is False, "Query returned True prematurely" + # If query returns False after synchronize, fail. 
+ synced_query = evt.query() assert synced_query is True, "Query returned False after sync" if __name__ == "__main__": From 11758008978f0fbc8ef61406208cc518367c57f1 Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Wed, 25 Feb 2026 12:57:10 -0500 Subject: [PATCH 7/7] formatting fix --- numba_cuda/numba/cuda/tests/cudadrv/test_events.py | 1 + 1 file changed, 1 insertion(+) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py index 88cc01a30..9fc4ce912 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_events.py @@ -95,5 +95,6 @@ def gated_kernel(started_flag, release_flag): synced_query = evt.query() assert synced_query is True, "Query returned False after sync" + if __name__ == "__main__": unittest.main()