fix: Add callable check and improve test coverage for CUDA fork check

arrdel · arrdel · commit f2cc224e02bc · 2025-12-09T13:38:51.000-05:00
- Add callable() check before calling _is_in_bad_fork to ensure robustness
- Add test_check_for_bad_cuda_fork_with_is_in_bad_fork() to test new detection path
- Ensure test coverage for both the new _is_in_bad_fork path and the fallback path
diff --git a/src/lightning/fabric/strategies/launchers/multiprocessing.py b/src/lightning/fabric/strategies/launchers/multiprocessing.py
@@ -199,7 +199,7 @@ def _check_bad_cuda_fork() -> None:
     # is initialized. This allows passive CUDA initialization (e.g., from library imports or device queries)
     # while still catching actual problematic cases where CUDA context was created before forking.
     _is_in_bad_fork = getattr(torch.cuda, "_is_in_bad_fork", None)
-    if _is_in_bad_fork is not None and _is_in_bad_fork():
+    if _is_in_bad_fork is not None and callable(_is_in_bad_fork) and _is_in_bad_fork():
         message = (
             "Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, "
             "you must use the 'spawn' start method or avoid CUDA initialization in the main process."
diff --git a/tests/tests_fabric/strategies/launchers/test_multiprocessing.py b/tests/tests_fabric/strategies/launchers/test_multiprocessing.py
@@ -98,6 +98,17 @@ def test_check_for_bad_cuda_fork(mp_mock, _, start_method):
         launcher.launch(function=Mock())
 
 
+@pytest.mark.parametrize("start_method", ["fork", "forkserver"])
+@mock.patch("torch.cuda._is_in_bad_fork", return_value=True, create=True)
+@mock.patch("lightning.fabric.strategies.launchers.multiprocessing.mp")
+def test_check_for_bad_cuda_fork_with_is_in_bad_fork(mp_mock, _, start_method):
+    """Launching must fail when ``_is_in_bad_fork()`` is true; ``create=True`` covers torch builds lacking it."""
+    mp_mock.get_all_start_methods.return_value = [start_method]
+    launcher = _MultiProcessingLauncher(strategy=Mock(), start_method=start_method)
+    with pytest.raises(RuntimeError, match="Cannot re-initialize CUDA in forked subprocess"):
+        launcher.launch(function=Mock())
+
+
 def test_check_for_missing_main_guard():
     launcher = _MultiProcessingLauncher(strategy=Mock(), start_method="spawn")
     with (