Commit 951c0bc

add auto option for accelerator and device
1 parent c03660a commit 951c0bc

File tree: 2 files changed (+18 / -5 lines)

src/lightning/fabric/cli.py
Lines changed: 15 additions & 2 deletions

@@ -34,7 +34,7 @@
 _CLICK_AVAILABLE = RequirementCache("click")
 _LIGHTNING_SDK_AVAILABLE = RequirementCache("lightning_sdk")
 
-_SUPPORTED_ACCELERATORS = ("cpu", "gpu", "cuda", "mps", "tpu")
+_SUPPORTED_ACCELERATORS = ("cpu", "gpu", "cuda", "mps", "tpu", "auto")
 
 
 def _get_supported_strategies() -> list[str]:

@@ -187,6 +187,8 @@ def _set_env_variables(args: Namespace) -> None:
 
 def _get_num_processes(accelerator: str, devices: str) -> int:
     """Parse the `devices` argument to determine how many processes need to be launched on the current machine."""
+    if devices == "auto":
+        devices = "1"  # default to 1 device if 'auto' is specified
     if accelerator == "gpu":
         parsed_devices = _parse_gpu_ids(devices, include_cuda=True, include_mps=True)
     elif accelerator == "cuda":

@@ -195,9 +197,20 @@ def _get_num_processes(accelerator: str, devices: str) -> int:
         parsed_devices = MPSAccelerator.parse_devices(devices)
     elif accelerator == "tpu":
         raise ValueError("Launching processes for TPU through the CLI is not supported.")
+    elif accelerator == "auto" or accelerator is None:
+        if torch.cuda.is_available():
+            parsed_devices = CUDAAccelerator.parse_devices(devices)
+        elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
+            parsed_devices = MPSAccelerator.parse_devices(devices)
+        else:
+            parsed_devices = CPUAccelerator.parse_devices(devices)
     else:
         return CPUAccelerator.parse_devices(devices)
-    return len(parsed_devices) if parsed_devices is not None else 0
+    return (
+        len(parsed_devices)
+        if isinstance(parsed_devices, list)
+        else (parsed_devices if isinstance(parsed_devices, int) else 0)
+    )
 
 
 def _torchrun_launch(args: Namespace, script_args: list[str]) -> None:
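
The net effect of the cli.py change is easiest to see by calling the helper directly. Below is a minimal usage sketch, assuming a standard PyTorch install and the import path shown in the diff; which accelerator class ends up parsing the devices depends on what torch detects on the local machine:

    # Hedged sketch: probe how many processes the CLI would launch when both
    # options are left at the new "auto" value. On a CUDA machine the count
    # comes from CUDAAccelerator.parse_devices, on Apple silicon from
    # MPSAccelerator, otherwise from CPUAccelerator.
    from lightning.fabric.cli import _get_num_processes

    num_processes = _get_num_processes(accelerator="auto", devices="auto")
    print(num_processes)  # devices="auto" is coerced to "1", so this prints 1

In CLI terms, this is what would let an invocation along the lines of `fabric run script.py --accelerator auto --devices auto` pick a backend without the user naming one (the `fabric run` entry point and exact flag spelling are assumptions here, not shown in this diff).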

tests/tests_fabric/test_cli.py
Lines changed: 3 additions & 3 deletions

@@ -46,7 +46,7 @@ def test_run_env_vars_defaults(monkeypatch, fake_script):
     assert "LT_PRECISION" not in os.environ
 
 
-@pytest.mark.parametrize("accelerator", ["cpu", "gpu", "cuda", pytest.param("mps", marks=RunIf(mps=True))])
+@pytest.mark.parametrize("accelerator", ["cpu", "gpu", "cuda", "auto", pytest.param("mps", marks=RunIf(mps=True))])
 @mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 @mock.patch("lightning.fabric.accelerators.cuda.num_cuda_devices", return_value=2)
 def test_run_env_vars_accelerator(_, accelerator, monkeypatch, fake_script):

@@ -85,7 +85,7 @@ def test_run_env_vars_unsupported_strategy(strategy, fake_script):
     assert f"Invalid value for '--strategy': '{strategy}'" in ioerr.getvalue()
 
 
-@pytest.mark.parametrize("devices", ["1", "2", "0,", "1,0", "-1"])
+@pytest.mark.parametrize("devices", ["1", "2", "0,", "1,0", "-1", "auto"])
 @mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 @mock.patch("lightning.fabric.accelerators.cuda.num_cuda_devices", return_value=2)
 def test_run_env_vars_devices_cuda(_, devices, monkeypatch, fake_script):

@@ -97,7 +97,7 @@ def test_run_env_vars_devices_cuda(_, devices, monkeypatch, fake_script):
 
 
 @RunIf(mps=True)
-@pytest.mark.parametrize("accelerator", ["mps", "gpu"])
+@pytest.mark.parametrize("accelerator", ["mps", "gpu", "auto"])
 @mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_run_env_vars_devices_mps(accelerator, monkeypatch, fake_script):
     monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
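
Since the test changes only widen existing parametrizations, the new cases can be selected by their parameter id. A small sketch, assuming pytest is available and the repository layout matches the paths above; the -k expression simply matches the "auto" id that pytest derives from the new parameter value:

    # Hedged sketch: collect and run only the "auto" cases touched by this commit.
    import pytest

    exit_code = pytest.main(["tests/tests_fabric/test_cli.py", "-k", "auto"])
    print(exit_code)  # 0 if the selected cases pass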
