Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions src/lightning/fabric/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from lightning_utilities.core.imports import RequirementCache
from typing_extensions import get_args

from lightning.fabric.accelerators import CPUAccelerator, CUDAAccelerator, MPSAccelerator
from lightning.fabric.accelerators import CPUAccelerator, CUDAAccelerator, MPSAccelerator, XLAAccelerator
from lightning.fabric.plugins.precision.precision import _PRECISION_INPUT_STR, _PRECISION_INPUT_STR_ALIAS
from lightning.fabric.strategies import STRATEGY_REGISTRY
from lightning.fabric.utilities.consolidate_checkpoint import _process_cli_args
Expand All @@ -34,7 +34,18 @@
_CLICK_AVAILABLE = RequirementCache("click")
_LIGHTNING_SDK_AVAILABLE = RequirementCache("lightning_sdk")

_SUPPORTED_ACCELERATORS = ("cpu", "gpu", "cuda", "mps", "tpu")
_SUPPORTED_ACCELERATORS = ("cpu", "gpu", "cuda", "mps", "tpu", "auto")


def _choose_auto_accelerator() -> str:
    """Resolve the ``"auto"`` accelerator choice to a concrete accelerator name.

    The candidates are probed in a fixed priority order (CUDA, then MPS, then XLA) and the name associated with the
    first one whose ``is_available()`` returns a truthy value is returned. If none of them is available, the
    function falls back to ``"cpu"``.

    Returns:
        One of ``"cuda"``, ``"mps"``, ``"tpu"`` or ``"cpu"``.

    """
    # Order matters: earlier entries win, and later ones are not probed once a match is found.
    prioritized_candidates = (
        (CUDAAccelerator, "cuda"),
        (MPSAccelerator, "mps"),
        (XLAAccelerator, "tpu"),
    )
    for accelerator_cls, accelerator_name in prioritized_candidates:
        if accelerator_cls.is_available():
            return accelerator_name
    return "cpu"


def _get_supported_strategies() -> list[str]:
Expand Down Expand Up @@ -187,6 +198,14 @@ def _set_env_variables(args: Namespace) -> None:

def _get_num_processes(accelerator: str, devices: str) -> int:
"""Parse the `devices` argument to determine how many processes need to be launched on the current machine."""

if accelerator == "auto" or accelerator is None:
accelerator = _choose_auto_accelerator()
if devices == "auto":
if accelerator == "cuda" or accelerator == "mps" or accelerator == "cpu":
devices = "1"
else:
raise ValueError(f"Cannot default to '1' device for accelerator='{accelerator}'")
if accelerator == "gpu":
parsed_devices = _parse_gpu_ids(devices, include_cuda=True, include_mps=True)
elif accelerator == "cuda":
Expand Down
6 changes: 3 additions & 3 deletions tests/tests_fabric/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_run_env_vars_defaults(monkeypatch, fake_script):
assert "LT_PRECISION" not in os.environ


@pytest.mark.parametrize("accelerator", ["cpu", "gpu", "cuda", pytest.param("mps", marks=RunIf(mps=True))])
@pytest.mark.parametrize("accelerator", ["cpu", "gpu", "cuda", "auto", pytest.param("mps", marks=RunIf(mps=True))])
@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
@mock.patch("lightning.fabric.accelerators.cuda.num_cuda_devices", return_value=2)
def test_run_env_vars_accelerator(_, accelerator, monkeypatch, fake_script):
Expand Down Expand Up @@ -85,7 +85,7 @@ def test_run_env_vars_unsupported_strategy(strategy, fake_script):
assert f"Invalid value for '--strategy': '{strategy}'" in ioerr.getvalue()


@pytest.mark.parametrize("devices", ["1", "2", "0,", "1,0", "-1"])
@pytest.mark.parametrize("devices", ["1", "2", "0,", "1,0", "-1", "auto"])
@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
@mock.patch("lightning.fabric.accelerators.cuda.num_cuda_devices", return_value=2)
def test_run_env_vars_devices_cuda(_, devices, monkeypatch, fake_script):
Expand All @@ -97,7 +97,7 @@ def test_run_env_vars_devices_cuda(_, devices, monkeypatch, fake_script):


@RunIf(mps=True)
@pytest.mark.parametrize("accelerator", ["mps", "gpu"])
@pytest.mark.parametrize("accelerator", ["mps", "gpu", "auto"])
@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
def test_run_env_vars_devices_mps(accelerator, monkeypatch, fake_script):
monkeypatch.setitem(sys.modules, "torch.distributed.run", Mock())
Expand Down
Loading