Commit 037a24b

feat: add device_name classmethod in Accelerator.
1 parent 2460746 commit 037a24b

File tree

6 files changed: +57 -17 lines changed

src/lightning/pytorch/accelerators/accelerator.py

Lines changed: 6 additions & 1 deletion
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from abc import ABC
-from typing import Any
+from typing import Any, Optional
 
 import lightning.pytorch as pl
 from lightning.fabric.accelerators.accelerator import Accelerator as _Accelerator
@@ -45,3 +45,8 @@ def get_device_stats(self, device: _DEVICE) -> dict[str, Any]:
 
         """
         raise NotImplementedError
+
+    @classmethod
+    def device_name(cls, device: Optional = None) -> str:
+        """Get the device name for a given device."""
+        return str(cls.is_available())
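
For context, a minimal usage sketch of the base-class fallback (hedged: it assumes CPUAccelerator inherits device_name unchanged, since this commit adds no override for it):

from lightning.pytorch.accelerators import CPUAccelerator

# Without an override, device_name() just stringifies availability,
# so a CPU run reports "True" rather than a hardware name.
print(CPUAccelerator.device_name())  # -> "True"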

src/lightning/pytorch/accelerators/cuda.py

Lines changed: 6 additions & 0 deletions
@@ -113,6 +113,12 @@ def register_accelerators(cls, accelerator_registry: _AcceleratorRegistry) -> No
             description=cls.__name__,
         )
 
+    @classmethod
+    def device_name(cls, device: Optional[torch.types.Device] = None) -> str:
+        if not cls.is_available():
+            return "False"
+        return torch.cuda.get_device_name(device)
+
 
 def get_nvidia_gpu_stats(device: _DEVICE) -> dict[str, float]: # pragma: no-cover
     """Get GPU stats including memory, fan speed, and temperature from nvidia-smi.

src/lightning/pytorch/accelerators/mps.py

Lines changed: 7 additions & 0 deletions
@@ -87,6 +87,13 @@ def register_accelerators(cls, accelerator_registry: _AcceleratorRegistry) -> No
             description=cls.__name__,
         )
 
+    @classmethod
+    def device_name(cls, device: Optional = None) -> str:
+        # todo: implement a better way to get the device name
+        available = cls.is_available()
+        gpu_type = " (mps)" if available else ""
+        return f"{available}{gpu_type}"
+
 
 # device metrics
 _VM_PERCENT = "M1_vm_percent"
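
A minimal sketch of the MPS behavior (hedged: as the TODO above notes, there is no real name lookup yet, so the method only reports availability plus a static suffix):

from lightning.pytorch.accelerators import MPSAccelerator

# Apple Silicon: "True (mps)"; any other machine: "False".
print(MPSAccelerator.device_name())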

src/lightning/pytorch/accelerators/xla.py

Lines changed: 23 additions & 1 deletion
@@ -11,11 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any
+from typing import Any, Optional
 
 from typing_extensions import override
 
 from lightning.fabric.accelerators import _AcceleratorRegistry
+from lightning.fabric.accelerators.xla import _XLA_GREATER_EQUAL_2_1
 from lightning.fabric.accelerators.xla import XLAAccelerator as FabricXLAAccelerator
 from lightning.fabric.utilities.types import _DEVICE
 from lightning.pytorch.accelerators.accelerator import Accelerator
@@ -53,3 +54,24 @@ def get_device_stats(self, device: _DEVICE) -> dict[str, Any]:
     @override
     def register_accelerators(cls, accelerator_registry: _AcceleratorRegistry) -> None:
         accelerator_registry.register("tpu", cls, description=cls.__name__)
+
+    @classmethod
+    def device_name(cls, device: Optional = None) -> str:
+        is_available = cls.is_available()
+        if not is_available:
+            return str(is_available)
+
+        if _XLA_GREATER_EQUAL_2_1:
+            from torch_xla._internal import tpu
+        else:
+            from torch_xla.experimental import tpu
+        import torch_xla.core.xla_env_vars as xenv
+        from requests.exceptions import HTTPError
+
+        try:
+            ret = tpu.get_tpu_env()[xenv.ACCELERATOR_TYPE]
+        except HTTPError:
+            # Fallback to "True" if HTTPError is raised during retrieving device information
+            ret = str(is_available)
+
+        return ret
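
For reference, a hedged sketch of the XLA path (it assumes a Cloud TPU environment; the accelerator-type string, e.g. "v4-8", comes from the TPU metadata lookup and may vary):

from lightning.pytorch.accelerators import XLAAccelerator

# On a TPU host this resolves ACCELERATOR_TYPE from the TPU environment
# (e.g. "v4-8"); on an HTTPError it falls back to "True", and without
# TPUs it returns "False".
print(XLAAccelerator.device_name())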

src/lightning/pytorch/trainer/setup.py

Lines changed: 7 additions & 12 deletions
@@ -142,21 +142,16 @@ def _init_profiler(trainer: "pl.Trainer", profiler: Optional[Union[Profiler, str
 
 
 def _log_device_info(trainer: "pl.Trainer") -> None:
-    if CUDAAccelerator.is_available():
-        gpu_available = True
-        gpu_type = " (cuda)"
-    elif MPSAccelerator.is_available():
-        gpu_available = True
-        gpu_type = " (mps)"
+    if isinstance(trainer.accelerator, (CUDAAccelerator, MPSAccelerator)):
+        gpu_used = trainer.num_devices
+        device_names = list({trainer.accelerator.device_name(d) for d in trainer.devices})
     else:
-        gpu_available = False
-        gpu_type = ""
-
-    gpu_used = isinstance(trainer.accelerator, (CUDAAccelerator, MPSAccelerator))
-    rank_zero_info(f"GPU available: {gpu_available}{gpu_type}, used: {gpu_used}")
+        gpu_used = 0
+        device_names = "False"
+    rank_zero_info(f"GPU available: {device_names}, using: {gpu_used} {'devices' if gpu_used else 'device'}.")
 
     num_tpu_cores = trainer.num_devices if isinstance(trainer.accelerator, XLAAccelerator) else 0
-    rank_zero_info(f"TPU available: {XLAAccelerator.is_available()}, using: {num_tpu_cores} TPU cores")
+    rank_zero_info(f"TPU available: {XLAAccelerator.device_name()}, using: {num_tpu_cores} TPU cores")
 
     if _habana_available_and_importable():
         from lightning_habana import HPUAccelerator
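
The net effect on startup logging, sketched with illustrative values (a two-GPU CUDA machine without TPUs is assumed; actual names depend on the hardware):

GPU available: ['NVIDIA A100-SXM4-40GB'], using: 2 devices.
TPU available: False, using: 0 TPU cores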

src/lightning/pytorch/trainer/trainer.py

Lines changed: 8 additions & 3 deletions
@@ -1187,16 +1187,21 @@ def num_nodes(self) -> int:
         return getattr(self.strategy, "num_nodes", 1)
 
     @property
-    def device_ids(self) -> list[int]:
-        """List of device indexes per node."""
+    def devices(self) -> list[torch.device]:
+        """The devices the trainer uses per node."""
         devices = (
             self.strategy.parallel_devices
             if isinstance(self.strategy, ParallelStrategy)
             else [self.strategy.root_device]
         )
         assert devices is not None
+        return devices
+
+    @property
+    def device_ids(self) -> list[int]:
+        """List of device indexes per node."""
         device_ids = []
-        for idx, device in enumerate(devices):
+        for idx, device in enumerate(self.devices):
             if isinstance(device, torch.device):
                 device_ids.append(device.index or idx)
             elif isinstance(device, int):