src/lightning/pytorch/core/module.py — 16 changes: 11 additions & 5 deletions
@@ -656,11 +656,17 @@ def __check_allowed(v: Any, name: str, value: Any) -> None:
         raise ValueError(f"`self.log({name}, {value})` was called, but `{type(v).__name__}` values cannot be logged")
 
     def __to_tensor(self, value: Union[Tensor, numbers.Number], name: str) -> Tensor:
-        value = (
-            value.clone().detach()
-            if isinstance(value, Tensor)
-            else torch.tensor(value, device=self.device, dtype=_get_default_dtype())
-        )
+        if isinstance(value, Tensor):
+            # Keep the tensor on its original device to avoid unnecessary transfers
+            value = value.clone().detach()
+        else:
+            # Place scalar metrics on the CPU to avoid a CPU-GPU transfer and synchronization:
+            # `torch.tensor(value, device="cuda")` triggers such a sync, while the metric is
+            # only consumed on the CPU side, so creating it on the CPU is more efficient.
+            # For non-CUDA devices, keep the original behavior.
+            device = "cpu" if self.device.type == "cuda" else self.device
+            value = torch.tensor(value, device=device, dtype=_get_default_dtype())
 
         if not torch.numel(value) == 1:
             raise ValueError(
                 f"`self.log({name}, {value})` was called, but the tensor must have a single element."
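To see the change in isolation, here is a minimal standalone sketch of the new `__to_tensor` logic (hypothetical names: `device` stands in for `self.device`, and `torch.get_default_dtype()` approximates Lightning's internal `_get_default_dtype()`):

import torch

def to_tensor(value, device):
    if isinstance(value, torch.Tensor):
        # Tensors keep their original device: no transfer is triggered.
        return value.clone().detach()
    # Scalars are materialized on the CPU when running on CUDA, since creating a
    # CUDA tensor from a Python number issues a host-to-device copy that
    # synchronizes, and logged metrics are consumed on the CPU anyway.
    target = "cpu" if device.type == "cuda" else device
    return torch.tensor(value, device=target, dtype=torch.get_default_dtype())

print(to_tensor(3.14, torch.device("cuda")))  # tensor(3.1400), placed on the CPU
print(to_tensor(3.14, torch.device("cpu")))   # tensor(3.1400)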
@@ -234,7 +234,9 @@ def metrics(self) -> _METRICS:
         """This function returns either batch or epoch metrics."""
         on_step = self._first_loop_iter is not None
         assert self.trainer._results is not None
-        return self.trainer._results.metrics(on_step)
+        # Only include progress bar metrics if a progress bar callback is present
+        include_pbar_metrics = self.trainer.progress_bar_callback is not None
+        return self.trainer._results.metrics(on_step, include_pbar_metrics=include_pbar_metrics)
 
     @property
     def callback_metrics(self) -> _OUT_DICT:
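A hedged usage sketch of what this buys: when the progress bar is disabled, the connector now skips building the `pbar` metrics dict entirely, so no tensor-to-scalar conversion (and thus no potential GPU sync) happens for progress-bar-only bookkeeping:

from lightning.pytorch import Trainer

# With the progress bar disabled, `trainer.progress_bar_callback` is None, so the
# connector calls `_results.metrics(on_step, include_pbar_metrics=False)` and the
# "pbar" dict is never populated.
trainer = Trainer(enable_progress_bar=False)
assert trainer.progress_bar_callback is None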
@@ -468,7 +468,7 @@ def _forked_name(self, result_metric: _ResultMetric, on_step: bool) -> tuple[str
             forked_name += dataloader_suffix
         return name, forked_name
 
-    def metrics(self, on_step: bool) -> _METRICS:
+    def metrics(self, on_step: bool, *, include_pbar_metrics: bool = True) -> _METRICS:
         metrics = _METRICS(callback={}, log={}, pbar={})
 
         for _, result_metric in self.valid_items():
@@ -489,7 +489,7 @@ def metrics(self, on_step: bool) -> _METRICS:
                 metrics["callback"][forked_name] = value
 
             # populate progress_bar metrics. convert tensors to numbers
-            if result_metric.meta.prog_bar:
+            if result_metric.meta.prog_bar and include_pbar_metrics:
                 metrics["pbar"][forked_name] = convert_tensors_to_scalars(value)
 
         return metrics
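As a sanity check, here is a minimal mimic (not the real `_ResultCollection`; names are hypothetical) of the new keyword's effect: when `include_pbar_metrics` is `False`, the `pbar` dict stays empty and the `.item()`-style conversion is never reached:

import torch

def collect_metrics(items, *, include_pbar_metrics=True):
    # `items` maps a metric name to (value, prog_bar); a stand-in for valid_items().
    metrics = {"callback": {}, "log": {}, "pbar": {}}
    for name, (value, prog_bar) in items.items():
        metrics["callback"][name] = value
        if prog_bar and include_pbar_metrics:
            # convert tensors to numbers, mirroring convert_tensors_to_scalars
            metrics["pbar"][name] = value.item()
    return metrics

items = {"train_loss": (torch.tensor(0.25), True)}
print(collect_metrics(items)["pbar"])                              # {'train_loss': 0.25}
print(collect_metrics(items, include_pbar_metrics=False)["pbar"])  # {}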