From 9f9c4ac2b3383d5281e8882b7c453b28f73889e9 Mon Sep 17 00:00:00 2001 From: Shion Matsumoto Date: Fri, 12 Sep 2025 16:11:35 -0400 Subject: [PATCH 1/3] autofix --- pyproject.toml | 3 --- src/lightning/fabric/fabric.py | 3 +-- src/lightning/fabric/loggers/tensorboard.py | 3 +-- .../fabric/plugins/precision/transformer_engine.py | 3 +-- src/lightning/fabric/strategies/fsdp.py | 7 ++----- src/lightning/fabric/strategies/parallel.py | 3 +-- src/lightning/fabric/strategies/xla.py | 3 +-- src/lightning/fabric/strategies/xla_fsdp.py | 3 +-- src/lightning/fabric/utilities/throughput.py | 1 + src/lightning/pytorch/callbacks/model_checkpoint.py | 4 +--- src/lightning/pytorch/core/datamodule.py | 3 +-- src/lightning/pytorch/core/module.py | 6 ++---- src/lightning/pytorch/core/optimizer.py | 1 + src/lightning/pytorch/demos/boring_classes.py | 12 ++++-------- src/lightning/pytorch/demos/transformer.py | 12 ++++-------- src/lightning/pytorch/loggers/comet.py | 1 + src/lightning/pytorch/loggers/tensorboard.py | 3 +-- src/lightning/pytorch/loggers/utilities.py | 3 +-- src/lightning/pytorch/profilers/pytorch.py | 3 +-- src/lightning/pytorch/strategies/fsdp.py | 4 ++-- src/lightning/pytorch/strategies/model_parallel.py | 1 + src/lightning/pytorch/strategies/parallel.py | 3 +-- src/lightning/pytorch/strategies/xla.py | 3 +-- src/lightning/pytorch/trainer/trainer.py | 6 ++---- src/lightning/pytorch/utilities/model_registry.py | 3 +-- .../pytorch/utilities/model_summary/model_summary.py | 3 +-- tests/parity_fabric/models.py | 3 +-- .../strategies/test_model_parallel_integration.py | 3 +-- tests/tests_pytorch/accelerators/test_xla.py | 6 ++---- tests/tests_pytorch/callbacks/test_lr_monitor.py | 6 ++---- tests/tests_pytorch/callbacks/test_spike.py | 3 +-- .../callbacks/test_stochastic_weight_avg.py | 3 +-- tests/tests_pytorch/helpers/advanced_models.py | 6 ++---- tests/tests_pytorch/helpers/simple_models.py | 3 +-- tests/tests_pytorch/models/test_hparams.py | 3 +-- tests/tests_pytorch/plugins/test_amp_plugins.py | 3 +-- .../strategies/test_model_parallel_integration.py | 3 +-- .../trainer/optimization/test_manual_optimization.py | 3 +-- tests/tests_pytorch/tuner/test_lr_finder.py | 3 +-- tests/tests_pytorch/utilities/test_model_summary.py | 3 +-- .../tests_pytorch/utilities/test_parameter_tying.py | 6 ++---- 41 files changed, 54 insertions(+), 103 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e6d08411b0f35..b79268331a6a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,8 +101,6 @@ ignore = [ "S603", # todo: `subprocess` call: check for execution of untrusted input "S605", # todo: Starting a process with a shell: seems safe, but may be changed in the future; consider rewriting without `shell` "S607", # todo: Starting a process with a partial executable path - "RET504", # todo:Unnecessary variable assignment before `return` statement - "RET503", ] "tests/**" = [ "S101", # Use of `assert` detected @@ -118,7 +116,6 @@ ignore = [ "S603", # todo: `subprocess` call: check for execution of untrusted input "S605", # todo: Starting a process with a shell: seems safe, but may be changed in the future; consider rewriting without `shell` "S607", # todo: Starting a process with a partial executable path - "RET504", # todo:Unnecessary variable assignment before `return` statement "PT004", # todo: Fixture `tmpdir_unittest_fixture` does not return anything, add leading underscore "PT012", # todo: `pytest.raises()` block should contain a single simple statement "PT019", # todo: Fixture `_` without value 
is injected as parameter, use `@pytest.mark.usefixtures` instead diff --git a/src/lightning/fabric/fabric.py b/src/lightning/fabric/fabric.py index 288c355a4ebf2..d3d070a18fba1 100644 --- a/src/lightning/fabric/fabric.py +++ b/src/lightning/fabric/fabric.py @@ -476,8 +476,7 @@ def _setup_dataloader( dataloader = self._strategy.process_dataloader(dataloader) device = self.device if move_to_device and not isinstance(self._strategy, XLAStrategy) else None fabric_dataloader = _FabricDataLoader(dataloader=dataloader, device=device) - fabric_dataloader = cast(DataLoader, fabric_dataloader) - return fabric_dataloader + return cast(DataLoader, fabric_dataloader) def backward(self, tensor: Tensor, *args: Any, model: Optional[_FabricModule] = None, **kwargs: Any) -> None: r"""Replaces ``loss.backward()`` in your training loop. Handles precision automatically for you. diff --git a/src/lightning/fabric/loggers/tensorboard.py b/src/lightning/fabric/loggers/tensorboard.py index 208244dc38cd3..0c9a575bad39d 100644 --- a/src/lightning/fabric/loggers/tensorboard.py +++ b/src/lightning/fabric/loggers/tensorboard.py @@ -157,8 +157,7 @@ def log_dir(self) -> str: if isinstance(self.sub_dir, str): log_dir = os.path.join(log_dir, self.sub_dir) log_dir = os.path.expandvars(log_dir) - log_dir = os.path.expanduser(log_dir) - return log_dir + return os.path.expanduser(log_dir) @property def sub_dir(self) -> Optional[str]: diff --git a/src/lightning/fabric/plugins/precision/transformer_engine.py b/src/lightning/fabric/plugins/precision/transformer_engine.py index bf1e51ea6b2b0..a5be075dbf171 100644 --- a/src/lightning/fabric/plugins/precision/transformer_engine.py +++ b/src/lightning/fabric/plugins/precision/transformer_engine.py @@ -103,8 +103,7 @@ def convert_module(self, module: torch.nn.Module) -> torch.nn.Module: ) elif self.replace_layers in (None, True): _convert_layers(module) - module = module.to(dtype=self.weights_dtype) - return module + return module.to(dtype=self.weights_dtype) @override def tensor_init_context(self) -> AbstractContextManager: diff --git a/src/lightning/fabric/strategies/fsdp.py b/src/lightning/fabric/strategies/fsdp.py index baaee74af0ec9..aa4df5889878d 100644 --- a/src/lightning/fabric/strategies/fsdp.py +++ b/src/lightning/fabric/strategies/fsdp.py @@ -801,13 +801,12 @@ def _get_sharded_state_dict_context(module: Module) -> Generator[None, None, Non state_dict_config = ShardedStateDictConfig(offload_to_cpu=True) optim_state_dict_config = ShardedOptimStateDictConfig(offload_to_cpu=True) - state_dict_type_context = FSDP.state_dict_type( + return FSDP.state_dict_type( module=module, state_dict_type=StateDictType.SHARDED_STATE_DICT, state_dict_config=state_dict_config, optim_state_dict_config=optim_state_dict_config, ) - return state_dict_type_context # type: ignore[return-value] def _get_full_state_dict_context( @@ -819,15 +818,13 @@ def _get_full_state_dict_context( state_dict_config = FullStateDictConfig(offload_to_cpu=True, rank0_only=rank0_only) optim_state_dict_config = FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=rank0_only) - state_dict_type_context = FSDP.state_dict_type( + return FSDP.state_dict_type( module=module, state_dict_type=StateDictType.FULL_STATE_DICT, state_dict_config=state_dict_config, optim_state_dict_config=optim_state_dict_config, ) - return state_dict_type_context # type: ignore[return-value] - def _is_sharded_checkpoint(path: Path) -> bool: """A heuristic check to determine whether the path points to a directory with checkpoint shards.""" diff --git 
a/src/lightning/fabric/strategies/parallel.py b/src/lightning/fabric/strategies/parallel.py index 327cfc016d4ef..d1b900f817b34 100644 --- a/src/lightning/fabric/strategies/parallel.py +++ b/src/lightning/fabric/strategies/parallel.py @@ -104,8 +104,7 @@ def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool: decision, reduce_op=ReduceOp.SUM, # type: ignore[arg-type] ) - decision = bool(decision == self.world_size) if all else bool(decision) - return decision + return bool(decision == self.world_size) if all else bool(decision) @override def teardown(self) -> None: diff --git a/src/lightning/fabric/strategies/xla.py b/src/lightning/fabric/strategies/xla.py index 3a571fef37f00..b028c86b8100f 100644 --- a/src/lightning/fabric/strategies/xla.py +++ b/src/lightning/fabric/strategies/xla.py @@ -200,8 +200,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo import torch_xla.core.xla_model as xm tensor = xf.all_gather(tensor) if sync_grads else xm.all_gather(tensor) - tensor = tensor.to(original_device) - return tensor + return tensor.to(original_device) @override def all_reduce( diff --git a/src/lightning/fabric/strategies/xla_fsdp.py b/src/lightning/fabric/strategies/xla_fsdp.py index 3fa9e40f4b4bd..f5ea1c9a99633 100644 --- a/src/lightning/fabric/strategies/xla_fsdp.py +++ b/src/lightning/fabric/strategies/xla_fsdp.py @@ -334,8 +334,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo import torch_xla.core.xla_model as xm tensor = xf.all_gather(tensor) if sync_grads else xm.all_gather(tensor) - tensor = tensor.to(original_device) - return tensor + return tensor.to(original_device) @override def all_reduce( diff --git a/src/lightning/fabric/utilities/throughput.py b/src/lightning/fabric/utilities/throughput.py index 6bc329fa1c3be..dc091154bc240 100644 --- a/src/lightning/fabric/utilities/throughput.py +++ b/src/lightning/fabric/utilities/throughput.py @@ -632,6 +632,7 @@ def get_available_flops(device: torch.device, dtype: Union[torch.dtype, str]) -> rank_zero_warn(f"FLOPs not found for TPU {device_name!r} with {dtype}") return None return int(_TPU_FLOPS[chip]) + return None def _plugin_to_compute_dtype(plugin: "Precision") -> torch.dtype: diff --git a/src/lightning/pytorch/callbacks/model_checkpoint.py b/src/lightning/pytorch/callbacks/model_checkpoint.py index 415e1dcac309b..2c7aee706ab7b 100644 --- a/src/lightning/pytorch/callbacks/model_checkpoint.py +++ b/src/lightning/pytorch/callbacks/model_checkpoint.py @@ -623,9 +623,7 @@ def check_monitor_top_k(self, trainer: "pl.Trainer", current: Optional[Tensor] = should_update_best_and_save = monitor_op(current, self.best_k_models[self.kth_best_model_path]) # If using multiple devices, make sure all processes are unanimous on the decision. 
- should_update_best_and_save = trainer.strategy.reduce_boolean_decision(bool(should_update_best_and_save)) - - return should_update_best_and_save + return trainer.strategy.reduce_boolean_decision(bool(should_update_best_and_save)) def _format_checkpoint_name( self, diff --git a/src/lightning/pytorch/core/datamodule.py b/src/lightning/pytorch/core/datamodule.py index ff84c2fd8b199..c662f4751ebbe 100644 --- a/src/lightning/pytorch/core/datamodule.py +++ b/src/lightning/pytorch/core/datamodule.py @@ -315,5 +315,4 @@ def format_loader_info(info: dict[str, Union[dataset_info, Iterable[dataset_info # Retrieve information for each dataloader method dataloader_info = extract_loader_info(datamodule_loader_methods) # Format the information - dataloader_str = format_loader_info(dataloader_info) - return dataloader_str + return format_loader_info(dataloader_info) diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py index 85f631ee40f75..8a48410ea5865 100644 --- a/src/lightning/pytorch/core/module.py +++ b/src/lightning/pytorch/core/module.py @@ -360,8 +360,7 @@ def _apply_batch_transfer_handler( ) -> Any: device = device or self.device batch = self._call_batch_hook("transfer_batch_to_device", batch, device, dataloader_idx) - batch = self._call_batch_hook("on_after_batch_transfer", batch, dataloader_idx) - return batch + return self._call_batch_hook("on_after_batch_transfer", batch, dataloader_idx) def print(self, *args: Any, **kwargs: Any) -> None: r"""Prints only from process 0. Use this in any distributed mode to log only once. @@ -666,8 +665,7 @@ def __to_tensor(self, value: Union[Tensor, numbers.Number], name: str) -> Tensor f"`self.log({name}, {value})` was called, but the tensor must have a single element." f" You can try doing `self.log({name}, {value}.mean())`" ) - value = value.squeeze() - return value + return value.squeeze() def all_gather( self, data: Union[Tensor, dict, list, tuple], group: Optional[Any] = None, sync_grads: bool = False diff --git a/src/lightning/pytorch/core/optimizer.py b/src/lightning/pytorch/core/optimizer.py index b85e9b2c10e5a..2b354da15fd2d 100644 --- a/src/lightning/pytorch/core/optimizer.py +++ b/src/lightning/pytorch/core/optimizer.py @@ -409,6 +409,7 @@ def step(self, closure: Callable[[], float]) -> float: ... 
def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]: if closure is not None: return closure() + return None @override def zero_grad(self, set_to_none: Optional[bool] = True) -> None: diff --git a/src/lightning/pytorch/demos/boring_classes.py b/src/lightning/pytorch/demos/boring_classes.py index 3855f31898b81..92fb561d5ee46 100644 --- a/src/lightning/pytorch/demos/boring_classes.py +++ b/src/lightning/pytorch/demos/boring_classes.py @@ -253,20 +253,16 @@ def setup(self, stage: str) -> None: ] def train_dataloader(self) -> Iterable[DataLoader]: - combined_train = apply_to_collection(self.train_datasets, Dataset, lambda x: DataLoader(x)) - return combined_train + return apply_to_collection(self.train_datasets, Dataset, lambda x: DataLoader(x)) def val_dataloader(self) -> DataLoader: - combined_val = apply_to_collection(self.val_datasets, Dataset, lambda x: DataLoader(x)) - return combined_val + return apply_to_collection(self.val_datasets, Dataset, lambda x: DataLoader(x)) def test_dataloader(self) -> DataLoader: - combined_test = apply_to_collection(self.test_datasets, Dataset, lambda x: DataLoader(x)) - return combined_test + return apply_to_collection(self.test_datasets, Dataset, lambda x: DataLoader(x)) def predict_dataloader(self) -> DataLoader: - combined_predict = apply_to_collection(self.predict_datasets, Dataset, lambda x: DataLoader(x)) - return combined_predict + return apply_to_collection(self.predict_datasets, Dataset, lambda x: DataLoader(x)) class ManualOptimBoringModel(BoringModel): diff --git a/src/lightning/pytorch/demos/transformer.py b/src/lightning/pytorch/demos/transformer.py index 13b5e05adc680..3064851f3e978 100644 --- a/src/lightning/pytorch/demos/transformer.py +++ b/src/lightning/pytorch/demos/transformer.py @@ -59,8 +59,7 @@ def __init__( def generate_square_subsequent_mask(self, size: int) -> Tensor: """Generate a square mask for the sequence to prevent future tokens from being seen.""" mask = torch.triu(torch.ones(size, size), diagonal=1) - mask = mask.float().masked_fill(mask == 1, float("-inf")).masked_fill(mask == 0, 0.0) - return mask + return mask.float().masked_fill(mask == 1, float("-inf")).masked_fill(mask == 0, 0.0) def forward(self, inputs: Tensor, target: Tensor, mask: Optional[Tensor] = None) -> Tensor: _, t = inputs.shape @@ -78,8 +77,7 @@ def forward(self, inputs: Tensor, target: Tensor, mask: Optional[Tensor] = None) output = self.transformer(src, target, tgt_mask=mask) output = self.decoder(output) output = F.log_softmax(output, dim=-1) - output = output.view(-1, self.vocab_size) - return output + return output.view(-1, self.vocab_size) class PositionalEncoding(nn.Module): @@ -106,8 +104,7 @@ def _init_pos_encoding(self, device: torch.device) -> Tensor: div_term = torch.exp(torch.arange(0, self.dim, 2, device=device).float() * (-math.log(10000.0) / self.dim)) pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) - pe = pe.unsqueeze(0) - return pe + return pe.unsqueeze(0) class WikiText2(Dataset): @@ -200,8 +197,7 @@ def forward(self, inputs: Tensor, target: Tensor) -> Tensor: def training_step(self, batch: tuple[Tensor, Tensor], batch_idx: int) -> Tensor: inputs, target = batch output = self(inputs, target) - loss = torch.nn.functional.nll_loss(output, target.view(-1)) - return loss + return torch.nn.functional.nll_loss(output, target.view(-1)) def configure_optimizers(self) -> torch.optim.Optimizer: return torch.optim.SGD(self.model.parameters(), lr=0.1) diff --git 
a/src/lightning/pytorch/loggers/comet.py b/src/lightning/pytorch/loggers/comet.py index b544212e755e2..4399b078494aa 100644 --- a/src/lightning/pytorch/loggers/comet.py +++ b/src/lightning/pytorch/loggers/comet.py @@ -397,6 +397,7 @@ def version(self) -> Optional[str]: # Don't create an experiment if we don't have one if self._experiment is not None: return self._experiment.get_key() + return None def __getstate__(self) -> dict[str, Any]: state = self.__dict__.copy() diff --git a/src/lightning/pytorch/loggers/tensorboard.py b/src/lightning/pytorch/loggers/tensorboard.py index f9cc41c67045c..0d1f495316803 100644 --- a/src/lightning/pytorch/loggers/tensorboard.py +++ b/src/lightning/pytorch/loggers/tensorboard.py @@ -136,8 +136,7 @@ def log_dir(self) -> str: if isinstance(self.sub_dir, str): log_dir = os.path.join(log_dir, self.sub_dir) log_dir = os.path.expandvars(log_dir) - log_dir = os.path.expanduser(log_dir) - return log_dir + return os.path.expanduser(log_dir) @property @override diff --git a/src/lightning/pytorch/loggers/utilities.py b/src/lightning/pytorch/loggers/utilities.py index ced8a6f1f2bd3..30d9f06a89455 100644 --- a/src/lightning/pytorch/loggers/utilities.py +++ b/src/lightning/pytorch/loggers/utilities.py @@ -52,8 +52,7 @@ def _scan_checkpoints(checkpoint_callback: Checkpoint, logged_model_time: dict) checkpoints = sorted( (Path(p).stat().st_mtime, p, s, tag) for p, (s, tag) in checkpoints.items() if Path(p).is_file() ) - checkpoints = [c for c in checkpoints if c[1] not in logged_model_time or logged_model_time[c[1]] < c[0]] - return checkpoints + return [c for c in checkpoints if c[1] not in logged_model_time or logged_model_time[c[1]] < c[0]] def _log_hyperparams(trainer: "pl.Trainer") -> None: diff --git a/src/lightning/pytorch/profilers/pytorch.py b/src/lightning/pytorch/profilers/pytorch.py index e264d5154feba..f0353dbda4140 100644 --- a/src/lightning/pytorch/profilers/pytorch.py +++ b/src/lightning/pytorch/profilers/pytorch.py @@ -375,12 +375,11 @@ def _total_steps(self) -> Union[int, float]: ) return num_val_batches + num_sanity_val_batches if self._schedule.is_testing: - num_test_batches = ( + return ( sum(trainer.num_test_batches) if isinstance(trainer.num_test_batches, list) else trainer.num_test_batches ) - return num_test_batches if self._schedule.is_predicting: return sum(trainer.num_predict_batches) raise NotImplementedError("Unsupported schedule") diff --git a/src/lightning/pytorch/strategies/fsdp.py b/src/lightning/pytorch/strategies/fsdp.py index 3fbd0f9cd5f0a..350b48919b367 100644 --- a/src/lightning/pytorch/strategies/fsdp.py +++ b/src/lightning/pytorch/strategies/fsdp.py @@ -581,6 +581,7 @@ def save_checkpoint( return super().save_checkpoint(checkpoint=checkpoint, filepath=path) else: raise ValueError(f"Unknown state_dict_type: {self._state_dict_type}") + return None @override def load_checkpoint(self, checkpoint_path: _PATH) -> dict[str, Any]: @@ -624,8 +625,7 @@ def load_checkpoint(self, checkpoint_path: _PATH) -> dict[str, Any]: optim.load_state_dict(flattened_osd) # Load metadata (anything not a module or optimizer) - metadata = torch.load(path / _METADATA_FILENAME) - return metadata + return torch.load(path / _METADATA_FILENAME) if _is_full_checkpoint(path): checkpoint = _lazy_load(path) diff --git a/src/lightning/pytorch/strategies/model_parallel.py b/src/lightning/pytorch/strategies/model_parallel.py index e0286dbe2e0e6..f8ba46cd2b390 100644 --- a/src/lightning/pytorch/strategies/model_parallel.py +++ 
b/src/lightning/pytorch/strategies/model_parallel.py @@ -327,6 +327,7 @@ def save_checkpoint( if _is_sharded_checkpoint(path): shutil.rmtree(path) return super().save_checkpoint(checkpoint=checkpoint, filepath=path) + return None @override def load_checkpoint(self, checkpoint_path: _PATH) -> dict[str, Any]: diff --git a/src/lightning/pytorch/strategies/parallel.py b/src/lightning/pytorch/strategies/parallel.py index dbd8e2962b230..74352b11db888 100644 --- a/src/lightning/pytorch/strategies/parallel.py +++ b/src/lightning/pytorch/strategies/parallel.py @@ -110,8 +110,7 @@ def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool: decision, reduce_op=ReduceOp.SUM, # type: ignore[arg-type] ) - decision = bool(decision == self.world_size) if all else bool(decision) - return decision + return bool(decision == self.world_size) if all else bool(decision) @contextmanager def block_backward_sync(self) -> Generator: diff --git a/src/lightning/pytorch/strategies/xla.py b/src/lightning/pytorch/strategies/xla.py index cbdc890a1ca32..2918296feda6b 100644 --- a/src/lightning/pytorch/strategies/xla.py +++ b/src/lightning/pytorch/strategies/xla.py @@ -345,8 +345,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo import torch_xla.core.xla_model as xm tensor = xf.all_gather(tensor) if sync_grads else xm.all_gather(tensor) - tensor = tensor.to(original_device) - return tensor + return tensor.to(original_device) @override def teardown(self) -> None: diff --git a/src/lightning/pytorch/trainer/trainer.py b/src/lightning/pytorch/trainer/trainer.py index 5768c507e2e3f..ef2b3206a47df 100644 --- a/src/lightning/pytorch/trainer/trainer.py +++ b/src/lightning/pytorch/trainer/trainer.py @@ -1277,8 +1277,7 @@ def training_step(self, batch, batch_idx): else: dirpath = self.default_root_dir - dirpath = self.strategy.broadcast(dirpath) - return dirpath + return self.strategy.broadcast(dirpath) @property def is_global_zero(self) -> bool: @@ -1731,5 +1730,4 @@ def configure_optimizers(self): assert self.max_epochs is not None max_estimated_steps = math.ceil(total_batches / self.accumulate_grad_batches) * max(self.max_epochs, 1) - max_estimated_steps = min(max_estimated_steps, self.max_steps) if self.max_steps != -1 else max_estimated_steps - return max_estimated_steps + return min(max_estimated_steps, self.max_steps) if self.max_steps != -1 else max_estimated_steps diff --git a/src/lightning/pytorch/utilities/model_registry.py b/src/lightning/pytorch/utilities/model_registry.py index 104da2514f5c2..eac7329c35c19 100644 --- a/src/lightning/pytorch/utilities/model_registry.py +++ b/src/lightning/pytorch/utilities/model_registry.py @@ -137,8 +137,7 @@ def _determine_model_folder(model_name: str, default_root_dir: str) -> str: # download the latest checkpoint from the model registry model_name = model_name.replace("/", "_") model_name = model_name.replace(":", "_") - local_model_dir = os.path.join(default_root_dir, model_name) - return local_model_dir + return os.path.join(default_root_dir, model_name) def find_model_local_ckpt_path( diff --git a/src/lightning/pytorch/utilities/model_summary/model_summary.py b/src/lightning/pytorch/utilities/model_summary/model_summary.py index 01b692abdc05f..3efe9a05872cf 100644 --- a/src/lightning/pytorch/utilities/model_summary/model_summary.py +++ b/src/lightning/pytorch/utilities/model_summary/model_summary.py @@ -313,14 +313,13 @@ def total_flops(self) -> int: @property def flop_counts(self) -> dict[str, dict[Any, int]]: flop_counts = 
self._flop_counter.get_flop_counts() - ret = { + return { name: flop_counts.get( f"{type(self._model).__name__}.{name}", {}, ) for name in self.layer_names } - return ret def summarize(self) -> dict[str, LayerSummary]: summary = OrderedDict((name, LayerSummary(module)) for name, module in self.named_modules) diff --git a/tests/parity_fabric/models.py b/tests/parity_fabric/models.py index 4887a4c7f7dba..f65a20460e2f7 100644 --- a/tests/parity_fabric/models.py +++ b/tests/parity_fabric/models.py @@ -60,8 +60,7 @@ def forward(self, x): x = torch.flatten(x, 1) # flatten all dimensions except batch x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) - x = self.fc3(x) - return x + return self.fc3(x) def get_optimizer(self): return torch.optim.SGD(self.parameters(), lr=0.0001) diff --git a/tests/tests_fabric/strategies/test_model_parallel_integration.py b/tests/tests_fabric/strategies/test_model_parallel_integration.py index 4c11fb0edcd78..18c5ad07252da 100644 --- a/tests/tests_fabric/strategies/test_model_parallel_integration.py +++ b/tests/tests_fabric/strategies/test_model_parallel_integration.py @@ -83,8 +83,7 @@ def _parallelize_feed_forward_fsdp2(model, device_mesh): def _parallelize_feed_forward_fsdp2_tp(model, device_mesh): model = _parallelize_feed_forward_tp(model, device_mesh) - model = _parallelize_feed_forward_fsdp2(model, device_mesh) - return model + return _parallelize_feed_forward_fsdp2(model, device_mesh) @RunIf(min_torch="2.4", standalone=True, min_cuda_gpus=4) diff --git a/tests/tests_pytorch/accelerators/test_xla.py b/tests/tests_pytorch/accelerators/test_xla.py index 83dace719371d..5e56d5c585c88 100644 --- a/tests/tests_pytorch/accelerators/test_xla.py +++ b/tests/tests_pytorch/accelerators/test_xla.py @@ -46,8 +46,7 @@ def __init__(self): def forward(self, x): x = self.layer_1(x) x = self.layer_2(x) - x = self.layer_3(x) - return x + return self.layer_3(x) @RunIf(tpu=True, standalone=True) @@ -230,8 +229,7 @@ def __init__(self): def forward(self, x): x = self.net_a(x) x = self.layer_2(x) - x = self.net_b(x) - return x + return self.net_b(x) @RunIf(tpu=True) diff --git a/tests/tests_pytorch/callbacks/test_lr_monitor.py b/tests/tests_pytorch/callbacks/test_lr_monitor.py index 66ce47f0e7ad4..391841e4e949c 100644 --- a/tests/tests_pytorch/callbacks/test_lr_monitor.py +++ b/tests/tests_pytorch/callbacks/test_lr_monitor.py @@ -428,8 +428,7 @@ def __init__(self): def forward(self, x): x = self.linear_a(x) - x = self.linear_b(x) - return x + return self.linear_b(x) def configure_optimizers(self): param_groups = [ @@ -603,8 +602,7 @@ def __init__(self, lr, momentum): def forward(self, x): x = self.linear_a(x) - x = self.linear_b(x) - return x + return self.linear_b(x) def configure_optimizers(self): param_groups = [ diff --git a/tests/tests_pytorch/callbacks/test_spike.py b/tests/tests_pytorch/callbacks/test_spike.py index 86e3ac88e93cf..20679c52394c0 100644 --- a/tests/tests_pytorch/callbacks/test_spike.py +++ b/tests/tests_pytorch/callbacks/test_spike.py @@ -29,8 +29,7 @@ def training_step(self, batch, batch_idx: int): if curr_loss_val is None: curr_loss_val = batch_idx - loss = self.layer(torch.tensor(curr_loss_val, device=self.device, dtype=self.dtype).view(1, 1)) - return loss + return self.layer(torch.tensor(curr_loss_val, device=self.device, dtype=self.dtype).view(1, 1)) def configure_optimizers(self): return torch.optim.SGD(self.parameters(), lr=1e-3) diff --git a/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py 
b/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py index abcd302149fcf..df2df72b18c1f 100644 --- a/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py +++ b/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py @@ -266,8 +266,7 @@ def __init__(self): def forward(self, x): x = self.layer1(x) - x = self.layer2(x) - return x + return self.layer2(x) def configure_optimizers(self): params = [{"params": self.layer1.parameters(), "lr": 0.1}, {"params": self.layer2.parameters(), "lr": 0.2}] diff --git a/tests/tests_pytorch/helpers/advanced_models.py b/tests/tests_pytorch/helpers/advanced_models.py index 959e6e5968d18..3426c3e51f41d 100644 --- a/tests/tests_pytorch/helpers/advanced_models.py +++ b/tests/tests_pytorch/helpers/advanced_models.py @@ -46,8 +46,7 @@ def block(in_feat, out_feat, normalize=True): def forward(self, z): img = self.model(z) - img = img.view(img.size(0), *self.img_shape) - return img + return img.view(img.size(0), *self.img_shape) class Discriminator(nn.Module): @@ -204,8 +203,7 @@ def forward(self, x): x = torch.tanh(x) x = self.c_d1_bn(x) x = self.c_d1_drop(x) - x = self.c_d2(x) - return x + return self.c_d2(x) def training_step(self, batch, batch_nb): x, y = batch diff --git a/tests/tests_pytorch/helpers/simple_models.py b/tests/tests_pytorch/helpers/simple_models.py index a9dc635bba275..49f931ed14cba 100644 --- a/tests/tests_pytorch/helpers/simple_models.py +++ b/tests/tests_pytorch/helpers/simple_models.py @@ -100,8 +100,7 @@ def forward(self, x): x = self.layer_1a(x) x = self.layer_2(x) x = self.layer_2a(x) - x = self.layer_end(x) - return x + return self.layer_end(x) def configure_optimizers(self): optimizer = torch.optim.Adam(self.parameters(), lr=0.01) diff --git a/tests/tests_pytorch/models/test_hparams.py b/tests/tests_pytorch/models/test_hparams.py index 92a07f0a3d05e..d354c8c1e16ef 100644 --- a/tests/tests_pytorch/models/test_hparams.py +++ b/tests/tests_pytorch/models/test_hparams.py @@ -417,8 +417,7 @@ def _raw_checkpoint_path(trainer) -> str: raw_checkpoint_paths = [x for x in raw_checkpoint_paths if ".ckpt" in x] assert raw_checkpoint_paths raw_checkpoint_path = raw_checkpoint_paths[0] - raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) - return raw_checkpoint_path + return os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) @pytest.mark.parametrize("base_class", [HyperparametersMixin, LightningModule, LightningDataModule]) diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py index 0b68c098cc713..b345e9e1bee8f 100644 --- a/tests/tests_pytorch/plugins/test_amp_plugins.py +++ b/tests/tests_pytorch/plugins/test_amp_plugins.py @@ -165,8 +165,7 @@ def __init__(self): def forward(self, x: Tensor): x = self.layer1(x) - x = self.layer2(x) - return x + return self.layer2(x) def training_step(self, batch, batch_idx): _, opt2 = self.optimizers() diff --git a/tests/tests_pytorch/strategies/test_model_parallel_integration.py b/tests/tests_pytorch/strategies/test_model_parallel_integration.py index 00600183f4293..4b3dbe9df9724 100644 --- a/tests/tests_pytorch/strategies/test_model_parallel_integration.py +++ b/tests/tests_pytorch/strategies/test_model_parallel_integration.py @@ -74,8 +74,7 @@ def _parallelize_feed_forward_fsdp2(model, device_mesh): def _parallelize_feed_forward_fsdp2_tp(model, device_mesh): model = _parallelize_feed_forward_tp(model, device_mesh) - model = _parallelize_feed_forward_fsdp2(model, device_mesh) - 
return model + return _parallelize_feed_forward_fsdp2(model, device_mesh) def _parallelize_with_compile(parallelize): diff --git a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py index dd8042ecf2058..4a32418360aed 100644 --- a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py +++ b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py @@ -324,8 +324,7 @@ def __repr__(self): def __copy__(self): cls = self.__class__ - new_obj = cls(self._store.copy()) - return new_obj + return cls(self._store.copy()) def copy(self): return self.__copy__() diff --git a/tests/tests_pytorch/tuner/test_lr_finder.py b/tests/tests_pytorch/tuner/test_lr_finder.py index 81352ebe256ef..ee8bedace5872 100644 --- a/tests/tests_pytorch/tuner/test_lr_finder.py +++ b/tests/tests_pytorch/tuner/test_lr_finder.py @@ -652,8 +652,7 @@ def training_step(self, batch: Any, batch_idx: int) -> STEP_OUTPUT: x, y = batch z = self.encoder(x) x_hat = self.decoder(z) - loss = F.mse_loss(x_hat, y) - return loss + return F.mse_loss(x_hat, y) def configure_optimizers(self): return torch.optim.Adam(self.parameters(), lr=self.hparams.lr) diff --git a/tests/tests_pytorch/utilities/test_model_summary.py b/tests/tests_pytorch/utilities/test_model_summary.py index ee6e064077f86..cb419c43cd556 100644 --- a/tests/tests_pytorch/utilities/test_model_summary.py +++ b/tests/tests_pytorch/utilities/test_model_summary.py @@ -81,8 +81,7 @@ def forward(self, x, y): out1 = self.layer1(x) out2 = self.layer2(y) out = self.relu(torch.cat((out1, out2), 1)) - out = self.combine(out) - return out + return self.combine(out) class MixedDtypeModel(LightningModule): diff --git a/tests/tests_pytorch/utilities/test_parameter_tying.py b/tests/tests_pytorch/utilities/test_parameter_tying.py index e45fb39f81b34..e172dcef2faf1 100644 --- a/tests/tests_pytorch/utilities/test_parameter_tying.py +++ b/tests/tests_pytorch/utilities/test_parameter_tying.py @@ -30,8 +30,7 @@ def __init__(self): def forward(self, x): x = self.layer_1(x) x = self.layer_2(x) - x = self.layer_3(x) - return x + return self.layer_3(x) @pytest.mark.parametrize( @@ -67,8 +66,7 @@ def __init__(self): def forward(self, x): x = self.net_a(x) x = self.layer_2(x) - x = self.net_b(x) - return x + return self.net_b(x) model = NestedModule() set_shared_parameters(model, [["layer.weight", "net_a.layer.weight", "net_b.layer.weight"]]) From 306441615addff995bfe599ccf35853a8f3d1b77 Mon Sep 17 00:00:00 2001 From: Shion Matsumoto Date: Sun, 14 Sep 2025 10:29:15 -0400 Subject: [PATCH 2/3] ignore return type --- src/lightning/fabric/strategies/fsdp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lightning/fabric/strategies/fsdp.py b/src/lightning/fabric/strategies/fsdp.py index aa4df5889878d..f2adcd5435bd7 100644 --- a/src/lightning/fabric/strategies/fsdp.py +++ b/src/lightning/fabric/strategies/fsdp.py @@ -795,7 +795,7 @@ def _optimizer_has_flat_params(optimizer: Optimizer) -> bool: ) -def _get_sharded_state_dict_context(module: Module) -> Generator[None, None, None]: +def _get_sharded_state_dict_context(module: Module) -> Generator: from torch.distributed.fsdp import FullyShardedDataParallel as FSDP from torch.distributed.fsdp.api import ShardedOptimStateDictConfig, ShardedStateDictConfig, StateDictType @@ -806,7 +806,7 @@ def _get_sharded_state_dict_context(module: Module) -> Generator[None, None, Non state_dict_type=StateDictType.SHARDED_STATE_DICT, 
state_dict_config=state_dict_config, optim_state_dict_config=optim_state_dict_config, - ) + ) # type: ignore[return-value] def _get_full_state_dict_context( @@ -823,7 +823,7 @@ def _get_full_state_dict_context( state_dict_type=StateDictType.FULL_STATE_DICT, state_dict_config=state_dict_config, optim_state_dict_config=optim_state_dict_config, - ) + ) # type: ignore[return-value] def _is_sharded_checkpoint(path: Path) -> bool: From 9636d66c48600ee8ccda183842c58258f2b31eca Mon Sep 17 00:00:00 2001 From: jirka Date: Tue, 16 Sep 2025 19:28:46 +0200 Subject: [PATCH 3/3] Empty-Commit
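
Note on the pattern applied throughout the first commit: it removes the RET504/RET503 entries from the ruff ignore list in pyproject.toml and fixes the corresponding findings across src/ and tests/. RET504 flags an unnecessary assignment to a local variable immediately before `return`; RET503 flags a function that returns a value on some paths but falls off the end without an explicit `return` on others. A minimal sketch of both patterns, assuming hypothetical function names (`scale`, `clamp_positive`) that are not taken from the patch:

    from typing import Optional


    # RET504: binding the result to a temporary and returning it adds nothing;
    # return the expression directly (this is what the autofix does above).
    def scale(value: float, factor: float) -> float:
        # Before the fix this read:
        #     result = value * factor
        #     return result
        return value * factor


    # RET503: when some branches return a value, end every other branch with an
    # explicit `return None` instead of implicitly falling off the end.
    def clamp_positive(value: float) -> Optional[float]:
        if value > 0:
            return value
        return None  # previously the function simply ended here


    if __name__ == "__main__":
        print(scale(2.0, 3.0))       # 6.0
        print(clamp_positive(-1.0))  # None

The second commit keeps the FSDP context-manager helpers returning the `FSDP.state_dict_type(...)` value directly and silences the resulting mypy mismatch with `# type: ignore[return-value]` on the call site rather than reintroducing the temporary variable.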