From 9f9c4ac2b3383d5281e8882b7c453b28f73889e9 Mon Sep 17 00:00:00 2001 From: Shion Matsumoto Date: Fri, 12 Sep 2025 16:11:35 -0400 Subject: [PATCH 1/3] autofix --- pyproject.toml | 3 --- src/lightning/fabric/fabric.py | 3 +-- src/lightning/fabric/loggers/tensorboard.py | 3 +-- .../fabric/plugins/precision/transformer_engine.py | 3 +-- src/lightning/fabric/strategies/fsdp.py | 7 ++----- src/lightning/fabric/strategies/parallel.py | 3 +-- src/lightning/fabric/strategies/xla.py | 3 +-- src/lightning/fabric/strategies/xla_fsdp.py | 3 +-- src/lightning/fabric/utilities/throughput.py | 1 + src/lightning/pytorch/callbacks/model_checkpoint.py | 4 +--- src/lightning/pytorch/core/datamodule.py | 3 +-- src/lightning/pytorch/core/module.py | 6 ++---- src/lightning/pytorch/core/optimizer.py | 1 + src/lightning/pytorch/demos/boring_classes.py | 12 ++++-------- src/lightning/pytorch/demos/transformer.py | 12 ++++-------- src/lightning/pytorch/loggers/comet.py | 1 + src/lightning/pytorch/loggers/tensorboard.py | 3 +-- src/lightning/pytorch/loggers/utilities.py | 3 +-- src/lightning/pytorch/profilers/pytorch.py | 3 +-- src/lightning/pytorch/strategies/fsdp.py | 4 ++-- src/lightning/pytorch/strategies/model_parallel.py | 1 + src/lightning/pytorch/strategies/parallel.py | 3 +-- src/lightning/pytorch/strategies/xla.py | 3 +-- src/lightning/pytorch/trainer/trainer.py | 6 ++---- src/lightning/pytorch/utilities/model_registry.py | 3 +-- .../pytorch/utilities/model_summary/model_summary.py | 3 +-- tests/parity_fabric/models.py | 3 +-- .../strategies/test_model_parallel_integration.py | 3 +-- tests/tests_pytorch/accelerators/test_xla.py | 6 ++---- tests/tests_pytorch/callbacks/test_lr_monitor.py | 6 ++---- tests/tests_pytorch/callbacks/test_spike.py | 3 +-- .../callbacks/test_stochastic_weight_avg.py | 3 +-- tests/tests_pytorch/helpers/advanced_models.py | 6 ++---- tests/tests_pytorch/helpers/simple_models.py | 3 +-- tests/tests_pytorch/models/test_hparams.py | 3 +-- tests/tests_pytorch/plugins/test_amp_plugins.py | 3 +-- .../strategies/test_model_parallel_integration.py | 3 +-- .../trainer/optimization/test_manual_optimization.py | 3 +-- tests/tests_pytorch/tuner/test_lr_finder.py | 3 +-- tests/tests_pytorch/utilities/test_model_summary.py | 3 +-- .../tests_pytorch/utilities/test_parameter_tying.py | 6 ++---- 41 files changed, 54 insertions(+), 103 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e6d08411b0f35..b79268331a6a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,8 +101,6 @@ ignore = [ "S603", # todo: `subprocess` call: check for execution of untrusted input "S605", # todo: Starting a process with a shell: seems safe, but may be changed in the future; consider rewriting without `shell` "S607", # todo: Starting a process with a partial executable path - "RET504", # todo:Unnecessary variable assignment before `return` statement - "RET503", ] "tests/**" = [ "S101", # Use of `assert` detected @@ -118,7 +116,6 @@ ignore = [ "S603", # todo: `subprocess` call: check for execution of untrusted input "S605", # todo: Starting a process with a shell: seems safe, but may be changed in the future; consider rewriting without `shell` "S607", # todo: Starting a process with a partial executable path - "RET504", # todo:Unnecessary variable assignment before `return` statement "PT004", # todo: Fixture `tmpdir_unittest_fixture` does not return anything, add leading underscore "PT012", # todo: `pytest.raises()` block should contain a single simple statement "PT019", # todo: Fixture `_` without value 
is injected as parameter, use `@pytest.mark.usefixtures` instead diff --git a/src/lightning/fabric/fabric.py b/src/lightning/fabric/fabric.py index 288c355a4ebf2..d3d070a18fba1 100644 --- a/src/lightning/fabric/fabric.py +++ b/src/lightning/fabric/fabric.py @@ -476,8 +476,7 @@ def _setup_dataloader( dataloader = self._strategy.process_dataloader(dataloader) device = self.device if move_to_device and not isinstance(self._strategy, XLAStrategy) else None fabric_dataloader = _FabricDataLoader(dataloader=dataloader, device=device) - fabric_dataloader = cast(DataLoader, fabric_dataloader) - return fabric_dataloader + return cast(DataLoader, fabric_dataloader) def backward(self, tensor: Tensor, *args: Any, model: Optional[_FabricModule] = None, **kwargs: Any) -> None: r"""Replaces ``loss.backward()`` in your training loop. Handles precision automatically for you. diff --git a/src/lightning/fabric/loggers/tensorboard.py b/src/lightning/fabric/loggers/tensorboard.py index 208244dc38cd3..0c9a575bad39d 100644 --- a/src/lightning/fabric/loggers/tensorboard.py +++ b/src/lightning/fabric/loggers/tensorboard.py @@ -157,8 +157,7 @@ def log_dir(self) -> str: if isinstance(self.sub_dir, str): log_dir = os.path.join(log_dir, self.sub_dir) log_dir = os.path.expandvars(log_dir) - log_dir = os.path.expanduser(log_dir) - return log_dir + return os.path.expanduser(log_dir) @property def sub_dir(self) -> Optional[str]: diff --git a/src/lightning/fabric/plugins/precision/transformer_engine.py b/src/lightning/fabric/plugins/precision/transformer_engine.py index bf1e51ea6b2b0..a5be075dbf171 100644 --- a/src/lightning/fabric/plugins/precision/transformer_engine.py +++ b/src/lightning/fabric/plugins/precision/transformer_engine.py @@ -103,8 +103,7 @@ def convert_module(self, module: torch.nn.Module) -> torch.nn.Module: ) elif self.replace_layers in (None, True): _convert_layers(module) - module = module.to(dtype=self.weights_dtype) - return module + return module.to(dtype=self.weights_dtype) @override def tensor_init_context(self) -> AbstractContextManager: diff --git a/src/lightning/fabric/strategies/fsdp.py b/src/lightning/fabric/strategies/fsdp.py index baaee74af0ec9..aa4df5889878d 100644 --- a/src/lightning/fabric/strategies/fsdp.py +++ b/src/lightning/fabric/strategies/fsdp.py @@ -801,13 +801,12 @@ def _get_sharded_state_dict_context(module: Module) -> Generator[None, None, Non state_dict_config = ShardedStateDictConfig(offload_to_cpu=True) optim_state_dict_config = ShardedOptimStateDictConfig(offload_to_cpu=True) - state_dict_type_context = FSDP.state_dict_type( + return FSDP.state_dict_type( module=module, state_dict_type=StateDictType.SHARDED_STATE_DICT, state_dict_config=state_dict_config, optim_state_dict_config=optim_state_dict_config, ) - return state_dict_type_context # type: ignore[return-value] def _get_full_state_dict_context( @@ -819,15 +818,13 @@ def _get_full_state_dict_context( state_dict_config = FullStateDictConfig(offload_to_cpu=True, rank0_only=rank0_only) optim_state_dict_config = FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=rank0_only) - state_dict_type_context = FSDP.state_dict_type( + return FSDP.state_dict_type( module=module, state_dict_type=StateDictType.FULL_STATE_DICT, state_dict_config=state_dict_config, optim_state_dict_config=optim_state_dict_config, ) - return state_dict_type_context # type: ignore[return-value] - def _is_sharded_checkpoint(path: Path) -> bool: """A heuristic check to determine whether the path points to a directory with checkpoint shards.""" diff --git 
a/src/lightning/fabric/strategies/parallel.py b/src/lightning/fabric/strategies/parallel.py index 327cfc016d4ef..d1b900f817b34 100644 --- a/src/lightning/fabric/strategies/parallel.py +++ b/src/lightning/fabric/strategies/parallel.py @@ -104,8 +104,7 @@ def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool: decision, reduce_op=ReduceOp.SUM, # type: ignore[arg-type] ) - decision = bool(decision == self.world_size) if all else bool(decision) - return decision + return bool(decision == self.world_size) if all else bool(decision) @override def teardown(self) -> None: diff --git a/src/lightning/fabric/strategies/xla.py b/src/lightning/fabric/strategies/xla.py index 3a571fef37f00..b028c86b8100f 100644 --- a/src/lightning/fabric/strategies/xla.py +++ b/src/lightning/fabric/strategies/xla.py @@ -200,8 +200,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo import torch_xla.core.xla_model as xm tensor = xf.all_gather(tensor) if sync_grads else xm.all_gather(tensor) - tensor = tensor.to(original_device) - return tensor + return tensor.to(original_device) @override def all_reduce( diff --git a/src/lightning/fabric/strategies/xla_fsdp.py b/src/lightning/fabric/strategies/xla_fsdp.py index 3fa9e40f4b4bd..f5ea1c9a99633 100644 --- a/src/lightning/fabric/strategies/xla_fsdp.py +++ b/src/lightning/fabric/strategies/xla_fsdp.py @@ -334,8 +334,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo import torch_xla.core.xla_model as xm tensor = xf.all_gather(tensor) if sync_grads else xm.all_gather(tensor) - tensor = tensor.to(original_device) - return tensor + return tensor.to(original_device) @override def all_reduce( diff --git a/src/lightning/fabric/utilities/throughput.py b/src/lightning/fabric/utilities/throughput.py index 6bc329fa1c3be..dc091154bc240 100644 --- a/src/lightning/fabric/utilities/throughput.py +++ b/src/lightning/fabric/utilities/throughput.py @@ -632,6 +632,7 @@ def get_available_flops(device: torch.device, dtype: Union[torch.dtype, str]) -> rank_zero_warn(f"FLOPs not found for TPU {device_name!r} with {dtype}") return None return int(_TPU_FLOPS[chip]) + return None def _plugin_to_compute_dtype(plugin: "Precision") -> torch.dtype: diff --git a/src/lightning/pytorch/callbacks/model_checkpoint.py b/src/lightning/pytorch/callbacks/model_checkpoint.py index 415e1dcac309b..2c7aee706ab7b 100644 --- a/src/lightning/pytorch/callbacks/model_checkpoint.py +++ b/src/lightning/pytorch/callbacks/model_checkpoint.py @@ -623,9 +623,7 @@ def check_monitor_top_k(self, trainer: "pl.Trainer", current: Optional[Tensor] = should_update_best_and_save = monitor_op(current, self.best_k_models[self.kth_best_model_path]) # If using multiple devices, make sure all processes are unanimous on the decision. 
- should_update_best_and_save = trainer.strategy.reduce_boolean_decision(bool(should_update_best_and_save)) - - return should_update_best_and_save + return trainer.strategy.reduce_boolean_decision(bool(should_update_best_and_save)) def _format_checkpoint_name( self, diff --git a/src/lightning/pytorch/core/datamodule.py b/src/lightning/pytorch/core/datamodule.py index ff84c2fd8b199..c662f4751ebbe 100644 --- a/src/lightning/pytorch/core/datamodule.py +++ b/src/lightning/pytorch/core/datamodule.py @@ -315,5 +315,4 @@ def format_loader_info(info: dict[str, Union[dataset_info, Iterable[dataset_info # Retrieve information for each dataloader method dataloader_info = extract_loader_info(datamodule_loader_methods) # Format the information - dataloader_str = format_loader_info(dataloader_info) - return dataloader_str + return format_loader_info(dataloader_info) diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py index 85f631ee40f75..8a48410ea5865 100644 --- a/src/lightning/pytorch/core/module.py +++ b/src/lightning/pytorch/core/module.py @@ -360,8 +360,7 @@ def _apply_batch_transfer_handler( ) -> Any: device = device or self.device batch = self._call_batch_hook("transfer_batch_to_device", batch, device, dataloader_idx) - batch = self._call_batch_hook("on_after_batch_transfer", batch, dataloader_idx) - return batch + return self._call_batch_hook("on_after_batch_transfer", batch, dataloader_idx) def print(self, *args: Any, **kwargs: Any) -> None: r"""Prints only from process 0. Use this in any distributed mode to log only once. @@ -666,8 +665,7 @@ def __to_tensor(self, value: Union[Tensor, numbers.Number], name: str) -> Tensor f"`self.log({name}, {value})` was called, but the tensor must have a single element." f" You can try doing `self.log({name}, {value}.mean())`" ) - value = value.squeeze() - return value + return value.squeeze() def all_gather( self, data: Union[Tensor, dict, list, tuple], group: Optional[Any] = None, sync_grads: bool = False diff --git a/src/lightning/pytorch/core/optimizer.py b/src/lightning/pytorch/core/optimizer.py index b85e9b2c10e5a..2b354da15fd2d 100644 --- a/src/lightning/pytorch/core/optimizer.py +++ b/src/lightning/pytorch/core/optimizer.py @@ -409,6 +409,7 @@ def step(self, closure: Callable[[], float]) -> float: ... 
def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]: if closure is not None: return closure() + return None @override def zero_grad(self, set_to_none: Optional[bool] = True) -> None: diff --git a/src/lightning/pytorch/demos/boring_classes.py b/src/lightning/pytorch/demos/boring_classes.py index 3855f31898b81..92fb561d5ee46 100644 --- a/src/lightning/pytorch/demos/boring_classes.py +++ b/src/lightning/pytorch/demos/boring_classes.py @@ -253,20 +253,16 @@ def setup(self, stage: str) -> None: ] def train_dataloader(self) -> Iterable[DataLoader]: - combined_train = apply_to_collection(self.train_datasets, Dataset, lambda x: DataLoader(x)) - return combined_train + return apply_to_collection(self.train_datasets, Dataset, lambda x: DataLoader(x)) def val_dataloader(self) -> DataLoader: - combined_val = apply_to_collection(self.val_datasets, Dataset, lambda x: DataLoader(x)) - return combined_val + return apply_to_collection(self.val_datasets, Dataset, lambda x: DataLoader(x)) def test_dataloader(self) -> DataLoader: - combined_test = apply_to_collection(self.test_datasets, Dataset, lambda x: DataLoader(x)) - return combined_test + return apply_to_collection(self.test_datasets, Dataset, lambda x: DataLoader(x)) def predict_dataloader(self) -> DataLoader: - combined_predict = apply_to_collection(self.predict_datasets, Dataset, lambda x: DataLoader(x)) - return combined_predict + return apply_to_collection(self.predict_datasets, Dataset, lambda x: DataLoader(x)) class ManualOptimBoringModel(BoringModel): diff --git a/src/lightning/pytorch/demos/transformer.py b/src/lightning/pytorch/demos/transformer.py index 13b5e05adc680..3064851f3e978 100644 --- a/src/lightning/pytorch/demos/transformer.py +++ b/src/lightning/pytorch/demos/transformer.py @@ -59,8 +59,7 @@ def __init__( def generate_square_subsequent_mask(self, size: int) -> Tensor: """Generate a square mask for the sequence to prevent future tokens from being seen.""" mask = torch.triu(torch.ones(size, size), diagonal=1) - mask = mask.float().masked_fill(mask == 1, float("-inf")).masked_fill(mask == 0, 0.0) - return mask + return mask.float().masked_fill(mask == 1, float("-inf")).masked_fill(mask == 0, 0.0) def forward(self, inputs: Tensor, target: Tensor, mask: Optional[Tensor] = None) -> Tensor: _, t = inputs.shape @@ -78,8 +77,7 @@ def forward(self, inputs: Tensor, target: Tensor, mask: Optional[Tensor] = None) output = self.transformer(src, target, tgt_mask=mask) output = self.decoder(output) output = F.log_softmax(output, dim=-1) - output = output.view(-1, self.vocab_size) - return output + return output.view(-1, self.vocab_size) class PositionalEncoding(nn.Module): @@ -106,8 +104,7 @@ def _init_pos_encoding(self, device: torch.device) -> Tensor: div_term = torch.exp(torch.arange(0, self.dim, 2, device=device).float() * (-math.log(10000.0) / self.dim)) pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) - pe = pe.unsqueeze(0) - return pe + return pe.unsqueeze(0) class WikiText2(Dataset): @@ -200,8 +197,7 @@ def forward(self, inputs: Tensor, target: Tensor) -> Tensor: def training_step(self, batch: tuple[Tensor, Tensor], batch_idx: int) -> Tensor: inputs, target = batch output = self(inputs, target) - loss = torch.nn.functional.nll_loss(output, target.view(-1)) - return loss + return torch.nn.functional.nll_loss(output, target.view(-1)) def configure_optimizers(self) -> torch.optim.Optimizer: return torch.optim.SGD(self.model.parameters(), lr=0.1) diff --git 
a/src/lightning/pytorch/loggers/comet.py b/src/lightning/pytorch/loggers/comet.py index b544212e755e2..4399b078494aa 100644 --- a/src/lightning/pytorch/loggers/comet.py +++ b/src/lightning/pytorch/loggers/comet.py @@ -397,6 +397,7 @@ def version(self) -> Optional[str]: # Don't create an experiment if we don't have one if self._experiment is not None: return self._experiment.get_key() + return None def __getstate__(self) -> dict[str, Any]: state = self.__dict__.copy() diff --git a/src/lightning/pytorch/loggers/tensorboard.py b/src/lightning/pytorch/loggers/tensorboard.py index f9cc41c67045c..0d1f495316803 100644 --- a/src/lightning/pytorch/loggers/tensorboard.py +++ b/src/lightning/pytorch/loggers/tensorboard.py @@ -136,8 +136,7 @@ def log_dir(self) -> str: if isinstance(self.sub_dir, str): log_dir = os.path.join(log_dir, self.sub_dir) log_dir = os.path.expandvars(log_dir) - log_dir = os.path.expanduser(log_dir) - return log_dir + return os.path.expanduser(log_dir) @property @override diff --git a/src/lightning/pytorch/loggers/utilities.py b/src/lightning/pytorch/loggers/utilities.py index ced8a6f1f2bd3..30d9f06a89455 100644 --- a/src/lightning/pytorch/loggers/utilities.py +++ b/src/lightning/pytorch/loggers/utilities.py @@ -52,8 +52,7 @@ def _scan_checkpoints(checkpoint_callback: Checkpoint, logged_model_time: dict) checkpoints = sorted( (Path(p).stat().st_mtime, p, s, tag) for p, (s, tag) in checkpoints.items() if Path(p).is_file() ) - checkpoints = [c for c in checkpoints if c[1] not in logged_model_time or logged_model_time[c[1]] < c[0]] - return checkpoints + return [c for c in checkpoints if c[1] not in logged_model_time or logged_model_time[c[1]] < c[0]] def _log_hyperparams(trainer: "pl.Trainer") -> None: diff --git a/src/lightning/pytorch/profilers/pytorch.py b/src/lightning/pytorch/profilers/pytorch.py index e264d5154feba..f0353dbda4140 100644 --- a/src/lightning/pytorch/profilers/pytorch.py +++ b/src/lightning/pytorch/profilers/pytorch.py @@ -375,12 +375,11 @@ def _total_steps(self) -> Union[int, float]: ) return num_val_batches + num_sanity_val_batches if self._schedule.is_testing: - num_test_batches = ( + return ( sum(trainer.num_test_batches) if isinstance(trainer.num_test_batches, list) else trainer.num_test_batches ) - return num_test_batches if self._schedule.is_predicting: return sum(trainer.num_predict_batches) raise NotImplementedError("Unsupported schedule") diff --git a/src/lightning/pytorch/strategies/fsdp.py b/src/lightning/pytorch/strategies/fsdp.py index 3fbd0f9cd5f0a..350b48919b367 100644 --- a/src/lightning/pytorch/strategies/fsdp.py +++ b/src/lightning/pytorch/strategies/fsdp.py @@ -581,6 +581,7 @@ def save_checkpoint( return super().save_checkpoint(checkpoint=checkpoint, filepath=path) else: raise ValueError(f"Unknown state_dict_type: {self._state_dict_type}") + return None @override def load_checkpoint(self, checkpoint_path: _PATH) -> dict[str, Any]: @@ -624,8 +625,7 @@ def load_checkpoint(self, checkpoint_path: _PATH) -> dict[str, Any]: optim.load_state_dict(flattened_osd) # Load metadata (anything not a module or optimizer) - metadata = torch.load(path / _METADATA_FILENAME) - return metadata + return torch.load(path / _METADATA_FILENAME) if _is_full_checkpoint(path): checkpoint = _lazy_load(path) diff --git a/src/lightning/pytorch/strategies/model_parallel.py b/src/lightning/pytorch/strategies/model_parallel.py index e0286dbe2e0e6..f8ba46cd2b390 100644 --- a/src/lightning/pytorch/strategies/model_parallel.py +++ 
b/src/lightning/pytorch/strategies/model_parallel.py @@ -327,6 +327,7 @@ def save_checkpoint( if _is_sharded_checkpoint(path): shutil.rmtree(path) return super().save_checkpoint(checkpoint=checkpoint, filepath=path) + return None @override def load_checkpoint(self, checkpoint_path: _PATH) -> dict[str, Any]: diff --git a/src/lightning/pytorch/strategies/parallel.py b/src/lightning/pytorch/strategies/parallel.py index dbd8e2962b230..74352b11db888 100644 --- a/src/lightning/pytorch/strategies/parallel.py +++ b/src/lightning/pytorch/strategies/parallel.py @@ -110,8 +110,7 @@ def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool: decision, reduce_op=ReduceOp.SUM, # type: ignore[arg-type] ) - decision = bool(decision == self.world_size) if all else bool(decision) - return decision + return bool(decision == self.world_size) if all else bool(decision) @contextmanager def block_backward_sync(self) -> Generator: diff --git a/src/lightning/pytorch/strategies/xla.py b/src/lightning/pytorch/strategies/xla.py index cbdc890a1ca32..2918296feda6b 100644 --- a/src/lightning/pytorch/strategies/xla.py +++ b/src/lightning/pytorch/strategies/xla.py @@ -345,8 +345,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo import torch_xla.core.xla_model as xm tensor = xf.all_gather(tensor) if sync_grads else xm.all_gather(tensor) - tensor = tensor.to(original_device) - return tensor + return tensor.to(original_device) @override def teardown(self) -> None: diff --git a/src/lightning/pytorch/trainer/trainer.py b/src/lightning/pytorch/trainer/trainer.py index 5768c507e2e3f..ef2b3206a47df 100644 --- a/src/lightning/pytorch/trainer/trainer.py +++ b/src/lightning/pytorch/trainer/trainer.py @@ -1277,8 +1277,7 @@ def training_step(self, batch, batch_idx): else: dirpath = self.default_root_dir - dirpath = self.strategy.broadcast(dirpath) - return dirpath + return self.strategy.broadcast(dirpath) @property def is_global_zero(self) -> bool: @@ -1731,5 +1730,4 @@ def configure_optimizers(self): assert self.max_epochs is not None max_estimated_steps = math.ceil(total_batches / self.accumulate_grad_batches) * max(self.max_epochs, 1) - max_estimated_steps = min(max_estimated_steps, self.max_steps) if self.max_steps != -1 else max_estimated_steps - return max_estimated_steps + return min(max_estimated_steps, self.max_steps) if self.max_steps != -1 else max_estimated_steps diff --git a/src/lightning/pytorch/utilities/model_registry.py b/src/lightning/pytorch/utilities/model_registry.py index 104da2514f5c2..eac7329c35c19 100644 --- a/src/lightning/pytorch/utilities/model_registry.py +++ b/src/lightning/pytorch/utilities/model_registry.py @@ -137,8 +137,7 @@ def _determine_model_folder(model_name: str, default_root_dir: str) -> str: # download the latest checkpoint from the model registry model_name = model_name.replace("/", "_") model_name = model_name.replace(":", "_") - local_model_dir = os.path.join(default_root_dir, model_name) - return local_model_dir + return os.path.join(default_root_dir, model_name) def find_model_local_ckpt_path( diff --git a/src/lightning/pytorch/utilities/model_summary/model_summary.py b/src/lightning/pytorch/utilities/model_summary/model_summary.py index 01b692abdc05f..3efe9a05872cf 100644 --- a/src/lightning/pytorch/utilities/model_summary/model_summary.py +++ b/src/lightning/pytorch/utilities/model_summary/model_summary.py @@ -313,14 +313,13 @@ def total_flops(self) -> int: @property def flop_counts(self) -> dict[str, dict[Any, int]]: flop_counts = 
self._flop_counter.get_flop_counts() - ret = { + return { name: flop_counts.get( f"{type(self._model).__name__}.{name}", {}, ) for name in self.layer_names } - return ret def summarize(self) -> dict[str, LayerSummary]: summary = OrderedDict((name, LayerSummary(module)) for name, module in self.named_modules) diff --git a/tests/parity_fabric/models.py b/tests/parity_fabric/models.py index 4887a4c7f7dba..f65a20460e2f7 100644 --- a/tests/parity_fabric/models.py +++ b/tests/parity_fabric/models.py @@ -60,8 +60,7 @@ def forward(self, x): x = torch.flatten(x, 1) # flatten all dimensions except batch x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) - x = self.fc3(x) - return x + return self.fc3(x) def get_optimizer(self): return torch.optim.SGD(self.parameters(), lr=0.0001) diff --git a/tests/tests_fabric/strategies/test_model_parallel_integration.py b/tests/tests_fabric/strategies/test_model_parallel_integration.py index 4c11fb0edcd78..18c5ad07252da 100644 --- a/tests/tests_fabric/strategies/test_model_parallel_integration.py +++ b/tests/tests_fabric/strategies/test_model_parallel_integration.py @@ -83,8 +83,7 @@ def _parallelize_feed_forward_fsdp2(model, device_mesh): def _parallelize_feed_forward_fsdp2_tp(model, device_mesh): model = _parallelize_feed_forward_tp(model, device_mesh) - model = _parallelize_feed_forward_fsdp2(model, device_mesh) - return model + return _parallelize_feed_forward_fsdp2(model, device_mesh) @RunIf(min_torch="2.4", standalone=True, min_cuda_gpus=4) diff --git a/tests/tests_pytorch/accelerators/test_xla.py b/tests/tests_pytorch/accelerators/test_xla.py index 83dace719371d..5e56d5c585c88 100644 --- a/tests/tests_pytorch/accelerators/test_xla.py +++ b/tests/tests_pytorch/accelerators/test_xla.py @@ -46,8 +46,7 @@ def __init__(self): def forward(self, x): x = self.layer_1(x) x = self.layer_2(x) - x = self.layer_3(x) - return x + return self.layer_3(x) @RunIf(tpu=True, standalone=True) @@ -230,8 +229,7 @@ def __init__(self): def forward(self, x): x = self.net_a(x) x = self.layer_2(x) - x = self.net_b(x) - return x + return self.net_b(x) @RunIf(tpu=True) diff --git a/tests/tests_pytorch/callbacks/test_lr_monitor.py b/tests/tests_pytorch/callbacks/test_lr_monitor.py index 66ce47f0e7ad4..391841e4e949c 100644 --- a/tests/tests_pytorch/callbacks/test_lr_monitor.py +++ b/tests/tests_pytorch/callbacks/test_lr_monitor.py @@ -428,8 +428,7 @@ def __init__(self): def forward(self, x): x = self.linear_a(x) - x = self.linear_b(x) - return x + return self.linear_b(x) def configure_optimizers(self): param_groups = [ @@ -603,8 +602,7 @@ def __init__(self, lr, momentum): def forward(self, x): x = self.linear_a(x) - x = self.linear_b(x) - return x + return self.linear_b(x) def configure_optimizers(self): param_groups = [ diff --git a/tests/tests_pytorch/callbacks/test_spike.py b/tests/tests_pytorch/callbacks/test_spike.py index 86e3ac88e93cf..20679c52394c0 100644 --- a/tests/tests_pytorch/callbacks/test_spike.py +++ b/tests/tests_pytorch/callbacks/test_spike.py @@ -29,8 +29,7 @@ def training_step(self, batch, batch_idx: int): if curr_loss_val is None: curr_loss_val = batch_idx - loss = self.layer(torch.tensor(curr_loss_val, device=self.device, dtype=self.dtype).view(1, 1)) - return loss + return self.layer(torch.tensor(curr_loss_val, device=self.device, dtype=self.dtype).view(1, 1)) def configure_optimizers(self): return torch.optim.SGD(self.parameters(), lr=1e-3) diff --git a/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py 
b/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py index abcd302149fcf..df2df72b18c1f 100644 --- a/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py +++ b/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py @@ -266,8 +266,7 @@ def __init__(self): def forward(self, x): x = self.layer1(x) - x = self.layer2(x) - return x + return self.layer2(x) def configure_optimizers(self): params = [{"params": self.layer1.parameters(), "lr": 0.1}, {"params": self.layer2.parameters(), "lr": 0.2}] diff --git a/tests/tests_pytorch/helpers/advanced_models.py b/tests/tests_pytorch/helpers/advanced_models.py index 959e6e5968d18..3426c3e51f41d 100644 --- a/tests/tests_pytorch/helpers/advanced_models.py +++ b/tests/tests_pytorch/helpers/advanced_models.py @@ -46,8 +46,7 @@ def block(in_feat, out_feat, normalize=True): def forward(self, z): img = self.model(z) - img = img.view(img.size(0), *self.img_shape) - return img + return img.view(img.size(0), *self.img_shape) class Discriminator(nn.Module): @@ -204,8 +203,7 @@ def forward(self, x): x = torch.tanh(x) x = self.c_d1_bn(x) x = self.c_d1_drop(x) - x = self.c_d2(x) - return x + return self.c_d2(x) def training_step(self, batch, batch_nb): x, y = batch diff --git a/tests/tests_pytorch/helpers/simple_models.py b/tests/tests_pytorch/helpers/simple_models.py index a9dc635bba275..49f931ed14cba 100644 --- a/tests/tests_pytorch/helpers/simple_models.py +++ b/tests/tests_pytorch/helpers/simple_models.py @@ -100,8 +100,7 @@ def forward(self, x): x = self.layer_1a(x) x = self.layer_2(x) x = self.layer_2a(x) - x = self.layer_end(x) - return x + return self.layer_end(x) def configure_optimizers(self): optimizer = torch.optim.Adam(self.parameters(), lr=0.01) diff --git a/tests/tests_pytorch/models/test_hparams.py b/tests/tests_pytorch/models/test_hparams.py index 92a07f0a3d05e..d354c8c1e16ef 100644 --- a/tests/tests_pytorch/models/test_hparams.py +++ b/tests/tests_pytorch/models/test_hparams.py @@ -417,8 +417,7 @@ def _raw_checkpoint_path(trainer) -> str: raw_checkpoint_paths = [x for x in raw_checkpoint_paths if ".ckpt" in x] assert raw_checkpoint_paths raw_checkpoint_path = raw_checkpoint_paths[0] - raw_checkpoint_path = os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) - return raw_checkpoint_path + return os.path.join(trainer.checkpoint_callback.dirpath, raw_checkpoint_path) @pytest.mark.parametrize("base_class", [HyperparametersMixin, LightningModule, LightningDataModule]) diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py index 0b68c098cc713..b345e9e1bee8f 100644 --- a/tests/tests_pytorch/plugins/test_amp_plugins.py +++ b/tests/tests_pytorch/plugins/test_amp_plugins.py @@ -165,8 +165,7 @@ def __init__(self): def forward(self, x: Tensor): x = self.layer1(x) - x = self.layer2(x) - return x + return self.layer2(x) def training_step(self, batch, batch_idx): _, opt2 = self.optimizers() diff --git a/tests/tests_pytorch/strategies/test_model_parallel_integration.py b/tests/tests_pytorch/strategies/test_model_parallel_integration.py index 00600183f4293..4b3dbe9df9724 100644 --- a/tests/tests_pytorch/strategies/test_model_parallel_integration.py +++ b/tests/tests_pytorch/strategies/test_model_parallel_integration.py @@ -74,8 +74,7 @@ def _parallelize_feed_forward_fsdp2(model, device_mesh): def _parallelize_feed_forward_fsdp2_tp(model, device_mesh): model = _parallelize_feed_forward_tp(model, device_mesh) - model = _parallelize_feed_forward_fsdp2(model, device_mesh) - 
return model + return _parallelize_feed_forward_fsdp2(model, device_mesh) def _parallelize_with_compile(parallelize): diff --git a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py index dd8042ecf2058..4a32418360aed 100644 --- a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py +++ b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py @@ -324,8 +324,7 @@ def __repr__(self): def __copy__(self): cls = self.__class__ - new_obj = cls(self._store.copy()) - return new_obj + return cls(self._store.copy()) def copy(self): return self.__copy__() diff --git a/tests/tests_pytorch/tuner/test_lr_finder.py b/tests/tests_pytorch/tuner/test_lr_finder.py index 81352ebe256ef..ee8bedace5872 100644 --- a/tests/tests_pytorch/tuner/test_lr_finder.py +++ b/tests/tests_pytorch/tuner/test_lr_finder.py @@ -652,8 +652,7 @@ def training_step(self, batch: Any, batch_idx: int) -> STEP_OUTPUT: x, y = batch z = self.encoder(x) x_hat = self.decoder(z) - loss = F.mse_loss(x_hat, y) - return loss + return F.mse_loss(x_hat, y) def configure_optimizers(self): return torch.optim.Adam(self.parameters(), lr=self.hparams.lr) diff --git a/tests/tests_pytorch/utilities/test_model_summary.py b/tests/tests_pytorch/utilities/test_model_summary.py index ee6e064077f86..cb419c43cd556 100644 --- a/tests/tests_pytorch/utilities/test_model_summary.py +++ b/tests/tests_pytorch/utilities/test_model_summary.py @@ -81,8 +81,7 @@ def forward(self, x, y): out1 = self.layer1(x) out2 = self.layer2(y) out = self.relu(torch.cat((out1, out2), 1)) - out = self.combine(out) - return out + return self.combine(out) class MixedDtypeModel(LightningModule): diff --git a/tests/tests_pytorch/utilities/test_parameter_tying.py b/tests/tests_pytorch/utilities/test_parameter_tying.py index e45fb39f81b34..e172dcef2faf1 100644 --- a/tests/tests_pytorch/utilities/test_parameter_tying.py +++ b/tests/tests_pytorch/utilities/test_parameter_tying.py @@ -30,8 +30,7 @@ def __init__(self): def forward(self, x): x = self.layer_1(x) x = self.layer_2(x) - x = self.layer_3(x) - return x + return self.layer_3(x) @pytest.mark.parametrize( @@ -67,8 +66,7 @@ def __init__(self): def forward(self, x): x = self.net_a(x) x = self.layer_2(x) - x = self.net_b(x) - return x + return self.net_b(x) model = NestedModule() set_shared_parameters(model, [["layer.weight", "net_a.layer.weight", "net_b.layer.weight"]]) From 306441615addff995bfe599ccf35853a8f3d1b77 Mon Sep 17 00:00:00 2001 From: Shion Matsumoto Date: Sun, 14 Sep 2025 10:29:15 -0400 Subject: [PATCH 2/3] ignore return type --- src/lightning/fabric/strategies/fsdp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lightning/fabric/strategies/fsdp.py b/src/lightning/fabric/strategies/fsdp.py index aa4df5889878d..f2adcd5435bd7 100644 --- a/src/lightning/fabric/strategies/fsdp.py +++ b/src/lightning/fabric/strategies/fsdp.py @@ -795,7 +795,7 @@ def _optimizer_has_flat_params(optimizer: Optimizer) -> bool: ) -def _get_sharded_state_dict_context(module: Module) -> Generator[None, None, None]: +def _get_sharded_state_dict_context(module: Module) -> Generator: from torch.distributed.fsdp import FullyShardedDataParallel as FSDP from torch.distributed.fsdp.api import ShardedOptimStateDictConfig, ShardedStateDictConfig, StateDictType @@ -806,7 +806,7 @@ def _get_sharded_state_dict_context(module: Module) -> Generator[None, None, Non state_dict_type=StateDictType.SHARDED_STATE_DICT, 
state_dict_config=state_dict_config, optim_state_dict_config=optim_state_dict_config, - ) + ) # type: ignore[return-value] def _get_full_state_dict_context( @@ -823,7 +823,7 @@ def _get_full_state_dict_context( state_dict_type=StateDictType.FULL_STATE_DICT, state_dict_config=state_dict_config, optim_state_dict_config=optim_state_dict_config, - ) + ) # type: ignore[return-value] def _is_sharded_checkpoint(path: Path) -> bool: From 9636d66c48600ee8ccda183842c58258f2b31eca Mon Sep 17 00:00:00 2001 From: jirka Date: Tue, 16 Sep 2025 19:28:46 +0200 Subject: [PATCH 3/3] Empty-Commit
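
Note on the pattern applied throughout the first commit: it removes the RET504/RET503 entries from the ruff ignore list in pyproject.toml and fixes the corresponding findings across src/ and tests/. RET504 flags an unnecessary assignment to a local variable immediately before `return`; RET503 flags a function that returns a value on some paths but falls off the end without an explicit `return` on others. A minimal sketch of both patterns, assuming hypothetical function names (`scale`, `clamp_positive`) that are not taken from the patch:

    from typing import Optional


    # RET504: binding the result to a temporary and returning it adds nothing;
    # return the expression directly (this is what the autofix does above).
    def scale(value: float, factor: float) -> float:
        # Before the fix this read:
        #     result = value * factor
        #     return result
        return value * factor


    # RET503: when some branches return a value, end every other branch with an
    # explicit `return None` instead of implicitly falling off the end.
    def clamp_positive(value: float) -> Optional[float]:
        if value > 0:
            return value
        return None  # previously the function simply ended here


    if __name__ == "__main__":
        print(scale(2.0, 3.0))       # 6.0
        print(clamp_positive(-1.0))  # None

The second commit keeps the FSDP context-manager helpers returning the `FSDP.state_dict_type(...)` value directly and silences the resulting mypy mismatch with `# type: ignore[return-value]` on the call site rather than reintroducing the temporary variable.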