diff --git a/.actions/assistant.py b/.actions/assistant.py index 47a0543f228ad..7b2d49423d622 100644 --- a/.actions/assistant.py +++ b/.actions/assistant.py @@ -154,8 +154,8 @@ def load_readme_description(path_dir: str, homepage: str, version: str) -> str: """ path_readme = os.path.join(path_dir, "README.md") - with open(path_readme, encoding="utf-8") as fo: - text = fo.read() + with open(path_readme, encoding="utf-8") as fopen: + text = fopen.read() # drop images from readme text = text.replace( @@ -308,17 +308,17 @@ def copy_replace_imports( if ext in (".pyc",): continue # Try to parse everything else - with open(fp, encoding="utf-8") as fo: + with open(fp, encoding="utf-8") as fopen: try: - lines = fo.readlines() + lines = fopen.readlines() except UnicodeDecodeError: # a binary file, skip print(f"Skipped replacing imports for {fp}") continue lines = _replace_imports(lines, list(zip(source_imports, target_imports)), lightning_by=lightning_by) os.makedirs(os.path.dirname(fp_new), exist_ok=True) - with open(fp_new, "w", encoding="utf-8") as fo: - fo.writelines(lines) + with open(fp_new, "w", encoding="utf-8") as fopen: + fopen.writelines(lines) def create_mirror_package(source_dir: str, package_mapping: dict[str, str]) -> None: @@ -370,10 +370,10 @@ def _prune_packages(req_file: str, packages: Sequence[str]) -> None: @staticmethod def _replace_min(fname: str) -> None: - with open(fname, encoding="utf-8") as fo: - req = fo.read().replace(">=", "==") - with open(fname, "w", encoding="utf-8") as fw: - fw.write(req) + with open(fname, encoding="utf-8") as fopen: + req = fopen.read().replace(">=", "==") + with open(fname, "w", encoding="utf-8") as fwrite: + fwrite.write(req) @staticmethod def replace_oldest_ver(requirement_fnames: Sequence[str] = REQUIREMENT_FILES_ALL) -> None: @@ -471,15 +471,15 @@ def convert_version2nightly(ver_file: str = "src/version.info") -> None: """Load the actual version and convert it to the nightly version.""" from datetime import datetime - with open(ver_file) as fo: - version = fo.read().strip() + with open(ver_file) as fopen: + version = fopen.read().strip() # parse X.Y.Z version and prune any suffix vers = re.match(r"(\d+)\.(\d+)\.(\d+).*", version) # create timestamp YYYYMMDD timestamp = datetime.now().strftime("%Y%m%d") version = f"{'.'.join(vers.groups())}.dev{timestamp}" - with open(ver_file, "w") as fo: - fo.write(version + os.linesep) + with open(ver_file, "w") as fopen: + fopen.write(version + os.linesep) @staticmethod def generate_docker_tags( diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index bf493ad47e51a..2fe1e57e95a77 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -34,11 +34,12 @@ ENV \ MAKEFLAGS="-j2" RUN \ - apt-get update --fix-missing && apt-get install -y wget && \ - apt-get update -qq --fix-missing && \ - NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \ CUDA_VERSION_MM=${CUDA_VERSION%.*} && \ + apt-get update -qq --fix-missing && apt-get install -y wget && \ + NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \ + echo "NCCL version found: $NCCL_VER" && \ TO_INSTALL_NCCL=$(echo -e "$MAX_ALLOWED_NCCL\n$NCCL_VER" | sort -V | head -n1)-1+cuda${CUDA_VERSION_MM} && \ + echo "NCCL version to install: $TO_INSTALL_NCCL" && \ apt-get install -y --no-install-recommends --allow-downgrades --allow-change-held-packages \ build-essential \ pkg-config \ @@ 
-96,7 +97,7 @@ RUN \ --extra-index-url="https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM//'.'/''}/" RUN \ - # Show what we have + # Show what we have \ pip --version && \ pip list && \ python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \ diff --git a/examples/fabric/build_your_own_trainer/trainer.py b/examples/fabric/build_your_own_trainer/trainer.py index d9d081a2aea69..ef7c3f4f53534 100644 --- a/examples/fabric/build_your_own_trainer/trainer.py +++ b/examples/fabric/build_your_own_trainer/trainer.py @@ -418,7 +418,7 @@ def load(self, state: Optional[Mapping], path: str) -> None: """Loads a checkpoint from a given file into state. Args: - state: a mapping contaning model, optimizer and lr scheduler + state: a mapping containing model, optimizer and lr scheduler path: the path to load the checkpoint from """ diff --git a/examples/fabric/meta_learning/train_fabric.py b/examples/fabric/meta_learning/train_fabric.py index 203155f7b2ada..779ede1632d06 100644 --- a/examples/fabric/meta_learning/train_fabric.py +++ b/examples/fabric/meta_learning/train_fabric.py @@ -30,7 +30,7 @@ def accuracy(predictions, targets): def fast_adapt(batch, learner, loss, adaptation_steps, shots, ways): data, labels = batch - # Separate data into adaptation/evalutation sets + # Separate data into adaptation/evaluation sets adaptation_indices = torch.zeros(data.size(0), dtype=bool) adaptation_indices[torch.arange(shots * ways) * 2] = True evaluation_indices = ~adaptation_indices diff --git a/examples/fabric/meta_learning/train_torch.py b/examples/fabric/meta_learning/train_torch.py index 1e3666755704b..99357ebd5e8b4 100644 --- a/examples/fabric/meta_learning/train_torch.py +++ b/examples/fabric/meta_learning/train_torch.py @@ -34,7 +34,7 @@ def fast_adapt(batch, learner, loss, adaptation_steps, shots, ways, device): data, labels = batch data, labels = data.to(device), labels.to(device) - # Separate data into adaptation/evalutation sets + # Separate data into adaptation/evaluation sets adaptation_indices = torch.zeros(data.size(0), dtype=bool) adaptation_indices[torch.arange(shots * ways) * 2] = True evaluation_indices = ~adaptation_indices diff --git a/examples/pytorch/domain_templates/reinforce_learn_ppo.py b/examples/pytorch/domain_templates/reinforce_learn_ppo.py index af503dbb925cd..55581c1b68088 100644 --- a/examples/pytorch/domain_templates/reinforce_learn_ppo.py +++ b/examples/pytorch/domain_templates/reinforce_learn_ppo.py @@ -353,7 +353,7 @@ def generate_trajectory_samples(self) -> tuple[list[torch.Tensor], list[torch.Te # logging self.avg_reward = sum(self.epoch_rewards) / self.steps_per_epoch - # if epoch ended abruptly, exlude last cut-short episode to prevent stats skewness + # if epoch ended abruptly, exclude last cut-short episode to prevent stats skewness epoch_rewards = self.epoch_rewards if not done: epoch_rewards = epoch_rewards[:-1] diff --git a/pyproject.toml b/pyproject.toml index 48439bee75332..b45f60489c6fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ blank = true [tool.codespell] # Todo: enable also python files in a next step -skip = '*.py' +#skip = '*.py' quiet-level = 3 # comma separated list of words; waiting for: # https://github.com/codespell-project/codespell/issues/2839#issuecomment-1731601603 diff --git a/setup.py b/setup.py index b432033a0d8c4..fffb38f9a578b 100755 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ - for `pytorch-lightning` use `export PACKAGE_NAME=pytorch ; pip install .` - for 
`lightning-fabric` use `export PACKAGE_NAME=fabric ; pip install .` -3. Building packages as sdist or binary wheel and installing or publish to PyPI afterwords you use command +3. Building packages as sdist or binary wheel and installing or publish to PyPI afterwards you use command `python setup.py sdist` or `python setup.py bdist_wheel` accordingly. In case you want to build just a particular package you want to set an environment variable: `PACKAGE_NAME=lightning|pytorch|fabric python setup.py sdist|bdist_wheel` diff --git a/src/lightning/__version__.py b/src/lightning/__version__.py index 1491508baf4b3..862b5f95d8845 100644 --- a/src/lightning/__version__.py +++ b/src/lightning/__version__.py @@ -5,5 +5,5 @@ if not os.path.exists(_VERSION_PATH): # relevant for `bdist_wheel` _VERSION_PATH = os.path.join(_PACKAGE_ROOT, "version.info") -with open(_VERSION_PATH, encoding="utf-8") as fo: - version = fo.readlines()[0].strip() +with open(_VERSION_PATH, encoding="utf-8") as fopen: + version = fopen.readlines()[0].strip() diff --git a/src/lightning/fabric/connector.py b/src/lightning/fabric/connector.py index ac3cc7c13851c..b3289debbd522 100644 --- a/src/lightning/fabric/connector.py +++ b/src/lightning/fabric/connector.py @@ -83,7 +83,7 @@ class _Connector: 1. strategy class 2. strategy str registered with STRATEGY_REGISTRY 3. strategy str in _strategy_type enum which listed in each strategy as - backend (registed these too, and _strategy_type could be deprecated) + backend (registered these too, and _strategy_type could be deprecated) C. plugins flag could be: 1. precision class (should be removed, and precision flag should allow user pass classes) diff --git a/src/lightning/fabric/fabric.py b/src/lightning/fabric/fabric.py index 36ffc0c1c7772..92870fcd7afb2 100644 --- a/src/lightning/fabric/fabric.py +++ b/src/lightning/fabric/fabric.py @@ -327,7 +327,7 @@ def setup_optimizers(self, *optimizers: Optimizer) -> Union[_FabricOptimizer, tu ``.setup(model, optimizer, ...)`` instead to jointly set them up. Args: - *optimizers: One or more optmizers to set up. + *optimizers: One or more optimizers to set up. Returns: The wrapped optimizer(s). diff --git a/src/lightning/fabric/strategies/parallel.py b/src/lightning/fabric/strategies/parallel.py index d9bc1a03d1bb5..327cfc016d4ef 100644 --- a/src/lightning/fabric/strategies/parallel.py +++ b/src/lightning/fabric/strategies/parallel.py @@ -87,7 +87,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo @override def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool: - """Reduces a boolean decision over distributed processes. By default is analagous to ``all`` from the standard + """Reduces a boolean decision over distributed processes. By default is analogous to ``all`` from the standard library, returning ``True`` only if all input decisions evaluate to ``True``. If ``all`` is set to ``False``, it behaves like ``any`` instead. 
diff --git a/src/lightning/pytorch/callbacks/progress/rich_progress.py b/src/lightning/pytorch/callbacks/progress/rich_progress.py index 0a51d99ccb676..7bb98e8a9058c 100644 --- a/src/lightning/pytorch/callbacks/progress/rich_progress.py +++ b/src/lightning/pytorch/callbacks/progress/rich_progress.py @@ -430,7 +430,7 @@ def on_validation_batch_start( if self.val_progress_bar_id is not None: self.progress.update(self.val_progress_bar_id, advance=0, visible=False) - # TODO: remove old tasks when new onces are created + # TODO: remove old tasks when new ones are created self.val_progress_bar_id = self._add_task( self.total_val_batches_current_dataloader, self.validation_description, diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py index c484a95c6c632..8108100be6dc4 100644 --- a/src/lightning/pytorch/core/module.py +++ b/src/lightning/pytorch/core/module.py @@ -262,7 +262,7 @@ def current_epoch(self) -> int: def global_step(self) -> int: """Total training batches seen across all epochs. - If no Trainer is attached, this propery is 0. + If no Trainer is attached, this property is 0. """ return self.trainer.global_step if self._trainer else 0 diff --git a/src/lightning/pytorch/demos/transformer.py b/src/lightning/pytorch/demos/transformer.py index eca86b4cb4dc7..fefa073fbd310 100644 --- a/src/lightning/pytorch/demos/transformer.py +++ b/src/lightning/pytorch/demos/transformer.py @@ -84,7 +84,7 @@ def __init__(self, dim: int, dropout: float = 0.1, max_len: int = 5000) -> None: def forward(self, x: Tensor) -> Tensor: if self.pe is None: # 1) can't use buffer, see https://github.com/pytorch/pytorch/issues/68407 - # 2) can't use parameter becauses pe gets sliced and DDP requires all params to participate in forward + # 2) can't use parameter because pe gets sliced and DDP requires all params to participate in forward # TODO: Could make this a `nn.Parameter` with `requires_grad=False` self.pe = self._init_pos_encoding(device=x.device) diff --git a/src/lightning/pytorch/strategies/parallel.py b/src/lightning/pytorch/strategies/parallel.py index 285d40706a5a9..dbd8e2962b230 100644 --- a/src/lightning/pytorch/strategies/parallel.py +++ b/src/lightning/pytorch/strategies/parallel.py @@ -93,7 +93,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo @override def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool: - """Reduces a boolean decision over distributed processes. By default is analagous to ``all`` from the standard + """Reduces a boolean decision over distributed processes. By default is analogous to ``all`` from the standard library, returning ``True`` only if all input decisions evaluate to ``True``. If ``all`` is set to ``False``, it behaves like ``any`` instead. diff --git a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py index 603aedfc94589..1423c1aeeafe4 100644 --- a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py +++ b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py @@ -467,7 +467,7 @@ def _check_strategy_and_fallback(self) -> None: if strategy_flag in _DDP_FORK_ALIASES and "fork" not in torch.multiprocessing.get_all_start_methods(): raise ValueError( f"You selected `Trainer(strategy='{strategy_flag}')` but process forking is not supported on this" - f" platform. We recommed `Trainer(strategy='ddp_spawn')` instead." + f" platform. 
We recommend `Trainer(strategy='ddp_spawn')` instead." ) if strategy_flag: self._strategy_flag = strategy_flag diff --git a/src/lightning_fabric/__version__.py b/src/lightning_fabric/__version__.py index 1491508baf4b3..862b5f95d8845 100644 --- a/src/lightning_fabric/__version__.py +++ b/src/lightning_fabric/__version__.py @@ -5,5 +5,5 @@ if not os.path.exists(_VERSION_PATH): # relevant for `bdist_wheel` _VERSION_PATH = os.path.join(_PACKAGE_ROOT, "version.info") -with open(_VERSION_PATH, encoding="utf-8") as fo: - version = fo.readlines()[0].strip() +with open(_VERSION_PATH, encoding="utf-8") as fopen: + version = fopen.readlines()[0].strip() diff --git a/src/pytorch_lightning/__version__.py b/src/pytorch_lightning/__version__.py index 1491508baf4b3..862b5f95d8845 100644 --- a/src/pytorch_lightning/__version__.py +++ b/src/pytorch_lightning/__version__.py @@ -5,5 +5,5 @@ if not os.path.exists(_VERSION_PATH): # relevant for `bdist_wheel` _VERSION_PATH = os.path.join(_PACKAGE_ROOT, "version.info") -with open(_VERSION_PATH, encoding="utf-8") as fo: - version = fo.readlines()[0].strip() +with open(_VERSION_PATH, encoding="utf-8") as fopen: + version = fopen.readlines()[0].strip() diff --git a/tests/parity_fabric/test_parity_ddp.py b/tests/parity_fabric/test_parity_ddp.py index d30d2b6233886..4fc78d384de45 100644 --- a/tests/parity_fabric/test_parity_ddp.py +++ b/tests/parity_fabric/test_parity_ddp.py @@ -126,7 +126,7 @@ def train_fabric_ddp(fabric): def run_parity_test(accelerator: str = "cpu", devices: int = 2, tolerance: float = 0.02): cuda_reset() - # Launch processes with Fabric and re-use them for the PyTorch training for convenience + # Launch processes with Fabric and reuse them for the PyTorch training for convenience fabric = Fabric(accelerator=accelerator, strategy="ddp", devices=devices) fabric.launch() diff --git a/tests/tests_fabric/plugins/environments/test_slurm.py b/tests/tests_fabric/plugins/environments/test_slurm.py index 75ca43577d579..b907c287faa5f 100644 --- a/tests/tests_fabric/plugins/environments/test_slurm.py +++ b/tests/tests_fabric/plugins/environments/test_slurm.py @@ -174,7 +174,7 @@ def test_validate_user_settings(): with pytest.raises(ValueError, match="the number of nodes configured in SLURM .* does not match"): env.validate_settings(num_devices=4, num_nodes=1) - # in interactive mode, validation is skipped becauses processes get launched by Fabric/Trainer, not SLURM + # in interactive mode, validation is skipped because processes get launched by Fabric/Trainer, not SLURM with mock.patch( "lightning.fabric.plugins.environments.slurm.SLURMEnvironment.job_name", return_value="interactive" ): diff --git a/tests/tests_fabric/strategies/test_ddp_integration.py b/tests/tests_fabric/strategies/test_ddp_integration.py index 3ed76211e5d6d..9d43724228cd2 100644 --- a/tests/tests_fabric/strategies/test_ddp_integration.py +++ b/tests/tests_fabric/strategies/test_ddp_integration.py @@ -85,7 +85,7 @@ def test_reapply_compile(): fabric.launch() model = BoringModel() - # currently (PyTorch 2.6) using ruduce-overhead here casues a RuntimeError: + # currently (PyTorch 2.6) using reduce overhead here causes a RuntimeError: # Error: accessing tensor output of CUDAGraphs that has been overwritten by a subsequent run. 
compile_kwargs = {"mode": "reduce-overhead"} if _TORCH_LESS_EQUAL_2_6 else {} compiled_model = torch.compile(model, **compile_kwargs) diff --git a/tests/tests_fabric/strategies/test_fsdp_integration.py b/tests/tests_fabric/strategies/test_fsdp_integration.py index 576a0df38b966..5da9b50399a94 100644 --- a/tests/tests_fabric/strategies/test_fsdp_integration.py +++ b/tests/tests_fabric/strategies/test_fsdp_integration.py @@ -412,7 +412,7 @@ def test_reapply_compile(): fabric.launch() model = BoringModel() - # currently (PyTorch 2.6) using ruduce-overhead here casues a RuntimeError: + # currently (PyTorch 2.6) using reduce-overhead here causes a RuntimeError: # Error: accessing tensor output of CUDAGraphs that has been overwritten by a subsequent run. compile_kwargs = {"mode": "reduce-overhead"} if _TORCH_LESS_EQUAL_2_6 else {} compiled_model = torch.compile(model, **compile_kwargs) diff --git a/tests/tests_fabric/test_connector.py b/tests/tests_fabric/test_connector.py index c6bef5943a30f..1074789e71055 100644 --- a/tests/tests_fabric/test_connector.py +++ b/tests/tests_fabric/test_connector.py @@ -194,23 +194,23 @@ def name() -> str: class Prec(Precision): pass - class Strat(SingleDeviceStrategy): pass + class TestStrategy(SingleDeviceStrategy): pass - strategy = Strat(device=torch.device("cpu"), accelerator=Accel(), precision=Prec()) + strategy = TestStrategy(device=torch.device("cpu"), accelerator=Accel(), precision=Prec()) connector = _Connector(strategy=strategy, devices=2) assert isinstance(connector.accelerator, Accel) - assert isinstance(connector.strategy, Strat) + assert isinstance(connector.strategy, TestStrategy) assert isinstance(connector.precision, Prec) assert connector.strategy is strategy - class Strat(DDPStrategy): pass + class TestStrategy(DDPStrategy): pass - strategy = Strat(accelerator=Accel(), precision=Prec()) + strategy = TestStrategy(accelerator=Accel(), precision=Prec()) connector = _Connector(strategy=strategy, devices=2) assert isinstance(connector.accelerator, Accel) - assert isinstance(connector.strategy, Strat) + assert isinstance(connector.strategy, TestStrategy) assert isinstance(connector.precision, Prec) assert connector.strategy is strategy diff --git a/tests/tests_pytorch/callbacks/test_throughput_monitor.py b/tests/tests_pytorch/callbacks/test_throughput_monitor.py index 9f77e4371e69e..83bcb16c81797 100644 --- a/tests/tests_pytorch/callbacks/test_throughput_monitor.py +++ b/tests/tests_pytorch/callbacks/test_throughput_monitor.py @@ -303,7 +303,7 @@ def test_throughput_monitor_eval(tmp_path, fn): assert logger_mock.log_metrics.mock_calls == [ call(metrics={**expected, f"{fn}|batches": 3, f"{fn}|samples": 9}, step=3), call(metrics={**expected, f"{fn}|batches": 6, f"{fn}|samples": 18}, step=6), - # the step doesnt repeat + # the step doesn't repeat call(metrics={**expected, f"{fn}|batches": 9, f"{fn}|samples": 27}, step=9), call(metrics={**expected, f"{fn}|batches": 12, f"{fn}|samples": 36}, step=12), ] diff --git a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py index 1907a5fb35799..7b17498865889 100644 --- a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py +++ b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py @@ -326,8 +326,8 @@ def test_model_checkpoint_to_yaml(tmp_path, save_top_k: int): path_yaml = tmp_path / "best_k_models.yaml" checkpoint.to_yaml(path_yaml) - with open(path_yaml) as fo: - d = yaml.full_load(fo) + with open(path_yaml) as fopen: + d = 
yaml.full_load(fopen) best_k = dict(checkpoint.best_k_models.items()) assert d == best_k diff --git a/tests/tests_pytorch/core/test_lightning_optimizer.py b/tests/tests_pytorch/core/test_lightning_optimizer.py index ed1ca2b4db03f..042532f968e7d 100644 --- a/tests/tests_pytorch/core/test_lightning_optimizer.py +++ b/tests/tests_pytorch/core/test_lightning_optimizer.py @@ -45,7 +45,7 @@ def configure_optimizers(self): def test_init_optimizers_resets_lightning_optimizers(tmp_path): - """Test that the Trainer resets the `lightning_optimizers` list everytime new optimizers get initialized.""" + """Test that the Trainer resets the `lightning_optimizers` list every time new optimizers get initialized.""" def compare_optimizers(): assert trainer.strategy._lightning_optimizers[0].optimizer is trainer.optimizers[0] diff --git a/tests/tests_pytorch/models/test_cpu.py b/tests/tests_pytorch/models/test_cpu.py index a2d38aca7c56c..38e37effe9600 100644 --- a/tests/tests_pytorch/models/test_cpu.py +++ b/tests/tests_pytorch/models/test_cpu.py @@ -49,7 +49,7 @@ def test_cpu_slurm_save_load(_, tmp_path): trainer.fit(model) real_global_step = trainer.global_step - # traning complete + # training complete assert trainer.state.finished, "cpu model failed to complete" # predict with trained model before saving diff --git a/tests/tests_pytorch/models/test_restore.py b/tests/tests_pytorch/models/test_restore.py index 099493890831d..e651e4729a72f 100644 --- a/tests/tests_pytorch/models/test_restore.py +++ b/tests/tests_pytorch/models/test_restore.py @@ -547,7 +547,7 @@ def test_strict_model_load_more_params(monkeypatch, tmp_path, tmpdir_server, url ) trainer.fit(model) - # traning complete + # training complete assert trainer.state.finished, f"Training failed with {trainer.state}" # save model @@ -587,7 +587,7 @@ def test_strict_model_load_less_params(monkeypatch, tmp_path, tmpdir_server, url ) trainer.fit(model) - # traning complete + # training complete assert trainer.state.finished, f"Training failed with {trainer.state}" # save model diff --git a/tests/tests_pytorch/plugins/test_checkpoint_io_plugin.py b/tests/tests_pytorch/plugins/test_checkpoint_io_plugin.py index cae26fc1fe775..0f62eeae69ef8 100644 --- a/tests/tests_pytorch/plugins/test_checkpoint_io_plugin.py +++ b/tests/tests_pytorch/plugins/test_checkpoint_io_plugin.py @@ -16,6 +16,7 @@ from typing import Any, Optional from unittest.mock import MagicMock, Mock +import pytest import torch from lightning.fabric.plugins import CheckpointIO, TorchCheckpointIO @@ -97,6 +98,7 @@ def test_checkpoint_plugin_called(tmp_path): checkpoint_plugin.load_checkpoint.assert_called_with(str(tmp_path / "last-v1.ckpt")) +@pytest.mark.flaky(reruns=3) def test_async_checkpoint_plugin(tmp_path): """Ensure that the custom checkpoint IO plugin and torch checkpoint IO plugin is called when async saving and loading.""" diff --git a/tests/tests_pytorch/strategies/launchers/test_multiprocessing.py b/tests/tests_pytorch/strategies/launchers/test_multiprocessing.py index d26f6c4d2c3ef..d0b4ab617df66 100644 --- a/tests/tests_pytorch/strategies/launchers/test_multiprocessing.py +++ b/tests/tests_pytorch/strategies/launchers/test_multiprocessing.py @@ -230,7 +230,7 @@ def test_fit_twice_raises(mps_count_0): barebones=True, ) trainer.fit(model) - trainer.test(model) # make sure testing in between doesnt impact the result + trainer.test(model) # make sure testing in between doesn't impact the result trainer.fit_loop.max_epochs += 1 with pytest.raises(NotImplementedError, match=r"twice.*is 
not supported"): trainer.fit(model) diff --git a/tests/tests_pytorch/strategies/test_fsdp.py b/tests/tests_pytorch/strategies/test_fsdp.py index f3e88ca356764..560ab19f823ca 100644 --- a/tests/tests_pytorch/strategies/test_fsdp.py +++ b/tests/tests_pytorch/strategies/test_fsdp.py @@ -110,7 +110,7 @@ def __init__(self, wrap_min_params: int = 2): self.should_be_wrapped = [wrap_min_params < (32 * 32 + 32), None, wrap_min_params < (32 * 2 + 2)] def configure_optimizers(self): - # SGD's FSDP optimier state is fixed in https://github.com/pytorch/pytorch/pull/99214 + # SGD's FSDP optimizer state is fixed in https://github.com/pytorch/pytorch/pull/99214 return torch.optim.AdamW(self.parameters(), lr=0.1) @@ -808,7 +808,7 @@ def __init__(self, params_to_compare=None): self.params_to_compare = params_to_compare def configure_optimizers(self): - # SGD's FSDP optimier state is fixed in https://github.com/pytorch/pytorch/pull/99214 + # SGD's FSDP optimizer state is fixed in https://github.com/pytorch/pytorch/pull/99214 return torch.optim.AdamW(self.parameters(), lr=0.1) def on_train_start(self): diff --git a/tests/tests_pytorch/test_cli.py b/tests/tests_pytorch/test_cli.py index 9cf81aa5a739c..59ce4cfe4bb71 100644 --- a/tests/tests_pytorch/test_cli.py +++ b/tests/tests_pytorch/test_cli.py @@ -1251,7 +1251,7 @@ def test_lightning_cli_datamodule_short_arguments(): with mock.patch("sys.argv", ["any.py"]): cli = LightningCLI(BoringModel, BoringDataModule, run=False) - # since we are passing the DataModule, that's whats added to the parser + # since we are passing the DataModule, that's what's added to the parser assert cli.parser.groups["data"].group_class is BoringDataModule diff --git a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py index b8517a0303015..3877d6c051017 100644 --- a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py @@ -207,23 +207,23 @@ def name() -> str: class Prec(Precision): pass - class Strat(SingleDeviceStrategy): pass + class TestStrategy(SingleDeviceStrategy): pass - strategy = Strat(device=torch.device("cpu"), accelerator=Accel(), precision_plugin=Prec()) + strategy = TestStrategy(device=torch.device("cpu"), accelerator=Accel(), precision_plugin=Prec()) trainer = Trainer(strategy=strategy, fast_dev_run=True, devices=2) assert isinstance(trainer.accelerator, Accel) - assert isinstance(trainer.strategy, Strat) + assert isinstance(trainer.strategy, TestStrategy) assert isinstance(trainer.precision_plugin, Prec) assert trainer._accelerator_connector.strategy is strategy - class Strat(DDPStrategy): pass + class TestStrategy(DDPStrategy): pass - strategy = Strat(accelerator=Accel(), precision_plugin=Prec()) + strategy = TestStrategy(accelerator=Accel(), precision_plugin=Prec()) trainer = Trainer(strategy=strategy, fast_dev_run=True, devices=2) assert isinstance(trainer.accelerator, Accel) - assert isinstance(trainer.strategy, Strat) + assert isinstance(trainer.strategy, TestStrategy) assert isinstance(trainer.precision_plugin, Prec) assert trainer._accelerator_connector.strategy is strategy