Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions .actions/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ def load_readme_description(path_dir: str, homepage: str, version: str) -> str:

"""
path_readme = os.path.join(path_dir, "README.md")
with open(path_readme, encoding="utf-8") as fo:
text = fo.read()
with open(path_readme, encoding="utf-8") as fopen:
text = fopen.read()

# drop images from readme
text = text.replace(
Expand Down Expand Up @@ -308,17 +308,17 @@ def copy_replace_imports(
if ext in (".pyc",):
continue
# Try to parse everything else
with open(fp, encoding="utf-8") as fo:
with open(fp, encoding="utf-8") as fopen:
try:
lines = fo.readlines()
lines = fopen.readlines()
except UnicodeDecodeError:
# a binary file, skip
print(f"Skipped replacing imports for {fp}")
continue
lines = _replace_imports(lines, list(zip(source_imports, target_imports)), lightning_by=lightning_by)
os.makedirs(os.path.dirname(fp_new), exist_ok=True)
with open(fp_new, "w", encoding="utf-8") as fo:
fo.writelines(lines)
with open(fp_new, "w", encoding="utf-8") as fopen:
fopen.writelines(lines)


def create_mirror_package(source_dir: str, package_mapping: dict[str, str]) -> None:
Expand Down Expand Up @@ -370,10 +370,10 @@ def _prune_packages(req_file: str, packages: Sequence[str]) -> None:

@staticmethod
def _replace_min(fname: str) -> None:
with open(fname, encoding="utf-8") as fo:
req = fo.read().replace(">=", "==")
with open(fname, "w", encoding="utf-8") as fw:
fw.write(req)
with open(fname, encoding="utf-8") as fopen:
req = fopen.read().replace(">=", "==")
with open(fname, "w", encoding="utf-8") as fwrite:
fwrite.write(req)

@staticmethod
def replace_oldest_ver(requirement_fnames: Sequence[str] = REQUIREMENT_FILES_ALL) -> None:
Expand Down Expand Up @@ -471,15 +471,15 @@ def convert_version2nightly(ver_file: str = "src/version.info") -> None:
"""Load the actual version and convert it to the nightly version."""
from datetime import datetime

with open(ver_file) as fo:
version = fo.read().strip()
with open(ver_file) as fopen:
version = fopen.read().strip()
# parse X.Y.Z version and prune any suffix
vers = re.match(r"(\d+)\.(\d+)\.(\d+).*", version)
# create timestamp YYYYMMDD
timestamp = datetime.now().strftime("%Y%m%d")
version = f"{'.'.join(vers.groups())}.dev{timestamp}"
with open(ver_file, "w") as fo:
fo.write(version + os.linesep)
with open(ver_file, "w") as fopen:
fopen.write(version + os.linesep)

@staticmethod
def generate_docker_tags(
Expand Down
9 changes: 5 additions & 4 deletions dockers/base-cuda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@ ENV \
MAKEFLAGS="-j2"

RUN \
apt-get update --fix-missing && apt-get install -y wget && \
apt-get update -qq --fix-missing && \
NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \
CUDA_VERSION_MM=${CUDA_VERSION%.*} && \
apt-get update -qq --fix-missing && apt-get install -y wget && \
NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \
echo "NCCL version found: $NCCL_VER" && \
TO_INSTALL_NCCL=$(echo -e "$MAX_ALLOWED_NCCL\n$NCCL_VER" | sort -V | head -n1)-1+cuda${CUDA_VERSION_MM} && \
echo "NCCL version to install: $TO_INSTALL_NCCL" && \
apt-get install -y --no-install-recommends --allow-downgrades --allow-change-held-packages \
build-essential \
pkg-config \
Expand Down Expand Up @@ -96,7 +97,7 @@ RUN \
--extra-index-url="https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM//'.'/''}/"

RUN \
# Show what we have
# Show what we have \
pip --version && \
pip list && \
python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \
Expand Down
2 changes: 1 addition & 1 deletion examples/fabric/build_your_own_trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def load(self, state: Optional[Mapping], path: str) -> None:
"""Loads a checkpoint from a given file into state.

Args:
state: a mapping contaning model, optimizer and lr scheduler
state: a mapping containing model, optimizer and lr scheduler
path: the path to load the checkpoint from

"""
Expand Down
2 changes: 1 addition & 1 deletion examples/fabric/meta_learning/train_fabric.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def accuracy(predictions, targets):
def fast_adapt(batch, learner, loss, adaptation_steps, shots, ways):
data, labels = batch

# Separate data into adaptation/evalutation sets
# Separate data into adaptation/evaluation sets
adaptation_indices = torch.zeros(data.size(0), dtype=bool)
adaptation_indices[torch.arange(shots * ways) * 2] = True
evaluation_indices = ~adaptation_indices
Expand Down
2 changes: 1 addition & 1 deletion examples/fabric/meta_learning/train_torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def fast_adapt(batch, learner, loss, adaptation_steps, shots, ways, device):
data, labels = batch
data, labels = data.to(device), labels.to(device)

# Separate data into adaptation/evalutation sets
# Separate data into adaptation/evaluation sets
adaptation_indices = torch.zeros(data.size(0), dtype=bool)
adaptation_indices[torch.arange(shots * ways) * 2] = True
evaluation_indices = ~adaptation_indices
Expand Down
2 changes: 1 addition & 1 deletion examples/pytorch/domain_templates/reinforce_learn_ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def generate_trajectory_samples(self) -> tuple[list[torch.Tensor], list[torch.Te
# logging
self.avg_reward = sum(self.epoch_rewards) / self.steps_per_epoch

# if epoch ended abruptly, exlude last cut-short episode to prevent stats skewness
# if epoch ended abruptly, exclude last cut-short episode to prevent stats skewness
epoch_rewards = self.epoch_rewards
if not done:
epoch_rewards = epoch_rewards[:-1]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ blank = true

[tool.codespell]
# Todo: enable also python files in a next step
skip = '*.py'
#skip = '*.py'
quiet-level = 3
# comma separated list of words; waiting for:
# https://github.com/codespell-project/codespell/issues/2839#issuecomment-1731601603
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
- for `pytorch-lightning` use `export PACKAGE_NAME=pytorch ; pip install .`
- for `lightning-fabric` use `export PACKAGE_NAME=fabric ; pip install .`

3. Building packages as sdist or binary wheel and installing or publish to PyPI afterwords you use command
3. To build packages as sdist or binary wheel, and install or publish them to PyPI afterwards, use the command
`python setup.py sdist` or `python setup.py bdist_wheel` accordingly.
In case you want to build just a particular package you want to set an environment variable:
`PACKAGE_NAME=lightning|pytorch|fabric python setup.py sdist|bdist_wheel`
Expand Down
4 changes: 2 additions & 2 deletions src/lightning/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
if not os.path.exists(_VERSION_PATH):
# relevant for `bdist_wheel`
_VERSION_PATH = os.path.join(_PACKAGE_ROOT, "version.info")
with open(_VERSION_PATH, encoding="utf-8") as fo:
version = fo.readlines()[0].strip()
with open(_VERSION_PATH, encoding="utf-8") as fopen:
version = fopen.readlines()[0].strip()
2 changes: 1 addition & 1 deletion src/lightning/fabric/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class _Connector:
1. strategy class
2. strategy str registered with STRATEGY_REGISTRY
3. strategy str in _strategy_type enum which listed in each strategy as
backend (registed these too, and _strategy_type could be deprecated)
backend (registered these too, and _strategy_type could be deprecated)

C. plugins flag could be:
1. precision class (should be removed, and precision flag should allow user pass classes)
Expand Down
2 changes: 1 addition & 1 deletion src/lightning/fabric/fabric.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def setup_optimizers(self, *optimizers: Optimizer) -> Union[_FabricOptimizer, tu
``.setup(model, optimizer, ...)`` instead to jointly set them up.

Args:
*optimizers: One or more optmizers to set up.
*optimizers: One or more optimizers to set up.

Returns:
The wrapped optimizer(s).
Expand Down
2 changes: 1 addition & 1 deletion src/lightning/fabric/strategies/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo

@override
def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool:
"""Reduces a boolean decision over distributed processes. By default is analagous to ``all`` from the standard
"""Reduces a boolean decision over distributed processes. By default is analogous to ``all`` from the standard
library, returning ``True`` only if all input decisions evaluate to ``True``. If ``all`` is set to ``False``,
it behaves like ``any`` instead.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ def on_validation_batch_start(
if self.val_progress_bar_id is not None:
self.progress.update(self.val_progress_bar_id, advance=0, visible=False)

# TODO: remove old tasks when new onces are created
# TODO: remove old tasks when new ones are created
self.val_progress_bar_id = self._add_task(
self.total_val_batches_current_dataloader,
self.validation_description,
Expand Down
2 changes: 1 addition & 1 deletion src/lightning/pytorch/core/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def current_epoch(self) -> int:
def global_step(self) -> int:
"""Total training batches seen across all epochs.

If no Trainer is attached, this propery is 0.
If no Trainer is attached, this property is 0.

"""
return self.trainer.global_step if self._trainer else 0
Expand Down
2 changes: 1 addition & 1 deletion src/lightning/pytorch/demos/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __init__(self, dim: int, dropout: float = 0.1, max_len: int = 5000) -> None:
def forward(self, x: Tensor) -> Tensor:
if self.pe is None:
# 1) can't use buffer, see https://github.com/pytorch/pytorch/issues/68407
# 2) can't use parameter becauses pe gets sliced and DDP requires all params to participate in forward
# 2) can't use parameter because pe gets sliced and DDP requires all params to participate in forward
# TODO: Could make this a `nn.Parameter` with `requires_grad=False`
self.pe = self._init_pos_encoding(device=x.device)

Expand Down
2 changes: 1 addition & 1 deletion src/lightning/pytorch/strategies/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo

@override
def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool:
"""Reduces a boolean decision over distributed processes. By default is analagous to ``all`` from the standard
"""Reduces a boolean decision over distributed processes. By default is analogous to ``all`` from the standard
library, returning ``True`` only if all input decisions evaluate to ``True``. If ``all`` is set to ``False``,
it behaves like ``any`` instead.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ def _check_strategy_and_fallback(self) -> None:
if strategy_flag in _DDP_FORK_ALIASES and "fork" not in torch.multiprocessing.get_all_start_methods():
raise ValueError(
f"You selected `Trainer(strategy='{strategy_flag}')` but process forking is not supported on this"
f" platform. We recommed `Trainer(strategy='ddp_spawn')` instead."
f" platform. We recommend `Trainer(strategy='ddp_spawn')` instead."
)
if strategy_flag:
self._strategy_flag = strategy_flag
Expand Down
4 changes: 2 additions & 2 deletions src/lightning_fabric/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
if not os.path.exists(_VERSION_PATH):
# relevant for `bdist_wheel`
_VERSION_PATH = os.path.join(_PACKAGE_ROOT, "version.info")
with open(_VERSION_PATH, encoding="utf-8") as fo:
version = fo.readlines()[0].strip()
with open(_VERSION_PATH, encoding="utf-8") as fopen:
version = fopen.readlines()[0].strip()
4 changes: 2 additions & 2 deletions src/pytorch_lightning/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
if not os.path.exists(_VERSION_PATH):
# relevant for `bdist_wheel`
_VERSION_PATH = os.path.join(_PACKAGE_ROOT, "version.info")
with open(_VERSION_PATH, encoding="utf-8") as fo:
version = fo.readlines()[0].strip()
with open(_VERSION_PATH, encoding="utf-8") as fopen:
version = fopen.readlines()[0].strip()
2 changes: 1 addition & 1 deletion tests/parity_fabric/test_parity_ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def train_fabric_ddp(fabric):
def run_parity_test(accelerator: str = "cpu", devices: int = 2, tolerance: float = 0.02):
cuda_reset()

# Launch processes with Fabric and re-use them for the PyTorch training for convenience
# Launch processes with Fabric and reuse them for the PyTorch training for convenience
fabric = Fabric(accelerator=accelerator, strategy="ddp", devices=devices)
fabric.launch()

Expand Down
2 changes: 1 addition & 1 deletion tests/tests_fabric/plugins/environments/test_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def test_validate_user_settings():
with pytest.raises(ValueError, match="the number of nodes configured in SLURM .* does not match"):
env.validate_settings(num_devices=4, num_nodes=1)

# in interactive mode, validation is skipped becauses processes get launched by Fabric/Trainer, not SLURM
# in interactive mode, validation is skipped because processes get launched by Fabric/Trainer, not SLURM
with mock.patch(
"lightning.fabric.plugins.environments.slurm.SLURMEnvironment.job_name", return_value="interactive"
):
Expand Down
2 changes: 1 addition & 1 deletion tests/tests_fabric/strategies/test_ddp_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_reapply_compile():
fabric.launch()

model = BoringModel()
# currently (PyTorch 2.6) using ruduce-overhead here casues a RuntimeError:
# currently (PyTorch 2.6) using reduce-overhead here causes a RuntimeError:
# Error: accessing tensor output of CUDAGraphs that has been overwritten by a subsequent run.
compile_kwargs = {"mode": "reduce-overhead"} if _TORCH_LESS_EQUAL_2_6 else {}
compiled_model = torch.compile(model, **compile_kwargs)
Expand Down
2 changes: 1 addition & 1 deletion tests/tests_fabric/strategies/test_fsdp_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ def test_reapply_compile():
fabric.launch()

model = BoringModel()
# currently (PyTorch 2.6) using ruduce-overhead here casues a RuntimeError:
# currently (PyTorch 2.6) using reduce-overhead here causes a RuntimeError:
# Error: accessing tensor output of CUDAGraphs that has been overwritten by a subsequent run.
compile_kwargs = {"mode": "reduce-overhead"} if _TORCH_LESS_EQUAL_2_6 else {}
compiled_model = torch.compile(model, **compile_kwargs)
Expand Down
12 changes: 6 additions & 6 deletions tests/tests_fabric/test_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,23 +194,23 @@ def name() -> str:
class Prec(Precision):
pass

class Strat(SingleDeviceStrategy):
class TestStrategy(SingleDeviceStrategy):
pass

strategy = Strat(device=torch.device("cpu"), accelerator=Accel(), precision=Prec())
strategy = TestStrategy(device=torch.device("cpu"), accelerator=Accel(), precision=Prec())
connector = _Connector(strategy=strategy, devices=2)
assert isinstance(connector.accelerator, Accel)
assert isinstance(connector.strategy, Strat)
assert isinstance(connector.strategy, TestStrategy)
assert isinstance(connector.precision, Prec)
assert connector.strategy is strategy

class Strat(DDPStrategy):
class TestStrategy(DDPStrategy):
pass

strategy = Strat(accelerator=Accel(), precision=Prec())
strategy = TestStrategy(accelerator=Accel(), precision=Prec())
connector = _Connector(strategy=strategy, devices=2)
assert isinstance(connector.accelerator, Accel)
assert isinstance(connector.strategy, Strat)
assert isinstance(connector.strategy, TestStrategy)
assert isinstance(connector.precision, Prec)
assert connector.strategy is strategy

Expand Down
2 changes: 1 addition & 1 deletion tests/tests_pytorch/callbacks/test_throughput_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def test_throughput_monitor_eval(tmp_path, fn):
assert logger_mock.log_metrics.mock_calls == [
call(metrics={**expected, f"{fn}|batches": 3, f"{fn}|samples": 9}, step=3),
call(metrics={**expected, f"{fn}|batches": 6, f"{fn}|samples": 18}, step=6),
# the step doesnt repeat
# the step doesn't repeat
call(metrics={**expected, f"{fn}|batches": 9, f"{fn}|samples": 27}, step=9),
call(metrics={**expected, f"{fn}|batches": 12, f"{fn}|samples": 36}, step=12),
]
4 changes: 2 additions & 2 deletions tests/tests_pytorch/checkpointing/test_model_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,8 @@ def test_model_checkpoint_to_yaml(tmp_path, save_top_k: int):

path_yaml = tmp_path / "best_k_models.yaml"
checkpoint.to_yaml(path_yaml)
with open(path_yaml) as fo:
d = yaml.full_load(fo)
with open(path_yaml) as fopen:
d = yaml.full_load(fopen)
best_k = dict(checkpoint.best_k_models.items())
assert d == best_k

Expand Down
2 changes: 1 addition & 1 deletion tests/tests_pytorch/core/test_lightning_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def configure_optimizers(self):


def test_init_optimizers_resets_lightning_optimizers(tmp_path):
"""Test that the Trainer resets the `lightning_optimizers` list everytime new optimizers get initialized."""
"""Test that the Trainer resets the `lightning_optimizers` list every time new optimizers get initialized."""

def compare_optimizers():
assert trainer.strategy._lightning_optimizers[0].optimizer is trainer.optimizers[0]
Expand Down
2 changes: 1 addition & 1 deletion tests/tests_pytorch/models/test_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_cpu_slurm_save_load(_, tmp_path):
trainer.fit(model)
real_global_step = trainer.global_step

# traning complete
# training complete
assert trainer.state.finished, "cpu model failed to complete"

# predict with trained model before saving
Expand Down
4 changes: 2 additions & 2 deletions tests/tests_pytorch/models/test_restore.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ def test_strict_model_load_more_params(monkeypatch, tmp_path, tmpdir_server, url
)
trainer.fit(model)

# traning complete
# training complete
assert trainer.state.finished, f"Training failed with {trainer.state}"

# save model
Expand Down Expand Up @@ -587,7 +587,7 @@ def test_strict_model_load_less_params(monkeypatch, tmp_path, tmpdir_server, url
)
trainer.fit(model)

# traning complete
# training complete
assert trainer.state.finished, f"Training failed with {trainer.state}"

# save model
Expand Down
2 changes: 2 additions & 0 deletions tests/tests_pytorch/plugins/test_checkpoint_io_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from typing import Any, Optional
from unittest.mock import MagicMock, Mock

import pytest
import torch

from lightning.fabric.plugins import CheckpointIO, TorchCheckpointIO
Expand Down Expand Up @@ -97,6 +98,7 @@ def test_checkpoint_plugin_called(tmp_path):
checkpoint_plugin.load_checkpoint.assert_called_with(str(tmp_path / "last-v1.ckpt"))


@pytest.mark.flaky(reruns=3)
def test_async_checkpoint_plugin(tmp_path):
"""Ensure that the custom checkpoint IO plugin and torch checkpoint IO plugin is called when async saving and
loading."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def test_fit_twice_raises(mps_count_0):
barebones=True,
)
trainer.fit(model)
trainer.test(model) # make sure testing in between doesnt impact the result
trainer.test(model) # make sure testing in between doesn't impact the result
trainer.fit_loop.max_epochs += 1
with pytest.raises(NotImplementedError, match=r"twice.*is not supported"):
trainer.fit(model)
Loading
Loading