Skip to content

Commit f77cf31

Browse files
authored
Move to Python 3.12. (#88)
* update readme to state python 3.12 requirement Signed-off-by: Hao Wu <skyw@nvidia.com> * remove typing_extensions fallback Signed-off-by: Hao Wu <skyw@nvidia.com> * modernize type annotation Signed-off-by: Hao Wu <skyw@nvidia.com> * update python badge in readme Signed-off-by: Hao Wu <skyw@nvidia.com> * remaining update for python 3.12 Signed-off-by: Hao Wu <skyw@nvidia.com> --------- Signed-off-by: Hao Wu <skyw@nvidia.com>
1 parent 3969290 commit f77cf31

File tree

18 files changed

+72
-438
lines changed

18 files changed

+72
-438
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
<!-- Get the codecov badge with a token direct from https://app.codecov.io/gh/NVIDIA-NeMo -->
1010
[![codecov](https://codecov.io/gh/NVIDIA-NeMo/Emerging-Optimizers/graph/badge.svg?token=IQ6U7IFYN0)](https://codecov.io/gh/NVIDIA-NeMo/Emerging-Optimizers)
1111
[![CICD NeMo](https://github.com/NVIDIA-NeMo/Emerging-Optimizers/actions/workflows/cicd-main.yml/badge.svg?branch=main)](https://github.com/NVIDIA-NeMo/Emerging-Optimizers/actions/workflows/cicd-main.yml)
12-
[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/release/python-3100/)
12+
[![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/release/python-3120/)
1313
![GitHub Repo stars](https://img.shields.io/github/stars/NVIDIA-NeMo/Emerging-Optimizers)
1414
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://docs.nvidia.com/nemo/emerging-optimizers/latest/index.html)
1515

@@ -41,7 +41,7 @@ Emerging optimizers have demonstrated significant practical impact in large-scal
4141

4242
### Prerequisites
4343

44-
- Python 3.10 or higher, 3.12 is recommended
44+
- Python 3.12 (Release v0.1.0 is the last version that supports Python 3.10)
4545
- PyTorch 2.0 or higher
4646

4747
### Install from Source

docs/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Emerging-Optimizers is under active development. All APIs are experimental and s
1212

1313
### Prerequisites
1414

15-
- Python 3.10 or higher, 3.12 is recommended
15+
- Python 3.12
1616
- PyTorch 2.0 or higher
1717

1818
### Install from Source

emerging_optimizers/orthogonalized_optimizers/adaptive_muon.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,7 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
from typing import Callable, Literal, overload
16-
17-
18-
# TODO(@boxiangw): remove this once bump to python 3.12
19-
try:
20-
from typing import override
21-
except ImportError:
22-
from typing_extensions import override
15+
from typing import Callable, Literal, overload, override
2316

2417
import torch
2518
from torch.optim.optimizer import ParamsT

emerging_optimizers/orthogonalized_optimizers/mop.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# limitations under the License.
1515

1616

17-
from typing import Literal, Optional
17+
from typing import Literal
1818

1919
import torch
2020
from torch.optim.optimizer import ParamsT
@@ -80,9 +80,7 @@ def scaled_orthogonalize_fn(grad: torch.Tensor) -> torch.Tensor:
8080
MOP.__doc__ = MOP.__doc__.format(_args_doc=_args_doc) # type: ignore[union-attr]
8181

8282

83-
def polar_via_svd(
84-
A: torch.Tensor, return_p: bool = False
85-
) -> tuple[torch.Tensor, Optional[torch.Tensor], torch.Tensor]:
83+
def polar_via_svd(A: torch.Tensor, return_p: bool = False) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor]:
8684
"""Compute polar decomposition via SVD
8785
8886
Args:

emerging_optimizers/orthogonalized_optimizers/orthogonalized_optimizer.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,7 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
from typing import Any, Callable, overload
16-
17-
18-
# TODO(@boxiangw): remove this once bump to python 3.12
19-
try:
20-
from typing import override
21-
except ImportError:
22-
from typing_extensions import override
15+
from typing import Any, Callable, overload, override
2316

2417
import torch
2518
import torch.optim as optim

emerging_optimizers/psgd/psgd.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
import math
16-
from typing import Callable, overload
17-
18-
19-
try:
20-
from typing import override
21-
except ImportError:
22-
from typing_extensions import override
16+
from typing import Callable, overload, override
2317

2418
import torch
2519
from torch.optim.optimizer import ParamsT

emerging_optimizers/psgd/psgd_kron_contractions.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
from typing import List
16-
1715
import torch
1816

1917

@@ -43,7 +41,7 @@ def partial_contraction(G1: torch.Tensor, G2: torch.Tensor, axis: int) -> torch.
4341

4442

4543
@torch.compile # type: ignore[misc]
46-
def apply_kronecker_factors(Q_list: List[torch.Tensor], X: torch.Tensor) -> torch.Tensor:
44+
def apply_kronecker_factors(Q_list: list[torch.Tensor], X: torch.Tensor) -> torch.Tensor:
4745
"""Apply all Kronecker factors once to tensor :math:`X`, each to its corresponding dimension.
4846
4947
This applies each :math:`Q` factor once, for example in 2D case: :math:`Q_1 X Q_2^T`.
@@ -67,7 +65,7 @@ def apply_kronecker_factors(Q_list: List[torch.Tensor], X: torch.Tensor) -> torc
6765

6866

6967
@torch.compile # type: ignore[misc]
70-
def apply_preconditioner(Q_list: List[torch.Tensor], X: torch.Tensor) -> torch.Tensor:
68+
def apply_preconditioner(Q_list: list[torch.Tensor], X: torch.Tensor) -> torch.Tensor:
7169
"""Apply the full PSGD preconditioner to X.
7270
7371
This is the full Kronecker product of PSGD's kronecker factors Q^T Q, applied to X.
@@ -130,7 +128,7 @@ def _dim_n_mul_and_permute(X: torch.Tensor, M: torch.Tensor, contract_dim: int)
130128

131129

132130
@torch.compile # type: ignore[misc]
133-
def _apply_single_kronecker_factor(Q_list: List[torch.Tensor], X: torch.Tensor, axis: int) -> torch.Tensor:
131+
def _apply_single_kronecker_factor(Q_list: list[torch.Tensor], X: torch.Tensor, axis: int) -> torch.Tensor:
134132
"""Apply a single Kronecker factor Q to X at dimension `axis`. Helper function for apply_kronecker_factors.
135133
136134
If Q is a vector, we multiply X by Q.

emerging_optimizers/riemannian_optimizers/normalized_optimizer.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,7 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
from typing import Callable, overload
16-
17-
18-
try:
19-
from typing import override
20-
except ImportError:
21-
from typing_extensions import override
15+
from typing import Callable, overload, override
2216

2317
import torch
2418
from torch.optim.optimizer import Optimizer

emerging_optimizers/scalar_optimizers/ademamix.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
import math
16-
from typing import Optional, Tuple
1716

1817
import torch
1918

@@ -30,9 +29,9 @@ def calculate_sim_ademamix_update(
3029
grad: torch.Tensor,
3130
exp_avg: torch.Tensor,
3231
exp_avg_sq: torch.Tensor,
33-
num_beta_fast_warmup_steps: Optional[int],
32+
num_beta_fast_warmup_steps: int | None,
3433
min_beta_fast: float,
35-
betas: Tuple[float, float],
34+
betas: tuple[float, float],
3635
step: int,
3736
eps: float,
3837
correct_bias: bool,
@@ -107,9 +106,9 @@ def calculate_ademamix_update(
107106
exp_avg_fast: torch.Tensor,
108107
exp_avg_slow: torch.Tensor,
109108
exp_avg_sq: torch.Tensor,
110-
num_beta_slow_warmup_steps: Optional[int],
111-
num_alpha_warmup_steps: Optional[int],
112-
betas: Tuple[float, float, float],
109+
num_beta_slow_warmup_steps: int | None,
110+
num_alpha_warmup_steps: int | None,
111+
betas: tuple[float, float, float],
113112
step: int,
114113
eps: float,
115114
correct_bias: bool,

emerging_optimizers/scalar_optimizers/laprop.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
from typing import Tuple
16-
1715
import torch
1816

1917

@@ -29,7 +27,7 @@ def calculate_laprop_update(
2927
exp_avg: torch.Tensor,
3028
exp_avg_sq: torch.Tensor,
3129
correct_bias: bool,
32-
betas: Tuple[float, float],
30+
betas: tuple[float, float],
3331
step: int,
3432
eps: float,
3533
) -> torch.Tensor:

0 commit comments

Comments (0)