Skip to content

Commit 9ad2dde

Browse files
authored
Merge pull request #2 from araffin/feat/uv
Add `uv` support and update min Python version
2 parents ee0e4ff + ee1dc43 commit 9ad2dde

File tree

14 files changed: +3,839 lines added, −56 lines removed

.github/workflows/ci.yml

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,33 +20,26 @@ jobs:
2020
runs-on: ubuntu-latest
2121
strategy:
2222
matrix:
23-
python-version: ["3.8", "3.9", "3.10", "3.11"]
23+
python-version: ["3.10", "3.11", "3.12", "3.13"]
2424

2525
steps:
26-
- uses: actions/checkout@v3
26+
- uses: actions/checkout@v6
2727
- name: Set up Python ${{ matrix.python-version }}
28-
uses: actions/setup-python@v4
28+
uses: actions/setup-python@v6
2929
with:
3030
python-version: ${{ matrix.python-version }}
3131
- name: Install dependencies
3232
run: |
3333
python -m pip install --upgrade pip
3434
# Use uv for faster downloads
3535
pip install uv
36-
# cpu version of pytorch
37-
# See https://github.com/astral-sh/uv/issues/1497
38-
uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
39-
40-
uv pip install --system ".[tests]"
36+
uv sync --extra tests
37+
echo "$PWD/.venv/bin" >> $GITHUB_PATH
4138
- name: Lint with ruff
42-
run: |
43-
make lint
39+
run: make lint
4440
- name: Check codestyle
45-
run: |
46-
make check-codestyle
41+
run: make check-codestyle
4742
- name: Type check
48-
run: |
49-
make type
43+
run: make type
5044
- name: Test with pytest
51-
run: |
52-
make pytest
45+
run: make pytest

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Reinforcement Learning Summer School 2023 - DQN Tutorial
1+
# Reinforcement Learning Summer School 2023/2026 - DQN Tutorial
22

33
<img style="background:white;" src="https://araffin.github.io/slides/dqn-tutorial/images/q_learning/dqn_pirate_cover.png" align="right" width="40%"/>
44

@@ -14,6 +14,11 @@ Stable-Baselines3 repo: https://github.com/DLR-RM/stable-baselines3
1414

1515
RL Virtual School 2021: https://github.com/araffin/rl-handson-rlvs21
1616

17+
RL Summer School 2023: https://rlsummerschool.com/2023/
18+
19+
RL Summer School 2026: https://2026.rlsummerschool.com/
20+
21+
1722
<img style="background:white;" src="https://araffin.github.io/slides/dqn-tutorial/images/dqn/dqn.png" align="center" width="80%"/>
1823

1924
## Content
@@ -22,6 +27,11 @@ RL Virtual School 2021: https://github.com/araffin/rl-handson-rlvs21
2227
2. Deep Q-Network (DQN) Part I: DQN Components: Replay Buffer, Q-Network, ... [Colab Notebook](https://colab.research.google.com/github/araffin/rlss23-dqn-tutorial/blob/main/notebooks/2_deep_q_network_dqn_components.ipynb)
2328
3. Deep Q-Network (DQN) Part II: DQN Update and Training Loop [Colab Notebook](https://colab.research.google.com/github/araffin/rlss23-dqn-tutorial/blob/main/notebooks/3_deep_q_network_dqn_update.ipynb)
2429

30+
## Run Locally (instead of using Google colab)
31+
32+
1. Install [uv](https://docs.astral.sh/uv/getting-started/installation/)
33+
2. Run `uv run jupyter lab notebooks`
34+
2535
## Solutions
2636

2737
Solutions can be found in the [notebooks/solutions/](https://github.com/araffin/rlss23-dqn-tutorial/tree/main/notebooks/solutions) folder.

dqn_tutorial/dqn/dqn.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from typing import Optional
2-
31
import gymnasium as gym
42
import numpy as np
53
import torch as th
@@ -96,7 +94,7 @@ def run_dqn(
9694
eval_exploration_rate: float = 0.0,
9795
seed: int = 2023,
9896
# device: Union[th.device, str] = "cpu",
99-
eval_render_mode: Optional[str] = None, # "human", "rgb_array", None
97+
eval_render_mode: str | None = None, # "human", "rgb_array", None
10098
) -> QNetwork:
10199
"""
102100
Run Deep Q-Learning (DQN) on a given environment.

dqn_tutorial/dqn/dqn_no_target.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from typing import Optional
2-
31
import gymnasium as gym
42
import numpy as np
53
import torch as th
@@ -85,7 +83,7 @@ def run_dqn_no_target(
8583
eval_exploration_rate: float = 0.0,
8684
seed: int = 2023,
8785
# device: Union[th.device, str] = "cpu",
88-
eval_render_mode: Optional[str] = None, # "human", "rgb_array", None
86+
eval_render_mode: str | None = None, # "human", "rgb_array", None
8987
) -> QNetwork:
9088
"""
9189
Run Deep Q-Learning (DQN) on a given environment.

dqn_tutorial/dqn/evaluation.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import warnings
22
from pathlib import Path
3-
from typing import Optional
43

54
import gymnasium as gym
65
import numpy as np
@@ -24,7 +23,7 @@ def evaluate_policy(
2423
q_net: QNetwork,
2524
n_eval_episodes: int,
2625
eval_exploration_rate: float = 0.0,
27-
video_name: Optional[str] = None,
26+
video_name: str | None = None,
2827
) -> None:
2928
"""
3029
Evaluate the policy by computing the average episode reward

dqn_tutorial/dqn/q_network.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from typing import Type
2-
31
import torch as th
42
import torch.nn as nn
53
from gymnasium import spaces
@@ -23,7 +21,7 @@ def __init__(
2321
observation_space: spaces.Box,
2422
action_space: spaces.Discrete,
2523
n_hidden_units: int = 64,
26-
activation_fn: Type[nn.Module] = nn.ReLU,
24+
activation_fn: type[nn.Module] = nn.ReLU,
2725
) -> None:
2826
super().__init__()
2927
# Assume 1d space

dqn_tutorial/fqi/fqi.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
from functools import partial
99
from pathlib import Path
10-
from typing import Optional
1110

1211
import gymnasium as gym
1312
import numpy as np
@@ -27,7 +26,7 @@
2726
def create_model_input(
2827
obs: np.ndarray,
2928
actions: np.ndarray,
30-
features_extractor: Optional[PolynomialFeatures] = None,
29+
features_extractor: PolynomialFeatures | None = None,
3130
) -> np.ndarray:
3231
"""
3332
Concatenate observation (batch_size, n_features)
@@ -57,7 +56,7 @@ def get_q_values(
5756
model: RegressorMixin,
5857
obs: np.ndarray,
5958
n_actions: int,
60-
features_extractor: Optional[PolynomialFeatures] = None,
59+
features_extractor: PolynomialFeatures | None = None,
6160
) -> np.ndarray:
6261
"""
6362
Retrieve the q-values for a set of observations (=states in the theory).
@@ -93,7 +92,7 @@ def evaluate(
9392
model: RegressorMixin,
9493
env: gym.Env,
9594
n_eval_episodes: int = 10,
96-
features_extractor: Optional[PolynomialFeatures] = None,
95+
features_extractor: PolynomialFeatures | None = None,
9796
) -> None:
9897
episode_returns, episode_reward = [], 0.0
9998
total_episodes = 0

dqn_tutorial/notebook_utils.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,8 @@ def show_videos(video_path: str = "", prefix: str = "") -> None:
1414
html = []
1515
for mp4 in Path(video_path).glob(f"{prefix}*.mp4"):
1616
video_b64 = base64.b64encode(mp4.read_bytes())
17-
html.append(
18-
"""<video alt="{}" autoplay
17+
html.append("""<video alt="{}" autoplay
1918
loop controls style="height: 400px;">
2019
<source src="data:video/mp4;base64,{}" type="video/mp4" />
21-
</video>""".format(
22-
mp4, video_b64.decode("ascii")
23-
)
24-
)
20+
</video>""".format(mp4, video_b64.decode("ascii")))
2521
ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))

notebooks/2_deep_q_network_dqn_components.ipynb

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@
104104
"metadata": {},
105105
"outputs": [],
106106
"source": [
107-
"from typing import Optional\n",
108107
"\n",
109108
"import numpy as np\n",
110109
"import torch as th\n",
@@ -346,7 +345,6 @@
346345
"metadata": {},
347346
"outputs": [],
348347
"source": [
349-
"from typing import Type\n",
350348
"\n",
351349
"import torch as th\n",
352350
"import torch.nn as nn\n",
@@ -371,7 +369,7 @@
371369
" observation_space: spaces.Box,\n",
372370
" action_space: spaces.Discrete,\n",
373371
" n_hidden_units: int = 64,\n",
374-
" activation_fn: Type[nn.Module] = nn.ReLU,\n",
372+
" activation_fn: type[nn.Module] = nn.ReLU,\n",
375373
" ) -> None:\n",
376374
" super().__init__()\n",
377375
" # Assume 1d space\n",

notebooks/solutions/1_fitted_q_iteration_fqi.ipynb

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,6 @@
384384
"source": [
385385
"from functools import partial\n",
386386
"from pathlib import Path\n",
387-
"from typing import Optional\n",
388387
"\n",
389388
"import gymnasium as gym\n",
390389
"import numpy as np\n",
@@ -606,7 +605,7 @@
606605
" model: RegressorMixin,\n",
607606
" env: gym.Env,\n",
608607
" n_eval_episodes: int = 10,\n",
609-
" video_name: Optional[str] = None,\n",
608+
" video_name: str | None = None,\n",
610609
") -> None:\n",
611610
" episode_returns, episode_reward = [], 0.0\n",
612611
" total_episodes = 0\n",

Comments (0)