Skip to content

Commit 9ad2dde

Browse files
authored
Merge pull request #2 from araffin/feat/uv
Add `uv` support and update min Python version
2 parents ee0e4ff + ee1dc43 commit 9ad2dde

File tree

14 files changed: +3,839 lines added, −56 lines removed

.github/workflows/ci.yml

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,33 +20,26 @@ jobs:
2020
runs-on: ubuntu-latest
2121
strategy:
2222
matrix:
23-
python-version: ["3.8", "3.9", "3.10", "3.11"]
23+
python-version: ["3.10", "3.11", "3.12", "3.13"]
2424

2525
steps:
26-
- uses: actions/checkout@v3
26+
- uses: actions/checkout@v6
2727
- name: Set up Python ${{ matrix.python-version }}
28-
uses: actions/setup-python@v4
28+
uses: actions/setup-python@v6
2929
with:
3030
python-version: ${{ matrix.python-version }}
3131
- name: Install dependencies
3232
run: |
3333
python -m pip install --upgrade pip
3434
# Use uv for faster downloads
3535
pip install uv
36-
# cpu version of pytorch
37-
# See https://github.com/astral-sh/uv/issues/1497
38-
uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
39-
40-
uv pip install --system ".[tests]"
36+
uv sync --extra tests
37+
echo "$PWD/.venv/bin" >> $GITHUB_PATH
4138
- name: Lint with ruff
42-
run: |
43-
make lint
39+
run: make lint
4440
- name: Check codestyle
45-
run: |
46-
make check-codestyle
41+
run: make check-codestyle
4742
- name: Type check
48-
run: |
49-
make type
43+
run: make type
5044
- name: Test with pytest
51-
run: |
52-
make pytest
45+
run: make pytest

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Reinforcement Learning Summer School 2023 - DQN Tutorial
1+
# Reinforcement Learning Summer School 2023/2026 - DQN Tutorial
22

33
<img style="background:white;" src="https://araffin.github.io/slides/dqn-tutorial/images/q_learning/dqn_pirate_cover.png" align="right" width="40%"/>
44

@@ -14,6 +14,11 @@ Stable-Baselines3 repo: https://github.com/DLR-RM/stable-baselines3
1414

1515
RL Virtual School 2021: https://github.com/araffin/rl-handson-rlvs21
1616

17+
RL Summer School 2023: https://rlsummerschool.com/2023/
18+
19+
RL Summer School 2026: https://2026.rlsummerschool.com/
20+
21+
1722
<img style="background:white;" src="https://araffin.github.io/slides/dqn-tutorial/images/dqn/dqn.png" align="center" width="80%"/>
1823

1924
## Content
@@ -22,6 +27,11 @@ RL Virtual School 2021: https://github.com/araffin/rl-handson-rlvs21
2227
2. Deep Q-Network (DQN) Part I: DQN Components: Replay Buffer, Q-Network, ... [Colab Notebook](https://colab.research.google.com/github/araffin/rlss23-dqn-tutorial/blob/main/notebooks/2_deep_q_network_dqn_components.ipynb)
2328
3. Deep Q-Network (DQN) Part II: DQN Update and Training Loop [Colab Notebook](https://colab.research.google.com/github/araffin/rlss23-dqn-tutorial/blob/main/notebooks/3_deep_q_network_dqn_update.ipynb)
2429

30+
## Run Locally (instead of using Google colab)
31+
32+
1. Install [uv](https://docs.astral.sh/uv/getting-started/installation/)
33+
2. Run `uv run jupyter lab notebooks`
34+
2535
## Solutions
2636

2737
Solutions can be found in the [notebooks/solutions/](https://github.com/araffin/rlss23-dqn-tutorial/tree/main/notebooks/solutions) folder.

dqn_tutorial/dqn/dqn.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from typing import Optional
2-
31
import gymnasium as gym
42
import numpy as np
53
import torch as th
@@ -96,7 +94,7 @@ def run_dqn(
9694
eval_exploration_rate: float = 0.0,
9795
seed: int = 2023,
9896
# device: Union[th.device, str] = "cpu",
99-
eval_render_mode: Optional[str] = None, # "human", "rgb_array", None
97+
eval_render_mode: str | None = None, # "human", "rgb_array", None
10098
) -> QNetwork:
10199
"""
102100
Run Deep Q-Learning (DQN) on a given environment.

dqn_tutorial/dqn/dqn_no_target.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from typing import Optional
2-
31
import gymnasium as gym
42
import numpy as np
53
import torch as th
@@ -85,7 +83,7 @@ def run_dqn_no_target(
8583
eval_exploration_rate: float = 0.0,
8684
seed: int = 2023,
8785
# device: Union[th.device, str] = "cpu",
88-
eval_render_mode: Optional[str] = None, # "human", "rgb_array", None
86+
eval_render_mode: str | None = None, # "human", "rgb_array", None
8987
) -> QNetwork:
9088
"""
9189
Run Deep Q-Learning (DQN) on a given environment.

dqn_tutorial/dqn/evaluation.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import warnings
22
from pathlib import Path
3-
from typing import Optional
43

54
import gymnasium as gym
65
import numpy as np
@@ -24,7 +23,7 @@ def evaluate_policy(
2423
q_net: QNetwork,
2524
n_eval_episodes: int,
2625
eval_exploration_rate: float = 0.0,
27-
video_name: Optional[str] = None,
26+
video_name: str | None = None,
2827
) -> None:
2928
"""
3029
Evaluate the policy by computing the average episode reward

dqn_tutorial/dqn/q_network.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from typing import Type
2-
31
import torch as th
42
import torch.nn as nn
53
from gymnasium import spaces
@@ -23,7 +21,7 @@ def __init__(
2321
observation_space: spaces.Box,
2422
action_space: spaces.Discrete,
2523
n_hidden_units: int = 64,
26-
activation_fn: Type[nn.Module] = nn.ReLU,
24+
activation_fn: type[nn.Module] = nn.ReLU,
2725
) -> None:
2826
super().__init__()
2927
# Assume 1d space

dqn_tutorial/fqi/fqi.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
from functools import partial
99
from pathlib import Path
10-
from typing import Optional
1110

1211
import gymnasium as gym
1312
import numpy as np
@@ -27,7 +26,7 @@
2726
def create_model_input(
2827
obs: np.ndarray,
2928
actions: np.ndarray,
30-
features_extractor: Optional[PolynomialFeatures] = None,
29+
features_extractor: PolynomialFeatures | None = None,
3130
) -> np.ndarray:
3231
"""
3332
Concatenate observation (batch_size, n_features)
@@ -57,7 +56,7 @@ def get_q_values(
5756
model: RegressorMixin,
5857
obs: np.ndarray,
5958
n_actions: int,
60-
features_extractor: Optional[PolynomialFeatures] = None,
59+
features_extractor: PolynomialFeatures | None = None,
6160
) -> np.ndarray:
6261
"""
6362
Retrieve the q-values for a set of observations (=states in the theory).
@@ -93,7 +92,7 @@ def evaluate(
9392
model: RegressorMixin,
9493
env: gym.Env,
9594
n_eval_episodes: int = 10,
96-
features_extractor: Optional[PolynomialFeatures] = None,
95+
features_extractor: PolynomialFeatures | None = None,
9796
) -> None:
9897
episode_returns, episode_reward = [], 0.0
9998
total_episodes = 0

dqn_tutorial/notebook_utils.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,8 @@ def show_videos(video_path: str = "", prefix: str = "") -> None:
1414
html = []
1515
for mp4 in Path(video_path).glob(f"{prefix}*.mp4"):
1616
video_b64 = base64.b64encode(mp4.read_bytes())
17-
html.append(
18-
"""<video alt="{}" autoplay
17+
html.append("""<video alt="{}" autoplay
1918
loop controls style="height: 400px;">
2019
<source src="data:video/mp4;base64,{}" type="video/mp4" />
21-
</video>""".format(
22-
mp4, video_b64.decode("ascii")
23-
)
24-
)
20+
</video>""".format(mp4, video_b64.decode("ascii")))
2521
ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))

notebooks/2_deep_q_network_dqn_components.ipynb

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@
104104
"metadata": {},
105105
"outputs": [],
106106
"source": [
107-
"from typing import Optional\n",
108107
"\n",
109108
"import numpy as np\n",
110109
"import torch as th\n",
@@ -346,7 +345,6 @@
346345
"metadata": {},
347346
"outputs": [],
348347
"source": [
349-
"from typing import Type\n",
350348
"\n",
351349
"import torch as th\n",
352350
"import torch.nn as nn\n",
@@ -371,7 +369,7 @@
371369
" observation_space: spaces.Box,\n",
372370
" action_space: spaces.Discrete,\n",
373371
" n_hidden_units: int = 64,\n",
374-
" activation_fn: Type[nn.Module] = nn.ReLU,\n",
372+
" activation_fn: type[nn.Module] = nn.ReLU,\n",
375373
" ) -> None:\n",
376374
" super().__init__()\n",
377375
" # Assume 1d space\n",

notebooks/solutions/1_fitted_q_iteration_fqi.ipynb

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,6 @@
384384
"source": [
385385
"from functools import partial\n",
386386
"from pathlib import Path\n",
387-
"from typing import Optional\n",
388387
"\n",
389388
"import gymnasium as gym\n",
390389
"import numpy as np\n",
@@ -606,7 +605,7 @@
606605
" model: RegressorMixin,\n",
607606
" env: gym.Env,\n",
608607
" n_eval_episodes: int = 10,\n",
609-
" video_name: Optional[str] = None,\n",
608+
" video_name: str | None = None,\n",
610609
") -> None:\n",
611610
" episode_returns, episode_reward = [], 0.0\n",
612611
" total_episodes = 0\n",

Comments (0)