edbeeching
diff --git a/‎.github/workflows/quality.yml‎
Lines changed: 29 additions & 0 deletions b/‎.github/workflows/quality.yml‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎.vscode/settings.json‎
Lines changed: 0 additions & 12 deletions b/‎.vscode/settings.json‎
Lines changed: 0 additions & 12 deletions
diff --git a/‎Makefile‎
Lines changed: 7 additions & 8 deletions b/‎Makefile‎
Lines changed: 7 additions & 8 deletions
diff --git a/‎README.md‎
Lines changed: 27 additions & 0 deletions b/‎README.md‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎examples/clean_rl_example.py‎
Lines changed: 12 additions & 10 deletions b/‎examples/clean_rl_example.py‎
Lines changed: 12 additions & 10 deletions
diff --git a/‎examples/sample_factory_example.py‎
Lines changed: 10 additions & 6 deletions b/‎examples/sample_factory_example.py‎
Lines changed: 10 additions & 6 deletions
diff --git a/‎examples/stable_baselines3_example.py‎
Lines changed: 51 additions & 41 deletions b/‎examples/stable_baselines3_example.py‎
Lines changed: 51 additions & 41 deletions
@@ -0,0 +1,29 @@
+name: Quality
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+
+  check_code_quality:  # runs the same checks as `make quality` on every push / PR to main
+    name: Check code quality
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4  # v2 targets a Node runtime GitHub has deprecated
+      - name: Setup Python environment
+        uses: actions/setup-python@v5  # v2 targets a Node runtime GitHub has deprecated
+        with:
+          python-version: "3.10.10"  # quoted so YAML can never reinterpret a version as a number
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install ".[dev]"
+      - name: Code quality
+        run: |
+          make quality
@@ -1,15 +1,14 @@
 .PHONY: quality style test unity-test
 
-# Check that source code meets quality standards
-quality:
-	black --check --line-length 119 --target-version py38 tests godot_rl 
-	isort --check-only tests godot_rl 
-	flake8 tests godot_rl
-
 # Format source code automatically
 style:
-	black --line-length 119 --target-version py38 tests godot_rl
-	isort tests godot_rl
+	black --line-length 120 --target-version py310 tests godot_rl examples
+	isort -w 120 tests godot_rl examples
+# Check that source code meets quality standards
+quality:
+	black --check --line-length 120 --target-version py310 tests godot_rl examples
+	isort -w 120 --check-only tests godot_rl examples
+	flake8 --max-line-length 120 tests godot_rl examples
 
 # Run tests for the library
 test:
 
@@ -83,6 +83,33 @@ Godot RL Agents supports 4 different RL training frameworks, the links below det
 - [CleanRL](docs/ADV_CLEAN_RL.md) (Windows, Mac, Linux)
 - [Ray rllib](docs/ADV_RLLIB.md) (Windows, Mac, Linux)
 
+## Contributing
+We welcome new contributions to the library, such as:
+- New environments made in Godot
+- Improvements to the readme files
+- Additions to the python codebase
+
+Start by forking the repo and then cloning it to your machine, creating a venv and performing an editable installation.
+
+```
+# If you want to PR, you should fork the lib or ask to be a contributor
+git clone git@github.com:YOUR_USERNAME/godot_rl_agents.git
+cd godot_rl_agents
+python -m venv venv
+source venv/bin/activate  # on Windows: venv\Scripts\activate
+pip install -e ".[dev]"
+# check tests run
+make test
+```
+
+Then add your features.
+Format your code with:
+```
+make style
+make quality
+```
+Then make a PR against main on the original repo.
+
+
 ## FAQ
 
 ### Why have we developed Godot RL Agents?
 
@@ -4,14 +4,16 @@
 import pathlib
 import random
 import time
-from distutils.util import strtobool
 from collections import deque
+from distutils.util import strtobool
+
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.distributions.normal import Normal
 from torch.utils.tensorboard import SummaryWriter
+
 from godot_rl.wrappers.clean_rl_wrapper import CleanRLGodotEnv
 
 
@@ -167,8 +169,9 @@ def get_action_and_value(self, x, action=None):
 
     # env setup
 
-    envs = env = CleanRLGodotEnv(env_path=args.env_path, show_window=args.viz, speedup=args.speedup, seed=args.seed,
-                                 n_parallel=args.n_parallel)
+    envs = env = CleanRLGodotEnv(
+        env_path=args.env_path, show_window=args.viz, speedup=args.speedup, seed=args.seed, n_parallel=args.n_parallel
+    )
     args.num_envs = envs.num_envs
     args.batch_size = int(args.num_envs * args.num_steps)
     args.minibatch_size = int(args.batch_size // args.num_minibatches)
@@ -334,7 +337,6 @@ def get_action_and_value(self, x, action=None):
 
         agent.eval().to("cpu")
 
-
         class OnnxPolicy(torch.nn.Module):
             def __init__(self, actor_mean):
                 super().__init__()
@@ -344,7 +346,6 @@ def forward(self, obs, state_ins):
                 action_mean = self.actor_mean(obs)
                 return action_mean, state_ins
 
-
         onnx_policy = OnnxPolicy(agent.actor_mean)
         dummy_input = torch.unsqueeze(torch.tensor(envs.single_observation_space.sample()), 0)
 
@@ -355,9 +356,10 @@ def forward(self, obs, state_ins):
             opset_version=15,
             input_names=["obs", "state_ins"],
             output_names=["output", "state_outs"],
-            dynamic_axes={'obs': {0: 'batch_size'},
-                          'state_ins': {0: 'batch_size'},  # variable length axes
-                          'output': {0: 'batch_size'},
-                          'state_outs': {0: 'batch_size'}}
-
+            dynamic_axes={
+                "obs": {0: "batch_size"},
+                "state_ins": {0: "batch_size"},  # variable length axes
+                "output": {0: "batch_size"},
+                "state_outs": {0: "batch_size"},
+            },
         )
@@ -1,5 +1,6 @@
 import argparse
-from godot_rl.wrappers.sample_factory_wrapper import sample_factory_training, sample_factory_enjoy
+
+from godot_rl.wrappers.sample_factory_wrapper import sample_factory_enjoy, sample_factory_training
 
 
 def get_args():
@@ -10,8 +11,12 @@ def get_args():
     parser.add_argument("--seed", default=0, type=int, help="environment seed")
     parser.add_argument("--export", default=False, action="store_true", help="whether to export the model")
     parser.add_argument("--viz", default=False, action="store_true", help="Whether to visualize one process")
-    parser.add_argument("--experiment_dir", default="logs/sf", type=str,
-    help="The name of the experiment directory, in which the tensorboard logs are getting stored")
+    parser.add_argument(
+        "--experiment_dir",
+        default="logs/sf",
+        type=str,
+        help="The name of the experiment directory, in which the tensorboard logs are getting stored",
+    )
     parser.add_argument(
         "--experiment_name",
         default="experiment",
@@ -22,14 +27,13 @@ def get_args():
     return parser.parse_known_args()
 
 
-
 def main():
     args, extras = get_args()
     if args.eval:
         sample_factory_enjoy(args, extras)
     else:
         sample_factory_training(args, extras)
-        
-        
+
+
 if __name__ == "__main__":
     main()
@@ -3,12 +3,13 @@
 import pathlib
 from typing import Callable
 
+from stable_baselines3 import PPO
 from stable_baselines3.common.callbacks import CheckpointCallback
+from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
+
 from godot_rl.core.utils import can_import
-from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
 from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx
-from stable_baselines3 import PPO
-from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
+from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv
 
 # To download the env source and binary:
 # 1.  gdrl.env_from_hub -r edbeeching/godot_rl_BallChase
@@ -28,42 +29,39 @@
     default="logs/sb3",
     type=str,
     help="The name of the experiment directory, in which the tensorboard logs and checkpoints (if enabled) are "
-         "getting stored."
+    "getting stored.",
 )
 parser.add_argument(
     "--experiment_name",
     default="experiment",
     type=str,
     help="The name of the experiment, which will be displayed in tensorboard and "
-         "for checkpoint directory and name (if enabled).",
-)
-parser.add_argument(
-    "--seed",
-    type=int,
-    default=0,
-    help="seed of the experiment"
+    "for checkpoint directory and name (if enabled).",
 )
+parser.add_argument("--seed", type=int, default=0, help="seed of the experiment")
 parser.add_argument(
     "--resume_model_path",
     default=None,
     type=str,
     help="The path to a model file previously saved using --save_model_path or a checkpoint saved using "
-         "--save_checkpoints_frequency. Use this to resume training or infer from a saved model.",
+    "--save_checkpoints_frequency. Use this to resume training or infer from a saved model.",
 )
 parser.add_argument(
     "--save_model_path",
     default=None,
     type=str,
     help="The path to use for saving the trained sb3 model after training is complete. Saved model can be used later "
-         "to resume training. Extension will be set to .zip",
+    "to resume training. Extension will be set to .zip",
 )
 parser.add_argument(
     "--save_checkpoint_frequency",
     default=None,
     type=int,
-    help=("If set, will save checkpoints every 'frequency' environment steps. "
-          "Requires a unique --experiment_name or --experiment_dir for each run. "
-          "Does not need --save_model_path to be set. "),
+    help=(
+        "If set, will save checkpoints every 'frequency' environment steps. "
+        "Requires a unique --experiment_name or --experiment_dir for each run. "
+        "Does not need --save_model_path to be set. "
+    ),
 )
 parser.add_argument(
     "--onnx_export_path",
@@ -76,34 +74,38 @@
     default=1_000_000,
     type=int,
     help="The number of environment steps to train for, default is 1_000_000. If resuming from a saved model, "
-         "it will continue training for this amount of steps from the saved state without counting previously trained "
-         "steps",
+    "it will continue training for this amount of steps from the saved state without counting previously trained "
+    "steps",
 )
 parser.add_argument(
     "--inference",
     default=False,
     action="store_true",
     help="Instead of training, it will run inference on a loaded model for --timesteps steps. "
-         "Requires --resume_model_path to be set."
+    "Requires --resume_model_path to be set.",
 )
 parser.add_argument(
     "--linear_lr_schedule",
     default=False,
     action="store_true",
     help="Use a linear LR schedule for training. If set, learning rate will decrease until it reaches 0 at "
-         "--timesteps"
-         "value. Note: On resuming training, the schedule will reset. If disabled, constant LR will be used."
+    "--timesteps "  # trailing space: adjacent literals are joined with no separator
+    "value. Note: On resuming training, the schedule will reset. If disabled, constant LR will be used.",
 )
 parser.add_argument(
     "--viz",
     action="store_true",
     help="If set, the simulation will be displayed in a window during training. Otherwise "
-         "training will run without rendering the simulation. This setting does not apply to in-editor training.",
-    default=False
+    "training will run without rendering the simulation. This setting does not apply to in-editor training.",
+    default=False,
 )
 parser.add_argument("--speedup", default=1, type=int, help="Whether to speed up the physics in the env")
-parser.add_argument("--n_parallel", default=1, type=int, help="How many instances of the environment executable to "
-                                                              "launch - requires --env_path to be set if > 1.")
+parser.add_argument(
+    "--n_parallel",
+    default=1,
+    type=int,
+    help="How many instances of the environment executable to launch - requires --env_path to be set if > 1.",
+)
 args, extras = parser.parse_known_args()
 
 
@@ -136,19 +138,22 @@ def close_env():
 
 # Prevent overwriting existing checkpoints when starting a new experiment if checkpoint saving is enabled
 if args.save_checkpoint_frequency is not None and os.path.isdir(path_checkpoint):
-    raise RuntimeError(abs_path_checkpoint + " folder already exists. "
-                                             "Use a different --experiment_dir, or --experiment_name,"
-                                             "or if previous checkpoints are not needed anymore, "
-                                             "remove the folder containing the checkpoints. ")
+    raise RuntimeError(
+        abs_path_checkpoint + " folder already exists. "
+        "Use a different --experiment_dir, or --experiment_name, "  # trailing space keeps ", or" readable
+        "or if previous checkpoints are not needed anymore, "
+        "remove the folder containing the checkpoints. "
+    )
 
 if args.inference and args.resume_model_path is None:
     raise parser.error("Using --inference requires --resume_model_path to be set.")
 
 if args.env_path is None and args.viz:
     print("Info: Using --viz without --env_path set has no effect, in-editor training will always render.")
 
-env = StableBaselinesGodotEnv(env_path=args.env_path, show_window=args.viz, seed=args.seed, n_parallel=args.n_parallel,
-                              speedup=args.speedup)
+env = StableBaselinesGodotEnv(
+    env_path=args.env_path, show_window=args.viz, seed=args.seed, n_parallel=args.n_parallel, speedup=args.speedup
+)
 env = VecMonitor(env)
 
 
@@ -177,13 +182,15 @@ def func(progress_remaining: float) -> float:
 
 if args.resume_model_path is None:
     learning_rate = 0.0003 if not args.linear_lr_schedule else linear_schedule(0.0003)
-    model: PPO = PPO("MultiInputPolicy",
-                     env,
-                     ent_coef=0.0001,
-                     verbose=2,
-                     n_steps=32,
-                     tensorboard_log=args.experiment_dir,
-                     learning_rate=learning_rate)
+    model: PPO = PPO(
+        "MultiInputPolicy",
+        env,
+        ent_coef=0.0001,
+        verbose=2,
+        n_steps=32,
+        tensorboard_log=args.experiment_dir,
+        learning_rate=learning_rate,
+    )
 else:
     path_zip = pathlib.Path(args.resume_model_path)
     print("Loading model: " + os.path.abspath(path_zip))
@@ -201,13 +208,16 @@ def func(progress_remaining: float) -> float:
         checkpoint_callback = CheckpointCallback(
             save_freq=(args.save_checkpoint_frequency // env.num_envs),
             save_path=path_checkpoint,
-            name_prefix=args.experiment_name
+            name_prefix=args.experiment_name,
         )
-        learn_arguments['callback'] = checkpoint_callback
+        learn_arguments["callback"] = checkpoint_callback
     try:
         model.learn(**learn_arguments)
     except KeyboardInterrupt:
-        print("Training interrupted by user. Will save if --save_model_path was used and/or export if --onnx_export_path was used.")
+        print(
+            "Training interrupted by user. Will save if --save_model_path was "  # adjacent literals, not """...""",
+            "used and/or export if --onnx_export_path was used."  # so no newline/indent leaks into the message
+        )
 
 close_env()
 handle_onnx_export()