
Commit 0187f7e

Rebuild RLCodebase
2 parents 531b7dc + 5948958

36 files changed: +941, -920 lines changed

README.md

Lines changed: 9 additions & 8 deletions
@@ -1,18 +1,19 @@
 # RLCodebase
 RLCodebase is a modularized codebase for deep reinforcement learning algorithms based on PyTorch. This repo aims to provide an user-friendly reinforcement learning codebase for beginners to get started and for researchers to try their ideas quickly and efficiently.
 
-For now, it has implemented DQN(PER), A2C, PPO, DDPG, TD3 and SAC algorithms, and tested on OpenAI Gym, Procgen, PyBullet and DMControl Suite environments.
+For now, it has implemented DQN(PER), A2C, PPO, DDPG, TD3 and SAC algorithms, and has been tested on Atari, Procgen, Mujoco, PyBullet and DMControl Suite environments.
 
 ## Introduction
 The design of RLCodebase is shown as below.
 
 
 ![RLCodebase](imgs/RLCodebase.png)
-* Config: Config is a class that contains parameters for reinforcement learning algorithms such as discount factor, learning rate, etc. and general configurations such as random seed, saving path, etc.
-* Agent: Agent is a wrapped class that controls the workflow of reinforcement learning algorithms like a manager. It's responsible for the interactions among submodules (policy, environment, memory).
-* Policy: Policy tells us what action to taken given a state. It also implements a function that defines how to update the model given a batch of data.
-* Environment: Environment is designed to be a vectorized gym environment. Here we use gym wrappers from OpenAI baselines for convenient implementations.
-* Memory: Memory stores data needed for improving our model.
+* **Config**: Config is a class that contains parameters for reinforcement learning algorithms such as discount factor, learning rate, etc. and general configurations such as random seed, saving path, etc.
+* **Trainer**: Trainer is a wrapped class that controls the workflow of reinforcement learning training. It manages the interactions between submodules (Agent, Env, Memory).
+* **Agent**: Agent chooses actions to take given states. It also defines how to update the model given a batch of data.
+* **Model**: Model gathers all neural networks to train.
+* **Env**: Env is a vectorized gym environment.
+* **Memory**: Memory stores experiences utilized for RL training.
 
 ## Installtion
 All required packages have been included in setup.py and requirements.txt. Mujoco is needed for mujoco_py and dm control suite. To support mujoco_py and dm control, please refer to https://github.com/openai/mujoco-py and https://github.com/deepmind/dm_control. For mujoco_py 2.1.2.14 and dm_control (commit fe44496), you may download mujoco like below
@@ -43,7 +44,7 @@ pip install -e .
 pip install -r requirements.txt
 
 # try it
-python example_a2c.py
+python examples/example_ppo.py
 ````
 
 ## Supported Algorithms
@@ -64,7 +65,7 @@ python example_a2c.py
 ### 1. PPO & A2C In Atari Games
 <img src="https://github.com/KarlXing/RLCodebase/blob/master/imgs/A2C&PPO.png">
 
-### 2. DDPG & TD3 & SAC In Pybullet Environments
+### 2. DDPG & TD3 & SAC In PyBullet Environments
 <img src="https://github.com/KarlXing/RLCodebase/blob/master/imgs/DDPG&TD3&SAC.png">
 
 ### 3. DQN & DQN+PER In PongNoFrameskip-v4
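
Read together, the README changes above and the example-script diffs below describe one workflow: build a Config, a vectorized Env, a Model and a Logger, then hand them to a Trainer (formerly called Agent) whose run() method drives training. The sketch below assembles that workflow from the A2C example in this commit; how Config is populated and which arguments make_vec_envs accepts are not shown in this diff, so those parts are assumptions for illustration only.

```python
# Minimal sketch of the post-rename workflow, assembled from examples/example_a2c.py
# in this commit. Config construction and make_vec_envs arguments are NOT shown in
# this diff and are assumed here for illustration.
import rlcodebase
from rlcodebase.env import make_vec_envs
from rlcodebase.trainer import A2CTrainer            # renamed from rlcodebase.agent.A2CAgent
from rlcodebase.utils import get_action_dim, Config, Logger
from rlcodebase.model import CatACConvNet
from torch.utils.tensorboard import SummaryWriter

config = Config()                                     # assumed: default-constructed Config
config.save_path = './runs/a2c-demo'                  # assumed value

env = make_vec_envs('BreakoutNoFrameskip-v4', num_envs=16)    # assumed arguments
model = CatACConvNet(input_channels=env.observation_space.shape[0],
                     action_dim=get_action_dim(env.action_space)).to(config.device)
logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)

# The Trainer (formerly Agent) owns the training loop and coordinates Agent, Env and Memory.
trainer = A2CTrainer(config, env, model, logger)
trainer.run()
```

The off-policy examples changed in this commit (DQN, DDPG, TD3, SAC) follow the same pattern, but additionally pass an eval_env and a target_model to their trainers.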

example_a2c.py renamed to examples/example_a2c.py

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 import rlcodebase
 from rlcodebase.env import make_vec_envs
-from rlcodebase.agent import A2CAgent
+from rlcodebase.trainer import A2CTrainer
 from rlcodebase.utils import get_action_dim, init_parser, Config, Logger
 from rlcodebase.model import CatACConvNet
 from torch.utils.tensorboard import SummaryWriter
@@ -44,9 +44,9 @@ def main():
     model = CatACConvNet(input_channels = env.observation_space.shape[0], action_dim = get_action_dim(env.action_space)).to(config.device)
     logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)
 
-    # create agent and run
-    agent = A2CAgent(config, env, model, logger)
-    agent.run()
+    # create trainer and run
+    trainer = A2CTrainer(config, env, model, logger)
+    trainer.run()
 
 if __name__ == '__main__':
     main()
example_ddpg.py renamed to examples/example_ddpg.py

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 import rlcodebase
 from rlcodebase.env import make_vec_envs
-from rlcodebase.agent import DDPGAgent
+from rlcodebase.trainer import DDPGTrainer
 from rlcodebase.utils import get_action_dim, init_parser, Config, Logger
 from rlcodebase.model import ConDetACLinearNet
 from torch.utils.tensorboard import SummaryWriter
@@ -47,9 +47,9 @@ def main():
     target_model = ConDetACLinearNet(input_dim = env.observation_space.shape[0], action_dim = get_action_dim(env.action_space)).to(config.device)
     logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)
 
-    # create agent and run
-    agent = DDPGAgent(config, env, eval_env, model, target_model, logger)
-    agent.run()
+    # create trainer and run
+    trainer = DDPGTrainer(config, env, eval_env, model, target_model, logger)
+    trainer.run()
 
 if __name__ == '__main__':
     main()

example_dqn.py renamed to examples/example_dqn.py

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 import rlcodebase
 from rlcodebase.env import make_vec_envs
-from rlcodebase.agent import DQNAgent
+from rlcodebase.trainer import DQNTrainer
 from rlcodebase.utils import get_action_dim, init_parser, Config, Logger
 from rlcodebase.model import CatQConvNet
 from torch.utils.tensorboard import SummaryWriter
@@ -54,9 +54,9 @@ def main():
     target_model = CatQConvNet(input_channels = env.observation_space.shape[0], action_dim = get_action_dim(env.action_space)).to(config.device)
     logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes, config.log_episodes_avg_window)
 
-    # create agent and run
-    agent = DQNAgent(config, env, eval_env, model, target_model, logger)
-    agent.run()
+    # create trainer and run
+    trainer = DQNTrainer(config, env, eval_env, model, target_model, logger)
+    trainer.run()
 
 if __name__ == '__main__':
     main()

example_ppo.py renamed to examples/example_ppo.py

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 import rlcodebase
 from rlcodebase.env import make_vec_envs
-from rlcodebase.agent import PPOAgent
+from rlcodebase.trainer import PPOTrainer
 from rlcodebase.utils import get_action_dim, init_parser, Config, Logger
 from rlcodebase.model import CatACConvNet
 from torch.utils.tensorboard import SummaryWriter
@@ -47,9 +47,9 @@ def main():
     model = CatACConvNet(input_channels = env.observation_space.shape[0], action_dim = get_action_dim(env.action_space)).to(config.device)
     logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)
 
-    # create agent and run
-    agent = PPOAgent(config, env, model, logger)
-    agent.run()
+    # create trainer and run
+    trainer = PPOTrainer(config, env, model, logger)
+    trainer.run()
 
 if __name__ == '__main__':
     main()
Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 import rlcodebase
 from rlcodebase.env import make_vec_envs_procgen
-from rlcodebase.agent import PPOAgent
+from rlcodebase.trainer import PPOTrainer
 from rlcodebase.utils import get_action_dim, init_parser, Config, Logger
 from rlcodebase.model import ImpalaCNN, SeparateImpalaCNN
 from torch.utils.tensorboard import SummaryWriter
@@ -62,9 +62,9 @@ def main():
     model = Model(input_channels = env.observation_space.shape[0], action_dim = get_action_dim(env.action_space)).to(config.device)
     logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)
 
-    # create agent and run
-    agent = PPOAgent(config, env, model, logger)
-    agent.run()
+    # create trainer and run
+    trainer = PPOTrainer(config, env, model, logger)
+    trainer.run()
 
 if __name__ == '__main__':
     main()

example_sac.py renamed to examples/example_sac.py

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 import rlcodebase
 from rlcodebase.env import make_vec_envs
-from rlcodebase.agent import SACAgent
+from rlcodebase.trainer import SACTrainer
 from rlcodebase.utils import get_action_dim, init_parser, Config, Logger
 from rlcodebase.model import ConStoSGADCLinearNet
 from torch.utils.tensorboard import SummaryWriter
@@ -48,9 +48,9 @@ def main():
     target_model = ConStoSGADCLinearNet(input_dim = env.observation_space.shape[0], action_dim = get_action_dim(env.action_space)).to(config.device)
     logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)
 
-    # create agent and run
-    agent = SACAgent(config, env, eval_env, model, target_model, logger)
-    agent.run()
+    # create trainer and run
+    trainer = SACTrainer(config, env, eval_env, model, target_model, logger)
+    trainer.run()
 
 if __name__ == '__main__':
     main()
Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 import rlcodebase
 from rlcodebase.env import make_vec_envs_dmcontrol
-from rlcodebase.agent import SACAgent
+from rlcodebase.trainer import SACTrainer
 from rlcodebase.utils import get_action_dim, init_parser, Config, Logger
 from rlcodebase.model import ConStoSGADCLinearNet
 from torch.utils.tensorboard import SummaryWriter
@@ -49,9 +49,9 @@ def main():
     target_model = ConStoSGADCLinearNet(input_dim = env.observation_space.shape[0], action_dim = get_action_dim(env.action_space)).to(config.device)
     logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)
 
-    # create agent and run
-    agent = SACAgent(config, env, eval_env, model, target_model, logger)
-    agent.run()
+    # create trainer and run
+    trainer = SACTrainer(config, env, eval_env, model, target_model, logger)
+    trainer.run()
 
 if __name__ == '__main__':
     main()

example_td3.py renamed to examples/example_td3.py

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 import rlcodebase
 from rlcodebase.env import make_vec_envs
-from rlcodebase.agent import TD3Agent
+from rlcodebase.trainer import TD3Trainer
 from rlcodebase.utils import get_action_dim, init_parser, Config, Logger
 from rlcodebase.model import ConDetADCLinearNet
 from torch.utils.tensorboard import SummaryWriter
@@ -50,9 +50,9 @@ def main():
     target_model = ConDetADCLinearNet(input_dim = env.observation_space.shape[0], action_dim = get_action_dim(env.action_space)).to(config.device)
     logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)
 
-    # create agent and run
-    agent = TD3Agent(config, env, eval_env, model, target_model, logger)
-    agent.run()
+    # create trainer and run
+    trainer = TD3Trainer(config, env, eval_env, model, target_model, logger)
+    trainer.run()
 
 if __name__ == '__main__':
     main()
Lines changed: 4 additions & 4 deletions
@@ -1,7 +1,7 @@
 from ast import arg
 import rlcodebase
 from rlcodebase.env import make_vec_envs_dmcontrol
-from rlcodebase.agent import TD3Agent
+from rlcodebase.trainer import TD3Trainer
 from rlcodebase.utils import get_action_dim, init_parser, Config, Logger
 from rlcodebase.model import ConDetADCLinearNet
 from torch.utils.tensorboard import SummaryWriter
@@ -53,9 +53,9 @@ def main():
     target_model = ConDetADCLinearNet(input_dim = env.observation_space.shape[0], action_dim = get_action_dim(env.action_space)).to(config.device)
     logger = Logger(SummaryWriter(config.save_path), config.num_echo_episodes)
 
-    # create agent and run
-    agent = TD3Agent(config, env, eval_env, model, target_model, logger)
-    agent.run()
+    # create trainer and run
+    trainer = TD3Trainer(config, env, eval_env, model, target_model, logger)
+    trainer.run()
 
 if __name__ == '__main__':
     main()
