# DeepRL-in-PyTorch/4.REINFORCE/main.py at master · user432/DeepRL-in-PyTorch · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import numpy as np
import gym
from reinforce import Agent
import matplotlib.pyplot as plt
from utils import PlotLearning
from gym import wrappers

def _run_episode(env, agent):
    """Play one full episode and return its total (undiscounted) reward.

    On every step the agent is asked for an action, the action is applied
    to the environment, and the resulting reward is handed to
    ``agent.store_rewards``. No learning is triggered here; the caller
    decides when to call ``agent.learn()``.

    Args:
        env: Environment exposing the classic Gym API, i.e. ``reset()``
            returns an observation and ``step(action)`` returns the
            4-tuple ``(observation, reward, done, info)``.
        agent: Object providing ``choose_action(observation)`` and
            ``store_rewards(reward)``.

    Returns:
        The sum of all rewards collected during the episode.
    """
    observation = env.reset()
    done = False
    score = 0
    while not done:
        action = agent.choose_action(observation)
        observation, reward, done, _info = env.step(action)
        agent.store_rewards(reward)
        score += reward
    return score


if __name__ == '__main__':
    # Train the agent on LunarLander-v2 for a fixed number of episodes,
    # recording every episode through the Monitor wrapper, then plot the
    # per-episode scores.
    # NOTE(review): inp_dims=[8] / n_actions=4 are expected to match the
    # LunarLander-v2 observation and action spaces; ALPHA is presumably the
    # learning rate and GAMMA the discount factor -- confirm in reinforce.py.
    agent = Agent(ALPHA=0.001, inp_dims=[8], GAMMA=0.99, n_actions=4, l1_size=128, l2_size=128)
    env = gym.make('LunarLander-v2')
    # video_callable always returns True, so a video is requested for every
    # single episode.
    env = wrappers.Monitor(env, "tmp/lunar-lander", video_callable=lambda episode_id: True, force=True)
    num_episodes = 2500
    score_history = []
    try:
        for i in range(num_episodes):
            score = _run_episode(env, agent)
            score_history.append(score)
            # Report only after the episode has been played so the printed
            # score belongs to episode ``i`` (previously the line was emitted
            # before the rollout and showed the preceding episode's score).
            print('episode: ', i,'score: ', score)
            # Presumably the per-episode policy update from the rewards
            # stored above -- see Agent.learn in reinforce.py.
            agent.learn()
    finally:
        # Always close the monitored environment, even when training is
        # interrupted, so the Monitor wrapper can finalize its output files.
        env.close()
    filename = 'lunar-lander-alpha001-128x128fc-newG.png'
    # NOTE(review): window=25 is presumably a running-average window used by
    # PlotLearning -- confirm in utils.py.
    PlotLearning(score_history, filename=filename, window=25)