-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmountain_car2.py
More file actions
79 lines (65 loc) · 2.2 KB
/
mountain_car2.py
File metadata and controls
79 lines (65 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation
import numpy as np
# Create the Gym environment that the rest of the script interacts with.
# NOTE(review): the file is named mountain_car2.py but the environment created
# here is CartPole-v0 — confirm which task is intended.
env = gym.make('CartPole-v0')
class Transition(object):
    """One recorded environment step: state, action, reward, next state.

    Attributes:
        s_curr: State observed before the action was taken.
        a: Action that was taken in ``s_curr``.
        r: Reward received for taking ``a``.
        s_next: State observed after the action.

    No ``__eq__``/``__hash__`` is defined on purpose: instances compare and
    hash by identity, so they can be stored in a ``set`` even when the states
    are unhashable (e.g. arrays or lists).
    """

    def __init__(self, s_curr, a, r, s_next):
        self.s_curr = s_curr
        self.a = a
        self.r = r
        self.s_next = s_next

    @property
    def r_t(self):
        """Read-only alias for ``r``.

        The class previously declared a placeholder named ``r_t`` while the
        constructor stored the reward as ``r``, so ``r_t`` was always ``None``.
        The alias keeps the old name readable and makes it return the reward.
        """
        return self.r

    def __repr__(self):
        return "Transition(s_curr={!r}, a={!r}, r={!r}, s_next={!r})".format(
            self.s_curr, self.a, self.r, self.s_next)
def _build_q_network(nb_inputs, nb_outputs):
    """Build and compile a one-hidden-layer network with one linear output per action.

    Args:
        nb_inputs: Number of components in a state vector.
        nb_outputs: Number of outputs (one per discrete action).

    Returns:
        A compiled Keras ``Sequential`` model trained with mean squared error
        and plain SGD.
    """
    network = Sequential([
        Dense(16, input_dim=nb_inputs),
        Activation('relu'),
        Dense(nb_outputs),
        Activation('linear'),
    ])
    # No 'accuracy' metric: the outputs are regression targets, so a
    # classification accuracy would only add a meaningless number to the logs.
    network.compile(loss='mean_squared_error', optimizer='sgd')
    return network


# Network sizes come from the environment instead of being hard-coded.
nb_actions = env.action_space.n
nb_inputs = env.observation_space.shape[0]

# Two networks with identical architecture but independent initial weights.
# Elsewhere in this script `model` picks actions and `target` is the one that
# gets fitted; `model` is periodically overwritten with `target`'s weights.
model = _build_q_network(nb_inputs, nb_actions)
target = _build_q_network(nb_inputs, nb_actions)

epsilon = 0.3  # probability of taking a random action instead of the greedy one
# NOTE(review): 0.05 discounts future reward very heavily; discount factors
# close to 1 are far more common — confirm this value is intended.
gamma = 0.05
replay_memory = set()  # stored Transition objects
# Training loop: act epsilon-greedily, record every step in replay memory and
# fit `target` on one stored transition per environment step.
#
# NOTE(review): actions are chosen with `model`, while `target` is both fitted
# and used for bootstrapping; `model` only receives `target`'s weights every
# 200 episodes. That is the reverse of the usual online/target naming — the
# roles are kept as they were, confirm they are intended.
# NOTE(review): replay_memory is a set, so pop() returns an arbitrary (not
# uniformly random) transition and the memory grows without bound.
try:
    for i_episode in range(10000):
        observation = env.reset()
        for t in range(200):
            # env.render()  # uncomment to watch the agent
            print(np.array([observation]))

            # Explore with probability epsilon, otherwise take the action that
            # `model` scores highest for the current state.
            if np.random.random() < epsilon:
                best_action = env.action_space.sample()
            else:
                best_action = int(np.argmax(model.predict(np.array([observation]))))

            new_observation, reward, done, info = env.step(best_action)

            # A terminal step is stored with s_next=None so that, when it is
            # replayed later, its own terminal status is known. The `done`
            # flag of whichever step happens to be running at replay time says
            # nothing about the replayed transition.
            replay_memory.add(Transition(observation, best_action, reward,
                                         None if done else new_observation))
            observation = new_observation

            replay = replay_memory.pop()
            X_replay = np.array([replay.s_curr])

            # Start from the network's current outputs so that only the entry
            # of the action actually taken produces an error signal, and the
            # target always has the same shape as the network output.
            Y_replay = np.array(target.predict(X_replay))
            if replay.s_next is None:
                # Terminal transition: no future reward to bootstrap from.
                Y_replay[0, replay.a] = replay.r
            else:
                next_q = target.predict(np.array([replay.s_next]))
                Y_replay[0, replay.a] = replay.r + gamma * np.max(next_q)

            print(X_replay)
            print(Y_replay)
            target.fit(X_replay, Y_replay)
            replay_memory.add(replay)  # pop() removed it; put it back

            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break

        if i_episode % 200 == 0:
            # Copy the fitted weights into the action-selection network.
            model.set_weights(target.get_weights())
finally:
    # Release the environment even if training is interrupted or fails.
    env.close()