
Commit aad48f1

committed
fixed error in critic constructor call
1 parent 4f95897 commit aad48f1

File tree

1 file changed (+15, -14 lines)

  • ReinforcementLearning/PolicyGradient/DDPG/tensorflow2/pendulum


ReinforcementLearning/PolicyGradient/DDPG/tensorflow2/pendulum/ddpg_tf2.py

Lines changed: 15 additions & 14 deletions
@@ -1,14 +1,14 @@
-import numpy as np
 import tensorflow as tf
 import tensorflow.keras as keras
 from tensorflow.keras.optimizers import Adam
 from buffer import ReplayBuffer
 from networks import ActorNetwork, CriticNetwork
 
+
 class Agent:
     def __init__(self, input_dims, alpha=0.001, beta=0.002, env=None,
-            gamma=0.99, n_actions=2, max_size=1000000, tau=0.005,
-            fc1=400, fc2=300, batch_size=64, noise=0.1):
+                 gamma=0.99, n_actions=2, max_size=1000000, tau=0.005,
+                 fc1=400, fc2=300, batch_size=64, noise=0.1):
         self.gamma = gamma
         self.tau = tau
         self.memory = ReplayBuffer(max_size, input_dims, n_actions)
@@ -17,11 +17,12 @@ def __init__(self, input_dims, alpha=0.001, beta=0.002, env=None,
         self.noise = noise
         self.max_action = env.action_space.high[0]
         self.min_action = env.action_space.low[0]
-
+
         self.actor = ActorNetwork(n_actions=n_actions, name='actor')
-        self.critic = CriticNetwork(n_actions=n_actions, name='critic')
-        self.target_actor = ActorNetwork(n_actions=n_actions, name='target_actor')
-        self.target_critic = CriticNetwork(n_actions=n_actions, name='target_critic')
+        self.critic = CriticNetwork(name='critic')
+        self.target_actor = ActorNetwork(n_actions=n_actions,
+                                         name='target_actor')
+        self.target_critic = CriticNetwork(name='target_critic')
 
         self.actor.compile(optimizer=Adam(learning_rate=alpha))
         self.critic.compile(optimizer=Adam(learning_rate=beta))
@@ -68,8 +69,8 @@ def choose_action(self, observation, evaluate=False):
         actions = self.actor(state)
         if not evaluate:
             actions += tf.random.normal(shape=[self.n_actions],
-                    mean=0.0, stddev=self.noise)
-        # note that if the environment has an action > 1, we have to multiply by
+                                        mean=0.0, stddev=self.noise)
+        # note that if the env has an action > 1, we have to multiply by
         # max action at some point
         actions = tf.clip_by_value(actions, self.min_action, self.max_action)
 
@@ -80,7 +81,7 @@ def learn(self):
             return
 
         state, action, reward, new_state, done = \
-                self.memory.sample_buffer(self.batch_size)
+            self.memory.sample_buffer(self.batch_size)
 
         states = tf.convert_to_tensor(state, dtype=tf.float32)
         states_ = tf.convert_to_tensor(new_state, dtype=tf.float32)
@@ -92,11 +93,11 @@ def learn(self):
             critic_value_ = tf.squeeze(self.target_critic(
                                 states_, target_actions), 1)
             critic_value = tf.squeeze(self.critic(states, actions), 1)
-            target = reward + self.gamma*critic_value_*(1-done)
+            target = rewards + self.gamma*critic_value_*(1-done)
             critic_loss = keras.losses.MSE(target, critic_value)
 
         critic_network_gradient = tape.gradient(critic_loss,
-                self.critic.trainable_variables)
+                                                self.critic.trainable_variables)
         self.critic.optimizer.apply_gradients(zip(
             critic_network_gradient, self.critic.trainable_variables))
 
@@ -105,8 +106,8 @@ def learn(self):
             actor_loss = -self.critic(states, new_policy_actions)
             actor_loss = tf.math.reduce_mean(actor_loss)
 
-        actor_network_gradient = tape.gradient(actor_loss,
-                self.actor.trainable_variables)
+        actor_network_gradient = tape.gradient(actor_loss,
+                                               self.actor.trainable_variables)
         self.actor.optimizer.apply_gradients(zip(
             actor_network_gradient, self.actor.trainable_variables))
 
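The constructor fix lines up with the usual DDPG network layout, where the critic concatenates state and action inside call() and therefore never needs n_actions at construction time, while the actor still needs n_actions to size its output layer. Passing n_actions= to such a critic raises a TypeError (unexpected keyword argument), which is presumably the error the commit message refers to. The sketch below is an assumption about what networks.py roughly looks like, not the repository's actual code; layer widths and attribute names are illustrative.

# Hypothetical sketch of networks.py, consistent with the fixed constructor
# calls in the diff above. Layer sizes and attribute names are assumptions.
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense


class CriticNetwork(keras.Model):
    # No n_actions parameter: the action tensor is concatenated with the
    # state inside call(), so its size never matters at construction time.
    def __init__(self, fc1_dims=512, fc2_dims=512, name='critic'):
        super().__init__()
        self.model_name = name
        self.fc1 = Dense(fc1_dims, activation='relu')
        self.fc2 = Dense(fc2_dims, activation='relu')
        self.q = Dense(1, activation=None)

    def call(self, state, action):
        x = self.fc1(tf.concat([state, action], axis=1))
        x = self.fc2(x)
        return self.q(x)  # Q(s, a), one value per batch element


class ActorNetwork(keras.Model):
    # n_actions is still required here: it sizes the output layer.
    def __init__(self, n_actions, fc1_dims=512, fc2_dims=512, name='actor'):
        super().__init__()
        self.model_name = name
        self.fc1 = Dense(fc1_dims, activation='relu')
        self.fc2 = Dense(fc2_dims, activation='relu')
        self.mu = Dense(n_actions, activation='tanh')  # bounded in [-1, 1]

    def call(self, state):
        x = self.fc1(state)
        x = self.fc2(x)
        return self.mu(x)

The reward -> rewards rename in learn() presumably switches the TD target over to a tensor created elsewhere in the method (e.g. via tf.convert_to_tensor), outside the lines shown in that hunk.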