Commit 37388d7

update docs for MCPG loss
1 parent 7e0ef36 commit 37388d7

File tree

1 file changed (+5 −5 lines)

tensorlayer/rein.py

Lines changed: 5 additions & 5 deletions
@@ -60,15 +60,15 @@ def cross_entropy_reward_loss(logits, actions, rewards, name=None):

     Examples
     ----------
-    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])   # observation for training
-    >>> network = tl.layers.InputLayer(states_batch_pl, name='input_layer')
-    >>> network = tl.layers.DenseLayer(network, n_units=H, act = tf.nn.relu, name='relu1')
-    >>> network = tl.layers.DenseLayer(network, n_units=3, act = tl.activation.identity, name='output_layer')
+    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])
+    >>> network = InputLayer(states_batch_pl, name='input')
+    >>> network = DenseLayer(network, n_units=H, act=tf.nn.relu, name='relu1')
+    >>> network = DenseLayer(network, n_units=3, name='out')
     >>> probs = network.outputs
     >>> sampling_prob = tf.nn.softmax(probs)
     >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
     >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
-    >>> loss = cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
+    >>> loss = tl.rein.cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
     >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
     """
