@@ -60,15 +60,15 @@ def cross_entropy_reward_loss(logits, actions, rewards, name=None):
 
     Examples
     ----------
-    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])   # observation for training
-    >>> network = tl.layers.InputLayer(states_batch_pl, name='input_layer')
-    >>> network = tl.layers.DenseLayer(network, n_units=H, act = tf.nn.relu, name='relu1')
-    >>> network = tl.layers.DenseLayer(network, n_units=3, act = tl.activation.identity, name='output_layer')
+    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])
+    >>> network = InputLayer(states_batch_pl, name='input')
+    >>> network = DenseLayer(network, n_units=H, act=tf.nn.relu, name='relu1')
+    >>> network = DenseLayer(network, n_units=3, name='out')
     >>> probs = network.outputs
     >>> sampling_prob = tf.nn.softmax(probs)
     >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
     >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
-    >>> loss = cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
+    >>> loss = tl.rein.cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
     >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
     """

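After this change the docstring example feeds raw logits (`probs = network.outputs`), the sampled actions, and the discounted rewards into `tl.rein.cross_entropy_reward_loss`. As a reading aid, the sketch below shows what a reward-weighted cross-entropy loss of this shape computes under TensorFlow 1.x; the helper name `reward_weighted_cross_entropy` is made up for illustration and is not necessarily the library's exact implementation.

```python
import tensorflow as tf

def reward_weighted_cross_entropy(logits, actions, rewards):
    """Sketch of a REINFORCE-style surrogate loss (illustrative only)."""
    # Per-step cross-entropy between the policy logits and the sampled integer actions.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=actions, logits=logits)
    # Weight each step by its discounted reward and sum over the batch;
    # minimizing this raises the log-probability of actions that led to high reward.
    return tf.reduce_sum(tf.multiply(cross_entropy, rewards))
```

With the placeholders from the example, `loss = reward_weighted_cross_entropy(probs, actions_batch_pl, discount_rewards_batch_pl)` would stand in for the call shown in the diff.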