diff --git a/Policy Gradients/Cartpole/Cartpole REINFORCE Monte Carlo Policy Gradients.ipynb b/Policy Gradients/Cartpole/Cartpole REINFORCE Monte Carlo Policy Gradients.ipynb index d661e15..59a3d50 100644 --- a/Policy Gradients/Cartpole/Cartpole REINFORCE Monte Carlo Policy Gradients.ipynb +++ b/Policy Gradients/Cartpole/Cartpole REINFORCE Monte Carlo Policy Gradients.ipynb @@ -194,7 +194,7 @@ "The idea is simple:\n", "- Our state which is an array of 4 values will be used as an input.\n", "- Our NN is 3 fully connected layers.\n", - "- Our output activation function is softmax that squashes the outputs to a probability distribution (for instance if we have 4, 2, 6 --> softmax --> (0.4, 0.2, 0.6)" + "- Our output activation function is softmax that squashes the outputs to a probability distribution (for instance, if we have 4, 2, 6 --> softmax --> (0.11731043, 0.01587624, 0.86681333))" ] }, {