add comment, GLIE

zsdonghao · zsdonghao · commit e46a3a841f97 · 2017-07-24T20:41:03.000+01:00
diff --git a/example/tutorial_frozenlake_dqn.py b/example/tutorial_frozenlake_dqn.py
@@ -5,7 +5,7 @@
 from tensorlayer.layers import *
 import matplotlib.pyplot as plt
 
-""" Q-Network Q(a, s) - TD Learning, Off-Policy, e-Greedy Exploration
+""" Q-Network Q(a, s) - TD Learning, Off-Policy, e-Greedy Exploration (GLIE)
 
 Q(S, A) <- Q(S, A) + alpha * (R + lambda * Q(newS, newA) - Q(S, A))
 delta_w = R + lambda * Q(newS, newA)
@@ -90,7 +90,7 @@ def to_one_hot(i, n_classes=None):
             s = s1
             ## Reduce chance of random action if an episode is done.
             if d == True:
-                e = 1./((i/50) + 10)    # reduce e
+                e = 1./((i/50) + 10)    # reduce e, GLIE: Greedy in the Limit with Infinite Exploration
                 break
 
         ## Note that, the rewards here with random action