Fix FrozenLake DQN tutorial: re-enable the Q-target update for the chosen action

zsdonghao · zsdonghao · commit e8971b35ed34 · 2017-07-21T14:26:11.000+01:00
diff --git a/example/tutorial_frozenlake_dqn.py b/example/tutorial_frozenlake_dqn.py
@@ -83,7 +83,7 @@ def to_one_hot(i, n_classes=None):
             ## Obtain maxQ' and set our target value for chosen action.
             maxQ1 = np.max(Q1)
             targetQ = allQ
-            # targetQ[0, a[0]] = r + lambd * maxQ1
+            targetQ[0, a[0]] = r + lambd * maxQ1
             # targetQ[0, a[0]] = targetQ[0, a[0]] + alpha * (r + lambd * maxQ1 - targetQ[0, a[0]])
             ## Train network using target and predicted Q values
             _ = sess.run(train_op, {inputs : [to_one_hot(s, 16)], nextQ : targetQ})