# -*- coding: utf-8 -*-
import DeepFried2 as df


class DQNProp(df.Optimizer):
    """
    RMSProp as described on page 23 of http://arxiv.org/pdf/1308.0850v5.pdf

    Also used by DeepMind in their DQN code (NeuralQLearner.lua):
    https://sites.google.com/a/deepmind.com/dqn/

    The updates, for parameter p with gradient ∇p at step e, decay ρ and
    learning-rate lr, are:

        g_{e+1}  = ρ * g_e  + (1-ρ) * ∇p_e
        g²_{e+1} = ρ * g²_e + (1-ρ) * ∇p_e²
        p_{e+1}  = p_e - lr * ∇p_e / √(g²_{e+1} - g_{e+1}² + eps)

    This roughly corresponds to dividing each gradient by the standard deviation
    of its recent values, in a rolling-momentum fashion: g²_{e+1} - g_{e+1}²
    estimates the variance of the gradient over past batches, so the more
    "unstable" a gradient is, the lower its effective learning-rate.
    """

    def __init__(self, lr, rho, eps=1e-7):
        df.Optimizer.__init__(self, lr=lr, rho=rho, eps=eps)

    def get_updates(self, params, grads, lr, rho, eps):
        updates = []

        for param, grad in zip(params, grads):
            # Rolling averages of the gradient and of the squared gradient,
            # kept as extra per-parameter state.
            g_state = df.utils.create_param_state_as(param)
            new_g = rho*g_state + (1-rho)*grad
            g2_state = df.utils.create_param_state_as(param)
            new_g2 = rho*g2_state + (1-rho)*grad*grad
            updates.append((g_state, new_g))
            updates.append((g2_state, new_g2))
            # new_g2 - new_g² estimates the gradient's variance; eps guards
            # against division by zero for very stable gradients.
            updates.append((param, param - lr*(grad/df.T.sqrt(new_g2 - new_g*new_g + eps))))

        return updates
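

# A minimal usage sketch (an illustration, not part of the optimizer itself).
# It assumes the train-step conventions seen in DeepFried2's examples
# (zero_grad_parameters / accumulate_gradients / update_parameters); the
# model, criterion, hyper-parameter values, and `minibatches` iterable are
# hypothetical placeholders, not taken from this file:
#
#   model = df.Sequential(df.Linear(128, 4))
#   criterion = df.MSECriterion()
#   optimizer = DQNProp(lr=1e-4, rho=0.95)
#
#   for X, y in minibatches:
#       model.zero_grad_parameters()
#       cost = model.accumulate_gradients(X, y, criterion)
#       optimizer.update_parameters(model)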