@@ -130,7 +130,7 @@ RMSProp(; eta = 0.001, rho = 0.9, epsilon = 1e-8, kw...) = RMSProp(eta, rho, eps
# Build the initial optimiser state for `RMSProp` on the array `x`.
#
# Returns a 2-tuple, which `apply!` unpacks as `(quad, lin)`:
#   1. a zero array shaped like `x`;
#   2. another zero array shaped like `x` when `o.centred` is true,
#      otherwise the literal `false`.
function init(o::RMSProp, x::AbstractArray)
    quad = zero(x)
    lin = o.centred ? zero(x) : false
    return (quad, lin)
end
131131
132132function apply! (o:: RMSProp , state, x:: AbstractArray{T} , dx) where T
133- η, ρ, ϵ = T (o. eta), T (o. rho), T ( o. epsilon)
133+ η, ρ, ϵ = T (o. eta), T (o. rho), _eps (T, o. epsilon)
134134 quad, lin = state
135135
136136 @. . quad = ρ * quad + (1 - ρ) * abs2 (dx)
# Build the initial optimiser state for `Adam` on the array `x`.
#
# Returns a 3-tuple, which `apply!` unpacks as `(mt, vt, βt)`:
# two zero arrays shaped like `x`, followed by `o.beta` converted
# element-wise to the element type `T` of `x`.
function init(o::Adam, x::AbstractArray{T}) where {T}
    first_buf = zero(x)
    second_buf = zero(x)
    betas = T.(o.beta)
    return (first_buf, second_buf, betas)
end
217217
218218function apply! (o:: Adam , state, x:: AbstractArray{T} , dx) where T
219- η, β, ϵ = T (o. eta), T .(o. beta), T ( o. epsilon)
219+ η, β, ϵ = T (o. eta), T .(o. beta), _eps (T, o. epsilon)
220220 mt, vt, βt = state
221221
222222 @. . mt = β[1 ] * mt + (1 - β[1 ]) * dx
# Build the initial optimiser state for `RAdam` on the array `x`.
#
# Returns a 4-tuple, which `apply!` unpacks as `(mt, vt, βt, t)`:
# two zero arrays shaped like `x`, `o.beta` converted element-wise to
# the element type `T` of `x`, and the integer `1`.
function init(o::RAdam, x::AbstractArray{T}) where {T}
    first_buf = zero(x)
    second_buf = zero(x)
    betas = T.(o.beta)
    return (first_buf, second_buf, betas, 1)
end
280280
281281function apply! (o:: RAdam , state, x:: AbstractArray{T} , dx) where T
282- η, β, ϵ = T (o. eta), T .(o. beta), T ( o. epsilon)
282+ η, β, ϵ = T (o. eta), T .(o. beta), _eps (T, o. epsilon)
283283 ρ∞ = 2 / (1 - β[2 ]) - 1 |> real
284284
285285 mt, vt, βt, t = state
# Build the initial optimiser state for `AdaMax` on the array `x`.
#
# Returns a 3-tuple, which `apply!` unpacks as `(mt, ut, βt)`:
# two zero arrays shaped like `x`, followed by `o.beta` converted
# element-wise to the element type `T` of `x`.
function init(o::AdaMax, x::AbstractArray{T}) where {T}
    first_buf = zero(x)
    second_buf = zero(x)
    betas = T.(o.beta)
    return (first_buf, second_buf, betas)
end
321321
322322function apply! (o:: AdaMax , state, x:: AbstractArray{T} , dx) where T
323- η, β, ϵ = T (o. eta), T .(o. beta), T ( o. epsilon)
323+ η, β, ϵ = T (o. eta), T .(o. beta), _eps (T, o. epsilon)
324324 mt, ut, βt = state
325325
326326 @. . mt = β[1 ] * mt + (1 - β[1 ]) * dx
# Build the initial optimiser state for `OAdam` on the array `x`.
#
# Returns a 4-tuple, which `apply!` unpacks as `(mt, vt, βt, term)`:
# two zero arrays shaped like `x`, `o.beta` converted element-wise to
# the element type `T` of `x`, and a third zero array shaped like `x`.
function init(o::OAdam, x::AbstractArray{T}) where {T}
    first_buf = zero(x)
    second_buf = zero(x)
    betas = T.(o.beta)
    last_buf = zero(x)
    return (first_buf, second_buf, betas, last_buf)
end
355355
356356function apply! (o:: OAdam , state, x:: AbstractArray{T} , dx) where T
357- η, β, ϵ = T (o. eta), T .(o. beta), T ( o. epsilon)
357+ η, β, ϵ = T (o. eta), T .(o. beta), _eps (T, o. epsilon)
358358 mt, vt, βt, term = state
359359
360360 @. . mt = β[1 ] * mt + (1 - β[1 ]) * dx
# Build the initial optimiser state for `AdaGrad` on the array `x`.
#
# The state is whatever `onevalue(o.epsilon, x)` returns; `apply!` binds
# it to `acc`.
# NOTE(review): `onevalue` is defined elsewhere — presumably it fills an
# array shaped like `x` with `o.epsilon`; confirm against its definition.
function init(o::AdaGrad, x::AbstractArray)
    return onevalue(o.epsilon, x)
end
389389
390390function apply! (o:: AdaGrad , state, x:: AbstractArray{T} , dx) where T
391- η, ϵ = T (o. eta), T ( o. epsilon)
391+ η, ϵ = T (o. eta), _eps (T, o. epsilon)
392392 acc = state
393393
394394 @. . acc = acc + abs2 (dx)
# Build the initial optimiser state for `AdaDelta` on the array `x`.
#
# Returns a 2-tuple of independent zero arrays shaped like `x`, which
# `apply!` unpacks as `(acc, Δacc)`.
function init(o::AdaDelta, x::AbstractArray)
    acc = zero(x)
    Δacc = zero(x)
    return (acc, Δacc)
end
419419
420420function apply! (o:: AdaDelta , state, x:: AbstractArray{T} , dx) where T
421- ρ, ϵ = T (o. rho), T ( o. epsilon)
421+ ρ, ϵ = T (o. rho), _eps (T, o. epsilon)
422422 acc, Δacc = state
423423
424424 @. . acc = ρ * acc + (1 - ρ) * abs2 (dx)
@@ -454,7 +454,7 @@ init(o::AMSGrad, x::AbstractArray) =
454454 (onevalue (o. epsilon, x), onevalue (o. epsilon, x), onevalue (o. epsilon, x))
455455
456456function apply! (o:: AMSGrad , state, x:: AbstractArray{T} , dx) where T
457- η, β, ϵ = T (o. eta), T .(o. beta), T ( o. epsilon)
457+ η, β, ϵ = T (o. eta), T .(o. beta), _eps (T, o. epsilon)
458458 mt, vt, v̂t = state
459459
460460 @. . mt = β[1 ] * mt + (1 - β[1 ]) * dx
# Build the initial optimiser state for `NAdam` on the array `x`.
#
# Returns a 3-tuple, which `apply!` unpacks as `(mt, vt, βt)`:
# two zero arrays shaped like `x`, followed by `o.beta` converted
# element-wise to the element type `T` of `x`.
function init(o::NAdam, x::AbstractArray{T}) where {T}
    first_buf = zero(x)
    second_buf = zero(x)
    betas = T.(o.beta)
    return (first_buf, second_buf, betas)
end
490490
491491function apply! (o:: NAdam , state, x:: AbstractArray{T} , dx) where T
492- η, β, ϵ = T (o. eta), T .(o. beta), T (o. epsilon)
493-
492+ η, β, ϵ = T (o. eta), T .(o. beta), _eps (T, o. epsilon)
494493 mt, vt, βt = state
495494
496495 @. . mt = β[1 ] * mt + (1 - β[1 ]) * dx
# Build the initial optimiser state for `AdaBelief` on the array `x`.
#
# Returns a 3-tuple, which `apply!` unpacks as `(mt, st, βt)`:
# two zero arrays shaped like `x`, followed by `o.beta` converted
# element-wise to the element type `T` of `x`.
function init(o::AdaBelief, x::AbstractArray{T}) where {T}
    first_buf = zero(x)
    second_buf = zero(x)
    betas = T.(o.beta)
    return (first_buf, second_buf, betas)
end
549548
550549function apply! (o:: AdaBelief , state, x:: AbstractArray{T} , dx) where T
551- η, β, ϵ = T (o. eta), T .(o. beta), T ( o. epsilon)
550+ η, β, ϵ = T (o. eta), T .(o. beta), _eps (T, o. epsilon)
552551 mt, st, βt = state
553552
554553 @. . mt = β[1 ] * mt + (1 - β[1 ]) * dx
0 commit comments