@@ -22,13 +22,13 @@ import ReinforcementLearningBase: RLBase
2222 q_values = NN(rand(Float32, 2))
2323 @test size(q_values) == (3,)
2424
25- gs = gradient(params(NN)) do
25+ gs = gradient(NN) do m
26- sum(NN(rand(Float32, 2, 5)))
26+ sum(m(rand(Float32, 2, 5)))
2727 end
2828
29- old_params = deepcopy(collect(params(NN).params))
29+ old_params = deepcopy(collect(Flux.trainable(NN).params))
3030 push!(NN, gs)
31- new_params = collect(params(NN).params)
31+ new_params = collect(Flux.trainable(NN).params)
3232
3333 @test old_params != new_params
3434 end
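The rewritten test relies on Flux's explicit-mode gradient: the model itself is passed as the differentiated argument, the do-block receives it, and the result is a tuple whose first element mirrors the model's field structure. A minimal sketch of that pattern, using a plain Dense layer as a stand-in for the approximator's network (the layer and sizes here are illustrative, not taken from the test):

using Flux

model = Dense(2 => 3)          # stand-in for the approximator's network
x = rand(Float32, 2, 5)

gs = gradient(model) do m      # explicit mode: the do-block receives the model
    sum(m(x))
end

# gs is a one-element tuple; gs[1] is a NamedTuple mirroring the model's fields
gs[1].weight                   # gradient with respect to model.weight
gs[1].bias                     # gradient with respect to model.bias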
@@ -72,42 +72,40 @@ import ReinforcementLearningBase: RLBase
7272 end
7373 @testset "Correctness of gradients" begin
7474 @testset "One action per state" begin
75- @test Flux.params(gn) == Flux.Params([gn.pre.weight, gn.pre.bias, gn.μ.weight, gn.μ.bias, gn.σ.weight, gn.σ.bias])
75+ @test Flux.trainable(gn).pre == gn.pre
76+ @test Flux.trainable(gn).μ == gn.μ
77+ @test Flux.trainable(gn).σ == gn.σ
7678 action_saver = Matrix[]
77- g = Flux.gradient(Flux.params(gn)) do
78- a, logp = gn(state, is_sampling = true, is_return_log_prob = true)
79+ g = Flux.gradient(gn) do model
80+ a, logp = model(state, is_sampling = true, is_return_log_prob = true)
7981 ChainRulesCore.ignore_derivatives() do
8082 push!(action_saver, a)
8183 end
8284 sum(logp)
8385 end
84- g2 = Flux.gradient(Flux.params(gn)) do
85- logp = gn(state, only(action_saver))
86+ g2 = Flux.gradient(gn) do model
87+ logp = model(state, only(action_saver))
8688 sum(logp)
8789 end
8890 # Check that gradients are identical
89- for (grad1, grad2) in zip(g, g2)
90- @test grad1 ≈ grad2
91- end
91+ @test g == g2
9292 end
9393 @testset "Multiple actions per state" begin
9494 # Same with multiple actions sampled
9595 action_saver = []
9696 state = unsqueeze(state, dims = 2)
97- g = Flux.gradient(Flux.params(gn)) do
98- a, logp = gn(state, 3)
97+ g1 = Flux.gradient(gn) do model
98+ a, logp = model(state, 3)
9999 ChainRulesCore.ignore_derivatives() do
100100 push!(action_saver, a)
101101 end
102102 sum(logp)
103103 end
104- g2 = Flux.gradient(Flux.params(gn)) do
105- logp = gn(state, only(action_saver))
104+ g2 = Flux.gradient(gn) do model
105+ logp = model(state, only(action_saver))
106106 sum(logp)
107107 end
108- for (grad1, grad2) in zip(g, g2)
109- @test grad1 ≈ grad2
110- end
108+ @test g1 == g2
111109 end
112110 end
113111 end
@@ -117,7 +115,6 @@ import ReinforcementLearningBase: RLBase
117115 gn = GaussianNetwork(Dense(20, 15), Dense(15, 10), Dense(15, 10, softplus)) |> gpu
118116 state = rand(Float32, 20, 3) |> gpu # batch of 3 states
119117 @testset "Forward pass compatibility" begin
120- @test Flux.params(gn) == Flux.Params([gn.pre.weight, gn.pre.bias, gn.μ.weight, gn.μ.bias, gn.σ.weight, gn.σ.bias])
121118 m, L = gn(state)
122119 @test size(m) == size(L) == (10, 3)
123120 a, logp = gn(CUDA.CURAND.RNG(), state, is_sampling = true, is_return_log_prob = true)
@@ -134,15 +131,15 @@ import ReinforcementLearningBase: RLBase
134131 @testset "Backward pass compatibility" begin
135132 @testset "One action sampling" begin
136133 action_saver = CuMatrix[]
137- g = Flux.gradient(Flux.params(gn)) do
138- a, logp = gn(CUDA.CURAND.RNG(), state, is_sampling = true, is_return_log_prob = true)
134+ g = Flux.gradient(gn) do model
135+ a, logp = model(CUDA.CURAND.RNG(), state, is_sampling = true, is_return_log_prob = true)
139136 ChainRulesCore.ignore_derivatives() do
140137 push!(action_saver, a)
141138 end
142139 sum(logp)
143140 end
144- g2 = Flux.gradient(Flux.params(gn)) do
145- logp = gn(state, only(action_saver))
141+ g2 = Flux.gradient(gn) do model
142+ logp = model(state, only(action_saver))
146143 sum(logp)
147144 end
148145 # Check that gradients are identical
@@ -153,15 +150,15 @@ import ReinforcementLearningBase: RLBase
153150 @testset "Multiple actions sampling" begin
154151 action_saver = []
155152 state = unsqueeze(state, dims = 2)
156- g = Flux.gradient(Flux.params(gn)) do
153+ g = Flux.gradient(gn) do model
157- a, logp = gn(CUDA.CURAND.RNG(), state, 3)
154+ a, logp = model(CUDA.CURAND.RNG(), state, 3)
158155 ChainRulesCore.ignore_derivatives() do
159156 push!(action_saver, a)
160157 end
161158 sum(logp)
162159 end
163- g2 = Flux.gradient(Flux.params(gn)) do
164- logp = gn(state, only(action_saver))
160+ g2 = Flux.gradient(gn) do model
161+ logp = model(state, only(action_saver))
165162 sum(logp)
166163 end
167164 for (grad1, grad2) in zip(g, g2)
@@ -202,7 +199,10 @@ import ReinforcementLearningBase: RLBase
202199 μ = Dense(15, 10)
203200 Σ = Dense(15, 10 * 11 ÷ 2)
204201 gn = CovGaussianNetwork(pre, μ, Σ)
205- @test Flux.params(gn) == Flux.Params([pre.weight, pre.bias, μ.weight, μ.bias, Σ.weight, Σ.bias])
202+ @test Flux.trainable(gn).pre == pre
203+ @test Flux.trainable(gn).μ == μ
204+ @test Flux.trainable(gn).Σ == Σ
205+
206206 state = rand(Float32, 20, 3) # batch of 3 states
207207 # Check that it works in 2D
208208 m, L = gn(state)
@@ -233,35 +233,34 @@ import ReinforcementLearningBase: RLBase
233233 logp_truth = [logpdf(mvn, a) for (mvn, a) in zip(mvnormals, eachslice(as, dims = 3))]
234234 @test stack(logp_truth; dims = 2) ≈ dropdims(logps, dims = 1) # test against ground truth
235235 action_saver = []
236- g = Flux. gradient (Flux . params ( gn)) do
237- a, logp = gn (Flux. unsqueeze (state,dims = 2 ), is_sampling = true , is_return_log_prob = true )
236+ g1 = Flux. gradient (gn) do model
237+ a, logp = model (Flux. unsqueeze (state,dims = 2 ), is_sampling = true , is_return_log_prob = true )
238238 ChainRulesCore. ignore_derivatives () do
239239 push! (action_saver, a)
240240 end
241241 mean (logp)
242242 end
243- g2 = Flux.gradient(Flux.params(gn)) do
244- logp = gn(Flux.unsqueeze(state, dims = 2), only(action_saver))
243+ g2 = Flux.gradient(gn) do model
244+ logp = model(Flux.unsqueeze(state, dims = 2), only(action_saver))
245245 mean(logp)
246246 end
247- for (grad1, grad2) in zip(g, g2)
248- @test grad1 ≈ grad2
249- end
247+ @test g1 == g2
248+
250249 empty!(action_saver)
251- g3 = Flux.gradient(Flux.params(gn)) do
252- a, logp = gn(Flux.unsqueeze(state, dims = 2), 3)
250+
251+ g3 = Flux.gradient(gn) do model
252+ a, logp = model(Flux.unsqueeze(state, dims = 2), 3)
253253 ChainRulesCore.ignore_derivatives() do
254254 push!(action_saver, a)
255255 end
256256 mean(logp)
257257 end
258- g4 = Flux.gradient(Flux.params(gn)) do
259- logp = gn(Flux.unsqueeze(state, dims = 2), only(action_saver))
258+ g4 = Flux.gradient(gn) do model
259+ logp = model(Flux.unsqueeze(state, dims = 2), only(action_saver))
260260 mean(logp)
261261 end
262- for (grad1, grad2) in zip(g4, g3)
263- @test grad1 ≈ grad2
264- end
262+
263+ @test g4 == g3
265264 end
266265 @testset "CUDA" begin
267266 if (@isdefined CUDA) && CUDA.functional()
@@ -271,7 +270,6 @@ import ReinforcementLearningBase: RLBase
271270 μ = Dense(15, 10) |> gpu
272271 Σ = Dense(15, 10 * 11 ÷ 2) |> gpu
273272 gn = CovGaussianNetwork(pre, μ, Σ)
274- @test Flux.params(gn) == Flux.Params([pre.weight, pre.bias, μ.weight, μ.bias, Σ.weight, Σ.bias])
275273 state = rand(Float32, 20, 3) |> gpu # batch of 3 states
276274 m, L = gn(Flux.unsqueeze(state, dims = 2))
277275 @test size(m) == (10, 1, 3)
@@ -292,31 +290,31 @@ import ReinforcementLearningBase: RLBase
292290 logp_truth = [logpdf(mvn, cpu(a)) for (mvn, a) in zip(mvnormals, eachslice(as, dims = 3))]
293291 @test reduce(hcat, collect(logp_truth)) ≈ dropdims(cpu(logps); dims = 1) # test against ground truth
294292 action_saver = []
295- g = Flux.gradient(Flux.params(gn)) do
296- a, logp = gn(rng, Flux.unsqueeze(state, dims = 2), is_sampling = true, is_return_log_prob = true)
293+ g = Flux.gradient(gn) do model
294+ a, logp = model(rng, Flux.unsqueeze(state, dims = 2), is_sampling = true, is_return_log_prob = true)
297295 ChainRulesCore.ignore_derivatives() do
298296 push!(action_saver, a)
299297 end
300298 mean(logp)
301299 end
302300
303- g2 = Flux.gradient(Flux.params(gn)) do
304- logp = gn(Flux.unsqueeze(state, dims = 2), only(action_saver))
301+ g2 = Flux.gradient(gn) do model
302+ logp = model(Flux.unsqueeze(state, dims = 2), only(action_saver))
305303 mean(logp)
306304 end
307305 for (grad1, grad2) in zip(g, g2)
308306 @test grad1 ≈ grad2
309307 end
310308 empty!(action_saver)
311- g3 = Flux.gradient(Flux.params(gn)) do
312- a, logp = gn(rng, Flux.unsqueeze(state, dims = 2), 3)
309+ g3 = Flux.gradient(gn) do model
310+ a, logp = model(rng, Flux.unsqueeze(state, dims = 2), 3)
313311 ChainRulesCore.ignore_derivatives() do
314312 push!(action_saver, a)
315313 end
316314 mean(logp)
317315 end
318- g4 = Flux.gradient(Flux.params(gn)) do
319- logp = gn(Flux.unsqueeze(state, dims = 2), only(action_saver))
316+ g4 = Flux.gradient(gn) do model
317+ logp = model(Flux.unsqueeze(state, dims = 2), only(action_saver))
320318 mean(logp)
321319 end
322320 for (grad1, grad2) in zip(g4, g3)
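The `Flux.trainable` checks that replace the old `Flux.params`/`Flux.Params` comparisons rest on `trainable` returning a NamedTuple of a layer's trainable children, so each child can be compared directly. A rough sketch of that behaviour, assuming a recent Flux where `Flux.@layer` is available; `ToyGaussian` is a hypothetical stand-in for `GaussianNetwork`/`CovGaussianNetwork`, not the real type:

using Flux

struct ToyGaussian{P,M,S}
    pre::P
    μ::M
    σ::S
end
Flux.@layer ToyGaussian        # registers the fields as children; all are trainable by default

toy = ToyGaussian(Dense(20 => 15), Dense(15 => 10), Dense(15 => 10, softplus))

# trainable returns a NamedTuple of the trainable children,
# so the updated tests can compare each child against the corresponding field:
Flux.trainable(toy).pre === toy.pre   # true
Flux.trainable(toy).μ === toy.μ       # true
Flux.trainable(toy).σ === toy.σ       # true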