|
# Model fixtures for the `trainables` tests below.
# NOTE: several fixtures deliberately reuse the SAME array object `m1`
# (not a copy) so that parameter de-duplication of tied weights can be tested.

m1 = collect(1:3.0)                                 # plain Vector{Float64} leaf
m2 = (collect(1:3.0), collect(4:6.0))               # tuple of two independent leaves
m3 = (x = m1, y = sin, z = collect(4:6.0))          # `sin` is a non-trainable (non-array) leaf

m4 = (x = m1, y = m1, z = collect(4:6.0)) # tied: x and y are the identical array
m5 = (a = (m3, true), b = (m1, false), c = (m4, true))  # nested tuples with Bool leaves; m1 shared throughout
m6 = (a = m1, b = [4.0 + im], c = m1)               # tied a/c, plus a complex-valued leaf

# TwoThirds/Foo are test types defined elsewhere in the suite.
# Presumably TwoThirds marks only its first field as trainable — the
# "trainables" testset below expects a single parameter from m7; TODO confirm
# against the type's definition.
m7 = TwoThirds((sin, collect(1:3.0)), (cos, collect(4:6.0)), (tan, collect(7:9.0)))
m8 = [Foo(m1, m1), (a = true, b = Foo([4.0], false), c = ()), [[5.0]]]

mat = Float32[4 6; 5 7]                             # Float32 matrix to mix eltypes
m9 = (a = m1, b = mat, c = [mat, m1])               # c repeats both arrays already seen above
| 15 | + |
@testset "trainables" begin
    # Collecting from a bare array yields a one-element parameter list.
    params = trainables(m1)
    @test params isa Vector
    @test length(params) == 1
    @test params[1] == m1

    # Tuples contribute their array leaves in order.
    params = trainables(m2)
    @test params isa Vector
    @test length(params) == 2
    @test params[1] == m2[1]
    @test params[2] == m2[2]

    # Non-array leaves (the function `sin`) are skipped.
    params = trainables(m3)
    @test length(params) == 2
    @test params[1] == [1.0, 2.0, 3.0]
    @test params[2] == [4.0, 5.0, 6.0]

    # Tied weights (m4.x === m4.y) are collected only once.
    params = trainables(m4)
    @test length(params) == 2
    @test params[1] == [1.0, 2.0, 3.0]
    @test params[2] == [4.0, 5.0, 6.0]

    # Deep nesting with repeated arrays: each distinct array appears once.
    params = trainables(m5)
    @test length(params) == 3
    @test params[1] == [1.0, 2.0, 3.0]
    @test params[2] == [4.0, 5.0, 6.0]
    @test params[3] == [4.0, 5.0, 6.0]

    # Complex-valued leaves are trainable; tied a/c deduplicated.
    params = trainables(m6)
    @test length(params) == 2
    @test params[1] == [1.0, 2.0, 3.0]
    @test params[2] == ComplexF64[4.0 + 1.0im]

    # Custom type restricting its trainable fields yields one parameter.
    params = trainables(m7)
    @test length(params) == 1
    @test params[1] == [1.0, 2.0, 3.0]

    # Heterogeneous vector of structs/namedtuples/nested arrays.
    params = trainables(m8)
    @test length(params) == 3
    @test params[1] == [1.0, 2.0, 3.0]
    @test params[2] == [4.0]
    @test params[3] == [5.0]

    # Mixed eltypes; arrays already seen in a/b are not repeated from c.
    params = trainables(m9)
    @test length(params) == 2
    @test params[1] == [1.0, 2.0, 3.0]
    @test params[2] == mat
end
| 64 | + |
@testset "gradient" begin
    # Sum of squared norms over all trainable parameters, so the gradient
    # with respect to each trainable array is `2 .* array`, and `nothing`
    # for non-trainable leaves.
    loss(m) = sum([sum(abs2, p) for p in trainables(m)])

    g = gradient(loss, m1)[1]
    @test g == [2.0, 4.0, 6.0]

    g = gradient(loss, m2)[1]
    @test g == ([2.0, 4.0, 6.0], [8.0, 10.0, 12.0])

    g = gradient(loss, m3)[1]
    @test g.x == [2.0, 4.0, 6.0]
    @test g.y === nothing      # `sin` is not trainable, so no gradient
    @test g.z == [8.0, 10.0, 12.0]

    g = gradient(loss, m4)[1]
    @test g == (x = [2.0, 4.0, 6.0], y = [2.0, 4.0, 6.0], z = [8.0, 10.0, 12.0])
    # BUGFIX: this identity check was a bare expression whose result was
    # discarded; wrap it in @test so it actually asserts.
    @test g.x === g.y # shared gradient for shared weights

    g = gradient(loss, m5)[1]
    @test g == (a = ((x = [2.0, 4.0, 6.0], y = nothing, z = [8.0, 10.0, 12.0]), nothing), b = ([2.0, 4.0, 6.0], nothing), c = ((x = [2.0, 4.0, 6.0], y = [2.0, 4.0, 6.0], z = [8.0, 10.0, 12.0]), nothing))

    # For the complex leaf, d/dz̄ sum(abs2, z) gives 2z in Zygote's convention.
    g = gradient(loss, m6)[1]
    @test g == (a = [2.0, 4.0, 6.0], b = ComplexF64[8.0 + 2.0im], c = [2.0, 4.0, 6.0])

    # Only the trainable field of TwoThirds receives a gradient.
    g = gradient(loss, m7)[1]
    @test g == (a = (nothing, [2.0, 4.0, 6.0]), b = nothing, c = nothing)

    g = gradient(loss, m8)[1]
    @test g[1] == (x = [2.0, 4.0, 6.0], y = [2.0, 4.0, 6.0])
    @test g[2] == (a = nothing, b = (x = [8.0], y = nothing), c = nothing)
    @test g[3] == [[10.0]]

    # Tied arrays in m9.c accumulate with a/b, giving doubled gradients there.
    g = gradient(loss, m9)[1]
    @test g == (a = [2.0, 4.0, 6.0], b = Float32[8.0 12.0; 10.0 14.0], c = Array[Float32[8.0 12.0; 10.0 14.0], [2.0, 4.0, 6.0]])
end
| 99 | + |
@testset "second order derivatives" begin
    # Minimal two-field layer; fields are left untyped since only the
    # structure matters for differentiation here.
    struct DenseLayer
        w
        b
    end

    # Register DenseLayer with Functors so trainables/gradient can recurse
    # into its fields.
    Functors.@functor DenseLayer

    # loss(m) = Σ_p Σ p_i²  ⇒  ∇loss(m) has 2p in each parameter slot.
    loss(m) = sum([sum(abs2, p) for p in trainables(m)])

    model = DenseLayer([1. 2.; 3. 4.], [0., 0.])

    # loss(∇loss(m)) = Σ (2p_i)² = 4 Σ p_i², so the outer gradient is 8p:
    # w ⇒ 8 .* [1 2; 3 4], b ⇒ 8 .* [0, 0].
    g = gradient(m -> loss(gradient(loss, m)), model)[1]
    @test g.w == [8.0 16.0; 24.0 32.0]
    @test g.b == [0.0, 0.0]
end
0 commit comments