# Regression and broadcasting tests for Diffractor's `gradient` and
# prime-derivative entry points.
#
# NOTE(review): the module is named `forward`, but almost every test below goes
# through `gradient` / `bwd` — confirm the name is intended.
module forward
using Diffractor
using Diffractor: var"'", ∂⃖, DiffractorRuleConfig
using ChainRules
using ChainRulesCore
using ChainRulesCore: ZeroTangent, NoTangent, frule_via_ad, rrule_via_ad
using LinearAlgebra

using Test

# Short aliases for Diffractor's prime-derivative wrappers. They are used below
# as `fwd(f)(x)` / `bwd(f)(x)` and compared against known derivative values.
const fwd = Diffractor.PrimeDerivativeFwd
const bwd = Diffractor.PrimeDerivativeBack


# Regression tests
@test gradient(x -> sum(abs2, x .+ 1.0), zeros(3))[1] == [2.0, 2.0, 2.0]

# Scales a fixed 3x3 matrix (zeros on the diagonal, ones elsewhere) by `a / 2.0`
# and sums the entries; `fwd` and `bwd` must agree on its derivative.
function f_broadcast(a)
    l = a / 2.0 * [[0. 1. 1.]; [1. 0. 1.]; [1. 1. 0.]]
    return sum(l)
end
@test fwd(f_broadcast)(1.0) == bwd(f_broadcast)(1.0)

# Make sure that there's no infinite recursion in kwarg calls
g_kw(;x=1.0) = sin(x)
f_kw(x) = g_kw(;x)
@test bwd(f_kw)(1.0) == bwd(sin)(1.0)

function f_crit_edge(a, b, c, x)
    # A function with two critical edges. This used to trigger an issue where
    # Diffractor would fail to insert edges for the second split critical edge.
    #
    # The branch structure is the thing under test: do not merge or reorder the
    # `if` blocks. `y` is reassigned in sequence, so a later branch overrides an
    # earlier one and the final `if c` multiplies whatever `y` holds by 4.
    y = 1x
    if a && b
        y = 2x
    end
    if b && c
        y = 3x
    end

    if c
        y = 4y
    end

    return y
end
@test bwd(x->f_crit_edge(false, false, false, x))(1.0) == 1.0
@test bwd(x->f_crit_edge(true, true, false, x))(1.0) == 2.0
@test bwd(x->f_crit_edge(false, true, true, x))(1.0) == 12.0
@test bwd(x->f_crit_edge(false, false, true, x))(1.0) == 4.0

# Issue #27 - Mixup in lifting of getfield
# Inside this `let`, the postfix `'` operator is rebound to `bwd`, so `f''` and
# `f'''` below are nested applications of `bwd`.
let var"'" = bwd
    @test (x->x^5)''(1.0) == 20.
    @test_broken (x->x^5)'''(1.0) == 60.
end

# Issue #38 - Splatting arrays
@test gradient(x -> max(x...), (1,2,3))[1] == (0.0, 0.0, 1.0)
@test gradient(x -> max(x...), [1,2,3])[1] == [0.0, 0.0, 1.0]

# Issue #40 - Symbol type parameters not properly quoted
@test Diffractor.∂⃖recurse{1}()(Val{:transformations})[1] === Val{:transformations}()

# PR #43
loss(res, z, w) = sum(res.U * Diagonal(res.S) * res.V) + sum(res.S .* w)
# Random input: the assertion below only checks the type of the result.
x43 = rand(10, 10)
@test Diffractor.gradient(x->loss(svd(x), x[:,1], x[:,2]), x43) isa Tuple{Matrix{Float64}}

# PR # 45 - Calling back into AD from ChainRules
y45, back45 = rrule_via_ad(DiffractorRuleConfig(), x -> log(exp(x)), 2)
@test y45 ≈ 2.0
@test back45(1) == (ZeroTangent(), 1.0)

z45, delta45 = frule_via_ad(DiffractorRuleConfig(), (0,1), x -> log(exp(x)), 2)
@test z45 ≈ 2.0
@test delta45 ≈ 1.0

# PR #82 - getindex on non-numeric arrays
@test gradient(ls -> ls[1](1.), [Base.Fix1(*, 1.)])[1][1] isa Tangent{<:Base.Fix1}

@testset "broadcast" begin
    @test gradient(x -> sum(x ./ x), [1,2,3]) == ([0,0,0],)  # derivatives_given_output
    @test gradient(x -> sum(sqrt.(atan.(x, transpose(x)))), [1,2,3])[1] ≈ [0.2338, -0.0177, -0.0661] atol=1e-3
    @test gradient(x -> sum(exp.(log.(x))), [1,2,3]) == ([1,1,1],)

    @test gradient(x -> sum((exp∘log).(x)), [1,2,3]) == ([1,1,1],)  # frule_via_ad
    exp_log(x) = exp(log(x))
    @test gradient(x -> sum(exp_log.(x)), [1,2,3]) == ([1,1,1],)
    @test gradient((x,y) -> sum(x ./ y), [1 2; 3 4], [1,2]) == ([1 1; 0.5 0.5], [-3, -1.75])
    @test gradient((x,y) -> sum(x ./ y), [1 2; 3 4], 5) == ([0.2 0.2; 0.2 0.2], -0.4)
    @test gradient(x -> sum((y -> y/x).([1,2,3])), 4) == (-0.375,)  # closure

    @test gradient(x -> sum(sum, (x,) ./ x), [1,2,3])[1] ≈ [-4.1666, 0.3333, 1.1666] atol=1e-3  # array of arrays
    @test gradient(x -> sum(sum, Ref(x) ./ x), [1,2,3])[1] ≈ [-4.1666, 0.3333, 1.1666] atol=1e-3
    # NOTE(review): the next test is identical to the `(x,) ./ x` case two lines up — confirm whether a different wrapper was intended.
    @test gradient(x -> sum(sum, (x,) ./ x), [1,2,3])[1] ≈ [-4.1666, 0.3333, 1.1666] atol=1e-3
    @test gradient(x -> sum(sum, (x,) .* transpose(x)), [1,2,3])[1] ≈ [12, 12, 12]  # must not take the * fast path

    @test gradient(x -> sum(x ./ 4), [1,2,3]) == ([0.25, 0.25, 0.25],)
    @test gradient(x -> sum([1,2,3] ./ x), 4) == (-0.375,)  # x/y rule
    @test gradient(x -> sum(x.^2), [1,2,3]) == ([2.0, 4.0, 6.0],)  # x.^2 rule
    @test gradient(x -> sum([1,2,3] ./ x.^2), 4) == (-0.1875,)  # scalar^2 rule

    @test gradient(x -> sum((1,2,3) .- x), (1,2,3)) == (Tangent{Tuple{Int,Int,Int}}(-1.0, -1.0, -1.0),)
    @test gradient(x -> sum(transpose([1,2,3]) .- x), (1,2,3)) == (Tangent{Tuple{Int,Int,Int}}(-3.0, -3.0, -3.0),)
    @test gradient(x -> sum([1 2 3] .+ x .^ 2), (1,2,3)) == (Tangent{Tuple{Int,Int,Int}}(6.0, 12.0, 18.0),)

    @test gradient(x -> sum(x .> 2), [1,2,3]) |> only |> iszero  # Bool output
    @test gradient(x -> sum(1 .+ iseven.(x)), [1,2,3]) |> only |> iszero
    @test gradient((x,y) -> sum(x .== y), [1,2,3], [1 2 3]) == (NoTangent(), NoTangent())
    @test gradient(x -> sum(x .+ [1,2,3]), true) |> only |> iszero  # Bool input
    @test gradient(x -> sum(x ./ [1,2,3]), [true false]) |> only |> iszero
    @test gradient(x -> sum(x .* transpose([1,2,3])), (true, false)) |> only |> iszero

    tup_adj = gradient((x,y) -> sum(2 .* x .+ log.(y)), (1,2), transpose([3,4,5]))
    @test tup_adj[1] == Tangent{Tuple{Int64, Int64}}(6.0, 6.0)
    @test tup_adj[2] ≈ [0.6666666666666666 0.5 0.4]
    @test tup_adj[2] isa Transpose
    @test gradient(x -> sum(atan.(x, (1,2,3))), Diagonal([4,5,6]))[1] isa Diagonal

    @test gradient(x -> sum((y -> (x*y)).([1,2,3])), 4.0) == (6.0,)  # closure
end

@testset "broadcast, 2nd order" begin
    @test gradient(x -> gradient(y -> sum(y .* y), x)[1] |> sum, [1,2,3.0])[1] == [2,2,2]  # calls "split broadcasting generic" with f = unthunk
    @test gradient(x -> gradient(y -> sum(y .* x), x)[1].^3 |> sum, [1,2,3.0])[1] == [3,12,27]
    @test_broken gradient(x -> gradient(y -> sum(y .* 2 .* y'), x)[1] |> sum, [1,2,3.0])[1] == [12, 12, 12]  # Control flow support not fully implemented yet for higher-order

    @test_broken gradient(x -> sum(gradient(x -> sum(x .^ 2 .+ x'), x)[1]), [1,2,3.0])[1] == [6,6,6]  # BoundsError: attempt to access 18-element Vector{Core.Compiler.BasicBlock} at index [0]
    @test_broken gradient(x -> sum(gradient(x -> sum((x .+ 1) .* x .- x), x)[1]), [1,2,3.0])[1] == [2,2,2]
    @test_broken gradient(x -> sum(gradient(x -> sum(x .* x ./ 2), x)[1]), [1,2,3.0])[1] == [1,1,1]

    @test_broken gradient(x -> sum(gradient(x -> sum(exp.(x)), x)[1]), [1,2,3])[1] ≈ exp.(1:3)  # MethodError: no method matching copy(::Nothing)
    @test_broken gradient(x -> sum(gradient(x -> sum(atan.(x, x')), x)[1]), [1,2,3.0])[1] ≈ [0,0,0]
    @test_broken gradient(x -> sum(gradient(x -> sum(transpose(x) .* x), x)[1]), [1,2,3]) == ([6,6,6],)  # accum(a::Transpose{Float64, Vector{Float64}}, b::ChainRulesCore.Tangent{Transpose{Int64, Vector{Int64}}, NamedTuple{(:parent,), Tuple{ChainRulesCore.NoTangent}}})
    @test_broken gradient(x -> sum(gradient(x -> sum(transpose(x) ./ x.^2), x)[1]), [1,2,3])[1] ≈ [27.675925925925927, -0.824074074074074, -2.1018518518518516]

    @test_broken gradient(z -> gradient(x -> sum((y -> (x^2*y)).([1,2,3])), z)[1], 5.0) == (12.0,)
end

end
# 0 commit comments