
Commit 3aa10ba

Fix neuron selection on GPUs (#140)
* Fix neuron selection
* Rename `rels` to `Rs`
* Rename `acts` to `as`
1 parent b6658cb · commit 3aa10ba

3 files changed: 34 additions & 39 deletions


docs/src/lrp/developer.md

Lines changed: 7 additions & 7 deletions
@@ -148,21 +148,21 @@ For a detailed description of the layer modification mechanism, refer to the sec
 
 ## Forward and reverse pass
 When calling an `LRP` analyzer, a forward pass through the model is performed,
-saving the activations $aᵏ$ for all layers $k$ in a vector called `acts`.
+saving the activations $aᵏ$ for all layers $k$ in a vector called `as`.
 This vector of activations is then used to pre-allocate the relevances $R^k$
-for all layers in a vector called `rels`.
+for all layers in a vector called `Rs`.
 This is possible since for any layer $k$, $a^k$ and $R^k$ have the same shape.
-Finally, the last array of relevances $R^N$ in `rels` is set to zeros,
+Finally, the last array of relevances $R^N$ in `Rs` is set to zeros,
 except for the specified output neuron, which is set to one.
 
 We can now run the reverse pass, iterating backwards over the layers in the model
-and writing relevances $R^k$ into the pre-allocated array `rels`:
+and writing relevances $R^k$ into the pre-allocated array `Rs`:
 
 ```julia
 for k in length(model):-1:1
     # └─ loop over layers in reverse
-    lrp!(rels[k], rules[k], layers[k], modified_layers[i], acts[k], rels[k+1])
-    #    └─ Rᵏ: modified in-place                          └─ aᵏ     └─ Rᵏ⁺¹
+    lrp!(Rs[k], rules[k], layers[k], modified_layers[k], as[k], Rs[k+1])
+    #    └─ Rᵏ: modified in-place                        └─ aᵏ   └─ Rᵏ⁺¹
 end
 ```
 
@@ -185,7 +185,7 @@ and the output relevance `Rᵏ⁺¹`.
 The exclamation point in the function name `lrp!` is a
 [naming convention](https://docs.julialang.org/en/v1/manual/style-guide/#bang-convention)
 in Julia to denote functions that modify their arguments --
-in this case the first argument `rels[k]`, which corresponds to $R^k$.
+in this case the first argument `Rs[k]`, which corresponds to $R^k$.
 
 ### Rule calls
 As discussed in [*The AD fallback*](@ref lrp-dev-ad-fallback),

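To make the documented forward/reverse pass concrete, here is a toy, self-contained sketch. It only mirrors the structure described above: the `model` is a plain vector of Julia functions and `toy_lrp!` redistributes relevance uniformly, a hypothetical stand-in for the package's rules and `lrp!` methods.

```julia
# Toy sketch of the documented forward/reverse pass (not the package's actual rules).
model = [x -> 2 .* x, x -> x .+ 1, x -> [sum(x)]]   # three stand-in "layers"
input = [1.0, 2.0, 3.0]

# Forward pass: store the input and every layer output, i.e. aᵏ for all layers k
as = Any[input]
for layer in model
    push!(as, layer(as[end]))
end

# Pre-allocate relevances Rᵏ with the same shapes as the activations
Rs = [similar(a) for a in as]

# Mask the output relevance Rᴺ: zero everywhere except the selected output neuron
fill!(Rs[end], 0)
Rs[end][1] = 1

# Reverse pass: stand-in rule that spreads Rᵏ⁺¹ uniformly over the entries of aᵏ
toy_lrp!(Rᵏ, aᵏ, Rᵏ⁺¹) = (Rᵏ .= sum(Rᵏ⁺¹) / length(aᵏ); Rᵏ)
for k in length(model):-1:1
    toy_lrp!(Rs[k], as[k], Rs[k + 1])
end

Rs[1]   # relevance attributed to the input
```

In the package itself, `lrp!` dispatches on the rule and layer types and writes the relevances into `Rs` in place, exactly as in the loop shown in the diff above.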
src/lrp/lrp.jl

Lines changed: 22 additions & 22 deletions
@@ -48,40 +48,40 @@ LRP(model::Chain, c::Composite; kwargs...) = LRP(model, lrp_rules(model, c); kwa
 
 get_activations(model, input) = [input, Flux.activations(model, input)...]
 
+function mask_output_neuron!(Rᴺ, aᴺ, ns::AbstractNeuronSelector)
+    fill!(Rᴺ, 0)
+    idx = ns(aᴺ)
+    Rᴺ[idx] .= 1
+    return Rᴺ
+end
+
 # Call to the LRP analyzer
 function (lrp::LRP)(
     input::AbstractArray{T}, ns::AbstractNeuronSelector; layerwise_relevances=false
 ) where {T}
-    acts = get_activations(lrp.model, input)      # compute aᵏ for all layers k
-    rels = similar.(acts)                         # allocate Rᵏ for all layers k
-    mask_output_neuron!(rels[end], acts[end], ns) # compute Rᵏ⁺¹ of output layer
-
-    # Apply LRP rules in backward-pass, inplace-updating relevances `rels[i]`
-    for i in length(lrp.model):-1:1
-        lrp!(
-            rels[i],
-            lrp.rules[i],
-            lrp.model[i],
-            lrp.modified_layers[i],
-            acts[i],
-            rels[i + 1],
-        )
+    as = get_activations(lrp.model, input)     # compute activations aᵏ for all layers k
+    Rs = similar.(as)                          # allocate relevances Rᵏ for all layers k
+    mask_output_neuron!(Rs[end], as[end], ns)  # compute relevance Rᴺ of output layer N
+
+    # Apply LRP rules in backward-pass, inplace-updating relevances `Rs[k]` = Rᵏ
+    for k in length(lrp.model):-1:1
+        lrp!(Rs[k], lrp.rules[k], lrp.model[k], lrp.modified_layers[k], as[k], Rs[k + 1])
     end
-    extras = layerwise_relevances ? (layerwise_relevances=rels,) : nothing
 
-    return Explanation(first(rels), last(acts), ns(last(acts)), :LRP, extras)
+    extras = layerwise_relevances ? (layerwise_relevances=Rs,) : nothing
+    return Explanation(first(Rs), last(as), ns(last(as)), :LRP, extras)
 end
 
 function lrp!(Rᵏ, rules::ChainTuple, chain::Chain, modified_chain::ChainTuple, aᵏ, Rᵏ⁺¹)
-    acts = get_activations(chain, aᵏ)
-    rels = similar.(acts)
-    last(rels) .= Rᵏ⁺¹
+    as = get_activations(chain, aᵏ)
+    Rs = similar.(as)
+    last(Rs) .= Rᵏ⁺¹
 
-    # Apply LRP rules in backward-pass, inplace-updating relevances `rels[i]`
+    # Apply LRP rules in backward-pass, inplace-updating relevances `Rs[i]`
     for i in length(chain):-1:1
-        lrp!(rels[i], rules[i], chain[i], modified_chain[i], acts[i], rels[i + 1])
+        lrp!(Rs[i], rules[i], chain[i], modified_chain[i], as[i], Rs[i + 1])
     end
-    return Rᵏ .= first(rels)
+    return Rᵏ .= first(Rs)
 end
 
 function lrp!(

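The `mask_output_neuron!` helper moved into this file can be illustrated in isolation. The sketch below assumes a 2D output (neurons × batch, batch last) and uses `max_selector`, a hypothetical stand-in for `MaxActivationSelector`, to show how one output neuron per batch column is seeded with relevance 1.

```julia
# Standalone sketch of the masking performed by mask_output_neuron! above.
# max_selector is a hypothetical stand-in for MaxActivationSelector.
max_selector(out) = vec(argmax(out; dims=1:(ndims(out) - 1)))

aᴺ = [0.1  2.0;    # output activations: 3 neurons × 2 batch samples
      0.7  0.2;
      0.3  0.5]
Rᴺ = similar(aᴺ)

fill!(Rᴺ, 0)              # zero all output relevances ...
idx = max_selector(aᴺ)    # ... pick one CartesianIndex per batch column ...
Rᴺ[idx] .= 1              # ... and set the selected neurons to one

# Rᴺ == [0.0 1.0; 1.0 0.0; 0.0 0.0]
```

Because `idx` comes from `argmax(...; dims=...)` and is applied with a broadcasted assignment, the same pattern presumably stays on the device when `aᴺ` and `Rᴺ` are GPU arrays, which is what the commit title refers to.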
src/neuron_selection.jl

Lines changed: 5 additions & 10 deletions
@@ -1,12 +1,5 @@
 abstract type AbstractNeuronSelector end
 
-function mask_output_neuron!(R, a, ns::AbstractNeuronSelector)
-    fill!(R, 0)
-    idx = ns(a)
-    R[idx] .= 1
-    return R
-end
-
 """
     MaxActivationSelector()
 
@@ -15,7 +8,7 @@ Neuron selector that picks the output neuron with the highest activation.
 struct MaxActivationSelector <: AbstractNeuronSelector end
 function (::MaxActivationSelector)(out::AbstractArray{T,N}) where {T,N}
     N < 2 && throw(BATCHDIM_MISSING)
-    return Vector{CartesianIndex{N}}([argmax(out; dims=1:(N - 1))...])
+    return vec(argmax(out; dims=1:(N - 1)))
 end
 
 """
@@ -28,11 +21,13 @@ struct IndexSelector{I} <: AbstractNeuronSelector
 end
 function (s::IndexSelector{<:Integer})(out::AbstractArray{T,N}) where {T,N}
     N < 2 && throw(BATCHDIM_MISSING)
-    return CartesianIndex{N}.(s.index, 1:size(out, N))
+    batchsize = size(out, N)
+    return [CartesianIndex{N}(s.index, b) for b in 1:batchsize]
 end
 function (s::IndexSelector{I})(out::AbstractArray{T,N}) where {I,T,N}
     N < 2 && throw(BATCHDIM_MISSING)
-    return CartesianIndex{N}.(s.index..., 1:size(out, N))
+    batchsize = size(out, N)
+    return [CartesianIndex{N}(s.index..., b) for b in 1:batchsize]
 end
 
 """

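For reference, a short usage sketch of the updated selectors, assuming the types above are in scope and that `IndexSelector(3)` constructs a selector for output neuron 3 (batch dimension last):

```julia
out = [0.1  2.0;   # 3 output neurons × 2 batch samples
       0.7  0.2;
       0.3  0.5]

MaxActivationSelector()(out)  # [CartesianIndex(2, 1), CartesianIndex(1, 2)]
IndexSelector(3)(out)         # [CartesianIndex(3, 1), CartesianIndex(3, 2)]
```

Both selectors now return a plain vector of `CartesianIndex` values, one per batch sample. For `MaxActivationSelector`, `vec(argmax(...))` merely reshapes the index array returned by `argmax` instead of splatting it element by element into a `Vector` constructor, which is presumably the part that previously failed on GPU arrays.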