1 change: 0 additions & 1 deletion src/Flux.jl
@@ -34,7 +34,6 @@ using CUDA
const use_cuda = Ref(false)

include("utils.jl")
include("zeros.jl")
include("onehot.jl")
include("functor.jl")

10 changes: 10 additions & 0 deletions src/deprecations.jl
@@ -3,7 +3,9 @@
@deprecate InstanceNorm(λ, β, γ, μ, σ², ϵ, momentum) InstanceNorm(λ, β, γ, μ, σ², ϵ, momentum, true, true, nothing)
@deprecate BatchNorm(λ, β, γ, μ, σ², ϵ, momentum) BatchNorm(λ, β, γ, μ, σ², ϵ, momentum, true, true, nothing)
@deprecate GroupNorm(G, λ, β, γ, μ, σ², ϵ, momentum) GroupNorm(G, λ, β, γ, μ, σ², ϵ, momentum, nothing)

@deprecate outdims(f, inputsize) outputsize(f, inputsize)

@deprecate Conv(; weight, bias, activation=identity, kws...) Conv(weight, bias, activation; kws...)
@deprecate ConvTranspose(; weight, bias, activation=identity, kws...) ConvTranspose(weight, bias, activation; kws...)
@deprecate DepthwiseConv(; weight, bias, activation=identity, kws...) DepthwiseConv(weight, bias, activation; kws...)
@@ -18,3 +20,11 @@ function Base.getproperty(a::Dense, s::Symbol)
end
return getfield(a, s)
end

struct Zeros # was used both as Dense(10, 2, initb = Zeros) and as Dense(rand(2,10), Zeros())
function Zeros()
Base.depwarn("Zeros() and Zeros(dims...) are deprecated, please simply use bias=false instead", :Zeros)
false
end
end
Zeros(args...) = Zeros()
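
To illustrate the migration path encoded above (a minimal sketch, assuming this branch of Flux is loaded; Flux.Zeros here refers to the deprecated constructor defined just above):

using Flux

# The old placeholder now collapses to `false`, with a deprecation warning:
b = Flux.Zeros()   # warns: "Zeros() and Zeros(dims...) are deprecated, please simply use bias=false instead"
b === false        # true; Zeros(args...) also funnels into Zeros()

# The supported spelling going forward:
m = Dense(3, 2; bias=false)
m.bias === false   # no bias is stored or trained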
3 changes: 1 addition & 2 deletions src/layers/basic.jl
@@ -67,7 +67,6 @@ end
extraChain(::Tuple{}, x) = ()



"""
Dense(in, out, σ=identity; bias=true, init=glorot_uniform)
Dense(W::AbstractMatrix, [bias, σ])
@@ -153,7 +152,7 @@ end
function Base.show(io::IO, l::Dense)
print(io, "Dense(", size(l.weight, 2), ", ", size(l.weight, 1))
l.σ == identity || print(io, ", ", l.σ)
l.bias == Zeros() && print(io, "; bias=false")
l.bias == false && print(io, "; bias=false")
print(io, ")")
end

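As a rough illustration of the printing change above (a hedged sketch; a bias-free layer is now detected via `l.bias == false` rather than by comparing against the removed `Zeros()`):

Dense(3, 2)                    # shows as: Dense(3, 2)
Dense(3, 2, relu; bias=false)  # shows as: Dense(3, 2, relu; bias=false)
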
16 changes: 8 additions & 8 deletions src/layers/conv.jl
@@ -6,6 +6,10 @@ _paddims(x::Tuple, y::Tuple) = (x..., y[(end - (length(y) - length(x) - 1)):end]
expand(N, i::Tuple) = i
expand(N, i::Integer) = ntuple(_ -> i, N)

conv_reshape_bias(c) = c.bias isa AbstractVector ?
reshape(c.bias, map(_->1, c.stride)..., :, 1) :
c.bias

"""
SamePad()

@@ -152,9 +156,8 @@ convfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
function (c::Conv)(x::AbstractArray)
# TODO: breaks gpu broadcast :(
# ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
σ, b = c.σ, reshape(c.bias, ntuple(_->1, length(c.stride))..., :, 1)
cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation)
σ.(conv(x, c.weight, cdims) .+ b)
(c.σ).(conv(x, c.weight, cdims) .+ conv_reshape_bias(c))
end

function Base.show(io::IO, l::Conv)
@@ -248,9 +251,8 @@ end

function (c::ConvTranspose)(x::AbstractArray)
# ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = conv_transpose_dims(c, x)
σ.(∇conv_data(x, c.weight, cdims) .+ b)
(c.σ).(∇conv_data(x, c.weight, cdims) .+ conv_reshape_bias(c))
end

function Base.show(io::IO, l::ConvTranspose)
@@ -341,9 +343,8 @@ depthwiseconvfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1])

function (c::DepthwiseConv)(x)
σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = DepthwiseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation)
σ.(depthwiseconv(x, c.weight, cdims) .+ b)
(c.σ).(depthwiseconv(x, c.weight, cdims) .+ conv_reshape_bias(c))
end

function Base.show(io::IO, l::DepthwiseConv)
@@ -422,9 +423,8 @@ end
function (c::CrossCor)(x::AbstractArray)
# TODO: breaks gpu broadcast :(
# ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation)
σ.(crosscor(x, c.weight, cdims) .+ b)
(c.σ).(crosscor(x, c.weight, cdims) .+ conv_reshape_bias(c))
end

function Base.show(io::IO, l::CrossCor)
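The four call sites above now share `conv_reshape_bias`, which reshapes a bias vector so it broadcasts against the convolution output and passes `false` through untouched. A hedged sketch, assuming a 2-d `Conv`; the helper is internal, hence the `Flux.` qualification:

using Flux

c = Conv((3, 3), 1 => 4)              # default trainable bias: a length-4 vector
size(Flux.conv_reshape_bias(c))       # (1, 1, 4, 1), which lines up with the (W, H, C, N) conv output

c0 = Conv((3, 3), 1 => 4; bias=false)
Flux.conv_reshape_bias(c0) === false  # `out .+ false` then adds zero element-wise, a numeric no-op
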
4 changes: 2 additions & 2 deletions src/utils.jl
@@ -297,11 +297,11 @@ Return a bias parameter for a layer, based on the value given
to the constructor's keyword `bias=bias`.

* `bias == true` creates a zero vector, of the same type as weights.
* `bias == false` returns `Zeros()`, a special struct which exists only to encode the absence of bias.
* `bias == false` returns `false`, to indicate no trainable bias.
* `bias::AbstractArray` uses the array provided, as long as it has the correct size. If its eltype differs from that of the weights, it will be converted.
"""
function create_bias(weights::AbstractArray, bias::Bool, dims::Integer...)
bias ? fill!(similar(weights, dims...), 0) : Zeros()
bias ? fill!(similar(weights, dims...), 0) : false
end
function create_bias(weights::AbstractArray, bias::AbstractArray, dims::Integer...)
size(bias) == dims || throw(DimensionMismatch("expected bias of size $(dims), got size $(size(bias))"))
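A short sketch of the `create_bias` contract described by the docstring above (`create_bias` is internal, so it is qualified with `Flux.`; the conversion comment follows the docstring rather than the truncated method body shown here):

using Flux

W = randn(Float32, 2, 3)
Flux.create_bias(W, true, 2)     # 2-element zero vector with the same eltype as W (Float32)
Flux.create_bias(W, false, 2)    # false, meaning no trainable bias
Flux.create_bias(W, ones(2), 2)  # uses the given array; per the docstring, a mismatched eltype is converted
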
52 changes: 0 additions & 52 deletions src/zeros.jl

This file was deleted.

2 changes: 1 addition & 1 deletion test/layers/conv.jl
@@ -191,7 +191,7 @@ end

@testset "constructors: $fun" for fun in [Conv, CrossCor, ConvTranspose, DepthwiseConv]
@test fun(rand(2,3,4)).bias isa Vector{Float64}
@test fun(rand(2,3,4,5), false).bias isa Flux.Zeros
@test fun(rand(2,3,4,5), false).bias === false
if fun == Conv
@test fun(rand(2,3,4,5,6), rand(6)).bias isa Vector{Float64}
@test fun(rand(2,3,4,5,6), 1:6).bias isa Vector{Float64}
2 changes: 1 addition & 1 deletion test/optimise.jl
@@ -14,7 +14,7 @@ using Random
Nesterov(), RMSProp(), Momentum()]
Random.seed!(42)
w′ = randn(10, 10)
b = Flux.Zeros()
b = false
loss(x) = Flux.Losses.mse(w*x, w′*x .+ b)
for t = 1: 10^5
θ = params([w′, b])
108 changes: 30 additions & 78 deletions test/utils.jl
@@ -187,88 +187,39 @@ end
@test eltype(f32(f64(m))[1].W) == Float32
end

@testset "Zeros" begin
@testset "Without bias" begin
m = Dense(3,2; bias=false)
@test f64(m).b === m.b === Zeros()
@test f32(m).b === m.b === Zeros()
@test f64(m).b === m.b === false === Zeros() # Zeros() is deprecated
@test f32(m).b === m.b === false

@testset "Gradients for broadcasted $op with sizes $s" for op in (+,-,*), s in ((1,), (2,3))
o = ones(s)
z = zeros(s)
Z = Zeros()

@testset "Explicit" begin
gfun(args...) = gradient((x, y) -> sum(op.(x,y)), args...)
g = gfun(o, z)
@test gfun(o, Z) == (g[1], nothing)
@test gfun(o, false) == (g[1], nothing)

g = gfun(z, o)
@test gfun(Z, o) == (nothing, g[2])
@test gfun(false, o) == (nothing, g[2])
end

@testset "Implicit" begin
gfun(args...) = gradient(() -> sum(op.(args...)), params(collect(args)))
g = gfun(o, z)

gres = gfun(o, Z)
gres = gfun(o, false)
@test gres[o] == g[o]
@test Z ∉ gres.params
@test false ∉ gres.params
@test length(gres.params) == 1

g = gfun(z, o)
gres = gfun(Z, o)
@test gres[o] == g[o]
@test Z ∉ gres.params
end
end

@testset "Gradients for broadcasted / with sizes $s" for s in ((1,), (2,3))
o = ones(s)
z = zeros(s)
Z = Zeros() # Only defined for 0-dim

@testset "Explicit" begin
gfun(args...) = gradient((x, y) -> sum(x ./ y), args...)
g = gfun(z, o)
@test gfun(Z, o) == (nothing, g[2])
end

@testset "Implicit" begin
gfun(x,y) = gradient(() -> sum(x ./ y), params([x,y]))

g = gfun(z, o)
gres = gfun(Z, o)
@test gres[o] == g[o]
@test Z ∉ gres.params
end
end

@testset "Gradients for $op with sizes $s" for op in (+,-), s in (tuple(), (1,), (2,3))
o = ones(s)
z = zeros(s)
Z = Zeros()


@testset "Explicit" begin
gfun(args...) = gradient((x, y) -> sum(op(x,y)), args...)

g = gfun(o, z)
@test gfun(o, Z) == (g[1], nothing)

g = gfun(z, o)
@test gfun(Z, o) == (nothing, g[2])
end

@testset "Implicit" begin
gfun(args...) = gradient(() -> sum(op(args...)), params(collect(args)))
g = gfun(o, z)
gres = gfun(o, Z)
gres = gfun(false, o)
@test gres[o] == g[o]
@test Z ∉ gres.params

g = gfun(z, o)
gres = gfun(Z, o)
@test gres[o] == g[o]
@test Z ∉ gres.params
@test false ∉ gres.params
@test length(gres.params) == 1
end
end
end
@@ -281,52 +232,53 @@
@test stack(unstack(stacked_array, 1), 1) == stacked_array
end


@testset "Param remapping" begin
ls(dims...) = reshape(collect(Float32, 1:prod(dims)), dims...) # accepts dims in reverse order to Dense
dl(nin, nout, bias) = Dense(ls(nout, nin), bias(nout))
dm(bias) = Chain(
dl(3, 5, bias),
dl(5, 4, bias),
dl(4, 3, bias)
count32(dims...) = reshape(collect(Float32, 1:prod(dims)), dims...) # accepts dims in reverse order to Dense
dl(nin, nout, bt) = Dense(count32(nout, nin), bt(nout)) # this accepts dims in same order as Dense
densechain(bt) = Chain(
dl(3, 5, bt),
dl(5, 4, bt),
dl(4, 3, bt)
)
nobias(n) = false

nobias(n) = Zeros()
testdense(m, bt) = @testset "Check layer $i" for (i, (l1, l2)) in enumerate(zip(m, dm(bt)))
@test l1.W == l2.W
@test l1.b == l2.b
@test typeof(l1.b) === typeof(l2.b)
testdense(m, bt) = @testset "Check layer $i" for (i, (l1, l2)) in enumerate(zip(m, densechain(bt)))
@test l1.weight == l2.weight
@test l1.bias == l2.bias
@test typeof(l1.bias) === typeof(l2.bias)
end

@testset "loadparams!" begin
import Flux: loadparams!
pars(w, b) = [w, b]
import Flux: loadparams!, Zeros
pars(w, b::Zeros) = [w, Flux.zeros(size(w,1))]
pars(l) = pars(l.W, l.b)
pararray(m) = mapreduce(pars, vcat, m)
weights(m) = mapreduce(l -> [l.W], vcat, m)
@testset "Bias type $bt" for bt in (Flux.zeros, nobias)
m = dm(bt)
@testset "Bias type $bt" for bt in (zeros, nobias)
m = densechain(bt)
loadparams!(m, params(m))
testdense(m, bt)
end

#=
@testset "$b1 to $b2" for (b1, b2, be) in (
(Flux.zeros, ones, ones), # Load ones as bias to a model with zeros as bias -> model gets ones as bias
(ones, nobias, Flux.zeros), # Load Zeros as bias to a model with ones as bias -> model gets zeros as bias
(nobias, ones, nobias), # Load ones as bias to a model with Zeros as bias -> model bias does not change
)
m1 = dm(b1)
m2 = dm(b2)
m1 = densechain(b1)
m2 = densechain(b2)
loadparams!(m1, b1 == nobias ? weights(m2) : pararray(m2))
testdense(m1, be)
end
=#
end

@testset "destructure" begin
import Flux: destructure
@testset "Bias type $bt" for bt in (zeros, nobias)
m = dm(bt)
m = densechain(bt)
p, re = destructure(m)
testdense(re(p), bt)
end
Expand Down