diff --git a/src/convnets/convmixer.jl b/src/convnets/convmixer.jl index 70827d0aa..01a6e61be 100644 --- a/src/convnets/convmixer.jl +++ b/src/convnets/convmixer.jl @@ -17,11 +17,11 @@ Creates a ConvMixer model. function convmixer(planes, depth; inchannels = 3, kernel_size = (9, 9), patch_size::Dims{2} = (7, 7), activation = gelu, nclasses = 1000) stem = conv_bn(patch_size, inchannels, planes, activation; preact = true, stride = patch_size[1]) - blocks = [Chain(SkipConnection(conv_bn(kernel_size, planes, planes, activation; - preact = true, groups = planes, pad = SamePad()), +), - conv_bn((1, 1), planes, planes, activation; preact = true)) for _ in 1:depth] + blocks = [Chain(SkipConnection(Chain(conv_bn(kernel_size, planes, planes, activation; + preact = true, groups = planes, pad = SamePad())), +), + conv_bn((1, 1), planes, planes, activation; preact = true)...) for _ in 1:depth] head = Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, Dense(planes, nclasses)) - return Chain(Chain(stem, Chain(blocks)), head) + return Chain(Chain(stem..., Chain(blocks)), head) end convmixer_config = Dict(:base => Dict(:planes => 1536, :depth => 20, :kernel_size => (9, 9), diff --git a/src/convnets/densenet.jl b/src/convnets/densenet.jl index eff19f1a8..53d96df09 100644 --- a/src/convnets/densenet.jl +++ b/src/convnets/densenet.jl @@ -11,8 +11,8 @@ Create a Densenet bottleneck layer """ function dense_bottleneck(inplanes, outplanes) inner_channels = 4 * outplanes - m = Chain(conv_bn((1, 1), inplanes, inner_channels; bias = false, rev = true), - conv_bn((3, 3), inner_channels, outplanes; pad = 1, bias = false, rev = true)) + m = Chain(conv_bn((1, 1), inplanes, inner_channels; bias = false, rev = true)..., + conv_bn((3, 3), inner_channels, outplanes; pad = 1, bias = false, rev = true)...) SkipConnection(m, cat_channels) end @@ -28,7 +28,7 @@ Create a DenseNet transition sequence - `outplanes`: number of output feature maps """ transition(inplanes, outplanes) = - Chain(conv_bn((1, 1), inplanes, outplanes; bias = false, rev = true), MeanPool((2, 2))) + Chain(conv_bn((1, 1), inplanes, outplanes; bias = false, rev = true)..., MeanPool((2, 2))) """ dense_block(inplanes, growth_rates) @@ -60,7 +60,7 @@ Create a DenseNet model """ function densenet(inplanes, growth_rates; reduction = 0.5, nclasses = 1000) layers = [] - push!(layers, conv_bn((7, 7), 3, inplanes; stride = 2, pad = (3, 3), bias = false)) + append!(layers, conv_bn((7, 7), 3, inplanes; stride = 2, pad = (3, 3), bias = false)) push!(layers, MaxPool((3, 3), stride = 2, pad = (1, 1))) outplanes = 0 diff --git a/src/convnets/inception.jl b/src/convnets/inception.jl index 00bdd0ccb..ef8ab81ef 100644 --- a/src/convnets/inception.jl +++ b/src/convnets/inception.jl @@ -9,17 +9,17 @@ Create an Inception-v3 style-A module - `pool_proj`: the number of output feature maps for the pooling projection """ function inception_a(inplanes, pool_proj) - branch1x1 = conv_bn((1, 1), inplanes, 64) + branch1x1 = Chain(conv_bn((1, 1), inplanes, 64)) - branch5x5 = Chain(conv_bn((1, 1), inplanes, 48), - conv_bn((5, 5), 48, 64; pad = 2)) + branch5x5 = Chain(conv_bn((1, 1), inplanes, 48)..., + conv_bn((5, 5), 48, 64; pad = 2)...) - branch3x3 = Chain(conv_bn((1, 1), inplanes, 64), - conv_bn((3, 3), 64, 96; pad = 1), - conv_bn((3, 3), 96, 96; pad = 1)) + branch3x3 = Chain(conv_bn((1, 1), inplanes, 64)..., + conv_bn((3, 3), 64, 96; pad = 1)..., + conv_bn((3, 3), 96, 96; pad = 1)...) branch_pool = Chain(MeanPool((3, 3), pad = 1, stride = 1), - conv_bn((1, 1), inplanes, pool_proj)) + conv_bn((1, 1), inplanes, pool_proj)...) return Parallel(cat_channels, branch1x1, branch5x5, branch3x3, branch_pool) @@ -35,11 +35,11 @@ Create an Inception-v3 style-B module - `inplanes`: number of input feature maps """ function inception_b(inplanes) - branch3x3_1 = conv_bn((3, 3), inplanes, 384; stride = 2) + branch3x3_1 = Chain(conv_bn((3, 3), inplanes, 384; stride = 2)) - branch3x3_2 = Chain(conv_bn((1, 1), inplanes, 64), - conv_bn((3, 3), 64, 96; pad = 1), - conv_bn((3, 3), 96, 96; stride = 2)) + branch3x3_2 = Chain(conv_bn((1, 1), inplanes, 64)..., + conv_bn((3, 3), 64, 96; pad = 1)..., + conv_bn((3, 3), 96, 96; stride = 2)...) branch_pool = MaxPool((3, 3), stride = 2) @@ -59,20 +59,20 @@ Create an Inception-v3 style-C module - `n`: the "grid size" (kernel size) for the convolution layers """ function inception_c(inplanes, inner_planes, n = 7) - branch1x1 = conv_bn((1, 1), inplanes, 192) + branch1x1 = Chain(conv_bn((1, 1), inplanes, 192)) - branch7x7_1 = Chain(conv_bn((1, 1), inplanes, inner_planes), - conv_bn((1, n), inner_planes, inner_planes; pad = (0, 3)), - conv_bn((n, 1), inner_planes, 192; pad = (3, 0))) + branch7x7_1 = Chain(conv_bn((1, 1), inplanes, inner_planes)..., + conv_bn((1, n), inner_planes, inner_planes; pad = (0, 3))..., + conv_bn((n, 1), inner_planes, 192; pad = (3, 0))...) - branch7x7_2 = Chain(conv_bn((1, 1), inplanes, inner_planes), - conv_bn((n, 1), inner_planes, inner_planes; pad = (3, 0)), - conv_bn((1, n), inner_planes, inner_planes; pad = (0, 3)), - conv_bn((n, 1), inner_planes, inner_planes; pad = (3, 0)), - conv_bn((1, n), inner_planes, 192; pad = (0, 3))) + branch7x7_2 = Chain(conv_bn((1, 1), inplanes, inner_planes)..., + conv_bn((n, 1), inner_planes, inner_planes; pad = (3, 0))..., + conv_bn((1, n), inner_planes, inner_planes; pad = (0, 3))..., + conv_bn((n, 1), inner_planes, inner_planes; pad = (3, 0))..., + conv_bn((1, n), inner_planes, 192; pad = (0, 3))...) branch_pool = Chain(MeanPool((3, 3), pad = 1, stride=1), - conv_bn((1, 1), inplanes, 192)) + conv_bn((1, 1), inplanes, 192)...) return Parallel(cat_channels, branch1x1, branch7x7_1, branch7x7_2, branch_pool) @@ -88,13 +88,13 @@ Create an Inception-v3 style-D module - `inplanes`: number of input feature maps """ function inception_d(inplanes) - branch3x3 = Chain(conv_bn((1, 1), inplanes, 192), - conv_bn((3, 3), 192, 320; stride = 2)) + branch3x3 = Chain(conv_bn((1, 1), inplanes, 192)..., + conv_bn((3, 3), 192, 320; stride = 2)...) - branch7x7x3 = Chain(conv_bn((1, 1), inplanes, 192), - conv_bn((1, 7), 192, 192; pad = (0, 3)), - conv_bn((7, 1), 192, 192; pad = (3, 0)), - conv_bn((3, 3), 192, 192; stride = 2)) + branch7x7x3 = Chain(conv_bn((1, 1), inplanes, 192)..., + conv_bn((1, 7), 192, 192; pad = (0, 3))..., + conv_bn((7, 1), 192, 192; pad = (3, 0))..., + conv_bn((3, 3), 192, 192; stride = 2)...) branch_pool = MaxPool((3, 3), stride=2) @@ -112,19 +112,19 @@ Create an Inception-v3 style-E module - `inplanes`: number of input feature maps """ function inception_e(inplanes) - branch1x1 = conv_bn((1, 1), inplanes, 320) + branch1x1 = Chain(conv_bn((1, 1), inplanes, 320)) - branch3x3_1 = conv_bn((1, 1), inplanes, 384) - branch3x3_1a = conv_bn((1, 3), 384, 384; pad = (0, 1)) - branch3x3_1b = conv_bn((3, 1), 384, 384; pad = (1, 0)) + branch3x3_1 = Chain(conv_bn((1, 1), inplanes, 384)) + branch3x3_1a = Chain(conv_bn((1, 3), 384, 384; pad = (0, 1))) + branch3x3_1b = Chain(conv_bn((3, 1), 384, 384; pad = (1, 0))) - branch3x3_2 = Chain(conv_bn((1, 1), inplanes, 448), - conv_bn((3, 3), 448, 384; pad = 1)) - branch3x3_2a = conv_bn((1, 3), 384, 384; pad = (0, 1)) - branch3x3_2b = conv_bn((3, 1), 384, 384; pad = (1, 0)) + branch3x3_2 = Chain(conv_bn((1, 1), inplanes, 448)..., + conv_bn((3, 3), 448, 384; pad = 1)...) + branch3x3_2a = Chain(conv_bn((1, 3), 384, 384; pad = (0, 1))) + branch3x3_2b = Chain(conv_bn((3, 1), 384, 384; pad = (1, 0))) branch_pool = Chain(MeanPool((3, 3), pad = 1, stride = 1), - conv_bn((1, 1), inplanes, 192)) + conv_bn((1, 1), inplanes, 192)...) return Parallel(cat_channels, branch1x1, @@ -150,12 +150,12 @@ Create an Inception-v3 model ([reference](https://arxiv.org/abs/1512.00567v3)). `inception3` does not currently support pretrained weights. """ function inception3(; nclasses = 1000) - layer = Chain(Chain(conv_bn((3, 3), 3, 32; stride = 2), - conv_bn((3, 3), 32, 32), - conv_bn((3, 3), 32, 64; pad = 1), + layer = Chain(Chain(conv_bn((3, 3), 3, 32; stride = 2)..., + conv_bn((3, 3), 32, 32)..., + conv_bn((3, 3), 32, 64; pad = 1)..., MaxPool((3, 3), stride = 2), - conv_bn((1, 1), 64, 80), - conv_bn((3, 3), 80, 192), + conv_bn((1, 1), 64, 80)..., + conv_bn((3, 3), 80, 192)..., MaxPool((3, 3), stride = 2), inception_a(192, 32), inception_a(256, 64), diff --git a/src/convnets/mobilenet.jl b/src/convnets/mobilenet.jl index 186726ef9..1d0a6227e 100644 --- a/src/convnets/mobilenet.jl +++ b/src/convnets/mobilenet.jl @@ -34,7 +34,7 @@ function mobilenetv1(width_mult, config; layer = dw ? depthwise_sep_conv_bn((3, 3), inchannels, outch, activation; stride = stride, pad = 1) : conv_bn((3, 3), inchannels, outch, activation; stride = stride, pad = 1) - push!(layers, layer) + append!(layers, layer) inchannels = outch end end @@ -118,7 +118,7 @@ function mobilenetv2(width_mult, configs; max_width = 1280, nclasses = 1000) # building first layer inplanes = _round_channels(32 * width_mult, width_mult == 0.1 ? 4 : 8) layers = [] - push!(layers, conv_bn((3, 3), 3, inplanes, stride = 2)) + append!(layers, conv_bn((3, 3), 3, inplanes, stride = 2)) # building inverted residual blocks for (t, c, n, s, a) in configs @@ -134,7 +134,7 @@ function mobilenetv2(width_mult, configs; max_width = 1280, nclasses = 1000) outplanes = (width_mult > 1) ? _round_channels(max_width * width_mult, width_mult == 0.1 ? 4 : 8) : max_width - return Chain(Chain(Chain(layers), conv_bn((1, 1), inplanes, outplanes, relu6, bias = false)), + return Chain(Chain(Chain(layers), conv_bn((1, 1), inplanes, outplanes, relu6, bias = false)...), Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, Dense(outplanes, nclasses))) end @@ -211,7 +211,7 @@ function mobilenetv3(width_mult, configs; max_width = 1024, nclasses = 1000) # building first layer inplanes = _round_channels(16 * width_mult, 8) layers = [] - push!(layers, conv_bn((3, 3), 3, inplanes, hardswish; stride = 2)) + append!(layers, conv_bn((3, 3), 3, inplanes, hardswish; stride = 2)) explanes = 0 # building inverted residual blocks for (k, t, c, r, a, s) in configs @@ -230,7 +230,7 @@ function mobilenetv3(width_mult, configs; max_width = 1024, nclasses = 1000) Dropout(0.2), Dense(output_channel, nclasses)) - return Chain(Chain(Chain(layers), conv_bn((1, 1), inplanes, explanes, hardswish, bias = false)), + return Chain(Chain(Chain(layers), conv_bn((1, 1), inplanes, explanes, hardswish, bias = false)...), Chain(AdaptiveMeanPool((1, 1)), MLUtils.flatten, classifier)) end diff --git a/src/convnets/resnet.jl b/src/convnets/resnet.jl index 5de0e35dd..72d2de3fb 100644 --- a/src/convnets/resnet.jl +++ b/src/convnets/resnet.jl @@ -12,8 +12,8 @@ Create a basic residual block """ function basicblock(inplanes, outplanes, downsample = false) stride = downsample ? 2 : 1 - Chain(conv_bn((3, 3), inplanes, outplanes[1]; stride = stride, pad = 1, bias = false), - conv_bn((3, 3), outplanes[1], outplanes[2], identity; stride = 1, pad = 1, bias = false)) + Chain(conv_bn((3, 3), inplanes, outplanes[1]; stride = stride, pad = 1, bias = false)..., + conv_bn((3, 3), outplanes[1], outplanes[2], identity; stride = 1, pad = 1, bias = false)...) end """ @@ -36,12 +36,11 @@ The original paper uses `stride == [2, 1, 1]` when `downsample == true` instead. """ function bottleneck(inplanes, outplanes, downsample = false; stride = [1, (downsample ? 2 : 1), 1]) - Chain(conv_bn((1, 1), inplanes, outplanes[1]; stride = stride[1], bias = false), - conv_bn((3, 3), outplanes[1], outplanes[2]; stride = stride[2], pad = 1, bias = false), - conv_bn((1, 1), outplanes[2], outplanes[3], identity; stride = stride[3], bias = false)) + Chain(conv_bn((1, 1), inplanes, outplanes[1]; stride = stride[1], bias = false)..., + conv_bn((3, 3), outplanes[1], outplanes[2]; stride = stride[2], pad = 1, bias = false)..., + conv_bn((1, 1), outplanes[2], outplanes[3], identity; stride = stride[3], bias = false)...) end - """ bottleneck_v1(inplanes, outplanes, downsample = false) @@ -82,7 +81,7 @@ function resnet(block, residuals::AbstractVector{<:NTuple{2, Any}}, connection = inplanes = 64 baseplanes = 64 layers = [] - push!(layers, conv_bn((7, 7), 3, inplanes; stride = 2, pad = 3, bias = false)) + append!(layers, conv_bn((7, 7), 3, inplanes; stride = 2, pad = 3, bias = false)) push!(layers, MaxPool((3, 3), stride = (2, 2), pad = (1, 1))) for (i, nrepeats) in enumerate(block_config) # output planes within a block diff --git a/src/convnets/resnext.jl b/src/convnets/resnext.jl index c9d7aa669..53ff60c95 100644 --- a/src/convnets/resnext.jl +++ b/src/convnets/resnext.jl @@ -14,10 +14,10 @@ Create a basic residual block as defined in the paper for ResNeXt function resnextblock(inplanes, outplanes, cardinality, width, downsample = false) stride = downsample ? 2 : 1 hidden_channels = cardinality * width - return Chain(conv_bn((1, 1), inplanes, hidden_channels; stride = 1, bias = false), + return Chain(conv_bn((1, 1), inplanes, hidden_channels; stride = 1, bias = false)..., conv_bn((3, 3), hidden_channels, hidden_channels; - stride = stride, pad = 1, bias = false, groups = cardinality), - conv_bn((1, 1), hidden_channels, outplanes; stride = 1, bias = false)) + stride = stride, pad = 1, bias = false, groups = cardinality)..., + conv_bn((1, 1), hidden_channels, outplanes; stride = 1, bias = false)...) end """ @@ -40,7 +40,7 @@ function resnext(cardinality, width, widen_factor = 2, connection = (x, y) -> @. inplanes = 64 baseplanes = 128 layers = [] - push!(layers, conv_bn((7, 7), 3, inplanes; stride = 2, pad = (3, 3))) + append!(layers, conv_bn((7, 7), 3, inplanes; stride = 2, pad = (3, 3))) push!(layers, MaxPool((3, 3), stride = (2, 2), pad = (1, 1))) for (i, nrepeats) in enumerate(block_config) # output planes within a block diff --git a/src/convnets/vgg.jl b/src/convnets/vgg.jl index 6cc9dab83..3cfb2dc12 100644 --- a/src/convnets/vgg.jl +++ b/src/convnets/vgg.jl @@ -16,7 +16,7 @@ function vgg_block(ifilters, ofilters, depth, batchnorm) layers = [] for _ in 1:depth if batchnorm - push!(layers, conv_bn(k, ifilters, ofilters; pad = p, bias = false)) + append!(layers, conv_bn(k, ifilters, ofilters; pad = p, bias = false)) else push!(layers, Conv(k, ifilters => ofilters, relu, pad = p)) end diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 78b729c01..ca30df8a4 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -45,7 +45,7 @@ function conv_bn(kernelsize, inplanes, outplanes, activation = relu; push!(layers, BatchNorm(Int(bnplanes), activations.bn; initβ = initβ, initγ = initγ, ϵ = ϵ, momentum = momentum)) - return rev ? Chain(reverse(layers)) : Chain(layers) + return rev ? reverse(layers) : layers end """ @@ -82,13 +82,13 @@ depthwise_sep_conv_bn(kernelsize, inplanes, outplanes, activation = relu; initβ = Flux.zeros32, initγ = Flux.ones32, ϵ = 1f-5, momentum = 1f-1, stride = 1, kwargs...) = - Chain(vcat(conv_bn(kernelsize, inplanes, inplanes, activation; - rev = rev, initβ = initβ, initγ = initγ, - ϵ = ϵ, momentum = momentum, - stride = stride, groups = Int(inplanes), kwargs...), - conv_bn((1, 1), inplanes, outplanes, activation; - rev = rev, initβ = initβ, initγ = initγ, - ϵ = ϵ, momentum = momentum))) + vcat(conv_bn(kernelsize, inplanes, inplanes, activation; + rev = rev, initβ = initβ, initγ = initγ, + ϵ = ϵ, momentum = momentum, + stride = stride, groups = Int(inplanes), kwargs...), + conv_bn((1, 1), inplanes, outplanes, activation; + rev = rev, initβ = initβ, initγ = initγ, + ϵ = ϵ, momentum = momentum)) """ skip_projection(inplanes, outplanes, downsample = false) @@ -102,8 +102,8 @@ Create a skip projection - `downsample`: set to `true` to downsample the input """ skip_projection(inplanes, outplanes, downsample = false) = downsample ? - conv_bn((1, 1), inplanes, outplanes, identity; stride = 2, bias = false) : - conv_bn((1, 1), inplanes, outplanes, identity; stride = 1, bias = false) + Chain(conv_bn((1, 1), inplanes, outplanes, identity; stride = 2, bias = false)) : + Chain(conv_bn((1, 1), inplanes, outplanes, identity; stride = 1, bias = false)) # array -> PaddedView(0, array, outplanes) for zero padding arrays """ @@ -144,8 +144,8 @@ Squeeze and excitation layer used by MobileNet variants function squeeze_excite(channels, reduction = 4) @assert (reduction >= 1) "`reduction` must be >= 1" SkipConnection(Chain(AdaptiveMeanPool((1, 1)), - conv_bn((1, 1), channels, channels ÷ reduction, relu; bias = false), - conv_bn((1, 1), channels ÷ reduction, channels, hardσ)), .*) + conv_bn((1, 1), channels, channels ÷ reduction, relu; bias = false)..., + conv_bn((1, 1), channels ÷ reduction, channels, hardσ)...), .*) end """ @@ -171,14 +171,14 @@ function invertedresidual(kernel_size, inplanes, hidden_planes, outplanes, activ @assert stride in [1, 2] "`stride` has to be 1 or 2" pad = @. (kernel_size - 1) ÷ 2 - conv1 = (inplanes == hidden_planes) ? identity : conv_bn((1, 1), inplanes, hidden_planes, activation; bias = false) + conv1 = (inplanes == hidden_planes) ? identity : Chain(conv_bn((1, 1), inplanes, hidden_planes, activation; bias = false)) selayer = isnothing(reduction) ? identity : squeeze_excite(hidden_planes, reduction) invres = Chain(conv1, conv_bn(kernel_size, hidden_planes, hidden_planes, activation; - bias = false, stride, pad = pad, groups = hidden_planes), + bias = false, stride, pad = pad, groups = hidden_planes)..., selayer, - conv_bn((1, 1), hidden_planes, outplanes, identity; bias = false)) + conv_bn((1, 1), hidden_planes, outplanes, identity; bias = false)...) (stride == 1 && inplanes == outplanes) ? SkipConnection(invres, +) : invres end