Commit 8d3e059

Author: Anton Smirnov

Fix convolution & pooling type-stability (#370)

1 parent 3906ec1 · commit 8d3e059

13 files changed: +435 −342 lines

src/conv.jl

Lines changed: 22 additions & 25 deletions
```diff
@@ -26,20 +26,20 @@ export conv, conv!, ∇conv_data, ∇conv_data!, ∇conv_filter, ∇conv_filter!
 # cdims = ConvDims(x, w; stride=2, dilation=(3,2))
 # dx = ∇conv_data(conv(x, w, cdims), w, cdims)
 
-# The computational flow, starting from the user facing functions,
-# goes through the following steps:
+# The computational flow, starting from the user facing functions,
+# goes through the following steps:
 #
-# STEP 1:
+# STEP 1:
 #   use ConvDims objects (only for `conv` and `depthwiseconv`)
-# STEP 2:
+# STEP 2:
 #   define autoallocating version (frontend and implementations)
-# STEP 3:
+# STEP 3:
 #   reshape to 3d convolutions (frontend and implementions)
-# STEP 4:
+# STEP 4:
 #   choose implementation
 
 # TODO: should we also add
-# STEP X:
+# STEP X:
 #   use homogeneus datatypes
 #   to handle etherogeneus inputs now handled by conv_direct?
 
```

```diff
@@ -48,22 +48,23 @@ export conv, conv!, ∇conv_data, ∇conv_data!, ∇conv_filter, ∇conv_filter!
 """
     conv(x, w; stride = 1, pad = 0, dilation = 1, flipped = false, groups = 1)
 
-Apply convolution filter `w` to input `x`. `x` and `w` are 3d/4d/5d tensors
-in 1d/2d/3d convolutions respectively.
+Apply convolution filter `w` to input `x`. `x` and `w` are 3d/4d/5d tensors
+in 1d/2d/3d convolutions respectively.
 """
-function conv(x, w::AbstractArray{T, N}; stride=1, pad=0, dilation=1, flipped=false, groups = 1) where {T, N}
-    stride = expand(Val(N-2), stride)
-    pad = expand(Val(N-2), pad)
-    dilation = expand(Val(N-2), dilation)
-    cdims = DenseConvDims(x, w; stride=stride, padding=pad, dilation=dilation, flipkernel=flipped, groups = groups)
+function conv(x, w::AbstractArray{T, N}; stride = 1, pad = 0, dilation = 1, flipped = false, groups = 1) where {T, N}
+    stride = expand(Val(N - 2), stride)
+    padding = expand(Val(N - 2), pad)
+    dilation = expand(Val(N - 2), dilation)
+    cdims = DenseConvDims(
+        size(x), size(w); stride, padding, dilation, flipkernel=flipped, groups)
     return conv(x, w, cdims)
 end
 
 """
     depthwiseconv(x, w; stride=1, pad=0, dilation=1, flipped=false)
 
-Depthwise convolution operation with filter `w` on input `x`. `x` and `w`
-are 3d/4d/5d tensors in 1d/2d/3d convolutions respectively.
+Depthwise convolution operation with filter `w` on input `x`. `x` and `w`
+are 3d/4d/5d tensors in 1d/2d/3d convolutions respectively.
 """
 function depthwiseconv(x, w::AbstractArray{T, N}; stride=1, pad=0, dilation=1, flipped=false) where {T, N}
     stride = expand(Val(N-2), stride)
```
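For reference, a minimal usage sketch of the updated frontend; the array shapes here are illustrative assumptions, not part of the commit:

```julia
using NNlib

# A 2d convolution: 4d tensors laid out as (width, height, channels, batch).
x = rand(Float32, 28, 28, 3, 1)   # assumed input
w = rand(Float32, 5, 5, 3, 8)     # assumed 5x5 kernel mapping 3 -> 8 channels

# The frontend now builds DenseConvDims from size tuples, so the dims object
# is constructed from plain NTuples rather than from the arrays themselves.
y = conv(x, w; stride = 1, pad = 2, dilation = 1)
size(y)   # (28, 28, 8, 1): pad = 2 keeps 28x28 with a 5x5 kernel
```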
```diff
@@ -98,9 +99,7 @@ for backend in (Symbol(), :_direct, :_im2col, :_nnpack)
     function $(Symbol("$(name)$(backend)"))(
             dy::AbstractArray{yT,N}, w::AbstractArray{wT,N},
             cdims::C; kwargs...) where {yT, wT, N, C <: ConvDims}
-        dx = similar(dy, input_size(cdims)..., channels_in(cdims),
-            size(dy, N))
-
+        dx = similar(dy, input_size(cdims)..., channels_in(cdims), size(dy, N))
         return $(Symbol("$(name)$(backend)!"))(dx, dy, w, cdims; kwargs...)
     end
 end
```
```diff
@@ -114,7 +113,6 @@ for backend in (Symbol(), :_direct, :_im2col, :_nnpack)
             cdims::ConvDims; kwargs...) where {xT, yT, N}
         dw = similar(dy, kernel_size(cdims)..., channels_in(cdims) ÷ groupcount(cdims),
             channels_out(cdims))
-
         return $(Symbol("∇conv_filter$(backend)!"))(dw, x, dy, cdims; kwargs...)
     end
 end
```
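These autoallocating wrappers back the gradient entry points shown in the comment at the top of the file. A sketch with assumed shapes:

```julia
using NNlib

x = rand(Float32, 16, 16, 3, 1)           # assumed input
w = rand(Float32, 3, 3, 3, 4)             # assumed kernel
cdims = DenseConvDims(size(x), size(w))   # dims built from size tuples

y  = conv(x, w, cdims)                    # forward pass
dx = ∇conv_data(y, w, cdims)              # input gradient, output allocated by the wrapper
dw = ∇conv_filter(x, y, cdims)            # filter gradient, likewise autoallocated
size(dx) == size(x) && size(dw) == size(w)   # true
```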
```diff
@@ -197,15 +195,15 @@ for (front_name, backend) in (
                 G = 1,
                 C_in = channels_in(cdims) ÷ groupcount(cdims),
                 C_out = channels_out(cdims) ÷ groupcount(cdims))
-
+
             Threads.@sync for (xc, wc) in zip(x_cs, w_cs)
                 x = @view in1[ntuple(i -> i == 4 ? xc : Colon(), 5)...]
                 w = @view in2[ntuple(i -> i == 5 ? wc : Colon(), 5)...]
                 y = @view out[ntuple(i -> i == 4 ? wc : Colon(), 5)...]
                 Threads.@spawn $(Symbol("$(front_name)_$(backend)!"))(y, x, w, cdims2; kwargs...)
             end
 
-            return out
+            return out
         end
     end
 end
```
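The grouped path splits the input and kernel channel ranges with `Iterators.partition` and convolves each pair on its own task. The same partitioning in isolation, with assumed channel counts:

```julia
# 8 input channels and 16 output channels split into 4 groups: each input
# range is paired with the kernel range that consumes it.
groups = 4
x_cs = Iterators.partition(1:8,  8 ÷ groups)
w_cs = Iterators.partition(1:16, 16 ÷ groups)

collect(zip(x_cs, w_cs))
# [(1:2, 1:4), (3:4, 5:8), (5:6, 9:12), (7:8, 13:16)]
```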
```diff
@@ -232,12 +230,11 @@ function ∇conv_data!(out::AbstractArray{T,5}, in1::AbstractArray{T,5},
         Threads.@spawn ∇conv_data_im2col!(dxv, dyv, wv, cdims2; kwargs...)
     end
 
-    return out
+    return out
 end
 
 function ∇conv_filter!(out::AbstractArray{T,5}, in1::AbstractArray{T,5},
         in2::AbstractArray{T,5}, cdims::C; kwargs...) where {T <: G, C <: ConvDims}
-
     dw_cs = Iterators.partition(1:size(out, 5),
         channels_out(cdims) ÷ groupcount(cdims))
     dy_cs = Iterators.partition(1:size(in2, 4),
```
```diff
@@ -256,7 +253,7 @@ function ∇conv_filter!(out::AbstractArray{T,5}, in1::AbstractArray{T,5},
         Threads.@spawn ∇conv_filter_im2col!(dw, x, dy, cdims2; kwargs...)
     end
 
-    return out
+    return out
 end
 
 
```
src/conv_bias_act.jl

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,13 +1,13 @@
 export conv_bias_act, conv_bias_act!
 
-function conv_bias_act(x::AbstractArray{xT,N}, w::AbstractArray{wT,N},
+function conv_bias_act(x::AbstractArray{xT,N}, w::AbstractArray{wT,N},
         cdims::ConvDims, b::AbstractArray{bT,N}, σ=identity; kwargs...) where {xT, wT, bT, N}
     y = similar(x, promote_type(xT, wT, bT), output_size(cdims)..., channels_out(cdims), size(x,N))
     conv_bias_act!(y, x, w, cdims, b, σ; kwargs...)
     return y
 end
 
-function conv_bias_act!(y::AbstractArray{yT,5}, x::AbstractArray{xT,5}, w::AbstractArray{wT,5},
+function conv_bias_act!(y::AbstractArray{yT,5}, x::AbstractArray{xT,5}, w::AbstractArray{wT,5},
         cdims::ConvDims, b::AbstractArray{bT,5}, σ=identity; kwargs...) where {yT, xT, wT, bT}
     conv!(y, x, w, cdims)
     y .= σ.(y .+ b)
```
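`conv_bias_act` fuses the convolution with the bias add and activation, as the body above shows (`conv!` followed by one fused broadcast). A usage sketch; the shapes and the choice of `relu` are assumptions for illustration:

```julia
using NNlib

x = rand(Float32, 10, 10, 3, 1)           # assumed input
w = rand(Float32, 3, 3, 3, 4)             # assumed kernel
b = zeros(Float32, 1, 1, 4, 1)            # bias, broadcastable over the output
cdims = DenseConvDims(size(x), size(w))

y = conv_bias_act(x, w, cdims, b, relu)   # conv!, then y .= relu.(y .+ b)
size(y)   # (8, 8, 4, 1)
```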

src/dim_helpers.jl

Lines changed: 1 addition & 2 deletions
```diff
@@ -45,14 +45,13 @@ function transpose_pad(cdims::ConvDims)
 end
 
 """
-    insert_singleton_spatial_dimension(cdims::DenseConvDims)
+    insert_singleton_spatial_dimension(cdims::ConvDims)
 
 When converting a 1d convolution to a 2d, or a 2d to a 3d, we need to insert a singleton
 spatial dimension at the end of the spatial dimensions. This does so for a ConvDims.
 """
 @inline function insert_singleton_spatial_dimension(cdims::C) where {C <: ConvDims}
     return basetype(C)(cdims;
-        N=spatial_dims(cdims) + 1,
         I=(input_size(cdims)..., 1),
         K=(kernel_size(cdims)..., 1),
         S=(stride(cdims)..., 1),
```
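The helper appends a trailing singleton to every size tuple so a lower-dimensional convolution can reuse the higher-dimensional code path. A sketch using the (internal, hence module-qualified) helper with assumed 1d sizes:

```julia
using NNlib

# Dims for a 1d convolution: 3d sizes laid out as (width, channels, batch).
cdims1 = DenseConvDims((28, 3, 1), (5, 3, 8))

# Promote to 2d by inserting a singleton spatial dimension at the end.
cdims2 = NNlib.insert_singleton_spatial_dimension(cdims1)
NNlib.kernel_size(cdims2)   # (5, 1)
```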

src/dim_helpers/ConvDims.jl

Lines changed: 55 additions & 59 deletions
```diff
@@ -6,13 +6,19 @@ export ConvDims
 Type system-level information about convolution dimensions. Critical for things like
 `im2col!()` to generate efficient code, and helpful to reduce the number of kwargs
 getting passed around.
-
-We don't want to specialize on things like image size/channel count, so we generally
-store those as fields, just for convenience, and to allow for non-breaking changes when
-we decide we _do_ want to specialize on those values. We always want to specialize on
-things like stride, padding, dilation, and kernel flipping though.
 """
-abstract type ConvDims{N, S, P, D, F} end
+abstract type ConvDims{N} end
+
+@inline spatial_dims(::ConvDims{N}) where N = N
+@inline groupcount(c::ConvDims) = 1
+
+# Below functions should be implemented by dims that subtype `ConvDims`.
+function input_size end
+function kernel_size end
+function stride end
+function padding end
+function dilation end
+function flipkernel end
 
 # Hack to get rid of type parameters
 function basetype(::Type{C}) where {C <: ConvDims}
```
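This is the heart of the type-stability fix: stride, padding, dilation, and kernel flipping move out of `ConvDims`' type parameters and behind a small accessor interface that concrete subtypes implement. A hypothetical minimal subtype showing the contract (`ToyDims` is invented for illustration and is not part of the package):

```julia
using NNlib

# Hypothetical subtype: only the spatial rank N (plus the padding tuple
# length M == 2N) stays in the type; everything else is a plain field
# exposed through the accessor interface declared above.
struct ToyDims{N,M} <: NNlib.ConvDims{N}
    I::NTuple{N,Int}   # spatial input size
    K::NTuple{N,Int}   # spatial kernel size
    S::NTuple{N,Int}   # stride
    P::NTuple{M,Int}   # lo/hi padding per spatial dimension
    D::NTuple{N,Int}   # dilation
    F::Bool            # flipkernel
end

NNlib.input_size(c::ToyDims)   = c.I
NNlib.kernel_size(c::ToyDims)  = c.K
NNlib.stride(c::ToyDims)       = c.S
NNlib.padding(c::ToyDims)      = c.P
NNlib.dilation(c::ToyDims)     = c.D
NNlib.flipkernel(c::ToyDims)   = c.F
NNlib.channels_in(c::ToyDims)  = 1   # stubs so the generic show() works
NNlib.channels_out(c::ToyDims) = 1

c = ToyDims((28, 28), (5, 5), (1, 1), (2, 2, 2, 2), (1, 1), false)
NNlib.output_size(c)   # (28, 28), via the generic definition in the next hunk
```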
```diff
@@ -27,13 +33,29 @@ function basetype(::Type{C}) where {C <: ConvDims}
     end
 end
 
-# Obvious getter definitions for the type system-level definitions
-spatial_dims(c::ConvDims{N,S,P,D,F}) where {N, S, P, D, F} = N
-stride(c::ConvDims{N,S,P,D,F}) where {N, S, P, D, F} = S
-padding(c::ConvDims{N,S,P,D,F}) where {N, S, P, D, F} = P
-dilation(c::ConvDims{N,S,P,D,F}) where {N, S, P, D, F} = D
-flipkernel(c::ConvDims{N,S,P,D,F}) where {N, S, P, D, F} = F
-groupcount(c::ConvDims) = 1
+function output_size(c::ConvDims)
+    I = input_size(c)
+    K = kernel_size(c)
+    S = stride(c)
+    P = padding(c)
+    D = dilation(c)
+
+    return ntuple(spatial_dims(c)) do i
+        return div(I[i] + P[(i-1)*2 + 1] + P[(i-1)*2 + 2] - (K[i] - 1) * D[i] - 1, S[i]) + 1
+    end
+end
+
+function Base.show(io::IO, cdims::C) where {C <: ConvDims}
+    I = (input_size(cdims)..., channels_in(cdims))
+    O = (output_size(cdims)..., channels_out(cdims))
+    K = kernel_size(cdims)
+    S = stride(cdims)
+    P = padding(cdims)
+    D = dilation(cdims)
+    F = flipkernel(cdims)
+    G = groupcount(cdims)
+    print(io, "$(basetype(C)): $I * $K -> $O, stride: $S, pad: $P, dil: $D, flip: $F, groups: $G")
+end
 
 """
     im2col_dims(c::ConvDims)
```
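The relocated `output_size` applies standard convolution arithmetic per spatial dimension: `div(I + Pl + Ph - (K - 1) * D - 1, S) + 1`. A worked check with assumed numbers:

```julia
# One spatial dimension: input 28, padding 2 lo / 2 hi, kernel 5,
# dilation 1, stride 2.
I, Pl, Ph, K, D, S = 28, 2, 2, 5, 1, 2
div(I + Pl + Ph - (K - 1) * D - 1, S) + 1   # 14
```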
```diff
@@ -81,57 +103,31 @@ function check_spdf(x_size::NTuple{N}, w_size::NTuple{N}, stride, padding, dilat
 
     # padding is kind of a special case; we allow it to be either 2-length or 4-length,
     # since we support asymmetrical padding
-    if length(ppadding) != 2*nd
-        if length(ppadding) == nd
-            # Do this repeat dance so that we get lo/hi symmetrical padding
-            ppadding = tuple(repeat(collect(ppadding), inner=2)...)
-        else
-            throw(DimensionMismatch("Padding $(length(ppadding))d, should be either $(nd)d or $(2*nd)d!"))
-        end
+    if length(ppadding) == 2 * nd
+        _validate_padding(x_size, w_size, ppadding, pdilation)
+        return pstride, ppadding, pdilation
     end
 
-    # Assert that kernel size * dilation is <= padded input size
-    for idx in 1:nd
+    length(ppadding) != nd && throw(DimensionMismatch(
+        "Padding $(length(ppadding))d, should be either $(nd)d or $(2*nd)d!"))
+
+    # Do this repeat dance so that we get lo/hi symmetrical padding
+    ppadding_expanded = ntuple(i -> ppadding[(i - 1) ÷ 2 + 1], 2 * nd)
+    _validate_padding(x_size, w_size, ppadding_expanded, pdilation)
+    return pstride, ppadding_expanded, pdilation
+end
+
+# Assert that kernel size * dilation is <= padded input size
+function _validate_padding(x_size::NTuple{N}, w_size::NTuple{N}, padding, dilation) where N
+    for idx in 1:(N - 2)
         Is = x_size[idx]
-        Pl = ppadding[(idx - 1)*2 + 1]
-        Ph = ppadding[(idx - 1)*2 + 2]
         Ks = w_size[idx]
-        Ds = pdilation[idx]
-        if Is + Pl + Ph < (Ks - 1)*Ds + 1
+        Pl = padding[(idx - 1) * 2 + 1]
+        Ph = padding[(idx - 1) * 2 + 2]
+        Ds = dilation[idx]
+        if Is + Pl + Ph < (Ks - 1) * Ds + 1
             throw(DimensionMismatch("Kernel * dilation (($Ks - 1) * $Ds + 1) cannot be larger than input + padding ($Is + $Pl + $Ph)!"))
         end
     end
-
-    return pstride, ppadding, pdilation
-end
-
-
-"""
-    output_size(c::ConvDims)
-
-Calculate the output (spatial) dimensions of the convolution. Get channel count via
-`channels_out(c)`, and batch count is unknowable.
-"""
-function output_size(c::ConvDims)
-    I = input_size(c)
-    K = kernel_size(c)
-    S = stride(c)
-    P = padding(c)
-    D = dilation(c)
-
-    return ntuple(spatial_dims(c)) do i
-        return div(I[i] + P[(i-1)*2 + 1] + P[(i-1)*2 + 2] - (K[i] - 1) * D[i] - 1, S[i]) + 1
-    end
-end
-
-# Override show() for these beauties
-function Base.show(io::IO, cdims::C) where {C <: ConvDims}
-    I = (input_size(cdims)..., channels_in(cdims))
-    O = (output_size(cdims)..., channels_out(cdims))
-    K = kernel_size(cdims)
-    S = stride(cdims)
-    P = padding(cdims)
-    D = dilation(cdims)
-    F = flipkernel(cdims)
-    G = groupcount(cdims)
-    print(io, "$(basetype(C)): $I * $K -> $O, stride: $S, pad: $P, dil: $D, flip: $F, groups: $G")
+    nothing
 end
```