FluxML
diff --git a/Manifest.toml b/Manifest.toml
Lines changed: 0 additions & 12 deletions
diff --git a/Project.toml b/Project.toml
Lines changed: 0 additions & 1 deletion
diff --git a/src/NNlib.jl b/src/NNlib.jl
Lines changed: 1 addition & 4 deletions
diff --git a/src/conv.jl b/src/conv.jl
Lines changed: 5 additions & 5 deletions
diff --git a/src/impl/conv_direct.jl b/src/impl/conv_direct.jl
Lines changed: 3 additions & 3 deletions
diff --git a/src/impl/conv_im2col.jl b/src/impl/conv_im2col.jl
Lines changed: 62 additions & 67 deletions
diff --git a/src/impl/depthwiseconv_direct.jl b/src/impl/depthwiseconv_direct.jl
Lines changed: 3 additions & 3 deletions
@@ -9,12 +9,6 @@ git-tree-sha1 = "055eb2690182ebc31087859c3dd8598371d3ef9e"
 uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
 version = "0.5.3"
 
-[[Crayons]]
-deps = ["Test"]
-git-tree-sha1 = "f621b8ef51fd2004c7cf157ea47f027fdeac5523"
-uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
-version = "4.0.0"
-
 [[Dates]]
 deps = ["Printf"]
 uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
@@ -87,12 +81,6 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
-[[TimerOutputs]]
-deps = ["Crayons", "Printf", "Test", "Unicode"]
-git-tree-sha1 = "b80671c06f8f8bae08c55d67b5ce292c5ae2660c"
-uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
-version = "0.5.0"
-
 [[UUIDs]]
 deps = ["Random", "SHA"]
 uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
@@ -8,7 +8,6 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
@@ -1,8 +1,5 @@
 module NNlib
-using Requires, TimerOutputs
-
-const to = TimerOutput()
-
+using Requires
 
 # Include APIs
 include("dim_helpers.jl")
 
@@ -45,7 +45,7 @@ for (front_name, backend) in (
     # We only define 3d conv primitives, we reshape lower down to get 1d and 2d convolution
     @eval begin
         # im2col-accelerated function forwarding definition
-        @timeit_debug to function $(Symbol("$(front_name)!"))(
+        function $(Symbol("$(front_name)!"))(
                         out::AbstractArray{T,5}, in1::AbstractArray{T,5},
                         in2::AbstractArray{T,5}, cdims::ConvDims; kwargs...) where {T <: $G}
             $(Symbol("$(front_name)_$(backend)!"))(out, in1, in2, cdims; kwargs...)
@@ -106,7 +106,7 @@ for backend in (Symbol(), :_direct, :_im2col)
     # First make auto-allocating versions of the conv()-like calls:
     for name in (:conv, :depthwiseconv)
         @eval begin
-            @timeit_debug to function $(Symbol("$(name)$(backend)"))(
+            function $(Symbol("$(name)$(backend)"))(
                             x::AbstractArray{xT,N}, w::AbstractArray{wT,N},
                             cdims::ConvDims; kwargs...) where {xT, wT, N}
                 y = similar(x, promote_type(xT, wT), output_size(cdims)...,
@@ -118,7 +118,7 @@ for backend in (Symbol(), :_direct, :_im2col)
 
     for name in (:∇conv_data, :∇depthwiseconv_data)
         @eval begin
-            @timeit_debug to function $(Symbol("$(name)$(backend)"))(
+            function $(Symbol("$(name)$(backend)"))(
                             dy::AbstractArray{yT,N}, w::AbstractArray{wT,N},
                             cdims::ConvDims; kwargs...) where {yT, wT, N}
                 dx = similar(dy, input_size(cdims)..., channels_in(cdims),
@@ -131,7 +131,7 @@ for backend in (Symbol(), :_direct, :_im2col)
     # We do the conv/depthwiseconv filter backprops separately, as the shape calculation
     # for `w` is slightly different for depthwise than for normal dense convolution.
     @eval begin
-        @timeit_debug to function $(Symbol("∇conv_filter$(backend)"))(
+        function $(Symbol("∇conv_filter$(backend)"))(
                         x::AbstractArray{xT,N}, dy::AbstractArray{yT,N},
                         cdims::ConvDims; kwargs...) where {xT, yT, N}
             dw = similar(dy, kernel_size(cdims)..., channels_in(cdims),
@@ -141,7 +141,7 @@ for backend in (Symbol(), :_direct, :_im2col)
     end
 
     @eval begin
-        @timeit_debug to function $(Symbol("∇depthwiseconv_filter$(backend)"))(
+        function $(Symbol("∇depthwiseconv_filter$(backend)"))(
                         x::AbstractArray{xT,N}, dy::AbstractArray{yT,N},
                         cdims::ConvDims; kwargs...) where {xT, yT, N}
             dw = similar(dy, kernel_size(cdims)..., channel_multiplier(cdims),
 
@@ -44,7 +44,7 @@ wrapper methods are available.
 """
 conv_direct!
 
-@timeit_debug to function conv_direct!(y::AbstractArray{yT,5}, x::AbstractArray{xT,5},
+function conv_direct!(y::AbstractArray{yT,5}, x::AbstractArray{xT,5},
                       w::AbstractArray{wT,5}, cdims::DenseConvDims;
                       alpha::yT = yT(1), beta = false) where {yT, xT, wT}
     check_dims(size(x), size(w), size(y), cdims)
@@ -114,7 +114,7 @@ Calculate the gradient imposed upon `x` in the convolution `y = x * w`.
 """
 ∇conv_data_direct!
 
-@timeit_debug to function ∇conv_data_direct!(dx::AbstractArray{xT,5}, dy::AbstractArray{yT,5},
+function ∇conv_data_direct!(dx::AbstractArray{xT,5}, dy::AbstractArray{yT,5},
                             w::AbstractArray{wT,5}, cdims::DenseConvDims;
                             alpha::xT=xT(1), beta=false) where {xT, yT, wT}
     w = transpose_swapbatch(w[end:-1:1, end:-1:1, end:-1:1, :, :])
@@ -133,7 +133,7 @@ Calculate the gradient imposed upon `w` in the convolution `y = x * w`.
 """
 ∇conv_filter_direct!
 
-@timeit_debug to function ∇conv_filter_direct!(dw::AbstractArray{wT,5}, x::AbstractArray{xT,5},
+function ∇conv_filter_direct!(dw::AbstractArray{wT,5}, x::AbstractArray{xT,5},
                               dy::AbstractArray{yT,5}, cdims::DenseConvDims;
                               alpha::wT=wT(1), beta=false) where {xT, yT, wT}
     x = transpose_swapbatch(x[end:-1:1, end:-1:1, end:-1:1, :, :])
 
@@ -22,7 +22,7 @@ by setting `alpha` to a nonunitary value, various gain factors can be applied.
 Note for the particularly performance-minded, you can provide a pre-allocated `col`,
 which should eliminate any need for large allocations within this method.
 """
-@timeit_debug to function conv_im2col!(
+function conv_im2col!(
                 y::AbstractArray{T,5}, x::AbstractArray{T,5},
                 w::AbstractArray{T,5}, cdims::DenseConvDims;
                 col::AbstractArray{T,2}=similar(x, im2col_dims(cdims)),
@@ -49,12 +49,12 @@ which should eliminate any need for large allocations within this method.
     @inbounds for batch_idx in 1:size(x,5)
         # Fill `col` from this batch element via `im2col!`, then run `gemm!` on
         # `col` and `w`, writing the result at the `batch_idx` offset of `y`.
-        @timeit_debug to "im2col!" im2col!(col, view(x, :, :, :, :, batch_idx), cdims)
+        im2col!(col, view(x, :, :, :, :, batch_idx), cdims)
         GC.@preserve col, w, y, begin
             col_ptr = pointer(col)
             w_ptr = pointer(w)
             y_ptr = pointer(y, (batch_idx - 1)*M*N + 1)
-            @timeit_debug to "gemm!" gemm!(Val(false), Val(false), M, N, K, alpha, col_ptr, w_ptr, beta, y_ptr)
+            gemm!(Val(false), Val(false), M, N, K, alpha, col_ptr, w_ptr, beta, y_ptr)
         end
     end
     return y
@@ -66,7 +66,7 @@ end
 Conv backward pass onto the weights using im2col and GEMM; stores the result in `dw`.
 See the documentation for `conv_im2col!()` for explanation of optional parameters.
 """
-@timeit_debug to function ∇conv_filter_im2col!(
+function ∇conv_filter_im2col!(
                 dw::AbstractArray{T,5}, x::AbstractArray{T,5},
                 dy::AbstractArray{T,5}, cdims::DenseConvDims;
                 col::AbstractArray{T,2} = similar(dw, im2col_dims(cdims)),
@@ -95,14 +95,12 @@ See the documentation for `conv_im2col!()` for explanation of optional parameter
     K = prod(output_size(cdims))
 
     @inbounds for batch_idx in 1:size(x,5)
-        # We invoke `@timeit_debug` on the outside of `im2col!()` because inference
-        # doesn't like us putting it on the inside.
-        @timeit_debug to "im2col!" im2col!(col, view(x, :, :, :, :, batch_idx), cdims)
+        im2col!(col, view(x, :, :, :, :, batch_idx), cdims)
         GC.@preserve col, dw, dy, begin
             col_ptr = pointer(col)
             dy_ptr = pointer(dy,(batch_idx - 1)*K*N + 1)
             dw_ptr = pointer(dw)
-            @timeit_debug to "gemm!" gemm!(Val(true), Val(false), M, N, K, alpha, col_ptr, dy_ptr, beta, dw_ptr)
+            gemm!(Val(true), Val(false), M, N, K, alpha, col_ptr, dy_ptr, beta, dw_ptr)
         end
 
         # Because we accumulate over batches in this loop, we must set `beta` equal
@@ -118,7 +116,7 @@ end
 Conv2d backward pass onto the input using im2col and GEMM; stores the result in `dx`.
 See the documentation for `conv_im2col!()` for explanation of other parameters.
 """
-@timeit_debug to function ∇conv_data_im2col!(
+function ∇conv_data_im2col!(
                 dx::AbstractArray{T,5}, dy::AbstractArray{T,5},
                 w::AbstractArray{T,5}, cdims::DenseConvDims;
                 col::AbstractArray{T,2} = similar(dx, im2col_dims(cdims)),
@@ -149,9 +147,9 @@ See the documentation for `conv_im2col!()` for explanation of other parameters.
             dy_ptr = pointer(dy, (batch_idx - 1)*M*K + 1)
             w_ptr = pointer(w)
             col_ptr = pointer(col)
-            @timeit_debug to "gemm!" gemm!(Val(false), Val(true), M, N, K, alpha, dy_ptr, w_ptr, T(0), col_ptr)
+            gemm!(Val(false), Val(true), M, N, K, alpha, dy_ptr, w_ptr, T(0), col_ptr)
         end
-        @timeit_debug to "col2im!" col2im!(view(dx, :, :, :, :, batch_idx), col, cdims)
+        col2im!(view(dx, :, :, :, :, batch_idx), col, cdims)
     end
     return dx
 end
@@ -207,77 +205,74 @@ function im2col!(col::AbstractArray{T,2}, x::AbstractArray{T,4},
     # We begin by copying the central region of the image which requires no padding at all.
     # Eliminating the branches of the fully generalized version below gives us a nice
     # speedup on the majority of the data.
-    @timeit_debug to "im2col!() - central region" begin
-        @inbounds for c in 1:C_in
-            # Unpack "central region"
-            w_region, h_region, d_region = central_region
-
-            for kd in 1:kernel_d,
-                kh in 1:kernel_h,
-                kw in 1:kernel_w,
-                d in d_region,
-                h in h_region,
-                w in w_region
-
-                input_kd = project(d, stride_d, pad_d_lo) + (kd - 1)*dil_d
-                input_kh = project(h, stride_h, pad_h_lo) + (kh - 1)*dil_h
-                input_kw = project(w, stride_w, pad_w_lo) + (kw - 1)*dil_w
-                kidxs = kernel_index(kw, kh, kd, cdims)
+    @inbounds for c in 1:C_in
+        # Unpack "central region"
+        w_region, h_region, d_region = central_region
 
-                xval::T = x[input_kw, input_kh, input_kd, c]
-                col_reshaped[w, h, d, kidxs..., c] = xval
-            end
+        for kd in 1:kernel_d,
+            kh in 1:kernel_h,
+            kw in 1:kernel_w,
+            d in d_region,
+            h in h_region,
+            w in w_region
+ 
+            input_kd = project(d, stride_d, pad_d_lo) + (kd - 1)*dil_d
+            input_kh = project(h, stride_h, pad_h_lo) + (kh - 1)*dil_h
+            input_kw = project(w, stride_w, pad_w_lo) + (kw - 1)*dil_w
+            kidxs = kernel_index(kw, kh, kd, cdims)
+
+            xval::T = x[input_kw, input_kh, input_kd, c]
+            col_reshaped[w, h, d, kidxs..., c] = xval
         end
     end
 
+    
     # For each "padded region", we run the fully general version
-    @timeit_debug to "im2col!() - padded region" begin
-        @inbounds for (w_region, h_region, d_region) in padded_regions
-            for c in 1:C_in,
-                d in d_region,
-                h in h_region,
-                w in w_region,
-                kd in 1:kernel_d,
-                kh in 1:kernel_h,
-                kw in 1:kernel_w
+    @inbounds for (w_region, h_region, d_region) in padded_regions
+        for c in 1:C_in,
+            d in d_region,
+            h in h_region,
+            w in w_region,
+            kd in 1:kernel_d,
+            kh in 1:kernel_h,
+            kw in 1:kernel_w
 
-                input_kd = project(d, stride_d, pad_d_lo) + (kd - 1)*dil_d
-                input_kh = project(h, stride_h, pad_h_lo) + (kh - 1)*dil_h
-                input_kw = project(w, stride_w, pad_w_lo) + (kw - 1)*dil_w
+            input_kd = project(d, stride_d, pad_d_lo) + (kd - 1)*dil_d
+            input_kh = project(h, stride_h, pad_h_lo) + (kh - 1)*dil_h
+            input_kw = project(w, stride_w, pad_w_lo) + (kw - 1)*dil_w
 
-                kidxs = kernel_index(kw, kh, kd, cdims)
+            kidxs = kernel_index(kw, kh, kd, cdims)
 
-                # If this d is off the edge, then deal with the entire plane
-                # in one fell swoop, like a ravenous flock of crows.  CAW CAW.
-                if input_kd <= 0 || input_kd > depth
-                    for kh in 1:kernel_h,
-                        kw in 1:kernel_w
-                        col_reshaped[w, h, d, kidxs..., c] = T(0)
-                    end
-                    continue
-                end
-
-                # Same for `h`, but in this case it's only a line, not a plane.
-                # This results in slightly less caw'ing.
-                if input_kh <= 0 || input_kh > height
-                    for kw in 1:kernel_w
-                        col_reshaped[w, h, d, kidxs..., c] = T(0)
-                    end
-                    continue
+            # If this d is off the edge, then deal with the entire plane
+            # in one fell swoop, like a ravenous flock of crows.  CAW CAW.
+            if input_kd <= 0 || input_kd > depth
+                for kh in 1:kernel_h,
+                    kw in 1:kernel_w
+                    col_reshaped[w, h, d, kidxs..., c] = T(0)
                 end
+                continue
+            end
 
-                # If this `w` is off the edge it and only it gets cleared out
-                if input_kw <= 0 || input_kw > width
+            # Same for `h`, but in this case it's only a line, not a plane.
+            # This results in slightly less caw'ing.
+            if input_kh <= 0 || input_kh > height
+                for kw in 1:kernel_w
                     col_reshaped[w, h, d, kidxs..., c] = T(0)
-                    continue
                 end
+                continue
+            end
 
-                # Copy the data over
-                xval::T = x[input_kw, input_kh, input_kd, c]
-                col_reshaped[w, h, d, kidxs..., c] = xval
+            # If this `w` is off the edge it and only it gets cleared out
+            if input_kw <= 0 || input_kw > width
+                col_reshaped[w, h, d, kidxs..., c] = T(0)
+                continue
             end
+
+            # Copy the data over
+            xval::T = x[input_kw, input_kh, input_kd, c]
+            col_reshaped[w, h, d, kidxs..., c] = xval
         end
-    end
+    end    
 end
 
 
 
@@ -18,7 +18,7 @@ channels in `x` is the last, not the second-to-last, as in a normal dense convol
 
 See the docstring for `conv_direct!()` for more on the optional parameters.
 """
-@timeit_debug to function depthwiseconv_direct!(
+function depthwiseconv_direct!(
                 y::AbstractArray{yT,5}, x::AbstractArray{xT,5},
                 w::AbstractArray{wT,5}, cdims::DepthwiseConvDims;
                 alpha::yT = yT(1), beta::yT = yT(0)) where {yT, xT, wT}
@@ -95,7 +95,7 @@ for each batch and channel independently.
 """
 ∇depthwiseconv_data_direct!
 
-@timeit_debug to function ∇depthwiseconv_data_direct!(
+function ∇depthwiseconv_data_direct!(
                 dx::AbstractArray{xT,5}, dy::AbstractArray{yT,5},
                 w::AbstractArray{wT,5}, cdims::DepthwiseConvDims;
                 alpha::xT=xT(1), beta::xT=xT(0)) where {xT, yT, wT}
@@ -128,7 +128,7 @@ Calculate the gradient imposed upon `w` in the depthwise convolution `y = x * w`
 """
 ∇depthwiseconv_filter_direct!
 
-@timeit_debug to function ∇depthwiseconv_filter_direct!(
+function ∇depthwiseconv_filter_direct!(
                 dw::AbstractArray{wT,5}, x::AbstractArray{xT,5},
                 dy::AbstractArray{yT,5}, cdims::DepthwiseConvDims;
                 alpha::wT=wT(1),beta::wT=wT(0)) where {xT, yT, wT}