@@ -55,7 +55,7 @@ ChainRulesCore.@non_differentiable dropout_mask(::Any, ::Any, ::Any)
"""
    Dropout(p; dims=:, rng = rng_from_array())

- Dropout layer. In the forward pass, apply the [`Flux.dropout`](@ref) function on the input.
+ Dropout layer. In the forward pass, applies the [`Flux.dropout`](@ref) function to the input.

To apply dropout along certain dimension(s), specify the `dims` keyword.
e.g. `Dropout(p; dims = 3)` will randomly zero out entire channels on WHCN input
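
For reference, the channel-wise behaviour of the `dims` keyword can be checked with a short supplementary sketch (not taken from the patch; it assumes only the `Dropout` constructor and `Flux.trainmode!` used above):

```julia
using Flux

x = ones(Float32, 4, 4, 3, 1)      # WHCN input: 4×4 spatial, 3 channels, 1 sample

d = Dropout(0.5; dims = 3)         # share the dropout mask over everything except the channel dim
Flux.trainmode!(d)                 # force the stochastic behaviour outside of a training loop

y = d(x)
for c in 1:3                       # each channel is either all zero or all scaled by 1/(1 - p) = 2
    println("channel $c unique values: ", unique(y[:, :, c, 1]))
end
```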
@@ -65,6 +65,35 @@ Specify `rng` to use a custom RNG instead of the default.
Custom RNGs are only supported on the CPU.

Does nothing to the input once [`Flux.testmode!`](@ref) is `true`.
+
+ # Examples
+ ```jldoctest
+ julia> m = Chain(Dense(2 => 2), Dropout(1))
+ Chain(
+   Dense(2 => 2),  # 6 parameters
+   Dropout(1),
+ )
+
+ julia> Flux.trainmode!(m);  # activating the layer without actually training it
+
+ julia> m([1, 2])  # drops neurons with a probability of 1
+ 2-element Vector{Float32}:
+  -0.0
+  -0.0
+
+ julia> m = Chain(Dense(2 => 2), Dropout(0.5))
+ Chain(
+   Dense(2 => 2),  # 6 parameters
+   Dropout(0.5),
+ )
+
+ julia> Flux.trainmode!(m);  # activating the layer without actually training it
+
+ julia> m([1, 2])  # drops neurons with a probability of 0.5
+ 2-element Vector{Float32}:
+  -4.537827
+  -0.0
+ ```
"""
mutable struct Dropout{F,D,R<:AbstractRNG}
  p::F
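
A supplementary check of the two modes the doctest relies on (again a sketch, not part of the patch): in test mode the layer is the identity, while `Dropout(1)` in train mode zeroes every activation, which is why the doctest prints `-0.0` twice.

```julia
using Flux

x = Float32[1, 2]
m = Chain(Dense(2 => 2), Dropout(0.5))

Flux.testmode!(m)                   # inference: Dropout is a no-op
@assert m(x) == m[1](x)             # same result as the Dense layer alone

m1 = Chain(Dense(2 => 2), Dropout(1))
Flux.trainmode!(m1)                 # training behaviour: stochastic masking
@assert all(iszero, m1(x))          # p = 1 drops everything (signed zeros included)
```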
@@ -105,6 +134,33 @@ The AlphaDropout layer ensures that mean and variance of activations
remain the same as before.

Does nothing to the input once [`testmode!`](@ref) is true.
+
+ # Examples
+ ```jldoctest
+ julia> x = randn(20, 1);
+
+ julia> m = Chain(Dense(20 => 10, selu), AlphaDropout(0.5))
+ Chain(
+   Dense(20 => 10, selu),  # 210 parameters
+   AlphaDropout{Float64, Random.TaskLocalRNG}(0.5, nothing, Random.TaskLocalRNG()),
+ )
+
+ julia> Flux.trainmode!(m);
+
+ julia> y = m(x);
+
+ julia> Flux.std(x)
+ 1.097500619939126
+
+ julia> Flux.std(y)  # maintains the standard deviation of the input
+ 1.1504012188827453
+
+ julia> Flux.mean(x)
+ -0.3217018554158738
+
+ julia> Flux.mean(y)  # maintains the mean of the input
+ -0.2526866470385106
+ ```
"""
mutable struct AlphaDropout{F,R<:AbstractRNG}
  p::F
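
The quoted statistics can be reproduced, up to RNG noise, with plain `Statistics` calls; a minimal sketch assuming only the exported `AlphaDropout` and `trainmode!`:

```julia
using Flux, Statistics

x = randn(Float32, 1000)            # roughly zero mean, unit variance, as selu expects
d = AlphaDropout(0.5)
Flux.trainmode!(d)

y = d(x)
# AlphaDropout targets self-normalising (selu) networks, so the output
# should keep approximately the same mean and standard deviation:
println("mean: ", mean(x), " -> ", mean(y))
println("std:  ", std(x),  " -> ", std(y))
```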
@@ -154,6 +210,27 @@ If `affine=true`, it also applies a learnable shift and rescaling
using the [`Scale`](@ref) layer.

See also [`BatchNorm`](@ref), [`InstanceNorm`](@ref), [`GroupNorm`](@ref), and [`normalise`](@ref).
+
+ # Examples
+ ```jldoctest
+ julia> xs = rand(3, 3, 3, 2);  # a batch of 2 3×3×3 images
+
+ julia> m = LayerNorm(3);
+
+ julia> y = m(xs);
+
+ julia> Flux.std(xs[:, :, :, 1])
+ 0.28713812337208383
+
+ julia> Flux.std(y[:, :, :, 1])  # normalises each image (or all channels in an image)
+ 1.018993632693022
+
+ julia> Flux.std(xs[:, :, :, 2])
+ 0.22540260537916373
+
+ julia> Flux.std(y[:, :, :, 2])  # normalises each image (or all channels in an image)
+ 1.018965249873791
+ ```
"""
struct LayerNorm{F,D,T,N}
  λ::F
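
To make the doctest's claim concrete: `LayerNorm(3)` standardises over the leading dimension of size 3, so every length-3 slice along that dimension should come out with roughly zero mean and unit (uncorrected) standard deviation. A sketch under that assumption:

```julia
using Flux, Statistics

xs = rand(Float32, 3, 3, 3, 2)
m  = LayerNorm(3)                   # normalise over the first dimension (size 3)
y  = m(xs)

println(maximum(abs, mean(y; dims = 1)))        # ≈ 0 for every slice
println(std(y[:, 1, 1, 1]; corrected = false))  # ≈ 1 for a single slice
```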
@@ -256,12 +333,17 @@ Use [`testmode!`](@ref) during inference.

# Examples
```julia
- m = Chain(
-   Dense(28^2 => 64),
-   BatchNorm(64, relu),
-   Dense(64 => 10),
-   BatchNorm(10),
-   softmax)
+ julia> xs = rand(3, 3, 3, 2);  # a batch of 2 3×3×3 images
+
+ julia> Flux.std(xs)
+ 2.6822461565718467
+
+ julia> m = BatchNorm(3);
+
+ julia> Flux.trainmode!(m);  # activating the layer without actually training it
+
+ julia> Flux.std(m(xs))  # normalises the complete batch
+ 1.0093209961092855
```
"""
mutable struct BatchNorm{F,V,N,W}
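
The `Chain` example that this hunk replaces is still a useful usage pattern; a supplementary sketch combining it with the train/test distinction described above (sizes are illustrative only):

```julia
using Flux

m = Chain(Dense(28^2 => 64), BatchNorm(64, relu),
          Dense(64 => 10), BatchNorm(10), softmax)

x = rand(Float32, 28^2, 16)         # a batch of 16 flattened "images"

Flux.trainmode!(m)                  # use per-batch statistics and update the running ones
ŷ_train = m(x)

Flux.testmode!(m)                   # use the accumulated running mean/variance instead
ŷ_test = m(x)

println(size(ŷ_train), " ", size(ŷ_test))   # both (10, 16)
```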
@@ -339,6 +421,27 @@ that will be used to renormalize the input in test phase.

**Warning**: the defaults for `affine` and `track_stats` used to be `true`
in previous Flux versions (< v0.12).
+
+ # Examples
+ ```jldoctest
+ julia> xs = rand(3, 3, 3, 2);  # a batch of 2 3×3×3 images
+
+ julia> m = InstanceNorm(3);
+
+ julia> y = m(xs);
+
+ julia> Flux.std(xs[:, :, 1, 1])  # original standard deviation of the first channel of image 1
+ 0.2989802650787384
+
+ julia> Flux.std(y[:, :, 1, 1])  # each channel of the batch is normalised
+ 1.0606027381538408
+
+ julia> Flux.std(xs[:, :, 2, 2])  # original standard deviation of the second channel of image 2
+ 0.28662705400461197
+
+ julia> Flux.std(y[:, :, 2, 2])  # each channel of the batch is normalised
+ 1.06058729821187
+ ```
"""
mutable struct InstanceNorm{F,V,N,W}
  λ::F  # activation function
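
As a supplementary check of the per-channel, per-sample behaviour shown in the doctest (a sketch, assuming the current defaults `affine=false` and `track_stats=false`):

```julia
using Flux, Statistics

xs = rand(Float32, 3, 3, 3, 2)      # W×H×C×N
m  = InstanceNorm(3)
y  = m(xs)

for n in 1:2, c in 1:3              # each (channel, sample) slice is standardised independently
    slice = y[:, :, c, n]
    println("channel $c, sample $n: mean ≈ ", round(mean(slice); digits = 3),
            ", std ≈ ", round(std(slice; corrected = false); digits = 3))
end
```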
@@ -416,6 +519,27 @@ through to learnable per-channel bias `β` and scale `γ` parameters.

If `track_stats=true`, accumulates mean and var statistics in training phase
that will be used to renormalize the input in test phase.
+
+ # Examples
+ ```jldoctest
+ julia> xs = rand(3, 3, 4, 2);  # a batch of 2 3×3×4 images
+
+ julia> m = GroupNorm(4, 2);
+
+ julia> y = m(xs);
+
+ julia> Flux.std(xs[:, :, 1:2, 1])  # original standard deviation of the first 2 channels of image 1
+ 0.307588490584917
+
+ julia> Flux.std(y[:, :, 1:2, 1])  # normalises channels in groups of 2 (as specified)
+ 1.0289339365431291
+
+ julia> Flux.std(xs[:, :, 3:4, 2])  # original standard deviation of the last 2 channels of image 2
+ 0.3111566100804274
+
+ julia> Flux.std(y[:, :, 3:4, 2])  # normalises channels in groups of 2 (as specified)
+ 1.0289352493058574
+ ```
"""
mutable struct GroupNorm{F,V,N,W}
  G::Int  # number of groups
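
The grouping behaviour can be checked the same way; a supplementary sketch for `GroupNorm(4, 2)`, where channels 1:2 and 3:4 form the two groups:

```julia
using Flux, Statistics

xs = rand(Float32, 3, 3, 4, 2)      # 4 channels, normalised in 2 groups of 2
m  = GroupNorm(4, 2)
y  = m(xs)

for n in 1:2, g in (1:2, 3:4)       # statistics are shared within each group, per sample
    slice = y[:, :, g, n]
    println("channels $g, sample $n: mean ≈ ", round(mean(slice); digits = 3),
            ", std ≈ ", round(std(slice; corrected = false); digits = 3))
end
```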