Commit b4fe66b

Add documentation for loadmodel!
1 parent dee5842 commit b4fe66b

2 files changed: 65 additions, 24 deletions

docs/src/saving.md (23 additions, 20 deletions)
@@ -2,7 +2,7 @@
 
 You may wish to save models so that they can be loaded and run in a later
 session. The easiest way to do this is via
-[BSON.jl](https://github.com/MikeInnes/BSON.jl).
+[BSON.jl](https://github.com/JuliaIO/BSON.jl).
 
 Save a model:
@@ -46,15 +46,17 @@ versions of Flux).
 
 !!! note
 
-    If a saved model's weights are stored on the GPU, the model will not load
+    If a saved model's parameters are stored on the GPU, the model will not load
     later on if there is no GPU support available. It's best to [move your model
     to the CPU](gpu.md) with `cpu(model)` before saving it.
 
-## Saving Model Weights
+!!! warning
 
-In some cases it may be useful to save only the model parameters themselves, and
-rebuild the model architecture in your code. You can use `params(model)` to get
-model parameters.
+    Previous versions of Flux suggested saving only the model weights using
+    `@save "mymodel.bson" params(model)`.
+    This is no longer recommended and is strongly discouraged.
+    Saving models this way will only store the trainable parameters, which
+    will result in incorrect behavior for layers like `BatchNorm`.
 
 ```Julia
 julia> using Flux
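To see why the warning above matters, note that `BatchNorm`'s running statistics are updated during training but are not trainable parameters, so they never appear in `params(model)`. A minimal sketch, assuming Flux's standard `BatchNorm` API:

```julia
using Flux

# BatchNorm tracks running statistics (a mean and variance) that are
# updated during training but are not trainable parameters.
bn = BatchNorm(4)

# Only the affine shift β and scale γ are collected here; the running
# statistics are left behind, which is why saving params(model) alone
# breaks BatchNorm at inference time.
length(Flux.params(bn))
```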
@@ -64,28 +66,29 @@ Chain(Dense(10, 5, NNlib.relu), Dense(5, 2), NNlib.softmax)
 
 julia> weights = Flux.params(model);
 
-julia> using BSON: @save
-
-julia> @save "mymodel.bson" weights
-```
-
-You can easily load parameters back into a model with `Flux.loadparams!`.
+Loading the model as shown above will return a new model with the stored parameters.
+But sometimes you already have a model, and you want to load stored parameters into it.
+This can be done as follows:
 
 ```julia
-julia> using Flux
+using Flux: loadmodel!
+using BSON: @load
 
-julia> model = Chain(Dense(10 => 5,relu),Dense(5 => 2),softmax)
-Chain(Dense(10, 5, NNlib.relu), Dense(5, 2), NNlib.softmax)
+# some predefined model
+model = Chain(Dense(10 => 5, relu), Dense(5 => 2), softmax)
 
-julia> using BSON: @load
+# load one model into another
+model = loadmodel!(model, @load("mymodel.bson"))
+```
 
-julia> @load "mymodel.bson" weights
+This ensures that the model loaded from `"mymodel.bson"` matches the structure of `model`. [`Flux.loadmodel!`](@ref) is also convenient for copying parameters between models in memory.
 
-julia> Flux.loadparams!(model, weights)
+```@docs
+Flux.loadmodel!
+Flux.isloadleaf
+Flux.loadleaf!
 ```
 
-The new `model` we created will now be identical to the one we saved parameters for.
-
 ## Checkpointing
 
 In longer training runs it's a good idea to periodically save your model, so that you can resume if training is interrupted (for example, if there's a power cut). You can do this by saving the model in the [callback provided to `train!`](training/training.md).
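The in-memory copying described in the new documentation can be sketched with two `Chain`s of identical architecture (an illustration assuming the two-model `Flux.loadmodel!` method from this commit):

```julia
using Flux

# Two models with the same architecture but independent random weights.
src = Chain(Dense(10 => 5, relu), Dense(5 => 2), softmax)
dst = Chain(Dense(10 => 5, relu), Dense(5 => 2), softmax)

# Copy every parameter (trainable and non-trainable) from src into dst.
Flux.loadmodel!(dst, src)

dst[1].weight == src[1].weight   # the first layers now hold identical values
```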

src/loading.jl (42 additions, 4 deletions)
@@ -1,8 +1,25 @@
+"""
+    isloadleaf(x)
+
+Return `true` whenever `x` should be treated as a "leaf node"
+for the purposes of loading parameters.
+By default, `isloadleaf` returns `true` if [`Functors.isleaf`](@ref)
+is `true` for all [`Functors.children(x)`](@ref `Functors.children`).
+
+You can override this function for a specific type if needed.
+"""
 isloadleaf(x) = all(Functors.isleaf, Functors.children(x))
 
-loadnumeric!(x, x̄, err) = x
-loadnumeric!(x::Zeros, x̄, err) = x
-function loadnumeric!(x::AbstractArray, x̄::AbstractArray, err)
+"""
+    loadleaf!(x, x̄, err)
+
+Copy `x̄` to `x`, or throw `err` when their sizes are mismatched.
+By default, use `copyto!` when `x` and `x̄` are arrays.
+Otherwise, just return `x`.
+"""
+loadleaf!(x, x̄, err) = x
+loadleaf!(x::Zeros, x̄, err) = x
+function loadleaf!(x::AbstractArray, x̄::AbstractArray, err)
   (size(x) == size(x̄)) || throw(err)
   copyto!(x, x̄)
 end
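The leaf-copying contract above can be exercised in plain Julia without Flux; a sketch using a hypothetical `loadleaf_sketch!` that mirrors the array method from the diff:

```julia
# Hypothetical stand-in mirroring loadleaf!: copy when shapes agree,
# throw the caller-supplied error otherwise, and no-op for non-arrays.
loadleaf_sketch!(x, x̄, err) = x
function loadleaf_sketch!(x::AbstractArray, x̄::AbstractArray, err)
    size(x) == size(x̄) || throw(err)
    copyto!(x, x̄)
end

x = zeros(2, 3)
loadleaf_sketch!(x, ones(2, 3), DimensionMismatch("size mismatch"))
x == ones(2, 3)   # the destination now holds the loaded values

# A shape mismatch surfaces as the supplied error:
# loadleaf_sketch!(zeros(2, 2), ones(2, 3), DimensionMismatch("size mismatch"))
```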
@@ -14,7 +31,7 @@ function _loadto!(m, m̄)
     throw(ArgumentError("Tried to load $m̄ into $m but the structures do not match."))
 
   err = DimensionMismatch("Tried to load $m̄ into $m but the parameter sizes do not match.")
-  foreach((l, l̄) -> loadnumeric!(l, l̄, err), ls, l̄s)
+  foreach((l, l̄) -> loadleaf!(l, l̄, err), ls, l̄s)
 
   return m
 end
@@ -23,6 +40,27 @@ function loadto!(m::T, m̄::S) where {T, S}
   _loadto!(m, m̄)
 end
 
+"""
+    loadmodel!(m, m̄)
+
+Copy all the parameters (trainable and non-trainable) from `m̄` to `m`.
+
+`loadmodel!` recursively walks `m` and `m̄` until it encounters
+a subfield `x` (i.e. a layer) for which `isloadleaf(x)` is true.
+The parameters of the matching subfield, `x̄`, are copied to `x`,
+throwing an error whenever:
+- `x` and `x̄` are not the same type (e.g. loading a `Conv` into a `Dense`)
+- `x` and `x̄` do not share the same fields
+- the parameter sizes are mismatched between `x` and `x̄`
+
+See [`loadleaf!`](@ref) for more details on the copy behavior.
+See [`isloadleaf`](@ref) for more details on which layers are considered leaves.
+
+!!! warning
+    This function allows `m̄` to be a vector or `Params` for backwards compatibility.
+    You should avoid using `loadmodel!` this way, because it skips most of the structural
+    checking used when `m̄` is also a struct. Silent errors may occur.
+"""
 function loadmodel!(m, xs::Params)
   for (p, x) in zip(params(m), xs)
     size(p) == size(x) ||
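The default leaf rule used by `isloadleaf` can be checked with plain Functors.jl; a sketch with a hypothetical two-field `Affine` struct (assuming Functors' `@functor`, `isleaf`, and `children`):

```julia
using Functors

# A hypothetical two-field layer; @functor exposes W and b as children.
struct Affine
    W
    b
end
@functor Affine

# Same default rule as isloadleaf in the diff above: a node is a load
# leaf when every child is itself a Functors leaf (here, plain arrays).
isloadleaf_sketch(x) = all(Functors.isleaf, Functors.children(x))

isloadleaf_sketch(Affine(rand(2, 2), rand(2)))   # true: both children are arrays
```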
