From 25d25a4536abc694921f4296785705d79e1dd1e3 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Wed, 16 Nov 2022 17:53:45 -0500 Subject: [PATCH 01/22] make guide + reference --- docs/make.jl | 58 ++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 0847656b9d..40deada126 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -9,40 +9,42 @@ makedocs( sitename = "Flux", # strict = [:cross_references,], pages = [ - "Getting Started" => [ - "Welcome" => "index.md", + "Welcome" => "index.md", + "Guide" => [ + # You could read this end-to-end, or skip to what you need. + # Aim is to cover each new concept exactly once (but not list all variants). + # Hard to invent further divisions which aren't more confusing than helpful? "Quick Start" => "models/quickstart.md", "Fitting a Line" => "models/overview.md", "Gradients and Layers" => "models/basics.md", + "Training" => "training/training.md", + "Regularisation" => "models/regularisation.md", + "Recurrence" => "models/recurrence.md", + "GPU Support" => "gpu.md", + "Saving & Loading" => "saving.md", + "Performance Tips" => "performance.md", ], - "Building Models" => [ + "Reference" => [ + # This essentially collects docstrings, with a bit of introduction. + # Probably the πŸ“š marker can be removed now. "Built-in Layers πŸ“š" => "models/layers.md", - "Recurrence" => "models/recurrence.md", "Activation Functions πŸ“š" => "models/activation.md", + "Weight Initialisation πŸ“š" => "utilities.md", + "Loss Functions πŸ“š" => "models/losses.md", + "Optimisation Rules πŸ“š" => "training/optimisers.md", # TODO move optimiser intro up to Training + "Shape Inference πŸ“š" => "outputsize.md", + "Flat vs. Nested πŸ“š" => "destructure.md", + "Callback Helpers πŸ“š" => "training/callbacks.md", "NNlib.jl πŸ“š (`softmax`, `conv`, ...)" => "models/nnlib.md", + "Zygote.jl πŸ“š (`gradient`, ...)" => "training/zygote.md", + "MLUtils.jl πŸ“š (`DataLoader`, ...)" => "data/mlutils.md", + "Functors.jl πŸ“š (`fmap`, ...)" => "models/functors.md", + "OneHotArrays.jl πŸ“š (`onehot`, ...)" => "data/onehot.md", ], - "Handling Data" => [ - "MLUtils.jl πŸ“š (`DataLoader`, ...)" => "data/mlutils.md", - "OneHotArrays.jl πŸ“š (`onehot`, ...)" => "data/onehot.md", - ], - "Training Models" => [ - "Training" => "training/training.md", - "Regularisation" => "models/regularisation.md", - "Loss Functions πŸ“š" => "models/losses.md", - "Optimisation Rules πŸ“š" => "training/optimisers.md", # TODO move optimiser intro up to Training - "Callback Helpers πŸ“š" => "training/callbacks.md", - "Zygote.jl πŸ“š (`gradient`, ...)" => "training/zygote.md", - ], - "Model Tools" => [ - "GPU Support" => "gpu.md", - "Saving & Loading" => "saving.md", - "Shape Inference πŸ“š" => "outputsize.md", - "Weight Initialisation πŸ“š" => "utilities.md", - "Flat vs. Nested πŸ“š" => "destructure.md", - "Functors.jl πŸ“š (`fmap`, ...)" => "models/functors.md", - ], - "Tutorials" => [ - # Roughly in order of increasing complexity? Not chronological. + "Flux's Ecosystem" => "ecosystem.md", # This is a links page + "Tutorials" => [ + # These walk you through various tasks. It's fine if they overlap quite a lot. + # All the website tutorials can move here, perhaps much of the model zoo too. 
"Linear Regression" => "tutorials/linear_regression.md", "Julia & Flux: 60 Minute Blitz" => "tutorials/2020-09-15-deep-learning-flux.md", "Multi-layer Perceptron" => "tutorials/2021-01-26-mlp.md", @@ -51,9 +53,7 @@ makedocs( "Deep Convolutional GAN" => "tutorials/2021-10-08-dcgan-mnist.md", # Not really sure where this belongs... some in Fluxperimental, aim to delete? "Custom Layers" => "models/advanced.md", # TODO move freezing to Training - ], - "Performance Tips" => "performance.md", - "Flux's Ecosystem" => "ecosystem.md", + ], ], format = Documenter.HTML( sidebar_sitename = false, From 4e56b71ff90d3ee29ff619897bf69b6a2c0235b8 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Fri, 25 Nov 2022 14:45:14 -0500 Subject: [PATCH 02/22] try moving Fitting a Line to tutorials --- docs/make.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 40deada126..feba17d9c0 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -15,10 +15,9 @@ makedocs( # Aim is to cover each new concept exactly once (but not list all variants). # Hard to invent further divisions which aren't more confusing than helpful? "Quick Start" => "models/quickstart.md", - "Fitting a Line" => "models/overview.md", "Gradients and Layers" => "models/basics.md", "Training" => "training/training.md", - "Regularisation" => "models/regularisation.md", + # "Regularisation" => "models/regularisation.md", # consolidated in #2114 "Recurrence" => "models/recurrence.md", "GPU Support" => "gpu.md", "Saving & Loading" => "saving.md", @@ -45,6 +44,7 @@ makedocs( "Tutorials" => [ # These walk you through various tasks. It's fine if they overlap quite a lot. # All the website tutorials can move here, perhaps much of the model zoo too. + "Fitting a Line" => "models/overview.md", "Linear Regression" => "tutorials/linear_regression.md", "Julia & Flux: 60 Minute Blitz" => "tutorials/2020-09-15-deep-learning-flux.md", "Multi-layer Perceptron" => "tutorials/2021-01-26-mlp.md", From 6bd7df745174c5e90e04c897842981ae99129be0 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Mon, 28 Nov 2022 11:16:02 -0500 Subject: [PATCH 03/22] restore Fitting a Line's place --- docs/make.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index feba17d9c0..1f791149a3 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -15,6 +15,7 @@ makedocs( # Aim is to cover each new concept exactly once (but not list all variants). # Hard to invent further divisions which aren't more confusing than helpful? "Quick Start" => "models/quickstart.md", + "Fitting a Line" => "models/overview.md", "Gradients and Layers" => "models/basics.md", "Training" => "training/training.md", # "Regularisation" => "models/regularisation.md", # consolidated in #2114 @@ -44,7 +45,6 @@ makedocs( "Tutorials" => [ # These walk you through various tasks. It's fine if they overlap quite a lot. # All the website tutorials can move here, perhaps much of the model zoo too. 
-            "Fitting a Line" => "models/overview.md",
             "Linear Regression" => "tutorials/linear_regression.md",
             "Julia & Flux: 60 Minute Blitz" => "tutorials/2020-09-15-deep-learning-flux.md",
             "Multi-layer Perceptron" => "tutorials/2021-01-26-mlp.md",

From 082fe4ba110f3d765ce589c708d9419f34a11026 Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Mon, 28 Nov 2022 11:16:22 -0500
Subject: [PATCH 04/22] remove duplicate functor docstring (it's in Functors section)

---
 docs/src/models/basics.md | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/docs/src/models/basics.md b/docs/src/models/basics.md
index d1335ff229..140ed7e13d 100644
--- a/docs/src/models/basics.md
+++ b/docs/src/models/basics.md
@@ -213,13 +213,13 @@ m(5) # => 26

 ## Layer Helpers

-There is still one problem with this `Affine` layer, that Flux does not know to look inside it. This means that [`Flux.train!`](@ref) won't see its parameters, nor will [`gpu`](@ref) be able to move them to your GPU. These features are enabled by the `@functor` macro:
+There is still one problem with this `Affine` layer: Flux does not know to look inside it. This means that [`Flux.train!`](@ref) won't see its parameters, nor will [`gpu`](@ref) be able to move them to your GPU. These features are enabled by the [`@functor`](@ref Functors.@functor) macro:

 ```
 Flux.@functor Affine
 ```

-Finally, most Flux layers make bias optional, and allow you to supply the function used for generating random weights. We can easily add these refinements to the `Affine` layer as follows:
+Finally, most Flux layers make bias optional, and allow you to supply the function used for generating random weights. We can easily add these refinements to the `Affine` layer as follows, using the helper function [`create_bias`](@ref Flux.create_bias):

 ```
 function Affine((in, out)::Pair; bias=true, init=Flux.randn32)
@@ -230,7 +230,3 @@ end

 Affine(3 => 1, bias=false, init=ones) |> gpu
 ```
-
-```@docs
-Functors.@functor
-```

From 7b4cfb6b958a9fc71db211bbf79834753c40477e Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Mon, 28 Nov 2022 12:13:05 -0500
Subject: [PATCH 05/22] alter "Learning Flux" section to match

---
 docs/src/index.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index 98fffc4a5c..c3ddd332f1 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -18,11 +18,11 @@ Other closely associated packages, also installed automatically, include [Zygote

 The [quick start](@ref man-quickstart) page trains a simple neural network.

-This rest of this documentation provides a from-scratch introduction to Flux's take on models and how they work, starting with [fitting a line](@ref man-overview). Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts.
+The rest of the **guide** provides a from-scratch introduction to Flux's take on models and how they work, starting with [fitting a line](@ref man-overview). Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts.

-Sections with πŸ“š contain API listings. The same text is avalable at the Julia prompt, by typing for example `?gpu`.
+The **reference** section contains API listings. The same text is available at the Julia prompt, by typing for example `?gpu`.

-If you just want to get started writing models, the [model zoo](https://github.com/FluxML/model-zoo/) gives good starting points for many common ones.
+There are some **tutorials** about building particular models. The **[model zoo](https://github.com/FluxML/model-zoo/)** has starting points for many other common ones. And finally, the **[ecosystem page](ecosystem.md)** lists packages which define Flux models.

 ## Community

From e8ca1982db545fa31e35d00d283bf42ac2629d92 Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Mon, 28 Nov 2022 14:34:17 -0500
Subject: [PATCH 06/22] reduce the amount of text on welcome page

---
 docs/src/index.md | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index c3ddd332f1..7c1d27ce00 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -2,9 +2,9 @@

 Flux is a library for machine learning. It comes "batteries-included" with many useful tools built in, but also lets you use the full power of the Julia language where you need it. We follow a few key principles:

-* **Doing the obvious thing**. Flux has relatively few explicit APIs for features like regularisation or embeddings. Instead, writing down the mathematical form will work – and be fast.
-* **Extensible by default**. Flux is written to be highly extensible and flexible while being performant. Extending Flux is as simple as using your own code as part of the model you want - it is all [high-level Julia code](https://github.com/FluxML/Flux.jl/blob/ec16a2c77dbf6ab8b92b0eecd11661be7a62feef/src/layers/recurrent.jl#L131). When in doubt, it’s well worth looking at [the source](https://github.com/FluxML/Flux.jl/tree/master/src). If you need something different, you can easily roll your own.
-* **Play nicely with others**. Flux works well with Julia libraries from [images](https://github.com/JuliaImages/Images.jl) to [differential equation solvers](https://github.com/SciML/DifferentialEquations.jl), so you can easily build complex data processing pipelines that integrate Flux models.
+* **Doing the obvious thing**. Flux has relatively few explicit APIs. Instead, writing down the mathematical form will work – and be fast.
+* **Extensible by default**. Flux is written to be highly flexible while being performant. Extending Flux is as simple as using your own code as part of the model you want - it is all [high-level Julia code](https://github.com/FluxML/Flux.jl/tree/master/src).
+* **Play nicely with others**. Flux works well with unrelated Julia libraries from [images](https://github.com/JuliaImages/Images.jl) to [differential equation solvers](https://github.com/SciML/DifferentialEquations.jl), rather than duplicating them.

 ## Installation

 Download [Julia 1.6](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt.

 This will automatically install several other packages, including [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) which supports Nvidia GPUs. To directly access some of its functionality, you may want to add `] add CUDA` too. The page on [GPU support](gpu.md) has more details.
-Other closely associated packages, also installed automatically, include [Zygote](https://github.com/FluxML/Zygote.jl), [Optimisers](https://github.com/FluxML/Optimisers.jl), [NNlib](https://github.com/FluxML/NNlib.jl), [Functors](https://github.com/FluxML/Functors.jl) and [MLUtils](https://github.com/JuliaML/MLUtils.jl).
-
 ## Learning Flux

-The [quick start](@ref man-quickstart) page trains a simple neural network.
+The **[quick start](@ref man-quickstart)** page trains a simple neural network.

 The rest of the **guide** provides a from-scratch introduction to Flux's take on models and how they work, starting with [fitting a line](@ref man-overview). Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts.

-The **reference** section contains API listings. The same text is available at the Julia prompt, by typing for example `?gpu`.
-
 There are some **tutorials** about building particular models. The **[model zoo](https://github.com/FluxML/model-zoo/)** has starting points for many other common ones. And finally, the **[ecosystem page](ecosystem.md)** lists packages which define Flux models.

+The **reference** section contains API listings, including some companion packages: [Zygote](https://github.com/FluxML/Zygote.jl) (automatic differentiation), [Optimisers](https://github.com/FluxML/Optimisers.jl) (training), [NNlib](https://github.com/FluxML/NNlib.jl) (misc functions) and more.
+
 ## Community

From 02a3635bd6b5cdadea178a83a4d0e7c11ee598d2 Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Mon, 28 Nov 2022 14:34:33 -0500
Subject: [PATCH 07/22] add a CUDA.jl reference page

---
 docs/make.jl     |  5 +++--
 docs/src/CUDA.md | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 docs/src/CUDA.md

diff --git a/docs/make.jl b/docs/make.jl
index 1f791149a3..5b84b4043b 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,10 +1,10 @@
-using Documenter, Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote, ChainRulesCore, Plots, MLDatasets, Statistics, DataFrames
+using Documenter, Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote, ChainRulesCore, Plots, MLDatasets, Statistics, DataFrames, CUDA

 DocMeta.setdocmeta!(Flux, :DocTestSetup, :(using Flux); recursive = true)

 makedocs(
-    modules = [Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote, ChainRulesCore, Base, Plots, MLDatasets, Statistics, DataFrames],
+    modules = [Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote, ChainRulesCore, Base, Plots, MLDatasets, Statistics, DataFrames, CUDA],
     doctest = false,
     sitename = "Flux",
     # strict = [:cross_references,],
@@ -35,6 +35,7 @@ makedocs(
         "Shape Inference πŸ“š" => "outputsize.md",
         "Flat vs. Nested πŸ“š" => "destructure.md",
         "Callback Helpers πŸ“š" => "training/callbacks.md",
+        "CUDA.jl πŸ“š (`cu`, `CuIterator`, ...)" => "CUDA.md",
         "NNlib.jl πŸ“š (`softmax`, `conv`, ...)" => "models/nnlib.md",
         "Zygote.jl πŸ“š (`gradient`, ...)" => "training/zygote.md",
         "MLUtils.jl πŸ“š (`DataLoader`, ...)" => "data/mlutils.md",

diff --git a/docs/src/CUDA.md b/docs/src/CUDA.md
new file mode 100644
index 0000000000..ca6ec1269b
--- /dev/null
+++ b/docs/src/CUDA.md
@@ -0,0 +1,38 @@
+# CUDA.jl
+
+I'm not entirely sure this page should be separate from the gpu.md page.
+
+
+## Arrays
+
+```@docs
+CUDA.cu
+CUDA.AbstractGPUArray
+CUDA.CuIterator
+```
+
+```@docs
+CUDA.allowscalar
+```
+
+```@docs
+Flux.gpu
+Flux.cpu
+```
+
+
+## Devices
+
+```@docs
+CUDA.functional
+CUDA.device
+CUDA.device!
+```
+
+
+## Benchmarking
+
+```@docs
+CUDA.@time
+CUDA.@sync
+```

From e0b62c07b47d993c0c52c1859ceafb3b7fde7c03 Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Mon, 28 Nov 2022 22:03:08 -0500
Subject: [PATCH 08/22] add Alternatives to Flux section

---
 docs/src/ecosystem.md | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/docs/src/ecosystem.md b/docs/src/ecosystem.md
index 9bcefc8d28..e36d328839 100644
--- a/docs/src/ecosystem.md
+++ b/docs/src/ecosystem.md
@@ -9,7 +9,7 @@ See also academic work citing Flux or Zygote.

 ## Flux models

-Packages that are actual `Flux` models but are not available directly through the `Flux` package.
+Packages that are actual `Flux` models. See also Flux's [model-zoo](https://github.com/FluxML/model-zoo).

 ### Computer vision
@@ -109,3 +109,15 @@ Some useful and random packages!
 - [DrWatson.jl](https://github.com/JuliaDynamics/DrWatson.jl) is a scientific project assistant software.

 This tight integration among Julia packages is shown in some of the examples in the [model-zoo](https://github.com/FluxML/model-zoo) repository.
+
+
+## Alternatives to Flux
+
+Julia has several other libraries for making neural networks.
+
+* [SimpleChains.jl](https://github.com/PumasAI/SimpleChains.jl) is focused on making small, simple, CPU-based, neural networks fast. Uses [LoopVectorization.jl](https://github.com/JuliaSIMD/LoopVectorization.jl). (Was `FastChain` in DiffEqFlux.jl)
+
+* [Knet.jl](https://github.com/denizyuret/Knet.jl) is a neural network library built around [AutoGrad.jl](https://github.com/denizyuret/AutoGrad.jl), with beautiful documentation.
+
+* [Lux.jl](https://github.com/avik-pal/Lux.jl) (earlier ExplicitFluxLayers.jl) shares much of the design, use-case, and NNlib.jl / Optimisers.jl back-end of Flux. But instead of encapsulating all parameters within the model structure, it separates this into 3 components: a model, a tree of parameters, and a tree of model states.
+

From 17c49cbe5c22415bf381147018b504aa066a2fb2 Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Tue, 29 Nov 2022 09:46:02 -0500
Subject: [PATCH 09/22] fixup

---
 docs/Project.toml                            | 1 +
 docs/make.jl                                 | 2 +-
 docs/src/{ => reference}/CUDA.md             | 0
 docs/src/tutorials/2021-10-14-vanilla-gan.md | 2 +-
 4 files changed, 3 insertions(+), 2 deletions(-)
 rename docs/src/{ => reference}/CUDA.md (100%)

diff --git a/docs/Project.toml b/docs/Project.toml
index c1812ee385..8190171b09 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"

diff --git a/docs/make.jl b/docs/make.jl
index 5b84b4043b..9d662f07ef 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -35,7 +35,7 @@ makedocs(
         "Shape Inference πŸ“š" => "outputsize.md",
         "Flat vs. Nested πŸ“š" => "destructure.md",
         "Callback Helpers πŸ“š" => "training/callbacks.md",
-        "CUDA.jl πŸ“š (`cu`, `CuIterator`, ...)" => "CUDA.md",
+        "CUDA.jl πŸ“š (`cu`, `CuIterator`, ...)" => "reference/CUDA.md", # not sure
         "NNlib.jl πŸ“š (`softmax`, `conv`, ...)" => "models/nnlib.md",
         "Zygote.jl πŸ“š (`gradient`, ...)" => "training/zygote.md",
         "MLUtils.jl πŸ“š (`DataLoader`, ...)" => "data/mlutils.md",

diff --git a/docs/src/CUDA.md b/docs/src/reference/CUDA.md
similarity index 100%
rename from docs/src/CUDA.md
rename to docs/src/reference/CUDA.md

diff --git a/docs/src/tutorials/2021-10-14-vanilla-gan.md b/docs/src/tutorials/2021-10-14-vanilla-gan.md
index b2c7bb6f4c..5b09345db8 100644
--- a/docs/src/tutorials/2021-10-14-vanilla-gan.md
+++ b/docs/src/tutorials/2021-10-14-vanilla-gan.md
@@ -32,7 +32,7 @@ type `add MLDatasets` or perform this operation with the Pkg module like this
 > Pkg.add(MLDatasets)
 ```

-While [UnicodePlots]() is not necessary, it can be used to plot generated samples
+While [UnicodePlots](https://github.com/JuliaPlots/UnicodePlots.jl) is not necessary, it can be used to plot generated samples
 into the terminal during training. Having direct feedback, instead of looking at
 plots in a separate window, is fantastic for debugging.

From 4f6015cf1182d30da93c55e4a97987620a14f413 Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Tue, 29 Nov 2022 09:46:46 -0500
Subject: [PATCH 10/22] hide some tutorials, try moving Ecosystem up

---
 docs/make.jl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index 9d662f07ef..cf12b802c1 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -24,6 +24,7 @@ makedocs(
         "Saving & Loading" => "saving.md",
         "Performance Tips" => "performance.md",
     ],
+    "Flux's Ecosystem" => "ecosystem.md", # This is a links page
     "Reference" => [
         # This essentially collects docstrings, with a bit of introduction.
         # Probably the πŸ“š marker can be removed now.
@@ -42,16 +43,18 @@ makedocs(
         "Functors.jl πŸ“š (`fmap`, ...)" => "models/functors.md",
         "OneHotArrays.jl πŸ“š (`onehot`, ...)" => "data/onehot.md",
     ],
-    "Flux's Ecosystem" => "ecosystem.md", # This is a links page
     "Tutorials" => [
         # These walk you through various tasks. It's fine if they overlap quite a lot.
-        # All the website tutorials can move here, perhaps much of the model zoo too.
+ # All the website tutorials can move here, perhaps much of the model zoo too? + # Or perhaps those should just be trashed, model zoo versions are newer & more useful. "Linear Regression" => "tutorials/linear_regression.md", + #= "Julia & Flux: 60 Minute Blitz" => "tutorials/2020-09-15-deep-learning-flux.md", "Multi-layer Perceptron" => "tutorials/2021-01-26-mlp.md", "Simple ConvNet" => "tutorials/2021-02-07-convnet.md", "Generative Adversarial Net" => "tutorials/2021-10-14-vanilla-gan.md", "Deep Convolutional GAN" => "tutorials/2021-10-08-dcgan-mnist.md", + =# # Not really sure where this belongs... some in Fluxperimental, aim to delete? "Custom Layers" => "models/advanced.md", # TODO move freezing to Training ], From 9d5b5ccb9a56263f44b2f87f31d5ed4b827b1ad3 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Tue, 29 Nov 2022 09:55:56 -0500 Subject: [PATCH 11/22] =?UTF-8?q?remove=20the=20=F0=9F=93=9A=20marker?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/make.jl | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index cf12b802c1..5f99a5448f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -27,21 +27,20 @@ makedocs( "Flux's Ecosystem" => "ecosystem.md", # This is a links page "Reference" => [ # This essentially collects docstrings, with a bit of introduction. - # Probably the πŸ“š marker can be removed now. - "Built-in Layers πŸ“š" => "models/layers.md", - "Activation Functions πŸ“š" => "models/activation.md", - "Weight Initialisation πŸ“š" => "utilities.md", - "Loss Functions πŸ“š" => "models/losses.md", - "Optimisation Rules πŸ“š" => "training/optimisers.md", # TODO move optimiser intro up to Training - "Shape Inference πŸ“š" => "outputsize.md", - "Flat vs. Nested πŸ“š" => "destructure.md", - "Callback Helpers πŸ“š" => "training/callbacks.md", - "CUDA.jl πŸ“š (`cu`, `CuIterator`, ...)" => "reference/CUDA.md", # not sure - "NNlib.jl πŸ“š (`softmax`, `conv`, ...)" => "models/nnlib.md", - "Zygote.jl πŸ“š (`gradient`, ...)" => "training/zygote.md", - "MLUtils.jl πŸ“š (`DataLoader`, ...)" => "data/mlutils.md", - "Functors.jl πŸ“š (`fmap`, ...)" => "models/functors.md", - "OneHotArrays.jl πŸ“š (`onehot`, ...)" => "data/onehot.md", + "Built-in Layers" => "models/layers.md", + "Activation Functions" => "models/activation.md", + "Weight Initialisation" => "utilities.md", + "Loss Functions" => "models/losses.md", + "Optimisation Rules" => "training/optimisers.md", # TODO move optimiser intro up to Training + "Shape Inference" => "outputsize.md", + "Flat vs. Nested" => "destructure.md", + "Callback Helpers" => "training/callbacks.md", + "NNlib.jl (`softmax`, `conv`, ...)" => "models/nnlib.md", + "Zygote.jl (`gradient`, ...)" => "training/zygote.md", + "MLUtils.jl (`DataLoader`, ...)" => "data/mlutils.md", + "Functors.jl (`fmap`, ...)" => "models/functors.md", + "OneHotArrays.jl (`onehot`, ...)" => "data/onehot.md", + "CUDA.jl (`cu`, `CuIterator`, ...)" => "reference/CUDA.md", # do we want this? ], "Tutorials" => [ # These walk you through various tasks. It's fine if they overlap quite a lot. 
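(A minimal sketch of what the "Layer Helpers" passage edited in PATCH 04 above describes, hedged: it assumes the Flux 0.13-era API used throughout this series, and shows the simplified `Affine` from the guide, without the optional bias. After `Flux.@functor`, the same struct becomes visible to `params`, `train!` and `gpu`.)

```julia
using Flux

struct Affine
    W
    b
end

Affine((in, out)::Pair; init = Flux.randn32) = Affine(init(out, in), init(out))

(m::Affine)(x) = m.W * x .+ m.b

Flux.@functor Affine     # this is what lets Flux look inside Affine

m = Affine(3 => 1)
Flux.params(m)           # now lists m.W and m.b
g = gradient(() -> sum(m(rand(Float32, 3))), Flux.params(m))
g[m.W]                   # gradient with respect to the weight matrix
```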
From d7d07ec0e464dc905a7c79285cb68f5bfd40479e Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Tue, 29 Nov 2022 10:01:23 -0500 Subject: [PATCH 12/22] tweaks to links page --- docs/make.jl | 2 +- docs/src/ecosystem.md | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 5f99a5448f..3418f926a0 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -24,7 +24,7 @@ makedocs( "Saving & Loading" => "saving.md", "Performance Tips" => "performance.md", ], - "Flux's Ecosystem" => "ecosystem.md", # This is a links page + "Ecosystem" => "ecosystem.md", # This is a links page... "Reference" => [ # This essentially collects docstrings, with a bit of introduction. "Built-in Layers" => "models/layers.md", diff --git a/docs/src/ecosystem.md b/docs/src/ecosystem.md index e36d328839..361f889d13 100644 --- a/docs/src/ecosystem.md +++ b/docs/src/ecosystem.md @@ -5,11 +5,11 @@ globally providing a rich and consistent user experience. This is a non-exhaustive list of Julia packages, nicely complementing `Flux` in typical machine learning and deep learning workflows. To add your project please send a [PR](https://github.com/FluxML/Flux.jl/pulls). -See also academic work citing Flux or Zygote. +See also academic work [citing Flux](https://scholar.google.com/scholar?cites=9731162218836700005&hl=en) or [citing Zygote](https://scholar.google.com/scholar?cites=11943854577624257878&hl=en). ## Flux models -Packages that are actual `Flux` models. See also Flux's [model-zoo](https://github.com/FluxML/model-zoo). +- Flux's [model-zoo](https://github.com/FluxML/model-zoo) contains examples from many domains. ### Computer vision @@ -38,6 +38,8 @@ Packages that are actual `Flux` models. See also Flux's [model-zoo](https://gith - [FluxArchitectures.jl](https://github.com/sdobber/FluxArchitectures.jl) is a collection of advanced network architectures for time series forecasting. +--- + ## Tools closely associated with Flux Utility tools you're unlikely to have met if you never used Flux! @@ -64,9 +66,10 @@ Tools to put data into the right order for creating a model. ### Parameters -- [Parameters.jl](https://github.com/mauro3/Parameters.jl) types with default field values, keyword constructors and (un-)pack macros. - [ParameterSchedulers.jl](https://github.com/darsnack/ParameterSchedulers.jl) standard scheduling policies for machine learning. +--- + ## Differentiable programming Packages based on differentiable programming but not necessarily related to Machine Learning. @@ -90,6 +93,7 @@ Packages based on differentiable programming but not necessarily related to Mach - [OnlineStats.jl](https://github.com/joshday/OnlineStats.jl) provides single-pass algorithms for statistics. +--- ## Useful miscellaneous packages @@ -104,12 +108,14 @@ Some useful and random packages! - [ProgressMeter.jl](https://github.com/timholy/ProgressMeter.jl) progress meters for long-running computations. - [TensorBoardLogger.jl](https://github.com/PhilipVinc/TensorBoardLogger.jl) easy peasy logging to [tensorboard](https://www.tensorflow.org/tensorboard) in Julia - [ArgParse.jl](https://github.com/carlobaldassi/ArgParse.jl) is a package for parsing command-line arguments to Julia programs. +- [Parameters.jl](https://github.com/mauro3/Parameters.jl) types with default field values, keyword constructors and (un-)pack macros. - [BSON.jl](https://github.com/JuliaIO/BSON.jl) is a package for working with the Binary JSON serialisation format. 
- [DataFrames.jl](https://github.com/JuliaData/DataFrames.jl) in-memory tabular data in Julia. - [DrWatson.jl](https://github.com/JuliaDynamics/DrWatson.jl) is a scientific project assistant software. This tight integration among Julia packages is shown in some of the examples in the [model-zoo](https://github.com/FluxML/model-zoo) repository. +--- ## Alternatives to Flux From b222b04f35ab3615ba7c4b5fc3024700d1164090 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Tue, 29 Nov 2022 10:23:39 -0500 Subject: [PATCH 13/22] trim even more surplus text from welcome page --- docs/src/index.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 7c1d27ce00..c8b51f2da4 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -8,9 +8,7 @@ Flux is a library for machine learning. It comes "batteries-included" with many ## Installation -Download [Julia 1.6](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt. - -This will automatically install several other packages, including [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) which supports Nvidia GPUs. To directly access some of its functionality, you may want to add `] add CUDA` too. The page on [GPU support](gpu.md) has more details. +Download [Julia 1.6](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt. This will automatically install several other packages, including [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) which supports Nvidia GPUs. ## Learning Flux From 81fb2a3de915dc7cd13bc786d4fc50466ab59a24 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Tue, 29 Nov 2022 10:47:45 -0500 Subject: [PATCH 14/22] rm CUDA page --- docs/make.jl | 1 - docs/src/gpu.md | 26 ++++++++++++++++++++++++++ docs/src/reference/CUDA.md | 38 -------------------------------------- 3 files changed, 26 insertions(+), 39 deletions(-) delete mode 100644 docs/src/reference/CUDA.md diff --git a/docs/make.jl b/docs/make.jl index 3418f926a0..cd85e84db4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -40,7 +40,6 @@ makedocs( "MLUtils.jl (`DataLoader`, ...)" => "data/mlutils.md", "Functors.jl (`fmap`, ...)" => "models/functors.md", "OneHotArrays.jl (`onehot`, ...)" => "data/onehot.md", - "CUDA.jl (`cu`, `CuIterator`, ...)" => "reference/CUDA.md", # do we want this? ], "Tutorials" => [ # These walk you through various tasks. It's fine if they overlap quite a lot. diff --git a/docs/src/gpu.md b/docs/src/gpu.md index e8e98774b6..d466c566e9 100644 --- a/docs/src/gpu.md +++ b/docs/src/gpu.md @@ -182,3 +182,29 @@ $ export CUDA_VISIBLE_DEVICES='0,1' More information for conditional use of GPUs in CUDA.jl can be found in its [documentation](https://cuda.juliagpu.org/stable/installation/conditional/#Conditional-use), and information about the specific use of the variable is described in the [Nvidia CUDA blog post](https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/). + + +## CUDA Reference + +Arrays and iterators: + +```@docs +CUDA.cu +CUDA.CuIterator +``` + +Device settings: + +```@docs +CUDA.allowscalar +CUDA.functional +CUDA.device +CUDA.device! 
+``` + +For benchmarking: + +```@docs +CUDA.@time +CUDA.@sync +``` diff --git a/docs/src/reference/CUDA.md b/docs/src/reference/CUDA.md deleted file mode 100644 index ca6ec1269b..0000000000 --- a/docs/src/reference/CUDA.md +++ /dev/null @@ -1,38 +0,0 @@ -# CUDA.jl - -I'm not entirely sure this page should be separate from the gpu.md page. - - -## Arrays - -```@docs -CUDA.cu -CUDA.AbstractGPUArray -CUDA.CuIterator -``` - -```@docs -CUDA.allowscalar -``` - -```@docs -Flux.gpu -Flux.cpu -``` - - -## Devices - -```@docs -CUDA.functional -CUDA.device -CUDA.device! -``` - - -## Benchmarking - -```@docs -CUDA.@time -CUDA.@sync -``` From cbdc1543aca14510c30ae7ce7b894a47602ad968 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Tue, 29 Nov 2022 12:15:18 -0500 Subject: [PATCH 15/22] the welcome page is short enough not to need navigation, and using H3 saves space on the navigation panel so you can see more other things --- docs/src/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index c8b51f2da4..d2a796b684 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -6,11 +6,11 @@ Flux is a library for machine learning. It comes "batteries-included" with many * **Extensible by default**. Flux is written to be highly flexible while being performant. Extending Flux is as simple as using your own code as part of the model you want - it is all [high-level Julia code](https://github.com/FluxML/Flux.jl/tree/master/src). * **Play nicely with others**. Flux works well with unrelated Julia libraries from [images](https://github.com/JuliaImages/Images.jl) to [differential equation solvers](https://github.com/SciML/DifferentialEquations.jl), rather than duplicating them. -## Installation +### Installation Download [Julia 1.6](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt. This will automatically install several other packages, including [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) which supports Nvidia GPUs. -## Learning Flux +### Learning Flux The **[quick start](@ref man-quickstart)** page trains a simple neural network. @@ -20,7 +20,7 @@ There are some **tutorials** about building particular models. The **[model zoo] The **reference** section contains API listings, including some companion packages: [Zygote](https://github.com/FluxML/Zygote.jl) (automatic differentiation), [Optimisers](https://github.com/FluxML/Optimisers.jl) (training), [NNlib](https://github.com/FluxML/NNlib.jl) (misc functions) and more. -## Community +### Community Everyone is welcome to join our community on the [Julia discourse forum](https://discourse.julialang.org/), or the [slack chat](https://discourse.julialang.org/t/announcing-a-julia-slack/4866) (channel #machine-learning). If you have questions or issues we'll try to help you out. 
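(For orientation while PATCHes 07 to 17 move this material around, a rough sketch of the CUDA.jl calls that the short-lived "CUDA Reference" section above collects. It assumes CUDA.jl is installed and `CUDA.functional()` returns true; nothing here is Flux-specific.)

```julia
using CUDA

CUDA.functional()                # true if a usable Nvidia GPU was found
CUDA.allowscalar(false)          # error on slow scalar indexing instead of warning

W = cu(rand(Float32, 128, 784))  # cu moves an Array to the GPU
x = CUDA.rand(Float32, 784, 32)
CUDA.@time CUDA.@sync W * x      # GPU-aware timing; @sync waits for the kernel

# CuIterator uploads one batch at a time, freeing each before the next:
for xb in CuIterator([rand(Float32, 784, 32) for _ in 1:3])
    sum(W * xb)
end
```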
From 7b48d24574744421d8efd57dbee45cfc92df267f Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Tue, 29 Nov 2022 12:35:51 -0500
Subject: [PATCH 16/22] fix misleading sentence in nnlib intro

---
 docs/src/models/nnlib.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/models/nnlib.md b/docs/src/models/nnlib.md
index cf42cc99bf..72b8481f56 100644
--- a/docs/src/models/nnlib.md
+++ b/docs/src/models/nnlib.md
@@ -1,6 +1,6 @@
 # Neural Network primitives from NNlib.jl

-Flux re-exports all of the functions exported by the [NNlib](https://github.com/FluxML/NNlib.jl) package. This includes activation functions, described on the next page. Many of the functions on this page exist primarily as the internal implementation of Flux layer, but can also be used independently.
+Flux re-exports all of the functions exported by the [NNlib](https://github.com/FluxML/NNlib.jl) package. This includes activation functions, described on [their own page](@ref man-activations). Many of the functions on this page exist primarily as the internal implementation of Flux layers, but can also be used independently.

 ## Softmax

From e1e83c4aea5e348ae8efc363c49e2856668a5dd5 Mon Sep 17 00:00:00 2001
From: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
Date: Wed, 30 Nov 2022 22:56:38 -0500
Subject: [PATCH 17/22] remove CUDA

---
 docs/Project.toml |  1 -
 docs/make.jl      |  8 ++++----
 docs/src/gpu.md   | 25 -------------------------
 3 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 8190171b09..c1812ee385 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,7 +1,6 @@
 [deps]
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"

diff --git a/docs/make.jl b/docs/make.jl
index cd85e84db4..0a982c52e9 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,10 +1,10 @@
-using Documenter, Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote, ChainRulesCore, Plots, MLDatasets, Statistics, DataFrames, CUDA
+using Documenter, Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote, ChainRulesCore, Plots, MLDatasets, Statistics, DataFrames

 DocMeta.setdocmeta!(Flux, :DocTestSetup, :(using Flux); recursive = true)

 makedocs(
-    modules = [Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote, ChainRulesCore, Base, Plots, MLDatasets, Statistics, DataFrames, CUDA],
+    modules = [Flux, NNlib, Functors, MLUtils, BSON, Optimisers, OneHotArrays, Zygote, ChainRulesCore, Base, Plots, MLDatasets, Statistics, DataFrames],
     doctest = false,
     sitename = "Flux",
     # strict = [:cross_references,],
@@ -18,13 +18,13 @@ makedocs(
         "Fitting a Line" => "models/overview.md",
         "Gradients and Layers" => "models/basics.md",
         "Training" => "training/training.md",
-        # "Regularisation" => "models/regularisation.md", # consolidated in #2114
+        "Regularisation" => "models/regularisation.md", # consolidated in #2114
        "Recurrence" => "models/recurrence.md",
        "GPU Support" => "gpu.md",
        "Saving & Loading" => "saving.md",
        "Performance Tips" => "performance.md",
     ],
-    "Ecosystem" => "ecosystem.md", # This is a links page...
+    "Ecosystem" => "ecosystem.md",
     "Reference" => [
         # This essentially collects docstrings, with a bit of introduction.
"Built-in Layers" => "models/layers.md", diff --git a/docs/src/gpu.md b/docs/src/gpu.md index d466c566e9..46fed4e1bf 100644 --- a/docs/src/gpu.md +++ b/docs/src/gpu.md @@ -183,28 +183,3 @@ $ export CUDA_VISIBLE_DEVICES='0,1' More information for conditional use of GPUs in CUDA.jl can be found in its [documentation](https://cuda.juliagpu.org/stable/installation/conditional/#Conditional-use), and information about the specific use of the variable is described in the [Nvidia CUDA blog post](https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/). - -## CUDA Reference - -Arrays and iterators: - -```@docs -CUDA.cu -CUDA.CuIterator -``` - -Device settings: - -```@docs -CUDA.allowscalar -CUDA.functional -CUDA.device -CUDA.device! -``` - -For benchmarking: - -```@docs -CUDA.@time -CUDA.@sync -``` From 952972454204211be4b28e8073ca789572a3c343 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Wed, 30 Nov 2022 23:33:21 -0500 Subject: [PATCH 18/22] Apply 2 suggestions Co-authored-by: Brian Chen --- docs/src/ecosystem.md | 2 +- docs/src/index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/ecosystem.md b/docs/src/ecosystem.md index 361f889d13..95360cad50 100644 --- a/docs/src/ecosystem.md +++ b/docs/src/ecosystem.md @@ -123,7 +123,7 @@ Julia has several other libraries for making neural networks. * [SimpleChains.jl](https://github.com/PumasAI/SimpleChains.jl) is focused on making small, simple, CPU-based, neural networks fast. Uses [LoopVectorization.jl](https://github.com/JuliaSIMD/LoopVectorization.jl). (Was `FastChain` in DiffEqFlux.jl) -* [Knet.jl](https://github.com/denizyuret/Knet.jl) is a neural network library built around [AutoGrad.jl](https://github.com/denizyuret/AutoGrad.jl), with beautiful documentation. +* [Knet.jl](https://github.com/denizyuret/Knet.jl) is a neural network library built around [AutoGrad.jl](https://github.com/denizyuret/AutoGrad.jl). * [Lux.jl](https://github.com/avik-pal/Lux.jl) (earlier ExplicitFluxLayers.jl) shares much of the design, use-case, and NNlib.jl / Optimisers.jl back-end of Flux. But instead of encapsulating all parameters within the model structure, it separates this into 3 components: a model, a tree of parameters, and a tree of model states. diff --git a/docs/src/index.md b/docs/src/index.md index d2a796b684..e14f6fc970 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -8,7 +8,7 @@ Flux is a library for machine learning. It comes "batteries-included" with many ### Installation -Download [Julia 1.6](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt. This will automatically install several other packages, including [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) which supports Nvidia GPUs. +Download [Julia 1.6](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt. This will automatically install several other packages, including [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) for Nvidia GPU support. 
### Learning Flux From d4c177d3d77c4782b293f16a4a285c6512de24d0 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Thu, 1 Dec 2022 15:21:58 -0500 Subject: [PATCH 19/22] Update docs/src/index.md --- docs/src/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index e14f6fc970..833c85e5e8 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -18,7 +18,7 @@ This rest of the **guide** provides a from-scratch introduction to Flux's take o There are some **tutorials** about building particular models. The **[model zoo](https://github.com/FluxML/model-zoo/)** has starting points for many other common ones. And finally, the **[ecosystem page](ecosystem.md)** lists packages which define Flux models. -The **reference** section contains API listings, including some companion packages: [Zygote](https://github.com/FluxML/Zygote.jl) (automatic differentiation), [Optimisers](https://github.com/FluxML/Optimisers.jl) (training), [NNlib](https://github.com/FluxML/NNlib.jl) (misc functions) and more. +The **reference** section includes, beside Flux's own functions, those of some companion packages: [Zygote.jl](https://github.com/FluxML/Zygote.jl) (automatic differentiation), [Optimisers.jl](https://github.com/FluxML/Optimisers.jl) (training) and others. ### Community From e1e83c4aea5e348ae8efc363c49e2856668a5dd5 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Wed, 7 Dec 2022 18:49:31 -0500 Subject: [PATCH 20/22] add terminology note --- docs/src/ecosystem.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/src/ecosystem.md b/docs/src/ecosystem.md index 95360cad50..add69ba64f 100644 --- a/docs/src/ecosystem.md +++ b/docs/src/ecosystem.md @@ -127,3 +127,11 @@ Julia has several other libraries for making neural networks. * [Lux.jl](https://github.com/avik-pal/Lux.jl) (earlier ExplicitFluxLayers.jl) shares much of the design, use-case, and NNlib.jl / Optimisers.jl back-end of Flux. But instead of encapsulating all parameters within the model structure, it separates this into 3 components: a model, a tree of parameters, and a tree of model states. +!!! compat Explicit or explicit? + Flux's [training docs](@ref man-training) talk about changes from Zygote's implicit to + explicit gradients, dictionary-like to tree-like structures. + (See also [Zygote's description](https://fluxml.ai/Zygote.jl/dev/#Explicit-and-Implicit-Parameters-1) of these.) + Lux also uses Zygote, but perhaps confusingly uses of the word "explicit" means + something completetly unrelated: storing the tree of parameters (and of state) + separately from the model. + From 821d8f2cd37c7ef7f7ad20b46cb2af1eacc3a6b9 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Wed, 7 Dec 2022 20:05:09 -0500 Subject: [PATCH 21/22] remove example functions from headings --- docs/make.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 0a982c52e9..ee836b216b 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -35,11 +35,11 @@ makedocs( "Shape Inference" => "outputsize.md", "Flat vs. 
Nested" => "destructure.md", "Callback Helpers" => "training/callbacks.md", - "NNlib.jl (`softmax`, `conv`, ...)" => "models/nnlib.md", - "Zygote.jl (`gradient`, ...)" => "training/zygote.md", - "MLUtils.jl (`DataLoader`, ...)" => "data/mlutils.md", - "Functors.jl (`fmap`, ...)" => "models/functors.md", - "OneHotArrays.jl (`onehot`, ...)" => "data/onehot.md", + "Gradients -- Zygote.jl" => "training/zygote.md", + "Batching Data -- MLUtils.jl" => "data/mlutils.md", + "OneHotArrays.jl" => "data/onehot.md", + "Low-level Operations -- NNlib.jl" => "models/nnlib.md", + "Nested Structures -- Functors.jl" => "models/functors.md", ], "Tutorials" => [ # These walk you through various tasks. It's fine if they overlap quite a lot. From 10a50af6928110db6ebe3e7ea61d6e52240bf977 Mon Sep 17 00:00:00 2001 From: Michael Abbott <32575566+mcabbott@users.noreply.github.com> Date: Wed, 7 Dec 2022 20:33:05 -0500 Subject: [PATCH 22/22] fixup --- docs/src/ecosystem.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/src/ecosystem.md b/docs/src/ecosystem.md index add69ba64f..785d36ea59 100644 --- a/docs/src/ecosystem.md +++ b/docs/src/ecosystem.md @@ -127,11 +127,10 @@ Julia has several other libraries for making neural networks. * [Lux.jl](https://github.com/avik-pal/Lux.jl) (earlier ExplicitFluxLayers.jl) shares much of the design, use-case, and NNlib.jl / Optimisers.jl back-end of Flux. But instead of encapsulating all parameters within the model structure, it separates this into 3 components: a model, a tree of parameters, and a tree of model states. -!!! compat Explicit or explicit? +!!! compat "Explicit or explicit?" Flux's [training docs](@ref man-training) talk about changes from Zygote's implicit to explicit gradients, dictionary-like to tree-like structures. (See also [Zygote's description](https://fluxml.ai/Zygote.jl/dev/#Explicit-and-Implicit-Parameters-1) of these.) - Lux also uses Zygote, but perhaps confusingly uses of the word "explicit" means - something completetly unrelated: storing the tree of parameters (and of state) - separately from the model. + Lux also uses Zygote, but uses the word "explicit" to mean something unrelated, + namely storing the tree of parameters (and of state) separately from the model.
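(To make the final compat note concrete: a minimal sketch of the two gradient styles it contrasts, assuming the Flux 0.13-era API in which both are supported. The note's point is that Lux's use of "explicit" refers to parameter storage, not to either call style below.)

```julia
using Flux

m = Dense(2 => 1)
x = rand(Float32, 2, 8)

# Implicit, dictionary-like: the result is keyed by the parameter arrays themselves.
gs = gradient(() -> sum(m(x)), Flux.params(m))
gs[m.weight]

# Explicit, tree-like: the result is a NamedTuple mirroring the model's structure.
grads = gradient(model -> sum(model(x)), m)
grads[1].weight
```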