Add Adapt.adapt_structure method for Optimisers.Leaf (#180)

vpuri3 · ToucheSir · mcabbott · web-flow · commit 9928588342a3 · 2024-11-12T12:49:05.000-05:00
* Adapt.adapt_structure method for Optimisers.Leaf

* import Adapt.jl

* add Adapt.jl to Project.toml

* adapt compat

* based on discussion: the adapt_structure method does not maintain the IdDict handled by Functors, so we add a warning referring the user to Flux.gpu or MLDataDevices.gpu_device()

* Update ext/OptimisersAdaptExt.jl

Co-authored-by: Brian Chen <ToucheSir@users.noreply.github.com>

* edit warning to indicate that this is a correctness issue

* Update ext/OptimisersAdaptExt.jl

Co-authored-by: Michael Abbott <32575566+mcabbott@users.noreply.github.com>

---------

Co-authored-by: Brian Chen <ToucheSir@users.noreply.github.com>
Co-authored-by: Michael Abbott <32575566+mcabbott@users.noreply.github.com>
diff --git a/Project.toml b/Project.toml
@@ -11,12 +11,15 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [weakdeps]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
 
 [extensions]
+OptimisersAdaptExt = ["Adapt"]
 OptimisersEnzymeCoreExt = "EnzymeCore"
 
 [compat]
+Adapt = "4"
 ChainRulesCore = "1"
 EnzymeCore = "0.8.5"
 Functors = "0.4.9, 0.5"
diff --git a/ext/OptimisersAdaptExt.jl b/ext/OptimisersAdaptExt.jl
@@ -0,0 +1,20 @@
+module OptimisersAdaptExt
+
+import Adapt
+import Optimisers: Leaf
+
+function Adapt.adapt_structure(to, leaf::Leaf)
+  # Rebuild `leaf` with its rule and state adapted to `to`. The warning is capped by
+  # `maxlog=1`; its text explains why this path is unsafe for shared parameters.
+  @warn """`Optimisers.Leaf` object does not support device transfer via
+  `Adapt.jl`. This is because `Adapt.jl` does not handle shared parameters (i.e. the same parameter array
+  appearing more than once in the model), and in such cases this will lead to  incorrect gradient updates. 
+  Avoid this by calling `Flux.gpu/cpu` or `MLDataDevices.cpu_device()/gpu_device()` on the
+  optimiser state object.
+  """ maxlog=1
+
+  # Only `rule` and `state` pass through `Adapt.adapt`; `frozen` is copied as-is.
+  return Leaf(Adapt.adapt(to, leaf.rule), Adapt.adapt(to, leaf.state), leaf.frozen)
+end
+	
+end