diffrax/_adjoint.py (+28 −1: 28 additions & 1 deletion)
@@ -1099,7 +1099,34 @@ class ReversibleAdjoint(AbstractAdjoint):
     [`diffrax.AbstractReversibleSolver`][].

     Gradient calculation is exact (up to floating point errors) and backpropagation
-    is linear in time $O(n)$ and constant in memory $O(1)$, for $n$ time steps.
+    becomes linear in time $O(n)$ and constant in memory $O(1)$, for $n$ time steps.
+
+    !!! note
+
+        This adjoint can be less numerically stable than
+        [`diffrax.RecursiveCheckpointAdjoint`][] and [`diffrax.DirectAdjoint`][].
+        Stability can be largely improved by using [double (64bit) precision](https://jax.readthedocs.io/en/latest/notebooks/Common_Gotchas_in_JAX.html#double-64bit-precision)
+        and [smaller/adaptive step sizes](https://docs.kidger.site/diffrax/api/stepsize_controller/).
+
+    ??? cite "References"
+
+        For an introduction to reversible backpropagation, see these references:
+
+        ```bibtex
+        @article{mccallum2024efficient,
+            title={Efficient, Accurate and Stable Gradients for Neural ODEs},
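The note above recommends 64-bit precision and smaller/adaptive step sizes to improve the stability of this adjoint. A minimal sketch of applying those mitigations is below; it is not part of this diff. It assumes `diffrax.ReversibleHeun` counts as a reversible solver compatible with `ReversibleAdjoint` at this commit, and the vector field, tolerances, and time span are purely illustrative.

```python
# Hedged sketch: ReversibleAdjoint with the stability mitigations from the note
# (64-bit precision + an adaptive step-size controller). Solver choice and
# tolerances are illustrative assumptions, not taken from this change.
import jax
import jax.numpy as jnp
import diffrax

jax.config.update("jax_enable_x64", True)  # double (64-bit) precision


def vector_field(t, y, args):
    # Stand-in linear ODE dy/dt = -y, used only to make the example runnable.
    return -y


term = diffrax.ODETerm(vector_field)
solver = diffrax.ReversibleHeun()  # assumed reversible solver for this adjoint
controller = diffrax.PIDController(rtol=1e-8, atol=1e-8)  # adaptive step sizes


def loss(y0):
    sol = diffrax.diffeqsolve(
        term,
        solver,
        t0=0.0,
        t1=1.0,
        dt0=0.1,
        y0=y0,
        stepsize_controller=controller,
        adjoint=diffrax.ReversibleAdjoint(),  # O(n) time, O(1) memory backprop
    )
    return jnp.sum(sol.ys**2)


grads = jax.grad(loss)(jnp.array([1.0]))
```

With `jax_enable_x64` enabled and tighter tolerances on the controller, the reversible reconstruction accumulates less floating-point error per step, which is the stability improvement the docstring note is pointing at.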
0 commit comments