feat: non-allocating write outs (#189)

martinjrobins · web-flow · commit 3cd7f179c1d6 · 2025-10-13T23:45:06.000+01:00
* feat: non-allocating dense write out

* format

* feat: non-allocating solve write_out test

* add some tests and tidy

* use named constants

* format
diff --git a/diffsol/src/matrix/dense_faer_serial.rs b/diffsol/src/matrix/dense_faer_serial.rs
@@ -177,6 +177,14 @@ impl<T: Scalar> DenseMatrix for FaerMat<T> {
         Self { data, context: ctx }
     }
 
+    fn resize_cols(&mut self, ncols: IndexType) {
+        if ncols == self.ncols() {
+            return;
+        }
+        let nrows = self.nrows();
+        self.data.resize_with(nrows, ncols, |_, _| T::zero());
+    }
+
     fn get_index(&self, i: IndexType, j: IndexType) -> Self::T {
         self.data[(i, j)]
     }
@@ -384,4 +392,9 @@ mod tests {
     fn test_partition_indices_by_zero_diagonal() {
         super::super::tests::test_partition_indices_by_zero_diagonal::<FaerMat<f64>>();
     }
+
+    #[test]
+    fn test_resize_cols() {
+        super::super::tests::test_resize_cols::<FaerMat<f64>>();
+    }
 }
diff --git a/diffsol/src/matrix/dense_nalgebra_serial.rs b/diffsol/src/matrix/dense_nalgebra_serial.rs
@@ -270,6 +270,13 @@ impl<T: Scalar> DenseMatrix for NalgebraMat<T> {
         self.data.gemm(alpha, &a.data, &b.data, beta);
     }
 
+    fn resize_cols(&mut self, ncols: IndexType) {
+        if ncols == self.ncols() {
+            return;
+        }
+        self.data.resize_horizontally_mut(ncols, Self::T::zero());
+    }
+
     fn get_index(&self, i: IndexType, j: IndexType) -> Self::T {
         self.data[(i, j)]
     }
@@ -347,4 +354,9 @@ mod tests {
     fn test_partition_indices_by_zero_diagonal() {
         super::super::tests::test_partition_indices_by_zero_diagonal::<NalgebraMat<f64>>();
     }
+
+    #[test]
+    fn test_resize_cols() {
+        super::super::tests::test_resize_cols::<NalgebraMat<f64>>();
+    }
 }
diff --git a/diffsol/src/matrix/mod.rs b/diffsol/src/matrix/mod.rs
@@ -326,6 +326,9 @@ pub trait DenseMatrix:
         ret
     }
 
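+    /// Resize the number of columns in the matrix. Data in the retained columns is preserved; do not rely on the contents of newly added columns (the FaerMat and NalgebraMat implementations fill them with zeros)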
+    fn resize_cols(&mut self, ncols: IndexType);
+
     /// creates a new matrix from a vector of values, which are assumed
     /// to be in column-major order
     fn from_vec(nrows: IndexType, ncols: IndexType, data: Vec<Self::T>, ctx: Self::C) -> Self;
@@ -390,4 +393,33 @@ mod tests {
         assert_eq!(a.get_index(1, 0), M::T::from(3.0));
         assert_eq!(a.get_index(1, 1), M::T::from(10.0));
     }
+
+    pub fn test_resize_cols<M: DenseMatrix>() {
+        let mut a = M::zeros(2, 2, Default::default());
+        a.set_index(0, 0, M::T::from(1.0));
+        a.set_index(0, 1, M::T::from(2.0));
+        a.set_index(1, 0, M::T::from(3.0));
+        a.set_index(1, 1, M::T::from(4.0));
+
+        a.resize_cols(3);
+        assert_eq!(a.ncols(), 3);
+        assert_eq!(a.nrows(), 2);
+        assert_eq!(a.get_index(0, 0), M::T::from(1.0));
+        assert_eq!(a.get_index(0, 1), M::T::from(2.0));
+        assert_eq!(a.get_index(1, 0), M::T::from(3.0));
+        assert_eq!(a.get_index(1, 1), M::T::from(4.0));
+
+        a.set_index(0, 2, M::T::from(5.0));
+        a.set_index(1, 2, M::T::from(6.0));
+        assert_eq!(a.get_index(0, 2), M::T::from(5.0));
+        assert_eq!(a.get_index(1, 2), M::T::from(6.0));
+
+        a.resize_cols(2);
+        assert_eq!(a.ncols(), 2);
+        assert_eq!(a.nrows(), 2);
+        assert_eq!(a.get_index(0, 0), M::T::from(1.0));
+        assert_eq!(a.get_index(0, 1), M::T::from(2.0));
+        assert_eq!(a.get_index(1, 0), M::T::from(3.0));
+        assert_eq!(a.get_index(1, 1), M::T::from(4.0));
+    }
 }
diff --git a/diffsol/src/ode_solver/method.rs b/diffsol/src/ode_solver/method.rs
@@ -5,9 +5,10 @@ use crate::{
     ode_solver_error,
     scalar::Scalar,
     AugmentedOdeEquations, Checkpointing, Context, DefaultDenseMatrix, DenseMatrix,
-    HermiteInterpolator, NonLinearOp, OdeEquations, OdeSolverConfig, OdeSolverProblem,
-    OdeSolverState, Op, StateRef, StateRefMut, Vector, VectorViewMut,
+    HermiteInterpolator, MatrixCommon, NonLinearOp, OdeEquations, OdeSolverConfig,
+    OdeSolverProblem, OdeSolverState, Op, StateRef, StateRefMut, Vector, VectorViewMut,
 };
+use nalgebra::ComplexField;
 
 #[derive(Debug, PartialEq)]
 pub enum OdeSolverStopReason<T: Scalar> {
@@ -120,27 +121,20 @@ where
         Self: Sized,
     {
         let mut ret_t = Vec::new();
-        let mut ret_y = Vec::new();
+        let (mut ret_y, mut tmp_nout) = allocate_return(self)?;
 
         // do the main loop
-        write_out(self, &mut ret_y, &mut ret_t);
+        write_out(self, &mut ret_y, &mut ret_t, final_time, &mut tmp_nout);
         self.set_stop_time(final_time)?;
         while self.step()? != OdeSolverStopReason::TstopReached {
-            write_out(self, &mut ret_y, &mut ret_t);
+            write_out(self, &mut ret_y, &mut ret_t, final_time, &mut tmp_nout);
         }
 
         // store the final step
-        write_out(self, &mut ret_y, &mut ret_t);
+        write_out(self, &mut ret_y, &mut ret_t, final_time, &mut tmp_nout);
         let ntimes = ret_t.len();
-        let nrows = ret_y[0].len();
-        let mut ret_y_matrix = self
-            .problem()
-            .context()
-            .dense_mat_zeros::<Eqn::V>(nrows, ntimes);
-        for (i, y) in ret_y.iter().enumerate() {
-            ret_y_matrix.column_mut(i).copy_from(y);
-        }
-        Ok((ret_y_matrix, ret_t))
+        ret_y.resize_cols(ntimes);
+        Ok((ret_y, ret_t))
     }
 
     /// Using the provided state, solve the problem up to time `t_eval[t_eval.len()-1]`
@@ -154,7 +148,7 @@ where
         Eqn::V: DefaultDenseMatrix,
         Self: Sized,
     {
-        let mut ret = dense_allocate_return(self, t_eval)?;
+        let (mut ret, mut tmp_nout) = dense_allocate_return(self, t_eval)?;
 
         // do loop
         self.set_stop_time(t_eval[t_eval.len() - 1])?;
@@ -163,7 +157,7 @@ where
             while self.state().t < *t {
                 step_reason = self.step()?;
             }
-            dense_write_out(self, &mut ret, t_eval, i)?;
+            dense_write_out(self, &mut ret, t_eval, i, &mut tmp_nout)?;
         }
         assert_eq!(step_reason, OdeSolverStopReason::TstopReached);
         Ok(ret)
@@ -187,7 +181,7 @@ where
         Self: Sized,
     {
         let mut ret_t = Vec::new();
-        let mut ret_y = Vec::new();
+        let (mut ret_y, mut tmp_nout) = allocate_return(self)?;
         let max_steps_between_checkpoints = max_steps_between_checkpoints.unwrap_or(500);
 
         // allocate checkpoint info
@@ -199,10 +193,10 @@ where
         let mut ydots = vec![self.state().dy.clone()];
 
         // do the main loop, saving checkpoints
-        write_out(self, &mut ret_y, &mut ret_t);
+        write_out(self, &mut ret_y, &mut ret_t, final_time, &mut tmp_nout);
         self.set_stop_time(final_time)?;
         while self.step()? != OdeSolverStopReason::TstopReached {
-            write_out(self, &mut ret_y, &mut ret_t);
+            write_out(self, &mut ret_y, &mut ret_t, final_time, &mut tmp_nout);
             ts.push(self.state().t);
             ys.push(self.state().y.clone());
             ydots.push(self.state().dy.clone());
@@ -217,16 +211,9 @@ where
         }
 
         // store the final step
-        write_out(self, &mut ret_y, &mut ret_t);
+        write_out(self, &mut ret_y, &mut ret_t, final_time, &mut tmp_nout);
         let ntimes = ret_t.len();
-        let nrows = ret_y[0].len();
-        let mut ret_y_matrix = self
-            .problem()
-            .context()
-            .dense_mat_zeros::<Eqn::V>(nrows, ntimes);
-        for (i, y) in ret_y.iter().enumerate() {
-            ret_y_matrix.column_mut(i).copy_from(y);
-        }
+        ret_y.resize_cols(ntimes);
 
         // add final checkpoint
         ts.push(self.state().t);
@@ -243,7 +230,7 @@ where
             Some(last_segment),
         );
 
-        Ok((checkpointer, ret_y_matrix, ret_t))
+        Ok((checkpointer, ret_y, ret_t))
     }
 
     /// Solve the problem and write out the solution at the given timepoints, using checkpointing so that
@@ -265,7 +252,7 @@ where
         Eqn::V: DefaultDenseMatrix,
         Self: Sized,
     {
-        let mut ret = dense_allocate_return(self, t_eval)?;
+        let (mut ret, mut tmp_nout) = dense_allocate_return(self, t_eval)?;
         let max_steps_between_checkpoints = max_steps_between_checkpoints.unwrap_or(500);
 
         // allocate checkpoint info
@@ -296,7 +283,7 @@ where
                     ydots.clear();
                 }
             }
-            dense_write_out(self, &mut ret, t_eval, i)?;
+            dense_write_out(self, &mut ret, t_eval, i, &mut tmp_nout)?;
         }
         assert_eq!(step_reason, OdeSolverStopReason::TstopReached);
 
@@ -334,6 +321,7 @@ fn dense_write_out<'a, Eqn: OdeEquations + 'a, S: OdeSolverMethod<'a, Eqn>>(
     y_out: &mut <Eqn::V as DefaultDenseMatrix>::M,
     t_eval: &[Eqn::T],
     i: usize,
+    tmp_nout: &mut Eqn::V,
 ) -> Result<(), DiffsolError>
 where
     Eqn::V: DefaultDenseMatrix,
@@ -346,7 +334,10 @@ where
     } else {
         let y = s.interpolate(t)?;
         match s.problem().eqn.out() {
-            Some(out) => y_out.copy_from(&out.call(&y, t_eval[i])),
+            Some(out) => {
+                out.call_inplace(&y, t_eval[i], tmp_nout);
+                y_out.copy_from(tmp_nout)
+            }
             None => y_out.copy_from(&y),
         }
     }
@@ -357,30 +348,74 @@ where
 /// This function is used by the `solve` method to write out the solution at a given timepoint.
 fn write_out<'a, Eqn: OdeEquations + 'a, S: OdeSolverMethod<'a, Eqn>>(
     s: &S,
-    ret_y: &mut Vec<Eqn::V>,
+    ret_y: &mut <Eqn::V as DefaultDenseMatrix>::M,
     ret_t: &mut Vec<Eqn::T>,
-) {
+    final_time: Eqn::T,
+    tmp_nout: &mut Eqn::V,
+) where
+    Eqn::V: DefaultDenseMatrix,
+{
     let t = s.state().t;
     let y = s.state().y;
     ret_t.push(t);
+    let i = ret_t.len() - 1;
+    if i >= ret_y.ncols() {
+        const GROWTH_FACTOR: f64 = 1.5;
+        let remaining: f64 = (Eqn::T::from(GROWTH_FACTOR) * (final_time - ret_t[i - 1])
+            / (ret_t[i] - ret_t[i - 1]))
+            .ceil()
+            .into();
+        let n = ret_y.ncols() + (remaining as usize);
+        ret_y.resize_cols(n);
+    }
+    let mut ret_y_col = ret_y.column_mut(i);
     match s.problem().eqn.out() {
         Some(out) => {
             if s.problem().integrate_out {
-                ret_y.push(s.state().g.clone());
+                ret_y_col.copy_from(s.state().g);
             } else {
-                ret_y.push(out.call(y, t));
+                out.call_inplace(y, t, tmp_nout);
+                ret_y_col.copy_from(tmp_nout);
             }
         }
-        None => ret_y.push(y.clone()),
+        None => ret_y_col.copy_from(y),
     }
 }
 
+/// Utility function to allocate the return matrix for the `solve`
+/// method
+fn allocate_return<'a, Eqn: OdeEquations + 'a, S: OdeSolverMethod<'a, Eqn>>(
+    s: &S,
+) -> Result<(<Eqn::V as DefaultDenseMatrix>::M, Eqn::V), DiffsolError>
+where
+    Eqn::V: DefaultDenseMatrix,
+{
+    let nrows = if s.problem().eqn.out().is_some() {
+        s.problem().eqn.out().unwrap().nout()
+    } else {
+        s.problem().eqn.rhs().nstates()
+    };
+    const INITIAL_NCOLS: usize = 10;
+    let ret = s
+        .problem()
+        .context()
+        .dense_mat_zeros::<Eqn::V>(nrows, INITIAL_NCOLS);
+
+    // allocate a scratch vector for evaluating the output function in place (zero-length if there is no output function)
+    let tmp_nout = if let Some(out) = s.problem().eqn.out() {
+        Eqn::V::zeros(out.nout(), s.problem().context().clone())
+    } else {
+        Eqn::V::zeros(0, s.problem().context().clone())
+    };
+    Ok((ret, tmp_nout))
+}
+
 /// Utility function to allocate the return matrix for the `solve_dense`
 /// and `solve_dense_sensitivities` methods.
 fn dense_allocate_return<'a, Eqn: OdeEquations + 'a, S: OdeSolverMethod<'a, Eqn>>(
     s: &S,
     t_eval: &[Eqn::T],
-) -> Result<<Eqn::V as DefaultDenseMatrix>::M, DiffsolError>
+) -> Result<(<Eqn::V as DefaultDenseMatrix>::M, Eqn::V), DiffsolError>
 where
     Eqn::V: DefaultDenseMatrix,
 {
@@ -399,7 +434,12 @@ where
     if t_eval.windows(2).any(|w| w[0] > w[1] || w[0] < t0) {
         return Err(ode_solver_error!(InvalidTEval));
     }
-    Ok(ret)
+    let tmp_nout = if let Some(out) = s.problem().eqn.out() {
+        Eqn::V::zeros(out.nout(), s.problem().context().clone())
+    } else {
+        Eqn::V::zeros(0, s.problem().context().clone())
+    };
+    Ok((ret, tmp_nout))
 }
 
 #[cfg(test)]

Original file line number	Diff line number	Diff line change
`@@ -177,6 +177,14 @@ impl<T: Scalar> DenseMatrix for FaerMat<T> {`
`177`	`177`	`Self { data, context: ctx }`
`178`	`178`	`}`
`179`	`179`
	`180`	`+ fn resize_cols(&mut self, ncols: IndexType) {`
	`181`	`+ if ncols == self.ncols() {`
	`182`	`+ return;`
	`183`	`+ }`
	`184`	`+ let nrows = self.nrows();`
	`185`	`+ self.data.resize_with(nrows, ncols, \|_, _\| T::zero());`
	`186`	`+ }`
	`187`	`+`
`180`	`188`	`fn get_index(&self, i: IndexType, j: IndexType) -> Self::T {`
`181`	`189`	`self.data[(i, j)]`
`182`	`190`	`}`
`@@ -384,4 +392,9 @@ mod tests {`
`384`	`392`	`fn test_partition_indices_by_zero_diagonal() {`
`385`	`393`	`super::super::tests::test_partition_indices_by_zero_diagonal::<FaerMat<f64>>();`
`386`	`394`	`}`
	`395`	`+`
	`396`	`+ #[test]`
	`397`	`+ fn test_resize_cols() {`
	`398`	`+ super::super::tests::test_resize_cols::<FaerMat<f64>>();`
	`399`	`+ }`
`387`	`400`	`}`
Original file line number	Diff line number	Diff line change
`@@ -270,6 +270,13 @@ impl<T: Scalar> DenseMatrix for NalgebraMat<T> {`
`270`	`270`	`self.data.gemm(alpha, &a.data, &b.data, beta);`
`271`	`271`	`}`
`272`	`272`
	`273`	`+ fn resize_cols(&mut self, ncols: IndexType) {`
	`274`	`+ if ncols == self.ncols() {`
	`275`	`+ return;`
	`276`	`+ }`
	`277`	`+ self.data.resize_horizontally_mut(ncols, Self::T::zero());`
	`278`	`+ }`
	`279`	`+`
`273`	`280`	`fn get_index(&self, i: IndexType, j: IndexType) -> Self::T {`
`274`	`281`	`self.data[(i, j)]`
`275`	`282`	`}`
`@@ -347,4 +354,9 @@ mod tests {`
`347`	`354`	`fn test_partition_indices_by_zero_diagonal() {`
`348`	`355`	`super::super::tests::test_partition_indices_by_zero_diagonal::<NalgebraMat<f64>>();`
`349`	`356`	`}`
	`357`	`+`
	`358`	`+ #[test]`
	`359`	`+ fn test_resize_cols() {`
	`360`	`+ super::super::tests::test_resize_cols::<NalgebraMat<f64>>();`
	`361`	`+ }`
`350`	`362`	`}`