diff --git a/.github/workflows/good_defines.txt b/.github/workflows/good_defines.txt
index 9cbd6c8ede..a084e38fcc 100644
--- a/.github/workflows/good_defines.txt
+++ b/.github/workflows/good_defines.txt
@@ -24,6 +24,7 @@ SCREENING
 SCREEN_METHOD
 SDC
 SIMPLIFIED_SDC
+SINGLE_PRECISION_JACOBIAN
 STRANG
 TRUE_SDC
 _OPENMP
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000000..256143394a
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,19 @@
+# Repository Guidelines
+
+## Project Structure & Module Organization
+The repository is organized around modular physics kernels in `EOS/`, `networks/`, `integration/`, and companion directories such as `conductivity/`, `neutrinos/`, `opacity/`, and `rates/`. Shared math utilities and build scripts remain in `util/`, while reusable constants sit in `constants/`. Interfaces consumed by AMReX-based application codes are in `interfaces/`. Unit regression assets reside under `unit_test/`, and Sphinx documentation sources are in `Docs/` with release history tracked in `CHANGES.md`.
+
+## Build, Test, and Development Commands
+Export `AMREX_HOME` and `MICROPHYSICS_HOME` before building so the GNU make stubs locate AMReX sources. Build a standalone unit test in place, e.g. `cd unit_test/burn_cell && make -j4`, which yields `main3d.gnu.ex`. Select physics at compile time with flags like `make NETWORK_DIR=aprox19 EOS_DIR=helmholtz`, and reset a directory via `make clean`.
+
+## Coding Style & Naming Conventions
+C++ sources follow `.editorconfig`: four-space indentation, LF line endings, and tabs only inside Makefiles. Headers keep the uppercase `.H` suffix and implementations use `.cpp`. Favor the existing snake_case routine pattern (`nse_check`, `burn_cell`) and guard autogenerated directories such as `util/autodiff`. Run `.clang-tidy` with `make USE_CLANG_TIDY=TRUE` on significant changes.
+
+## Testing Guidelines
+Each directory beneath `unit_test/` ships a `GNUmakefile`, `inputs_*` controls, and README notes; run the built binary with the matching inputs file (for example `./main3d.gnu.ex inputs_aprox13`). For new coverage, clone an existing `test_*` layout, describe the scenario, and rerun the most relevant case before posting. Capture scratch artifacts via `.gitignore` rather than committing them.
+
+## Commit & Pull Request Guidelines
+Develop on topic branches and open pull requests against `development`; merges to `main` occur monthly. Keep commit subjects concise and imperative, mirroring history such as `add NSE network compatibility docs (#1852)`. Update `CHANGES.md` for user-facing fixes, link issues, and note required AMReX revisions. PRs should summarize physics choices, list tests run, and attach plots or logs when behavior shifts.
+
+## Documentation & Automation
+Sphinx sources in `Docs/` back the published guide; update them with code changes and ensure `make -C Docs html` finishes cleanly. GitHub Actions publishes the result and exercises unit tests, so keep workflows green by limiting optional dependencies and recording new Python requirements in `requirements.txt`.
diff --git a/Docs/source/ode_integrators.rst b/Docs/source/ode_integrators.rst
index 6ee5bc5804..4644a347a5 100644
--- a/Docs/source/ode_integrators.rst
+++ b/Docs/source/ode_integrators.rst
@@ -113,6 +113,23 @@ For the other networks (usually pynucastro networks), the implementation is
 provided in ``Microphysics/util/linpack.H`` and is templated on the number
 of equations.  Pivoting can be disabled by setting ``integrator.linalg_do_pivoting=0``.
 
+.. index:: USE_SINGLE_PRECISION_JACOBIAN
+
+.. tip::
+
+   The storage for the Jacobian can take up the most memory when
+   integrating the reaction system.  It is possible to store the
+   Jacobian as single-precision, by building with:
+
+   ::
+
+      USE_SINGLE_PRECISION_JACOBIAN=TRUE
+
+   This can speed up the integration prevent the code from running out
+   of memory when run on GPUs.
+
+
+
 Integration errors
 ==================
 
diff --git a/integration/BackwardEuler/be_type.H b/integration/BackwardEuler/be_type.H
index 0efae612a7..04d7d8f190 100644
--- a/integration/BackwardEuler/be_type.H
+++ b/integration/BackwardEuler/be_type.H
@@ -48,7 +48,7 @@ struct be_t {
     amrex::Real rtol_enuc;
 
     amrex::Array1D<amrex::Real, 1, int_neqs> y;
-    ArrayUtil::MathArray2D<1, int_neqs, 1, int_neqs> jac;
+    ArrayUtil::MathArray2D<jac_t, 1, int_neqs, 1, int_neqs> jac;
 
     short jacobian_type;
 };
diff --git a/integration/Make.package b/integration/Make.package
index 81f5c94887..9b8ab665f6 100644
--- a/integration/Make.package
+++ b/integration/Make.package
@@ -26,6 +26,9 @@ else
   CEXE_headers += integrator_setup_strang.H
 endif
 
+ifeq ($(USE_SINGLE_PRECISION_JACOBIAN), TRUE)
+  DEFINES += -DSINGLE_PRECISION_JACOBIAN
+endif
 
 ifeq ($(USE_ALL_NSE), TRUE)
   ifeq ($(USE_ALL_SDC), TRUE)
diff --git a/integration/VODE/vode_dvode.H b/integration/VODE/vode_dvode.H
index bbbf414398..aeb69725b1 100644
--- a/integration/VODE/vode_dvode.H
+++ b/integration/VODE/vode_dvode.H
@@ -170,14 +170,18 @@ int dvode (BurnT& state, DvodeT& vstate)
 
            if (vstate.n_step == 0) {
 #ifndef AMREX_USE_GPU
+           if (!state.suppress_failure_output) {
                std::cout << amrex::Font::Bold << amrex::FGColor::Red << "DVODE: too much accuracy requested at start of integration" << amrex::ResetDisplay << std::endl;
+           }
 #endif
                return IERR_TOO_MUCH_ACCURACY_REQUESTED;
            }
 
            // Too much accuracy requested for machine precision.
 #ifndef AMREX_USE_GPU
-           std::cout << "DVODE: too much accuracy requested" << std::endl;
+           if (!state.suppress_failure_output) {
+               std::cout << "DVODE: too much accuracy requested" << std::endl;
+           }
 #endif
            for (int i = 1; i <= int_neqs; ++i) {
                vstate.y(i) = vstate.yh(i,1);
@@ -197,7 +201,9 @@ int dvode (BurnT& state, DvodeT& vstate)
        if (kflag == -1) {
            // Error test failed repeatedly or with ABS(H) = HMIN.
 #ifndef AMREX_USE_GPU
-           std::cout << amrex::Font::Bold << amrex::FGColor::Red << "DVODE: error test failed repeatedly or with abs(H) = HMIN" << amrex::ResetDisplay << std::endl;
+           if (!state.suppress_failure_output) {
+               std::cout << amrex::Font::Bold << amrex::FGColor::Red << "DVODE: error test failed repeatedly or with abs(H) = HMIN" << amrex::ResetDisplay << std::endl;
+           }
 #endif
            // Set Y array, T, and optional output.
            for (int i = 1; i <= int_neqs; ++i) {
@@ -211,7 +217,9 @@ int dvode (BurnT& state, DvodeT& vstate)
        else if (kflag == -2) {
            // Convergence failed repeatedly or with ABS(H) = HMIN.
 #ifndef AMREX_USE_GPU
-           std::cout << amrex::Font::Bold << amrex::FGColor::Red << "DVODE: corrector convergence failed repeatedly or with abs(H) = HMIN" << amrex::ResetDisplay << std::endl;
+           if (!state.suppress_failure_output) {
+               std::cout << amrex::Font::Bold << amrex::FGColor::Red << "DVODE: corrector convergence failed repeatedly or with abs(H) = HMIN" << amrex::ResetDisplay << std::endl;
+           }
 #endif
            // Set Y array, T, and optional output.
            for (int i = 1; i <= int_neqs; ++i) {
diff --git a/integration/VODE/vode_type.H b/integration/VODE/vode_type.H
index 3c246bfb50..a1df1f9bf2 100644
--- a/integration/VODE/vode_type.H
+++ b/integration/VODE/vode_type.H
@@ -143,11 +143,11 @@ struct dvode_t
     amrex::Array1D<amrex::Real, 1, int_neqs> y;
 
     // Jacobian
-    ArrayUtil::MathArray2D<1, int_neqs, 1, int_neqs> jac;
+    ArrayUtil::MathArray2D<jac_t, 1, int_neqs, 1, int_neqs> jac;
 
 #ifdef ALLOW_JACOBIAN_CACHING
     // Saved Jacobian
-    ArrayUtil::MathArray2D<1, int_neqs, 1, int_neqs> jac_save;
+    ArrayUtil::MathArray2D<jac_t, 1, int_neqs, 1, int_neqs> jac_save;
 #endif
 
     // the Nordsieck history array
diff --git a/integration/integrator_data.H b/integration/integrator_data.H
index 2d4f7795cd..fb698318aa 100644
--- a/integration/integrator_data.H
+++ b/integration/integrator_data.H
@@ -52,6 +52,6 @@ constexpr int integrator_neqs ()
 
 using IArray1D = amrex::Array1D<short, 1, INT_NEQS>;
 using RArray1D = amrex::Array1D<amrex::Real, 1, INT_NEQS>;
-using RArray2D = ArrayUtil::MathArray2D<1, INT_NEQS, 1, INT_NEQS>;
+using RArray2D = ArrayUtil::MathArray2D<jac_t, 1, INT_NEQS, 1, INT_NEQS>;
 
 #endif
diff --git a/integration/integrator_setup_strang.H b/integration/integrator_setup_strang.H
index b18b6d5136..d63f246c1c 100644
--- a/integration/integrator_setup_strang.H
+++ b/integration/integrator_setup_strang.H
@@ -181,7 +181,7 @@ void integrator_cleanup (IntegratorT& int_state, BurnT& state,
 
     // If we failed, print out the current state of the integration.
 
-    if (! state.success) {
+    if (! state.success && !state.suppress_failure_output) {
 #ifndef AMREX_USE_GPU
         std::cout << amrex::Font::Bold << amrex::FGColor::Red << "[ERROR] integration failed in net" << amrex::ResetDisplay << std::endl;
         std::cout << "istate = " << istate << std::endl;
diff --git a/integration/utils/circle_theorem.H b/integration/utils/circle_theorem.H
index 957a2f3ada..85afc59d7e 100644
--- a/integration/utils/circle_theorem.H
+++ b/integration/utils/circle_theorem.H
@@ -17,7 +17,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void circle_theorem_sprad(const amrex::Real time, BurnT& state, T& int_state, amrex::Real& sprad)
 {
 
-    ArrayUtil::MathArray2D<1, INT_NEQS, 1, INT_NEQS> jac_array;
+    ArrayUtil::MathArray2D<jac_t, 1, INT_NEQS, 1, INT_NEQS> jac_array;
 
     if (integrator_rp::jacobian == 1) {
         jac(time, state, int_state, jac_array);
diff --git a/interfaces/ArrayUtilities.H b/interfaces/ArrayUtilities.H
index 37f6b51963..9b000eb77e 100644
--- a/interfaces/ArrayUtilities.H
+++ b/interfaces/ArrayUtilities.H
@@ -34,7 +34,7 @@ namespace ArrayUtil
         amrex::Real arr[(XHI-XLO+1)];
     };
 
-    template <int XLO, int XHI, int YLO, int YHI>
+    template <typename T, int XLO, int XHI, int YLO, int YHI>
     struct MathArray2D
     {
         AMREX_GPU_HOST_DEVICE AMREX_INLINE
@@ -71,7 +71,7 @@ namespace ArrayUtil
         }
 
         [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-        amrex::Real get (const int i, const int j) const noexcept {
+        T get (const int i, const int j) const noexcept {
             AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
             return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)];
         }
@@ -84,18 +84,18 @@ namespace ArrayUtil
         }
 
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-        const amrex::Real& operator() (int i, int j) const noexcept {
+        const T& operator() (int i, int j) const noexcept {
             AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
             return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)];
         }
 
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-        amrex::Real& operator() (int i, int j) noexcept {
+        T& operator() (int i, int j) noexcept {
             AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
             return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)];
         }
 
-        amrex::Real arr[(XHI-XLO+1)*(YHI-YLO+1)];
+        T arr[(XHI-XLO+1)*(YHI-YLO+1)];
     };
 
     namespace Math
diff --git a/interfaces/burn_type.H b/interfaces/burn_type.H
index bf7ce27b6d..817998455e 100644
--- a/interfaces/burn_type.H
+++ b/interfaces/burn_type.H
@@ -56,7 +56,15 @@ const int SVAR  = SEDEN+1;
 const int SVAR_EVOLVE = SFX;
 
 // this is the data type of the dense Jacobian that the network wants.
-using  JacNetArray2D = ArrayUtil::MathArray2D<1, neqs, 1, neqs>;
+
+// the Jacobian can have a different type that our system
+#ifdef SINGLE_PRECISION_JACOBIAN
+using jac_t = float;
+#else
+using jac_t = amrex::Real;
+#endif
+
+using  JacNetArray2D = ArrayUtil::MathArray2D<jac_t, 1, neqs, 1, neqs>;
 
 struct burn_t
 {
@@ -154,6 +162,9 @@ struct burn_t
   // integrator error code
   short error_code{};
 
+  // suppress verbose failure diagnostics (useful for GPU retries)
+  bool suppress_failure_output{};
+
 };
 
 
diff --git a/networks/rhs.H b/networks/rhs.H
index c096cc6d99..9d417cd2f3 100644
--- a/networks/rhs.H
+++ b/networks/rhs.H
@@ -1357,7 +1357,7 @@ void rhs (burn_t& burn_state, amrex::Array1D<amrex::Real, 1, nrhs>& ydot)
 
 // Analytical Jacobian
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
-void jac (burn_t& burn_state, ArrayUtil::MathArray2D<1, neqs, 1, neqs>& jac)
+void jac (burn_t& burn_state, ArrayUtil::MathArray2D<jac_t, 1, neqs, 1, neqs>& jac)
 {
     rhs_state_t<autodiff::dual> rhs_state;
 
@@ -1521,7 +1521,7 @@ void actual_rhs (burn_t& state, amrex::Array1D<amrex::Real, 1, neqs>& ydot)
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
-void actual_jac (burn_t& state, ArrayUtil::MathArray2D<1, neqs, 1, neqs>& jac)
+void actual_jac (burn_t& state, ArrayUtil::MathArray2D<jac_t, 1, neqs, 1, neqs>& jac)
 {
     RHS::jac(state, jac);
 }
diff --git a/unit_test/burn_cell_primordial_chem/GNUmakefile b/unit_test/burn_cell_primordial_chem/GNUmakefile
index 669e9839c7..c00cb402e2 100644
--- a/unit_test/burn_cell_primordial_chem/GNUmakefile
+++ b/unit_test/burn_cell_primordial_chem/GNUmakefile
@@ -2,7 +2,7 @@ PRECISION  = DOUBLE
 PROFILE    = FALSE
 
 # Set DEBUG to TRUE if debugging
-DEBUG      = TRUE
+DEBUG      = FALSE
 
 DIM        = 1
 
@@ -15,7 +15,7 @@ USE_CUDA   = FALSE
 USE_REACT = TRUE
 
 # Set USE_MICROPHYSICS_DEBUG to TRUE if debugging
-USE_MICROPHYSICS_DEBUG = TRUE
+USE_MICROPHYSICS_DEBUG = FALSE
 
 EBASE = main
 
diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H
index f5ced4cb5e..797a68002e 100644
--- a/unit_test/burn_cell_primordial_chem/burn_cell.H
+++ b/unit_test/burn_cell_primordial_chem/burn_cell.H
@@ -1,9 +1,21 @@
 #ifndef BURN_CELL_H
 #define BURN_CELL_H
 
+#include <algorithm>
+#include <cmath>
+#include <chrono>
 #include <fstream>
+#include <functional>
+#include <iomanip>
 #include <iostream>
-
+#include <limits>
+#include <map>
+#include <sstream>
+#include <vector>
+
+#include <AMReX_GpuContainers.H>
+#include <AMReX_GpuDevice.H>
+#include <AMReX_GpuLaunch.H>
 #include <burner.H>
 #include <eos.H>
 #include <extern_parameters.H>
@@ -12,10 +24,11 @@
 
 amrex::Real grav_constant = 6.674e-8;
 
-AMREX_INLINE
-auto burn_cell_c() -> int {
+namespace {
 
-  burn_t state;
+AMREX_INLINE
+void init_burn_state(burn_t &state, amrex::Real density_scale,
+                     amrex::Real temp_offset, bool print_initial) {
 
   amrex::Real numdens[NumSpec] = {-1.0};
 
@@ -67,23 +80,28 @@ auto burn_cell_c() -> int {
     }
   }
 
-  // Echo initial conditions at burn and fill burn state input
-
-  std::cout << "Maximum Time (s): " << unit_test_rp::tmax << std::endl;
-  std::cout << "State Temperature (K): " << unit_test_rp::temperature << std::endl;
   for (int n = 0; n < NumSpec; ++n) {
-    std::cout << "Number Density input (" << short_spec_names_cxx[n]
-              << "): " << numdens[n] << std::endl;
+    numdens[n] *= density_scale;
   }
 
-  state.T = unit_test_rp::temperature;
+  if (print_initial) {
+    std::cout << "Maximum Time (s): " << unit_test_rp::tmax << std::endl;
+    std::cout << "State Temperature (K): "
+              << unit_test_rp::temperature + temp_offset << std::endl;
+    for (int n = 0; n < NumSpec; ++n) {
+      std::cout << "Number Density input (" << short_spec_names_cxx[n]
+                << "): " << numdens[n] << std::endl;
+    }
+  }
+
+  state = burn_t{};
+  state.T = unit_test_rp::temperature + temp_offset;
 
   // find the density in g/cm^3
   amrex::Real rhotot = 0.0_rt;
   for (int n = 0; n < NumSpec; ++n) {
     state.xn[n] = numdens[n];
-    rhotot += state.xn[n] * spmasses[n]; // spmasses contains the masses of all
-                                         // species, defined in EOS
+    rhotot += state.xn[n] * spmasses[n];
   }
 
   state.rho = rhotot;
@@ -105,158 +123,489 @@ auto burn_cell_c() -> int {
   }
 
 #ifdef DEBUG
-  for (int n = 0; n < NumSpec; ++n) {
-    std::cout << "Mass fractions (" << short_spec_names_cxx[n]
-              << "): " << mfracs[n] << std::endl;
-    std::cout << "Number Density conserved (" << short_spec_names_cxx[n]
-              << "): " << state.xn[n] << std::endl;
+  if (print_initial) {
+    for (int n = 0; n < NumSpec; ++n) {
+      std::cout << "Mass fractions (" << short_spec_names_cxx[n]
+                << "): " << mfracs[n] << std::endl;
+      std::cout << "Number Density conserved (" << short_spec_names_cxx[n]
+                << "): " << state.xn[n] << std::endl;
+    }
   }
 #endif
 
-  // call the EOS to set initial internal energy e
   eos(eos_input_rt, state);
+}
 
-  // name of output file
-  std::ofstream state_over_time("state_over_time.txt");
-
-  // save the initial state -- we'll use this to determine
-  // how much things changed over the entire burn
-  burn_t state_in = state;
+AMREX_INLINE
+void enforce_post_burn_constraints(burn_t &state) {
+
+  amrex::Real inmfracs[NumSpec] = {-1.0};
+  amrex::Real insum = 0.0_rt;
+  for (int nn = 0; nn < NumSpec; ++nn) {
+    state.xn[nn] = amrex::max(state.xn[nn], small_x);
+    inmfracs[nn] = spmasses[nn] * state.xn[nn] / state.rho;
+    insum += inmfracs[nn];
+  }
 
-  // output the data in columns, one line per timestep
-  state_over_time << std::setw(10) << "# Time";
-  state_over_time << std::setw(15) << "Density";
-  state_over_time << std::setw(15) << "Temperature";
-  for (int x = 0; x < NumSpec; ++x) {
-    const std::string &element = short_spec_names_cxx[x];
-    state_over_time << std::setw(15) << element;
+  for (int nn = 0; nn < NumSpec; ++nn) {
+    inmfracs[nn] /= insum;
+    // update the number densities with conserved mass fractions
+    state.xn[nn] = inmfracs[nn] * state.rho / spmasses[nn];
   }
-  state_over_time << std::endl;
 
-  amrex::Real t = 0.0;
+  // update the number density of electrons due to charge conservation
+  balance_charge(state);
 
-  state_over_time << std::setw(10) << t;
-  state_over_time << std::setw(15) << state.rho;
-  state_over_time << std::setw(15) << state.T;
-  for (int x = 0; x < NumSpec; ++x) {
-    state_over_time << std::setw(15) << state.xn[x];
+  // reconserve mass fractions post charge conservation
+  insum = 0.0_rt;
+  for (int nn = 0; nn < NumSpec; ++nn) {
+    state.xn[nn] = amrex::max(state.xn[nn], small_x);
+    inmfracs[nn] = spmasses[nn] * state.xn[nn] / state.rho;
+    insum += inmfracs[nn];
   }
-  state_over_time << std::endl;
 
-  // loop over steps, burn, and output the current state
-  // the loop below is similar to that used in krome and pynucastro
-  amrex::Real dd = rhotot;
-  amrex::Real dd1 = 0.0_rt;
+  for (int nn = 0; nn < NumSpec; ++nn) {
+    inmfracs[nn] /= insum;
+    // update the number densities with conserved mass fractions
+    state.xn[nn] = inmfracs[nn] * state.rho / spmasses[nn];
+  }
 
-  for (int n = 0; n < unit_test_rp::nsteps; n++) {
+  // get the updated T
+  eos(eos_input_re, state);
+}
 
-    dd1 = dd;
+} // namespace
 
-    amrex::Real rhotmp = 0.0_rt;
 
-    for (int nn = 0; nn < NumSpec; ++nn) {
-      rhotmp += state.xn[nn] * spmasses[nn];
+AMREX_INLINE
+auto burn_cell_multi_c(int n_zones) -> int {
+
+  constexpr amrex::Real rel_tol = 1.0e-10_rt;
+  constexpr amrex::Real abs_tol = 1.0e-12_rt;
+
+  AMREX_ALWAYS_ASSERT_WITH_MESSAGE(n_zones > 0,
+                                   "n_zones must be positive");
+
+  struct MultiZoneSummary {
+    int failures = 0;
+    int retries = 0;
+    amrex::Real t_min = std::numeric_limits<amrex::Real>::max();
+    amrex::Real t_max = -std::numeric_limits<amrex::Real>::max();
+    bool any_success = false;
+  };
+
+  auto run_multi_zone = [&](bool use_gpu, std::vector<burn_t> &final_states,
+                            std::vector<amrex::Real> &final_times,
+                            MultiZoneSummary &summary) -> bool {
+    amrex::Gpu::HostVector<burn_t> states_h(n_zones);
+    amrex::Gpu::HostVector<amrex::Real> dd_h(n_zones);
+    amrex::Gpu::HostVector<amrex::Real> dt_h(n_zones);
+    amrex::Gpu::HostVector<amrex::Real> time_h(n_zones);
+    std::vector<burn_t> states_before(n_zones);
+    std::vector<int> active_h(n_zones, 1);
+
+    summary = MultiZoneSummary{};
+    int retry_total = 0;
+
+    for (int iz = 0; iz < n_zones; ++iz) {
+      amrex::Real temp_offset =
+          0.5_rt * unit_test_rp::temperature *
+          (static_cast<amrex::Real>(iz) /
+               static_cast<amrex::Real>(n_zones - 1) -
+           0.5_rt);
+      init_burn_state(states_h[iz], 1.0_rt, temp_offset, false);
+      dd_h[iz] = states_h[iz].rho;
+      dt_h[iz] = 0.0_rt;
+      time_h[iz] = 0.0_rt;
+      active_h[iz] = 1;
     }
 
-    // find the freefall time
-    amrex::Real tff = std::sqrt(M_PI * 3.0 / (32.0 * rhotmp * grav_constant));
-    amrex::Real dt = unit_test_rp::tff_reduc * tff;
-    // scale the density
-    dd += dt * (dd / tff);
-
-#ifdef DEBUG
-    std::cout << "step params " << dd << ", " << tff << ", " << rhotmp
-              << std::endl;
-#endif
-
-    // stop the test if dt is very small
-    if (dt < 10) {
-      break;
+    amrex::Gpu::DeviceVector<burn_t> states_d;
+    amrex::Gpu::DeviceVector<amrex::Real> dt_d;
+    if (use_gpu) {
+      states_d.resize(n_zones);
+      dt_d.resize(n_zones);
     }
 
-    // stop the test if we have reached very high densities
-    if (dd > 2e-6) {
-      break;
+    const int max_split_depth = 6;
+    constexpr amrex::Real min_split_dt = 1.0_rt;
+
+    auto burn_backend = [&](burn_t &state, amrex::Real dt) {
+      state.time = 0.0_rt;
+      state.suppress_failure_output = true;
+
+      if (use_gpu) {
+        amrex::AsyncArray<burn_t> state_device(&state, 1);
+        burn_t *state_ptr = state_device.data();
+
+        amrex::ParallelFor(
+            1, [state_ptr, dt] AMREX_GPU_DEVICE(int idx) noexcept {
+              burner(state_ptr[idx], dt);
+            });
+
+        state_device.copyToHost(&state, 1);
+        amrex::Gpu::streamSynchronize();
+      } else {
+        burner(state, dt);
+      }
+    };
+
+    std::function<bool(burn_t &, amrex::Real, int)> integrate_with_retries;
+    integrate_with_retries = [&](burn_t &state, amrex::Real dt,
+                                 int depth) -> bool {
+      burn_t backup = state;
+      burn_backend(state, dt);
+      if (state.success && state.e > 0.0_rt) {
+        state.suppress_failure_output = false;
+        return true;
+      }
+      if (depth >= max_split_depth || 0.5_rt * dt <= min_split_dt) {
+        state = backup;
+        state.suppress_failure_output = false;
+        return false;
+      }
+      amrex::Real half_dt = 0.5_rt * dt;
+      state = backup;
+      if (!integrate_with_retries(state, half_dt, depth + 1)) {
+        state.suppress_failure_output = false;
+        return false;
+      }
+      bool status = integrate_with_retries(state, half_dt, depth + 1);
+      state.suppress_failure_output = false;
+      return status;
+    };
+
+    for (int step = 0; step < unit_test_rp::nsteps; ++step) {
+      bool any_active = false;
+
+      for (int iz = 0; iz < n_zones; ++iz) {
+        if (active_h[iz] == 0) {
+          dt_h[iz] = 0.0_rt;
+          continue;
+        }
+
+        amrex::Real dd1 = dd_h[iz];
+        amrex::Real rhotmp = 0.0_rt;
+        for (int nn = 0; nn < NumSpec; ++nn) {
+          rhotmp += states_h[iz].xn[nn] * spmasses[nn];
+        }
+        amrex::Real tff =
+            std::sqrt(M_PI * 3.0_rt / (32.0_rt * rhotmp * grav_constant));
+        amrex::Real dt = unit_test_rp::tff_reduc * tff;
+        amrex::Real dd_new = dd1 + dt * (dd1 / tff);
+
+        if (dt < 10.0_rt || dd_new > 2.0e-6_rt) {
+          active_h[iz] = 0;
+          dt_h[iz] = 0.0_rt;
+          continue;
+        }
+
+        amrex::Real ratio = dd_new / dd1;
+        for (int nn = 0; nn < NumSpec; ++nn) {
+          states_h[iz].xn[nn] *= ratio;
+        }
+        states_h[iz].rho *= ratio;
+
+        states_h[iz].suppress_failure_output = true;
+        states_before[iz] = states_h[iz];
+
+        dd_h[iz] = dd_new;
+        dt_h[iz] = dt;
+        any_active = true;
+      }
+
+      if (!any_active) {
+        break;
+      }
+
+      if (use_gpu) {
+        amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, states_h.begin(),
+                              states_h.end(), states_d.begin());
+        amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, dt_h.begin(),
+                              dt_h.end(), dt_d.begin());
+        amrex::Gpu::streamSynchronize();
+
+        burn_t *states_ptr = states_d.data();
+        const amrex::Real *dt_ptr = dt_d.data();
+
+        amrex::ParallelFor(
+            n_zones, [states_ptr, dt_ptr] AMREX_GPU_DEVICE(int idx) noexcept {
+              amrex::Real dt = dt_ptr[idx];
+              if (dt <= 0.0_rt) {
+                return;
+              }
+              burner(states_ptr[idx], dt);
+            });
+        amrex::Gpu::streamSynchronize();
+
+        amrex::Gpu::copyAsync(amrex::Gpu::deviceToHost, states_d.begin(),
+                              states_d.end(), states_h.begin());
+        amrex::Gpu::streamSynchronize();
+      } else {
+        burn_t *states_ptr = states_h.data();
+        amrex::Real *dt_ptr = dt_h.data();
+        amrex::ParallelForOMP(
+            n_zones, [states_ptr, dt_ptr, &burn_backend](int iz) noexcept {
+              amrex::Real dt = dt_ptr[iz];
+              if (dt <= 0.0_rt) {
+                return;
+              }
+              burn_backend(states_ptr[iz], dt);
+            });
+      }
+
+      for (int iz = 0; iz < n_zones; ++iz) {
+        if (dt_h[iz] <= 0.0_rt) {
+          continue;
+        }
+
+        auto &state = states_h[iz];
+
+        if (!state.success || state.e <= 0.0_rt) {
+          burn_t retry_state = states_before[iz];
+          retry_state.suppress_failure_output = false;
+          ++retry_total;
+          if (integrate_with_retries(retry_state, dt_h[iz], 0)) {
+            state = retry_state;
+          } else {
+            state = states_before[iz];
+            state.success = false;
+            active_h[iz] = 0;
+            dt_h[iz] = 0.0_rt;
+            continue;
+          }
+        }
+
+        state.suppress_failure_output = false;
+        enforce_post_burn_constraints(state);
+        time_h[iz] += dt_h[iz];
+      }
     }
 
-    std::cout << "step " << n << " done" << std::endl;
-
-    // scale the number densities
-    for (int nn = 0; nn < NumSpec; ++nn) {
-      state.xn[nn] *= dd / dd1;
+    summary.failures = 0;
+    summary.t_min = std::numeric_limits<amrex::Real>::max();
+    summary.t_max = -std::numeric_limits<amrex::Real>::max();
+    summary.any_success = false;
+    summary.retries = retry_total;
+
+    for (int iz = 0; iz < n_zones; ++iz) {
+      if (!states_h[iz].success || states_h[iz].e <= 0.0_rt) {
+        ++summary.failures;
+        continue;
+      }
+      summary.any_success = true;
+      summary.t_min = std::min(summary.t_min, states_h[iz].T);
+      summary.t_max = std::max(summary.t_max, states_h[iz].T);
     }
 
-    // input the scaled density in burn state
-    state.rho *= dd / dd1;
-
-    // do the actual integration
-    burner(state, dt);
-
-    // ensure positivity and normalize
-    amrex::Real inmfracs[NumSpec] = {-1.0};
-    amrex::Real insum = 0.0_rt;
-    for (int nn = 0; nn < NumSpec; ++nn) {
-      state.xn[nn] = amrex::max(state.xn[nn], small_x);
-      inmfracs[nn] = spmasses[nn] * state.xn[nn] / state.rho;
-      insum += inmfracs[nn];
+    final_states.assign(states_h.begin(), states_h.end());
+    final_times.assign(time_h.begin(), time_h.end());
+
+    return (summary.failures == 0);
+  };
+
+  std::vector<burn_t> cpu_states;
+  std::vector<amrex::Real> cpu_times;
+  MultiZoneSummary cpu_summary;
+  auto cpu_start = std::chrono::steady_clock::now();
+  bool cpu_success = run_multi_zone(false, cpu_states, cpu_times, cpu_summary);
+  auto cpu_end = std::chrono::steady_clock::now();
+  std::chrono::duration<double> cpu_wall = cpu_end - cpu_start;
+
+  std::vector<burn_t> gpu_states;
+  std::vector<amrex::Real> gpu_times;
+  MultiZoneSummary gpu_summary;
+  auto gpu_start = std::chrono::steady_clock::now();
+  bool gpu_success = run_multi_zone(true, gpu_states, gpu_times, gpu_summary);
+  auto gpu_end = std::chrono::steady_clock::now();
+  std::chrono::duration<double> gpu_wall = gpu_end - gpu_start;
+
+  auto print_summary = [&](const std::string &label,
+                           const MultiZoneSummary &summary) {
+    std::cout << label << " multi-zone burn complete; zones failed = "
+              << summary.failures << std::endl;
+    if (summary.any_success) {
+      std::cout << label << " multi-zone temperature range: ["
+                << summary.t_min << ", " << summary.t_max << "]"
+                << std::endl;
+      std::cout << label << " multi-zone retries applied: "
+                << summary.retries << std::endl;
+    } else {
+      std::cout << label << " multi-zone burn had no successful zones"
+                << std::endl;
     }
-
-    for (int nn = 0; nn < NumSpec; ++nn) {
-      inmfracs[nn] /= insum;
-      // update the number densities with conserved mass fractions
-      state.xn[nn] = inmfracs[nn] * state.rho / spmasses[nn];
+  };
+
+  print_summary("CPU", cpu_summary);
+  std::cout << "CPU multi-zone burn walltime (s): " << cpu_wall.count()
+            << std::endl;
+  print_summary("GPU", gpu_summary);
+  std::cout << "GPU multi-zone burn walltime (s): " << gpu_wall.count()
+            << std::endl;
+
+  auto within_tol = [&](amrex::Real a, amrex::Real b) -> bool {
+    amrex::Real diff = std::abs(a - b);
+    amrex::Real scale = std::max(std::abs(a), std::abs(b));
+    return diff <= (abs_tol + rel_tol * scale);
+  };
+
+  struct MismatchStats {
+    int count = 0;
+    amrex::Real max_abs_diff = 0.0_rt;
+    int max_zone = -1;
+    amrex::Real cpu_value_at_max = 0.0_rt;
+    amrex::Real gpu_value_at_max = 0.0_rt;
+  };
+
+  std::map<std::string, MismatchStats> mismatch_summary;
+
+  struct NormAccumulator {
+    amrex::Real diff_l1 = 0.0_rt;
+    amrex::Real diff_l2_sq = 0.0_rt;
+    amrex::Real diff_linf = 0.0_rt;
+    amrex::Real ref_l1 = 0.0_rt;
+    amrex::Real ref_l2_sq = 0.0_rt;
+    amrex::Real ref_linf = 0.0_rt;
+    void record(amrex::Real diff, amrex::Real ref) {
+      amrex::Real abs_diff = std::abs(diff);
+      amrex::Real abs_ref = std::abs(ref);
+      diff_l1 += abs_diff;
+      diff_l2_sq += diff * diff;
+      diff_linf = std::max(diff_linf, abs_diff);
+      ref_l1 += abs_ref;
+      ref_l2_sq += ref * ref;
+      ref_linf = std::max(ref_linf, abs_ref);
     }
-
-    // update the number density of electrons due to charge conservation
-    balance_charge(state);
-
-    // reconserve mass fractions post charge conservation
-    insum = 0;
-    for (int nn = 0; nn < NumSpec; ++nn) {
-      state.xn[nn] = amrex::max(state.xn[nn], small_x);
-      inmfracs[nn] = spmasses[nn] * state.xn[nn] / state.rho;
-      insum += inmfracs[nn];
+  };
+
+  std::map<std::string, NormAccumulator> field_norms;
+
+  auto record_mismatch = [&](int zone, const std::string &field,
+                             amrex::Real cpu_value, amrex::Real gpu_value) {
+    auto &stats = mismatch_summary[field];
+    stats.count++;
+    amrex::Real diff = std::abs(cpu_value - gpu_value);
+    if (stats.max_zone < 0 || diff > stats.max_abs_diff) {
+      stats.max_abs_diff = diff;
+      stats.max_zone = zone;
+      stats.cpu_value_at_max = cpu_value;
+      stats.gpu_value_at_max = gpu_value;
     }
+  };
 
-    for (int nn = 0; nn < NumSpec; ++nn) {
-      inmfracs[nn] /= insum;
-      // update the number densities with conserved mass fractions
-      state.xn[nn] = inmfracs[nn] * state.rho / spmasses[nn];
+  auto record_difference = [&](const std::string &field, amrex::Real cpu_value,
+                               amrex::Real gpu_value, bool track_norm) {
+    if (!track_norm) {
+      return;
+    }
+    field_norms[field].record(cpu_value - gpu_value, cpu_value);
+  };
+
+  for (int iz = 0; iz < n_zones; ++iz) {
+    bool cpu_zone_success =
+        cpu_states[iz].success && cpu_states[iz].e > 0.0_rt;
+    bool gpu_zone_success =
+        gpu_states[iz].success && gpu_states[iz].e > 0.0_rt;
+
+    if (cpu_zone_success != gpu_zone_success) {
+      record_mismatch(iz, "success_flag",
+                      cpu_zone_success ? 1.0_rt : 0.0_rt,
+                      gpu_zone_success ? 1.0_rt : 0.0_rt);
+      record_difference("success_flag",
+                        cpu_zone_success ? 1.0_rt : 0.0_rt,
+                        gpu_zone_success ? 1.0_rt : 0.0_rt, false);
+      continue;
+    }
+    if (!cpu_zone_success) {
+      continue;
     }
 
-    // get the updated T
-    eos(eos_input_re, state);
+    if (!within_tol(cpu_times[iz], gpu_times[iz])) {
+      record_mismatch(iz, "time", cpu_times[iz], gpu_times[iz]);
+    }
+    record_difference("time", cpu_times[iz], gpu_times[iz], false);
 
-    t += dt;
+    if (!within_tol(cpu_states[iz].rho, gpu_states[iz].rho)) {
+      record_mismatch(iz, "rho", cpu_states[iz].rho, gpu_states[iz].rho);
+    }
+    record_difference("rho", cpu_states[iz].rho, gpu_states[iz].rho, false);
+    if (!within_tol(cpu_states[iz].T, gpu_states[iz].T)) {
+      record_mismatch(iz, "T", cpu_states[iz].T, gpu_states[iz].T);
+    }
+    record_difference("temperature", cpu_states[iz].T, gpu_states[iz].T, true);
+    if (!within_tol(cpu_states[iz].e, gpu_states[iz].e)) {
+      record_mismatch(iz, "e", cpu_states[iz].e, gpu_states[iz].e);
+    }
+    record_difference("e", cpu_states[iz].e, gpu_states[iz].e, false);
+    for (int n = 0; n < NumSpec; ++n) {
+      if (!within_tol(cpu_states[iz].xn[n], gpu_states[iz].xn[n])) {
+        std::string label =
+            "xn(" + std::string(short_spec_names_cxx[n]) + ")";
+        record_mismatch(iz, label, cpu_states[iz].xn[n],
+                        gpu_states[iz].xn[n]);
+      }
+      std::string norm_label =
+          "number_density(" + std::string(short_spec_names_cxx[n]) + ")";
+      record_difference(norm_label, cpu_states[iz].xn[n],
+                        gpu_states[iz].xn[n], true);
+    }
+  }
 
-    state_over_time << std::setw(10) << t;
-    state_over_time << std::setw(15) << state.rho;
-    state_over_time << std::setw(12) << state.T;
-    for (int x = 0; x < NumSpec; ++x) {
-      state_over_time << std::setw(15) << state.xn[x];
+  if (!field_norms.empty()) {
+    std::cout << "CPU/GPU multi-zone difference norms:" << std::endl;
+    auto safe_norm_ratio = [](amrex::Real num, amrex::Real denom) {
+      if (denom > 0.0_rt) {
+        return num / denom;
+      }
+      return (num == 0.0_rt) ? 0.0_rt
+                             : std::numeric_limits<amrex::Real>::infinity();
+    };
+    auto print_norm = [&](const std::string &label,
+                          const NormAccumulator &acc) {
+      amrex::Real diff_l2 = std::sqrt(acc.diff_l2_sq);
+      amrex::Real ref_l2 = std::sqrt(acc.ref_l2_sq);
+      std::cout << "  " << label
+                << ": L1 = " << safe_norm_ratio(acc.diff_l1, acc.ref_l1)
+                << ", L2 = " << safe_norm_ratio(diff_l2, ref_l2)
+                << ", L_inf = "
+                << safe_norm_ratio(acc.diff_linf, acc.ref_linf) << std::endl;
+    };
+    auto temp_it = field_norms.find("temperature");
+    if (temp_it != field_norms.end()) {
+      print_norm("temperature", temp_it->second);
+    }
+    for (int n = 0; n < NumSpec; ++n) {
+      std::string label =
+          "number_density(" + std::string(short_spec_names_cxx[n]) + ")";
+      auto it = field_norms.find(label);
+      if (it != field_norms.end()) {
+        print_norm(label, it->second);
+      }
     }
-    state_over_time << std::endl;
+  } else {
+    std::cout << "No comparable CPU/GPU multi-zone states to compute norms"
+              << std::endl;
   }
-  state_over_time.close();
-
-  // output diagnostics to the terminal
-  std::cout << "------------------------------------" << std::endl;
-  std::cout << "successful? " << state.success << std::endl;
-
-  std::cout << "------------------------------------" << std::endl;
-  std::cout << "T initial = " << state_in.T << std::endl;
-  std::cout << "T final =   " << state.T << std::endl;
-  std::cout << "Eint initial = " << state_in.e << std::endl;
-  std::cout << "Eint final =   " << state.e << std::endl;
-  std::cout << "rho initial = " << state_in.rho << std::endl;
-  std::cout << "rho final =   " << state.rho << std::endl;
-
-  std::cout << "------------------------------------" << std::endl;
-  std::cout << "New number densities: " << std::endl;
-  for (int n = 0; n < NumSpec; ++n) {
-    std::cout << state.xn[n] << std::endl;
+
+  if (mismatch_summary.empty()) {
+    std::cout << "CPU/GPU multi-zone states agree within tolerances"
+              << std::endl;
+  } else {
+    std::cout << "CPU/GPU mismatches detected:" << std::endl;
+    for (const auto &entry : mismatch_summary) {
+      const auto &field = entry.first;
+      const auto &stats = entry.second;
+      std::cout << "  field " << field << " mismatched in " << stats.count
+                << " zone(s)";
+      if (stats.max_zone >= 0) {
+        std::cout << "; largest difference at zone " << stats.max_zone
+                  << " (CPU = " << stats.cpu_value_at_max
+                  << ", GPU = " << stats.gpu_value_at_max << ")";
+      }
+      std::cout << std::endl;
+    }
   }
 
-  return state.success;
+  return (cpu_success && gpu_success && mismatch_summary.empty());
 }
 #endif
diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem
index 27305ba634..d25657d102 100644
--- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem
+++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem
@@ -26,6 +26,8 @@ unit_test.primary_species_12 = 1e-40
 unit_test.primary_species_13 = 1e-40
 unit_test.primary_species_14 = 0.0775e0
 
+unit_test.n_zones = 32**3
+
 # integrator runtime parameters
 # are we using primordial chemistry? then we use number densities
 integrator.use_number_densities = 1
@@ -43,7 +45,7 @@ integrator.X_reject_buffer = 1e100
 # Set which jacobian to use
 # 1 = analytic jacobian
 # 2 = numerical jacobian
-integrator.jacobian = 1
+integrator.jacobian = 2
 # do you want to normalize abundances within VODE? (you don't!)
 integrator.renormalize_abundances = 0
 # tolerances
diff --git a/unit_test/burn_cell_primordial_chem/main.cpp b/unit_test/burn_cell_primordial_chem/main.cpp
index 5e458f8936..622d0eabfa 100644
--- a/unit_test/burn_cell_primordial_chem/main.cpp
+++ b/unit_test/burn_cell_primordial_chem/main.cpp
@@ -23,11 +23,12 @@ int main(int argc, char *argv[]) {
     std::string const run_prefix = "burn_cell_primordial_chem_";
     std::string input_run_prefix;
     pp.query("run_prefix", input_run_prefix);
+    int n_zones = 128;
+    pp.query("n_zones", n_zones);
+    amrex::Print() << "n_zones = " << n_zones << "\n";
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(run_prefix == input_run_prefix,
                                      "input file is missing or incorrect!");
 
-    std::cout << "starting the single zone burn..." << std::endl;
-
     ParmParse ppa("amr");
 
     init_unit_test();
@@ -39,7 +40,10 @@ int main(int argc, char *argv[]) {
     // C++ Network, RHS, screening, rates initialization
     network_init();
 
-    success = burn_cell_c();
+    std::cout << "\nstarting the multi-zone burn..." << std::endl;
+    int multi_success = burn_cell_multi_c(n_zones);
+
+    success = multi_success;
   }
 
   amrex::Finalize();
diff --git a/unit_test/test_linear_algebra/test_linear_algebra.H b/unit_test/test_linear_algebra/test_linear_algebra.H
index 0762163da3..caf5677f84 100644
--- a/unit_test/test_linear_algebra/test_linear_algebra.H
+++ b/unit_test/test_linear_algebra/test_linear_algebra.H
@@ -175,7 +175,7 @@ void linear_algebra() {
 
     // now try to output a Jacobian mask based on the actual Jacobian
 
-    ArrayUtil::MathArray2D<1, INT_NEQS, 1, INT_NEQS> jac;
+    ArrayUtil::MathArray2D<jac_t, 1, INT_NEQS, 1, INT_NEQS> jac;
 
     burn_t burn_state;
     burn_state.rho = 1.e8;
diff --git a/unit_test/test_rhs/rhs_zones.H b/unit_test/test_rhs/rhs_zones.H
index 87a3ec559a..54a7bfaba0 100644
--- a/unit_test/test_rhs/rhs_zones.H
+++ b/unit_test/test_rhs/rhs_zones.H
@@ -41,7 +41,7 @@ bool do_rhs (int i, int j, int k, amrex::Array4<amrex::Real> const& state, const
 
     amrex::Array1D<amrex::Real, 1, neqs> ydot;
 
-    ArrayUtil::MathArray2D<1, neqs, 1, neqs> jac;
+    ArrayUtil::MathArray2D<jac_t, 1, neqs, 1, neqs> jac;
 
 #ifdef NEW_NETWORK_IMPLEMENTATION
     RHS::rhs(burn_state, ydot);