From 86a9550ffb1e9ac025e47922bde201d236fd7cca Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Wed, 10 Dec 2025 14:21:45 -0500
Subject: [PATCH 01/32] First commit for 2x OAP for mirrors. Major changes are
 made to how the maximum magnetic field is determined. Bmag max is stored as a
 gkyl_array. Right now, we only do this for bmag, but we need to store phi as
 a 1d maximum array. I haven't decided on the final design for how 2x OAP
 simulations should be accomodated. Perhpas we need some general
 gkyl_dg_array_reduce methods that take a 2D array and turn it into a 1D array
 instead of a 0D number. This is a kind of reduction method, but it's not a
 total reduction. I tested the regression test included for a 2x2v boltzmann
 mirror and the output of the magnetic field looks correct. The current
 implementation evaluates bmag at cell corners, but we ideally should do the
 corners in Z, but the quadrature nodes in psi.

---
 gyrokinetic/apps/gk_species_damping.c         |   86 +-
 gyrokinetic/apps/gk_species_fdot_multiplier.c |   93 +-
 gyrokinetic/apps/gkyl_gyrokinetic_priv.h      |   16 +-
 gyrokinetic/apps/gyrokinetic.c                |    8 +
 .../creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c | 1080 +++++++++++++++++
 .../unit/ctest_loss_cone_mask_gyrokinetic.c   |   63 +-
 gyrokinetic/zero/gk_geometry.c                |  248 ++++
 gyrokinetic/zero/gkyl_gk_geometry.h           |   34 +
 .../zero/gkyl_loss_cone_mask_gyrokinetic.h    |    8 +-
 .../gkyl_loss_cone_mask_gyrokinetic_priv.h    |   13 +-
 gyrokinetic/zero/loss_cone_mask_gyrokinetic.c |  103 +-
 .../zero/loss_cone_mask_gyrokinetic_cu.cu     |   12 +-
 12 files changed, 1598 insertions(+), 166 deletions(-)
 create mode 100644 gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index 236451383e..fc61d3213e 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -99,44 +99,44 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
       damp->evolve = true; // Since the loss cone boundary is proportional to phi(t).
 
-      // Maximum bmag and its location.
-      // NOTE: if the same max bmag occurs at multiple locations,
-      // bmag_max_coord may have different values on different MPI processes.
-      double bmag_max_coord_ho[GKYL_MAX_CDIM];
-      double bmag_max_ho = gkyl_gk_geometry_reduce_arg_bmag(app->gk_geom, GKYL_MAX, bmag_max_coord_ho);
-      double bmag_max_local = bmag_max_ho;
-      double bmag_max_global;
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, &bmag_max_local, &bmag_max_global);
-      double bmag_max_coord_local[app->cdim], bmag_max_coord_global[app->cdim];
-      if (fabs(bmag_max_ho - bmag_max_global) < 1e-16) {
-        for (int d=0; d<app->cdim; d++)
-          bmag_max_coord_local[d] = bmag_max_coord_ho[d];
-      }
-      else {
-        for (int d=0; d<app->cdim; d++)
-          bmag_max_coord_local[d] = -DBL_MAX;
-      }
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, app->cdim, bmag_max_coord_local, bmag_max_coord_global);
-
-      if (app->use_gpu) {
-        damp->bmag_max = gkyl_cu_malloc(sizeof(double));
-        damp->bmag_max_coord = gkyl_cu_malloc(app->cdim*sizeof(double));
-	gkyl_cu_memcpy(damp->bmag_max, &bmag_max_global, sizeof(double), GKYL_CU_MEMCPY_H2D);
-	gkyl_cu_memcpy(damp->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double), GKYL_CU_MEMCPY_H2D);
-      }
-      else {
-        damp->bmag_max = gkyl_malloc(sizeof(double));
-        damp->bmag_max_coord = gkyl_malloc(app->cdim*sizeof(double));
-	memcpy(damp->bmag_max, &bmag_max_global, sizeof(double));
-	memcpy(damp->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double));
+      // Store pointers to per-field-line bmag_max arrays from gk_geometry.
+      damp->bmag_max = app->gk_geom->bmag_max;
+      damp->bmag_max_z_coord = app->gk_geom->bmag_max_z_coord;
+      damp->bmag_max_basis = &app->gk_geom->bmag_max_basis;
+      damp->bmag_max_range = &app->gk_geom->bmag_max_range;
+
+      // Compute reference coordinate for phi evaluation at mirror throat.
+      // For 1x: use the single bmag_max_z value.
+      // For 2x: use the bmag_max_z at the center of the psi domain (mid field line).
+      double bmag_max_coord_ref_ho[GKYL_MAX_CDIM];
+      if (app->cdim == 1) {
+        // 1x case: single value.
+        const double *bmag_max_z_d = gkyl_array_cfetch(app->gk_geom->bmag_max_z_coord, 0);
+        bmag_max_coord_ref_ho[0] = bmag_max_z_d[0];
+      } else {
+        // 2x case: use the center psi cell's bmag_max_z.
+        int mid_psi_idx = (app->gk_geom->bmag_max_range.lower[0] + app->gk_geom->bmag_max_range.upper[0]) / 2;
+        int psi_idx[1] = {mid_psi_idx};
+        long bmag_max_z_linidx = gkyl_range_idx(&app->gk_geom->bmag_max_range, psi_idx);
+        const double *bmag_max_z_d = gkyl_array_cfetch(app->gk_geom->bmag_max_z_coord, bmag_max_z_linidx);
+        double xc[1] = {0.0};
+        double z_val = app->gk_geom->bmag_max_basis.eval_expand(xc, bmag_max_z_d);
+        double psi_lo = app->gk_geom->bmag_max_grid.lower[0];
+        double psi_dx = app->gk_geom->bmag_max_grid.dx[0];
+        double psi_val = psi_lo + (mid_psi_idx - 0.5) * psi_dx;
+        bmag_max_coord_ref_ho[0] = psi_val;
+        bmag_max_coord_ref_ho[1] = z_val;
       }
 
-      // Electrostatic potential at bmag_max_coord.
+      // Allocate and copy reference coordinate.
       if (app->use_gpu) {
+        damp->bmag_max_coord_ref = gkyl_cu_malloc(app->cdim*sizeof(double));
+        gkyl_cu_memcpy(damp->bmag_max_coord_ref, bmag_max_coord_ref_ho, app->cdim*sizeof(double), GKYL_CU_MEMCPY_H2D);
         damp->phi_m = gkyl_cu_malloc(sizeof(double));
         damp->phi_m_global = gkyl_cu_malloc(sizeof(double));
-      }
-      else {
+      } else {
+        damp->bmag_max_coord_ref = gkyl_malloc(app->cdim*sizeof(double));
+        memcpy(damp->bmag_max_coord_ref, bmag_max_coord_ref_ho, app->cdim*sizeof(double));
         damp->phi_m = gkyl_malloc(sizeof(double));
         damp->phi_m_global = gkyl_malloc(sizeof(double));
       }
@@ -152,6 +152,9 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
         .vel_map = gks->vel_map,
         .bmag = app->gk_geom->geo_int.bmag,
         .bmag_max = damp->bmag_max,
+        .bmag_max_z_coord = damp->bmag_max_z_coord,
+        .bmag_max_basis = damp->bmag_max_basis,
+        .bmag_max_range = damp->bmag_max_range,
         .mass = gks->info.mass,
         .charge = gks->info.charge,
         .num_quad = num_quad,
@@ -178,7 +181,7 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
 
       // Compute the initial damping rate (assuming phi=0 because phi hasn't been computed).
       // Find the potential at the mirror throat.
-      gkyl_dg_basis_ops_eval_array_at_coord_comp(app->field->phi_smooth, damp->bmag_max_coord,
+      gkyl_dg_basis_ops_eval_array_at_coord_comp(app->field->phi_smooth, damp->bmag_max_coord_ref,
         app->basis_on_dev, &app->grid, &app->local, damp->phi_m);
       gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, damp->phi_m, damp->phi_m_global);
       // Project the loss cone mask.
@@ -212,7 +215,7 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
     }
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
       // Find the potential at the mirror throat.
-      gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, damp->bmag_max_coord,
+      gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, damp->bmag_max_coord_ref,
         app->basis_on_dev, &app->grid, &app->local, damp->phi_m);
       gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, damp->phi_m, damp->phi_m_global);
 
@@ -246,22 +249,21 @@ gk_species_damping_release(const struct gkyl_gyrokinetic_app *app, const struct
 {
   if (damp->type) {
     gkyl_array_release(damp->rate);
-    if (app->use_gpu)
+    if (app->use_gpu) {
       gkyl_array_release(damp->rate_host);
+    }
 
     if (damp->type == GKYL_GK_DAMPING_USER_INPUT) {
       // Nothing to release.
     }
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
+      // Note: bmag_max and bmag_max_z_coord are owned by gk_geometry, not us.
       if (app->use_gpu) {
-        gkyl_cu_free(damp->bmag_max);
-        gkyl_cu_free(damp->bmag_max_coord);
+        gkyl_cu_free(damp->bmag_max_coord_ref);
         gkyl_cu_free(damp->phi_m);
         gkyl_cu_free(damp->phi_m_global);
-      }
-      else {
-        gkyl_free(damp->bmag_max);
-        gkyl_free(damp->bmag_max_coord);
+      } else {
+        gkyl_free(damp->bmag_max_coord_ref);
         gkyl_free(damp->phi_m);
         gkyl_free(damp->phi_m_global);
       }
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index c3006e9403..99987191fc 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -58,7 +58,7 @@ gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, con
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
   // Find the potential at the mirror throat.
-  gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, fdmul->bmag_max_coord,
+  gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, fdmul->bmag_max_coord_ref,
     app->basis_on_dev, &app->grid, &app->local, fdmul->phi_m);
   gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, fdmul->phi_m, fdmul->phi_m_global);
 
@@ -159,44 +159,47 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       int num_quad = gks->basis.poly_order+1; // This can be p+1 or 1. Must be
                                               // at leat p+1 for Gauss-Lobatto.
 
-      // Maximum bmag and its location.
-      // NOTE: if the same max bmag occurs at multiple locations,
-      // bmag_max_coord may have different values on different MPI processes.
-      double bmag_max_coord_ho[GKYL_MAX_CDIM];
-      double bmag_max_ho = gkyl_gk_geometry_reduce_arg_bmag(app->gk_geom, GKYL_MAX, bmag_max_coord_ho);
-      double bmag_max_local = bmag_max_ho;
-      double bmag_max_global;
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, &bmag_max_local, &bmag_max_global);
-      double bmag_max_coord_local[app->cdim], bmag_max_coord_global[app->cdim];
-      if (fabs(bmag_max_ho - bmag_max_global) < 1e-16) {
-        for (int d=0; d<app->cdim; d++)
-          bmag_max_coord_local[d] = bmag_max_coord_ho[d];
-      }
-      else {
-        for (int d=0; d<app->cdim; d++)
-          bmag_max_coord_local[d] = -DBL_MAX;
-      }
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, app->cdim, bmag_max_coord_local, bmag_max_coord_global);
-
-      if (app->use_gpu) {
-        fdmul->bmag_max = gkyl_cu_malloc(sizeof(double));
-        fdmul->bmag_max_coord = gkyl_cu_malloc(app->cdim*sizeof(double));
-	gkyl_cu_memcpy(fdmul->bmag_max, &bmag_max_global, sizeof(double), GKYL_CU_MEMCPY_H2D);
-	gkyl_cu_memcpy(fdmul->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double), GKYL_CU_MEMCPY_H2D);
-      }
-      else {
-        fdmul->bmag_max = gkyl_malloc(sizeof(double));
-        fdmul->bmag_max_coord = gkyl_malloc(app->cdim*sizeof(double));
-	memcpy(fdmul->bmag_max, &bmag_max_global, sizeof(double));
-	memcpy(fdmul->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double));
+      // Store pointers to per-field-line bmag_max arrays from gk_geometry.
+      fdmul->bmag_max = app->gk_geom->bmag_max;
+      fdmul->bmag_max_z_coord = app->gk_geom->bmag_max_z_coord;
+      fdmul->bmag_max_basis = &app->gk_geom->bmag_max_basis;
+      fdmul->bmag_max_range = &app->gk_geom->bmag_max_range;
+
+      // Compute reference coordinate for phi evaluation at mirror throat.
+      // For 1x: use the single bmag_max_z value.
+      // For 2x: use the bmag_max_z at the center of the psi domain (mid field line).
+      double bmag_max_coord_ref_ho[GKYL_MAX_CDIM];
+      if (app->cdim == 1) {
+        // 1x case: single value.
+        const double *bmag_max_z_d = gkyl_array_cfetch(app->gk_geom->bmag_max_z_coord, 0);
+        bmag_max_coord_ref_ho[0] = bmag_max_z_d[0];
+      } else {
+        // 2x case: use the center psi cell's bmag_max_z.
+        // Get the mid psi index.
+        int mid_psi_idx = (app->gk_geom->bmag_max_range.lower[0] + app->gk_geom->bmag_max_range.upper[0]) / 2;
+        int psi_idx[1] = {mid_psi_idx};
+        long bmag_max_z_linidx = gkyl_range_idx(&app->gk_geom->bmag_max_range, psi_idx);
+        const double *bmag_max_z_d = gkyl_array_cfetch(app->gk_geom->bmag_max_z_coord, bmag_max_z_linidx);
+        // Evaluate at cell center (logical coord 0).
+        double xc[1] = {0.0};
+        double z_val = app->gk_geom->bmag_max_basis.eval_expand(xc, bmag_max_z_d);
+        // Compute the psi coordinate at the mid cell.
+        double psi_lo = app->gk_geom->bmag_max_grid.lower[0];
+        double psi_dx = app->gk_geom->bmag_max_grid.dx[0];
+        double psi_val = psi_lo + (mid_psi_idx - 0.5) * psi_dx;
+        bmag_max_coord_ref_ho[0] = psi_val;
+        bmag_max_coord_ref_ho[1] = z_val;
       }
 
-      // Electrostatic potential at bmag_max_coord.
+      // Allocate and copy reference coordinate.
       if (app->use_gpu) {
+        fdmul->bmag_max_coord_ref = gkyl_cu_malloc(app->cdim*sizeof(double));
+        gkyl_cu_memcpy(fdmul->bmag_max_coord_ref, bmag_max_coord_ref_ho, app->cdim*sizeof(double), GKYL_CU_MEMCPY_H2D);
         fdmul->phi_m = gkyl_cu_malloc(sizeof(double));
         fdmul->phi_m_global = gkyl_cu_malloc(sizeof(double));
-      }
-      else {
+      } else {
+        fdmul->bmag_max_coord_ref = gkyl_malloc(app->cdim*sizeof(double));
+        memcpy(fdmul->bmag_max_coord_ref, bmag_max_coord_ref_ho, app->cdim*sizeof(double));
         fdmul->phi_m = gkyl_malloc(sizeof(double));
         fdmul->phi_m_global = gkyl_malloc(sizeof(double));
       }
@@ -212,7 +215,9 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
         .vel_map = gks->vel_map,
         .bmag = app->gk_geom->geo_int.bmag,
         .bmag_max = fdmul->bmag_max,
-        .bmag_max_loc = fdmul->bmag_max_coord,
+        .bmag_max_z_coord = fdmul->bmag_max_z_coord,
+        .bmag_max_basis = fdmul->bmag_max_basis,
+        .bmag_max_range = fdmul->bmag_max_range,
         .mass = gks->info.mass,
         .charge = gks->info.charge,
         .qtype = qtype,
@@ -226,10 +231,11 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
 
       fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_loss_cone_mult;
       fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_mult;
-      if (fdmul->write_diagnostics)
+      if (fdmul->write_diagnostics) {
         fdmul->write_func = gk_species_fdot_multiplier_write_enabled;
-      else
+      } else {
         gkyl_array_release(fdmul->multiplier_host);
+      }
     }
   }
 }
@@ -268,22 +274,21 @@ gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, const
 {
   if (fdmul->type) {
     gkyl_array_release(fdmul->multiplier);
-    if (fdmul->write_diagnostics)
+    if (fdmul->write_diagnostics) {
       gkyl_array_release(fdmul->multiplier_host);
+    }
 
     if (fdmul->type == GKYL_GK_DAMPING_USER_INPUT) {
       // Nothing to release.
     }
     else if (fdmul->type == GKYL_GK_DAMPING_LOSS_CONE) {
+      // Note: bmag_max and bmag_max_z_coord are owned by gk_geometry, not us.
       if (app->use_gpu) {
-        gkyl_cu_free(fdmul->bmag_max);
-        gkyl_cu_free(fdmul->bmag_max_coord);
+        gkyl_cu_free(fdmul->bmag_max_coord_ref);
         gkyl_cu_free(fdmul->phi_m);
         gkyl_cu_free(fdmul->phi_m_global);
-      }
-      else {
-        gkyl_free(fdmul->bmag_max);
-        gkyl_free(fdmul->bmag_max_coord);
+      } else {
+        gkyl_free(fdmul->bmag_max_coord_ref);
         gkyl_free(fdmul->phi_m);
         gkyl_free(fdmul->phi_m_global);
       }
diff --git a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
index 92f1d4ba57..5e463a88f4 100644
--- a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
+++ b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
@@ -816,8 +816,12 @@ struct gk_damping {
   struct gkyl_array *rate; // Damping rate.
   struct gkyl_array *rate_host; // Host copy for use in IO and projecting.
   struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask.
-  double *bmag_max; // Maximum magnetic field amplitude.
-  double *bmag_max_coord; // Location of bmag_max.
+  // Per-field-line bmag_max arrays (pointers to gk_geometry's arrays).
+  const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
+  const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
+  const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
+  const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
+  double *bmag_max_coord_ref; // Reference coordinate for phi evaluation at mirror throat.
   double *phi_m, *phi_m_global; // Electrostatic potential at bmag_max.
   struct gkyl_array *scale_prof; // Conf-space scaling factor profile.
   // Functions chosen at runtime.
@@ -832,8 +836,12 @@ struct gk_fdot_multiplier {
   struct gkyl_array *multiplier_host; // Host copy for use in IO and projecting.
   struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
   struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask.
-  double *bmag_max; // Maximum magnetic field amplitude.
-  double *bmag_max_coord; // Location of bmag_max.
+  // Per-field-line bmag_max arrays (pointers to gk_geometry's arrays).
+  const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
+  const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
+  const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
+  const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
+  double *bmag_max_coord_ref; // Reference coordinate for phi evaluation at mirror throat.
   double *phi_m, *phi_m_global; // Electrostatic potential at bmag_max.
   // Functions chosen at runtime.
   void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame);
diff --git a/gyrokinetic/apps/gyrokinetic.c b/gyrokinetic/apps/gyrokinetic.c
index 8065d8c90e..72442cf14a 100644
--- a/gyrokinetic/apps/gyrokinetic.c
+++ b/gyrokinetic/apps/gyrokinetic.c
@@ -399,6 +399,14 @@ gkyl_gyrokinetic_app_new_geom(struct gkyl_gk *gk)
 
   gkyl_gk_geometry_release(gk_geom_3d); // Release temporary 3d geometry.
 
+  // Initialize per-field-line bmag_max arrays.
+  gkyl_gk_geometry_bmag_max_init(app->gk_geom);
+
+  gkyl_comm_array_write(app->comm, &app->gk_geom->bmag_max_grid, &app->gk_geom->bmag_max_range, NULL, app->gk_geom->bmag_max, "bmag_max.gkyl");
+  gkyl_comm_array_write(app->comm, &app->gk_geom->bmag_max_grid, &app->gk_geom->bmag_max_range, NULL, app->gk_geom->bmag_max_z_coord, "bmag_max_z_coord.gkyl");
+  gkyl_comm_array_write(app->comm, &app->gk_geom->bmag_max_grid, &app->gk_geom->bmag_max_nrange, NULL, app->gk_geom->bmag_max_nodal, "bmag_max_nodal.gkyl");
+  gkyl_comm_array_write(app->comm, &app->gk_geom->bmag_max_grid, &app->gk_geom->bmag_max_nrange, NULL, app->gk_geom->bmag_max_z_coord_nodal, "bmag_max_z_coord_nodal.gkyl");
+
   double bmag_min_local, bmag_min_global;
   bmag_min_local = gkyl_gk_geometry_reduce_bmag(app->gk_geom, GKYL_MIN);
   gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MIN, 1, &bmag_min_local, &bmag_min_global);
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
new file mode 100644
index 0000000000..bac3d878ed
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
@@ -0,0 +1,1080 @@
+#include <math.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State of the pseudo orbit-averaged integrator.
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Haven't started.
+  GK_POA_OAP, // Orbit averaged phase.
+  GK_POA_FDP, // Full dynamics phase.
+  GK_POA_COMPLETED, // Finished simulation.
+};
+
+struct gk_poa_phase_params {
+  enum gk_poa_state phase; // Type of phase.
+  int num_frames; // Number of frames.
+  double duration; // Duration.
+  double alpha; // Factor multiplying collisionless terms.
+  bool is_static_field; // Whether to evolve the field.
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multipler.
+};
+
+// Define the context of the simulation. This is basically all the globals
+struct gk_mirror_ctx
+{
+  int cdim, vdim; // Dimensionality.
+
+  // Plasma parameters
+  double mi; // Ion mass.
+  double me; // Electron mass.
+  double qi; // Ion charge.
+  double qe; // Electron charge.
+  double Te0; // Electron temperature.
+  double Ti0; // Ion temperature.
+  double n0; // Density.
+  double B_p; // Plasma magnetic field (mirror center).
+  double beta; // Plasma beta in the center.
+  double tau; // Temperature ratio.
+
+  // Parameters controlling initial conditions.
+  double alim;
+  double alphaIC0;
+  double alphaIC1;
+  double Ti_perp0; // Reference ion perp temperature.
+  double Ti_par0; // Reference ion par temperature.
+  double Ti_perp_m; // Ion perp temperature at the throat.
+  double Ti_par_m; // Ion par temperature at the throat.
+  double cs_m; // Ion sound speed at the throat.
+
+  double nuFrac; // Fraction multiplying collision frequency.
+  double logLambdaIon; // Ion Coulomb logarithm.
+  double nuIon; // Ion-ion collision freq.
+
+  double vti; // Ion thermal speed.
+  double vte; // Electron thermal speed.
+  double c_s; // Ion sound speed.
+  double omega_ci; // Ion gyrofrequency.
+  double rho_s; // Ion sound gyroradius.
+
+  double RatZeq0; // Radius of the field line at Z=0.
+  double Z_min; // Minimum axial coordinate Z.
+  double Z_max; // Maximum axial coordinate Z.
+  double z_min; // Minimum value of the position along the field line.
+  double z_max; // Maximum value of the position along the field line.
+  double psi_min; // Minimum value of the poloidal flux.
+  double psi_max; // Maximum value of the poloidal flux.
+  double psi_in, z_in; // Auxiliary psi and z.
+
+  // Magnetic equilibrium model.
+  double mcB;
+  double gamma;
+  double Z_m; // Axial coordinate at mirror throat.
+  double z_m; // Computational coordinate at mirror throat.
+
+  // Source parameters
+  double NSrcIon;
+  double lineLengthSrcIon;
+  double sigSrcIon;
+  double NSrcFloorIon;
+  double TSrc0Ion;
+  double TSrcFloorIon;
+
+  // Physical velocity space limits.
+  double vpar_min_ion, vpar_max_ion;
+  double mu_max_ion;
+  // Computational velocity space limits.
+  double vpar_lin_fac_inv, mu_lin_fac_inv; // Inverse factor of where linear mapping ends.
+  double vpar_pow, mu_pow; // Power of the velocity grid.
+  double vpar_min_ion_c, vpar_max_ion_c;
+  double mu_min_ion_c, mu_max_ion_c;
+
+  // Grid DOF.
+  int Nx;
+  int Nz;
+  int Nvpar;
+  int Nmu;
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order;
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run.
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+};
+
+double
+psi_RZ(double RIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+  double psi = 0.5 * pow(RIn, 2.) * mcB *
+               (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+                1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
+  return psi;
+}
+
+double
+R_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double Rout = sqrt(2.0 * psiIn / (app->mcB * 
+    (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) +
+     1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))))));
+  return Rout;
+}
+
+void
+Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+  *BRad = -(1.0 / 2.0) * Rcoord * mcB *
+          (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
+            2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
+  *BZ = mcB *
+        (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
+         1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))));
+  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+}
+
+double
+integrand_z_psiZ(double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = app->psi_in;
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, ZIn, ctx, &BRad, &BZ, &Bmag);
+  return Bmag / BZ;
+}
+
+double
+z_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  app->psi_in = psiIn;
+  double eps = 0.0;
+  struct gkyl_qr_res integral;
+  if (eps <= ZIn)
+  {
+    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14);
+  }
+  else
+  {
+    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14);
+    integral.res = -integral.res;
+  }
+  return integral.res;
+}
+
+// Invert z(Z) via root-finding.
+double
+root_Z_psiz(double Z, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  return app->z_in - z_psiZ(app->psi_in, Z, ctx);
+}
+
+double
+Z_psiz(double psiIn, double zIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double maxL = app->Z_max - app->Z_min;
+  double eps = maxL / app->Nz;   // Interestingly using a smaller eps yields larger errors in some geo quantities.
+  app->psi_in = psiIn;
+  app->z_in = zIn;
+  struct gkyl_qr_res Zout;
+  if (zIn >= 0.0)
+  {
+    double fl = root_Z_psiz(-eps, ctx);
+    double fr = root_Z_psiz(app->Z_max + eps, ctx);
+    Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14);
+  }
+  else
+  {
+    double fl = root_Z_psiz(app->Z_min - eps, ctx);
+    double fr = root_Z_psiz(eps, ctx);
+    Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14);
+  }
+  return Zout.res;
+}
+
+void
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double psi = xn[0];
+  double z = xn[1];
+
+  struct gk_mirror_ctx *app = ctx;
+  double NSrc = app->NSrcIon;
+  double zSrc = app->lineLengthSrcIon;
+  double sigSrc = app->sigSrcIon;
+  double NSrcFloor = app->NSrcFloorIon;
+
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+
+  if (fabs(Z) <= app->Z_m) {
+    fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2))) *
+                              exp(-pow(z - zSrc, 2) / (2.0 * pow(sigSrc, 2))));
+  } else {
+    fout[0] = 1e-16;
+  }
+}
+
+void
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  fout[0] = 0.0;
+}
+
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xn[1];
+
+  struct gk_mirror_ctx *app = ctx;
+  double sigSrc = app->sigSrcIon;
+  double TSrc0 = app->TSrc0Ion;
+  double Tfloor = app->TSrcFloorIon;
+
+  if (fabs(z) <= 2.0 * sigSrc)
+  {
+    fout[0] = TSrc0;
+  }
+  else
+  {
+    fout[0] = Tfloor;
+  }
+}
+
+// Ion initial conditions
+void
+eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xn[1];
+
+  struct gk_mirror_ctx *app = ctx;
+  double z_m = app->z_m;
+  double sigma = 0.9*z_m;
+  if (fabs(z) <= sigma)
+  {
+    fout[0] = 0.5*app->n0*(1. + tanh(10. * sigma * fabs(sigma - fabs(z))));
+  }
+  else
+  {
+    fout[0] = 0.5*app->n0*exp(-5 * (fabs(sigma - fabs(z))));
+  }
+}
+
+void
+eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xn[1];
+
+  struct gk_mirror_ctx *app = ctx;
+  double cs_m = app->cs_m;
+  double z_m = app->z_m;
+  double z_max = app->z_max;
+  if (fabs(z) <= z_m)
+  {
+    fout[0] = 0.0;
+  }
+  else
+  {
+    fout[0] = (fabs(z) / z) * cs_m * tanh(3 * (z_max - z_m) * fabs(fabs(z) - z_m));
+  }
+}
+
+void
+eval_temp_par_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xn[1];
+
+  struct gk_mirror_ctx *app = ctx;
+  double z_m = app->z_m;
+  double Ti_par0 = app->Ti_par0;
+  double Ti_par_m = app->Ti_par_m;
+  if (fabs(z) <= z_m)
+  {
+    fout[0] = Ti_par_m+(Ti_par0-Ti_par_m)*tanh(4 * fabs(z_m - fabs(z)));
+  }
+  else
+  {
+    fout[0] = Ti_par_m;
+  }
+}
+
+void
+eval_temp_perp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xn[1];
+
+  struct gk_mirror_ctx *app = ctx;
+  double z_m = app->z_m;
+  double Ti_perp0 = app->Ti_perp0;
+  double Ti_perp_m = app->Ti_perp_m;
+  if (fabs(z) <= z_m)
+  {
+    fout[0] = Ti_perp_m - Ti_perp0*tanh(3.*fabs(z_m-fabs(z)));
+  }
+  else
+  {
+    fout[0] = Ti_perp_m * GKYL_MAX2(1.e-3, exp(-5. * (fabs(z_m - fabs(z)))));
+  }
+}
+
+void
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->nuIon;
+}
+
+// Geometry evaluation functions for the gk app
+// mapc2p must assume a 3d input xc
+void
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
+{
+  double psi = xc[0];
+  double theta = xc[1];
+  double z = xc[2];
+
+  double Z = Z_psiz(psi, z, ctx);
+  double R = R_psiZ(psi, Z, ctx);
+
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  double x = R * cos(theta);
+  double y = R * sin(theta);
+  xp[0] = x;
+  xp[1] = y;
+  xp[2] = Z;
+}
+
+// bmag_func must assume a 3d input xc
+void
+bmag_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double psi = xc[0];
+  double z = xc[2];
+
+  struct gk_mirror_ctx *app = ctx;
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  fout[0] = Bmag;
+}
+
+// bfield_func must assume a 3d input xc
+void
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double psi = xc[0];
+  double z = xc[2];
+
+  struct gk_mirror_ctx *app = ctx;
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+
+  double phi = xc[1];
+  // zc are computational coords. 
+  // Set Cartesian components of magnetic field.
+  fout[0] = BRad*cos(phi);
+  fout[1] = BRad*sin(phi);
+  fout[2] = BZ;
+}
+
+void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double vpar_max_ion = app->vpar_max_ion;
+  double mu_max_ion = app->mu_max_ion;
+
+  double cvpar = vc[0], cmu = vc[1];
+  // Linear map up to vpar_max/lin_frac_inv, then a power grid.
+  double vpar_lin_fac_inv = app->vpar_lin_fac_inv;
+  double vpar_pow = app->vpar_pow;
+  if (fabs(cvpar) <= 1.0/vpar_lin_fac_inv)
+    vp[0] = vpar_max_ion*cvpar;
+  else if (cvpar < -1.0/vpar_lin_fac_inv)
+    vp[0] = -vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow);
+  else
+    vp[0] =  vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow);
+
+//  // Quadratic mu.
+//  vp[1] = mu_max_ion*pow(cmu,2.0);
+  // Linear map up to mu_max/lin_frac_inv, then a power grid.
+  double mu_lin_fac_inv = app->mu_lin_fac_inv;
+  double mu_pow = app->mu_pow;
+//  if (cmu <= 1.0/mu_lin_fac_inv)
+//    vp[0] = mu_max_ion*cmu;
+//  else
+//    vp[0] = mu_max_ion*pow(mu_lin_fac_inv,mu_pow-1)*pow(cmu,mu_pow);
+  double w = 0.3;
+  double f = 0.012;
+  double a = mu_max_ion*(f-1.0)/(w*w-1.0);
+  double b = mu_max_ion*(w*w-f)/(w*w-1.0);
+  if (cmu <= w)
+    vp[1] = (f*mu_max_ion/w)*cmu;
+  else
+    vp[1] = a*pow(cmu,2)+b;
+
+}
+
+struct gk_mirror_ctx
+create_ctx(void)
+{
+  int cdim = 2, vdim = 2; // Dimensionality.
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0;
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS;
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+
+  // Parameters controlling initial conditions.
+  double alim = 0.125;
+  double alphaIC0 = 2;
+  double alphaIC1 = 10;
+
+  double nuFrac = 1.0;
+  // Ion-ion collision freq.
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+  double vte = sqrt(Te0 / me);
+  double c_s = sqrt(Te0 / mi);
+
+  // Gyrofrequencies and gyroradii.
+  double omega_ci = eV * B_p / mi;
+  double rho_s = c_s / omega_ci;
+
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Endure that Z=0 is not on
+  // the boundary of a cell (due to AD errors).
+  double Z_min = -2.5;
+  double Z_max =  2.5;
+
+  // Parameters controlling the magnetic equilibrium model.
+  double mcB = 6.51292;
+  double gamma = 0.124904;
+  double Z_m = 0.98;
+
+  // Source parameters
+  double NSrcIon = 3.1715e23 / 8.0 / 40.0 / 2.0 * 1.25;
+  double lineLengthSrcIon = 0.0;
+  double sigSrcIon = Z_m / 4.0;
+  double NSrcFloorIon = 0.05 * NSrcIon;
+  double TSrc0Ion = Ti0 * 1.25;
+  double TSrcFloorIon = TSrc0Ion / 8.0;
+
+  // Grid parameters
+  double vpar_max_ion = 16 * vti;
+  double vpar_min_ion = -vpar_max_ion;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+
+  // Computational velocity space limits.
+  double vpar_lin_fac_inv = 4;
+  double vpar_pow = 3;
+  double vpar_min_ion_c = -1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow);
+  double vpar_max_ion_c =  1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow);
+  double mu_min_ion_c = 0.;
+  double mu_max_ion_c = 1.;
+  double mu_lin_fac_inv = 1.0/0.012;
+  double mu_pow = 2;
+//  double mu_min_ion_c = 0.0;
+//  double mu_max_ion_c = 1.0/pow(mu_lin_fac_inv,(mu_pow-1)/mu_pow);
+
+  // Grid DOF:
+  int Nx = 8;  // Number of cells in x (psi) direction.
+  int Nz = 192; // Number of cells in z direction.
+  int Nvpar = 48; // Number of cells in parallel velocity direction.
+  int Nmu = 16;  // Number of cells in mu direction.
+  int poly_order = 1;
+
+  // Initial conditions parameter.s
+  double Ti_perp0 = 10000 * eV;
+  double Ti_par0 = 7500 * eV;
+
+  // Parameters at mirror throat
+  double Ti_perp_m = 15000 * eV;
+  double Ti_par_m = 1000 * eV;
+  double z_m = 0.982544;
+  double cs_m = sqrt((Te0+3.0*Ti_par_m)/mi);
+
+  // Factor multiplying collisionless terms.
+  double alpha_oap = 0.01;
+  double alpha_fdp = 1.0;
+  // Duration of each phase.
+  double tau_oap = 2400.0e-9;
+  double tau_fdp = 24.0e-9;
+  double tau_fdp_extra = 2*tau_fdp;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 4; // Frames per OAP phase
+  int num_frames_fdp = 4; // Frames per FDP phase
+  int num_frames_fdp_extra = 2*num_frames_fdp;  // Frames for the extra FDP phase
+
+  // Whether to evolve the field.
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+  // Whether to enable positivity.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = true;
+  // Type of df/dt multipler.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Calculate phase structure
+  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2*num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
+  for (int i=0; i<(num_phases-1)/2; i++) {
+    // OAPs.
+    poa_phases[2*i].phase = GK_POA_OAP;
+    poa_phases[2*i].num_frames = num_frames_oap;
+    poa_phases[2*i].duration = tau_oap;
+    poa_phases[2*i].alpha = alpha_oap;
+    poa_phases[2*i].is_static_field = is_static_field_oap;
+    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs.
+    poa_phases[2*i+1].phase = GK_POA_FDP;
+    poa_phases[2*i+1].num_frames = num_frames_fdp;
+    poa_phases[2*i+1].duration = tau_fdp;
+    poa_phases[2*i+1].alpha = alpha_fdp;
+    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
+    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // Add an extra, longer FDP.
+  poa_phases[num_phases-1].phase = GK_POA_FDP;
+  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases-1].duration = tau_fdp_extra;
+  poa_phases[num_phases-1].alpha = alpha_fdp;
+  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim,  .vdim = vdim,
+    .mi = mi,  .qi = qi,
+    .me = me,  .qe = qe,
+    .Te0 = Te0,  .Ti0 = Ti0,  .n0 = n0,
+    .B_p = B_p,  .beta = beta,  .tau = tau,
+    .alim = alim,
+    .alphaIC0 = alphaIC0,
+    .alphaIC1 = alphaIC1,
+    .nuFrac = nuFrac,  .logLambdaIon = logLambdaIon,  .nuIon = nuIon,
+    .vti = vti,  .vte = vte,  .c_s = c_s,
+    .omega_ci = omega_ci,  .rho_s = rho_s,
+    .RatZeq0 = RatZeq0,
+    .Z_min = Z_min,  .Z_max = Z_max,
+    // Parameters controlling the magnetic equilibrium model.
+    .mcB = mcB,  .gamma = gamma,
+    .Z_m = Z_m,
+    .z_m = z_m,
+    // Initial condition parameters.
+    .Ti_perp0 = Ti_perp0,  .Ti_par0 = Ti_par0,
+    .Ti_perp_m = Ti_perp_m,  .Ti_par_m = Ti_par_m,  .cs_m = cs_m,
+    // Source parameters
+    .NSrcIon = NSrcIon,  .NSrcFloorIon = NSrcFloorIon,
+    .TSrc0Ion = TSrc0Ion,  .TSrcFloorIon = TSrcFloorIon,
+    .lineLengthSrcIon = lineLengthSrcIon,  .sigSrcIon = sigSrcIon,
+    // Physical velocity space limits.
+    .vpar_min_ion = vpar_min_ion,
+    .vpar_max_ion = vpar_max_ion,
+    .mu_max_ion = mu_max_ion,
+    // Computational velocity space limits.
+    .vpar_lin_fac_inv = vpar_lin_fac_inv,
+    .vpar_pow = vpar_pow,
+    .vpar_min_ion_c = vpar_min_ion_c,
+    .vpar_max_ion_c = vpar_max_ion_c,
+    .mu_lin_fac_inv = mu_lin_fac_inv,
+    .mu_pow = mu_pow,
+    .mu_min_ion_c = mu_min_ion_c,
+    .mu_max_ion_c = mu_max_ion_c,
+    // Grid DOF.
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = {Nx, Nz, Nvpar, Nmu},
+    .poly_order = poly_order,
+    // Time integration and I/O parameters.
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq     = write_phase_freq    , 
+    .int_diag_calc_freq   = int_diag_calc_freq  , 
+    .dt_failure_tol       = dt_failure_tol      , 
+    .num_failures_max     = num_failures_max    , 
+  };
+
+  // Populate a couple more values in the context.
+  ctx.psi_max = psi_RZ(ctx.RatZeq0, 0., &ctx);
+  ctx.psi_min = ctx.psi_max * 0.1;
+  ctx.z_min   = ctx.Z_min;
+  ctx.z_max   = ctx.Z_max;
+
+  return ctx;
+}
+
+void
+release_ctx(struct gk_mirror_ctx *ctx)
+{
+  gkyl_free(ctx->poa_phases);
+}
+
+void
+calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+  double t_curr, bool force_calc, double dt)
+{
+  if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
+    gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+    gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+
+    if ( !(dt < 0.0) )
+      gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
+  }
+}
+
+void
+write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
+  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+{
+  bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  if (trig_now_conf || force_write) {
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
+
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (trig_now_phase || force_write) {
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+
+    gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
+  }
+}
+
+struct time_frame_state {
+  double t_curr; // Current simulation time.
+  double t_end; // End time of current phase.
+  int frame_curr; // Current frame.
+  int num_frames; // Number of frames at the end of current phase.
+};
+
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  // Reset I/O triggers:
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  int frame_curr = tfs->frame_curr;
+  int num_frames = tfs->num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+
+  // Prevent division by zero when frame_curr equals num_frames
+  int frames_remaining = num_frames - frame_curr;
+  double time_remaining = t_end - t_curr;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  tfs->t_end = tfs->t_curr + pparams->duration;
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames;
+
+  // Run an OAP or FDP.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  
+  // Reset I/O triggers:
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Reset simulation parameters and function pointers.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .type = pparams->fdot_mult_type,
+    .cellwise_const = true,
+    .write_diagnostics = true,
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx->me,
+    .electron_charge = ctx->qe,
+    .electron_temp = ctx->Te0,
+    .polarization_bmag = ctx->B_p,
+    .is_static = pparams->is_static_field,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Compute initial guess of maximum stable time-step.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check.
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps))
+  {
+    if (step == 1 || step % 20 == 0)
+      gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
+
+    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    if (step == 1 || step % 20 == 0)
+      gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success)
+    {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested;
+
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end);
+
+    if (dt_init < 0.0) {
+      dt_init = status.dt_actual;
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break;
+      }
+    }
+    else {
+      num_failures = 0;
+    }
+
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr;
+  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+}
+
+int main(int argc, char **argv)
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d=0; d<ctx.cdim; d++)
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  for (int d=0; d<ctx.vdim; d++)
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,  .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c},
+    .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c},
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
+    },
+
+    .projection = {
+      .proj_id = GKYL_PROJ_BIMAXWELLIAN,
+      .density = eval_density_ion,
+      .upar = eval_upar_ion,
+      .temppar = eval_temp_par_ion,
+      .tempperp = eval_temp_perp_ion,
+      .ctx_density = &ctx,
+      .ctx_upar = &ctx,
+      .ctx_temppar = &ctx,
+      .ctx_tempperp = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .self_nu = evalNuIon,
+      .self_nu_ctx = &ctx,
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+	      .density = eval_density_ion_source,
+        .upar = eval_upar_ion_source,
+        .temp = eval_temp_ion_source,
+        .ctx_density = &ctx,
+        .ctx_upar = &ctx,
+        .ctx_temp = &ctx,
+      }, 
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_REFLECT },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_ABSORB },
+      { .dir = 1, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH },
+      { .dir = 1, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH },
+    },
+
+    .num_diag_moments = 4,
+    .diag_moments = {GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+  };
+
+  struct gkyl_gyrokinetic_field field = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx.me,
+    .electron_charge = ctx.qe,
+    .electron_temp = ctx.Te0,
+    .is_static = false, // So solvers are allocated.
+  };
+
+  // GK app
+  struct gkyl_gk app_inp = { 
+    .name = "gk_mirror_boltz_elc_poa_2x2v_p1",
+    .cdim = ctx.cdim,
+    .lower = {ctx.psi_min, ctx.z_min},
+    .upper = {ctx.psi_max, ctx.z_max},
+    .cells = { cells_x[0], cells_x[1] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+
+    .geometry = {
+      .geometry_id = GKYL_MAPC2P,
+      .world = {0.0},
+      .mapc2p = mapc2p, // Mapping of computational to physical space.
+      .c2p_ctx = &ctx,
+      .bfield_func = bfield_func, // Magnetic field.
+      .bfield_ctx = &ctx
+    },
+
+    .num_periodic_dir = 1,
+    .periodic_dirs = {0},
+
+    .num_species = 1,
+    .species = {ion},
+
+    .field = field,
+
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0], app_args.cuts[1] },
+      .comm = comm,
+    },
+  };
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit=0; pit<ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    };
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  if (app_args.num_steps != INT_MAX)
+    phase_idx_end = 1;
+
+  // Loop over number of number of phases;
+  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0)
+  {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+  freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+  
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index 35b2e11421..7b17ba65f6 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -228,6 +228,10 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   // Deflate geometry if necessary.
   struct gk_geometry *gk_geom = gkyl_gk_geometry_deflate(gk_geom_3d, &geometry_input);
   gkyl_gk_geometry_release(gk_geom_3d);
+  
+  // Initialize per-field-line bmag_max arrays.
+  gkyl_gk_geometry_bmag_max_init(gk_geom);
+  
   // If we are on the gpu, copy from host
   if (use_gpu) {
     struct gk_geometry* gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
@@ -251,34 +255,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   gkyl_eval_on_nodes_release(evphi);
   gkyl_array_copy(phi, phi_ho);
 
-  // Location of the mirror throat.
-  double bmag_max_loc_ho[] = {ctx.z_m};
-  double *bmag_max_loc;
-  if (use_gpu) {
-    bmag_max_loc = gkyl_cu_malloc(sizeof(double));
-    gkyl_cu_memcpy(bmag_max_loc, bmag_max_loc_ho, sizeof(double), GKYL_CU_MEMCPY_H2D);
-  }
-  else {
-    bmag_max_loc = gkyl_malloc(sizeof(double));
-    memcpy(bmag_max_loc, bmag_max_loc_ho, sizeof(double));
-  }
-
-  // Get the magnetic field at the mirror throat.
-  double bfield_max_ho[3], bmag_max_ho[1];
-  double xc_infl[] = {0.0,0.0,ctx.z_m};
-  bfield_func_3x(0.0, xc_infl, bfield_max_ho, &ctx);
-  bmag_max_ho[0] = bfield_max_ho[2];
-  double *bmag_max;
-  if (use_gpu) {
-    bmag_max = gkyl_cu_malloc(sizeof(double));
-    gkyl_cu_memcpy(bmag_max, bmag_max_ho, sizeof(double), GKYL_CU_MEMCPY_H2D);
-  }
-  else {
-    bmag_max = gkyl_malloc(sizeof(double));
-    memcpy(bmag_max, bmag_max_ho, sizeof(double));
-  }
-
-  // Get the potential at the mirror throat (z=pi/2).
+  // Get the potential at the mirror throat (z=z_m).
   double phi_m_ho[1];
   double xc[] = {ctx.z_m};
   phi_func_1x(0.0, xc, phi_m_ho, &ctx);
@@ -286,21 +263,21 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   if (use_gpu) {
     phi_m = gkyl_cu_malloc(sizeof(double));
     gkyl_cu_memcpy(phi_m, phi_m_ho, sizeof(double), GKYL_CU_MEMCPY_H2D);
-  }
-  else {
+  } else {
     phi_m = gkyl_malloc(sizeof(double));
     memcpy(phi_m, phi_m_ho, sizeof(double));
   }
 
   // Basis used to project the mask.
   struct gkyl_basis basis_mask;
-  if (ctx.num_quad == 1 || ctx.cellwise_trap_loss)
+  if (ctx.num_quad == 1 || ctx.cellwise_trap_loss) {
     gkyl_cart_modal_serendip(&basis_mask, ndim, 0);
-  else {
-    if (poly_order == 1) 
+  } else {
+    if (poly_order == 1) {
       gkyl_cart_modal_gkhybrid(&basis_mask, cdim, vdim);
-    else
+    } else {
       gkyl_cart_modal_serendip(&basis_mask, ndim, poly_order);
+    }
   }
 
   // Create mask array.
@@ -309,6 +286,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 	                              : gkyl_array_acquire(mask);
 
   // Project the loss cone mask.
+  // Use bmag_max and bmag_max_z_coord arrays from gk_geometry.
   struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
     .phase_grid = &grid,
     .conf_basis = &basis_conf,
@@ -318,8 +296,10 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .vel_range = &local_vel, 
     .vel_map = gvm,
     .bmag = gk_geom->geo_int.bmag,
-    .bmag_max = bmag_max,
-    .bmag_max_loc = bmag_max_loc,
+    .bmag_max = gk_geom->bmag_max,
+    .bmag_max_z_coord = gk_geom->bmag_max_z_coord,
+    .bmag_max_basis = &gk_geom->bmag_max_basis,
+    .bmag_max_range = &gk_geom->bmag_max_range,
     .mass = ctx.mass,
     .charge = ctx.charge,
     .qtype = ctx.quad_type,
@@ -368,21 +348,20 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 
   // Write mask to file.
   char fname[1024];
-  if (use_gpu)
+  if (use_gpu) {
     sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_dev.gkyl", poly_order);
-  else
+  } else {
     sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_ho.gkyl", poly_order);
+  }
   gkyl_grid_sub_array_write(&grid, &local, 0, mask_ho, fname);
 
   sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_ref.gkyl", poly_order);
   gkyl_grid_sub_array_write(&grid, &local, 0, mask_ref_ho, fname);
 
+  // Free phi_m (bmag_max arrays are owned by gk_geom).
   if (use_gpu) {
-    gkyl_cu_free(bmag_max);
     gkyl_cu_free(phi_m);
-  }
-  else {
-    gkyl_free(bmag_max);
+  } else {
     gkyl_free(phi_m);
   }
   gkyl_array_release(phi); 
diff --git a/gyrokinetic/zero/gk_geometry.c b/gyrokinetic/zero/gk_geometry.c
index c05d209e15..ccb950ee95 100644
--- a/gyrokinetic/zero/gk_geometry.c
+++ b/gyrokinetic/zero/gk_geometry.c
@@ -77,6 +77,12 @@ gkyl_gk_geometry_new(struct gk_geometry* geo_host, struct gkyl_gk_geometry_inp *
     gk_geometry_surf_alloc_nodal(up, dir);
   }
 
+  // Initialize bmag_max pointers to NULL (will be populated later if needed).
+  up->bmag_max = NULL;
+  up->bmag_max_z_coord = NULL;
+  up->bmag_max_nodal = NULL;
+  up->bmag_max_z_coord_nodal = NULL;
+
   up->flags = 0;
   GKYL_CLEAR_CU_ALLOC(up->flags);
   up->ref_count = gkyl_ref_count_init(gkyl_gk_geometry_free);
@@ -330,6 +336,239 @@ gkyl_gk_geometry_reduce_arg_bmag(struct gk_geometry* up, enum gkyl_array_op op,
   return b_m;
 }
 
+void
+gkyl_gk_geometry_bmag_max_init(struct gk_geometry *gk_geom)
+{
+  int cdim = gk_geom->grid.ndim;
+  int poly_order = gk_geom->basis.poly_order;
+
+  // For 1x: bmag_max is 0D (single value).
+  // For 2x: bmag_max is 1D (varies with psi, constant along z).
+  // For 3x: bmag_max is 2D (varies with psi and alpha, constant along theta).
+  int bmag_max_dim = cdim - 1;
+
+  if (bmag_max_dim == 0) {
+    // 1x case: single scalar value.
+    // Create a 1-cell range and grid for storing the single value.
+    int cells_1d[1] = {1};
+    double lower_1d[1] = {0.0};
+    double upper_1d[1] = {1.0};
+    gkyl_rect_grid_init(&gk_geom->bmag_max_grid, 1, lower_1d, upper_1d, cells_1d);
+    gkyl_range_init(&gk_geom->bmag_max_range, 1, (int[]){1}, (int[]){1});
+    gkyl_range_init(&gk_geom->bmag_max_range_ext, 1, (int[]){0}, (int[]){2});
+    
+    // For 0D, use a p=0 1D basis (just the constant).
+    gkyl_cart_modal_serendip(&gk_geom->bmag_max_basis, 1, 0);
+    
+    // Nodal range: single node.
+    int nodes_1d[1] = {1};
+    gkyl_range_init_from_shape(&gk_geom->bmag_max_nrange, 1, nodes_1d);
+    
+    // Allocate arrays.
+    gk_geom->bmag_max = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_range_ext.volume);
+    gk_geom->bmag_max_z_coord = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_range_ext.volume);
+    gk_geom->bmag_max_nodal = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_nrange.volume);
+    gk_geom->bmag_max_z_coord_nodal = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_nrange.volume);
+    
+    // Compute the global bmag_max by iterating over all cells.
+    double bmag_max_val = -DBL_MAX;
+    double bmag_max_z = 0.0;
+    
+    struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, cdim, gk_geom->basis.num_basis);
+    gk_geom->basis.node_list(gkyl_array_fetch(nodes, 0));
+    
+    struct gkyl_array *bmag_ho = gkyl_array_new(GKYL_DOUBLE, gk_geom->geo_int.bmag->ncomp, gk_geom->geo_int.bmag->size);
+    gkyl_array_copy(bmag_ho, gk_geom->geo_int.bmag);
+    
+    struct gkyl_range_iter iter;
+    gkyl_range_iter_init(&iter, &gk_geom->local);
+    while (gkyl_range_iter_next(&iter)) {
+      long linidx = gkyl_range_idx(&gk_geom->local, iter.idx);
+      double *b_d = gkyl_array_fetch(bmag_ho, linidx);
+      
+      double xc[cdim];
+      gkyl_rect_grid_cell_center(&gk_geom->grid, iter.idx, xc);
+      
+      for (int n = 0; n < gk_geom->basis.num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(nodes, n);
+        double b = gk_geom->basis.eval_expand(nod_log, b_d);
+        
+        double nod_phys[cdim];
+        log_to_comp(cdim, nod_log, gk_geom->grid.dx, xc, nod_phys);
+        
+        if (b > bmag_max_val) {
+          bmag_max_val = b;
+          bmag_max_z = nod_phys[cdim-1]; // z is the last coordinate.
+        }
+      }
+    }
+    
+    gkyl_array_release(nodes);
+    gkyl_array_release(bmag_ho);
+    
+    // Store in nodal arrays.
+    double *bmag_max_n = gkyl_array_fetch(gk_geom->bmag_max_nodal, 0);
+    double *bmag_max_z_n = gkyl_array_fetch(gk_geom->bmag_max_z_coord_nodal, 0);
+    bmag_max_n[0] = bmag_max_val;
+    bmag_max_z_n[0] = bmag_max_z;
+    
+    // For 0D (1x case), the modal value is just the nodal value.
+    double *bmag_max_m = gkyl_array_fetch(gk_geom->bmag_max, 0);
+    double *bmag_max_z_m = gkyl_array_fetch(gk_geom->bmag_max_z_coord, 0);
+    bmag_max_m[0] = bmag_max_val;
+    bmag_max_z_m[0] = bmag_max_z;
+  }
+  else if (bmag_max_dim == 1) {
+    // 2x case: bmag_max varies with psi (x-direction).
+    // Create a 1D grid/range in the psi direction.
+    int cells_psi = gk_geom->grid.cells[0];
+    double lower_psi = gk_geom->grid.lower[0];
+    double upper_psi = gk_geom->grid.upper[0];
+    
+    gkyl_rect_grid_init(&gk_geom->bmag_max_grid, 1, &lower_psi, &upper_psi, &cells_psi);
+    
+    // Create range matching the local range in psi direction.
+    int lower_idx[1] = {gk_geom->local.lower[0]};
+    int upper_idx[1] = {gk_geom->local.upper[0]};
+    gkyl_sub_range_init(&gk_geom->bmag_max_range, &gk_geom->local, lower_idx, upper_idx);
+    // Actually need a proper 1D range:
+    gkyl_range_init(&gk_geom->bmag_max_range, 1, lower_idx, upper_idx);
+    
+    int lower_ext_idx[1] = {gk_geom->local_ext.lower[0]};
+    int upper_ext_idx[1] = {gk_geom->local_ext.upper[0]};
+    gkyl_range_init(&gk_geom->bmag_max_range_ext, 1, lower_ext_idx, upper_ext_idx);
+    
+    // Create 1D basis for psi direction.
+    gkyl_cart_modal_serendip(&gk_geom->bmag_max_basis, 1, poly_order);
+    
+    // Create nodal range for psi direction.
+    int num_nodes_psi = (poly_order == 1) ? gkyl_range_shape(&gk_geom->bmag_max_range, 0) + 1
+                                          : 2*gkyl_range_shape(&gk_geom->bmag_max_range, 0) + 1;
+    int nodes_shape[1] = {num_nodes_psi};
+    gkyl_range_init_from_shape(&gk_geom->bmag_max_nrange, 1, nodes_shape);
+    
+    // Allocate arrays.
+    gk_geom->bmag_max = gkyl_array_new(GKYL_DOUBLE, gk_geom->bmag_max_basis.num_basis, gk_geom->bmag_max_range_ext.volume);
+    gk_geom->bmag_max_z_coord = gkyl_array_new(GKYL_DOUBLE, gk_geom->bmag_max_basis.num_basis, gk_geom->bmag_max_range_ext.volume);
+    gk_geom->bmag_max_nodal = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_nrange.volume);
+    gk_geom->bmag_max_z_coord_nodal = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_nrange.volume);
+    
+    // For each psi (field line), find max bmag over all z values.
+    // We need to iterate over the 2D grid and for each psi, find max over z.
+    
+    struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, cdim, gk_geom->basis.num_basis);
+    gk_geom->basis.node_list(gkyl_array_fetch(nodes, 0));
+    
+    struct gkyl_array *bmag_ho = gkyl_array_new(GKYL_DOUBLE, gk_geom->geo_int.bmag->ncomp, gk_geom->geo_int.bmag->size);
+    gkyl_array_copy(bmag_ho, gk_geom->geo_int.bmag);
+    
+    // Create temporary arrays to store max bmag and z-coord per psi node.
+    double *bmag_max_per_psi = gkyl_malloc(sizeof(double) * num_nodes_psi);
+    double *z_coord_per_psi = gkyl_malloc(sizeof(double) * num_nodes_psi);
+    for (int i = 0; i < num_nodes_psi; i++) {
+      bmag_max_per_psi[i] = -DBL_MAX;
+      z_coord_per_psi[i] = 0.0;
+    }
+    
+    // Iterate over all cells in 2D.
+    struct gkyl_range_iter iter;
+    gkyl_range_iter_init(&iter, &gk_geom->local);
+    while (gkyl_range_iter_next(&iter)) {
+      int psi_idx = iter.idx[0]; // psi cell index
+      long linidx = gkyl_range_idx(&gk_geom->local, iter.idx);
+      double *b_d = gkyl_array_fetch(bmag_ho, linidx);
+      
+      double xc[cdim];
+      gkyl_rect_grid_cell_center(&gk_geom->grid, iter.idx, xc);
+      
+      // Evaluate bmag at each node in this cell.
+      for (int n = 0; n < gk_geom->basis.num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(nodes, n);
+        double b = gk_geom->basis.eval_expand(nod_log, b_d);
+        
+        double nod_phys[cdim];
+        log_to_comp(cdim, nod_log, gk_geom->grid.dx, xc, nod_phys);
+        
+        // Determine which psi nodal index this corresponds to.
+        // For p=1: nodes are at cell corners, so node indices are psi_idx-local.lower[0] and psi_idx-local.lower[0]+1.
+        // For p=2: nodes are at cell corners and center.
+        int psi_node_offset;
+        if (poly_order == 1) {
+          // nod_log[0] is -1 or +1, mapping to left or right corner.
+          psi_node_offset = (nod_log[0] < 0) ? 0 : 1;
+        }
+        else { // poly_order == 2
+          // nod_log[0] is -1, 0, or +1.
+          if (nod_log[0] < -0.5)
+            psi_node_offset = 0;
+          else if (nod_log[0] > 0.5)
+            psi_node_offset = 2;
+          else
+            psi_node_offset = 1;
+        }
+        
+        int psi_cell_local = psi_idx - gk_geom->local.lower[0];
+        int psi_node_idx;
+        if (poly_order == 1)
+          psi_node_idx = psi_cell_local + psi_node_offset;
+        else
+          psi_node_idx = 2*psi_cell_local + psi_node_offset;
+        
+        // Update max for this psi node.
+        if (b > bmag_max_per_psi[psi_node_idx]) {
+          bmag_max_per_psi[psi_node_idx] = b;
+          z_coord_per_psi[psi_node_idx] = nod_phys[cdim-1]; // z is last coordinate.
+        }
+      }
+    }
+    
+    gkyl_array_release(nodes);
+    gkyl_array_release(bmag_ho);
+    
+    // Store in nodal arrays.
+    for (int i = 0; i < num_nodes_psi; i++) {
+      double *bmag_max_n = gkyl_array_fetch(gk_geom->bmag_max_nodal, i);
+      double *z_coord_n = gkyl_array_fetch(gk_geom->bmag_max_z_coord_nodal, i);
+      bmag_max_n[0] = bmag_max_per_psi[i];
+      z_coord_n[0] = z_coord_per_psi[i];
+    }
+    
+    gkyl_free(bmag_max_per_psi);
+    gkyl_free(z_coord_per_psi);
+    
+    // Transform nodal to modal using n2m.
+    struct gkyl_nodal_ops *n2m = gkyl_nodal_ops_new(&gk_geom->bmag_max_basis, &gk_geom->bmag_max_grid, false);
+    gkyl_nodal_ops_n2m(n2m, &gk_geom->bmag_max_basis, &gk_geom->bmag_max_grid, 
+      &gk_geom->bmag_max_nrange, &gk_geom->bmag_max_range, 1, 
+      gk_geom->bmag_max_nodal, gk_geom->bmag_max, false);
+    gkyl_nodal_ops_n2m(n2m, &gk_geom->bmag_max_basis, &gk_geom->bmag_max_grid, 
+      &gk_geom->bmag_max_nrange, &gk_geom->bmag_max_range, 1, 
+      gk_geom->bmag_max_z_coord_nodal, gk_geom->bmag_max_z_coord, false);
+    gkyl_nodal_ops_release(n2m);
+
+  }
+  else {
+    // 3x case: bmag_max varies with psi and alpha (2D).
+    // TODO: Implement 3x case if needed.
+    assert(false && "bmag_max per field line not yet implemented for 3x");
+  }
+}
+
+void
+gkyl_gk_geometry_bmag_max_release(struct gk_geometry *gk_geom)
+{
+  if (gk_geom->bmag_max) {
+    gkyl_array_release(gk_geom->bmag_max);
+    gkyl_array_release(gk_geom->bmag_max_z_coord);
+    gkyl_array_release(gk_geom->bmag_max_nodal);
+    gkyl_array_release(gk_geom->bmag_max_z_coord_nodal);
+    gk_geom->bmag_max = NULL;
+    gk_geom->bmag_max_z_coord = NULL;
+    gk_geom->bmag_max_nodal = NULL;
+    gk_geom->bmag_max_z_coord_nodal = NULL;
+  }
+}
+
 void
 gkyl_gk_geometry_init_nodal_range( struct gkyl_range *nrange, struct gkyl_range *range, int poly_order)
 {
@@ -510,6 +749,12 @@ gkyl_gk_geometry_deflate(const struct gk_geometry* up_3d, struct gkyl_gk_geometr
     }
   }
 
+  // Initialize bmag_max pointers to NULL (will be populated later if needed).
+  up->bmag_max = NULL;
+  up->bmag_max_z_coord = NULL;
+  up->bmag_max_nodal = NULL;
+  up->bmag_max_z_coord_nodal = NULL;
+
   up->flags = 0;
   GKYL_CLEAR_CU_ALLOC(up->flags);
   up->ref_count = gkyl_ref_count_init(gkyl_gk_geometry_free);
@@ -578,6 +823,9 @@ gkyl_gk_geometry_free(const struct gkyl_ref_count *ref)
   for (int dir=0; dir<up->grid.ndim; ++dir)
     gk_geometry_surf_release_nodal(up, dir);
 
+  // Release bmag_max arrays.
+  gkyl_gk_geometry_bmag_max_release(up);
+
   if (gkyl_gk_geometry_is_cu_dev(up)) 
     gkyl_cu_free(up->on_dev); 
 
diff --git a/gyrokinetic/zero/gkyl_gk_geometry.h b/gyrokinetic/zero/gkyl_gk_geometry.h
index 2e783e52d4..dae4a6f4b7 100644
--- a/gyrokinetic/zero/gkyl_gk_geometry.h
+++ b/gyrokinetic/zero/gkyl_gk_geometry.h
@@ -188,6 +188,20 @@ struct gk_geometry {
                  // in the eqdsk.
   int idx_LCFS_lo; // Index of the cell that abuts the LCFS from below.
 
+  // Per-field-line bmag_max for loss cone calculations.
+  // In 1x: single value. In 2x: array indexed by psi (x-direction).
+  // These are computed by finding max(bmag) along z for each field line,
+  // assuming symmetric bmag with a single peak for positive z.
+  struct gkyl_array *bmag_max;         // Maximum bmag on each field line (modal DG expansion).
+  struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max on each field line (modal DG expansion).
+  struct gkyl_array *bmag_max_nodal;         // Nodal values of bmag_max.
+  struct gkyl_array *bmag_max_z_coord_nodal; // Nodal values of z-coordinate of bmag_max.
+  struct gkyl_range bmag_max_range;          // Range for bmag_max arrays (1D in psi for 2x, 0D for 1x).
+  struct gkyl_range bmag_max_range_ext;      // Extended range for bmag_max arrays.
+  struct gkyl_range bmag_max_nrange;         // Nodal range for bmag_max arrays.
+  struct gkyl_rect_grid bmag_max_grid;       // Grid for bmag_max arrays (1D in psi for 2x).
+  struct gkyl_basis bmag_max_basis;          // Basis for bmag_max arrays (1D for 2x, 0D for 1x).
+
   uint32_t flags;
   struct gkyl_ref_count ref_count;  
   struct gk_geometry *on_dev; // Pointer to itself or device object.
@@ -315,6 +329,26 @@ double gkyl_gk_geometry_reduce_bmag(struct gk_geometry* up, enum gkyl_array_op o
  */
 double gkyl_gk_geometry_reduce_arg_bmag(struct gk_geometry* up, enum gkyl_array_op op, double *coord);
 
+/**
+ * Compute bmag_max per field line. For each psi value (field line), finds the
+ * maximum bmag along z (assuming symmetry with a single peak for positive z).
+ * Stores the result in gk_geom->bmag_max (modal expansion) and the z-coordinate
+ * of the maximum in gk_geom->bmag_max_z_coord.
+ * 
+ * For 1x simulations, this is a single value. For 2x simulations, this is a
+ * 1D array varying with psi.
+ *
+ * @param gk_geom gk_geometry object (modified in place).
+ */
+void gkyl_gk_geometry_bmag_max_init(struct gk_geometry *gk_geom);
+
+/**
+ * Release bmag_max arrays in gk_geometry.
+ *
+ * @param gk_geom gk_geometry object.
+ */
+void gkyl_gk_geometry_bmag_max_release(struct gk_geometry *gk_geom);
+
 /**
  * Init nodal range from modal range
  *
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
index 36e12300c3..d6eeb2a586 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
@@ -27,9 +27,11 @@ struct gkyl_loss_cone_mask_gyrokinetic_inp {
   const struct gkyl_range *conf_range_ext; // Extended configuration-space range (for internal memory allocations).
   const struct gkyl_range *vel_range; // Velocity space range.
   const struct gkyl_velocity_map *vel_map; // Velocity space mapping object.
-  const struct gkyl_array *bmag; // Magnetic field magnitude.
-  const double *bmag_max; // Maximum bmag (on GPU if use_gpu=true).
-  const double *bmag_max_loc; // Location of maximum bmag (on GPU if use_gpu=true)..
+  const struct gkyl_array *bmag; // Magnetic field magnitude (cdim DG expansion).
+  const struct gkyl_array *bmag_max; // Maximum bmag per field line (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays (1D for 2x, 0D for 1x).
+  const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
   double mass; // Species mass.
   double charge; // Species charge.
   enum gkyl_quad_type qtype; // Quadrature rule/nodes.
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
index 03e57dec97..608de6d7af 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
@@ -44,8 +44,17 @@ struct gkyl_loss_cone_mask_gyrokinetic {
 
   double mass; // Species mass.
   double charge; // Species charge.
-  double *bmag_max; // Maximum magnetic field amplitude.
-  double *bmag_max_loc; // Location of bmag_max.
+  
+  // Per-field-line bmag_max arrays (1D for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
+  const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
+  const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
+  const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
+  
+  // GPU helper: scalar bmag_max_z value for simple 1x cases.
+  // TODO: For 2x GPU support, need to pass full arrays and do per-cell lookup.
+  double *bmag_max_z_scalar_gpu; // Single z-coordinate for GPU (1x case only).
+  
   bool use_gpu; // Boolean if we are performing projection on device.
 
   loss_cone_mask_gyrokinetic_c2p_t c2p_pos; // Function transforming position comp to phys coords.
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index e7e5535705..c2c628ee10 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -156,12 +156,13 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
 
 static void
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up, 
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max)
+  const struct gkyl_range *conf_range, const struct gkyl_array *bmag)
 {
   // Get bmag_max-bmag at quadrature nodes.
+  // bmag_max is now a per-field-line array (1D for 2x, scalar for 1x).
 #ifdef GKYL_HAVE_CUDA
   if (up->use_gpu)
-    return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(up, conf_range, bmag, bmag_max);
+    return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(up, conf_range, bmag);
 #endif
 
   int cdim = up->cdim, pdim = up->pdim;
@@ -177,13 +178,33 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up,
     const double *bmag_d = gkyl_array_cfetch(bmag, linidx);
     double *Dbmag_quad = gkyl_array_fetch(up->Dbmag_quad, linidx);
 
+    // Get bmag_max for this field line (psi value).
+    // For 1x: bmag_max is a single value (index 0).
+    // For 2x: bmag_max varies with psi (x-direction), so use conf_iter.idx[0].
+    double bmag_max_val;
+    if (cdim == 1) {
+      // 1x case: single value.
+      const double *bmag_max_d = gkyl_array_cfetch(up->bmag_max, 0);
+      bmag_max_val = bmag_max_d[0]; // Just the constant coefficient.
+    }
+    else {
+      // 2x case: evaluate bmag_max at this psi cell.
+      // The bmag_max array is 1D in psi, so we need the psi index.
+      int psi_idx[1] = {conf_iter.idx[0]};
+      long bmag_max_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
+      const double *bmag_max_d = gkyl_array_cfetch(up->bmag_max, bmag_max_linidx);
+      // For simplicity, evaluate at cell center (logical coord 0).
+      double xc[1] = {0.0};
+      bmag_max_val = up->bmag_max_basis->eval_expand(xc, bmag_max_d);
+    }
+
     // Sum over basis 
     for (int n=0; n<tot_quad_conf; ++n) {
       const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
       for (int k=0; k<num_basis_conf; ++k)
         Dbmag_quad[n] += bmag_d[k]*b_ord[k];
 
-      Dbmag_quad[n] = bmag_max[0] - Dbmag_quad[n];
+      Dbmag_quad[n] = bmag_max_val - Dbmag_quad[n];
     }
   }
 }
@@ -216,6 +237,7 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
     up->num_basis_phase = inp->phase_basis->num_basis;
   }
   up->use_gpu = inp->use_gpu;
+  up->bmag_max_z_scalar_gpu = NULL; // Will be set for GPU case.
 
   if (inp->c2p_pos_func == 0) {
     up->c2p_pos = c2p_pos_identity;
@@ -299,9 +321,34 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
       p2c_qidx_ho[n] = cqidx;
     }
     gkyl_cu_memcpy(up->p2c_qidx, p2c_qidx_ho, sizeof(int)*up->phase_qrange.volume, GKYL_CU_MEMCPY_H2D);
+    
+    // Allocate and set scalar bmag_max_z for GPU kernels.
+    // TODO: For 2x GPU support, need to pass full arrays and do per-cell lookup.
+    double bmag_max_z_val;
+    if (up->cdim == 1) {
+      // 1x case: single value.
+      const double *bmag_max_z_d = gkyl_array_cfetch(inp->bmag_max_z_coord, 0);
+      bmag_max_z_val = bmag_max_z_d[0];
+    } else {
+      // 2x case: use the first field line's value (simplified approach).
+      int psi_idx[1] = {inp->bmag_max_range->lower[0]};
+      long bmag_max_z_linidx = gkyl_range_idx(inp->bmag_max_range, psi_idx);
+      const double *bmag_max_z_d = gkyl_array_cfetch(inp->bmag_max_z_coord, bmag_max_z_linidx);
+      double xc[1] = {0.0};
+      bmag_max_z_val = inp->bmag_max_basis->eval_expand(xc, bmag_max_z_d);
+    }
+    up->bmag_max_z_scalar_gpu = gkyl_cu_malloc(sizeof(double));
+    gkyl_cu_memcpy(up->bmag_max_z_scalar_gpu, &bmag_max_z_val, sizeof(double), GKYL_CU_MEMCPY_H2D);
   }
 #endif
 
+  // Store references to bmag_max arrays (no copy, just store pointers).
+  // Must be done before calling gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad.
+  up->bmag_max = inp->bmag_max;
+  up->bmag_max_z_coord = inp->bmag_max_z_coord;
+  up->bmag_max_basis = inp->bmag_max_basis;
+  up->bmag_max_range = inp->bmag_max_range;
+
   // Allocate and obtain bmag_max-bmag at quadrature points.
   if (up->use_gpu) 
     up->Dbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
@@ -309,17 +356,7 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
     up->Dbmag_quad = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
 
   gkyl_array_clear(up->Dbmag_quad, 0.0); 
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, inp->bmag_max);
-
-  // Save the location of bmag_max in this updater.
-  if (up->use_gpu) {
-    up->bmag_max_loc = gkyl_cu_malloc(sizeof(double)*up->cdim);
-    gkyl_cu_memcpy(up->bmag_max_loc, inp->bmag_max_loc, sizeof(double)*up->cdim, GKYL_CU_MEMCPY_D2D);
-  }
-  else {
-    up->bmag_max_loc = gkyl_malloc(sizeof(double)*up->cdim);
-    memcpy(up->bmag_max_loc, inp->bmag_max_loc, sizeof(double)*up->cdim);
-  }
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag);
     
   return up;
 }
@@ -451,18 +488,39 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
 
         // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
         double KEparDbmag = 0.0;
-        if (Dbmag_quad[cqidx] > 0.0)
+        if (Dbmag_quad[cqidx] > 0.0) {
           KEparDbmag = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad[cqidx];
-        else
+        } else {
           KEparDbmag = 0.0;
+        }
 
-	double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad[cqidx]);
+        double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad[cqidx]);
+
+        // Get the z-coordinate of bmag_max for this field line.
+        // For 1x: single value (index 0).
+        // For 2x: varies with psi, so use conf_iter.idx[0].
+        double bmag_max_z_val;
+        if (cdim == 1) {
+          // 1x case: single value.
+          const double *bmag_max_z_d = gkyl_array_cfetch(up->bmag_max_z_coord, 0);
+          bmag_max_z_val = bmag_max_z_d[0];
+        } else {
+          // 2x case: evaluate bmag_max_z at this psi cell.
+          int psi_idx[1] = {conf_iter.idx[0]};
+          long bmag_max_z_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
+          const double *bmag_max_z_d = gkyl_array_cfetch(up->bmag_max_z_coord, bmag_max_z_linidx);
+          // For simplicity, evaluate at cell center (logical coord 0).
+          double xc[1] = {0.0};
+          bmag_max_z_val = up->bmag_max_basis->eval_expand(xc, bmag_max_z_d);
+        }
 
         double *fq = gkyl_array_fetch(up->fun_at_ords, pqidx);
-	if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(up->bmag_max_loc[cdim-1])) 
+        // xmu[cdim-1] is the z-coordinate (last config space coordinate).
+        if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_val)) {
           fq[0] = 1.0 * up->norm_fac;
-        else
+        } else {
           fq[0] = 0.0;
+        }
       }
       // Compute DG expansion coefficients of the mask.
       if (up->cellwise_trap_loss)
@@ -494,11 +552,10 @@ gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up)
     gkyl_array_release(up->mask_out_quad);
     gkyl_array_release(up->qDphiDbmag_quad);
     gkyl_mat_mm_array_mem_release(up->phase_nodal_to_modal_mem);
-    gkyl_cu_free(up->bmag_max_loc);
-  }
-  else {
-    gkyl_free(up->bmag_max_loc);
+    gkyl_cu_free(up->bmag_max_z_scalar_gpu);
+    // Note: bmag_max and bmag_max_z_coord are owned by gk_geometry, not us.
   }
+  // Note: bmag_max and bmag_max_z_coord are owned by gk_geometry, not us.
 
   gkyl_free(up);
 }
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
index ee35da47e4..2679a9c348 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
@@ -107,7 +107,7 @@ __global__ static void
 gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
   struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
   double mass, const struct gkyl_array* phase_ordinates, 
-  const double *bmag_max_loc, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad,
+  const double *bmag_max_z_scalar, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad,
   const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out)
 {
   int pdim = phase_range.ndim, cdim = conf_range.ndim;
@@ -164,7 +164,7 @@ gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
   
       double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
   
-      if ( !(mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_loc[cdim-1])) ) {
+      if ( !(mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_scalar[0])) ) {
         mask_d[0] = 0.0;
         break;
       }
@@ -176,7 +176,7 @@ __global__ static void
 gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
   struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
   double mass, double norm_fac, const struct gkyl_array* phase_ordinates, 
-  const double *bmag_max_loc, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad,
+  const double *bmag_max_z_scalar, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad,
   const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out_quad)
 {
   int pdim = phase_range.ndim, cdim = conf_range.ndim;
@@ -232,7 +232,7 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
     double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
 
     double *fq = (double*) gkyl_array_fetch(mask_out_quad, linidx_phase);
-    if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_loc[cdim-1])) 
+    if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_scalar[0])) 
       fq[linc2] = norm_fac;
     else
       fq[linc2] = 0.0;
@@ -259,7 +259,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
     int nblocks = phase_range->nblocks, nthreads = phase_range->nthreads;
     gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks, nthreads>>>(*up->grid_phase, *phase_range, *conf_range,
       gvm->local_ext_vel, up->mass, up->ordinates_phase->on_dev,
-      up->bmag_max_loc, up->qDphiDbmag_quad->on_dev, up->Dbmag_quad->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
+      up->bmag_max_z_scalar_gpu, up->qDphiDbmag_quad->on_dev, up->Dbmag_quad->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
       gvm->vmap_basis, mask_out->on_dev);
   }
   else {
@@ -270,7 +270,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
 
     gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid, dimBlock>>>(*up->grid_phase, *phase_range, *conf_range,
       gvm->local_ext_vel, up->mass, up->norm_fac, up->ordinates_phase->on_dev,
-      up->bmag_max_loc, up->qDphiDbmag_quad->on_dev, up->Dbmag_quad->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
+      up->bmag_max_z_scalar_gpu, up->qDphiDbmag_quad->on_dev, up->Dbmag_quad->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
       gvm->vmap_basis, up->mask_out_quad->on_dev);
 
     // Call cublas to do the matrix multiplication nodal to modal conversion

From 913f20ad001d883bd305c3ccae1be54f664a4fdd Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 11 Dec 2025 00:56:39 -0500
Subject: [PATCH 02/32] Add peak finding functionality for DG fields

- Introduced a new header file `gkyl_array_dg_find_peaks.h` that defines a structure and functions for finding peaks (local maxima, minima, and boundary values) in DG fields.
- Implemented an internal structure in `gkyl_array_dg_find_peaks_priv.h` to manage peak finding operations, including storage for peak values and coordinates.
- Removed unused initialization and writing of `bmag_max` arrays in `gyrokinetic.c` to streamline the geometry setup process.
- Deleted the `gkyl_gk_geometry_bmag_max_init` and `gkyl_gk_geometry_bmag_max_release` functions from `gk_geometry.c` as they are no longer needed, simplifying the geometry management.
---
 core/unit/ctest_dg_find_peaks.c           | 528 +++++++++++++++++++++
 core/zero/array_dg_find_peaks.c           | 543 ++++++++++++++++++++++
 core/zero/gkyl_array_dg_find_peaks.h      | 145 ++++++
 core/zero/gkyl_array_dg_find_peaks_priv.h |  58 +++
 gyrokinetic/apps/gyrokinetic.c            |   8 -
 gyrokinetic/zero/gk_geometry.c            | 242 ----------
 6 files changed, 1274 insertions(+), 250 deletions(-)
 create mode 100644 core/unit/ctest_dg_find_peaks.c
 create mode 100644 core/zero/array_dg_find_peaks.c
 create mode 100644 core/zero/gkyl_array_dg_find_peaks.h
 create mode 100644 core/zero/gkyl_array_dg_find_peaks_priv.h

diff --git a/core/unit/ctest_dg_find_peaks.c b/core/unit/ctest_dg_find_peaks.c
new file mode 100644
index 0000000000..71422cf8dd
--- /dev/null
+++ b/core/unit/ctest_dg_find_peaks.c
@@ -0,0 +1,528 @@
+#include <acutest.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_array_ops.h>
+#include <gkyl_array_rio.h>
+#include <gkyl_array_dg_find_peaks.h>
+#include <gkyl_array_dg_find_peaks_priv.h>
+#include <gkyl_basis.h>
+#include <gkyl_eval_on_nodes.h>
+#include <gkyl_range.h>
+#include <gkyl_rect_decomp.h>
+#include <gkyl_rect_grid.h>
+#include <gkyl_util.h>
+
+#include <math.h>
+#include <stdio.h>
+
+// 1D test function with multiple peaks: f(z) = cos(2*pi*z/L) 
+// Has maxima at z=0, z=L and minimum at z=L/2.
+static void
+test_func_1d_cos(double t, const double *xn, double *fout, void *ctx)
+{
+  double z = xn[0];
+  double L = 2.0;  // Period.
+  fout[0] = cos(2.0 * M_PI * z / L);
+}
+
+// 1D test function that looks like a mirror bmag profile:
+// f(z) = B0 * (1 + (R-1)*sin^2(pi*z/L))
+// Has minimum at z=0, maxima at z=-L/2 and z=L/2 (mirror throats).
+static void
+test_func_1d_mirror(double t, const double *xn, double *fout, void *ctx)
+{
+  double z = xn[0];
+  double L = 2.0;       // Half-length.
+  double B0 = 1.0;      // Minimum B.
+  double R = 4.0;       // Mirror ratio.
+  double sinval = sin(M_PI * z / L);
+  fout[0] = B0 * (1.0 + (R - 1.0) * sinval * sinval);
+}
+
+// 2D test function: f(psi, z) = (1 + 0.1*psi) * cos(2*pi*z/L)
+// The peaks vary slightly with psi.
+static void
+test_func_2d_cos(double t, const double *xn, double *fout, void *ctx)
+{
+  double psi = xn[0], z = xn[1];
+  double L = 2.0;
+  fout[0] = (1.0 + 0.1*psi) * cos(2.0 * M_PI * z / L);
+}
+
+// 2D mirror-like function: peaks at z = +/- z_m(psi).
+static void
+test_func_2d_mirror(double t, const double *xn, double *fout, void *ctx)
+{
+  double psi = xn[0], z = xn[1];
+  double L = 2.0;
+  double B0 = 1.0 + 0.1*psi;  // Varies with psi.
+  double R = 4.0;
+  double sinval = sin(M_PI * z / L);
+  fout[0] = B0 * (1.0 + (R - 1.0) * sinval * sinval);
+}
+
+// Complex test function: f(z) = (sin^2(z) + 0.1) * exp(-z^2/100)
+// Has 9 peaks on [-5, 5]: edges at z=±5, local extrema at z≈±3π/2, ±π, ±π/2, 0.
+static void
+test_func_1d_complex(double t, const double *xn, double *fout, void *ctx)
+{
+  double z = xn[0];
+  double sinz = sin(z);
+  fout[0] = (sinz * sinz + 0.1) * exp(-z * z / 100.0);
+}
+
+// 2D complex test function: f(psi, z) = (sin^2(z) + 0.1) * exp(-z^2/100) * psi
+// Peaks scale linearly with psi.
+static void
+test_func_2d_complex(double t, const double *xn, double *fout, void *ctx)
+{
+  double psi = xn[0], z = xn[1];
+  double sinz = sin(z);
+  fout[0] = (sinz * sinz + 0.1) * exp(-z * z / 100.0) * psi;
+}
+
+// Test 1D peak finding with cos function.
+void
+test_1d_find_peaks_cos(int poly_order)
+{
+  // Grid: z in [-1, 1] (one period of cos(2*pi*z/2)).
+  double lower[] = {-1.0};
+  double upper[] = {1.0};
+  int cells[] = {16};
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
+
+  // Basis.
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 1, poly_order);
+
+  // Ranges.
+  int ghost[] = {1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_cos, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create peak finder.
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 0,
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+
+  // Compute peaks.
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results: cos(pi*z) on [-1,1] has EDGE_LO at z=-1, LOCAL_MAX at z=0, EDGE_HI at z=1.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 3);
+
+  struct {
+    enum gkyl_peak_type type;
+    double z_expected;
+  } expected_peaks[] = {
+    {GKYL_PEAK_EDGE_LO,    -1.0,},
+    {GKYL_PEAK_LOCAL_MAX,   0.0,},
+    {GKYL_PEAK_EDGE_HI,     1.0,},
+  };
+
+  for (int p = 0; p < 3 && p < num_peaks; p++) {
+    enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
+    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_get_vals(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    
+    const double *val = gkyl_array_cfetch(vals, 0);
+    const double *coord = gkyl_array_cfetch(coords, 0);
+    
+    double z = coord[0];
+    double expected_val[1];
+    test_func_1d_cos(0.0, &z, expected_val, NULL);
+
+    TEST_CHECK(ptype == expected_peaks[p].type);
+    TEST_CHECK(fabs(coord[0] - expected_peaks[p].z_expected) < 0.1);
+    TEST_CHECK(gkyl_compare_double(val[0], expected_val[0], 0.2));
+  }
+
+  gkyl_array_release(f);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// Test 1D peak finding with mirror-like function.
+void
+test_1d_find_peaks_mirror(int poly_order)
+{
+  // Grid: z in [-1, 1].
+  double lower[] = {-1.0};
+  double upper[] = {1.0};
+  int cells[] = {16};
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
+
+  // Basis.
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 1, poly_order);
+
+  // Ranges.
+  int ghost[] = {1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_mirror, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create peak finder.
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 0,
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+
+  // Compute peaks.
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 3);
+
+  for (int p = 0; p < num_peaks; p++) {
+    enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
+    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_get_vals(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    
+    const double *val = gkyl_array_cfetch(vals, 0);
+    const double *coord = gkyl_array_cfetch(coords, 0);
+    
+    // Check specific peaks.
+    if (ptype == GKYL_PEAK_EDGE_LO) {
+      TEST_CHECK(gkyl_compare_double(val[0], 4.0, 1e-15));
+      TEST_CHECK(fabs(coord[0] - (-1.0)) < 1e-15);
+    }
+    else if (ptype == GKYL_PEAK_LOCAL_MIN) {
+      TEST_CHECK(gkyl_compare_double(val[0], 1.0, 1e-15));
+      TEST_CHECK(fabs(coord[0]) < 1e-15);
+    }
+    else if (ptype == GKYL_PEAK_EDGE_HI) {
+      TEST_CHECK(gkyl_compare_double(val[0], 4.0, 1e-15));
+      TEST_CHECK(fabs(coord[0] - 1.0) < 1e-15);
+    }
+  }
+
+  gkyl_array_release(f);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// Test 2D peak finding.
+void
+test_2d_find_peaks(int poly_order)
+{
+  double lower[] = {0.0, -1.0};
+  double upper[] = {1.0, 1.0};
+  int cells[] = {4, 16};
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 2, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 2, poly_order);
+
+  int ghost[] = {1, 1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_mirror, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create peak finder (search along z, which is direction 1).
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 1,  // Search along z.
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+
+  // Compute peaks.
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results: Mirror function should have 3 peaks along z.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 3);
+
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks);
+  const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks);
+
+  // Check that values and coordinates are reasonable for each peak.
+  for (int p = 0; p < num_peaks; p++) {
+    enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
+    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_get_vals(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    
+    double xc_log[1] = {0.0};
+    
+    // Check first and last psi cells.
+    for (int cell_idx = out_range->lower[0]; cell_idx <= out_range->upper[0]; 
+         cell_idx += (out_range->upper[0] - out_range->lower[0])) {
+      long linidx = gkyl_range_idx(out_range, (int[]){cell_idx});
+      const double *val_d = gkyl_array_cfetch(vals, linidx);
+      const double *coord_d = gkyl_array_cfetch(coords, linidx);
+
+      double val_at_center = out_basis->eval_expand(xc_log, val_d);
+      double coord_at_center = out_basis->eval_expand(xc_log, coord_d);
+      double psi_phys = out_grid->lower[0] + (cell_idx - 0.5) * out_grid->dx[0];
+      
+      // Compute expected value at detected coordinate.
+      double xn[2] = {psi_phys, coord_at_center};
+      double expected_val[1];
+      test_func_2d_mirror(0.0, xn, expected_val, NULL);
+      
+      // Check value matches analytical function.
+      TEST_CHECK(gkyl_compare_double(val_at_center, expected_val[0], 1e-15));
+      
+      // Check that coordinate matches expected peak location.
+      if (ptype == GKYL_PEAK_EDGE_LO) {
+        TEST_CHECK(fabs(coord_at_center - (-1.0)) < 1e-15);
+      } else if (ptype == GKYL_PEAK_LOCAL_MIN) {
+        TEST_CHECK(fabs(coord_at_center) < 1e-15);
+      } else if (ptype == GKYL_PEAK_EDGE_HI) {
+        TEST_CHECK(fabs(coord_at_center - 1.0) < 1e-15);
+      }
+    }
+  }
+
+  gkyl_array_release(f);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// Test 1D peak finding with complex oscillatory function.
+void
+test_1d_find_peaks_complex(int poly_order)
+{
+  double lower[] = {-2.0*M_PI};
+  double upper[] = {2.0*M_PI};
+  int cells[] = {64};  // Need fine resolution to capture oscillations.
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 1, poly_order);
+
+  int ghost[] = {1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create peak finder.
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 0,
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+
+  // Compute peaks.
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  
+  TEST_CHECK(num_peaks == 9);
+
+  // Define expected peak locations and types.
+  struct {
+    enum gkyl_peak_type type;
+    double z_expected;
+  } expected_peaks[] = {
+    {GKYL_PEAK_EDGE_LO,    -2.0*M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,  -3.0*M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,  -M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,  -M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,   0.0,},
+    {GKYL_PEAK_LOCAL_MAX,   M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,   M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,   3.0*M_PI/2.0,},
+    {GKYL_PEAK_EDGE_HI,     2.0*M_PI,},
+  };
+
+  for (int p = 0; p < num_peaks; p++) {
+    enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
+    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_get_vals(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    
+    const double *val = gkyl_array_cfetch(vals, 0);
+    const double *coord = gkyl_array_cfetch(coords, 0);
+    
+    double z = coord[0];
+    double expected_val[1];
+    test_func_1d_complex(0.0, &z, expected_val, NULL);
+
+    TEST_CHECK(ptype == expected_peaks[p].type);
+    TEST_CHECK(fabs(coord[0] - expected_peaks[p].z_expected) < 1e-15);
+    double rel_error = fabs(val[0] - expected_val[0]) / fabs(expected_val[0]);
+    TEST_CHECK(rel_error < 1e-15);
+  }
+
+  gkyl_array_release(f);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// Test 2D peak finding with complex oscillatory function.
+void
+test_2d_find_peaks_complex(int poly_order)
+{
+  // Grid: psi in [0.5, 2.0], z in [-5, 5].
+  double lower[] = {0.5, -2.0*M_PI};
+  double upper[] = {2.0, 2.0*M_PI};
+  int cells[] = {16, 64};
+  int ndim = 2;
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
+
+  // Basis.
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, ndim, poly_order);
+
+  // Ranges.
+  int ghost[] = {1, 1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create peak finder (search along z, which is direction 1).
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 1,  // Search along z.
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+
+  // Compute peaks.
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 9);
+
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks);
+  const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks);
+
+  // Define expected peak locations and types (same as 1D).
+  struct {
+    enum gkyl_peak_type type;
+    double z_expected;
+  } expected_peaks[] = {
+    {GKYL_PEAK_EDGE_LO,    -2.0*M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,  -3.0*M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,  -M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,  -M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,   0.0,},
+    {GKYL_PEAK_LOCAL_MAX,   M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,   M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,   3.0*M_PI/2.0,},
+    {GKYL_PEAK_EDGE_HI,     2.0*M_PI,},
+  };
+
+  // Get node locations for output basis.
+  struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, out_basis->ndim, out_basis->num_basis);
+  out_basis->node_list(gkyl_array_fetch(nodes, 0));
+
+  // Check peak types and validate values at each psi cell.
+  for (int p = 0; p < num_peaks; p++) {
+    enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
+    TEST_CHECK(ptype == expected_peaks[p].type);
+    
+    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_get_vals(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    
+    // Check each psi cell.
+    struct gkyl_range_iter iter;
+    gkyl_range_iter_init(&iter, out_range);
+    while (gkyl_range_iter_next(&iter)) {
+      long linidx = gkyl_range_idx(out_range, iter.idx);
+      
+      const double *val_d = gkyl_array_cfetch(vals, linidx);
+      const double *coord_d = gkyl_array_cfetch(coords, linidx);
+      
+      // Get cell center for physical psi coordinate.
+      double xc_out[1];
+      gkyl_rect_grid_cell_center(out_grid, (int[]){iter.idx[0]}, xc_out);
+      double psi_phys = xc_out[0];
+      
+      // Evaluate at each nodal point in this cell.
+      for (int n = 0; n < out_basis->num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(nodes, n);
+        double val_at_node = out_basis->eval_expand(nod_log, val_d);
+        double z_at_node = out_basis->eval_expand(nod_log, coord_d);
+        
+        // Compute physical psi coordinate at this node.
+        // dx/2 away from the center is the nodal location.
+        double nod_phys[1];
+        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0]/2.0;
+        
+        // Compute expected value at detected coordinates.
+        double xn[2] = {nod_phys[0], z_at_node};
+        double expected_val[1];
+        test_func_2d_complex(0.0, xn, expected_val, NULL);
+
+        TEST_CHECK(fabs(z_at_node - expected_peaks[p].z_expected) < 1e-15);
+        double rel_error = fabs(val_at_node - expected_val[0]) / fabs(expected_val[0]);
+        TEST_CHECK(rel_error < 1e-15);
+      }
+    }
+  }
+
+  gkyl_array_release(nodes);
+  gkyl_array_release(f);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+void test_1d_cos_p1() { test_1d_find_peaks_cos(1); }
+void test_1d_mirror_p1() { test_1d_find_peaks_mirror(1); }
+void test_1d_complex_p1() { test_1d_find_peaks_complex(1); }
+void test_2d_p1() { test_2d_find_peaks(1); }
+void test_2d_complex_p1() { test_2d_find_peaks_complex(1); }
+
+TEST_LIST = {
+  {"test_1d_cos_p1", test_1d_cos_p1},
+  {"test_1d_mirror_p1", test_1d_mirror_p1},
+  {"test_1d_complex_p1", test_1d_complex_p1},
+  {"test_2d_p1", test_2d_p1},
+  {"test_2d_complex_p1", test_2d_complex_p1},
+  {NULL, NULL},
+};
diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c
new file mode 100644
index 0000000000..868f92b269
--- /dev/null
+++ b/core/zero/array_dg_find_peaks.c
@@ -0,0 +1,543 @@
+#include <assert.h>
+#include <float.h>
+#include <string.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_array_dg_find_peaks.h>
+#include <gkyl_array_dg_find_peaks_priv.h>
+#include <gkyl_nodal_ops.h>
+
+/**
+ * Scan along the search direction at a fixed preserved-direction coordinate
+ * to count the number of peaks and determine their types.
+ * 
+ * @param up Updater (partially initialized - just grid/basis/range/search_dir)
+ * @param in Input field
+ * @param preserved_idx Index in the preserved direction (ignored for 1D)
+ * @param num_peaks_out Output: number of peaks found
+ * @param peak_types_out Output: array of peak types (must be size GKYL_DG_FIND_PEAKS_MAX)
+ */
+static void
+count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in,
+  int preserved_idx, int *num_peaks_out, enum gkyl_peak_type *peak_types_out)
+{
+  int ndim = up->grid.ndim;
+  int search_dir = up->search_dir;
+  int poly_order = up->basis.poly_order;
+
+  // Get node locations.
+  struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, ndim, up->basis.num_basis);
+  up->basis.node_list(gkyl_array_fetch(nodes, 0));
+
+  // Copy input to host if needed.
+  struct gkyl_array *in_ho = gkyl_array_new(GKYL_DOUBLE, in->ncomp, in->size);
+  gkyl_array_copy(in_ho, in);
+
+  // Determine number of nodes along search direction.
+  int num_cells_search = up->range.upper[search_dir] - up->range.lower[search_dir] + 1;
+  int nodes_per_cell = (poly_order == 1) ? 2 : 3;
+  int total_nodes_search = (poly_order == 1) ? num_cells_search + 1 : 2*num_cells_search + 1;
+
+  // Allocate arrays to store values and coordinates along search direction.
+  double *vals = gkyl_malloc(sizeof(double) * total_nodes_search);
+  double *coords = gkyl_malloc(sizeof(double) * total_nodes_search);
+  for (int i = 0; i < total_nodes_search; i++) {
+    vals[i] = 0.0;
+    coords[i] = 0.0;
+  }
+
+  // Iterate along cells in search direction and collect nodal values.
+  for (int cell_idx = up->range.lower[search_dir]; cell_idx <= up->range.upper[search_dir]; cell_idx++) {
+    // Build index array for this cell.
+    int idx[GKYL_MAX_DIM];
+    if (ndim == 1) {
+      idx[0] = cell_idx;
+    }
+    else {
+      int preserved_dir = (search_dir == 0) ? 1 : 0;
+      idx[preserved_dir] = preserved_idx;
+      idx[search_dir] = cell_idx;
+    }
+
+    long linidx = gkyl_range_idx(&up->range, idx);
+    const double *f_d = gkyl_array_cfetch(in_ho, linidx);
+
+    double xc[GKYL_MAX_DIM];
+    gkyl_rect_grid_cell_center(&up->grid, idx, xc);
+
+    // Evaluate at each node in this cell.
+    for (int n = 0; n < up->basis.num_basis; n++) {
+      const double *nod_log = gkyl_array_cfetch(nodes, n);
+      
+      // Determine node offset in search direction.
+      int node_offset;
+      if (poly_order == 1) {
+        node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
+      }
+      else {
+        if (nod_log[search_dir] < -0.5)
+          node_offset = 0;
+        else if (nod_log[search_dir] > 0.5)
+          node_offset = 2;
+        else
+          node_offset = 1;
+      }
+
+      int cell_local = cell_idx - up->range.lower[search_dir];
+      int search_node_idx;
+      if (poly_order == 1)
+        search_node_idx = cell_local + node_offset;
+      else
+        search_node_idx = 2*cell_local + node_offset;
+
+      double val = up->basis.eval_expand(nod_log, f_d);
+      double nod_phys[GKYL_MAX_DIM];
+      dg_find_peaks_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys);
+
+      // Only store if this is the first time we see this search node
+      // (avoid duplicates at cell boundaries).
+      if (vals[search_node_idx] == 0.0 && coords[search_node_idx] == 0.0) {
+        vals[search_node_idx] = val;
+        coords[search_node_idx] = nod_phys[search_dir];
+      }
+    }
+  }
+
+  // Now scan the values to find peaks.
+  // A peak is: EDGE_LO at index 0, EDGE_HI at last index, LOCAL_MAX/MIN in between.
+  int num_peaks = 0;
+  
+  // Always add lower edge.
+  peak_types_out[num_peaks++] = GKYL_PEAK_EDGE_LO;
+
+  // Scan for local maxima and minima (indices 1 to total_nodes_search-2).
+  for (int i = 1; i < total_nodes_search - 1; i++) {
+    double prev = vals[i-1];
+    double curr = vals[i];
+    double next = vals[i+1];
+
+    if (curr > prev && curr > next) {
+      // Local maximum.
+      assert(num_peaks < GKYL_DG_FIND_PEAKS_MAX);
+      peak_types_out[num_peaks++] = GKYL_PEAK_LOCAL_MAX;
+    }
+    else if (curr < prev && curr < next) {
+      // Local minimum.
+      assert(num_peaks < GKYL_DG_FIND_PEAKS_MAX);
+      peak_types_out[num_peaks++] = GKYL_PEAK_LOCAL_MIN;
+    }
+  }
+
+  // Always add upper edge.
+  assert(num_peaks < GKYL_DG_FIND_PEAKS_MAX);
+  peak_types_out[num_peaks++] = GKYL_PEAK_EDGE_HI;
+
+  *num_peaks_out = num_peaks;
+
+  gkyl_free(vals);
+  gkyl_free(coords);
+  gkyl_array_release(nodes);
+  gkyl_array_release(in_ho);
+}
+
+/**
+ * Find all peaks along the search direction for a given preserved-direction
+ * node index, storing results in the nodal arrays.
+ */
+static void
+find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in_ho,
+  int preserved_node_idx)
+{
+  int ndim = up->grid.ndim;
+  int search_dir = up->search_dir;
+  int poly_order = up->basis.poly_order;
+
+  // Determine number of nodes along search direction.
+  int num_cells_search = up->range.upper[search_dir] - up->range.lower[search_dir] + 1;
+  int total_nodes_search = (poly_order == 1) ? num_cells_search + 1 : 2*num_cells_search + 1;
+
+  // Allocate arrays to store values and coordinates along search direction.
+  double *vals = gkyl_malloc(sizeof(double) * total_nodes_search);
+  double *coords = gkyl_malloc(sizeof(double) * total_nodes_search);
+  bool *visited = gkyl_malloc(sizeof(bool) * total_nodes_search);
+  for (int i = 0; i < total_nodes_search; i++) {
+    vals[i] = 0.0;
+    coords[i] = 0.0;
+    visited[i] = false;
+  }
+
+  // For 2D, determine the preserved direction cell index from the node index.
+  int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
+
+  // Iterate along cells in search direction and collect nodal values.
+  for (int cell_idx = up->range.lower[search_dir]; cell_idx <= up->range.upper[search_dir]; cell_idx++) {
+    // For 2D, we need to iterate over cells in the preserved direction that
+    // contribute to this preserved node index.
+    int pres_cell_start, pres_cell_end;
+    if (ndim == 1) {
+      pres_cell_start = 0;
+      pres_cell_end = 0;
+    }
+    else {
+      // Determine which cells contribute to this preserved node.
+      if (poly_order == 1) {
+        // Node i is shared by cells i and i+1 (0-indexed from lower).
+        // preserved_node_idx 0 is only in cell lower[preserved_dir].
+        // preserved_node_idx N is only in cell upper[preserved_dir].
+        if (preserved_node_idx == 0) {
+          pres_cell_start = up->range.lower[preserved_dir];
+          pres_cell_end = up->range.lower[preserved_dir];
+        }
+        else if (preserved_node_idx == up->out_nrange.upper[0]) {
+          pres_cell_start = up->range.upper[preserved_dir];
+          pres_cell_end = up->range.upper[preserved_dir];
+        }
+        else {
+          pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+          pres_cell_end = pres_cell_start + 1;
+          if (pres_cell_end > up->range.upper[preserved_dir])
+            pres_cell_end = up->range.upper[preserved_dir];
+        }
+      }
+      else { // poly_order == 2
+        // Similar logic for p=2 nodes.
+        int cell_local = preserved_node_idx / 2;
+        pres_cell_start = up->range.lower[preserved_dir] + cell_local;
+        pres_cell_end = pres_cell_start;
+        if (preserved_node_idx % 2 == 0 && preserved_node_idx > 0) {
+          pres_cell_start--;
+        }
+        if (pres_cell_start < up->range.lower[preserved_dir])
+          pres_cell_start = up->range.lower[preserved_dir];
+        if (pres_cell_end > up->range.upper[preserved_dir])
+          pres_cell_end = up->range.upper[preserved_dir];
+      }
+    }
+
+    for (int pres_cell = pres_cell_start; pres_cell <= pres_cell_end; pres_cell++) {
+      // Build index array for this cell.
+      int idx[GKYL_MAX_DIM];
+      if (ndim == 1) {
+        idx[0] = cell_idx;
+      }
+      else {
+        idx[preserved_dir] = pres_cell;
+        idx[search_dir] = cell_idx;
+      }
+
+      long linidx = gkyl_range_idx(&up->range, idx);
+      const double *f_d = gkyl_array_cfetch(in_ho, linidx);
+
+      double xc[GKYL_MAX_DIM];
+      gkyl_rect_grid_cell_center(&up->grid, idx, xc);
+
+      // Evaluate at each node in this cell.
+      for (int n = 0; n < up->basis.num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(up->nodes, n);
+
+        // Check if this node corresponds to our preserved node index.
+        if (ndim > 1) {
+          int pres_node_offset;
+          if (poly_order == 1) {
+            pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1;
+          }
+          else {
+            if (nod_log[preserved_dir] < -0.5)
+              pres_node_offset = 0;
+            else if (nod_log[preserved_dir] > 0.5)
+              pres_node_offset = 2;
+            else
+              pres_node_offset = 1;
+          }
+          int pres_cell_local = pres_cell - up->range.lower[preserved_dir];
+          int this_pres_node;
+          if (poly_order == 1)
+            this_pres_node = pres_cell_local + pres_node_offset;
+          else
+            this_pres_node = 2*pres_cell_local + pres_node_offset;
+
+          if (this_pres_node != preserved_node_idx)
+            continue;
+        }
+
+        // Determine node offset in search direction.
+        int search_node_offset;
+        if (poly_order == 1) {
+          search_node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
+        }
+        else {
+          if (nod_log[search_dir] < -0.5)
+            search_node_offset = 0;
+          else if (nod_log[search_dir] > 0.5)
+            search_node_offset = 2;
+          else
+            search_node_offset = 1;
+        }
+
+        int cell_local = cell_idx - up->range.lower[search_dir];
+        int search_node_idx;
+        if (poly_order == 1)
+          search_node_idx = cell_local + search_node_offset;
+        else
+          search_node_idx = 2*cell_local + search_node_offset;
+
+        if (!visited[search_node_idx]) {
+          double val = up->basis.eval_expand(nod_log, f_d);
+          double nod_phys[GKYL_MAX_DIM];
+          dg_find_peaks_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys);
+
+          vals[search_node_idx] = val;
+          coords[search_node_idx] = nod_phys[search_dir];
+          visited[search_node_idx] = true;
+        }
+      }
+    }
+  }
+
+  // Now extract peaks based on peak_types.
+  int peak_idx = 0;
+  
+  // EDGE_LO is always first peak at index 0.
+  if (up->peak_types[peak_idx] == GKYL_PEAK_EDGE_LO) {
+    double *val_n = gkyl_array_fetch(up->out_vals_nodal[peak_idx], preserved_node_idx);
+    double *coord_n = gkyl_array_fetch(up->out_coords_nodal[peak_idx], preserved_node_idx);
+    val_n[0] = vals[0];
+    coord_n[0] = coords[0];
+    peak_idx++;
+  }
+
+  // Find local maxima and minima.
+  for (int i = 1; i < total_nodes_search - 1 && peak_idx < up->num_peaks - 1; i++) {
+    double prev = vals[i-1];
+    double curr = vals[i];
+    double next = vals[i+1];
+
+    bool is_max = (curr > prev && curr > next);
+    bool is_min = (curr < prev && curr < next);
+
+    if ((is_max && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MAX) ||
+        (is_min && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MIN)) {
+      double *val_n = gkyl_array_fetch(up->out_vals_nodal[peak_idx], preserved_node_idx);
+      double *coord_n = gkyl_array_fetch(up->out_coords_nodal[peak_idx], preserved_node_idx);
+      val_n[0] = curr;
+      coord_n[0] = coords[i];
+      peak_idx++;
+    }
+  }
+
+  // EDGE_HI is always last peak.
+  if (peak_idx < up->num_peaks && up->peak_types[peak_idx] == GKYL_PEAK_EDGE_HI) {
+    double *val_n = gkyl_array_fetch(up->out_vals_nodal[peak_idx], preserved_node_idx);
+    double *coord_n = gkyl_array_fetch(up->out_coords_nodal[peak_idx], preserved_node_idx);
+    val_n[0] = vals[total_nodes_search - 1];
+    coord_n[0] = coords[total_nodes_search - 1];
+  }
+
+  gkyl_free(vals);
+  gkyl_free(coords);
+  gkyl_free(visited);
+}
+
+struct gkyl_array_dg_find_peaks*
+gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *inp, const struct gkyl_array *field)
+{
+  struct gkyl_array_dg_find_peaks *up = gkyl_malloc(sizeof(*up));
+
+  // Copy input parameters.
+  up->grid = *inp->grid;
+  up->basis = *inp->basis;
+  up->range = *inp->range;
+  up->range_ext = *inp->range_ext;
+  up->search_dir = inp->search_dir;
+  up->use_gpu = inp->use_gpu;
+
+  int ndim = inp->grid->ndim;
+  int poly_order = inp->basis->poly_order;
+  int out_dim = ndim - 1;
+
+  assert(inp->search_dir >= 0 && inp->search_dir < ndim);
+
+  // Set up output grid/basis/range.
+  if (out_dim == 0) {
+    // 1D -> 0D case.
+    int cells_1d[1] = {1};
+    double lower_1d[1] = {0.0};
+    double upper_1d[1] = {1.0};
+    gkyl_rect_grid_init(&up->out_grid, 1, lower_1d, upper_1d, cells_1d);
+    gkyl_range_init(&up->out_range, 1, (int[]){1}, (int[]){1});
+    gkyl_range_init(&up->out_range_ext, 1, (int[]){0}, (int[]){2});
+    gkyl_cart_modal_serendip(&up->out_basis, 1, 0);
+
+    int nodes_shape[1] = {1};
+    gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape);
+  }
+  else if (out_dim == 1) {
+    // 2D -> 1D case.
+    int preserved_dir = (inp->search_dir == 0) ? 1 : 0;
+
+    int cells_out = inp->grid->cells[preserved_dir];
+    double lower_out = inp->grid->lower[preserved_dir];
+    double upper_out = inp->grid->upper[preserved_dir];
+
+    gkyl_rect_grid_init(&up->out_grid, 1, &lower_out, &upper_out, &cells_out);
+
+    int lower_idx[1] = {inp->range->lower[preserved_dir]};
+    int upper_idx[1] = {inp->range->upper[preserved_dir]};
+    gkyl_range_init(&up->out_range, 1, lower_idx, upper_idx);
+
+    int lower_ext_idx[1] = {inp->range_ext->lower[preserved_dir]};
+    int upper_ext_idx[1] = {inp->range_ext->upper[preserved_dir]};
+    gkyl_range_init(&up->out_range_ext, 1, lower_ext_idx, upper_ext_idx);
+
+    gkyl_cart_modal_serendip(&up->out_basis, 1, poly_order);
+
+    int num_nodes = (poly_order == 1) ? gkyl_range_shape(&up->out_range, 0) + 1
+                                      : 2*gkyl_range_shape(&up->out_range, 0) + 1;
+    int nodes_shape[1] = {num_nodes};
+    gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape);
+  }
+  else {
+    assert(false && "dg_find_peaks: only 1D->0D and 2D->1D supported");
+  }
+
+  // Store node locations for input basis.
+  up->nodes = gkyl_array_new(GKYL_DOUBLE, ndim, inp->basis->num_basis);
+  inp->basis->node_list(gkyl_array_fetch(up->nodes, 0));
+
+  // Create nodal-to-modal converter.
+  up->n2m = gkyl_nodal_ops_new(&up->out_basis, &up->out_grid, false);
+
+  // Count peaks at middle preserved coordinate.
+  int mid_preserved_idx = 0;
+  if (out_dim == 1) {
+    int preserved_dir = (inp->search_dir == 0) ? 1 : 0;
+    mid_preserved_idx = (inp->range->lower[preserved_dir] + inp->range->upper[preserved_dir]) / 2;
+  }
+
+  count_peaks_along_dir(up, field, mid_preserved_idx, &up->num_peaks, up->peak_types);
+
+  // Allocate output arrays for each peak.
+  for (int p = 0; p < up->num_peaks; p++) {
+    up->out_vals[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, up->out_range_ext.volume);
+    up->out_coords[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, up->out_range_ext.volume);
+    up->out_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+    up->out_coords_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+  }
+
+  // Initialize unused peak arrays to NULL.
+  for (int p = up->num_peaks; p < GKYL_DG_FIND_PEAKS_MAX; p++) {
+    up->out_vals[p] = NULL;
+    up->out_coords[p] = NULL;
+    up->out_vals_nodal[p] = NULL;
+    up->out_coords_nodal[p] = NULL;
+  }
+
+  return up;
+}
+
+void
+gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in)
+{
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+
+  // Copy input to host if on GPU.
+  struct gkyl_array *in_ho = gkyl_array_new(GKYL_DOUBLE, in->ncomp, in->size);
+  gkyl_array_copy(in_ho, in);
+
+  // Find peaks for each preserved-direction node.
+  int num_nodes_out = up->out_nrange.volume;
+  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+    find_peaks_for_preserved_node(up, in_ho, pres_node);
+  }
+
+  // Transform nodal to modal for each peak.
+  if (out_dim == 0) {
+    // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+    for (int p = 0; p < up->num_peaks; p++) {
+      double *val_m = gkyl_array_fetch(up->out_vals[p], 0);
+      double *coord_m = gkyl_array_fetch(up->out_coords[p], 0);
+      const double *val_n = gkyl_array_cfetch(up->out_vals_nodal[p], 0);
+      const double *coord_n = gkyl_array_cfetch(up->out_coords_nodal[p], 0);
+      val_m[0] = val_n[0];
+      coord_m[0] = coord_n[0];
+    }
+  }
+  else {
+    // 2D -> 1D case: use nodal-to-modal transform.
+    for (int p = 0; p < up->num_peaks; p++) {
+      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1, up->out_vals_nodal[p], up->out_vals[p], false);
+      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1, up->out_coords_nodal[p], up->out_coords[p], false);
+    }
+  }
+
+  gkyl_array_release(in_ho);
+}
+
+int
+gkyl_array_dg_find_peaks_num_peaks(const struct gkyl_array_dg_find_peaks *up)
+{
+  return up->num_peaks;
+}
+
+enum gkyl_peak_type
+gkyl_array_dg_find_peaks_get_type(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+  return up->peak_types[peak_idx];
+}
+
+const struct gkyl_basis*
+gkyl_array_dg_find_peaks_get_basis(const struct gkyl_array_dg_find_peaks *up)
+{
+  return &up->out_basis;
+}
+
+const struct gkyl_rect_grid*
+gkyl_array_dg_find_peaks_get_grid(const struct gkyl_array_dg_find_peaks *up)
+{
+  return &up->out_grid;
+}
+
+const struct gkyl_range*
+gkyl_array_dg_find_peaks_get_range(const struct gkyl_array_dg_find_peaks *up)
+{
+  return &up->out_range;
+}
+
+const struct gkyl_range*
+gkyl_array_dg_find_peaks_get_range_ext(const struct gkyl_array_dg_find_peaks *up)
+{
+  return &up->out_range_ext;
+}
+
+const struct gkyl_array*
+gkyl_array_dg_find_peaks_get_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+  return up->out_vals[peak_idx];
+}
+
+const struct gkyl_array*
+gkyl_array_dg_find_peaks_get_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+  return up->out_coords[peak_idx];
+}
+
+void
+gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up)
+{
+  for (int p = 0; p < up->num_peaks; p++) {
+    gkyl_array_release(up->out_vals[p]);
+    gkyl_array_release(up->out_coords[p]);
+    gkyl_array_release(up->out_vals_nodal[p]);
+    gkyl_array_release(up->out_coords_nodal[p]);
+  }
+  gkyl_array_release(up->nodes);
+  gkyl_nodal_ops_release(up->n2m);
+  gkyl_free(up);
+}
diff --git a/core/zero/gkyl_array_dg_find_peaks.h b/core/zero/gkyl_array_dg_find_peaks.h
new file mode 100644
index 0000000000..352bf38cf6
--- /dev/null
+++ b/core/zero/gkyl_array_dg_find_peaks.h
@@ -0,0 +1,145 @@
+#pragma once
+
+#include <gkyl_array.h>
+#include <gkyl_basis.h>
+#include <gkyl_range.h>
+#include <gkyl_rect_grid.h>
+
+/**
+ * Find all peaks (local maxima, local minima, and boundary values) of a DG
+ * field along one direction.
+ * 
+ * For a 2D input array f(psi, z), finding peaks along z (dir=1) gives arrays:
+ *   out_val[k](psi) = value of k-th peak along z for each psi
+ *   out_coord[k](psi) = z-coordinate of k-th peak for each psi
+ * 
+ * For a 1D input array f(z), finding peaks along z (dir=0) gives scalars:
+ *   out_val[k] = value of k-th peak
+ *   out_coord[k] = z-coordinate of k-th peak
+ * 
+ * Peaks are detected by sampling the field at nodal points along the search
+ * direction and identifying:
+ *   - EDGE_LO: Value at the lower boundary of the domain
+ *   - LOCAL_MAX: Points where f increases then decreases
+ *   - LOCAL_MIN: Points where f decreases then increases  
+ *   - EDGE_HI: Value at the upper boundary of the domain
+ * 
+ * The number of peaks is determined by scanning along the search direction
+ * at a middle preserved-direction coordinate.
+ */
+typedef struct gkyl_array_dg_find_peaks gkyl_array_dg_find_peaks;
+
+/** Types of peaks that can be found. */
+enum gkyl_peak_type {
+  GKYL_PEAK_EDGE_LO,    // Value at lower boundary
+  GKYL_PEAK_LOCAL_MAX,  // Local maximum
+  GKYL_PEAK_LOCAL_MIN,  // Local minimum
+  GKYL_PEAK_EDGE_HI,    // Value at upper boundary
+};
+
+/** Input parameters for dg_find_peaks updater. */
+struct gkyl_array_dg_find_peaks_inp {
+  const struct gkyl_basis *basis;       // Input basis (N-dimensional)
+  const struct gkyl_rect_grid *grid;    // Input grid
+  const struct gkyl_range *range;       // Input range (local)
+  const struct gkyl_range *range_ext;   // Input extended range
+  int search_dir;                       // Direction to search for peaks (0-indexed)
+  bool use_gpu;                         // Whether to run on GPU
+};
+
+/**
+ * Create a new peak finder updater. The number of peaks is determined by
+ * scanning the input field along the search direction at a middle coordinate.
+ * This must be called AFTER the input field is initialized, as it scans the
+ * field to determine the number of peaks.
+ * 
+ * @param inp Input parameters
+ * @param field Input field to scan for peak count determination
+ * @return New updater pointer
+ */
+struct gkyl_array_dg_find_peaks* gkyl_array_dg_find_peaks_new(
+  const struct gkyl_array_dg_find_peaks_inp *inp, const struct gkyl_array *field);
+
+/**
+ * Compute the peaks. For each point along the preserved dimensions,
+ * find all peaks along the search direction.
+ * 
+ * @param up Updater object
+ * @param in Input array (N-dimensional DG field)
+ */
+void gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in);
+
+/**
+ * Get the number of peaks found.
+ * 
+ * @param up Updater object
+ * @return Number of peaks
+ */
+int gkyl_array_dg_find_peaks_num_peaks(const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the type of a specific peak (EDGE_LO, LOCAL_MAX, LOCAL_MIN, EDGE_HI).
+ * 
+ * @param up Updater object
+ * @param peak_idx Index of the peak (0 to num_peaks-1)
+ * @return Type of the peak
+ */
+enum gkyl_peak_type gkyl_array_dg_find_peaks_get_type(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+
+/**
+ * Get the output basis ((N-1)-dimensional, or p=0 1D for 1D->0D).
+ * 
+ * @param up Updater object
+ * @return Pointer to output basis
+ */
+const struct gkyl_basis* gkyl_array_dg_find_peaks_get_basis(const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the output grid.
+ * 
+ * @param up Updater object
+ * @return Pointer to output grid
+ */
+const struct gkyl_rect_grid* gkyl_array_dg_find_peaks_get_grid(const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the output range.
+ * 
+ * @param up Updater object
+ * @return Pointer to output range
+ */
+const struct gkyl_range* gkyl_array_dg_find_peaks_get_range(const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the output extended range.
+ * 
+ * @param up Updater object
+ * @return Pointer to output extended range
+ */
+const struct gkyl_range* gkyl_array_dg_find_peaks_get_range_ext(const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the output array containing peak values for a specific peak.
+ * 
+ * @param up Updater object
+ * @param peak_idx Index of the peak (0 to num_peaks-1)
+ * @return Pointer to output values array (modal DG expansion)
+ */
+const struct gkyl_array* gkyl_array_dg_find_peaks_get_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+
+/**
+ * Get the output array containing coordinates of a specific peak.
+ * 
+ * @param up Updater object
+ * @param peak_idx Index of the peak (0 to num_peaks-1)
+ * @return Pointer to output coordinates array (modal DG expansion)
+ */
+const struct gkyl_array* gkyl_array_dg_find_peaks_get_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+
+/**
+ * Release the updater and all internal arrays.
+ * 
+ * @param up Updater to delete
+ */
+void gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up);
+
diff --git a/core/zero/gkyl_array_dg_find_peaks_priv.h b/core/zero/gkyl_array_dg_find_peaks_priv.h
new file mode 100644
index 0000000000..be63f8b500
--- /dev/null
+++ b/core/zero/gkyl_array_dg_find_peaks_priv.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <float.h>
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_array_dg_find_peaks.h>
+#include <gkyl_nodal_ops.h>
+
+// Maximum number of peaks we can handle.
+#define GKYL_DG_FIND_PEAKS_MAX 16
+
+/**
+ * Convert logical (reference) coordinates to computational (physical) coordinates.
+ * xout[d] = xc[d] + 0.5*dx[d]*eta[d]
+ */
+static inline void
+dg_find_peaks_log_to_comp(int ndim, const double *eta,
+  const double *GKYL_RESTRICT dx, const double *GKYL_RESTRICT xc,
+  double *GKYL_RESTRICT xout)
+{
+  for (int d = 0; d < ndim; ++d)
+    xout[d] = 0.5*dx[d]*eta[d] + xc[d];
+}
+
+/** Internal struct for dg_find_peaks updater. */
+struct gkyl_array_dg_find_peaks {
+  // Input parameters (copies).
+  struct gkyl_rect_grid grid;       // Input grid (copy)
+  struct gkyl_basis basis;          // Input basis (copy)
+  struct gkyl_range range;          // Input local range (copy)
+  struct gkyl_range range_ext;      // Input extended range (copy)
+  int search_dir;                   // Direction to search for peaks
+  bool use_gpu;
+
+  // Output grid/basis/range (owned).
+  struct gkyl_rect_grid out_grid;   // Output grid (N-1 dim, or 1D 1-cell for 1D->0D)
+  struct gkyl_basis out_basis;      // Output basis (N-1 dim, or p=0 1D for 1D->0D)
+  struct gkyl_range out_range;      // Output range
+  struct gkyl_range out_range_ext;  // Output extended range
+  struct gkyl_range out_nrange;     // Nodal range for output
+
+  // Peak information.
+  int num_peaks;                              // Number of peaks detected
+  enum gkyl_peak_type peak_types[GKYL_DG_FIND_PEAKS_MAX]; // Type of each peak
+
+  // Output arrays (owned) - one per peak.
+  struct gkyl_array *out_vals[GKYL_DG_FIND_PEAKS_MAX];        // Peak values (modal DG)
+  struct gkyl_array *out_coords[GKYL_DG_FIND_PEAKS_MAX];      // Peak coordinates (modal DG)
+  struct gkyl_array *out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];  // Nodal peak values
+  struct gkyl_array *out_coords_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Nodal peak coordinates
+
+  // Internal working arrays.
+  struct gkyl_array *nodes;         // Node locations in logical coords
+
+  // Nodal-to-modal converter.
+  struct gkyl_nodal_ops *n2m;
+};
+
diff --git a/gyrokinetic/apps/gyrokinetic.c b/gyrokinetic/apps/gyrokinetic.c
index 72442cf14a..8065d8c90e 100644
--- a/gyrokinetic/apps/gyrokinetic.c
+++ b/gyrokinetic/apps/gyrokinetic.c
@@ -399,14 +399,6 @@ gkyl_gyrokinetic_app_new_geom(struct gkyl_gk *gk)
 
   gkyl_gk_geometry_release(gk_geom_3d); // Release temporary 3d geometry.
 
-  // Initialize per-field-line bmag_max arrays.
-  gkyl_gk_geometry_bmag_max_init(app->gk_geom);
-
-  gkyl_comm_array_write(app->comm, &app->gk_geom->bmag_max_grid, &app->gk_geom->bmag_max_range, NULL, app->gk_geom->bmag_max, "bmag_max.gkyl");
-  gkyl_comm_array_write(app->comm, &app->gk_geom->bmag_max_grid, &app->gk_geom->bmag_max_range, NULL, app->gk_geom->bmag_max_z_coord, "bmag_max_z_coord.gkyl");
-  gkyl_comm_array_write(app->comm, &app->gk_geom->bmag_max_grid, &app->gk_geom->bmag_max_nrange, NULL, app->gk_geom->bmag_max_nodal, "bmag_max_nodal.gkyl");
-  gkyl_comm_array_write(app->comm, &app->gk_geom->bmag_max_grid, &app->gk_geom->bmag_max_nrange, NULL, app->gk_geom->bmag_max_z_coord_nodal, "bmag_max_z_coord_nodal.gkyl");
-
   double bmag_min_local, bmag_min_global;
   bmag_min_local = gkyl_gk_geometry_reduce_bmag(app->gk_geom, GKYL_MIN);
   gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MIN, 1, &bmag_min_local, &bmag_min_global);
diff --git a/gyrokinetic/zero/gk_geometry.c b/gyrokinetic/zero/gk_geometry.c
index ccb950ee95..299f3bf1a6 100644
--- a/gyrokinetic/zero/gk_geometry.c
+++ b/gyrokinetic/zero/gk_geometry.c
@@ -336,239 +336,6 @@ gkyl_gk_geometry_reduce_arg_bmag(struct gk_geometry* up, enum gkyl_array_op op,
   return b_m;
 }
 
-void
-gkyl_gk_geometry_bmag_max_init(struct gk_geometry *gk_geom)
-{
-  int cdim = gk_geom->grid.ndim;
-  int poly_order = gk_geom->basis.poly_order;
-
-  // For 1x: bmag_max is 0D (single value).
-  // For 2x: bmag_max is 1D (varies with psi, constant along z).
-  // For 3x: bmag_max is 2D (varies with psi and alpha, constant along theta).
-  int bmag_max_dim = cdim - 1;
-
-  if (bmag_max_dim == 0) {
-    // 1x case: single scalar value.
-    // Create a 1-cell range and grid for storing the single value.
-    int cells_1d[1] = {1};
-    double lower_1d[1] = {0.0};
-    double upper_1d[1] = {1.0};
-    gkyl_rect_grid_init(&gk_geom->bmag_max_grid, 1, lower_1d, upper_1d, cells_1d);
-    gkyl_range_init(&gk_geom->bmag_max_range, 1, (int[]){1}, (int[]){1});
-    gkyl_range_init(&gk_geom->bmag_max_range_ext, 1, (int[]){0}, (int[]){2});
-    
-    // For 0D, use a p=0 1D basis (just the constant).
-    gkyl_cart_modal_serendip(&gk_geom->bmag_max_basis, 1, 0);
-    
-    // Nodal range: single node.
-    int nodes_1d[1] = {1};
-    gkyl_range_init_from_shape(&gk_geom->bmag_max_nrange, 1, nodes_1d);
-    
-    // Allocate arrays.
-    gk_geom->bmag_max = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_range_ext.volume);
-    gk_geom->bmag_max_z_coord = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_range_ext.volume);
-    gk_geom->bmag_max_nodal = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_nrange.volume);
-    gk_geom->bmag_max_z_coord_nodal = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_nrange.volume);
-    
-    // Compute the global bmag_max by iterating over all cells.
-    double bmag_max_val = -DBL_MAX;
-    double bmag_max_z = 0.0;
-    
-    struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, cdim, gk_geom->basis.num_basis);
-    gk_geom->basis.node_list(gkyl_array_fetch(nodes, 0));
-    
-    struct gkyl_array *bmag_ho = gkyl_array_new(GKYL_DOUBLE, gk_geom->geo_int.bmag->ncomp, gk_geom->geo_int.bmag->size);
-    gkyl_array_copy(bmag_ho, gk_geom->geo_int.bmag);
-    
-    struct gkyl_range_iter iter;
-    gkyl_range_iter_init(&iter, &gk_geom->local);
-    while (gkyl_range_iter_next(&iter)) {
-      long linidx = gkyl_range_idx(&gk_geom->local, iter.idx);
-      double *b_d = gkyl_array_fetch(bmag_ho, linidx);
-      
-      double xc[cdim];
-      gkyl_rect_grid_cell_center(&gk_geom->grid, iter.idx, xc);
-      
-      for (int n = 0; n < gk_geom->basis.num_basis; n++) {
-        const double *nod_log = gkyl_array_cfetch(nodes, n);
-        double b = gk_geom->basis.eval_expand(nod_log, b_d);
-        
-        double nod_phys[cdim];
-        log_to_comp(cdim, nod_log, gk_geom->grid.dx, xc, nod_phys);
-        
-        if (b > bmag_max_val) {
-          bmag_max_val = b;
-          bmag_max_z = nod_phys[cdim-1]; // z is the last coordinate.
-        }
-      }
-    }
-    
-    gkyl_array_release(nodes);
-    gkyl_array_release(bmag_ho);
-    
-    // Store in nodal arrays.
-    double *bmag_max_n = gkyl_array_fetch(gk_geom->bmag_max_nodal, 0);
-    double *bmag_max_z_n = gkyl_array_fetch(gk_geom->bmag_max_z_coord_nodal, 0);
-    bmag_max_n[0] = bmag_max_val;
-    bmag_max_z_n[0] = bmag_max_z;
-    
-    // For 0D (1x case), the modal value is just the nodal value.
-    double *bmag_max_m = gkyl_array_fetch(gk_geom->bmag_max, 0);
-    double *bmag_max_z_m = gkyl_array_fetch(gk_geom->bmag_max_z_coord, 0);
-    bmag_max_m[0] = bmag_max_val;
-    bmag_max_z_m[0] = bmag_max_z;
-  }
-  else if (bmag_max_dim == 1) {
-    // 2x case: bmag_max varies with psi (x-direction).
-    // Create a 1D grid/range in the psi direction.
-    int cells_psi = gk_geom->grid.cells[0];
-    double lower_psi = gk_geom->grid.lower[0];
-    double upper_psi = gk_geom->grid.upper[0];
-    
-    gkyl_rect_grid_init(&gk_geom->bmag_max_grid, 1, &lower_psi, &upper_psi, &cells_psi);
-    
-    // Create range matching the local range in psi direction.
-    int lower_idx[1] = {gk_geom->local.lower[0]};
-    int upper_idx[1] = {gk_geom->local.upper[0]};
-    gkyl_sub_range_init(&gk_geom->bmag_max_range, &gk_geom->local, lower_idx, upper_idx);
-    // Actually need a proper 1D range:
-    gkyl_range_init(&gk_geom->bmag_max_range, 1, lower_idx, upper_idx);
-    
-    int lower_ext_idx[1] = {gk_geom->local_ext.lower[0]};
-    int upper_ext_idx[1] = {gk_geom->local_ext.upper[0]};
-    gkyl_range_init(&gk_geom->bmag_max_range_ext, 1, lower_ext_idx, upper_ext_idx);
-    
-    // Create 1D basis for psi direction.
-    gkyl_cart_modal_serendip(&gk_geom->bmag_max_basis, 1, poly_order);
-    
-    // Create nodal range for psi direction.
-    int num_nodes_psi = (poly_order == 1) ? gkyl_range_shape(&gk_geom->bmag_max_range, 0) + 1
-                                          : 2*gkyl_range_shape(&gk_geom->bmag_max_range, 0) + 1;
-    int nodes_shape[1] = {num_nodes_psi};
-    gkyl_range_init_from_shape(&gk_geom->bmag_max_nrange, 1, nodes_shape);
-    
-    // Allocate arrays.
-    gk_geom->bmag_max = gkyl_array_new(GKYL_DOUBLE, gk_geom->bmag_max_basis.num_basis, gk_geom->bmag_max_range_ext.volume);
-    gk_geom->bmag_max_z_coord = gkyl_array_new(GKYL_DOUBLE, gk_geom->bmag_max_basis.num_basis, gk_geom->bmag_max_range_ext.volume);
-    gk_geom->bmag_max_nodal = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_nrange.volume);
-    gk_geom->bmag_max_z_coord_nodal = gkyl_array_new(GKYL_DOUBLE, 1, gk_geom->bmag_max_nrange.volume);
-    
-    // For each psi (field line), find max bmag over all z values.
-    // We need to iterate over the 2D grid and for each psi, find max over z.
-    
-    struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, cdim, gk_geom->basis.num_basis);
-    gk_geom->basis.node_list(gkyl_array_fetch(nodes, 0));
-    
-    struct gkyl_array *bmag_ho = gkyl_array_new(GKYL_DOUBLE, gk_geom->geo_int.bmag->ncomp, gk_geom->geo_int.bmag->size);
-    gkyl_array_copy(bmag_ho, gk_geom->geo_int.bmag);
-    
-    // Create temporary arrays to store max bmag and z-coord per psi node.
-    double *bmag_max_per_psi = gkyl_malloc(sizeof(double) * num_nodes_psi);
-    double *z_coord_per_psi = gkyl_malloc(sizeof(double) * num_nodes_psi);
-    for (int i = 0; i < num_nodes_psi; i++) {
-      bmag_max_per_psi[i] = -DBL_MAX;
-      z_coord_per_psi[i] = 0.0;
-    }
-    
-    // Iterate over all cells in 2D.
-    struct gkyl_range_iter iter;
-    gkyl_range_iter_init(&iter, &gk_geom->local);
-    while (gkyl_range_iter_next(&iter)) {
-      int psi_idx = iter.idx[0]; // psi cell index
-      long linidx = gkyl_range_idx(&gk_geom->local, iter.idx);
-      double *b_d = gkyl_array_fetch(bmag_ho, linidx);
-      
-      double xc[cdim];
-      gkyl_rect_grid_cell_center(&gk_geom->grid, iter.idx, xc);
-      
-      // Evaluate bmag at each node in this cell.
-      for (int n = 0; n < gk_geom->basis.num_basis; n++) {
-        const double *nod_log = gkyl_array_cfetch(nodes, n);
-        double b = gk_geom->basis.eval_expand(nod_log, b_d);
-        
-        double nod_phys[cdim];
-        log_to_comp(cdim, nod_log, gk_geom->grid.dx, xc, nod_phys);
-        
-        // Determine which psi nodal index this corresponds to.
-        // For p=1: nodes are at cell corners, so node indices are psi_idx-local.lower[0] and psi_idx-local.lower[0]+1.
-        // For p=2: nodes are at cell corners and center.
-        int psi_node_offset;
-        if (poly_order == 1) {
-          // nod_log[0] is -1 or +1, mapping to left or right corner.
-          psi_node_offset = (nod_log[0] < 0) ? 0 : 1;
-        }
-        else { // poly_order == 2
-          // nod_log[0] is -1, 0, or +1.
-          if (nod_log[0] < -0.5)
-            psi_node_offset = 0;
-          else if (nod_log[0] > 0.5)
-            psi_node_offset = 2;
-          else
-            psi_node_offset = 1;
-        }
-        
-        int psi_cell_local = psi_idx - gk_geom->local.lower[0];
-        int psi_node_idx;
-        if (poly_order == 1)
-          psi_node_idx = psi_cell_local + psi_node_offset;
-        else
-          psi_node_idx = 2*psi_cell_local + psi_node_offset;
-        
-        // Update max for this psi node.
-        if (b > bmag_max_per_psi[psi_node_idx]) {
-          bmag_max_per_psi[psi_node_idx] = b;
-          z_coord_per_psi[psi_node_idx] = nod_phys[cdim-1]; // z is last coordinate.
-        }
-      }
-    }
-    
-    gkyl_array_release(nodes);
-    gkyl_array_release(bmag_ho);
-    
-    // Store in nodal arrays.
-    for (int i = 0; i < num_nodes_psi; i++) {
-      double *bmag_max_n = gkyl_array_fetch(gk_geom->bmag_max_nodal, i);
-      double *z_coord_n = gkyl_array_fetch(gk_geom->bmag_max_z_coord_nodal, i);
-      bmag_max_n[0] = bmag_max_per_psi[i];
-      z_coord_n[0] = z_coord_per_psi[i];
-    }
-    
-    gkyl_free(bmag_max_per_psi);
-    gkyl_free(z_coord_per_psi);
-    
-    // Transform nodal to modal using n2m.
-    struct gkyl_nodal_ops *n2m = gkyl_nodal_ops_new(&gk_geom->bmag_max_basis, &gk_geom->bmag_max_grid, false);
-    gkyl_nodal_ops_n2m(n2m, &gk_geom->bmag_max_basis, &gk_geom->bmag_max_grid, 
-      &gk_geom->bmag_max_nrange, &gk_geom->bmag_max_range, 1, 
-      gk_geom->bmag_max_nodal, gk_geom->bmag_max, false);
-    gkyl_nodal_ops_n2m(n2m, &gk_geom->bmag_max_basis, &gk_geom->bmag_max_grid, 
-      &gk_geom->bmag_max_nrange, &gk_geom->bmag_max_range, 1, 
-      gk_geom->bmag_max_z_coord_nodal, gk_geom->bmag_max_z_coord, false);
-    gkyl_nodal_ops_release(n2m);
-
-  }
-  else {
-    // 3x case: bmag_max varies with psi and alpha (2D).
-    // TODO: Implement 3x case if needed.
-    assert(false && "bmag_max per field line not yet implemented for 3x");
-  }
-}
-
-void
-gkyl_gk_geometry_bmag_max_release(struct gk_geometry *gk_geom)
-{
-  if (gk_geom->bmag_max) {
-    gkyl_array_release(gk_geom->bmag_max);
-    gkyl_array_release(gk_geom->bmag_max_z_coord);
-    gkyl_array_release(gk_geom->bmag_max_nodal);
-    gkyl_array_release(gk_geom->bmag_max_z_coord_nodal);
-    gk_geom->bmag_max = NULL;
-    gk_geom->bmag_max_z_coord = NULL;
-    gk_geom->bmag_max_nodal = NULL;
-    gk_geom->bmag_max_z_coord_nodal = NULL;
-  }
-}
-
 void
 gkyl_gk_geometry_init_nodal_range( struct gkyl_range *nrange, struct gkyl_range *range, int poly_order)
 {
@@ -749,12 +516,6 @@ gkyl_gk_geometry_deflate(const struct gk_geometry* up_3d, struct gkyl_gk_geometr
     }
   }
 
-  // Initialize bmag_max pointers to NULL (will be populated later if needed).
-  up->bmag_max = NULL;
-  up->bmag_max_z_coord = NULL;
-  up->bmag_max_nodal = NULL;
-  up->bmag_max_z_coord_nodal = NULL;
-
   up->flags = 0;
   GKYL_CLEAR_CU_ALLOC(up->flags);
   up->ref_count = gkyl_ref_count_init(gkyl_gk_geometry_free);
@@ -823,9 +584,6 @@ gkyl_gk_geometry_free(const struct gkyl_ref_count *ref)
   for (int dir=0; dir<up->grid.ndim; ++dir)
     gk_geometry_surf_release_nodal(up, dir);
 
-  // Release bmag_max arrays.
-  gkyl_gk_geometry_bmag_max_release(up);
-
   if (gkyl_gk_geometry_is_cu_dev(up)) 
     gkyl_cu_free(up->on_dev); 
 

From 220fdbba028ce2be99b8f57ac189f0ca3b2a91c0 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 11 Dec 2025 10:33:04 -0500
Subject: [PATCH 03/32] Add a project on peaks function to array_dg_find_peaks,
 which evaluates another DG array at the peaks of the initilized array. Add
 appropriate unit tests which pass to ctest_array_dg_find_peaks. Update
 gk_species_fdot_multiplier to use the project_on_peaks function with phi.
 Now, everything passed to loss_cone_mask_gyrokinetic is a gkyl_array. The
 loss_cone_mask is updated accordingly

---
 ...nd_peaks.c => ctest_array_dg_find_peaks.c} | 220 ++++++++++++++++++
 core/zero/array_dg_find_peaks.c               | 194 +++++++++++++++
 core/zero/gkyl_array_dg_find_peaks.h          |  78 +++++++
 gyrokinetic/apps/gk_species_fdot_multiplier.c | 109 ++++-----
 gyrokinetic/apps/gkyl_gyrokinetic_priv.h      |   9 +-
 .../unit/ctest_loss_cone_mask_gyrokinetic.c   |  69 ++++--
 gyrokinetic/zero/gk_geometry.c                |   6 -
 .../zero/gkyl_loss_cone_mask_gyrokinetic.h    |   4 +-
 .../gkyl_loss_cone_mask_gyrokinetic_priv.h    |   4 +-
 gyrokinetic/zero/loss_cone_mask_gyrokinetic.c |  22 +-
 .../zero/loss_cone_mask_gyrokinetic_cu.cu     |  12 +-
 11 files changed, 624 insertions(+), 103 deletions(-)
 rename core/unit/{ctest_dg_find_peaks.c => ctest_array_dg_find_peaks.c} (68%)

diff --git a/core/unit/ctest_dg_find_peaks.c b/core/unit/ctest_array_dg_find_peaks.c
similarity index 68%
rename from core/unit/ctest_dg_find_peaks.c
rename to core/unit/ctest_array_dg_find_peaks.c
index 71422cf8dd..3b8ac99a11 100644
--- a/core/unit/ctest_dg_find_peaks.c
+++ b/core/unit/ctest_array_dg_find_peaks.c
@@ -82,6 +82,22 @@ test_func_2d_complex(double t, const double *xn, double *fout, void *ctx)
   fout[0] = (sinz * sinz + 0.1) * exp(-z * z / 100.0) * psi;
 }
 
+// Test function to project onto peaks: g(psi, z) = z^2 * psi^2
+static void
+test_func_quadratic_2d(double t, const double *xn, double *fout, void *ctx)
+{
+  double psi = xn[0], z = xn[1];
+  fout[0] = z * z * psi * psi;
+}
+
+// 1D version: g(z) = z^2
+static void
+test_func_quadratic_1d(double t, const double *xn, double *fout, void *ctx)
+{
+  double z = xn[0];
+  fout[0] = z * z;
+}
+
 // Test 1D peak finding with cos function.
 void
 test_1d_find_peaks_cos(int poly_order)
@@ -512,11 +528,213 @@ test_2d_find_peaks_complex(int poly_order)
   gkyl_array_dg_find_peaks_release(peaks);
 }
 
+// Test 1D project_on_peaks with complex function.
+void
+test_1d_project_on_peaks(int poly_order)
+{
+  double lower[] = {-2.0*M_PI};
+  double upper[] = {2.0*M_PI};
+  int cells[] = {64};
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 1, poly_order);
+
+  int ghost[] = {1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function for peak finding.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Project quadratic function to evaluate at peaks.
+  struct gkyl_array *g = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_1d, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create peak finder.
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 0,
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 9);
+
+  // Allocate output arrays for projected values.
+  const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks);
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX];
+  for (int p = 0; p < num_peaks; p++) {
+    g_at_peaks[p] = gkyl_array_new(GKYL_DOUBLE, out_basis->num_basis, out_range_ext->volume);
+  }
+  gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks);
+
+  // Verify that g evaluated at each peak matches analytical values.
+  // For 1D->0D, output is p=0, so the value is already the cell average.
+  // The cell average of a p=0 expansion is value / sqrt(volume), and
+  // for a 1D cell with volume=1, it's just the value / sqrt(1.0) = value.
+  struct {
+    enum gkyl_peak_type type;
+    double z_expected;
+  } expected_peaks[] = {
+    {GKYL_PEAK_EDGE_LO,    -2.0*M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,  -3.0*M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,  -M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,  -M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,   0.0,},
+    {GKYL_PEAK_LOCAL_MAX,   M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,   M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,   3.0*M_PI/2.0,},
+    {GKYL_PEAK_EDGE_HI,     2.0*M_PI,},
+  };
+  for (int p = 0; p < num_peaks; p++) {
+    const double *g_val = gkyl_array_cfetch(g_at_peaks[p], 0);
+    double z = expected_peaks[p].z_expected;
+    double expected = z * z;
+    TEST_CHECK(gkyl_compare_double(g_val[0], expected, 1e-12));
+  }
+
+  for (int p = 0; p < num_peaks; p++) {
+    gkyl_array_release(g_at_peaks[p]);
+  }
+  gkyl_array_release(f);
+  gkyl_array_release(g);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// Test 2D project_on_peaks with complex function.
+void
+test_2d_project_on_peaks(int poly_order)
+{
+  double lower[] = {0.5, -2.0*M_PI};
+  double upper[] = {2.0, 2.0*M_PI};
+  int cells[] = {16, 64};
+  int ndim = 2;
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, ndim, poly_order);
+
+  int ghost[] = {1, 1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function for peak finding.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Project quadratic function to evaluate at peaks: g(psi, z) = z^2 * psi^2
+  struct gkyl_array *g = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_2d, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create peak finder (search along z, which is direction 1).
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 1,  // Search along z.
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 9);
+
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks);
+  const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks);
+  const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks);
+
+  // Allocate output arrays for projected values.
+  struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX];
+  for (int p = 0; p < num_peaks; p++) {
+    g_at_peaks[p] = gkyl_array_new(GKYL_DOUBLE, out_basis->num_basis, out_range_ext->volume);
+  }
+  gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks);
+
+  // Define expected peak locations (same as before).
+  double expected_z_peaks[] = {
+    -2.0*M_PI, -3.0*M_PI/2.0, -M_PI, -M_PI/2.0, 0.0,
+    M_PI/2.0, M_PI, 3.0*M_PI/2.0, 2.0*M_PI
+  };
+
+  // Get node locations for output basis.
+  struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, out_basis->ndim, out_basis->num_basis);
+  out_basis->node_list(gkyl_array_fetch(nodes, 0));
+
+  // Verify that g evaluated at each peak matches analytical values.
+  for (int p = 0; p < num_peaks; p++) {
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    
+    // Check each psi cell.
+    struct gkyl_range_iter iter;
+    gkyl_range_iter_init(&iter, out_range);
+    while (gkyl_range_iter_next(&iter)) {
+      long linidx = gkyl_range_idx(out_range, iter.idx);
+      
+      const double *g_val_d = gkyl_array_cfetch(g_at_peaks[p], linidx);
+      const double *coord_d = gkyl_array_cfetch(coords, linidx);
+      
+      // Get cell center for physical psi coordinate.
+      double xc_out[1];
+      gkyl_rect_grid_cell_center(out_grid, (int[]){iter.idx[0]}, xc_out);
+      
+      // Evaluate at each nodal point in this cell.
+      for (int n = 0; n < out_basis->num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(nodes, n);
+        double g_at_node = out_basis->eval_expand(nod_log, g_val_d);
+        double z_at_node = out_basis->eval_expand(nod_log, coord_d);
+        
+        // Compute physical psi coordinate at this node.
+        double nod_phys[1];
+        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0]/2.0;
+        double psi = nod_phys[0];
+        
+        // Analytical value: g(psi, z) = z^2 * psi^2
+        double expected = z_at_node * z_at_node * psi * psi;
+        TEST_CHECK(fabs(z_at_node - expected_z_peaks[p]) < 1e-15);
+        TEST_CHECK(gkyl_compare_double(g_at_node, expected, 1e-15));
+      }
+    }
+  }
+
+  // Clean up.
+  gkyl_array_release(nodes);
+  for (int p = 0; p < num_peaks; p++) {
+    gkyl_array_release(g_at_peaks[p]);
+  }
+  gkyl_array_release(f);
+  gkyl_array_release(g);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
 void test_1d_cos_p1() { test_1d_find_peaks_cos(1); }
 void test_1d_mirror_p1() { test_1d_find_peaks_mirror(1); }
 void test_1d_complex_p1() { test_1d_find_peaks_complex(1); }
 void test_2d_p1() { test_2d_find_peaks(1); }
 void test_2d_complex_p1() { test_2d_find_peaks_complex(1); }
+void test_1d_project_p1() { test_1d_project_on_peaks(1); }
+void test_2d_project_p1() { test_2d_project_on_peaks(1); }
 
 TEST_LIST = {
   {"test_1d_cos_p1", test_1d_cos_p1},
@@ -524,5 +742,7 @@ TEST_LIST = {
   {"test_1d_complex_p1", test_1d_complex_p1},
   {"test_2d_p1", test_2d_p1},
   {"test_2d_complex_p1", test_2d_complex_p1},
+  {"test_1d_project_p1", test_1d_project_p1},
+  {"test_2d_project_p1", test_2d_project_p1},
   {NULL, NULL},
 };
diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c
index 868f92b269..08eb425517 100644
--- a/core/zero/array_dg_find_peaks.c
+++ b/core/zero/array_dg_find_peaks.c
@@ -339,6 +339,133 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
   gkyl_free(visited);
 }
 
+/**
+ * Evaluate an input array at peak locations for a given preserved-direction
+ * node index, storing results in the nodal output arrays.
+ */
+static void
+eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_ho, int preserved_node_idx, struct gkyl_array **out_vals_nodal)
+{
+  int ndim = up->grid.ndim;
+  int search_dir = up->search_dir;
+  int poly_order = up->basis.poly_order;
+  int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
+
+  // For each peak, evaluate the input array at the peak coordinate.
+  for (int p = 0; p < up->num_peaks; p++) {
+    // Get the peak coordinate that was found during find_peaks.
+    const double *peak_coord_n = gkyl_array_cfetch(up->out_coords_nodal[p], preserved_node_idx);
+    double peak_coord_search = peak_coord_n[0];
+
+    // Find the cell containing this coordinate in the search direction.
+    // We need to build a point coordinate to pass to find_cell.
+    double point[GKYL_MAX_DIM];
+    int known_idx[GKYL_MAX_DIM];
+    int cell_idx[GKYL_MAX_DIM];
+    
+    for (int d = 0; d < ndim; d++) {
+      if (d == search_dir) {
+        point[d] = peak_coord_search;
+        known_idx[d] = -1; // Not known
+      }
+      else {
+        // Use dummy value - we'll specify known_idx.
+        point[d] = 0.0;
+        known_idx[d] = -1;
+      }
+    }
+    
+    // If 2D, we need to determine preserved direction cell from preserved_node_idx.
+    // For p=1 with N cells (1-based indexing), nodal points map as:
+    //   Node 0 -> cell 1, logical coord -1 (left edge of first cell)
+    //   Node k (1 <= k <= N) -> cell k, logical coord +1 (right edge of cell k)
+    // This ensures proper continuity at shared cell boundaries.
+    if (ndim > 1) {
+      int pres_cell;
+      if (poly_order == 1) {
+        if (preserved_node_idx == 0) {
+          // First node: evaluate at left edge of first cell.
+          pres_cell = up->range.lower[preserved_dir];
+        }
+        else {
+          // All other nodes (1 to N): evaluate at right edge of cell with index = node_idx.
+          // Clamp to upper bound for safety.
+          pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+          if (pres_cell > up->range.upper[preserved_dir]) {
+            pres_cell = up->range.upper[preserved_dir];
+          }
+        }
+      }
+      else {
+        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx / 2;
+      }
+      known_idx[preserved_dir] = pres_cell;
+      
+      // Set the coordinate in preserved direction to the cell center.
+      int pres_cell_idx[GKYL_MAX_DIM];
+      for (int d = 0; d < ndim; d++) {
+        pres_cell_idx[d] = (d == preserved_dir) ? pres_cell : 1;
+      }
+      double xc_pres[GKYL_MAX_DIM];
+      gkyl_rect_grid_cell_center(&up->grid, pres_cell_idx, xc_pres);
+      point[preserved_dir] = xc_pres[preserved_dir];
+    }
+    
+    gkyl_rect_grid_find_cell(&up->grid, point, true, known_idx, cell_idx);
+
+    // Clamp cell_idx to interior range (avoid ghost cells).
+    for (int d = 0; d < up->grid.ndim; d++) {
+      if (cell_idx[d] < up->range.lower[d]) {
+        cell_idx[d] = up->range.lower[d];
+      }
+      if (cell_idx[d] > up->range.upper[d]) {
+        cell_idx[d] = up->range.upper[d];
+      }
+    }
+
+    // Get the DG coefficients at this cell.
+    long linidx = gkyl_range_idx(&up->range_ext, cell_idx);
+    const double *f_d = gkyl_array_cfetch(in_ho, linidx);
+
+    // Get cell center.
+    double xc[GKYL_MAX_DIM];
+    gkyl_rect_grid_cell_center(&up->grid, cell_idx, xc);
+
+    // Convert peak coordinate to logical space.
+    double nod_log[GKYL_MAX_DIM];
+    for (int d = 0; d < ndim; d++) {
+      if (d == search_dir) {
+        // Convert physical coordinate to logical [-1, 1].
+        nod_log[d] = 2.0 * (peak_coord_search - xc[d]) / up->grid.dx[d];
+      }
+      else if (ndim > 1) {
+        // In preserved direction, use the node position in the cell.
+        // For p=1: node 0 is at left edge (-1), all others at right edge (+1).
+        if (poly_order == 1) {
+          nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
+        }
+        else {
+          int pres_node_offset = preserved_node_idx % 3;
+          if (pres_node_offset == 0)
+            nod_log[d] = -1.0;
+          else if (pres_node_offset == 1)
+            nod_log[d] = 0.0;
+          else
+            nod_log[d] = 1.0;
+        }
+      }
+    }
+
+    // Evaluate the DG expansion at this logical coordinate.
+    double val = up->basis.eval_expand(nod_log, f_d);
+
+    // Store the result.
+    double *val_n = gkyl_array_fetch(out_vals_nodal[p], preserved_node_idx);
+    val_n[0] = val;
+  }
+}
+
 struct gkyl_array_dg_find_peaks*
 gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *inp, const struct gkyl_array *field)
 {
@@ -514,6 +641,12 @@ gkyl_array_dg_find_peaks_get_range_ext(const struct gkyl_array_dg_find_peaks *up
   return &up->out_range_ext;
 }
 
+const struct gkyl_range*
+gkyl_array_dg_find_peaks_get_nodal_range(const struct gkyl_array_dg_find_peaks *up)
+{
+  return &up->out_nrange;
+}
+
 const struct gkyl_array*
 gkyl_array_dg_find_peaks_get_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
 {
@@ -521,6 +654,13 @@ gkyl_array_dg_find_peaks_get_vals(const struct gkyl_array_dg_find_peaks *up, int
   return up->out_vals[peak_idx];
 }
 
+const struct gkyl_array*
+gkyl_array_dg_find_peaks_get_vals_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+  return up->out_vals_nodal[peak_idx];
+}
+
 const struct gkyl_array*
 gkyl_array_dg_find_peaks_get_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
 {
@@ -528,6 +668,60 @@ gkyl_array_dg_find_peaks_get_coords(const struct gkyl_array_dg_find_peaks *up, i
   return up->out_coords[peak_idx];
 }
 
+const struct gkyl_array*
+gkyl_array_dg_find_peaks_get_coords_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+  return up->out_coords_nodal[peak_idx];
+}
+
+void
+gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, struct gkyl_array **out_vals)
+{
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+
+  // Copy input to host if on GPU.
+  struct gkyl_array *in_ho = gkyl_array_new(GKYL_DOUBLE, in_array->ncomp, in_array->size);
+  gkyl_array_copy(in_ho, in_array);
+
+  // Allocate temporary nodal arrays for each peak.
+  struct gkyl_array *out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];
+  for (int p = 0; p < up->num_peaks; p++) {
+    out_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+  }
+
+  // Evaluate the input array at peak locations for each preserved-direction node.
+  int num_nodes_out = up->out_nrange.volume;
+  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+    eval_array_at_peaks_for_preserved_node(up, in_ho, pres_node, out_vals_nodal);
+  }
+
+  // Transform nodal to modal for each peak.
+  if (out_dim == 0) {
+    // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+    for (int p = 0; p < up->num_peaks; p++) {
+      double *val_m = gkyl_array_fetch(out_vals[p], 0);
+      const double *val_n = gkyl_array_cfetch(out_vals_nodal[p], 0);
+      val_m[0] = val_n[0];
+    }
+  }
+  else {
+    // 2D -> 1D case: use nodal-to-modal transform.
+    for (int p = 0; p < up->num_peaks; p++) {
+      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1, out_vals_nodal[p], out_vals[p], false);
+    }
+  }
+
+  // Clean up temporary arrays.
+  for (int p = 0; p < up->num_peaks; p++) {
+    gkyl_array_release(out_vals_nodal[p]);
+  }
+  gkyl_array_release(in_ho);
+}
+
 void
 gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up)
 {
diff --git a/core/zero/gkyl_array_dg_find_peaks.h b/core/zero/gkyl_array_dg_find_peaks.h
index 352bf38cf6..a7981591cc 100644
--- a/core/zero/gkyl_array_dg_find_peaks.h
+++ b/core/zero/gkyl_array_dg_find_peaks.h
@@ -118,6 +118,15 @@ const struct gkyl_range* gkyl_array_dg_find_peaks_get_range(const struct gkyl_ar
  */
 const struct gkyl_range* gkyl_array_dg_find_peaks_get_range_ext(const struct gkyl_array_dg_find_peaks *up);
 
+/**
+ * Get the output nodal range.
+ * 
+ * @param up Updater object
+ * @return Pointer to output nodal range
+ */
+const struct gkyl_range*
+gkyl_array_dg_find_peaks_get_nodal_range(const struct gkyl_array_dg_find_peaks *up);
+
 /**
  * Get the output array containing peak values for a specific peak.
  * 
@@ -127,6 +136,15 @@ const struct gkyl_range* gkyl_array_dg_find_peaks_get_range_ext(const struct gky
  */
 const struct gkyl_array* gkyl_array_dg_find_peaks_get_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
 
+/**
+ * Get the output array containing peak values in nodal basis for a specific peak.
+ * 
+ * @param up Updater object
+ * @param peak_idx Index of the peak (0 to num_peaks-1)
+ * @return Pointer to output values array (nodal DG expansion)
+ */
+const struct gkyl_array* gkyl_array_dg_find_peaks_get_vals_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+
 /**
  * Get the output array containing coordinates of a specific peak.
  * 
@@ -136,6 +154,66 @@ const struct gkyl_array* gkyl_array_dg_find_peaks_get_vals(const struct gkyl_arr
  */
 const struct gkyl_array* gkyl_array_dg_find_peaks_get_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
 
+/**
+ * Get the output array containing coordinates in nodal basis of a specific peak.
+ * 
+ * @param up Updater object
+ * @param peak_idx Index of the peak (0 to num_peaks-1)
+ * @return Pointer to output coordinates array (nodal DG expansion)
+ */
+const struct gkyl_array* gkyl_array_dg_find_peaks_get_coords_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+
+/**
+ * Project (evaluate) an arbitrary array onto the peak locations previously
+ * found by gkyl_array_dg_find_peaks_advance.
+ * 
+ * For a 1D case with 5 peaks, this evaluates the input array at those 5 peak
+ * locations and returns the values.
+ * 
+ * For a 2D case with peaks along lines (e.g., psi vs z with peaks in z),
+ * this evaluates the input array along the contours defined by the peak
+ * locations for each psi.
+ * 
+ * The peak locations must have been previously computed via
+ * gkyl_array_dg_find_peaks_advance. This method evaluates the provided array
+ * at those same locations.
+ * 
+ * Example usage:
+ * @code
+ * // 1. Find peaks in bmag along z direction
+ * struct gkyl_array_dg_find_peaks *peak_finder = gkyl_array_dg_find_peaks_new(&inp, bmag);
+ * gkyl_array_dg_find_peaks_advance(peak_finder, bmag);
+ * 
+ * // 2. Get bmag_max (LOCAL_MAX peak) location and value
+ * int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peak_finder);
+ * int bmag_max_idx = -1;
+ * for (int p = 0; p < num_peaks; p++) {
+ *   if (gkyl_array_dg_find_peaks_get_type(peak_finder, p) == GKYL_PEAK_LOCAL_MAX) {
+ *     bmag_max_idx = p;
+ *     break;
+ *   }
+ * }
+ * const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_get_vals(peak_finder, bmag_max_idx);
+ * const struct gkyl_array *z_max = gkyl_array_dg_find_peaks_get_coords(peak_finder, bmag_max_idx);
+ * 
+ * // 3. Evaluate phi at the same locations where bmag has peaks
+ * struct gkyl_array *phi_at_peaks[num_peaks];
+ * for (int p = 0; p < num_peaks; p++) {
+ *   phi_at_peaks[p] = gkyl_array_new(GKYL_DOUBLE, out_basis.num_basis, out_range_ext.volume);
+ * }
+ * gkyl_array_dg_find_peaks_project_on_peaks(peak_finder, phi, phi_at_peaks);
+ * 
+ * // 4. Now phi_at_peaks[bmag_max_idx] contains phi evaluated at the mirror throat
+ * @endcode
+ * 
+ * @param up Updater object (must have run advance first)
+ * @param in_array Array to evaluate at peak locations (same grid/basis as original field)
+ * @param out_vals Output: array of evaluated values for each peak
+ *                 (must be pre-allocated with num_peaks elements, each matching out_range_ext)
+ */
+void gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, struct gkyl_array **out_vals);
+
 /**
  * Release the updater and all internal arrays.
  * 
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 99987191fc..31a8a24d13 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -3,6 +3,7 @@
 #include <gkyl_loss_cone_mask_gyrokinetic.h>
 #include <gkyl_alloc.h>
 #include <gkyl_dg_basis_ops.h>
+#include <gkyl_array_dg_find_peaks.h>
 
 void
 gk_species_fdot_multiplier_write_disabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
@@ -57,14 +58,16 @@ void
 gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
-  // Find the potential at the mirror throat.
-  gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, fdmul->bmag_max_coord_ref,
-    app->basis_on_dev, &app->grid, &app->local, fdmul->phi_m);
-  gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, fdmul->phi_m, fdmul->phi_m_global);
+  // Find the potential at all peak locations (including the mirror throat).
+  gkyl_array_dg_find_peaks_project_on_peaks(fdmul->bmag_peak_finder, phi, fdmul->phi_at_peaks);
 
-  // Project the loss cone mask.
+  // Get phi at the mirror throat (bmag_max peak location).
+  // phi_at_peaks[bmag_max_peak_idx] is a DG array on the reduced grid.
+  const struct gkyl_array *phi_m_arr = fdmul->phi_at_peaks[fdmul->bmag_max_peak_idx];
+
+  // Project the loss cone mask using the phi_m array.
   gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
-    phi, fdmul->phi_m_global, fdmul->multiplier);
+    phi, phi_m_arr, fdmul->multiplier);
 
   // Multiply out by the multplier.
   gkyl_array_scale_by_cell(out, fdmul->multiplier);
@@ -157,51 +160,36 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       //   B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_const=true. Output: ncomp=1 array.
       enum gkyl_quad_type qtype = GKYL_GAUSS_LOBATTO_QUAD;
       int num_quad = gks->basis.poly_order+1; // This can be p+1 or 1. Must be
-                                              // at leat p+1 for Gauss-Lobatto.
-
-      // Store pointers to per-field-line bmag_max arrays from gk_geometry.
-      fdmul->bmag_max = app->gk_geom->bmag_max;
-      fdmul->bmag_max_z_coord = app->gk_geom->bmag_max_z_coord;
-      fdmul->bmag_max_basis = &app->gk_geom->bmag_max_basis;
-      fdmul->bmag_max_range = &app->gk_geom->bmag_max_range;
-
-      // Compute reference coordinate for phi evaluation at mirror throat.
-      // For 1x: use the single bmag_max_z value.
-      // For 2x: use the bmag_max_z at the center of the psi domain (mid field line).
-      double bmag_max_coord_ref_ho[GKYL_MAX_CDIM];
-      if (app->cdim == 1) {
-        // 1x case: single value.
-        const double *bmag_max_z_d = gkyl_array_cfetch(app->gk_geom->bmag_max_z_coord, 0);
-        bmag_max_coord_ref_ho[0] = bmag_max_z_d[0];
-      } else {
-        // 2x case: use the center psi cell's bmag_max_z.
-        // Get the mid psi index.
-        int mid_psi_idx = (app->gk_geom->bmag_max_range.lower[0] + app->gk_geom->bmag_max_range.upper[0]) / 2;
-        int psi_idx[1] = {mid_psi_idx};
-        long bmag_max_z_linidx = gkyl_range_idx(&app->gk_geom->bmag_max_range, psi_idx);
-        const double *bmag_max_z_d = gkyl_array_cfetch(app->gk_geom->bmag_max_z_coord, bmag_max_z_linidx);
-        // Evaluate at cell center (logical coord 0).
-        double xc[1] = {0.0};
-        double z_val = app->gk_geom->bmag_max_basis.eval_expand(xc, bmag_max_z_d);
-        // Compute the psi coordinate at the mid cell.
-        double psi_lo = app->gk_geom->bmag_max_grid.lower[0];
-        double psi_dx = app->gk_geom->bmag_max_grid.dx[0];
-        double psi_val = psi_lo + (mid_psi_idx - 0.5) * psi_dx;
-        bmag_max_coord_ref_ho[0] = psi_val;
-        bmag_max_coord_ref_ho[1] = z_val;
-      }
-
-      // Allocate and copy reference coordinate.
-      if (app->use_gpu) {
-        fdmul->bmag_max_coord_ref = gkyl_cu_malloc(app->cdim*sizeof(double));
-        gkyl_cu_memcpy(fdmul->bmag_max_coord_ref, bmag_max_coord_ref_ho, app->cdim*sizeof(double), GKYL_CU_MEMCPY_H2D);
-        fdmul->phi_m = gkyl_cu_malloc(sizeof(double));
-        fdmul->phi_m_global = gkyl_cu_malloc(sizeof(double));
-      } else {
-        fdmul->bmag_max_coord_ref = gkyl_malloc(app->cdim*sizeof(double));
-        memcpy(fdmul->bmag_max_coord_ref, bmag_max_coord_ref_ho, app->cdim*sizeof(double));
-        fdmul->phi_m = gkyl_malloc(sizeof(double));
-        fdmul->phi_m_global = gkyl_malloc(sizeof(double));
+                                              // at least p+1 for Gauss-Lobatto.
+
+      // Create peak finder for bmag to find the mirror throat.
+      // Search along the parallel (z) direction, which is the last configuration space dimension.
+      int search_dir = app->cdim - 1;
+      struct gkyl_array_dg_find_peaks_inp peak_inp = {
+        .basis = &app->basis,
+        .grid = &app->grid,
+        .range = &app->local,
+        .range_ext = &app->local_ext,
+        .search_dir = search_dir,
+        .use_gpu = app->use_gpu,
+      };
+      fdmul->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, app->gk_geom->geo_int.bmag);
+      gkyl_array_dg_find_peaks_advance(fdmul->bmag_peak_finder, app->gk_geom->geo_int.bmag);
+      
+      // Get the LOCAL_MAX peak (bmag maximum along z direction).
+      int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder);
+      fdmul->bmag_max_peak_idx = num_peaks-2; // Edge is num_peaks-1, so maximum is one less
+      fdmul->bmag_max = gkyl_array_dg_find_peaks_get_vals(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
+      fdmul->bmag_max_z_coord = gkyl_array_dg_find_peaks_get_coords(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
+      fdmul->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(fdmul->bmag_peak_finder);
+      fdmul->bmag_max_range = gkyl_array_dg_find_peaks_get_range(fdmul->bmag_peak_finder);
+      fdmul->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(fdmul->bmag_peak_finder);
+
+      // Allocate arrays for phi evaluated at all peak locations.
+      fdmul->phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array*));
+      for (int p = 0; p < num_peaks; p++) {
+        fdmul->phi_at_peaks[p] = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis, 
+          fdmul->bmag_max_range_ext->volume);
       }
 
       // Operator that projects the loss cone mask.
@@ -278,20 +266,17 @@ gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, const
       gkyl_array_release(fdmul->multiplier_host);
     }
 
-    if (fdmul->type == GKYL_GK_DAMPING_USER_INPUT) {
+    if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_USER_INPUT) {
       // Nothing to release.
     }
-    else if (fdmul->type == GKYL_GK_DAMPING_LOSS_CONE) {
-      // Note: bmag_max and bmag_max_z_coord are owned by gk_geometry, not us.
-      if (app->use_gpu) {
-        gkyl_cu_free(fdmul->bmag_max_coord_ref);
-        gkyl_cu_free(fdmul->phi_m);
-        gkyl_cu_free(fdmul->phi_m_global);
-      } else {
-        gkyl_free(fdmul->bmag_max_coord_ref);
-        gkyl_free(fdmul->phi_m);
-        gkyl_free(fdmul->phi_m_global);
+    else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE) {
+      // Release phi_at_peaks arrays.
+      int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder);
+      for (int p = 0; p < num_peaks; p++) {
+        gkyl_array_release(fdmul->phi_at_peaks[p]);
       }
+      gkyl_free(fdmul->phi_at_peaks);
+      gkyl_array_dg_find_peaks_release(fdmul->bmag_peak_finder);
       gkyl_loss_cone_mask_gyrokinetic_release(fdmul->lcm_proj_op);
     }
   }
diff --git a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
index 5e463a88f4..e2a607448d 100644
--- a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
+++ b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
@@ -836,13 +836,16 @@ struct gk_fdot_multiplier {
   struct gkyl_array *multiplier_host; // Host copy for use in IO and projecting.
   struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
   struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask.
-  // Per-field-line bmag_max arrays (pointers to gk_geometry's arrays).
+  // Updater to find bmag peaks (mirror throat location).
+  struct gkyl_array_dg_find_peaks *bmag_peak_finder; // Finds peaks in bmag along parallel direction.
+  // Per-field-line bmag_max arrays (pointers to arrays owned by bmag_peak_finder).
   const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
-  double *bmag_max_coord_ref; // Reference coordinate for phi evaluation at mirror throat.
-  double *phi_m, *phi_m_global; // Electrostatic potential at bmag_max.
+  const struct gkyl_range *bmag_max_range_ext; // Extended range for bmag_max arrays.
+  int bmag_max_peak_idx; // Index of the LOCAL_MAX peak in the peak finder.
+  struct gkyl_array **phi_at_peaks; // Phi evaluated at all peak locations.
   // Functions chosen at runtime.
   void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame);
   void (*advance_times_rate_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index 7b17ba65f6..846e53cb8b 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -13,6 +13,7 @@
 #include <gkyl_eval_on_nodes.h>
 #include <gkyl_proj_on_basis.h>
 #include <gkyl_loss_cone_mask_gyrokinetic.h>
+#include <gkyl_array_dg_find_peaks.h>
 #include <gkyl_const.h>
 
 struct loss_cone_mask_test_ctx {
@@ -229,8 +230,35 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   struct gk_geometry *gk_geom = gkyl_gk_geometry_deflate(gk_geom_3d, &geometry_input);
   gkyl_gk_geometry_release(gk_geom_3d);
   
-  // Initialize per-field-line bmag_max arrays.
-  gkyl_gk_geometry_bmag_max_init(gk_geom);
+  // Use array_dg_find_peaks to find bmag_max along the z direction.
+  // Search along the parallel (z) direction, which is the last configuration space dimension.
+  int search_dir = cdim - 1;
+  struct gkyl_array_dg_find_peaks_inp peak_inp = {
+    .basis = &basis_conf,
+    .grid = &grid_conf,
+    .range = &local_conf,
+    .range_ext = &local_ext_conf,
+    .search_dir = search_dir,
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *bmag_peak_finder = 
+    gkyl_array_dg_find_peaks_new(&peak_inp, gk_geom->geo_int.bmag);
+  gkyl_array_dg_find_peaks_advance(bmag_peak_finder, gk_geom->geo_int.bmag);
+  
+  // Get the LOCAL_MAX peak (bmag maximum along z direction).
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(bmag_peak_finder);
+  int bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less
+  const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_get_vals(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_array *bmag_max_z_coord = gkyl_array_dg_find_peaks_get_coords(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_basis *bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(bmag_peak_finder);
+  const struct gkyl_range *bmag_max_range = gkyl_array_dg_find_peaks_get_range(bmag_peak_finder);
+  const struct gkyl_range *bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder);
+  
+  // Allocate arrays for phi evaluated at all peak locations.
+  struct gkyl_array **phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array*));
+  for (int p = 0; p < num_peaks; p++) {
+    phi_at_peaks[p] = mkarr(use_gpu, bmag_max_basis->num_basis, bmag_max_range_ext->volume);
+  }
   
   // If we are on the gpu, copy from host
   if (use_gpu) {
@@ -255,18 +283,11 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   gkyl_eval_on_nodes_release(evphi);
   gkyl_array_copy(phi, phi_ho);
 
-  // Get the potential at the mirror throat (z=z_m).
-  double phi_m_ho[1];
-  double xc[] = {ctx.z_m};
-  phi_func_1x(0.0, xc, phi_m_ho, &ctx);
-  double *phi_m;
-  if (use_gpu) {
-    phi_m = gkyl_cu_malloc(sizeof(double));
-    gkyl_cu_memcpy(phi_m, phi_m_ho, sizeof(double), GKYL_CU_MEMCPY_H2D);
-  } else {
-    phi_m = gkyl_malloc(sizeof(double));
-    memcpy(phi_m, phi_m_ho, sizeof(double));
-  }
+  // Project phi onto peak locations to get phi_m at the mirror throat.
+  gkyl_array_dg_find_peaks_project_on_peaks(bmag_peak_finder, phi, phi_at_peaks);
+  
+  // Get phi at the mirror throat (bmag_max peak location).
+  const struct gkyl_array *phi_m = phi_at_peaks[bmag_max_peak_idx];
 
   // Basis used to project the mask.
   struct gkyl_basis basis_mask;
@@ -286,7 +307,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 	                              : gkyl_array_acquire(mask);
 
   // Project the loss cone mask.
-  // Use bmag_max and bmag_max_z_coord arrays from gk_geometry.
+  // Use bmag_max and bmag_max_z_coord arrays from find_peaks.
   struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
     .phase_grid = &grid,
     .conf_basis = &basis_conf,
@@ -296,10 +317,10 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .vel_range = &local_vel, 
     .vel_map = gvm,
     .bmag = gk_geom->geo_int.bmag,
-    .bmag_max = gk_geom->bmag_max,
-    .bmag_max_z_coord = gk_geom->bmag_max_z_coord,
-    .bmag_max_basis = &gk_geom->bmag_max_basis,
-    .bmag_max_range = &gk_geom->bmag_max_range,
+    .bmag_max = bmag_max,
+    .bmag_max_z_coord = bmag_max_z_coord,
+    .bmag_max_basis = bmag_max_basis,
+    .bmag_max_range = bmag_max_range,
     .mass = ctx.mass,
     .charge = ctx.charge,
     .qtype = ctx.quad_type,
@@ -358,12 +379,11 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_ref.gkyl", poly_order);
   gkyl_grid_sub_array_write(&grid, &local, 0, mask_ref_ho, fname);
 
-  // Free phi_m (bmag_max arrays are owned by gk_geom).
-  if (use_gpu) {
-    gkyl_cu_free(phi_m);
-  } else {
-    gkyl_free(phi_m);
+  // Free phi_m and phi_at_peaks arrays.
+  for (int p = 0; p < num_peaks; p++) {
+    gkyl_array_release(phi_at_peaks[p]);
   }
+  gkyl_free(phi_at_peaks);
   gkyl_array_release(phi); 
   gkyl_array_release(phi_ho); 
   gkyl_array_release(mask); 
@@ -371,6 +391,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   gkyl_array_release(mask_ref_ho);
   gkyl_loss_cone_mask_gyrokinetic_release(proj_mask);
   gkyl_velocity_map_release(gvm);
+  gkyl_array_dg_find_peaks_release(bmag_peak_finder);
   gkyl_gk_geometry_release(gk_geom);
 
 #ifdef GKYL_HAVE_CUDA
diff --git a/gyrokinetic/zero/gk_geometry.c b/gyrokinetic/zero/gk_geometry.c
index 299f3bf1a6..c05d209e15 100644
--- a/gyrokinetic/zero/gk_geometry.c
+++ b/gyrokinetic/zero/gk_geometry.c
@@ -77,12 +77,6 @@ gkyl_gk_geometry_new(struct gk_geometry* geo_host, struct gkyl_gk_geometry_inp *
     gk_geometry_surf_alloc_nodal(up, dir);
   }
 
-  // Initialize bmag_max pointers to NULL (will be populated later if needed).
-  up->bmag_max = NULL;
-  up->bmag_max_z_coord = NULL;
-  up->bmag_max_nodal = NULL;
-  up->bmag_max_z_coord_nodal = NULL;
-
   up->flags = 0;
   GKYL_CLEAR_CU_ALLOC(up->flags);
   up->ref_count = gkyl_ref_count_init(gkyl_gk_geometry_free);
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
index d6eeb2a586..565ab00e0f 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
@@ -66,12 +66,12 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
  * @param phase_rng Phase-space range.
  * @param conf_rng Configuration-space range.
  * @param phi Electrostatic potential.
- * @param phi_m Electrostatic potential at the mirror throat (on GPU if use_gpu=true).
+ * @param phi_m Electrostatic potential at the mirror throat (DG array on reduced grid).
  * @param mask_out Output masking function.
  */
 void gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out);
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, struct gkyl_array *mask_out);
 
 /**
  * Delete updater.
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
index 608de6d7af..c9419bfd47 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
@@ -111,11 +111,11 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *u
  * @param phase_rng Phase-space range.
  * @param conf_rng Configuration-space range.
  * @param phi Electrostatic potential.
- * @param phi_m Electrostatic potential at the mirror throat (on GPU).
+ * @param phi_m Electrostatic potential at the mirror throat (DG array on reduced grid).
  * @param mask_out Output masking function.
  */
 void
 gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out);
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, struct gkyl_array *mask_out);
 #endif
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index c2c628ee10..30a33993f0 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -404,7 +404,7 @@ nod_to_mod_reduce(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_a
 void
 gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out)
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, struct gkyl_array *mask_out)
 {
 
 #ifdef GKYL_HAVE_CUDA
@@ -438,6 +438,24 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
     const double *phi_d = gkyl_array_cfetch(phi, linidx_conf);
     const double *Dbmag_quad = gkyl_array_cfetch(up->Dbmag_quad, linidx_conf);
 
+    // Get phi_m value for this field line.
+    // For 1x: single value (phi_m is a scalar stored as p=0 DG expansion).
+    // For 2x: varies with psi, evaluate at this psi cell.
+    double phi_m_val;
+    if (cdim == 1) {
+      // 1x case: single scalar value stored as p=0 DG expansion.
+      const double *phi_m_d = gkyl_array_cfetch(phi_m, 0);
+      phi_m_val = phi_m_d[0];
+    } else {
+      // 2x case: evaluate phi_m at this psi cell center.
+      int psi_idx[1] = {conf_iter.idx[0]};
+      long phi_m_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
+      const double *phi_m_d = gkyl_array_cfetch(phi_m, phi_m_linidx);
+      // Evaluate at cell center (logical coord 0).
+      double xc_log[1] = {0.0};
+      phi_m_val = up->bmag_max_basis->eval_expand(xc_log, phi_m_d);
+    }
+
     // Sum over basis for given potential phi.
     for (int n=0; n<tot_quad_conf; ++n) {
       const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
@@ -448,7 +466,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         phi_quad[n] += phi_d[k]*b_ord[k];
 
       if (Dbmag_quad[n] > 0.0)
-        qDphiDbmag_quad[n] = up->charge*(phi_quad[n]-phi_m[0])/Dbmag_quad[n];
+        qDphiDbmag_quad[n] = up->charge*(phi_quad[n]-phi_m_val)/Dbmag_quad[n];
       else
         qDphiDbmag_quad[n] = 0.0;
     }
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
index 2679a9c348..e09a6f6335 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
@@ -242,14 +242,22 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
 void
 gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out)
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, struct gkyl_array *mask_out)
 {
+  // TODO: Full GPU support for phi_m as DG array needs kernel updates.
+  // For now, this works for 1x case where phi_m is a scalar (p=0 DG expansion).
+  // For 2x case, need to update kernels to do per-cell lookup.
+  
   dim3 dimGrid_conf, dimBlock_conf;
   int tot_quad_conf = up->basis_at_ords_conf->size;
   gkyl_parallelize_components_kernel_launch_dims(&dimGrid_conf, &dimBlock_conf, *conf_range, tot_quad_conf);
 
+  // For GPU, phi_m->on_dev is the device pointer to the DG array.
+  // The kernel expects a double*, so pass the underlying data for now (1x case).
+  const double *phi_m_data = (const double*) phi_m->on_dev;
+  
   gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker<<<dimGrid_conf, dimBlock_conf>>>(*conf_range, 
-    up->basis_at_ords_conf->on_dev, up->charge, phi->on_dev, phi_m, up->Dbmag_quad->on_dev,
+    up->basis_at_ords_conf->on_dev, up->charge, phi->on_dev, phi_m_data, up->Dbmag_quad->on_dev,
     up->qDphiDbmag_quad->on_dev);
 
   const struct gkyl_velocity_map *gvm = up->vel_map;

From 96655563ddc5c3f7fecd8d144d3ba789719d7e5b Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 11 Dec 2025 11:43:15 -0500
Subject: [PATCH 04/32] Add a method to only evaluate the array at one peak,
 reducing the ammount of compution we need for evaluating phi at its peak in
 the app. Unit tests pass. Regression tests look fine as well. They're all
 valgrind clean. I think the right way to do the paralellism is to do the peak
 finding on a global bmag, just like how it is done for the position_map, then
 when we evaluate phi, all processes evaluate it at this peak, however only
 one will return a true value. This process will broadcast the array to the
 rest of the processes

---
 core/unit/ctest_array_dg_find_peaks.c         | 212 ++++++++++++++++
 core/zero/array_dg_find_peaks.c               | 234 ++++++++++--------
 core/zero/gkyl_array_dg_find_peaks.h          |  34 +++
 gyrokinetic/apps/gk_species_fdot_multiplier.c |  32 ++-
 gyrokinetic/apps/gkyl_gyrokinetic_priv.h      |   2 +-
 .../creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c |   6 +-
 6 files changed, 400 insertions(+), 120 deletions(-)

diff --git a/core/unit/ctest_array_dg_find_peaks.c b/core/unit/ctest_array_dg_find_peaks.c
index 3b8ac99a11..e8aebe3369 100644
--- a/core/unit/ctest_array_dg_find_peaks.c
+++ b/core/unit/ctest_array_dg_find_peaks.c
@@ -728,6 +728,214 @@ test_2d_project_on_peaks(int poly_order)
   gkyl_array_dg_find_peaks_release(peaks);
 }
 
+
+// Test 1D project_on_peak_idx with complex function.
+void
+test_1d_project_on_peak_idx(int poly_order)
+{
+  double lower[] = {-2.0*M_PI};
+  double upper[] = {2.0*M_PI};
+  int cells[] = {64};
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 1, poly_order);
+
+  int ghost[] = {1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function for peak finding.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Project quadratic function to evaluate at peaks.
+  struct gkyl_array *g = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_1d, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create peak finder.
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 0,
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 9);
+
+  // Allocate output arrays for projected values.
+  const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks);
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  struct gkyl_array *g_at_peaks = gkyl_array_new(GKYL_DOUBLE, out_basis->num_basis, out_range_ext->volume);
+
+  int chosen_idx = 1;
+  gkyl_array_dg_find_peaks_project_on_peak_idx(peaks, g, chosen_idx, g_at_peaks);
+
+  // Verify that g evaluated at each peak matches analytical values.
+  // For 1D->0D, output is p=0, so the value is already the cell average.
+  // The cell average of a p=0 expansion is value / sqrt(volume), and
+  // for a 1D cell with volume=1, it's just the value / sqrt(1.0) = value.
+  struct {
+    enum gkyl_peak_type type;
+    double z_expected;
+  } expected_peaks[] = {
+    {GKYL_PEAK_EDGE_LO,    -2.0*M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,  -3.0*M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,  -M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,  -M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,   0.0,},
+    {GKYL_PEAK_LOCAL_MAX,   M_PI/2.0,},
+    {GKYL_PEAK_LOCAL_MIN,   M_PI,},
+    {GKYL_PEAK_LOCAL_MAX,   3.0*M_PI/2.0,},
+    {GKYL_PEAK_EDGE_HI,     2.0*M_PI,},
+  };
+  
+  const double *g_val = gkyl_array_cfetch(g_at_peaks, 0);
+  double z = expected_peaks[chosen_idx].z_expected;
+  double expected = z * z;
+  TEST_CHECK(gkyl_compare_double(g_val[0], expected, 1e-12));
+
+  gkyl_array_release(g_at_peaks);
+  gkyl_array_release(f);
+  gkyl_array_release(g);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// Test 2D project_on_peak_idx with complex function.
+void
+test_2d_project_on_peak_idx(int poly_order)
+{
+  double lower[] = {0.5, -2.0*M_PI};
+  double upper[] = {2.0, 2.0*M_PI};
+  int cells[] = {16, 64};
+  int ndim = 2;
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, ndim, poly_order);
+
+  int ghost[] = {1, 1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function for peak finding.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Project quadratic function to evaluate at peaks: g(psi, z) = z^2 * psi^2
+  struct gkyl_array *g = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_2d, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create peak finder (search along z, which is direction 1).
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 1,  // Search along z.
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 9);
+
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks);
+  const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks);
+  const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks);
+
+  // Allocate output arrays for projected values.
+  struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX];
+  for (int p = 0; p < num_peaks; p++) {
+    g_at_peaks[p] = gkyl_array_new(GKYL_DOUBLE, out_basis->num_basis, out_range_ext->volume);
+  }
+  gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks);
+
+  // Define expected peak locations (same as before).
+  double expected_z_peaks[] = {
+    -2.0*M_PI, -3.0*M_PI/2.0, -M_PI, -M_PI/2.0, 0.0,
+    M_PI/2.0, M_PI, 3.0*M_PI/2.0, 2.0*M_PI
+  };
+
+  // Get node locations for output basis.
+  struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, out_basis->ndim, out_basis->num_basis);
+  out_basis->node_list(gkyl_array_fetch(nodes, 0));
+
+  // Verify that g evaluated at each peak matches analytical values.
+  for (int p = 0; p < num_peaks; p++) {
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    
+    // Check each psi cell.
+    struct gkyl_range_iter iter;
+    gkyl_range_iter_init(&iter, out_range);
+    while (gkyl_range_iter_next(&iter)) {
+      long linidx = gkyl_range_idx(out_range, iter.idx);
+      
+      const double *g_val_d = gkyl_array_cfetch(g_at_peaks[p], linidx);
+      const double *coord_d = gkyl_array_cfetch(coords, linidx);
+      
+      // Get cell center for physical psi coordinate.
+      double xc_out[1];
+      gkyl_rect_grid_cell_center(out_grid, (int[]){iter.idx[0]}, xc_out);
+      
+      // Evaluate at each nodal point in this cell.
+      for (int n = 0; n < out_basis->num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(nodes, n);
+        double g_at_node = out_basis->eval_expand(nod_log, g_val_d);
+        double z_at_node = out_basis->eval_expand(nod_log, coord_d);
+        
+        // Compute physical psi coordinate at this node.
+        double nod_phys[1];
+        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0]/2.0;
+        double psi = nod_phys[0];
+        
+        // Analytical value: g(psi, z) = z^2 * psi^2
+        double expected = z_at_node * z_at_node * psi * psi;
+        TEST_CHECK(fabs(z_at_node - expected_z_peaks[p]) < 1e-15);
+        TEST_CHECK(gkyl_compare_double(g_at_node, expected, 1e-15));
+      }
+    }
+  }
+
+  // Clean up.
+  gkyl_array_release(nodes);
+  for (int p = 0; p < num_peaks; p++) {
+    gkyl_array_release(g_at_peaks[p]);
+  }
+  gkyl_array_release(f);
+  gkyl_array_release(g);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+
+
+
+
+
+
+
+
+
+
+
 void test_1d_cos_p1() { test_1d_find_peaks_cos(1); }
 void test_1d_mirror_p1() { test_1d_find_peaks_mirror(1); }
 void test_1d_complex_p1() { test_1d_find_peaks_complex(1); }
@@ -735,6 +943,8 @@ void test_2d_p1() { test_2d_find_peaks(1); }
 void test_2d_complex_p1() { test_2d_find_peaks_complex(1); }
 void test_1d_project_p1() { test_1d_project_on_peaks(1); }
 void test_2d_project_p1() { test_2d_project_on_peaks(1); }
+void test_1d_project_idx_p1() { test_1d_project_on_peak_idx(1); }
+void test_2d_project_idx_p1() { test_2d_project_on_peak_idx(1); }
 
 TEST_LIST = {
   {"test_1d_cos_p1", test_1d_cos_p1},
@@ -744,5 +954,7 @@ TEST_LIST = {
   {"test_2d_complex_p1", test_2d_complex_p1},
   {"test_1d_project_p1", test_1d_project_p1},
   {"test_2d_project_p1", test_2d_project_p1},
+  {"test_1d_project_idx_p1", test_1d_project_idx_p1},
+  // {"test_2d_project_idx_p1", test_2d_project_idx_p1},
   {NULL, NULL},
 };
diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c
index 08eb425517..fcafa8ac41 100644
--- a/core/zero/array_dg_find_peaks.c
+++ b/core/zero/array_dg_find_peaks.c
@@ -345,125 +345,122 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
  */
 static void
 eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
-  const struct gkyl_array *in_ho, int preserved_node_idx, struct gkyl_array **out_vals_nodal)
+  const struct gkyl_array *in_ho, int preserved_node_idx, struct gkyl_array **out_vals_nodal, int peak_idx)
 {
   int ndim = up->grid.ndim;
   int search_dir = up->search_dir;
   int poly_order = up->basis.poly_order;
   int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
 
-  // For each peak, evaluate the input array at the peak coordinate.
-  for (int p = 0; p < up->num_peaks; p++) {
-    // Get the peak coordinate that was found during find_peaks.
-    const double *peak_coord_n = gkyl_array_cfetch(up->out_coords_nodal[p], preserved_node_idx);
-    double peak_coord_search = peak_coord_n[0];
-
-    // Find the cell containing this coordinate in the search direction.
-    // We need to build a point coordinate to pass to find_cell.
-    double point[GKYL_MAX_DIM];
-    int known_idx[GKYL_MAX_DIM];
-    int cell_idx[GKYL_MAX_DIM];
-    
-    for (int d = 0; d < ndim; d++) {
-      if (d == search_dir) {
-        point[d] = peak_coord_search;
-        known_idx[d] = -1; // Not known
-      }
-      else {
-        // Use dummy value - we'll specify known_idx.
-        point[d] = 0.0;
-        known_idx[d] = -1;
-      }
+  // Get the peak coordinate that was found during find_peaks.
+  const double *peak_coord_n = gkyl_array_cfetch(up->out_coords_nodal[peak_idx], preserved_node_idx);
+  double peak_coord_search = peak_coord_n[0];
+
+  // Find the cell containing this coordinate in the search direction.
+  // We need to build a point coordinate to pass to find_cell.
+  double point[GKYL_MAX_DIM];
+  int known_idx[GKYL_MAX_DIM];
+  int cell_idx[GKYL_MAX_DIM];
+  
+  for (int d = 0; d < ndim; d++) {
+    if (d == search_dir) {
+      point[d] = peak_coord_search;
+      known_idx[d] = -1; // Not known
     }
-    
-    // If 2D, we need to determine preserved direction cell from preserved_node_idx.
-    // For p=1 with N cells (1-based indexing), nodal points map as:
-    //   Node 0 -> cell 1, logical coord -1 (left edge of first cell)
-    //   Node k (1 <= k <= N) -> cell k, logical coord +1 (right edge of cell k)
-    // This ensures proper continuity at shared cell boundaries.
-    if (ndim > 1) {
-      int pres_cell;
-      if (poly_order == 1) {
-        if (preserved_node_idx == 0) {
-          // First node: evaluate at left edge of first cell.
-          pres_cell = up->range.lower[preserved_dir];
-        }
-        else {
-          // All other nodes (1 to N): evaluate at right edge of cell with index = node_idx.
-          // Clamp to upper bound for safety.
-          pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
-          if (pres_cell > up->range.upper[preserved_dir]) {
-            pres_cell = up->range.upper[preserved_dir];
-          }
-        }
+    else {
+      // Use dummy value - we'll specify known_idx.
+      point[d] = 0.0;
+      known_idx[d] = -1;
+    }
+  }
+  
+  // If 2D, we need to determine preserved direction cell from preserved_node_idx.
+  // For p=1 with N cells (1-based indexing), nodal points map as:
+  //   Node 0 -> cell 1, logical coord -1 (left edge of first cell)
+  //   Node k (1 <= k <= N) -> cell k, logical coord +1 (right edge of cell k)
+  // This ensures proper continuity at shared cell boundaries.
+  if (ndim > 1) {
+    int pres_cell;
+    if (poly_order == 1) {
+      if (preserved_node_idx == 0) {
+        // First node: evaluate at left edge of first cell.
+        pres_cell = up->range.lower[preserved_dir];
       }
       else {
-        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx / 2;
-      }
-      known_idx[preserved_dir] = pres_cell;
-      
-      // Set the coordinate in preserved direction to the cell center.
-      int pres_cell_idx[GKYL_MAX_DIM];
-      for (int d = 0; d < ndim; d++) {
-        pres_cell_idx[d] = (d == preserved_dir) ? pres_cell : 1;
+        // All other nodes (1 to N): evaluate at right edge of cell with index = node_idx.
+        // Clamp to upper bound for safety.
+        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+        if (pres_cell > up->range.upper[preserved_dir]) {
+          pres_cell = up->range.upper[preserved_dir];
+        }
       }
-      double xc_pres[GKYL_MAX_DIM];
-      gkyl_rect_grid_cell_center(&up->grid, pres_cell_idx, xc_pres);
-      point[preserved_dir] = xc_pres[preserved_dir];
     }
+    else {
+      pres_cell = up->range.lower[preserved_dir] + preserved_node_idx / 2;
+    }
+    known_idx[preserved_dir] = pres_cell;
     
-    gkyl_rect_grid_find_cell(&up->grid, point, true, known_idx, cell_idx);
+    // Set the coordinate in preserved direction to the cell center.
+    int pres_cell_idx[GKYL_MAX_DIM];
+    for (int d = 0; d < ndim; d++) {
+      pres_cell_idx[d] = (d == preserved_dir) ? pres_cell : 1;
+    }
+    double xc_pres[GKYL_MAX_DIM];
+    gkyl_rect_grid_cell_center(&up->grid, pres_cell_idx, xc_pres);
+    point[preserved_dir] = xc_pres[preserved_dir];
+  }
+  
+  gkyl_rect_grid_find_cell(&up->grid, point, true, known_idx, cell_idx);
 
-    // Clamp cell_idx to interior range (avoid ghost cells).
-    for (int d = 0; d < up->grid.ndim; d++) {
-      if (cell_idx[d] < up->range.lower[d]) {
-        cell_idx[d] = up->range.lower[d];
-      }
-      if (cell_idx[d] > up->range.upper[d]) {
-        cell_idx[d] = up->range.upper[d];
-      }
+  // Clamp cell_idx to interior range (avoid ghost cells).
+  for (int d = 0; d < up->grid.ndim; d++) {
+    if (cell_idx[d] < up->range.lower[d]) {
+      cell_idx[d] = up->range.lower[d];
     }
+    if (cell_idx[d] > up->range.upper[d]) {
+      cell_idx[d] = up->range.upper[d];
+    }
+  }
 
-    // Get the DG coefficients at this cell.
-    long linidx = gkyl_range_idx(&up->range_ext, cell_idx);
-    const double *f_d = gkyl_array_cfetch(in_ho, linidx);
+  // Get the DG coefficients at this cell.
+  long linidx = gkyl_range_idx(&up->range_ext, cell_idx);
+  const double *f_d = gkyl_array_cfetch(in_ho, linidx);
 
-    // Get cell center.
-    double xc[GKYL_MAX_DIM];
-    gkyl_rect_grid_cell_center(&up->grid, cell_idx, xc);
+  // Get cell center.
+  double xc[GKYL_MAX_DIM];
+  gkyl_rect_grid_cell_center(&up->grid, cell_idx, xc);
 
-    // Convert peak coordinate to logical space.
-    double nod_log[GKYL_MAX_DIM];
-    for (int d = 0; d < ndim; d++) {
-      if (d == search_dir) {
-        // Convert physical coordinate to logical [-1, 1].
-        nod_log[d] = 2.0 * (peak_coord_search - xc[d]) / up->grid.dx[d];
+  // Convert peak coordinate to logical space.
+  double nod_log[GKYL_MAX_DIM];
+  for (int d = 0; d < ndim; d++) {
+    if (d == search_dir) {
+      // Convert physical coordinate to logical [-1, 1].
+      nod_log[d] = 2.0 * (peak_coord_search - xc[d]) / up->grid.dx[d];
+    }
+    else if (ndim > 1) {
+      // In preserved direction, use the node position in the cell.
+      // For p=1: node 0 is at left edge (-1), all others at right edge (+1).
+      if (poly_order == 1) {
+        nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
       }
-      else if (ndim > 1) {
-        // In preserved direction, use the node position in the cell.
-        // For p=1: node 0 is at left edge (-1), all others at right edge (+1).
-        if (poly_order == 1) {
-          nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
-        }
-        else {
-          int pres_node_offset = preserved_node_idx % 3;
-          if (pres_node_offset == 0)
-            nod_log[d] = -1.0;
-          else if (pres_node_offset == 1)
-            nod_log[d] = 0.0;
-          else
-            nod_log[d] = 1.0;
-        }
+      else {
+        int pres_node_offset = preserved_node_idx % 3;
+        if (pres_node_offset == 0)
+          nod_log[d] = -1.0;
+        else if (pres_node_offset == 1)
+          nod_log[d] = 0.0;
+        else
+          nod_log[d] = 1.0;
       }
     }
+  }
 
-    // Evaluate the DG expansion at this logical coordinate.
-    double val = up->basis.eval_expand(nod_log, f_d);
+  // Evaluate the DG expansion at this logical coordinate.
+  double val = up->basis.eval_expand(nod_log, f_d);
 
-    // Store the result.
-    double *val_n = gkyl_array_fetch(out_vals_nodal[p], preserved_node_idx);
-    val_n[0] = val;
-  }
+  // Store the result.
+  double *val_n = gkyl_array_fetch(out_vals_nodal[peak_idx], preserved_node_idx);
+  val_n[0] = val;
 }
 
 struct gkyl_array_dg_find_peaks*
@@ -695,7 +692,9 @@ gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
   // Evaluate the input array at peak locations for each preserved-direction node.
   int num_nodes_out = up->out_nrange.volume;
   for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-    eval_array_at_peaks_for_preserved_node(up, in_ho, pres_node, out_vals_nodal);
+    for (int p = 0; p < up->num_peaks; p++) {
+      eval_array_at_peaks_for_preserved_node(up, in_ho, pres_node, out_vals_nodal, p);
+    }
   }
 
   // Transform nodal to modal for each peak.
@@ -722,6 +721,45 @@ gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
   gkyl_array_release(in_ho);
 }
 
+void
+gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val)
+{
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+
+  // Copy input to host if on GPU.
+  struct gkyl_array *in_ho = gkyl_array_new(GKYL_DOUBLE, in_array->ncomp, in_array->size);
+  gkyl_array_copy(in_ho, in_array);
+
+  // Allocate temporary nodal arrays for each peak.
+  struct gkyl_array *out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];
+  out_vals_nodal[peak_idx] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+
+  // Evaluate the input array at peak locations for each preserved-direction node.
+  int num_nodes_out = up->out_nrange.volume;
+  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+    eval_array_at_peaks_for_preserved_node(up, in_ho, pres_node, out_vals_nodal, peak_idx);
+  }
+
+  // Transform nodal to modal for each peak.
+  if (out_dim == 0) {
+    // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+    double *val_m = gkyl_array_fetch(out_val, 0);
+    const double *val_n = gkyl_array_cfetch(out_vals_nodal[peak_idx], 0);
+    val_m[0] = val_n[0];
+  }
+  else {
+    // 2D -> 1D case: use nodal-to-modal transform.
+    gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+      &up->out_nrange, &up->out_range, 1, out_vals_nodal[peak_idx], out_val, false);
+  }
+
+  // Clean up temporary arrays.
+  gkyl_array_release(out_vals_nodal[peak_idx]);
+  gkyl_array_release(in_ho);
+}
+
 void
 gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up)
 {
diff --git a/core/zero/gkyl_array_dg_find_peaks.h b/core/zero/gkyl_array_dg_find_peaks.h
index a7981591cc..5b7c7d9da2 100644
--- a/core/zero/gkyl_array_dg_find_peaks.h
+++ b/core/zero/gkyl_array_dg_find_peaks.h
@@ -214,6 +214,40 @@ const struct gkyl_array* gkyl_array_dg_find_peaks_get_coords_nodal(const struct
 void gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
   const struct gkyl_array *in_array, struct gkyl_array **out_vals);
 
+/**
+ * Project (evaluate) an arbitrary array onto a single peak location previously
+ * found by gkyl_array_dg_find_peaks_advance.
+ * 
+ * This is a more efficient version of gkyl_array_dg_find_peaks_project_on_peaks
+ * when you only need the evaluation at one specific peak (e.g., only at the
+ * mirror throat LOCAL_MAX peak).
+ * 
+ * Example usage:
+ * @code
+ * // 1. Find peaks in bmag along z direction
+ * struct gkyl_array_dg_find_peaks *peak_finder = gkyl_array_dg_find_peaks_new(&inp, bmag);
+ * gkyl_array_dg_find_peaks_advance(peak_finder, bmag);
+ * 
+ * // 2. Find the LOCAL_MAX peak index
+ * int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peak_finder);
+ * int bmag_max_idx = num_peaks - 2; // Assuming standard ordering
+ * 
+ * // 3. Evaluate phi only at the mirror throat (bmag_max location)
+ * struct gkyl_array *phi_m = gkyl_array_new(GKYL_DOUBLE, out_basis.num_basis, out_range_ext.volume);
+ * gkyl_array_dg_find_peaks_project_on_peak_idx(peak_finder, phi, bmag_max_idx, phi_m);
+ * 
+ * // 4. Now phi_m contains phi evaluated at the mirror throat
+ * @endcode
+ * 
+ * @param up Updater object (must have run advance first)
+ * @param in_array Array to evaluate at peak location (same grid/basis as original field)
+ * @param peak_idx Index of the peak to evaluate at (0 to num_peaks-1)
+ * @param out_val Output: evaluated values at the specified peak
+ *                (must be pre-allocated to match out_range_ext)
+ */
+void gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val);
+
 /**
  * Release the updater and all internal arrays.
  * 
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 31a8a24d13..2ae0e760d3 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -59,15 +59,14 @@ gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, con
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
   // Find the potential at all peak locations (including the mirror throat).
-  gkyl_array_dg_find_peaks_project_on_peaks(fdmul->bmag_peak_finder, phi, fdmul->phi_at_peaks);
-
-  // Get phi at the mirror throat (bmag_max peak location).
-  // phi_at_peaks[bmag_max_peak_idx] is a DG array on the reduced grid.
-  const struct gkyl_array *phi_m_arr = fdmul->phi_at_peaks[fdmul->bmag_max_peak_idx];
+  gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, phi,
+    fdmul->bmag_max_peak_idx, fdmul->phi_at_bmag_max);
+  // Allgather on phi_at_bmag_max. It's not an allgather.
+  // One process has the correct one, but the others do not. Is it a bcast or a sync?
 
   // Project the loss cone mask using the phi_m array.
   gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
-    phi, phi_m_arr, fdmul->multiplier);
+    phi, fdmul->phi_at_bmag_max, fdmul->multiplier);
 
   // Multiply out by the multplier.
   gkyl_array_scale_by_cell(out, fdmul->multiplier);
@@ -173,8 +172,13 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
         .search_dir = search_dir,
         .use_gpu = app->use_gpu,
       };
-      fdmul->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, app->gk_geom->geo_int.bmag);
+      // Pass a global bmag_int into the peak finder
+      struct gkyl_array *bmag_int_global = mkarr(false, 
+        app->gk_geom->geo_int.bmag->ncomp, app->gk_geom->geo_int.bmag->size);
+      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag, bmag_int_global);
+      fdmul->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global);
       gkyl_array_dg_find_peaks_advance(fdmul->bmag_peak_finder, app->gk_geom->geo_int.bmag);
+      gkyl_array_release(bmag_int_global);
       
       // Get the LOCAL_MAX peak (bmag maximum along z direction).
       int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder);
@@ -185,12 +189,8 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       fdmul->bmag_max_range = gkyl_array_dg_find_peaks_get_range(fdmul->bmag_peak_finder);
       fdmul->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(fdmul->bmag_peak_finder);
 
-      // Allocate arrays for phi evaluated at all peak locations.
-      fdmul->phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array*));
-      for (int p = 0; p < num_peaks; p++) {
-        fdmul->phi_at_peaks[p] = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis, 
-          fdmul->bmag_max_range_ext->volume);
-      }
+      fdmul->phi_at_bmag_max = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis, 
+        fdmul->bmag_max_range_ext->volume);
 
       // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
@@ -270,12 +270,8 @@ gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, const
       // Nothing to release.
     }
     else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE) {
-      // Release phi_at_peaks arrays.
       int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder);
-      for (int p = 0; p < num_peaks; p++) {
-        gkyl_array_release(fdmul->phi_at_peaks[p]);
-      }
-      gkyl_free(fdmul->phi_at_peaks);
+      gkyl_array_release(fdmul->phi_at_bmag_max);
       gkyl_array_dg_find_peaks_release(fdmul->bmag_peak_finder);
       gkyl_loss_cone_mask_gyrokinetic_release(fdmul->lcm_proj_op);
     }
diff --git a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
index e2a607448d..6e50335fc1 100644
--- a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
+++ b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
@@ -845,7 +845,7 @@ struct gk_fdot_multiplier {
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
   const struct gkyl_range *bmag_max_range_ext; // Extended range for bmag_max arrays.
   int bmag_max_peak_idx; // Index of the LOCAL_MAX peak in the peak finder.
-  struct gkyl_array **phi_at_peaks; // Phi evaluated at all peak locations.
+  struct gkyl_array *phi_at_bmag_max; // Phi evaluated at all peak locations.
   // Functions chosen at runtime.
   void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame);
   void (*advance_times_rate_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
index bac3d878ed..9140f29c35 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
@@ -521,9 +521,9 @@ create_ctx(void)
 
   // Grid DOF:
   int Nx = 8;  // Number of cells in x (psi) direction.
-  int Nz = 192; // Number of cells in z direction.
-  int Nvpar = 48; // Number of cells in parallel velocity direction.
-  int Nmu = 16;  // Number of cells in mu direction.
+  int Nz = 96; // Number of cells in z direction.
+  int Nvpar = 16; // Number of cells in parallel velocity direction.
+  int Nmu = 8;  // Number of cells in mu direction.
   int poly_order = 1;
 
   // Initial conditions parameter.s

From aa4afa087021dee3cef26f595e9b46879c9a6c16 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 11 Dec 2025 11:56:23 -0500
Subject: [PATCH 05/32] Remove some memory allocation during the advance
 methods and evaluation at the peak locations

---
 core/zero/array_dg_find_peaks.c           | 63 ++++++-----------------
 core/zero/gkyl_array_dg_find_peaks_priv.h |  1 +
 2 files changed, 17 insertions(+), 47 deletions(-)

diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c
index fcafa8ac41..55c6538384 100644
--- a/core/zero/array_dg_find_peaks.c
+++ b/core/zero/array_dg_find_peaks.c
@@ -11,12 +11,6 @@
 /**
  * Scan along the search direction at a fixed preserved-direction coordinate
  * to count the number of peaks and determine their types.
- * 
- * @param up Updater (partially initialized - just grid/basis/range/search_dir)
- * @param in Input field
- * @param preserved_idx Index in the preserved direction (ignored for 1D)
- * @param num_peaks_out Output: number of peaks found
- * @param peak_types_out Output: array of peak types (must be size GKYL_DG_FIND_PEAKS_MAX)
  */
 static void
 count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in,
@@ -547,6 +541,7 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *inp, con
     up->out_coords[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, up->out_range_ext.volume);
     up->out_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
     up->out_coords_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+    up->out_eval_at_peaks_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
   }
 
   // Initialize unused peak arrays to NULL.
@@ -555,6 +550,7 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *inp, con
     up->out_coords[p] = NULL;
     up->out_vals_nodal[p] = NULL;
     up->out_coords_nodal[p] = NULL;
+    up->out_eval_at_peaks_vals_nodal[p] = NULL;
   }
 
   return up;
@@ -563,17 +559,15 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *inp, con
 void
 gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in)
 {
+  // Needs a gpu implementation
+
   int ndim = up->grid.ndim;
   int out_dim = ndim - 1;
 
-  // Copy input to host if on GPU.
-  struct gkyl_array *in_ho = gkyl_array_new(GKYL_DOUBLE, in->ncomp, in->size);
-  gkyl_array_copy(in_ho, in);
-
   // Find peaks for each preserved-direction node.
   int num_nodes_out = up->out_nrange.volume;
   for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-    find_peaks_for_preserved_node(up, in_ho, pres_node);
+    find_peaks_for_preserved_node(up, in, pres_node);
   }
 
   // Transform nodal to modal for each peak.
@@ -597,8 +591,6 @@ gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, const stru
         &up->out_nrange, &up->out_range, 1, up->out_coords_nodal[p], up->out_coords[p], false);
     }
   }
-
-  gkyl_array_release(in_ho);
 }
 
 int
@@ -676,24 +668,16 @@ void
 gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
   const struct gkyl_array *in_array, struct gkyl_array **out_vals)
 {
+  // Needs a GPU implementation
+
   int ndim = up->grid.ndim;
   int out_dim = ndim - 1;
 
-  // Copy input to host if on GPU.
-  struct gkyl_array *in_ho = gkyl_array_new(GKYL_DOUBLE, in_array->ncomp, in_array->size);
-  gkyl_array_copy(in_ho, in_array);
-
-  // Allocate temporary nodal arrays for each peak.
-  struct gkyl_array *out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];
-  for (int p = 0; p < up->num_peaks; p++) {
-    out_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
-  }
-
   // Evaluate the input array at peak locations for each preserved-direction node.
   int num_nodes_out = up->out_nrange.volume;
   for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
     for (int p = 0; p < up->num_peaks; p++) {
-      eval_array_at_peaks_for_preserved_node(up, in_ho, pres_node, out_vals_nodal, p);
+      eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, p);
     }
   }
 
@@ -702,7 +686,7 @@ gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
     // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
     for (int p = 0; p < up->num_peaks; p++) {
       double *val_m = gkyl_array_fetch(out_vals[p], 0);
-      const double *val_n = gkyl_array_cfetch(out_vals_nodal[p], 0);
+      const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[p], 0);
       val_m[0] = val_n[0];
     }
   }
@@ -710,54 +694,38 @@ gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
     // 2D -> 1D case: use nodal-to-modal transform.
     for (int p = 0; p < up->num_peaks; p++) {
       gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-        &up->out_nrange, &up->out_range, 1, out_vals_nodal[p], out_vals[p], false);
+        &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[p], out_vals[p], false);
     }
   }
-
-  // Clean up temporary arrays.
-  for (int p = 0; p < up->num_peaks; p++) {
-    gkyl_array_release(out_vals_nodal[p]);
-  }
-  gkyl_array_release(in_ho);
 }
 
 void
 gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up,
   const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val)
 {
+  // Needs a GPU implementation
+
   int ndim = up->grid.ndim;
   int out_dim = ndim - 1;
 
-  // Copy input to host if on GPU.
-  struct gkyl_array *in_ho = gkyl_array_new(GKYL_DOUBLE, in_array->ncomp, in_array->size);
-  gkyl_array_copy(in_ho, in_array);
-
-  // Allocate temporary nodal arrays for each peak.
-  struct gkyl_array *out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];
-  out_vals_nodal[peak_idx] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
-
   // Evaluate the input array at peak locations for each preserved-direction node.
   int num_nodes_out = up->out_nrange.volume;
   for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-    eval_array_at_peaks_for_preserved_node(up, in_ho, pres_node, out_vals_nodal, peak_idx);
+    eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, peak_idx);
   }
 
   // Transform nodal to modal for each peak.
   if (out_dim == 0) {
     // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
     double *val_m = gkyl_array_fetch(out_val, 0);
-    const double *val_n = gkyl_array_cfetch(out_vals_nodal[peak_idx], 0);
+    const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[peak_idx], 0);
     val_m[0] = val_n[0];
   }
   else {
     // 2D -> 1D case: use nodal-to-modal transform.
     gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-      &up->out_nrange, &up->out_range, 1, out_vals_nodal[peak_idx], out_val, false);
+      &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], out_val, false);
   }
-
-  // Clean up temporary arrays.
-  gkyl_array_release(out_vals_nodal[peak_idx]);
-  gkyl_array_release(in_ho);
 }
 
 void
@@ -768,6 +736,7 @@ gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up)
     gkyl_array_release(up->out_coords[p]);
     gkyl_array_release(up->out_vals_nodal[p]);
     gkyl_array_release(up->out_coords_nodal[p]);
+    gkyl_array_release(up->out_eval_at_peaks_vals_nodal[p]);
   }
   gkyl_array_release(up->nodes);
   gkyl_nodal_ops_release(up->n2m);
diff --git a/core/zero/gkyl_array_dg_find_peaks_priv.h b/core/zero/gkyl_array_dg_find_peaks_priv.h
index be63f8b500..49b51fd810 100644
--- a/core/zero/gkyl_array_dg_find_peaks_priv.h
+++ b/core/zero/gkyl_array_dg_find_peaks_priv.h
@@ -48,6 +48,7 @@ struct gkyl_array_dg_find_peaks {
   struct gkyl_array *out_coords[GKYL_DG_FIND_PEAKS_MAX];      // Peak coordinates (modal DG)
   struct gkyl_array *out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];  // Nodal peak values
   struct gkyl_array *out_coords_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Nodal peak coordinates
+  struct gkyl_array *out_eval_at_peaks_vals_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Values evaluated at peaks (nodal)
 
   // Internal working arrays.
   struct gkyl_array *nodes;         // Node locations in logical coords

From 976b6bdc53ae150cc2decb59d5c65363f4832556 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 11 Dec 2025 13:24:59 -0500
Subject: [PATCH 06/32] Remove old elements from gk_geometry. Add a
 agkyl_array_dg_reduce_dir method, which is just like find_peaks, but it
 computes the global maximum or minimum.

---
 core/unit/ctest_array_dg_reduce_dir.c     | 394 ++++++++++++++++
 core/zero/array_dg_reduce_dir.c           | 524 ++++++++++++++++++++++
 core/zero/gkyl_array_dg_reduce_dir.h      | 158 +++++++
 core/zero/gkyl_array_dg_reduce_dir_priv.h |  52 +++
 gyrokinetic/zero/gkyl_gk_geometry.h       |  34 --
 5 files changed, 1128 insertions(+), 34 deletions(-)
 create mode 100644 core/unit/ctest_array_dg_reduce_dir.c
 create mode 100644 core/zero/array_dg_reduce_dir.c
 create mode 100644 core/zero/gkyl_array_dg_reduce_dir.h
 create mode 100644 core/zero/gkyl_array_dg_reduce_dir_priv.h

diff --git a/core/unit/ctest_array_dg_reduce_dir.c b/core/unit/ctest_array_dg_reduce_dir.c
new file mode 100644
index 0000000000..bf53b36c88
--- /dev/null
+++ b/core/unit/ctest_array_dg_reduce_dir.c
@@ -0,0 +1,394 @@
+#include <acutest.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_array_ops.h>
+#include <gkyl_array_dg_reduce_dir.h>
+#include <gkyl_array_dg_reduce_dir_priv.h>
+#include <gkyl_basis.h>
+#include <gkyl_eval_on_nodes.h>
+#include <gkyl_range.h>
+#include <gkyl_rect_decomp.h>
+#include <gkyl_rect_grid.h>
+#include <gkyl_util.h>
+
+#include <math.h>
+
+// 1D test function: f(z) = -z^2 + 1
+// Maximum at z=0 with value 1, minimum at endpoints.
+static void
+test_func_1d_parabola(double t, const double *xn, double *fout, void *ctx)
+{
+  double z = xn[0];
+  fout[0] = -z*z + 1.0;
+}
+
+// 1D test function: f(z) = sin(z) on [0, pi]
+// Maximum at z=pi/2 with value 1.
+static void
+test_func_1d_sin(double t, const double *xn, double *fout, void *ctx)
+{
+  double z = xn[0];
+  fout[0] = sin(z);
+}
+
+// 2D test function: f(psi, z) = psi * (-z^2 + 1)
+// Maximum along z is at z=0 for each psi, with value psi.
+static void
+test_func_2d_parabola(double t, const double *xn, double *fout, void *ctx)
+{
+  double psi = xn[0], z = xn[1];
+  fout[0] = psi * (-z*z + 1.0);
+}
+
+// 2D mirror-like function: f(psi, z) = B0(psi) * (1 + (R-1)*sin^2(pi*z/L))
+// Minimum at z=0 for all psi.
+static void
+test_func_2d_mirror(double t, const double *xn, double *fout, void *ctx)
+{
+  double psi = xn[0], z = xn[1];
+  double L = 2.0;
+  double B0 = 1.0 + 0.1*psi;
+  double R = 4.0;
+  double sinval = sin(M_PI * z / L);
+  fout[0] = B0 * (1.0 + (R - 1.0) * sinval * sinval);
+}
+
+// Test 1D reduction with MAX operation.
+void
+test_1d_reduce_max(int poly_order)
+{
+  // Grid: z in [-1, 1].
+  double lower[] = {-1.0};
+  double upper[] = {1.0};
+  int cells[] = {16};
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 1, poly_order);
+
+  int ghost[] = {1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_parabola, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create reducer.
+  struct gkyl_array_dg_reduce_dir_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .reduce_dir = 0,
+    .op = GKYL_REDUCE_OP_MAX,
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_reduce_dir *reducer = gkyl_array_dg_reduce_dir_new(&inp);
+
+  // Compute reduction.
+  gkyl_array_dg_reduce_dir_advance(reducer, f);
+
+  // Check results: maximum of -z^2+1 on [-1,1] is 1 at z=0.
+  const struct gkyl_array *vals = gkyl_array_dg_reduce_dir_get_vals(reducer);
+  const struct gkyl_array *coords = gkyl_array_dg_reduce_dir_get_coords(reducer);
+  
+  const double *val = gkyl_array_cfetch(vals, 0);
+  const double *coord = gkyl_array_cfetch(coords, 0);
+  
+  TEST_CHECK(gkyl_compare_double(val[0], 1.0, 1e-14));
+  TEST_CHECK(gkyl_compare_double(fabs(coord[0]), 0.0, 1e-14));
+
+  gkyl_array_release(f);
+  gkyl_array_dg_reduce_dir_release(reducer);
+}
+
+// Test 1D reduction with MIN operation.
+void
+test_1d_reduce_min(int poly_order)
+{
+  // Grid: z in [-1, 1].
+  double lower[] = {-1.0};
+  double upper[] = {1.0};
+  int cells[] = {16};
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 1, poly_order);
+
+  int ghost[] = {1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_parabola, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create reducer.
+  struct gkyl_array_dg_reduce_dir_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .reduce_dir = 0,
+    .op = GKYL_REDUCE_OP_MIN,
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_reduce_dir *reducer = gkyl_array_dg_reduce_dir_new(&inp);
+
+  // Compute reduction.
+  gkyl_array_dg_reduce_dir_advance(reducer, f);
+
+  // Check results: minimum of -z^2+1 on [-1,1] is 0 at z=±1.
+  const struct gkyl_array *vals = gkyl_array_dg_reduce_dir_get_vals(reducer);
+  const struct gkyl_array *coords = gkyl_array_dg_reduce_dir_get_coords(reducer);
+  
+  const double *val = gkyl_array_cfetch(vals, 0);
+  const double *coord = gkyl_array_cfetch(coords, 0);
+  
+  TEST_CHECK(gkyl_compare_double(val[0], 0.0, 1e-14));
+  TEST_CHECK(gkyl_compare_double(fabs(coord[0]), 1.0, 1e-14));  // Either -1 or 1.
+
+  gkyl_array_release(f);
+  gkyl_array_dg_reduce_dir_release(reducer);
+}
+
+// Test 2D reduction along z direction with MAX operation.
+void
+test_2d_reduce_max(int poly_order)
+{
+  // Grid: psi in [0.5, 2.0], z in [-1, 1].
+  double lower[] = {0.5, -1.0};
+  double upper[] = {2.0, 1.0};
+  int cells[] = {8, 16};
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 2, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 2, poly_order);
+
+  int ghost[] = {1, 1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_parabola, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create reducer (reduce along z, which is direction 1).
+  struct gkyl_array_dg_reduce_dir_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .reduce_dir = 1,
+    .op = GKYL_REDUCE_OP_MAX,
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_reduce_dir *reducer = gkyl_array_dg_reduce_dir_new(&inp);
+
+  // Compute reduction.
+  gkyl_array_dg_reduce_dir_advance(reducer, f);
+
+  // Check results.
+  const struct gkyl_basis *out_basis = gkyl_array_dg_reduce_dir_get_basis(reducer);
+  const struct gkyl_range *out_range = gkyl_array_dg_reduce_dir_get_range(reducer);
+  const struct gkyl_rect_grid *out_grid = gkyl_array_dg_reduce_dir_get_grid(reducer);
+  
+  // Access nodal arrays directly for testing.
+  const struct gkyl_array *vals_nodal = reducer->out_vals_nodal;
+  const struct gkyl_array *coords_nodal = reducer->out_coords_nodal;
+  const struct gkyl_range *out_nrange = &reducer->out_nrange;
+
+  // Check each nodal point.
+  struct gkyl_range_iter iter;
+  gkyl_range_iter_init(&iter, out_nrange);
+  while (gkyl_range_iter_next(&iter)) {
+    long linidx = gkyl_range_idx(out_nrange, iter.idx);
+    
+    const double *val_nodal = gkyl_array_cfetch(vals_nodal, linidx);
+    const double *coord_nodal = gkyl_array_cfetch(coords_nodal, linidx);
+    
+    // Compute physical psi coordinate at this nodal point.
+    // For p=1: node 0 at lower bound, node ncells at upper bound.
+    int num_cells_psi = cells[0];
+    double dpsi = (upper[0] - lower[0]) / num_cells_psi;
+    double psi_phys = lower[0] + iter.idx[0] * dpsi;
+    
+    // Expected: max of psi*(-z^2+1) over z is psi at z=0.
+    double expected_val = psi_phys;
+    double expected_coord = 0.0;
+
+    TEST_CHECK(gkyl_compare_double(val_nodal[0], expected_val, 1e-14));
+    TEST_CHECK(gkyl_compare_double(coord_nodal[0], expected_coord, 1e-14));
+  }
+
+  gkyl_array_release(f);
+  gkyl_array_dg_reduce_dir_release(reducer);
+}
+
+// Test 2D reduction with mirror-like function (finding minimum bmag).
+void
+test_2d_reduce_min_mirror(int poly_order)
+{
+  // Grid: psi in [0.0, 1.0], z in [-1, 1].
+  double lower[] = {0.0, -1.0};
+  double upper[] = {1.0, 1.0};
+  int cells[] = {4, 16};
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 2, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 2, poly_order);
+
+  int ghost[] = {1, 1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project mirror function onto basis.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_mirror, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create reducer (reduce along z, find minimum).
+  struct gkyl_array_dg_reduce_dir_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .reduce_dir = 1,
+    .op = GKYL_REDUCE_OP_MIN,
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_reduce_dir *reducer = gkyl_array_dg_reduce_dir_new(&inp);
+
+  // Compute reduction.
+  gkyl_array_dg_reduce_dir_advance(reducer, f);
+
+  // Access nodal arrays directly for testing.
+  const struct gkyl_array *vals_nodal = reducer->out_vals_nodal;
+  const struct gkyl_array *coords_nodal = reducer->out_coords_nodal;
+  const struct gkyl_range *out_nrange = &reducer->out_nrange;
+
+  // Check each nodal point.
+  struct gkyl_range_iter iter;
+  gkyl_range_iter_init(&iter, out_nrange);
+  while (gkyl_range_iter_next(&iter)) {
+    long linidx = gkyl_range_idx(out_nrange, iter.idx);
+    
+    const double *val_nodal = gkyl_array_cfetch(vals_nodal, linidx);
+    const double *coord_nodal = gkyl_array_cfetch(coords_nodal, linidx);
+    
+    // Compute physical psi coordinate.
+    int num_cells_psi = cells[0];
+    double dpsi = (upper[0] - lower[0]) / num_cells_psi;
+    double psi_phys = lower[0] + iter.idx[0] * dpsi;
+    
+    // Expected: minimum of B0*(1+(R-1)*sin^2(pi*z/L)) is B0 at z=0.
+    double B0 = 1.0 + 0.1*psi_phys;
+    double expected_val = B0;
+    double expected_coord = 0.0;
+
+    TEST_CHECK(gkyl_compare_double(val_nodal[0], expected_val, 1e-14));
+    TEST_CHECK(gkyl_compare_double(coord_nodal[0], expected_coord, 1e-14));
+  }
+
+  gkyl_array_release(f);
+  gkyl_array_dg_reduce_dir_release(reducer);
+}
+
+// Test eval_at_extremum functionality.
+void
+test_2d_eval_at_extremum(int poly_order)
+{
+  // Grid: psi in [0.5, 2.0], z in [-1, 1].
+  double lower[] = {0.5, -1.0};
+  double upper[] = {2.0, 1.0};
+  int cells[] = {8, 16};
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 2, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 2, poly_order);
+
+  int ghost[] = {1, 1};
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis.
+  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_parabola, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create reducer.
+  struct gkyl_array_dg_reduce_dir_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .reduce_dir = 1,
+    .op = GKYL_REDUCE_OP_MAX,
+    .use_gpu = false,
+  };
+  struct gkyl_array_dg_reduce_dir *reducer = gkyl_array_dg_reduce_dir_new(&inp);
+
+  // Compute reduction.
+  gkyl_array_dg_reduce_dir_advance(reducer, f);
+
+  // Evaluate f at the extremum coordinates.
+  const struct gkyl_range *out_range_ext = gkyl_array_dg_reduce_dir_get_range_ext(reducer);
+  const struct gkyl_basis *out_basis = gkyl_array_dg_reduce_dir_get_basis(reducer);
+  
+  struct gkyl_array *f_at_max = gkyl_array_new(GKYL_DOUBLE, out_basis->num_basis, out_range_ext->volume);
+  gkyl_array_dg_reduce_dir_eval_at_extremum(reducer, f, f_at_max);
+
+  // The value of f at its maximum should equal the maximum value.
+  const struct gkyl_array *vals = gkyl_array_dg_reduce_dir_get_vals(reducer);
+  
+  // Compare at cell centers.
+  double xc_log[1] = {0.0};
+  const struct gkyl_range *out_range = gkyl_array_dg_reduce_dir_get_range(reducer);
+  struct gkyl_range_iter iter;
+  gkyl_range_iter_init(&iter, out_range);
+  while (gkyl_range_iter_next(&iter)) {
+    long linidx = gkyl_range_idx(out_range, iter.idx);
+    
+    const double *max_val = gkyl_array_cfetch(vals, linidx);
+    const double *eval_val = gkyl_array_cfetch(f_at_max, linidx);
+    
+    double max_at_center = out_basis->eval_expand(xc_log, max_val);
+    double eval_at_center = out_basis->eval_expand(xc_log, eval_val);
+
+    TEST_CHECK(gkyl_compare_double(eval_at_center, max_at_center, 1e-14));
+  }
+
+  gkyl_array_release(f);
+  gkyl_array_release(f_at_max);
+  gkyl_array_dg_reduce_dir_release(reducer);
+}
+
+void test_1d_max_p1() { test_1d_reduce_max(1); }
+void test_1d_min_p1() { test_1d_reduce_min(1); }
+void test_2d_max_p1() { test_2d_reduce_max(1); }
+void test_2d_min_mirror_p1() { test_2d_reduce_min_mirror(1); }
+void test_2d_eval_at_extremum_p1() { test_2d_eval_at_extremum(1); }
+
+TEST_LIST = {
+  {"test_1d_max_p1", test_1d_max_p1},
+  {"test_1d_min_p1", test_1d_min_p1},
+  {"test_2d_max_p1", test_2d_max_p1},
+  {"test_2d_min_mirror_p1", test_2d_min_mirror_p1},
+  {"test_2d_eval_at_extremum_p1", test_2d_eval_at_extremum_p1},
+  {NULL, NULL},
+};
diff --git a/core/zero/array_dg_reduce_dir.c b/core/zero/array_dg_reduce_dir.c
new file mode 100644
index 0000000000..fc39963774
--- /dev/null
+++ b/core/zero/array_dg_reduce_dir.c
@@ -0,0 +1,524 @@
+#include <assert.h>
+#include <float.h>
+#include <string.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_array_dg_reduce_dir.h>
+#include <gkyl_array_dg_reduce_dir_priv.h>
+#include <gkyl_nodal_ops.h>
+
+/**
+ * Find the extremum along the reduction direction for a given preserved-direction
+ * node index, storing results in the nodal arrays.
+ */
+static void
+find_extremum_for_preserved_node(struct gkyl_array_dg_reduce_dir *up, const struct gkyl_array *in_ho,
+  int preserved_node_idx)
+{
+  int ndim = up->grid.ndim;
+  int reduce_dir = up->reduce_dir;
+  int poly_order = up->basis.poly_order;
+
+  // Determine number of nodes along reduction direction.
+  int num_cells_reduce = up->range.upper[reduce_dir] - up->range.lower[reduce_dir] + 1;
+  int total_nodes_reduce = (poly_order == 1) ? num_cells_reduce + 1 : 2*num_cells_reduce + 1;
+
+  // Allocate arrays to store values and coordinates along reduction direction.
+  double *vals = gkyl_malloc(sizeof(double) * total_nodes_reduce);
+  double *coords = gkyl_malloc(sizeof(double) * total_nodes_reduce);
+  bool *visited = gkyl_malloc(sizeof(bool) * total_nodes_reduce);
+  for (int i = 0; i < total_nodes_reduce; i++) {
+    vals[i] = 0.0;
+    coords[i] = 0.0;
+    visited[i] = false;
+  }
+
+  // For 2D, determine the preserved direction.
+  int preserved_dir = (ndim == 1) ? -1 : ((reduce_dir == 0) ? 1 : 0);
+
+  // Iterate along cells in reduction direction and collect nodal values.
+  for (int cell_idx = up->range.lower[reduce_dir]; cell_idx <= up->range.upper[reduce_dir]; cell_idx++) {
+    // For 2D, determine which cells in the preserved direction contribute to this node.
+    int pres_cell_start, pres_cell_end;
+    if (ndim == 1) {
+      pres_cell_start = 0;
+      pres_cell_end = 0;
+    }
+    else {
+      if (poly_order == 1) {
+        if (preserved_node_idx == 0) {
+          pres_cell_start = up->range.lower[preserved_dir];
+          pres_cell_end = up->range.lower[preserved_dir];
+        }
+        else if (preserved_node_idx == up->out_nrange.upper[0]) {
+          pres_cell_start = up->range.upper[preserved_dir];
+          pres_cell_end = up->range.upper[preserved_dir];
+        }
+        else {
+          pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+          pres_cell_end = pres_cell_start + 1;
+          if (pres_cell_end > up->range.upper[preserved_dir]) {
+            pres_cell_end = up->range.upper[preserved_dir];
+          }
+        }
+      }
+      else {
+        int cell_local = preserved_node_idx / 2;
+        pres_cell_start = up->range.lower[preserved_dir] + cell_local;
+        pres_cell_end = pres_cell_start;
+        if (preserved_node_idx % 2 == 0 && preserved_node_idx > 0) {
+          pres_cell_start--;
+        }
+        if (pres_cell_start < up->range.lower[preserved_dir]) {
+          pres_cell_start = up->range.lower[preserved_dir];
+        }
+        if (pres_cell_end > up->range.upper[preserved_dir]) {
+          pres_cell_end = up->range.upper[preserved_dir];
+        }
+      }
+    }
+
+    for (int pres_cell = pres_cell_start; pres_cell <= pres_cell_end; pres_cell++) {
+      // Build index array for this cell.
+      int idx[GKYL_MAX_DIM];
+      if (ndim == 1) {
+        idx[0] = cell_idx;
+      }
+      else {
+        idx[preserved_dir] = pres_cell;
+        idx[reduce_dir] = cell_idx;
+      }
+
+      long linidx = gkyl_range_idx(&up->range, idx);
+      const double *f_d = gkyl_array_cfetch(in_ho, linidx);
+
+      double xc[GKYL_MAX_DIM];
+      gkyl_rect_grid_cell_center(&up->grid, idx, xc);
+
+      // Evaluate at each node in this cell.
+      for (int n = 0; n < up->basis.num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(up->nodes, n);
+
+        // Check if this node corresponds to our preserved node index.
+        if (ndim > 1) {
+          int pres_node_offset;
+          if (poly_order == 1) {
+            pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1;
+          }
+          else {
+            if (nod_log[preserved_dir] < -0.5) {
+              pres_node_offset = 0;
+            }
+            else if (nod_log[preserved_dir] > 0.5) {
+              pres_node_offset = 2;
+            }
+            else {
+              pres_node_offset = 1;
+            }
+          }
+          int pres_cell_local = pres_cell - up->range.lower[preserved_dir];
+          int this_pres_node;
+          if (poly_order == 1) {
+            this_pres_node = pres_cell_local + pres_node_offset;
+          }
+          else {
+            this_pres_node = 2*pres_cell_local + pres_node_offset;
+          }
+
+          if (this_pres_node != preserved_node_idx) {
+            continue;
+          }
+        }
+
+        // Determine node offset in reduction direction.
+        int reduce_node_offset;
+        if (poly_order == 1) {
+          reduce_node_offset = (nod_log[reduce_dir] < 0) ? 0 : 1;
+        }
+        else {
+          if (nod_log[reduce_dir] < -0.5) {
+            reduce_node_offset = 0;
+          }
+          else if (nod_log[reduce_dir] > 0.5) {
+            reduce_node_offset = 2;
+          }
+          else {
+            reduce_node_offset = 1;
+          }
+        }
+
+        int cell_local = cell_idx - up->range.lower[reduce_dir];
+        int reduce_node_idx;
+        if (poly_order == 1) {
+          reduce_node_idx = cell_local + reduce_node_offset;
+        }
+        else {
+          reduce_node_idx = 2*cell_local + reduce_node_offset;
+        }
+
+        if (!visited[reduce_node_idx]) {
+          double val = up->basis.eval_expand(nod_log, f_d);
+          double nod_phys[GKYL_MAX_DIM];
+          dg_reduce_dir_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys);
+
+          vals[reduce_node_idx] = val;
+          coords[reduce_node_idx] = nod_phys[reduce_dir];
+          visited[reduce_node_idx] = true;
+        }
+      }
+    }
+  }
+
+  // Find the global extremum.
+  int extremum_idx = 0;
+  double extremum_val = vals[0];
+  
+  for (int i = 1; i < total_nodes_reduce; i++) {
+    bool is_better;
+    if (up->op == GKYL_REDUCE_OP_MAX) {
+      is_better = (vals[i] > extremum_val);
+    }
+    else {
+      is_better = (vals[i] < extremum_val);
+    }
+    
+    if (is_better) {
+      extremum_val = vals[i];
+      extremum_idx = i;
+    }
+  }
+
+  // Store the result.
+  double *val_n = gkyl_array_fetch(up->out_vals_nodal, preserved_node_idx);
+  double *coord_n = gkyl_array_fetch(up->out_coords_nodal, preserved_node_idx);
+  val_n[0] = extremum_val;
+  coord_n[0] = coords[extremum_idx];
+
+  gkyl_free(vals);
+  gkyl_free(coords);
+  gkyl_free(visited);
+}
+
+/**
+ * Evaluate an input array at the extremum coordinate for a given preserved-direction
+ * node index, storing result in the nodal output array.
+ */
+static void
+eval_at_extremum_for_preserved_node(struct gkyl_array_dg_reduce_dir *up,
+  const struct gkyl_array *in_ho, int preserved_node_idx)
+{
+  int ndim = up->grid.ndim;
+  int reduce_dir = up->reduce_dir;
+  int poly_order = up->basis.poly_order;
+  int preserved_dir = (ndim == 1) ? -1 : ((reduce_dir == 0) ? 1 : 0);
+
+  // Get the extremum coordinate.
+  const double *extremum_coord_n = gkyl_array_cfetch(up->out_coords_nodal, preserved_node_idx);
+  double extremum_coord = extremum_coord_n[0];
+
+  // Find the cell containing this coordinate in the reduction direction.
+  double point[GKYL_MAX_DIM];
+  int known_idx[GKYL_MAX_DIM];
+  int cell_idx[GKYL_MAX_DIM];
+  
+  for (int d = 0; d < ndim; d++) {
+    if (d == reduce_dir) {
+      point[d] = extremum_coord;
+      known_idx[d] = -1;
+    }
+    else {
+      point[d] = 0.0;
+      known_idx[d] = -1;
+    }
+  }
+  
+  // If 2D, determine preserved direction cell from preserved_node_idx.
+  if (ndim > 1) {
+    int pres_cell;
+    if (poly_order == 1) {
+      if (preserved_node_idx == 0) {
+        pres_cell = up->range.lower[preserved_dir];
+      }
+      else {
+        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+        if (pres_cell > up->range.upper[preserved_dir]) {
+          pres_cell = up->range.upper[preserved_dir];
+        }
+      }
+    }
+    else {
+      pres_cell = up->range.lower[preserved_dir] + preserved_node_idx / 2;
+    }
+    known_idx[preserved_dir] = pres_cell;
+    
+    int pres_cell_idx[GKYL_MAX_DIM];
+    for (int d = 0; d < ndim; d++) {
+      pres_cell_idx[d] = (d == preserved_dir) ? pres_cell : 1;
+    }
+    double xc_pres[GKYL_MAX_DIM];
+    gkyl_rect_grid_cell_center(&up->grid, pres_cell_idx, xc_pres);
+    point[preserved_dir] = xc_pres[preserved_dir];
+  }
+  
+  gkyl_rect_grid_find_cell(&up->grid, point, true, known_idx, cell_idx);
+
+  // Clamp cell_idx to interior range.
+  for (int d = 0; d < up->grid.ndim; d++) {
+    if (cell_idx[d] < up->range.lower[d]) {
+      cell_idx[d] = up->range.lower[d];
+    }
+    if (cell_idx[d] > up->range.upper[d]) {
+      cell_idx[d] = up->range.upper[d];
+    }
+  }
+
+  // Get the DG coefficients at this cell.
+  long linidx = gkyl_range_idx(&up->range_ext, cell_idx);
+  const double *f_d = gkyl_array_cfetch(in_ho, linidx);
+
+  // Get cell center.
+  double xc[GKYL_MAX_DIM];
+  gkyl_rect_grid_cell_center(&up->grid, cell_idx, xc);
+
+  // Convert extremum coordinate to logical space.
+  double nod_log[GKYL_MAX_DIM];
+  for (int d = 0; d < ndim; d++) {
+    if (d == reduce_dir) {
+      nod_log[d] = 2.0 * (extremum_coord - xc[d]) / up->grid.dx[d];
+    }
+    else if (ndim > 1) {
+      if (poly_order == 1) {
+        nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
+      }
+      else {
+        int pres_node_offset = preserved_node_idx % 3;
+        if (pres_node_offset == 0) {
+          nod_log[d] = -1.0;
+        }
+        else if (pres_node_offset == 1) {
+          nod_log[d] = 0.0;
+        }
+        else {
+          nod_log[d] = 1.0;
+        }
+      }
+    }
+  }
+
+  // Evaluate the DG expansion at this logical coordinate.
+  double val = up->basis.eval_expand(nod_log, f_d);
+
+  // Store the result.
+  double *val_n = gkyl_array_fetch(up->out_eval_nodal, preserved_node_idx);
+  val_n[0] = val;
+}
+
+struct gkyl_array_dg_reduce_dir*
+gkyl_array_dg_reduce_dir_new(const struct gkyl_array_dg_reduce_dir_inp *inp)
+{
+  struct gkyl_array_dg_reduce_dir *up = gkyl_malloc(sizeof(*up));
+
+  // Copy input parameters.
+  up->grid = *inp->grid;
+  up->basis = *inp->basis;
+  up->range = *inp->range;
+  up->range_ext = *inp->range_ext;
+  up->reduce_dir = inp->reduce_dir;
+  up->op = inp->op;
+  up->use_gpu = inp->use_gpu;
+
+  int ndim = inp->grid->ndim;
+  int poly_order = inp->basis->poly_order;
+  int out_dim = ndim - 1;
+
+  assert(inp->reduce_dir >= 0 && inp->reduce_dir < ndim);
+
+  // Set up output grid/basis/range.
+  if (out_dim == 0) {
+    // 1D -> 0D case.
+    int cells_1d[1] = {1};
+    double lower_1d[1] = {0.0};
+    double upper_1d[1] = {1.0};
+    gkyl_rect_grid_init(&up->out_grid, 1, lower_1d, upper_1d, cells_1d);
+    gkyl_range_init(&up->out_range, 1, (int[]){1}, (int[]){1});
+    gkyl_range_init(&up->out_range_ext, 1, (int[]){0}, (int[]){2});
+    gkyl_cart_modal_serendip(&up->out_basis, 1, 0);
+
+    int nodes_shape[1] = {1};
+    gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape);
+  }
+  else if (out_dim == 1) {
+    // 2D -> 1D case.
+    int preserved_dir = (inp->reduce_dir == 0) ? 1 : 0;
+
+    int cells_out = inp->grid->cells[preserved_dir];
+    double lower_out = inp->grid->lower[preserved_dir];
+    double upper_out = inp->grid->upper[preserved_dir];
+
+    gkyl_rect_grid_init(&up->out_grid, 1, &lower_out, &upper_out, &cells_out);
+
+    int lower_idx[1] = {inp->range->lower[preserved_dir]};
+    int upper_idx[1] = {inp->range->upper[preserved_dir]};
+    gkyl_range_init(&up->out_range, 1, lower_idx, upper_idx);
+
+    int lower_ext_idx[1] = {inp->range_ext->lower[preserved_dir]};
+    int upper_ext_idx[1] = {inp->range_ext->upper[preserved_dir]};
+    gkyl_range_init(&up->out_range_ext, 1, lower_ext_idx, upper_ext_idx);
+
+    gkyl_cart_modal_serendip(&up->out_basis, 1, poly_order);
+
+    int num_nodes = (poly_order == 1) ? gkyl_range_shape(&up->out_range, 0) + 1
+                                      : 2*gkyl_range_shape(&up->out_range, 0) + 1;
+    int nodes_shape[1] = {num_nodes};
+    gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape);
+  }
+  else {
+    assert(false); // Unsupported dimension
+  }
+
+  // Store node locations for input basis.
+  up->nodes = gkyl_array_new(GKYL_DOUBLE, ndim, inp->basis->num_basis);
+  inp->basis->node_list(gkyl_array_fetch(up->nodes, 0));
+
+  // Create nodal-to-modal converter.
+  up->n2m = gkyl_nodal_ops_new(&up->out_basis, &up->out_grid, false);
+
+  // Allocate output arrays.
+  up->out_vals = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, up->out_range_ext.volume);
+  up->out_coords = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, up->out_range_ext.volume);
+  up->out_vals_nodal = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+  up->out_coords_nodal = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+  up->out_eval_nodal = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+
+  return up;
+}
+
+void
+gkyl_array_dg_reduce_dir_advance(struct gkyl_array_dg_reduce_dir *up, const struct gkyl_array *in)
+{
+  // Needs a GPU implementation.
+
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+
+  // Find extremum for each preserved-direction node.
+  int num_nodes_out = up->out_nrange.volume;
+  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+    find_extremum_for_preserved_node(up, in, pres_node);
+  }
+
+  // Transform nodal to modal.
+  if (out_dim == 0) {
+    // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+    double *val_m = gkyl_array_fetch(up->out_vals, 0);
+    double *coord_m = gkyl_array_fetch(up->out_coords, 0);
+    const double *val_n = gkyl_array_cfetch(up->out_vals_nodal, 0);
+    const double *coord_n = gkyl_array_cfetch(up->out_coords_nodal, 0);
+    val_m[0] = val_n[0];
+    coord_m[0] = coord_n[0];
+  }
+  else {
+    // 2D -> 1D case: use nodal-to-modal transform.
+    gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+      &up->out_nrange, &up->out_range, 1, up->out_vals_nodal, up->out_vals, false);
+    gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+      &up->out_nrange, &up->out_range, 1, up->out_coords_nodal, up->out_coords, false);
+  }
+}
+
+void
+gkyl_array_dg_reduce_dir_eval_at_extremum(struct gkyl_array_dg_reduce_dir *up,
+  const struct gkyl_array *in_array, struct gkyl_array *out_val)
+{
+  // Needs a GPU implementation.
+
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+
+  // Evaluate the input array at extremum locations for each preserved-direction node.
+  int num_nodes_out = up->out_nrange.volume;
+  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+    eval_at_extremum_for_preserved_node(up, in_array, pres_node);
+  }
+
+  // Transform nodal to modal.
+  if (out_dim == 0) {
+    // 1D -> 0D case: modal = nodal.
+    double *val_m = gkyl_array_fetch(out_val, 0);
+    const double *val_n = gkyl_array_cfetch(up->out_eval_nodal, 0);
+    val_m[0] = val_n[0];
+  }
+  else {
+    // 2D -> 1D case: use nodal-to-modal transform.
+    gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+      &up->out_nrange, &up->out_range, 1, up->out_eval_nodal, out_val, false);
+  }
+}
+
+const struct gkyl_basis*
+gkyl_array_dg_reduce_dir_get_basis(const struct gkyl_array_dg_reduce_dir *up)
+{
+  return &up->out_basis;
+}
+
+const struct gkyl_rect_grid*
+gkyl_array_dg_reduce_dir_get_grid(const struct gkyl_array_dg_reduce_dir *up)
+{
+  return &up->out_grid;
+}
+
+const struct gkyl_range*
+gkyl_array_dg_reduce_dir_get_range(const struct gkyl_array_dg_reduce_dir *up)
+{
+  return &up->out_range;
+}
+
+const struct gkyl_range*
+gkyl_array_dg_reduce_dir_get_range_ext(const struct gkyl_array_dg_reduce_dir *up)
+{
+  return &up->out_range_ext;
+}
+
+const struct gkyl_range*
+gkyl_array_dg_reduce_dir_get_nodal_range(const struct gkyl_array_dg_reduce_dir *up)
+{
+  return &up->out_nrange;
+}
+
+const struct gkyl_array*
+gkyl_array_dg_reduce_dir_get_vals(const struct gkyl_array_dg_reduce_dir *up)
+{
+  return up->out_vals;
+}
+
+const struct gkyl_array*
+gkyl_array_dg_reduce_dir_get_vals_nodal(const struct gkyl_array_dg_reduce_dir *up)
+{
+  return up->out_vals_nodal;
+}
+
+const struct gkyl_array*
+gkyl_array_dg_reduce_dir_get_coords(const struct gkyl_array_dg_reduce_dir *up)
+{
+  return up->out_coords;
+}
+
+const struct gkyl_array*
+gkyl_array_dg_reduce_dir_get_coords_nodal(const struct gkyl_array_dg_reduce_dir *up)
+{
+  return up->out_coords_nodal;
+}
+
+void
+gkyl_array_dg_reduce_dir_release(struct gkyl_array_dg_reduce_dir *up)
+{
+  gkyl_array_release(up->out_vals);
+  gkyl_array_release(up->out_coords);
+  gkyl_array_release(up->out_vals_nodal);
+  gkyl_array_release(up->out_coords_nodal);
+  gkyl_array_release(up->out_eval_nodal);
+  gkyl_array_release(up->nodes);
+  gkyl_nodal_ops_release(up->n2m);
+  gkyl_free(up);
+}
diff --git a/core/zero/gkyl_array_dg_reduce_dir.h b/core/zero/gkyl_array_dg_reduce_dir.h
new file mode 100644
index 0000000000..cb91ca7cf6
--- /dev/null
+++ b/core/zero/gkyl_array_dg_reduce_dir.h
@@ -0,0 +1,158 @@
+#pragma once
+
+#include <gkyl_array.h>
+#include <gkyl_basis.h>
+#include <gkyl_range.h>
+#include <gkyl_rect_grid.h>
+
+/**
+ * Reduce a DG array along one direction, computing max/min and argmax/argmin.
+ * 
+ * For a 2D input array f(psi, z), reducing along z (dir=1) gives:
+ *   out_val(psi) = max_z f(psi, z)    or    min_z f(psi, z)
+ *   out_coord(psi) = argmax_z f(psi, z)  or  argmin_z f(psi, z)
+ * 
+ * For a 1D input array f(z), reducing along z (dir=0) gives scalars:
+ *   out_val = max f   or   min f
+ *   out_coord = argmax f  or  argmin f
+ * 
+ * The reduction is computed by sampling the field at nodal points along the
+ * reduction direction and finding the global extremum.
+ */
+typedef struct gkyl_array_dg_reduce_dir gkyl_array_dg_reduce_dir;
+
+/** Type of reduction operation. */
+enum gkyl_reduce_op {
+  GKYL_REDUCE_OP_MAX,   // Find maximum
+  GKYL_REDUCE_OP_MIN,   // Find minimum
+};
+
+/** Input parameters for dg_reduce_dir updater. */
+struct gkyl_array_dg_reduce_dir_inp {
+  const struct gkyl_basis *basis;       // Input basis (N-dimensional)
+  const struct gkyl_rect_grid *grid;    // Input grid
+  const struct gkyl_range *range;       // Input range (local)
+  const struct gkyl_range *range_ext;   // Input extended range
+  int reduce_dir;                       // Direction to reduce (0-indexed)
+  enum gkyl_reduce_op op;               // Reduction operation (MAX or MIN)
+  bool use_gpu;                         // Whether to run on GPU
+};
+
+/**
+ * Create a new directional reduction updater.
+ * 
+ * @param inp Input parameters
+ * @return New updater pointer
+ */
+struct gkyl_array_dg_reduce_dir* gkyl_array_dg_reduce_dir_new(
+  const struct gkyl_array_dg_reduce_dir_inp *inp);
+
+/**
+ * Compute the reduction. For each point along the preserved dimensions,
+ * find the maximum or minimum along the reduction direction.
+ * 
+ * @param up Updater object
+ * @param in Input array (N-dimensional DG field)
+ */
+void gkyl_array_dg_reduce_dir_advance(struct gkyl_array_dg_reduce_dir *up,
+  const struct gkyl_array *in);
+
+/**
+ * Evaluate a field at the extremum coordinates.
+ * For example, if we found z_max(psi) = argmax_z f(psi,z), this evaluates
+ * g(psi, z_max(psi)) for any field g.
+ * 
+ * @param up Updater object
+ * @param in_array Input field to evaluate (N-dimensional)
+ * @param out_val Output values at extremum coordinates (N-1 dimensional)
+ */
+void gkyl_array_dg_reduce_dir_eval_at_extremum(struct gkyl_array_dg_reduce_dir *up,
+  const struct gkyl_array *in_array, struct gkyl_array *out_val);
+
+/**
+ * Get the output basis ((N-1)-dimensional, or p=0 1D for 1D->0D).
+ * 
+ * @param up Updater object
+ * @return Pointer to output basis
+ */
+const struct gkyl_basis* gkyl_array_dg_reduce_dir_get_basis(
+  const struct gkyl_array_dg_reduce_dir *up);
+
+/**
+ * Get the output grid.
+ * 
+ * @param up Updater object
+ * @return Pointer to output grid
+ */
+const struct gkyl_rect_grid* gkyl_array_dg_reduce_dir_get_grid(
+  const struct gkyl_array_dg_reduce_dir *up);
+
+/**
+ * Get the output range.
+ * 
+ * @param up Updater object
+ * @return Pointer to output range
+ */
+const struct gkyl_range* gkyl_array_dg_reduce_dir_get_range(
+  const struct gkyl_array_dg_reduce_dir *up);
+
+/**
+ * Get the output extended range.
+ * 
+ * @param up Updater object
+ * @return Pointer to output extended range
+ */
+const struct gkyl_range* gkyl_array_dg_reduce_dir_get_range_ext(
+  const struct gkyl_array_dg_reduce_dir *up);
+
+/**
+ * Get the output nodal range.
+ * 
+ * @param up Updater object
+ * @return Pointer to output nodal range
+ */
+const struct gkyl_range* gkyl_array_dg_reduce_dir_get_nodal_range(
+  const struct gkyl_array_dg_reduce_dir *up);
+
+/**
+ * Get the output array containing extremal values (modal DG expansion).
+ * 
+ * @param up Updater object
+ * @return Pointer to output values array
+ */
+const struct gkyl_array* gkyl_array_dg_reduce_dir_get_vals(
+  const struct gkyl_array_dg_reduce_dir *up);
+
+/**
+ * Get the output array containing extremal values (nodal representation).
+ * 
+ * @param up Updater object
+ * @return Pointer to output nodal values array
+ */
+const struct gkyl_array* gkyl_array_dg_reduce_dir_get_vals_nodal(
+  const struct gkyl_array_dg_reduce_dir *up);
+
+/**
+ * Get the output array containing coordinates of extrema (modal DG expansion).
+ * 
+ * @param up Updater object
+ * @return Pointer to output coordinates array
+ */
+const struct gkyl_array* gkyl_array_dg_reduce_dir_get_coords(
+  const struct gkyl_array_dg_reduce_dir *up);
+
+/**
+ * Get the output array containing coordinates of extrema (nodal representation).
+ * 
+ * @param up Updater object
+ * @return Pointer to output nodal coordinates array
+ */
+const struct gkyl_array* gkyl_array_dg_reduce_dir_get_coords_nodal(
+  const struct gkyl_array_dg_reduce_dir *up);
+
+/**
+ * Release the updater and all internal arrays.
+ * 
+ * @param up Updater to delete
+ */
+void gkyl_array_dg_reduce_dir_release(struct gkyl_array_dg_reduce_dir *up);
diff --git a/core/zero/gkyl_array_dg_reduce_dir_priv.h b/core/zero/gkyl_array_dg_reduce_dir_priv.h
new file mode 100644
index 0000000000..bd7f40518e
--- /dev/null
+++ b/core/zero/gkyl_array_dg_reduce_dir_priv.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <float.h>
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_array_dg_reduce_dir.h>
+#include <gkyl_nodal_ops.h>
+
+/**
+ * Convert logical (reference) coordinates to computational (physical) coordinates.
+ * xout[d] = xc[d] + 0.5*dx[d]*eta[d]
+ */
+static inline void
+dg_reduce_dir_log_to_comp(int ndim, const double *eta,
+  const double *GKYL_RESTRICT dx, const double *GKYL_RESTRICT xc,
+  double *GKYL_RESTRICT xout)
+{
+  for (int d = 0; d < ndim; ++d)
+    xout[d] = 0.5*dx[d]*eta[d] + xc[d];
+}
+
+/** Internal struct for dg_reduce_dir updater. */
+struct gkyl_array_dg_reduce_dir {
+  // Input parameters (copies).
+  struct gkyl_rect_grid grid;       // Input grid (copy)
+  struct gkyl_basis basis;          // Input basis (copy)
+  struct gkyl_range range;          // Input local range (copy)
+  struct gkyl_range range_ext;      // Input extended range (copy)
+  int reduce_dir;                   // Direction to reduce
+  enum gkyl_reduce_op op;           // Reduction operation (MAX or MIN)
+  bool use_gpu;
+
+  // Output grid/basis/range (owned).
+  struct gkyl_rect_grid out_grid;   // Output grid (N-1 dim, or 1D 1-cell for 1D->0D)
+  struct gkyl_basis out_basis;      // Output basis (N-1 dim, or p=0 1D for 1D->0D)
+  struct gkyl_range out_range;      // Output range
+  struct gkyl_range out_range_ext;  // Output extended range
+  struct gkyl_range out_nrange;     // Nodal range for output
+
+  // Output arrays (owned).
+  struct gkyl_array *out_vals;          // Extremal values (modal DG)
+  struct gkyl_array *out_coords;        // Extremal coordinates (modal DG)
+  struct gkyl_array *out_vals_nodal;    // Nodal extremal values
+  struct gkyl_array *out_coords_nodal;  // Nodal extremal coordinates
+  struct gkyl_array *out_eval_nodal;    // Nodal array for eval_at_extremum
+
+  // Internal working arrays.
+  struct gkyl_array *nodes;         // Node locations in logical coords
+
+  // Nodal-to-modal converter.
+  struct gkyl_nodal_ops *n2m;
+};
diff --git a/gyrokinetic/zero/gkyl_gk_geometry.h b/gyrokinetic/zero/gkyl_gk_geometry.h
index dae4a6f4b7..2e783e52d4 100644
--- a/gyrokinetic/zero/gkyl_gk_geometry.h
+++ b/gyrokinetic/zero/gkyl_gk_geometry.h
@@ -188,20 +188,6 @@ struct gk_geometry {
                  // in the eqdsk.
   int idx_LCFS_lo; // Index of the cell that abuts the LCFS from below.
 
-  // Per-field-line bmag_max for loss cone calculations.
-  // In 1x: single value. In 2x: array indexed by psi (x-direction).
-  // These are computed by finding max(bmag) along z for each field line,
-  // assuming symmetric bmag with a single peak for positive z.
-  struct gkyl_array *bmag_max;         // Maximum bmag on each field line (modal DG expansion).
-  struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max on each field line (modal DG expansion).
-  struct gkyl_array *bmag_max_nodal;         // Nodal values of bmag_max.
-  struct gkyl_array *bmag_max_z_coord_nodal; // Nodal values of z-coordinate of bmag_max.
-  struct gkyl_range bmag_max_range;          // Range for bmag_max arrays (1D in psi for 2x, 0D for 1x).
-  struct gkyl_range bmag_max_range_ext;      // Extended range for bmag_max arrays.
-  struct gkyl_range bmag_max_nrange;         // Nodal range for bmag_max arrays.
-  struct gkyl_rect_grid bmag_max_grid;       // Grid for bmag_max arrays (1D in psi for 2x).
-  struct gkyl_basis bmag_max_basis;          // Basis for bmag_max arrays (1D for 2x, 0D for 1x).
-
   uint32_t flags;
   struct gkyl_ref_count ref_count;  
   struct gk_geometry *on_dev; // Pointer to itself or device object.
@@ -329,26 +315,6 @@ double gkyl_gk_geometry_reduce_bmag(struct gk_geometry* up, enum gkyl_array_op o
  */
 double gkyl_gk_geometry_reduce_arg_bmag(struct gk_geometry* up, enum gkyl_array_op op, double *coord);
 
-/**
- * Compute bmag_max per field line. For each psi value (field line), finds the
- * maximum bmag along z (assuming symmetry with a single peak for positive z).
- * Stores the result in gk_geom->bmag_max (modal expansion) and the z-coordinate
- * of the maximum in gk_geom->bmag_max_z_coord.
- * 
- * For 1x simulations, this is a single value. For 2x simulations, this is a
- * 1D array varying with psi.
- *
- * @param gk_geom gk_geometry object (modified in place).
- */
-void gkyl_gk_geometry_bmag_max_init(struct gk_geometry *gk_geom);
-
-/**
- * Release bmag_max arrays in gk_geometry.
- *
- * @param gk_geom gk_geometry object.
- */
-void gkyl_gk_geometry_bmag_max_release(struct gk_geometry *gk_geom);
-
 /**
  * Init nodal range from modal range
  *

From 3b933bbc3e5d4cac05a983ecbac4dc5f6aacb80b Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Wed, 7 Jan 2026 12:08:18 -0500
Subject: [PATCH 07/32] Implement kinetic electrons into the POA scheme. The
 unit tests run and regression tests are brought over from another branch.
 Unit tests for the array mask, loss cone mask, and the regression tests for
 the kinetic electron POA mirror are valgrind free

---
 core/unit/ctest_array_dg_find_peaks.c         |   38 +-
 core/zero/array_dg_find_peaks.c               |   16 +-
 core/zero/gkyl_array_dg_find_peaks.h          |   12 +-
 gyrokinetic/apps/gk_species.c                 |    2 +
 gyrokinetic/apps/gk_species_damping.c         |  140 +-
 gyrokinetic/apps/gk_species_fdot_multiplier.c |   50 +-
 gyrokinetic/apps/gkyl_gyrokinetic_priv.h      |   26 +-
 .../rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c    | 1279 +++++++++++++++++
 .../creg/rt_gk_wham_kinetic_poa_1x2v_p1.c     |  957 ++++++++++++
 .../unit/ctest_loss_cone_mask_gyrokinetic.c   |   19 +-
 .../zero/gkyl_loss_cone_mask_gyrokinetic.h    |    2 +
 .../gkyl_loss_cone_mask_gyrokinetic_priv.h    |    9 +-
 gyrokinetic/zero/loss_cone_mask_gyrokinetic.c |   57 +-
 .../zero/loss_cone_mask_gyrokinetic_cu.cu     |   74 +-
 14 files changed, 2554 insertions(+), 127 deletions(-)
 create mode 100644 gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
 create mode 100644 gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c

diff --git a/core/unit/ctest_array_dg_find_peaks.c b/core/unit/ctest_array_dg_find_peaks.c
index e8aebe3369..2ac00cf1c0 100644
--- a/core/unit/ctest_array_dg_find_peaks.c
+++ b/core/unit/ctest_array_dg_find_peaks.c
@@ -153,8 +153,8 @@ test_1d_find_peaks_cos(int poly_order)
 
   for (int p = 0; p < 3 && p < num_peaks; p++) {
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
-    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_get_vals(peaks, p);
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
     
     const double *val = gkyl_array_cfetch(vals, 0);
     const double *coord = gkyl_array_cfetch(coords, 0);
@@ -166,6 +166,9 @@ test_1d_find_peaks_cos(int poly_order)
     TEST_CHECK(ptype == expected_peaks[p].type);
     TEST_CHECK(fabs(coord[0] - expected_peaks[p].z_expected) < 0.1);
     TEST_CHECK(gkyl_compare_double(val[0], expected_val[0], 0.2));
+
+    gkyl_array_release(coords);
+    gkyl_array_release(vals);
   }
 
   gkyl_array_release(f);
@@ -217,8 +220,8 @@ test_1d_find_peaks_mirror(int poly_order)
 
   for (int p = 0; p < num_peaks; p++) {
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
-    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_get_vals(peaks, p);
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
     
     const double *val = gkyl_array_cfetch(vals, 0);
     const double *coord = gkyl_array_cfetch(coords, 0);
@@ -236,6 +239,8 @@ test_1d_find_peaks_mirror(int poly_order)
       TEST_CHECK(gkyl_compare_double(val[0], 4.0, 1e-15));
       TEST_CHECK(fabs(coord[0] - 1.0) < 1e-15);
     }
+    gkyl_array_release(vals);
+    gkyl_array_release(coords);
   }
 
   gkyl_array_release(f);
@@ -290,8 +295,8 @@ test_2d_find_peaks(int poly_order)
   // Check that values and coordinates are reasonable for each peak.
   for (int p = 0; p < num_peaks; p++) {
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
-    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_get_vals(peaks, p);
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
     
     double xc_log[1] = {0.0};
     
@@ -323,6 +328,8 @@ test_2d_find_peaks(int poly_order)
         TEST_CHECK(fabs(coord_at_center - 1.0) < 1e-15);
       }
     }
+    gkyl_array_release(vals);
+    gkyl_array_release(coords);
   }
 
   gkyl_array_release(f);
@@ -389,8 +396,8 @@ test_1d_find_peaks_complex(int poly_order)
 
   for (int p = 0; p < num_peaks; p++) {
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
-    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_get_vals(peaks, p);
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
     
     const double *val = gkyl_array_cfetch(vals, 0);
     const double *coord = gkyl_array_cfetch(coords, 0);
@@ -403,6 +410,9 @@ test_1d_find_peaks_complex(int poly_order)
     TEST_CHECK(fabs(coord[0] - expected_peaks[p].z_expected) < 1e-15);
     double rel_error = fabs(val[0] - expected_val[0]) / fabs(expected_val[0]);
     TEST_CHECK(rel_error < 1e-15);
+
+    gkyl_array_release(coords);
+    gkyl_array_release(vals);
   }
 
   gkyl_array_release(f);
@@ -483,8 +493,8 @@ test_2d_find_peaks_complex(int poly_order)
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
     TEST_CHECK(ptype == expected_peaks[p].type);
     
-    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_get_vals(peaks, p);
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
     
     // Check each psi cell.
     struct gkyl_range_iter iter;
@@ -521,6 +531,8 @@ test_2d_find_peaks_complex(int poly_order)
         TEST_CHECK(rel_error < 1e-15);
       }
     }
+    gkyl_array_release(vals);
+    gkyl_array_release(coords);
   }
 
   gkyl_array_release(nodes);
@@ -684,7 +696,7 @@ test_2d_project_on_peaks(int poly_order)
 
   // Verify that g evaluated at each peak matches analytical values.
   for (int p = 0; p < num_peaks; p++) {
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
     
     // Check each psi cell.
     struct gkyl_range_iter iter;
@@ -716,6 +728,7 @@ test_2d_project_on_peaks(int poly_order)
         TEST_CHECK(gkyl_compare_double(g_at_node, expected, 1e-15));
       }
     }
+    gkyl_array_release(coords);
   }
 
   // Clean up.
@@ -881,7 +894,7 @@ test_2d_project_on_peak_idx(int poly_order)
 
   // Verify that g evaluated at each peak matches analytical values.
   for (int p = 0; p < num_peaks; p++) {
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_get_coords(peaks, p);
+    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
     
     // Check each psi cell.
     struct gkyl_range_iter iter;
@@ -913,6 +926,7 @@ test_2d_project_on_peak_idx(int poly_order)
         TEST_CHECK(gkyl_compare_double(g_at_node, expected, 1e-15));
       }
     }
+    gkyl_array_release(coords);
   }
 
   // Clean up.
diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c
index 55c6538384..877c91aff6 100644
--- a/core/zero/array_dg_find_peaks.c
+++ b/core/zero/array_dg_find_peaks.c
@@ -637,31 +637,31 @@ gkyl_array_dg_find_peaks_get_nodal_range(const struct gkyl_array_dg_find_peaks *
 }
 
 const struct gkyl_array*
-gkyl_array_dg_find_peaks_get_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+gkyl_array_dg_find_peaks_acquire_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
 {
   assert(peak_idx >= 0 && peak_idx < up->num_peaks);
-  return up->out_vals[peak_idx];
+  return gkyl_array_acquire(up->out_vals[peak_idx]);
 }
 
 const struct gkyl_array*
-gkyl_array_dg_find_peaks_get_vals_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+gkyl_array_dg_find_peaks_acquire_vals_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
 {
   assert(peak_idx >= 0 && peak_idx < up->num_peaks);
-  return up->out_vals_nodal[peak_idx];
+  return gkyl_array_acquire(up->out_vals_nodal[peak_idx]);
 }
 
 const struct gkyl_array*
-gkyl_array_dg_find_peaks_get_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+gkyl_array_dg_find_peaks_acquire_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
 {
   assert(peak_idx >= 0 && peak_idx < up->num_peaks);
-  return up->out_coords[peak_idx];
+  return gkyl_array_acquire(up->out_coords[peak_idx]);
 }
 
 const struct gkyl_array*
-gkyl_array_dg_find_peaks_get_coords_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+gkyl_array_dg_find_peaks_acquire_coords_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
 {
   assert(peak_idx >= 0 && peak_idx < up->num_peaks);
-  return up->out_coords_nodal[peak_idx];
+  return gkyl_array_acquire(up->out_coords_nodal[peak_idx]);
 }
 
 void
diff --git a/core/zero/gkyl_array_dg_find_peaks.h b/core/zero/gkyl_array_dg_find_peaks.h
index 5b7c7d9da2..28872ff53c 100644
--- a/core/zero/gkyl_array_dg_find_peaks.h
+++ b/core/zero/gkyl_array_dg_find_peaks.h
@@ -134,7 +134,7 @@ gkyl_array_dg_find_peaks_get_nodal_range(const struct gkyl_array_dg_find_peaks *
  * @param peak_idx Index of the peak (0 to num_peaks-1)
  * @return Pointer to output values array (modal DG expansion)
  */
-const struct gkyl_array* gkyl_array_dg_find_peaks_get_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
 
 /**
  * Get the output array containing peak values in nodal basis for a specific peak.
@@ -143,7 +143,7 @@ const struct gkyl_array* gkyl_array_dg_find_peaks_get_vals(const struct gkyl_arr
  * @param peak_idx Index of the peak (0 to num_peaks-1)
  * @return Pointer to output values array (nodal DG expansion)
  */
-const struct gkyl_array* gkyl_array_dg_find_peaks_get_vals_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_vals_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
 
 /**
  * Get the output array containing coordinates of a specific peak.
@@ -152,7 +152,7 @@ const struct gkyl_array* gkyl_array_dg_find_peaks_get_vals_nodal(const struct gk
  * @param peak_idx Index of the peak (0 to num_peaks-1)
  * @return Pointer to output coordinates array (modal DG expansion)
  */
-const struct gkyl_array* gkyl_array_dg_find_peaks_get_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
 
 /**
  * Get the output array containing coordinates in nodal basis of a specific peak.
@@ -161,7 +161,7 @@ const struct gkyl_array* gkyl_array_dg_find_peaks_get_coords(const struct gkyl_a
  * @param peak_idx Index of the peak (0 to num_peaks-1)
  * @return Pointer to output coordinates array (nodal DG expansion)
  */
-const struct gkyl_array* gkyl_array_dg_find_peaks_get_coords_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
 
 /**
  * Project (evaluate) an arbitrary array onto the peak locations previously
@@ -193,8 +193,8 @@ const struct gkyl_array* gkyl_array_dg_find_peaks_get_coords_nodal(const struct
  *     break;
  *   }
  * }
- * const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_get_vals(peak_finder, bmag_max_idx);
- * const struct gkyl_array *z_max = gkyl_array_dg_find_peaks_get_coords(peak_finder, bmag_max_idx);
+ * const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(peak_finder, bmag_max_idx);
+ * const struct gkyl_array *z_max = gkyl_array_dg_find_peaks_acquire_coords(peak_finder, bmag_max_idx);
  * 
  * // 3. Evaluate phi at the same locations where bmag has peaks
  * struct gkyl_array *phi_at_peaks[num_peaks];
diff --git a/gyrokinetic/apps/gk_species.c b/gyrokinetic/apps/gk_species.c
index 4074d267d0..f3ab4f0c82 100644
--- a/gyrokinetic/apps/gk_species.c
+++ b/gyrokinetic/apps/gk_species.c
@@ -136,6 +136,8 @@ gk_species_rhs_dynamic(gkyl_gyrokinetic_app *app, struct gk_species *species,
   
   // Enforce the omega_H constraint on dt.
   double dt_omegaH = gk_species_omegaH_dt(app, species, fin);
+  
+  gk_species_fdot_multiplier_advance_times_omegaH(app, species, &species->fdot_mult, &dt_omegaH);
   dt_out = fmin(dt_out, dt_omegaH);
 
   app->stat.species_omega_cfl_tm += gkyl_time_diff_now_sec(tm);
diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index fc61d3213e..206b4fa180 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -1,8 +1,16 @@
 #include <assert.h>
-#include <gkyl_gyrokinetic_priv.h>
-#include <gkyl_loss_cone_mask_gyrokinetic.h>
 #include <gkyl_alloc.h>
+#include <gkyl_array_dg_find_peaks.h>
 #include <gkyl_dg_basis_ops.h>
+#include <gkyl_gyrokinetic_priv.h>
+#include <gkyl_loss_cone_mask_gyrokinetic.h>
+
+static void
+proj_on_basis_c2p_position_func(const double *xcomp, double *xphys, void *ctx)
+{
+  struct gk_proj_on_basis_c2p_func_ctx *c2p_ctx = ctx;
+  gkyl_position_map_eval_mc2nu(c2p_ctx->pos_map, xcomp, xphys);
+}
 
 void
 gk_species_damping_write_disabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
@@ -66,6 +74,13 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
   // Default function pointers.
   damp->write_func = gk_species_damping_write_disabled;
 
+
+  damp->proj_on_basis_c2p_ctx.cdim = app->cdim;
+  damp->proj_on_basis_c2p_ctx.vdim = gks->local_vel.ndim;
+  damp->proj_on_basis_c2p_ctx.vel_map = gks->vel_map;
+  damp->proj_on_basis_c2p_ctx.pos_map = app->position_map;
+
+
   if (damp->type) {
     // Allocate rate array.
     damp->rate = mkarr(app->use_gpu, num_quad==1? 1 : gks->basis.num_basis, gks->local_ext.volume);
@@ -99,47 +114,46 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
       damp->evolve = true; // Since the loss cone boundary is proportional to phi(t).
 
-      // Store pointers to per-field-line bmag_max arrays from gk_geometry.
-      damp->bmag_max = app->gk_geom->bmag_max;
-      damp->bmag_max_z_coord = app->gk_geom->bmag_max_z_coord;
-      damp->bmag_max_basis = &app->gk_geom->bmag_max_basis;
-      damp->bmag_max_range = &app->gk_geom->bmag_max_range;
-
-      // Compute reference coordinate for phi evaluation at mirror throat.
-      // For 1x: use the single bmag_max_z value.
-      // For 2x: use the bmag_max_z at the center of the psi domain (mid field line).
-      double bmag_max_coord_ref_ho[GKYL_MAX_CDIM];
-      if (app->cdim == 1) {
-        // 1x case: single value.
-        const double *bmag_max_z_d = gkyl_array_cfetch(app->gk_geom->bmag_max_z_coord, 0);
-        bmag_max_coord_ref_ho[0] = bmag_max_z_d[0];
-      } else {
-        // 2x case: use the center psi cell's bmag_max_z.
-        int mid_psi_idx = (app->gk_geom->bmag_max_range.lower[0] + app->gk_geom->bmag_max_range.upper[0]) / 2;
-        int psi_idx[1] = {mid_psi_idx};
-        long bmag_max_z_linidx = gkyl_range_idx(&app->gk_geom->bmag_max_range, psi_idx);
-        const double *bmag_max_z_d = gkyl_array_cfetch(app->gk_geom->bmag_max_z_coord, bmag_max_z_linidx);
-        double xc[1] = {0.0};
-        double z_val = app->gk_geom->bmag_max_basis.eval_expand(xc, bmag_max_z_d);
-        double psi_lo = app->gk_geom->bmag_max_grid.lower[0];
-        double psi_dx = app->gk_geom->bmag_max_grid.dx[0];
-        double psi_val = psi_lo + (mid_psi_idx - 0.5) * psi_dx;
-        bmag_max_coord_ref_ho[0] = psi_val;
-        bmag_max_coord_ref_ho[1] = z_val;
-      }
-
-      // Allocate and copy reference coordinate.
-      if (app->use_gpu) {
-        damp->bmag_max_coord_ref = gkyl_cu_malloc(app->cdim*sizeof(double));
-        gkyl_cu_memcpy(damp->bmag_max_coord_ref, bmag_max_coord_ref_ho, app->cdim*sizeof(double), GKYL_CU_MEMCPY_H2D);
-        damp->phi_m = gkyl_cu_malloc(sizeof(double));
-        damp->phi_m_global = gkyl_cu_malloc(sizeof(double));
-      } else {
-        damp->bmag_max_coord_ref = gkyl_malloc(app->cdim*sizeof(double));
-        memcpy(damp->bmag_max_coord_ref, bmag_max_coord_ref_ho, app->cdim*sizeof(double));
-        damp->phi_m = gkyl_malloc(sizeof(double));
-        damp->phi_m_global = gkyl_malloc(sizeof(double));
-      }
+      // Available options:
+      //   A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
+      //   B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_const=true. Output: ncomp=1 array.
+      enum gkyl_quad_type qtype = GKYL_GAUSS_LOBATTO_QUAD;
+      int num_quad = gks->basis.poly_order+1; // This can be p+1 or 1. Must be
+                                              // at least p+1 for Gauss-Lobatto.
+
+      // Create peak finder for bmag to find the mirror throat.
+      // Search along the parallel (z) direction, which is the last configuration space dimension.
+      int search_dir = app->cdim - 1;
+      struct gkyl_array_dg_find_peaks_inp peak_inp = {
+        .basis = &app->basis,
+        .grid = &app->grid,
+        .range = &app->local,
+        .range_ext = &app->local_ext,
+        .search_dir = search_dir,
+        .use_gpu = app->use_gpu,
+      };
+      // Pass a global bmag_int into the peak finder
+      struct gkyl_array *bmag_int_global = mkarr(false, 
+        app->gk_geom->geo_int.bmag->ncomp, app->gk_geom->geo_int.bmag->size);
+      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag, bmag_int_global);
+      damp->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global);
+      gkyl_array_dg_find_peaks_advance(damp->bmag_peak_finder, app->gk_geom->geo_int.bmag);
+      gkyl_array_release(bmag_int_global);
+      
+      // Get the LOCAL_MAX peak (bmag maximum along z direction).
+      int num_peaks = gkyl_array_dg_find_peaks_num_peaks(damp->bmag_peak_finder);
+      damp->bmag_max_peak_idx = num_peaks-2; // Edge is num_peaks-1, so maximum is one less
+      damp->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder, damp->bmag_max_peak_idx);
+      damp->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder, damp->bmag_max_peak_idx);
+      damp->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder, num_peaks-1);
+      damp->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder, num_peaks-1);
+      damp->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(damp->bmag_peak_finder);
+      damp->bmag_max_range = gkyl_array_dg_find_peaks_get_range(damp->bmag_peak_finder);
+      damp->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(damp->bmag_peak_finder);
+
+      damp->phi_at_bmag_max = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis, 
+        damp->bmag_max_range_ext->volume);
+      // phi is defined as 0 at the wall
 
       // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
@@ -157,7 +171,11 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
         .bmag_max_range = damp->bmag_max_range,
         .mass = gks->info.mass,
         .charge = gks->info.charge,
+        .qtype = qtype,
         .num_quad = num_quad,
+        .cellwise_trap_loss = true,
+        .c2p_pos_func = proj_on_basis_c2p_position_func,
+        .c2p_pos_func_ctx = &damp->proj_on_basis_c2p_ctx,
         .use_gpu = app->use_gpu,
       };
       damp->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
@@ -181,12 +199,11 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
 
       // Compute the initial damping rate (assuming phi=0 because phi hasn't been computed).
       // Find the potential at the mirror throat.
-      gkyl_dg_basis_ops_eval_array_at_coord_comp(app->field->phi_smooth, damp->bmag_max_coord_ref,
-        app->basis_on_dev, &app->grid, &app->local, damp->phi_m);
-      gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, damp->phi_m, damp->phi_m_global);
+      gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, app->field->phi_smooth,
+        damp->bmag_max_peak_idx, damp->phi_at_bmag_max);
       // Project the loss cone mask.
       gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-        app->field->phi_smooth, damp->phi_m_global, damp->rate);
+        app->field->phi_smooth, damp->phi_at_bmag_max, damp->rate);
       // Multiply by the user's scaling profile.
       gkyl_array_scale_by_cell(damp->rate, damp->scale_prof);
     }
@@ -214,14 +231,15 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
       gkyl_array_accumulate(rhs, -1.0, f_buffer);
     }
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
-      // Find the potential at the mirror throat.
-      gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, damp->bmag_max_coord_ref,
-        app->basis_on_dev, &app->grid, &app->local, damp->phi_m);
-      gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, damp->phi_m, damp->phi_m_global);
+      // Find the potential at all peak locations (including the mirror throat).
+      gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, phi,
+        damp->bmag_max_peak_idx, damp->phi_at_bmag_max);
+      // Allgather on phi_at_bmag_max. It's not an allgather.
+      // One process has the correct one, but the others do not. Is it a bcast or a sync?
 
-      // Project the loss cone mask.
+      // Project the loss cone mask using the phi_m array.
       gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-        phi, damp->phi_m_global, damp->rate);
+        phi, damp->phi_at_bmag_max, damp->rate);
 
       // Assemble the damping term -scale_prof * mask * f.
       gkyl_array_set(f_buffer, 1.0, fin);
@@ -258,15 +276,13 @@ gk_species_damping_release(const struct gkyl_gyrokinetic_app *app, const struct
     }
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
       // Note: bmag_max and bmag_max_z_coord are owned by gk_geometry, not us.
-      if (app->use_gpu) {
-        gkyl_cu_free(damp->bmag_max_coord_ref);
-        gkyl_cu_free(damp->phi_m);
-        gkyl_cu_free(damp->phi_m_global);
-      } else {
-        gkyl_free(damp->bmag_max_coord_ref);
-        gkyl_free(damp->phi_m);
-        gkyl_free(damp->phi_m_global);
-      }
+      gkyl_array_release(damp->bmag_max);
+      gkyl_array_release(damp->bmag_max_z_coord);
+      gkyl_array_release(damp->bmag_wall);
+      gkyl_array_release(damp->bmag_wall_z_coord);
+      gkyl_array_dg_find_peaks_release(damp->bmag_peak_finder);
+      gkyl_array_release(damp->phi_at_bmag_max);
+
       gkyl_loss_cone_mask_gyrokinetic_release(damp->lcm_proj_op);
       gkyl_array_release(damp->scale_prof);
     }
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 2ae0e760d3..895ce1cc67 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -1,9 +1,9 @@
 #include <assert.h>
-#include <gkyl_gyrokinetic_priv.h>
-#include <gkyl_loss_cone_mask_gyrokinetic.h>
 #include <gkyl_alloc.h>
-#include <gkyl_dg_basis_ops.h>
 #include <gkyl_array_dg_find_peaks.h>
+#include <gkyl_dg_basis_ops.h>
+#include <gkyl_gyrokinetic_priv.h>
+#include <gkyl_loss_cone_mask_gyrokinetic.h>
 
 void
 gk_species_fdot_multiplier_write_disabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
@@ -54,6 +54,20 @@ gk_species_fdot_multiplier_advance_mult(gkyl_gyrokinetic_app *app, const struct
   gkyl_array_scale_by_cell(out, fdmul->multiplier);
 }
 
+void
+gk_species_fdot_multiplier_advance_omegaH_mult(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, double *out)
+{
+  // Multiply out by the multplier.
+  out[0] = out[0] / gks->collisionless.scale_fac;
+}
+
+void
+gk_species_fdot_multiplier_advance_omegaH_disabled(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, double *out)
+{
+}
+
 void
 gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
@@ -104,6 +118,7 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
   // Default function pointers.
   fdmul->write_func = gk_species_fdot_multiplier_write_disabled;
   fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_disabled;
+  fdmul->advance_times_omegaH_func = gk_species_fdot_multiplier_advance_omegaH_disabled;
   fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_disabled;
 
   if (fdmul->type) {
@@ -146,6 +161,7 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       gkyl_array_copy(fdmul->multiplier, fdmul->multiplier_host);
 
       fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_mult;
+      fdmul->advance_times_omegaH_func = gk_species_fdot_multiplier_advance_omegaH_mult;
       fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_mult;
       if (fdmul->write_diagnostics)
         fdmul->write_func = gk_species_fdot_multiplier_write_init_only;
@@ -183,14 +199,17 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       // Get the LOCAL_MAX peak (bmag maximum along z direction).
       int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder);
       fdmul->bmag_max_peak_idx = num_peaks-2; // Edge is num_peaks-1, so maximum is one less
-      fdmul->bmag_max = gkyl_array_dg_find_peaks_get_vals(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
-      fdmul->bmag_max_z_coord = gkyl_array_dg_find_peaks_get_coords(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
+      fdmul->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
+      fdmul->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
+      fdmul->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, num_peaks-1);
+      fdmul->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, num_peaks-1);
       fdmul->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(fdmul->bmag_peak_finder);
       fdmul->bmag_max_range = gkyl_array_dg_find_peaks_get_range(fdmul->bmag_peak_finder);
       fdmul->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(fdmul->bmag_peak_finder);
 
       fdmul->phi_at_bmag_max = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis, 
         fdmul->bmag_max_range_ext->volume);
+      // phi is defined as 0 at the wall
 
       // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
@@ -202,6 +221,8 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
         .vel_range = &gks->local_vel, 
         .vel_map = gks->vel_map,
         .bmag = app->gk_geom->geo_int.bmag,
+        .bmag_wall = fdmul->bmag_wall,
+        .bmag_wall_z_coord = fdmul->bmag_wall_z_coord,
         .bmag_max = fdmul->bmag_max,
         .bmag_max_z_coord = fdmul->bmag_max_z_coord,
         .bmag_max_basis = fdmul->bmag_max_basis,
@@ -218,6 +239,7 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       fdmul->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
 
       fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_loss_cone_mult;
+      fdmul->advance_times_omegaH_func = gk_species_fdot_multiplier_advance_omegaH_mult;
       fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_mult;
       if (fdmul->write_diagnostics) {
         fdmul->write_func = gk_species_fdot_multiplier_write_enabled;
@@ -238,6 +260,18 @@ gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, const st
 
   app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst);
 }
+
+
+void
+gk_species_fdot_multiplier_advance_times_omegaH(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, double *out)
+{
+  struct timespec wst = gkyl_wall_clock();
+
+  fdmul->advance_times_omegaH_func(app, gks, fdmul, out);
+
+  app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst);
+}
   
 void
 gk_species_fdot_multiplier_advance_times_rate(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
@@ -270,7 +304,11 @@ gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, const
       // Nothing to release.
     }
     else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE) {
-      int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder);
+      gkyl_array_release(fdmul->bmag_max);
+      gkyl_array_release(fdmul->bmag_max_z_coord);
+      gkyl_array_release(fdmul->bmag_wall);
+      gkyl_array_release(fdmul->bmag_wall_z_coord);
+
       gkyl_array_release(fdmul->phi_at_bmag_max);
       gkyl_array_dg_find_peaks_release(fdmul->bmag_peak_finder);
       gkyl_loss_cone_mask_gyrokinetic_release(fdmul->lcm_proj_op);
diff --git a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
index 6e50335fc1..7613d4e68e 100644
--- a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
+++ b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
@@ -815,14 +815,19 @@ struct gk_damping {
   bool evolve; // Whether the source is time dependent.
   struct gkyl_array *rate; // Damping rate.
   struct gkyl_array *rate_host; // Host copy for use in IO and projecting.
+  struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
   struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask.
-  // Per-field-line bmag_max arrays (pointers to gk_geometry's arrays).
+  struct gkyl_array_dg_find_peaks *bmag_peak_finder; // Finds peaks in bmag along parallel direction.
+  // Per-field-line bmag_max arrays (pointers to arrays owned by bmag_peak_finder).
   const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
+  const struct gkyl_array *bmag_wall; // Magnetic field amplitude at the wall per field line.
+  const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag_wall per field line.
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
-  double *bmag_max_coord_ref; // Reference coordinate for phi evaluation at mirror throat.
-  double *phi_m, *phi_m_global; // Electrostatic potential at bmag_max.
+  const struct gkyl_range *bmag_max_range_ext; // Extended range for bmag_max arrays.
+  int bmag_max_peak_idx; // Index of the LOCAL_MAX peak in the peak finder.
+  struct gkyl_array *phi_at_bmag_max; // Phi evaluated at all peak locations.
   struct gkyl_array *scale_prof; // Conf-space scaling factor profile.
   // Functions chosen at runtime.
   void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame);
@@ -841,6 +846,8 @@ struct gk_fdot_multiplier {
   // Per-field-line bmag_max arrays (pointers to arrays owned by bmag_peak_finder).
   const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
+  const struct gkyl_array *bmag_wall; // Magnetic field amplitude at the wall per field line.
+  const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag_wall per field line.
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
   const struct gkyl_range *bmag_max_range_ext; // Extended range for bmag_max arrays.
@@ -852,6 +859,8 @@ struct gk_fdot_multiplier {
     struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out);
   void (*advance_times_cfl_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
     struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out);
+  void (*advance_times_omegaH_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, double *out);
 };
 
 struct gk_heating {
@@ -2859,6 +2868,17 @@ void gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk
 void gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out);
 
+/**
+ * Multiply the omegaH rate.
+ *
+ * @param app gyrokinetic app object.
+ * @param gks Species object.
+ * @param fdmul Species df/dt multiplier object.
+ * @param out omegaH rate to multiply.
+ */
+void gk_species_fdot_multiplier_advance_times_omegaH(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, double *out);
+
 /**
  * Multiply df/dt.
  *
diff --git a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
new file mode 100644
index 0000000000..37d64c85bd
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
@@ -0,0 +1,1279 @@
+#include <math.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State of the pseudo orbit-averaged integrator.
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Haven't started.
+  GK_POA_OAP, // Orbit averaged phase.
+  GK_POA_FDP, // Full dynamics phase.
+  GK_POA_COMPLETED, // Finished simulation.
+};
+
+struct gk_poa_phase_params {
+  enum gk_poa_state phase; // Type of phase.
+  int num_frames; // Number of frames.
+  double duration; // Duration.
+  double alpha; // Factor multiplying collisionless terms.
+  bool is_static_field; // Whether to evolve the field.
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multipler.
+};
+
+// Define the context of the simulation. This is basically all the globals
+struct gk_mirror_ctx
+{
+  int cdim, vdim; // Dimensionality.
+
+  // Plasma parameters
+  double mi;
+  double qi;
+  double me;
+  double qe;
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau;
+  double Ti0;
+  double kperpRhos;
+  // Parameters controlling initial conditions.
+  double alim;
+  double alphaIC0;
+  double alphaIC1;
+  double nuFrac;
+  // Electron-electron collision freq.
+  double logLambdaElc;
+  double nuElc;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  // Thermal speeds.
+  double vti;
+  double vte;
+  double c_s;
+  // Gyrofrequencies and gyroradii.
+  double omega_ci;
+  double rho_s;
+  double kperp; // Perpendicular wavenumber in SI units.
+  double RatZeq0; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Endure that Z=0 is not on
+  double Z_min;
+  double Z_max;
+  double z_min;
+  double z_max;
+  double psi_eval;
+  double psi_in;
+  double z_in;
+  // Magnetic equilibrium model.
+  double mcB;
+  double gamma;
+  double Z_m;
+  // Bananna tip info. Hardcoad to avoid dependency on ctx
+  double B_bt;
+  double R_bt;
+  double Z_bt;
+  double z_bt;
+  double R_m;
+  double B_m;
+  double z_m;
+  // Physics parameters at mirror throat
+  double n_m;
+  double Te_m;
+  double Ti_m;
+  double cs_m;
+  // Source parameters
+  double NSrcIon;
+  double lineLengthSrcIon;
+  double sigSrcIon;
+  double NSrcFloorIon;
+  double TSrc0Ion;
+  double TSrcFloorIon;
+  double NSrcElc;
+  double lineLengthSrcElc;
+  double sigSrcElc;
+  double NSrcFloorElc;
+  double TSrc0Elc;
+  double TSrcFloorElc;
+  double alpha; // Multirate factor.
+  // Grid parameters
+  double vpar_max_ion;
+  double vpar_max_elc;
+  double mu_max_ion;
+  double mu_max_elc;
+  int Nz;
+  int Nvpar;
+  int Nmu;
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order;
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run.
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+};
+
+double
+psi_RZ(double RIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+  double psi = 0.5 * pow(RIn, 2.) * mcB *
+               (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+                1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
+  return psi;
+}
+
+double
+R_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double Rout = sqrt(2.0 * psiIn / (app->mcB * 
+    (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) +
+     1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))))));
+  return Rout;
+}
+
+void
+Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+  *BRad = -(1.0 / 2.0) * Rcoord * mcB *
+          (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
+            2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
+  *BZ = mcB *
+        (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
+         1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))));
+  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+}
+
+double
+integrand_z_psiZ(double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = app->psi_in;
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, ZIn, ctx, &BRad, &BZ, &Bmag);
+  return Bmag / BZ;
+}
+
+double
+z_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  app->psi_in = psiIn;
+  double eps = 0.0;
+  struct gkyl_qr_res integral;
+  if (eps <= ZIn)
+  {
+    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14);
+  }
+  else
+  {
+    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14);
+    integral.res = -integral.res;
+  }
+  return integral.res;
+}
+
+// Invert z(Z) via root-finding.
+double
+root_Z_psiz(double Z, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  return app->z_in - z_psiZ(app->psi_in, Z, ctx);
+}
+
+double
+Z_psiz(double psiIn, double zIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double maxL = app->Z_max - app->Z_min;
+  double eps = maxL / app->Nz;   // Interestingly using a smaller eps yields larger errors in some geo quantities.
+  app->psi_in = psiIn;
+  app->z_in = zIn;
+  struct gkyl_qr_res Zout;
+  if (zIn >= 0.0)
+  {
+    double fl = root_Z_psiz(-eps, ctx);
+    double fr = root_Z_psiz(app->Z_max + eps, ctx);
+    Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14);
+  }
+  else
+  {
+    double fl = root_Z_psiz(app->Z_min - eps, ctx);
+    double fr = root_Z_psiz(eps, ctx);
+    Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14);
+  }
+  return Zout.res;
+}
+
+// -- Source functions.
+void
+eval_density_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double NSrc = app->NSrcElc;
+  double zSrc = app->lineLengthSrcElc;
+  double sigSrc = app->sigSrcElc;
+  double NSrcFloor = app->NSrcFloorElc;
+  if (fabs(Z) <= app->Z_m)
+  {
+    fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2.))) *
+                                  exp(-1 * pow((z - zSrc), 2) / (2.0 * pow(sigSrc, 2.))));
+  }
+  else
+  {
+    fout[0] = 1e-16;
+  }
+}
+
+void
+eval_upar_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  fout[0] = 0.0;
+}
+
+void
+eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double sigSrc = app->sigSrcElc;
+  double TSrc0 = app->TSrc0Elc;
+  double Tfloor = app->TSrcFloorElc;
+  if (fabs(z) <= 2.0 * sigSrc)
+  {
+    fout[0] = TSrc0;
+  }
+  else
+  {
+    fout[0] = Tfloor;
+  }
+}
+
+void
+eval_density_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double NSrc = app->NSrcIon;
+  double zSrc = app->lineLengthSrcIon;
+  double sigSrc = app->sigSrcIon;
+  double NSrcFloor = app->NSrcFloorIon;
+  if (fabs(Z) <= app->Z_m)
+  {
+    fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2))) *
+                                  exp(-1 * pow((z - zSrc), 2) / (2.0 * pow(sigSrc, 2))));
+  }
+  else
+  {
+    fout[0] = 1e-16;
+  }
+}
+
+void
+eval_upar_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  fout[0] = 0.0;
+}
+
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double sigSrc = app->sigSrcIon;
+  double TSrc0 = app->TSrc0Ion;
+  double Tfloor = app->TSrcFloorIon;
+  if (fabs(z) <= 2.0 * sigSrc)
+  {
+    fout[0] = TSrc0;
+  }
+  else
+  {
+    fout[0] = Tfloor;
+  }
+}
+
+// Electrons initial conditions
+void
+eval_density_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  if (fabs(Z) <= app->Z_bt)
+  {
+    fout[0] = app->n0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC0 / 2.);
+  }
+  else if (fabs(Z) <= app->Z_m)
+  {
+    fout[0] = app->n0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC1 / 2.);
+  }
+  else
+  {
+    fout[0] = app->n_m * sqrt(Bmag / app->B_m);
+  }
+}
+
+void
+eval_upar_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  if (fabs(z) <= app->z_m)
+  {
+    fout[0] = 0.0;
+  }
+  else if (z > app->z_m)
+  {
+    fout[0] = app->cs_m * (z - app->z_m);
+  }
+  else
+  {
+    fout[0] = app->cs_m * (z + app->z_m);
+  }
+}
+
+void
+eval_temp_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  if (fabs(Z) <= app->Z_bt)
+  {
+    fout[0] = app->Te0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC0 / 2.);
+  }
+  else if (fabs(Z) <= app->Z_m)
+  {
+    fout[0] = app->Te0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC1 / 2.);
+  }
+  else
+  {
+    fout[0] = app->Te_m * sqrt(Bmag / app->B_m);
+  }
+}
+
+// Ion initial conditions
+void
+eval_density(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  if (fabs(Z) <= app->Z_bt)
+  {
+    fout[0] = app->n0 * pow(1.0 - pow((R - app->R_bt) / app->alim, 2), app->alphaIC0 / 2);
+  }
+  else if (fabs(Z) <= app->Z_m)
+  {
+    fout[0] = app->n0 * pow(1.0 - pow((R - app->R_bt) / app->alim, 2), app->alphaIC1 / 2);
+  }
+  else
+  {
+    fout[0] = app->n_m * sqrt(Bmag / app->B_m);
+  }
+}
+
+void
+eval_upar(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  if (fabs(z) <= app->z_m)
+  {
+    fout[0] = 0.0;
+  }
+  else if (z > app->z_m)
+  {
+    fout[0] = app->cs_m * (z - app->z_m); //* (z -  / app->z_m);
+  }
+  else
+  {
+    fout[0] = app->cs_m * (z + app->z_m); //* (z + app->z_m) / app->z_m;
+  }
+}
+
+void
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  if (fabs(Z) <= app->Z_bt)
+  {
+    fout[0] = app->Ti0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2)), app->alphaIC0 / 2);
+  }
+  else if (fabs(Z) <= app->Z_m)
+  {
+    fout[0] = app->Ti0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2)), app->alphaIC1 / 2);
+  }
+  else
+  {
+    fout[0] = app->Ti_m * sqrt(Bmag / app->B_m);
+  }
+}
+
+
+// Potential initial condition
+void
+eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  double z_m = 0.98;
+  double z_max = app->z_max;
+  double sigma = 0.2*z_m;
+  double center_potential = 8.0 * app->Te0 / app->qi;
+  if (fabs(z) <= sigma)
+  {
+    fout[0] = center_potential;
+  }
+  else
+  {
+    fout[0] = center_potential * (1 - (fabs(z) - sigma) / (z_max - sigma));
+  }
+}
+
+// Evaluate collision frequencies
+void
+evalNuElc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->nuElc;
+}
+
+void
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->nuIon;
+}
+
+// Geometry evaluation functions for the gk app
+// mapc2p must assume a 3d input xc
+void
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
+{
+  double psi = xc[0];
+  double theta = xc[1];
+  double z = xc[2];
+
+  double Z = Z_psiz(psi, z, ctx);
+  double R = R_psiZ(psi, Z, ctx);
+
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  double x = R * cos(theta);
+  double y = R * sin(theta);
+  xp[0] = x;
+  xp[1] = y;
+  xp[2] = Z;
+}
+
+// bmag_func must assume a 3d input xc
+void
+bmag_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double z = xc[2];
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  fout[0] = Bmag;
+}
+
+// bfield_func must assume a 3d input xc
+void
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double z = xc[2];
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+
+  double phi = xc[1];
+  // zc are computational coords. 
+  // Set Cartesian components of magnetic field.
+  fout[0] = BRad*cos(phi);
+  fout[1] = BRad*sin(phi);
+  fout[2] = BZ;
+}
+
+struct gk_mirror_ctx
+create_ctx(void)
+{
+  int cdim = 1, vdim = 2; // Dimensionality.
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0; // Not sure if this is right
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS; // ion mass
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+  double kperpRhos = 0.1;
+
+  // Parameters controlling initial conditions.
+  double alim = 0.125;
+  double alphaIC0 = 2;
+  double alphaIC1 = 10;
+
+  double nuFrac = 1.0;
+  // Electron-electron collision freq.
+  double logLambdaElc = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Te0 / eV);
+  double nuElc = nuFrac * logLambdaElc * pow(eV, 4.) * n0 /
+                 (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.));
+  // Ion-ion collision freq.
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+  double vte = sqrt(Te0 / me);
+  double c_s = sqrt(Te0 / mi);
+
+  // Gyrofrequencies and gyroradii.
+  double omega_ci = eV * B_p / mi;
+  double rho_s = c_s / omega_ci;
+
+  // Perpendicular wavenumber in SI units:
+  double kperp = kperpRhos / rho_s;
+
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Endure that Z=0 is not on
+  // the boundary of a cell (due to AD errors).
+  double Z_min = -2.5;
+  double Z_max =  2.5;
+
+  // Parameters controlling the magnetic equilibrium model.
+  double mcB = 6.51292;
+  double gamma = 0.124904;
+  double Z_m = 0.98;
+
+  // Source parameters
+  double NSrcIon = 3.1715e23 / 8.0;
+  double lineLengthSrcIon = 0.0;
+  double sigSrcIon = Z_m / 4.0;
+  double NSrcFloorIon = 0.05 * NSrcIon;
+  double TSrc0Ion = Ti0 * 1.25;
+  double TSrcFloorIon = TSrc0Ion / 8.0;
+  double NSrcElc = NSrcIon;
+  double lineLengthSrcElc = lineLengthSrcIon;
+  double sigSrcElc = sigSrcIon;
+  double NSrcFloorElc = NSrcFloorIon;
+  double TSrc0Elc = TSrc0Ion / tau;
+  double TSrcFloorElc = TSrcFloorIon / tau;
+
+  // Bananna tip info. Hardcoad to avoid dependency on ctx
+  double B_bt = 1.058278;
+  double R_bt = 0.071022;
+  double Z_bt = 0.467101;
+  double z_bt = 0.468243;
+  double R_m = 0.017845;
+  double B_m = 16.662396;
+  double z_m = 0.982544;
+
+  // Physics parameters at mirror throat
+  double n_m = 1.105617e19;
+  double Te_m = 346.426583 * eV;
+  double Ti_m = 3081.437703 * eV;
+  double cs_m = 4.037740e5;
+
+  double alpha = 0.01; // Multirate factor.
+
+  // Grid parameters
+  double vpar_max_elc = 20 * vte;
+  double mu_max_elc = me * pow(3. * vte, 2.) / (2. * B_p);
+  double vpar_max_ion = 20 * vti;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 32;
+  int Nvpar = 32; // Number of cells in the paralell velocity direction 96
+  int Nmu = 16;  // Number of cells in the mu direction 192
+  int poly_order = 1;
+
+  // Factor multiplying collisionless terms.
+  double alpha_oap = 0.01;
+  double alpha_fdp = 1.0;
+  // Duration of each phase.
+  double tau_oap = 1e-7;
+  double tau_fdp = 3e-10;
+  double tau_fdp_extra = 2*tau_fdp;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 4; // Frames per OAP phase
+  int num_frames_fdp = 4; // Frames per FDP phase
+  int num_frames_fdp_extra = 2*num_frames_fdp;  // Frames for the extra FDP phase
+
+  // Whether to evolve the field.
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+  // Whether to enable positivity.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = true;
+  // Type of df/dt multipler.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Calculate phase structure
+  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2*num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
+  for (int i=0; i<(num_phases-1)/2; i++) {
+    // OAPs.
+    poa_phases[2*i].phase = GK_POA_OAP;
+    poa_phases[2*i].num_frames = num_frames_oap;
+    poa_phases[2*i].duration = tau_oap;
+    poa_phases[2*i].alpha = alpha_oap;
+    poa_phases[2*i].is_static_field = is_static_field_oap;
+    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs.
+    poa_phases[2*i+1].phase = GK_POA_FDP;
+    poa_phases[2*i+1].num_frames = num_frames_fdp;
+    poa_phases[2*i+1].duration = tau_fdp;
+    poa_phases[2*i+1].alpha = alpha_fdp;
+    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
+    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // Add an extra, longer FDP.
+  poa_phases[num_phases-1].phase = GK_POA_FDP;
+  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases-1].duration = tau_fdp_extra;
+  poa_phases[num_phases-1].alpha = alpha_fdp;
+  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .kperpRhos = kperpRhos,
+    .alim = alim,
+    .alphaIC0 = alphaIC0,
+    .alphaIC1 = alphaIC1,
+    .nuFrac = nuFrac,
+    .logLambdaElc = logLambdaElc,
+    .nuElc = nuElc,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
+    .vte = vte,
+    .c_s = c_s,
+    .omega_ci = omega_ci,
+    .rho_s = rho_s,
+    .kperp = kperp, 
+    .RatZeq0 = RatZeq0,
+    .Z_min = Z_min,
+    .Z_max = Z_max,
+    .mcB = mcB,
+    .gamma = gamma,
+    .Z_m = Z_m,
+    .B_bt = B_bt,
+    .R_bt = R_bt,
+    .Z_bt = Z_bt,
+    .z_bt = z_bt,
+    .R_m = R_m,
+    .B_m = B_m,
+    .z_m = z_m,
+    .n_m = n_m,
+    .Te_m = Te_m,
+    .Ti_m = Ti_m,
+    .cs_m = cs_m,
+    .NSrcIon = NSrcIon,
+    .lineLengthSrcIon = lineLengthSrcIon,
+    .sigSrcIon = sigSrcIon,
+    .NSrcFloorIon = NSrcFloorIon,
+    .TSrc0Ion = TSrc0Ion,
+    .TSrcFloorIon = TSrcFloorIon,
+    .NSrcElc = NSrcElc,
+    .lineLengthSrcElc = lineLengthSrcElc,
+    .sigSrcElc = sigSrcElc,
+    .NSrcFloorElc = NSrcFloorElc,
+    .TSrc0Elc = TSrc0Elc,
+    .TSrcFloorElc = TSrcFloorElc,
+    .vpar_max_ion = vpar_max_ion,
+    .vpar_max_elc = vpar_max_elc,
+    .mu_max_ion = mu_max_ion,
+    .mu_max_elc = mu_max_elc,
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = {Nz, Nvpar, Nmu},
+    .poly_order = poly_order,
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq     = write_phase_freq    , 
+    .int_diag_calc_freq   = int_diag_calc_freq  , 
+    .dt_failure_tol       = dt_failure_tol      , 
+    .num_failures_max     = num_failures_max    , 
+  };
+
+  // Populate a couple more values in the context.
+  ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
+  ctx.z_min    = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max    = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+
+  return ctx;
+}
+
+void
+release_ctx(struct gk_mirror_ctx *ctx)
+{
+  gkyl_free(ctx->poa_phases);
+}
+
+void
+calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+  double t_curr, bool force_calc, double dt)
+{
+  if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
+    gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+    gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+
+    if ( !(dt < 0.0) )
+      gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
+  }
+}
+
+void
+write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
+  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+{
+  bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  if (trig_now_conf || force_write) {
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
+
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (trig_now_phase || force_write) {
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+
+    gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
+  }
+}
+
+struct time_frame_state {
+  double t_curr; // Current simulation time.
+  double t_end; // End time of current phase.
+  int frame_curr; // Current frame.
+  int num_frames; // Number of frames at the end of current phase.
+};
+
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  // Reset I/O triggers:
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  int frame_curr = tfs->frame_curr;
+  int num_frames = tfs->num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+
+  // Prevent division by zero when frame_curr equals num_frames
+  int frames_remaining = num_frames - frame_curr;
+  double time_remaining = t_end - t_curr;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  tfs->t_end = tfs->t_curr + pparams->duration;
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames;
+
+  // Run an OAP or FDP.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  
+  // Reset I/O triggers:
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Reset simulation parameters and function pointers.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .type = pparams->fdot_mult_type,
+    .cellwise_const = true,
+    .write_diagnostics = true,
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .polarization_bmag = ctx->B_p,
+    .kperpSq = pow(ctx->kperp, 2.),
+    .is_static = pparams->is_static_field,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "elc", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "elc", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "elc", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Compute initial guess of maximum stable time-step.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check.
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps))
+  {
+    if (step == 1 || step % 1 == 0)
+      gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
+
+    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    if (step == 1 || step % 1 == 0)
+      gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success)
+    {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested;
+
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end);
+
+    if (dt_init < 0.0) {
+      dt_init = status.dt_actual;
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break;
+      }
+    }
+    else {
+      num_failures = 0;
+    }
+
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr;
+  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+}
+
+int main(int argc, char **argv)
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d=0; d<ctx.cdim; d++)
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  for (int d=0; d<ctx.vdim; d++)
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species elc = {
+    .name = "elc",
+    .charge = ctx.qe,
+    .mass = ctx.me,
+    .vdim = ctx.vdim,
+    .lower = {-ctx.vpar_max_elc, 0.0},
+    .upper = {ctx.vpar_max_elc, ctx.mu_max_elc},
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+      .ctx_density = &ctx,
+      .density = eval_density_elc,
+      .ctx_upar = &ctx,
+      .upar= eval_upar_elc,
+      .ctx_temp = &ctx,
+      .temp = eval_temp_elc,      
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions =  {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .num_cross_collisions = 1,
+      .collide_with = { "ion" },
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+        .ctx_density = &ctx,
+        .density = eval_density_elc_source,
+        .ctx_upar = &ctx,
+        .upar= eval_upar_elc_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_elc_source,      
+      }, 
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = {GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+  };
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,
+    .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = {-ctx.vpar_max_ion, 0.0},
+    .upper = { ctx.vpar_max_ion, ctx.mu_max_ion},
+    .cells = { cells_v[0], cells_v[1] },
+    .scale_with_polarization = true,
+
+    .polarization_density = ctx.n0,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+      .ctx_density = &ctx,
+      .density = eval_density,
+      .ctx_upar = &ctx,
+      .upar= eval_upar,
+      .ctx_temp = &ctx,
+      .temp = eval_temp_ion,      
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions =  {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Ti0,
+      .num_cross_collisions = 1,
+      .collide_with = { "elc" },
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar= eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_ion_source,      
+      }, 
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = {GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+  };
+
+  struct gkyl_gyrokinetic_field field = {
+    .polarization_bmag = ctx.B_p, // Issue here. B0 from soloviev, so not sure what to do. Ours is not constant
+    .kperpSq = pow(ctx.kperp, 2.),
+    .is_static = false, // Will be replaced below.
+    .polarization_potential = eval_potential,
+    .polarization_potential_ctx = &ctx,
+  };
+
+  // GK app
+  struct gkyl_gk app_inp = { 
+    .name = "gk_mirror_kinetic_elc_poa_1x2v_p1",
+    .cdim = ctx.cdim,
+    .lower = {ctx.z_min},
+    .upper = {ctx.z_max},
+    .cells = { cells_x[0] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+
+    .geometry = {
+      .geometry_id = GKYL_MAPC2P,
+      .world = {ctx.psi_eval, 0.0},
+      .mapc2p = mapc2p, // Mapping of computational to physical space.
+      .c2p_ctx = &ctx,
+      .bfield_func = bfield_func, // Magnetic field.
+      .bfield_ctx = &ctx
+    },
+
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+    .num_species = 2,
+    .species = {elc, ion},
+    .field = field,
+
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0] },
+      .comm = comm,
+    },
+  };
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit=0; pit<ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    };
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  if (app_args.num_steps != INT_MAX)
+    phase_idx_end = 1;
+
+  // Loop over number of number of phases;
+  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0)
+  {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+  freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+  
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
new file mode 100644
index 0000000000..75c0a024cd
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
@@ -0,0 +1,957 @@
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_parproj.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State of the pseudo orbit-averaged integrator.
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Haven't started.
+  GK_POA_OAP, // Orbit averaged phase.
+  GK_POA_FDP, // Full dynamics phase.
+  GK_POA_COMPLETED, // Finished simulation.
+};
+
+struct gk_poa_phase_params {
+  enum gk_poa_state phase; // Type of phase.
+  int num_frames; // Number of frames.
+  double duration; // Duration.
+  double alpha; // Factor multiplying collisionless terms.
+  bool is_static_field; // Whether to evolve the field.
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multipler.
+};
+
+// Define the context of the simulation. This is basically all the globals
+struct gk_mirror_ctx
+{
+  int cdim, vdim; // Dimensionality.
+  // Plasma parameters
+  double mi;
+  double qi;
+  double me;
+  double qe;
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau;
+  double Ti0;
+  double kperpRhos;
+  // Parameters controlling initial conditions.
+  double alim;
+  double nuFrac;
+  // Electron-electron collision freq.
+  double logLambdaElc;
+  double nuElc;
+  double elc_nuFrac;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  // Thermal speeds.
+  double vti;
+  double vte;
+  double c_s;
+  // Gyrofrequencies and gyroradii.
+  double omega_ci;
+  double rho_s;
+  double kperp; // Perpendicular wavenumber in SI units.
+  double RatZeq0; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Endure that Z=0 is not on
+  double z_min;
+  double z_max;
+  double psi_min;
+  double psi_eval;
+  double psi_max;
+  // Physics parameters at mirror throat
+  double vpar_max_ion;
+  double vpar_max_elc;
+  double mu_max_ion;
+  double mu_max_elc;
+  int Nz;
+  int Nvpar;
+  int Nmu;
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order;
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run.
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+
+  // Source parameters
+  double source_amplitude;
+  double source_sigma;
+  double ion_source_temp;
+  double elc_source_temp;
+};
+
+
+void
+eval_density(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = 1e17;
+}
+
+void
+eval_upar(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = 0.0;
+}
+
+void
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->Ti0;
+}
+
+void
+eval_temp_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->Te0;
+}
+
+void
+eval_density_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  double src_amp = app->source_amplitude;
+  double z_src = 0.0;
+  double src_sigma = app->source_sigma;
+  double src_amp_floor = src_amp*1e-2;
+  if (fabs(z) <= 1.0)
+  {
+    fout[0] = src_amp * (1 - pow(fabs(z), 6));
+  }
+  else
+  {
+    fout[0] = 1e-16;
+  }
+}
+
+void
+eval_upar_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  fout[0] = 0.0;
+}
+
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  double TSrc0 = app->ion_source_temp;
+  double Tfloor = TSrc0*1e-2;
+  if (fabs(z) <= 1.0)
+  {
+    fout[0] = TSrc0;
+  }
+  else
+  {
+    fout[0] = Tfloor;
+  }
+}
+
+void
+eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  double TSrc0 = app->elc_source_temp; // Using same temp as ion source for simplicity
+  double Tfloor = TSrc0*1e-2;
+  if (fabs(z) <= 1.0)
+  {
+    fout[0] = TSrc0;
+  }
+  else
+  {
+    fout[0] = Tfloor;
+  }
+}
+
+// Potential initial condition
+void
+eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  double z_m = 0.98;
+  double z_max = app->z_max;
+  double sigma = 0.2*z_m;
+  double center_potential = 8.0 * app->Te0 / app->qi;
+  if (fabs(z) <= sigma)
+  {
+    fout[0] = center_potential;
+  }
+  else
+  {
+    fout[0] = center_potential * (1 - (fabs(z) - sigma) / (z_max - sigma));
+  }
+}
+
+void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double vpar_max_ion = app->vpar_max_ion;
+  double mu_max_ion = app->mu_max_ion;
+
+  double cvpar = vc[0], cmu = vc[1];
+  double b = 1.45;
+  double linear_velocity_threshold = 1./6.;
+  double frac_linear = 1/b*atan(linear_velocity_threshold*tan(b));
+  if (fabs(cvpar) < frac_linear) {
+    double func_frac = tan(frac_linear*b) / tan(b);
+    vp[0] = vpar_max_ion*func_frac*cvpar/frac_linear;
+  }
+  else {
+    vp[0] = vpar_max_ion*tan(cvpar*b)/tan(b);
+  }
+  // Quadratic map in mu.
+  vp[1] = mu_max_ion*pow(cmu,3);
+}
+
+void mapc2p_vel_elc(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double vpar_max_elc = app->vpar_max_elc;
+  double mu_max_elc = app->mu_max_elc;
+
+  double cvpar = vc[0], cmu = vc[1];
+  double b = 1.45;
+  double linear_velocity_threshold = 1./6.;
+  double frac_linear = 1/b*atan(linear_velocity_threshold*tan(b));
+  if (fabs(cvpar) < frac_linear) {
+    double func_frac = tan(frac_linear*b) / tan(b);
+    vp[0] = vpar_max_elc*func_frac*cvpar/frac_linear;
+  }
+  else {
+    vp[0] = vpar_max_elc*tan(cvpar*b)/tan(b);
+  }
+  // Quadratic map in mu.
+  vp[1] = mu_max_elc*pow(cmu,3.0/2.0);
+}
+
+struct gk_mirror_ctx
+create_ctx(void)
+{
+  int cdim = 1, vdim = 2; // Dimensionality.
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0; // Not sure if this is right
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS; // ion mass
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+  double kperpRhos = 0.1;
+
+  // Parameters controlling initial conditions.
+  double alim = 0.125;
+  double alphaIC0 = 2;
+  double alphaIC1 = 10;
+
+  double nuFrac = 1.0;
+  double elc_nuFrac = 1/5.489216862238348;
+  // Electron-electron collision freq.
+  double logLambdaElc = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Te0 / eV);
+  double nuElc = elc_nuFrac * nuFrac * logLambdaElc * pow(eV, 4.) * n0 /
+                 (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.));
+  // Ion-ion collision freq.
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+  double vte = sqrt(Te0 / me);
+  double c_s = sqrt(Te0 / mi);
+
+  // Gyrofrequencies and gyroradii.
+  double omega_ci = eV * B_p / mi;
+  double rho_s = c_s / omega_ci;
+
+  // Perpendicular wavenumber in SI units:
+  double kperp = kperpRhos / rho_s;
+
+  // Geometry parameters.
+  double z_min = -2.0;
+  double z_max =  2.0;
+  double psi_eval= 1e-3;
+
+  // Grid parameters
+  double vpar_max_elc = 30 * vte;
+  double mu_max_elc = me * pow(3. * vte, 2.) / (2. * B_p);
+  double vpar_max_ion = 30 * vti;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 32;
+  int Nvpar = 32; // Number of cells in the paralell velocity direction 96
+  int Nmu = 16;  // Number of cells in the mu direction 192
+  int poly_order = 1;
+
+  // Factor multiplying collisionless terms.
+  double alpha_oap = 0.01;
+  double alpha_fdp = 1.0;
+  // Duration of each phase.
+  double tau_oap = 1.5e-8;
+  double tau_fdp = 1.5e-10;
+  double tau_fdp_extra = 2*tau_fdp;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 4; // Frames per OAP phase
+  int num_frames_fdp = 4; // Frames per FDP phase
+  int num_frames_fdp_extra = 2*num_frames_fdp;  // Frames for the extra FDP phase
+
+  // Whether to evolve the field.
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+  // Whether to enable positivity.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = true;
+  // Type of df/dt multipler.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Calculate phase structure
+  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2*num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
+  for (int i=0; i<(num_phases-1)/2; i++) {
+    // OAPs.
+    poa_phases[2*i].phase = GK_POA_OAP;
+    poa_phases[2*i].num_frames = num_frames_oap;
+    poa_phases[2*i].duration = tau_oap;
+    poa_phases[2*i].alpha = alpha_oap;
+    poa_phases[2*i].is_static_field = is_static_field_oap;
+    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs.
+    poa_phases[2*i+1].phase = GK_POA_FDP;
+    poa_phases[2*i+1].num_frames = num_frames_fdp;
+    poa_phases[2*i+1].duration = tau_fdp;
+    poa_phases[2*i+1].alpha = alpha_fdp;
+    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
+    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // Add an extra, longer FDP.
+  poa_phases[num_phases-1].phase = GK_POA_FDP;
+  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases-1].duration = tau_fdp_extra;
+  poa_phases[num_phases-1].alpha = alpha_fdp;
+  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  // Source parameters
+  double source_amplitude = 1.e20;
+  double source_sigma = 0.5;
+  double ion_source_temp = 5000. * eV;
+  double elc_source_temp = 5000. * eV; // Using same temp as ion source for simplicity
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .kperpRhos = kperpRhos,
+    .alim = alim,
+    .nuFrac = nuFrac,
+    .logLambdaElc = logLambdaElc,
+    .nuElc = nuElc,
+    .elc_nuFrac = elc_nuFrac,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
+    .vte = vte,
+    .c_s = c_s,
+    .omega_ci = omega_ci,
+    .rho_s = rho_s,
+    .kperp = kperp,
+    .z_min = z_min,
+    .z_max = z_max,
+    .psi_eval = psi_eval,
+    .vpar_max_ion = vpar_max_ion,
+    .vpar_max_elc = vpar_max_elc,
+    .mu_max_ion = mu_max_ion,
+    .mu_max_elc = mu_max_elc,
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = {Nz, Nvpar, Nmu},
+    .poly_order = poly_order,
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq     = write_phase_freq    , 
+    .int_diag_calc_freq   = int_diag_calc_freq  , 
+    .dt_failure_tol       = dt_failure_tol      , 
+    .num_failures_max     = num_failures_max    , 
+  };
+
+  return ctx;
+}
+
+void
+release_ctx(struct gk_mirror_ctx *ctx)
+{
+  gkyl_free(ctx->poa_phases);
+}
+
+void
+calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+  double t_curr, bool force_calc, double dt)
+{
+  if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
+    gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+    gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+
+    if ( !(dt < 0.0) )
+      gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
+  }
+}
+
+void
+write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
+  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+{
+  bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  if (trig_now_conf || force_write) {
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
+
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (trig_now_phase || force_write) {
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+
+    gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
+  }
+}
+
+struct time_frame_state {
+  double t_curr; // Current simulation time.
+  double t_end; // End time of current phase.
+  int frame_curr; // Current frame.
+  int num_frames; // Number of frames at the end of current phase.
+};
+
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  // Reset I/O triggers:
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  int frame_curr = tfs->frame_curr;
+  int num_frames = tfs->num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+
+  // Prevent division by zero when frame_curr equals num_frames
+  int frames_remaining = num_frames - frame_curr;
+  double time_remaining = t_end - t_curr;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  tfs->t_end = tfs->t_curr + pparams->duration;
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames;
+
+  // Run an OAP or FDP.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  
+  // Reset I/O triggers:
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Reset simulation parameters and function pointers.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .type = pparams->fdot_mult_type,
+    .cellwise_const = true,
+    .write_diagnostics = true,
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .polarization_bmag = ctx->B_p,
+    .kperpSq = pow(ctx->kperp, 2.),
+    .is_static = pparams->is_static_field,
+    .time_rate_diagnostics = true,
+    .polarization_potential = eval_potential,
+    .polarization_potential_ctx = &ctx,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "elc", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "elc", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "elc", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Compute initial guess of maximum stable time-step.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check.
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps))
+  {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
+
+    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success)
+    {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested;
+
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end);
+
+    if (dt_init < 0.0) {
+      dt_init = status.dt_actual;
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break;
+      }
+    }
+    else {
+      num_failures = 0;
+    }
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr;
+  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+}
+
+int main(int argc, char **argv)
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d=0; d<ctx.cdim; d++)
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  for (int d=0; d<ctx.vdim; d++)
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species elc = {
+    .name = "elc",
+    .charge = ctx.qe,
+    .mass = ctx.me,
+    .vdim = ctx.vdim,
+    .lower = {-1.0, 0.0},
+    .upper = { 1.0, 1.0},
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_elc,
+      .ctx = &ctx,
+    },
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .density = eval_density,
+      .ctx_density = &ctx,
+      .upar = eval_upar,
+      .ctx_upar = &ctx,
+      .temp = eval_temp_elc,
+      .ctx_temp = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions =  {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .num_cross_collisions = 1,
+      .collide_with = { "ion" },
+      .write_diagnostics = true,
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar= eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_elc_source,      
+      },
+      .diagnostics = {
+        .num_diag_moments = 5,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+      }
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = {GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+    },
+  };
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,
+    .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = {-1.0, 0.0},
+    .upper = { 1.0, 1.0},
+    .cells = { cells_v[0], cells_v[1]},
+    .polarization_density = ctx.n0,
+    .scale_with_polarization = true,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .density = eval_density,
+      .ctx_density = &ctx,
+      .upar = eval_upar,
+      .ctx_upar = &ctx,
+      .temp = eval_temp_ion,
+      .ctx_temp = &ctx,
+    },
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
+    },
+
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+    
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Ti0,
+      .num_cross_collisions = 1,
+      .collide_with = { "elc" },
+      .write_diagnostics = true,
+    },
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar= eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_ion_source,      
+      },
+      .diagnostics = {
+        .num_diag_moments = 6,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_HAMILTONIAN},
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+      },
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = {GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP},
+    },
+  };
+  struct gkyl_gyrokinetic_field field = {
+    .polarization_bmag = ctx.B_p,
+    .kperpSq = pow(ctx.kperp, 2.),
+    .time_rate_diagnostics = true,
+    .is_static = false,
+    .polarization_potential = eval_potential,
+    .polarization_potential_ctx = &ctx,
+  };
+
+  struct gkyl_mirror_geo_grid_inp grid_inp = {
+    .filename_psi = "gyrokinetic/data/unit/wham_hires.geqdsk_psi.gkyl", // psi file to use
+    .rclose = 0.2, // closest R to region of interest
+    .zmin = -2.0,  // Z of lower boundary
+    .zmax =  2.0,  // Z of upper boundary
+    .include_axis = false, // Include R=0 axis in grid
+    .fl_coord = GKYL_MIRROR_GRID_GEN_PSI_CART_Z, // coordinate system for psi grid
+  };
+
+  struct gkyl_gk app_inp = {  // GK app
+    .name = "gk_wham_kinetic_poa_1x2v_p1",
+    .cdim = ctx.cdim,
+    .lower = {ctx.z_min},
+    .upper = {ctx.z_max},
+    .cells = { cells_x[0] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+    .geometry = {
+      .geometry_id = GKYL_MIRROR,
+      .world = {ctx.psi_eval, 0.0},
+      .mirror_grid_info = grid_inp,
+    },
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+    .num_species = 2,
+    .species = {elc, ion},
+    .field = field,
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0] },
+      .comm = comm,
+    },
+  };
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit=0; pit<ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    };
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  if (app_args.num_steps != INT_MAX)
+    phase_idx_end = 1;
+
+  // Loop over number of number of phases;
+  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0)
+  {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+  freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+  
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index 846e53cb8b..40554e6328 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -206,6 +206,8 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   struct gkyl_range local, local_ext; // local, local-ext phase-space ranges
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
+  struct gkyl_position_map *pmap = gkyl_position_map_null_new();
+
   // Initialize geometry
   struct gkyl_gk_geometry_inp geometry_input = {
     .geometry_id = GKYL_MAPC2P,
@@ -220,6 +222,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .global = local_conf,
     .global_ext = local_ext_conf,
     .basis = basis_conf,
+    .position_map = pmap,
   };
   geometry_input.geo_grid = gkyl_gk_geometry_augment_grid(grid_conf, geometry_input);
   gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext, &geometry_input.geo_local);
@@ -248,8 +251,10 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   // Get the LOCAL_MAX peak (bmag maximum along z direction).
   int num_peaks = gkyl_array_dg_find_peaks_num_peaks(bmag_peak_finder);
   int bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less
-  const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_get_vals(bmag_peak_finder, bmag_max_peak_idx);
-  const struct gkyl_array *bmag_max_z_coord = gkyl_array_dg_find_peaks_get_coords(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_array *bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_array *bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, num_peaks-1); // First peak is wall
+  const struct gkyl_array *bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, num_peaks-1);
   const struct gkyl_basis *bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(bmag_peak_finder);
   const struct gkyl_range *bmag_max_range = gkyl_array_dg_find_peaks_get_range(bmag_peak_finder);
   const struct gkyl_range *bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder);
@@ -317,8 +322,10 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .vel_range = &local_vel, 
     .vel_map = gvm,
     .bmag = gk_geom->geo_int.bmag,
-    .bmag_max = bmag_max,
     .bmag_max_z_coord = bmag_max_z_coord,
+    .bmag_max = bmag_max,
+    .bmag_wall = bmag_wall,
+    .bmag_wall_z_coord = bmag_wall_z_coord,
     .bmag_max_basis = bmag_max_basis,
     .bmag_max_range = bmag_max_range,
     .mass = ctx.mass,
@@ -391,7 +398,13 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   gkyl_array_release(mask_ref_ho);
   gkyl_loss_cone_mask_gyrokinetic_release(proj_mask);
   gkyl_velocity_map_release(gvm);
+  // Release acquired peak arrays.
+  gkyl_array_release(bmag_max);
+  gkyl_array_release(bmag_max_z_coord);
+  gkyl_array_release(bmag_wall);
+  gkyl_array_release(bmag_wall_z_coord);
   gkyl_array_dg_find_peaks_release(bmag_peak_finder);
+  gkyl_position_map_release(pmap);
   gkyl_gk_geometry_release(gk_geom);
 
 #ifdef GKYL_HAVE_CUDA
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
index 565ab00e0f..3848acf2ba 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
@@ -30,6 +30,8 @@ struct gkyl_loss_cone_mask_gyrokinetic_inp {
   const struct gkyl_array *bmag; // Magnetic field magnitude (cdim DG expansion).
   const struct gkyl_array *bmag_max; // Maximum bmag per field line (1D DG expansion for 2x, scalar for 1x).
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_wall; // Magnetic field magnitude at the wall (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag at the wall (1D DG expansion for 2x, scalar for 1x).
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays (1D for 2x, 0D for 1x).
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
   double mass; // Species mass.
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
index c9419bfd47..234fcad4fb 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
@@ -48,12 +48,15 @@ struct gkyl_loss_cone_mask_gyrokinetic {
   // Per-field-line bmag_max arrays (1D for 2x, scalar for 1x).
   const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
+  const struct gkyl_array *bmag_wall; // Magnetic field magnitude at the wall (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag at the wall (1D DG expansion for 2x, scalar for 1x).
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
   
   // GPU helper: scalar bmag_max_z value for simple 1x cases.
   // TODO: For 2x GPU support, need to pass full arrays and do per-cell lookup.
   double *bmag_max_z_scalar_gpu; // Single z-coordinate for GPU (1x case only).
+  double *bmag_wall_z_scalar_gpu; // Single z-coordinate for GPU (1x case only).
   
   bool use_gpu; // Boolean if we are performing projection on device.
 
@@ -83,7 +86,10 @@ struct gkyl_loss_cone_mask_gyrokinetic {
   struct gkyl_array *mask_out_quad; // Array keeping f_lte at phase-space quadrature nodes.
   struct gkyl_array *qDphiDbmag_quad; // Array keeping q*(phi-phi_m)/(B_max-B)
                                       // at configuration-space quadrature nodes.
+  struct gkyl_array *qDphiDbmag_quad_wall; // Array keeping q*phi/(B_wall-B)
+                                      // at configuration-space quadrature nodes.
   struct gkyl_array *Dbmag_quad; // B_max-B at configuration-space quadrature nodes.
+  struct gkyl_array *Dbmag_quad_wall; // B_wall-B at configuration-space quadrature nodes.
 
   struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem; // Structure of data which converts  
                                                           // stores the info to convert phase
@@ -98,10 +104,11 @@ struct gkyl_loss_cone_mask_gyrokinetic {
  * @param conf_rng Configuration-space range.
  * @param bmag Magnetic field magnitude.
  * @param bmag_max Maximum bmag.
+ * @param bmag_wall Minimum bmag.
  */
 void 
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max);
+  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max, const double *bmag_wall);
 
 /**
  * Compute projection of the loss cone masking function on the phase-space basis
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index 30a33993f0..2808c7106c 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -177,34 +177,44 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up,
 
     const double *bmag_d = gkyl_array_cfetch(bmag, linidx);
     double *Dbmag_quad = gkyl_array_fetch(up->Dbmag_quad, linidx);
+    double *Dbmag_quad_wall = gkyl_array_fetch(up->Dbmag_quad_wall, linidx);
 
     // Get bmag_max for this field line (psi value).
     // For 1x: bmag_max is a single value (index 0).
     // For 2x: bmag_max varies with psi (x-direction), so use conf_iter.idx[0].
     double bmag_max_val;
+    double bmag_wall_val;
     if (cdim == 1) {
       // 1x case: single value.
       const double *bmag_max_d = gkyl_array_cfetch(up->bmag_max, 0);
       bmag_max_val = bmag_max_d[0]; // Just the constant coefficient.
+
+      const double *bmag_wall_d = gkyl_array_cfetch(up->bmag_wall, 0);
+      bmag_wall_val = bmag_wall_d[0]; // Just the constant coefficient.
     }
     else {
       // 2x case: evaluate bmag_max at this psi cell.
       // The bmag_max array is 1D in psi, so we need the psi index.
       int psi_idx[1] = {conf_iter.idx[0]};
-      long bmag_max_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
-      const double *bmag_max_d = gkyl_array_cfetch(up->bmag_max, bmag_max_linidx);
+      long psi_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
+      const double *bmag_max_d = gkyl_array_cfetch(up->bmag_max, psi_linidx);
       // For simplicity, evaluate at cell center (logical coord 0).
       double xc[1] = {0.0};
       bmag_max_val = up->bmag_max_basis->eval_expand(xc, bmag_max_d);
+
+      const double *bmag_wall_d = gkyl_array_cfetch(up->bmag_wall, psi_linidx);
+      bmag_wall_val = up->bmag_max_basis->eval_expand(xc, bmag_wall_d);
     }
 
     // Sum over basis 
     for (int n=0; n<tot_quad_conf; ++n) {
       const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
-      for (int k=0; k<num_basis_conf; ++k)
+      for (int k=0; k<num_basis_conf; ++k) {
         Dbmag_quad[n] += bmag_d[k]*b_ord[k];
-
+        Dbmag_quad_wall[n] += bmag_d[k]*b_ord[k];
+      }
       Dbmag_quad[n] = bmag_max_val - Dbmag_quad[n];
+      Dbmag_quad_wall[n] = Dbmag_quad_wall[n] - bmag_wall_val;
     }
   }
 }
@@ -286,6 +296,7 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
     up->mask_out_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_phase,
       inp->conf_range_ext->volume*inp->vel_range->volume);
     up->qDphiDbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+    up->qDphiDbmag_quad_wall = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
 
     // Allocate the memory for computing the specific phase nodal to modal calculation
     struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem_ho;
@@ -344,18 +355,25 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
 
   // Store references to bmag_max arrays (no copy, just store pointers).
   // Must be done before calling gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad.
-  up->bmag_max = inp->bmag_max;
-  up->bmag_max_z_coord = inp->bmag_max_z_coord;
+  up->bmag_max = gkyl_array_acquire(inp->bmag_max);
+  up->bmag_max_z_coord = gkyl_array_acquire(inp->bmag_max_z_coord);
+  up->bmag_wall = gkyl_array_acquire(inp->bmag_wall);
+  up->bmag_wall_z_coord = gkyl_array_acquire(inp->bmag_wall_z_coord);
   up->bmag_max_basis = inp->bmag_max_basis;
   up->bmag_max_range = inp->bmag_max_range;
 
   // Allocate and obtain bmag_max-bmag at quadrature points.
-  if (up->use_gpu) 
+  if (up->use_gpu) {
     up->Dbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-  else
+    up->Dbmag_quad_wall = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+  } else {
     up->Dbmag_quad = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+    up->Dbmag_quad_wall = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+  }
 
   gkyl_array_clear(up->Dbmag_quad, 0.0); 
+  gkyl_array_clear(up->Dbmag_quad_wall, 0.0);
+  
   gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag);
     
   return up;
@@ -428,6 +446,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
   double phi_quad[tot_quad_conf];
   double qDphiDbmag_quad[tot_quad_conf]; // charge*(phi-phi_m)/(bmag_max-bmag[0]).
+  double qDphiDbmag_quad_wall[tot_quad_conf]; // charge*(phi-phi_m)/(bmag[0]-bmag_wall).
 
   // Outer loop over configuration space cells; for each
   // config-space cell inner loop walks over velocity space.
@@ -437,6 +456,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
 
     const double *phi_d = gkyl_array_cfetch(phi, linidx_conf);
     const double *Dbmag_quad = gkyl_array_cfetch(up->Dbmag_quad, linidx_conf);
+    const double *Dbmag_quad_wall = gkyl_array_cfetch(up->Dbmag_quad_wall, linidx_conf);
 
     // Get phi_m value for this field line.
     // For 1x: single value (phi_m is a scalar stored as p=0 DG expansion).
@@ -469,6 +489,11 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         qDphiDbmag_quad[n] = up->charge*(phi_quad[n]-phi_m_val)/Dbmag_quad[n];
       else
         qDphiDbmag_quad[n] = 0.0;
+
+      if (Dbmag_quad_wall[n] > 0.0)
+        qDphiDbmag_quad_wall[n] = up->charge*phi_quad[n]/Dbmag_quad_wall[n];
+      else
+        qDphiDbmag_quad_wall[n] = 0.0;
     }
 
     // Inner loop over velocity space.
@@ -506,13 +531,20 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
 
         // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
         double KEparDbmag = 0.0;
+        double KEparDbmag_wall = 0.0;
         if (Dbmag_quad[cqidx] > 0.0) {
           KEparDbmag = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad[cqidx];
         } else {
           KEparDbmag = 0.0;
         }
 
+        if (Dbmag_quad_wall[cqidx] > 0.0)
+          KEparDbmag_wall = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad_wall[cqidx];
+        else
+          KEparDbmag_wall = 0.0;
+
         double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad[cqidx]);
+        double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall+qDphiDbmag_quad_wall[cqidx]));
 
         // Get the z-coordinate of bmag_max for this field line.
         // For 1x: single value (index 0).
@@ -536,6 +568,8 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         // xmu[cdim-1] is the z-coordinate (last config space coordinate).
         if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_val)) {
           fq[0] = 1.0 * up->norm_fac;
+        } else if (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val)) {
+          fq[0] = 1.0 * up->norm_fac;
         } else {
           fq[0] = 0.0;
         }
@@ -564,11 +598,18 @@ gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up)
 
   gkyl_array_release(up->fun_at_ords);
   gkyl_array_release(up->Dbmag_quad);
+  gkyl_array_release(up->Dbmag_quad_wall);
+
+  gkyl_array_release(up->bmag_max);
+  gkyl_array_release(up->bmag_max_z_coord);
+  gkyl_array_release(up->bmag_wall);
+  gkyl_array_release(up->bmag_wall_z_coord);
 
   if (up->use_gpu) {
     gkyl_cu_free(up->p2c_qidx);
     gkyl_array_release(up->mask_out_quad);
     gkyl_array_release(up->qDphiDbmag_quad);
+    gkyl_array_release(up->qDphiDbmag_quad_wall);
     gkyl_mat_mm_array_mem_release(up->phase_nodal_to_modal_mem);
     gkyl_cu_free(up->bmag_max_z_scalar_gpu);
     // Note: bmag_max and bmag_max_z_coord are owned by gk_geometry, not us.
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
index e09a6f6335..821261fb04 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
@@ -19,7 +19,7 @@ extern "C" {
 __global__ static void
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(struct gkyl_range conf_range,
   const struct gkyl_array* basis_at_ords_conf, const struct gkyl_array* bmag, const double *bmag_max,
-  struct gkyl_array* Dbmag_quad_d)
+  const double *bmag_wall, struct gkyl_array* Dbmag_quad_d, struct gkyl_array* Dbmag_quad_wall_d)
 {    
   int num_basis_conf = basis_at_ords_conf->ncomp;
   int tot_quad_conf = basis_at_ords_conf->size;
@@ -35,25 +35,30 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(struct gkyl_range conf_range,
     const double *bmag_d = (const double*) gkyl_array_cfetch(bmag, linidx);
 
     double *bmag_quad = (double*) gkyl_array_fetch(Dbmag_quad_d, linidx);
+    double *bmag_quad_wall = (double*) gkyl_array_fetch(Dbmag_quad_wall_d, linidx);
 
     for (int n=0; n<tot_quad_conf; ++n) {
       const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, n);
 
-      for (int k=0; k<num_basis_conf; ++k)
+      for (int k=0; k<num_basis_conf; ++k){
         bmag_quad[n] += bmag_d[k]*b_ord[k];
+        bmag_quad_wall[n] += bmag_d[k]*b_ord[k];
+      }
 
       bmag_quad[n] = bmag_max[0] - bmag_quad[n];
+      bmag_quad_wall[n] = bmag_quad_wall[n] - bmag_wall[0];
     }
   }
 }
 
 void 
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max)
+    const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max,
+  const double *bmag_wall)
 {
   int nblocks = conf_range->nblocks, nthreads = conf_range->nthreads;
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<<nblocks, nthreads>>>(*conf_range, 
-    up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_max, up->Dbmag_quad->on_dev);
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<<nblocks, nthreads>>>(*conf_range,
+    up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_max, bmag_wall, up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev);
 }
 
 static void
@@ -69,8 +74,8 @@ gkyl_parallelize_components_kernel_launch_dims(dim3* dimGrid, dim3* dimBlock, gk
 
 __global__ static void
 gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range, 
-  const struct gkyl_array* basis_at_ords_conf, double charge, const struct gkyl_array* phi,
-  const double *phi_m, const struct gkyl_array* Dbmag_quad, struct gkyl_array* qDphiDbmag_quad)
+  const struct gkyl_array* basis_at_ords_conf, double charge, const struct gkyl_array* phi,  const double *phi_m, const struct gkyl_array* Dbmag_quad, const struct gkyl_array* Dbmag_quad_wall,
+  struct gkyl_array* qDphiDbmag_quad, struct gkyl_array* qDphiDbmag_quad_wall)
 {
   int num_basis_conf = basis_at_ords_conf->ncomp;
 
@@ -87,6 +92,7 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range
 
     const double *phi_d = (const double*) gkyl_array_cfetch(phi, linidx);
     const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx);
+    const double *Dbmag_quad_wall_d = (const double*) gkyl_array_cfetch(Dbmag_quad_wall, linidx);
 
     // Sum over basis at configuration-space quadrature points. 
     const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, linc2);
@@ -96,18 +102,26 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range
 
     // Potential energy term at each quadrature point.
     double *qDphiDbmag_quad_d = (double*) gkyl_array_fetch(qDphiDbmag_quad, linidx);
+    double *qDphiDbmag_quad_wall_d = (double*) gkyl_array_fetch(qDphiDbmag_quad_wall, linidx);
     if (Dbmag_quad_d[linc2] > 0.0)
       qDphiDbmag_quad_d[linc2] = charge*(phi_quad-phi_m[0])/Dbmag_quad_d[linc2];
     else
       qDphiDbmag_quad_d[linc2] = 0.0;
+
+    if (Dbmag_quad_wall_d[linc2] > 0.0)
+      qDphiDbmag_quad_wall_d[linc2] = charge*phi_quad/Dbmag_quad_wall_d[linc2];
+    else
+      qDphiDbmag_quad_wall_d[linc2] = 0.0;
   }
 }
 
 __global__ static void
 gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
   struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
-  double mass, const struct gkyl_array* phase_ordinates, 
-  const double *bmag_max_z_scalar, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad,
+  double mass, const struct gkyl_array* phase_ordinates,
+  const double *bmag_max_z_scalar, const double *bmag_wall_z_scalar,
+  const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* qDphiDbmag_quad_wall,
+  const struct gkyl_array* Dbmag_quad, const struct gkyl_array* Dbmag_quad_wall,
   const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out)
 {
   int pdim = phase_range.ndim, cdim = conf_range.ndim;
@@ -127,7 +141,9 @@ gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
     long linidx_conf = gkyl_range_idx(&conf_range, cidx);
 
     const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf);
+    const double *Dbmag_quad_wall_d = (const double*) gkyl_array_cfetch(Dbmag_quad_wall, linidx_conf);
     const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf);
+    const double *qDphiDbmag_quad_wall_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad_wall, linidx_conf);
 
     gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
     long linidx_phase = gkyl_range_idx(&phase_range, pidx);
@@ -156,16 +172,23 @@ gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
       }
   
       // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
-      double KEparDbmag = 0.0;
+      double KEparDbmag = 0.0, KEparDbmag_wall = 0.0;
       if (Dbmag_quad_d[cqidx] > 0.0)
         KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx];
       else
         KEparDbmag = 0.0;
   
+      if (Dbmag_quad_wall_d[cqidx] > 0.0)
+        KEparDbmag_wall = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_wall_d[cqidx];
+      else
+        KEparDbmag_wall = 0.0; 
+
       double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
+      double mu_bound_wall = GKYL_MAX2(0.0, KEparDbmag_wall+qDphiDbmag_quad_wall_d[cqidx]);
   
-      if ( !(mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_scalar[0])) ) {
-        mask_d[0] = 0.0;
+      if ( !(mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_scalar[0])) &&
+           !(mu_bound_wall < xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_scalar[0])) ) {
+              mask_d[0] = 0.0;
         break;
       }
     }
@@ -176,7 +199,9 @@ __global__ static void
 gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
   struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
   double mass, double norm_fac, const struct gkyl_array* phase_ordinates, 
-  const double *bmag_max_z_scalar, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad,
+  const double *bmag_max_z_scalar, const double *bmag_wall_z_scalar,
+  const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* qDphiDbmag_quad_wall,
+  const struct gkyl_array* Dbmag_quad, const struct gkyl_array* Dbmag_quad_wall,
   const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out_quad)
 {
   int pdim = phase_range.ndim, cdim = conf_range.ndim;
@@ -198,7 +223,9 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
     long linidx_conf = gkyl_range_idx(&conf_range, cidx);
 
     const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf);
+    const double *Dbmag_quad_wall_d = (const double*) gkyl_array_cfetch(Dbmag_quad_wall, linidx_conf);
     const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf);
+    const double *qDphiDbmag_quad_wall_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad_wall, linidx_conf);
 
     gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
     long linidx_phase = gkyl_range_idx(&phase_range, pidx);
@@ -224,15 +251,24 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
 
     // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
     double KEparDbmag = 0.0;
+    double KEparDbmag_wall = 0.0;
     if (Dbmag_quad_d[cqidx] > 0.0)
       KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx];
     else
       KEparDbmag = 0.0;
 
+   if (Dbmag_quad_wall_d[cqidx] > 0.0)
+      KEparDbmag_wall = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_wall_d[cqidx];
+    else
+      KEparDbmag_wall = 0.0;
+
     double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
+    double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall+qDphiDbmag_quad_wall_d[cqidx]));
 
     double *fq = (double*) gkyl_array_fetch(mask_out_quad, linidx_phase);
-    if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_scalar[0])) 
+    if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_scalar[0]))
+      fq[linc2] = norm_fac;
+    else if (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_scalar[0]))
       fq[linc2] = norm_fac;
     else
       fq[linc2] = 0.0;
@@ -257,8 +293,8 @@ gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const double *phi_m_data = (const double*) phi_m->on_dev;
   
   gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker<<<dimGrid_conf, dimBlock_conf>>>(*conf_range, 
-    up->basis_at_ords_conf->on_dev, up->charge, phi->on_dev, phi_m_data, up->Dbmag_quad->on_dev,
-    up->qDphiDbmag_quad->on_dev);
+    up->basis_at_ords_conf->on_dev, up->charge, phi->on_dev, phi_m_data, up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev,
+    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev);
 
   const struct gkyl_velocity_map *gvm = up->vel_map;
 
@@ -267,7 +303,8 @@ gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
     int nblocks = phase_range->nblocks, nthreads = phase_range->nthreads;
     gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks, nthreads>>>(*up->grid_phase, *phase_range, *conf_range,
       gvm->local_ext_vel, up->mass, up->ordinates_phase->on_dev,
-      up->bmag_max_z_scalar_gpu, up->qDphiDbmag_quad->on_dev, up->Dbmag_quad->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
+      up->bmag_max_z_scalar_gpu, up->bmag_wall_z_scalar_gpu, up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
+      up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
       gvm->vmap_basis, mask_out->on_dev);
   }
   else {
@@ -278,7 +315,8 @@ gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
 
     gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid, dimBlock>>>(*up->grid_phase, *phase_range, *conf_range,
       gvm->local_ext_vel, up->mass, up->norm_fac, up->ordinates_phase->on_dev,
-      up->bmag_max_z_scalar_gpu, up->qDphiDbmag_quad->on_dev, up->Dbmag_quad->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
+      up->bmag_max_z_scalar_gpu, up->bmag_wall_z_scalar_gpu, up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
+      up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
       gvm->vmap_basis, up->mask_out_quad->on_dev);
 
     // Call cublas to do the matrix multiplication nodal to modal conversion

From 79133e7593da5f2090c1eafd208ff72e8424799f Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 8 Jan 2026 09:56:33 -0500
Subject: [PATCH 08/32] Add support for symmetric tandem  mirrors. Unit tests
 pass and are valgrind clean. Regression test is added and produces reasonable
 results.

---
 gyrokinetic/apps/gk_species_damping.c         |   4 +-
 gyrokinetic/apps/gk_species_fdot_multiplier.c |  68 +-
 gyrokinetic/apps/gkyl_gyrokinetic_priv.h      |   5 +
 .../rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c  | 929 ++++++++++++++++++
 .../unit/ctest_loss_cone_mask_gyrokinetic.c   |   2 +-
 .../zero/gkyl_loss_cone_mask_gyrokinetic.h    |   7 +-
 .../gkyl_loss_cone_mask_gyrokinetic_priv.h    |  13 +-
 gyrokinetic/zero/loss_cone_mask_gyrokinetic.c | 237 ++++-
 8 files changed, 1209 insertions(+), 56 deletions(-)
 create mode 100644 gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c

diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index 206b4fa180..8142f2b829 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -203,7 +203,7 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
         damp->bmag_max_peak_idx, damp->phi_at_bmag_max);
       // Project the loss cone mask.
       gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-        app->field->phi_smooth, damp->phi_at_bmag_max, damp->rate);
+        app->field->phi_smooth, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
       // Multiply by the user's scaling profile.
       gkyl_array_scale_by_cell(damp->rate, damp->scale_prof);
     }
@@ -239,7 +239,7 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
 
       // Project the loss cone mask using the phi_m array.
       gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-        phi, damp->phi_at_bmag_max, damp->rate);
+        phi, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
 
       // Assemble the damping term -scale_prof * mask * f.
       gkyl_array_set(f_buffer, 1.0, fin);
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 895ce1cc67..076c8f0c0e 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -72,15 +72,24 @@ void
 gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
-  // Find the potential at all peak locations (including the mirror throat).
+  // Find the potential at bmag_max
   gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, phi,
     fdmul->bmag_max_peak_idx, fdmul->phi_at_bmag_max);
   // Allgather on phi_at_bmag_max. It's not an allgather.
   // One process has the correct one, but the others do not. Is it a bcast or a sync?
-
-  // Project the loss cone mask using the phi_m array.
-  gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
-    phi, fdmul->phi_at_bmag_max, fdmul->multiplier);
+  
+  if (fdmul->is_tandem) {
+    gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, phi,
+      fdmul->bmag_tandem_peak_idx, fdmul->phi_at_bmag_tandem);
+    // Allgather on phi_at_bmag_tandem. It's not an allgather.
+    // One process has the correct one, but the others do not. Is it a bcast or a sync?
+    gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
+      phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_tandem, fdmul->multiplier);
+  } else {
+    // Project the loss cone mask using the phi_m array.
+    gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
+      phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_max, fdmul->multiplier);
+  }
 
   // Multiply out by the multplier.
   gkyl_array_scale_by_cell(out, fdmul->multiplier);
@@ -199,18 +208,52 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       // Get the LOCAL_MAX peak (bmag maximum along z direction).
       int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder);
       fdmul->bmag_max_peak_idx = num_peaks-2; // Edge is num_peaks-1, so maximum is one less
+      fdmul->bmag_tandem_peak_idx = num_peaks-1; 
       fdmul->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
       fdmul->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
-      fdmul->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, num_peaks-1);
-      fdmul->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, num_peaks-1);
+      fdmul->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, fdmul->bmag_tandem_peak_idx);
+      fdmul->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, fdmul->bmag_tandem_peak_idx);
       fdmul->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(fdmul->bmag_peak_finder);
       fdmul->bmag_max_range = gkyl_array_dg_find_peaks_get_range(fdmul->bmag_peak_finder);
       fdmul->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(fdmul->bmag_peak_finder);
 
       fdmul->phi_at_bmag_max = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis, 
         fdmul->bmag_max_range_ext->volume);
+      fdmul->phi_at_bmag_tandem = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis, 
+        fdmul->bmag_max_range_ext->volume);
       // phi is defined as 0 at the wall
 
+
+      bool is_symmetric, is_tandem;
+      int cdim = app->cdim;
+      if (gkyl_compare_double(-app->grid.lower[cdim-1], app->grid.upper[cdim-1], 1e-12)) {
+        is_symmetric = true;
+      }
+      else if (gkyl_compare_double(app->grid.lower[cdim-1], 0.0, 1e-12)){
+        is_symmetric = false;
+      }
+      else {
+        assert(false); // Needs either the lower bound at 0 or symmetric grid
+      }
+
+      if ( (is_symmetric && num_peaks == 5) || (!is_symmetric && num_peaks == 3) ) {
+        is_tandem = false;
+      }
+      else if ((is_symmetric && num_peaks == 9) || (!is_symmetric && num_peaks == 5)) {
+        is_tandem = true;
+      }
+      else {
+        assert(false); // Unsupported number of extrema for loss-cone multiplier
+      }
+
+      if (is_tandem) {
+        fdmul->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, num_peaks-4);
+        fdmul->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, num_peaks-4);
+      } else {
+        fdmul->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, num_peaks-2);
+        fdmul->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, num_peaks-2);
+      }
+
       // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
         .phase_grid = &gks->grid,
@@ -221,10 +264,13 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
         .vel_range = &gks->local_vel, 
         .vel_map = gks->vel_map,
         .bmag = app->gk_geom->geo_int.bmag,
-        .bmag_wall = fdmul->bmag_wall,
-        .bmag_wall_z_coord = fdmul->bmag_wall_z_coord,
         .bmag_max = fdmul->bmag_max,
         .bmag_max_z_coord = fdmul->bmag_max_z_coord,
+        .bmag_wall = fdmul->bmag_wall,
+        .bmag_wall_z_coord = fdmul->bmag_wall_z_coord,
+        .bmag_tandem = fdmul->bmag_tandem,
+        .bmag_tandem_z_coord = fdmul->bmag_tandem_z_coord,
+        .is_tandem = is_tandem,
         .bmag_max_basis = fdmul->bmag_max_basis,
         .bmag_max_range = fdmul->bmag_max_range,
         .mass = gks->info.mass,
@@ -308,8 +354,12 @@ gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, const
       gkyl_array_release(fdmul->bmag_max_z_coord);
       gkyl_array_release(fdmul->bmag_wall);
       gkyl_array_release(fdmul->bmag_wall_z_coord);
+      gkyl_array_release(fdmul->bmag_tandem);
+      gkyl_array_release(fdmul->bmag_tandem_z_coord);
 
       gkyl_array_release(fdmul->phi_at_bmag_max);
+      gkyl_array_release(fdmul->phi_at_bmag_tandem);
+
       gkyl_array_dg_find_peaks_release(fdmul->bmag_peak_finder);
       gkyl_loss_cone_mask_gyrokinetic_release(fdmul->lcm_proj_op);
     }
diff --git a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
index 7613d4e68e..034285e7d5 100644
--- a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
+++ b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
@@ -837,6 +837,7 @@ struct gk_fdot_multiplier {
   enum gkyl_gyrokinetic_fdot_multiplier_type type; // Type of multiplicative function term.
   bool write_diagnostics; // Whether to write diagnostics out.
   bool evolve; // Whether the multiplicative function is time dependent.
+  bool is_tandem; // Whether we are doing a tandem mirror
   struct gkyl_array *multiplier; // Damping rate.
   struct gkyl_array *multiplier_host; // Host copy for use in IO and projecting.
   struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
@@ -848,11 +849,15 @@ struct gk_fdot_multiplier {
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
   const struct gkyl_array *bmag_wall; // Magnetic field amplitude at the wall per field line.
   const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag_wall per field line.
+  const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case).
+  const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate of bmag_tandem per field line.
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
   const struct gkyl_range *bmag_max_range_ext; // Extended range for bmag_max arrays.
   int bmag_max_peak_idx; // Index of the LOCAL_MAX peak in the peak finder.
+  int bmag_tandem_peak_idx; // Index of the TANDEM_MIRROR peak in the peak finder.
   struct gkyl_array *phi_at_bmag_max; // Phi evaluated at all peak locations.
+  struct gkyl_array *phi_at_bmag_tandem; // Phi evaluated at tandem mirror locations.
   // Functions chosen at runtime.
   void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame);
   void (*advance_times_rate_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
diff --git a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
new file mode 100644
index 0000000000..ad3a128ae9
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
@@ -0,0 +1,929 @@
+#include <math.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State of the pseudo orbit-averaged integrator.
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Haven't started.
+  GK_POA_OAP, // Orbit averaged phase.
+  GK_POA_FDP, // Full dynamics phase.
+  GK_POA_COMPLETED, // Finished simulation.
+};
+
+struct gk_poa_phase_params {
+  enum gk_poa_state phase; // Type of phase.
+  int num_frames; // Number of frames.
+  double duration; // Duration.
+  double alpha; // Factor multiplying collisionless terms.
+  bool is_static_field; // Whether to evolve the field.
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multipler.
+};
+
+// Define the context of the simulation. This is basically all the globals
+struct gk_mirror_ctx
+{
+  int cdim, vdim; // Dimensionality.
+
+  // Plasma parameters
+  double mi; // Ion mass.
+  double me; // Electron mass.
+  double qi; // Ion charge.
+  double qe; // Electron charge.
+  double Te0; // Electron temperature.
+  double Ti0; // Ion temperature.
+  double n0; // Density.
+  double B_p; // Plasma magnetic field (mirror center).
+  double beta; // Plasma beta in the center.
+  double tau; // Temperature ratio.
+
+ 
+  double Ti_perp0; // Reference ion perp temperature.
+  double Ti_par0; // Reference ion par temperature.
+  double cs_m; // Ion sound speed at the throat.
+
+  double nuFrac; // Fraction multiplying collision frequency.
+  double logLambdaIon; // Ion Coulomb logarithm.
+  double nuIon; // Ion-ion collision freq.
+
+  double vti; // Ion thermal speed.
+  double vte; // Electron thermal speed.
+  double c_s; // Ion sound speed.
+  double omega_ci; // Ion gyrofrequency.
+  double rho_s; // Ion sound gyroradius.
+
+  double RatZeq0; // Radius of the field line at Z=0.
+  double Z_min; // Minimum axial coordinate Z.
+  double Z_max; // Maximum axial coordinate Z.
+  double z_min; // Minimum value of the position along the field line.
+  double z_max; // Maximum value of the position along the field line.
+  double psi_eval; // Psi (poloidal flux) of the field line.
+  double psi_in, z_in; // Auxiliary psi and z.
+
+  // Magnetic equilibrium model.
+  double mcB;
+  double gamma;
+  double Z_m; // Axial coordinate at mirror throat.
+  double z_m; // Computational coordinate at mirror throat.
+
+  // Source parameters
+  double NSrcIon;
+  double TSrc0Ion;
+
+  // Physical velocity space limits.
+  double vpar_min_ion, vpar_max_ion;
+  double mu_max_ion;
+  // Computational velocity space limits.
+  double vpar_min_ion_c, vpar_max_ion_c;
+  double mu_min_ion_c, mu_max_ion_c;
+
+  // Grid DOF.
+  int Nz;
+  int Nvpar;
+  int Nmu;
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order;
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run.
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+};
+
+double
+psi_RZ(double RIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+  double psi = 0.5 * pow(RIn, 2.) * mcB *
+               (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+                1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) + 
+                2. / (M_PI * gamma * (1. + pow((ZIn - 2*Z_m) / gamma, 2.))) +
+                2. / (M_PI * gamma * (1. + pow((ZIn + 2*Z_m) / gamma, 2.))));
+  return psi;
+}
+
+double
+R_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double Rout = sqrt(2.0 * psiIn / (app->mcB * 
+    (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) +
+     1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))) +
+     2.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - 2*app->Z_m) / app->gamma, 2.))) +
+     2.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + 2*app->Z_m) / app->gamma, 2.)))
+    )));
+  return Rout;
+}
+
+void
+Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+  *BRad = -(1.0 / 2.0) * Rcoord * mcB *
+          (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) +
+           -2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))) +
+           -4.0 * (ZIn - 2*Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - 2*Z_m) / gamma, 2.), 2.))) +
+           -4.0 * (ZIn + 2*Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + 2*Z_m) / gamma, 2.), 2.)))
+          );
+  *BZ = mcB *
+        (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
+         1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))) +
+         2.0 / (M_PI * gamma * (1.0 + pow((ZIn - 2*Z_m) / gamma, 2.))) +
+         2.0 / (M_PI * gamma * (1.0 + pow((ZIn + 2*Z_m) / gamma, 2.)))
+        );
+  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+}
+
+double
+integrand_z_psiZ(double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = app->psi_in;
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, ZIn, ctx, &BRad, &BZ, &Bmag);
+  return Bmag / BZ;
+}
+
+double
+z_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  app->psi_in = psiIn;
+  double eps = 0.0;
+  struct gkyl_qr_res integral;
+  if (eps <= ZIn)
+  {
+    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14);
+  }
+  else
+  {
+    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14);
+    integral.res = -integral.res;
+  }
+  return integral.res;
+}
+
+// Invert z(Z) via root-finding.
+double
+root_Z_psiz(double Z, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  return app->z_in - z_psiZ(app->psi_in, Z, ctx);
+}
+
+double
+Z_psiz(double psiIn, double zIn, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double maxL = app->Z_max - app->Z_min;
+  double eps = maxL / app->Nz;   // Interestingly using a smaller eps yields larger errors in some geo quantities.
+  app->psi_in = psiIn;
+  app->z_in = zIn;
+  struct gkyl_qr_res Zout;
+  if (zIn >= 0.0)
+  {
+    double fl = root_Z_psiz(-eps, ctx);
+    double fr = root_Z_psiz(app->Z_max + eps, ctx);
+    Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14);
+  }
+  else
+  {
+    double fl = root_Z_psiz(app->Z_min - eps, ctx);
+    double fr = root_Z_psiz(eps, ctx);
+    Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14);
+  }
+  return Zout.res;
+}
+
+void
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->NSrcIon;
+}
+
+void
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  fout[0] = 0.0;
+}
+
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->TSrc0Ion;
+}
+
+// Ion initial conditions
+void
+eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->n0;
+}
+
+void
+eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  fout[0] = 0.0;
+}
+
+void
+eval_temp_par_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->Ti_par0;
+}
+
+void
+eval_temp_perp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->Ti_perp0;
+}
+
+void
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  fout[0] = app->nuIon;
+}
+
+// Geometry evaluation functions for the gk app
+// mapc2p must assume a 3d input xc
+void
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
+{
+  double psi = xc[0];
+  double theta = xc[1];
+  double z = xc[2];
+
+  double Z = Z_psiz(psi, z, ctx);
+  double R = R_psiZ(psi, Z, ctx);
+
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  double x = R * cos(theta);
+  double y = R * sin(theta);
+  xp[0] = x;
+  xp[1] = y;
+  xp[2] = Z;
+}
+
+// bfield_func must assume a 3d input xc
+void
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xc[2];
+
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+
+  double phi = xc[1];
+  // zc are computational coords. 
+  // Set Cartesian components of magnetic field.
+  fout[0] = BRad*cos(phi);
+  fout[1] = BRad*sin(phi);
+  fout[2] = BZ;
+}
+
+void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double vpar_max_ion = app->vpar_max_ion;
+  double mu_max_ion = app->mu_max_ion;
+
+  double cvpar = vc[0], cmu = vc[1];
+  double b = 1.4;
+  vp[0] = vpar_max_ion*tan(cvpar*b)/tan(b);
+  // Cubic map in mu.
+  vp[1] = mu_max_ion*pow(cmu,3);
+}
+
+struct gk_mirror_ctx
+create_ctx(void)
+{
+  int cdim = 1, vdim = 2; // Dimensionality.
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0;
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS;
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+
+  double nuFrac = 1.0;
+  // Ion-ion collision freq.
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+  double vte = sqrt(Te0 / me);
+  double c_s = sqrt(Te0 / mi);
+
+  // Gyrofrequencies and gyroradii.
+  double omega_ci = eV * B_p / mi;
+  double rho_s = c_s / omega_ci;
+
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Endure that Z=0 is not on
+  // the boundary of a cell (due to AD errors).
+  double Z_min = -3.0;
+  double Z_max =  3.0;
+
+  // Parameters controlling the magnetic equilibrium model.
+  double mcB = 1;
+  double gamma = 0.124904;
+  double Z_m = 1.0;
+
+  // Source parameters
+  double NSrcIon = 3.1715e23 / 8.0 / 40.0 / 2.0 * 1.25;
+  double TSrc0Ion = Ti0 * 1.25;
+
+  // Grid parameters
+  double vpar_max_ion = 16 * vti;
+  double vpar_min_ion = -vpar_max_ion;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+
+  // Computational velocity space limits.
+  double vpar_min_ion_c = -1.0;
+  double vpar_max_ion_c =  1.0;
+  double mu_min_ion_c = 0.;
+  double mu_max_ion_c = 1.;
+
+  // Grid DOF:
+  int Nz = 200; // Number of cells in z direction.
+  int Nvpar = 48; // Number of cells in parallel velocity direction.
+  int Nmu = 16;  // Number of cells in mu direction.
+  int poly_order = 1;
+
+  // Initial conditions parameter.s
+  double Ti_perp0 = 10000 * eV;
+  double Ti_par0 = 7500 * eV;
+
+  // Factor multiplying collisionless terms.
+  double alpha_oap = 0.01;
+  double alpha_fdp = 1.0;
+  // Duration of each phase.
+  double tau_oap = 5e-7;
+  double tau_fdp = 3e-9;
+  double tau_fdp_extra = 2*tau_fdp;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 4; // Frames per OAP phase
+  int num_frames_fdp = 4; // Frames per FDP phase
+  int num_frames_fdp_extra = 2*num_frames_fdp;  // Frames for the extra FDP phase
+
+  // Whether to evolve the field.
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+  // Whether to enable positivity.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = true;
+  // Type of df/dt multipler.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Calculate phase structure
+  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2*num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
+  for (int i=0; i<(num_phases-1)/2; i++) {
+    // OAPs.
+    poa_phases[2*i].phase = GK_POA_OAP;
+    poa_phases[2*i].num_frames = num_frames_oap;
+    poa_phases[2*i].duration = tau_oap;
+    poa_phases[2*i].alpha = alpha_oap;
+    poa_phases[2*i].is_static_field = is_static_field_oap;
+    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs.
+    poa_phases[2*i+1].phase = GK_POA_FDP;
+    poa_phases[2*i+1].num_frames = num_frames_fdp;
+    poa_phases[2*i+1].duration = tau_fdp;
+    poa_phases[2*i+1].alpha = alpha_fdp;
+    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
+    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // Add an extra, longer FDP.
+  poa_phases[num_phases-1].phase = GK_POA_FDP;
+  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases-1].duration = tau_fdp_extra;
+  poa_phases[num_phases-1].alpha = alpha_fdp;
+  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim,  .vdim = vdim,
+    .mi = mi,  .qi = qi,
+    .me = me,  .qe = qe,
+    .Te0 = Te0,  .Ti0 = Ti0,  .n0 = n0,
+    .B_p = B_p,  .beta = beta,  .tau = tau,
+    .nuFrac = nuFrac,  .logLambdaIon = logLambdaIon,  .nuIon = nuIon,
+    .vti = vti,  .vte = vte,  .c_s = c_s,
+    .omega_ci = omega_ci,  .rho_s = rho_s,
+    .RatZeq0 = RatZeq0,
+    .Z_min = Z_min,  .Z_max = Z_max,
+    // Parameters controlling the magnetic equilibrium model.
+    .mcB = mcB,  .gamma = gamma,
+    .Z_m = Z_m,
+    // Initial condition parameters.
+    .Ti_perp0 = Ti_perp0,  .Ti_par0 = Ti_par0,
+    // Source parameters
+    .NSrcIon = NSrcIon,
+    .TSrc0Ion = TSrc0Ion,
+    // Physical velocity space limits.
+    .vpar_min_ion = vpar_min_ion,
+    .vpar_max_ion = vpar_max_ion,
+    .mu_max_ion = mu_max_ion,
+    // Computational velocity space limits.
+    .vpar_min_ion_c = vpar_min_ion_c,
+    .vpar_max_ion_c = vpar_max_ion_c,
+    .mu_min_ion_c = mu_min_ion_c,
+    .mu_max_ion_c = mu_max_ion_c,
+    // Grid DOF.
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = {Nz, Nvpar, Nmu},
+    .poly_order = poly_order,
+    // Time integration and I/O parameters.
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq     = write_phase_freq    , 
+    .int_diag_calc_freq   = int_diag_calc_freq  , 
+    .dt_failure_tol       = dt_failure_tol      , 
+    .num_failures_max     = num_failures_max    , 
+  };
+
+  // Populate a couple more values in the context.
+  ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
+  ctx.z_min    = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max    = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+
+  return ctx;
+}
+
+void
+release_ctx(struct gk_mirror_ctx *ctx)
+{
+  gkyl_free(ctx->poa_phases);
+}
+
+void
+calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+  double t_curr, bool force_calc, double dt)
+{
+  if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
+    gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+    gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+
+    if ( !(dt < 0.0) )
+      gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
+  }
+}
+
+void
+write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
+  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+{
+  bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  if (trig_now_conf || force_write) {
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
+
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (trig_now_phase || force_write) {
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+
+    gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
+  }
+}
+
+struct time_frame_state {
+  double t_curr; // Current simulation time.
+  double t_end; // End time of current phase.
+  int frame_curr; // Current frame.
+  int num_frames; // Number of frames at the end of current phase.
+};
+
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  // Reset I/O triggers:
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  int frame_curr = tfs->frame_curr;
+  int num_frames = tfs->num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+
+  // Prevent division by zero when frame_curr equals num_frames
+  int frames_remaining = num_frames - frame_curr;
+  double time_remaining = t_end - t_curr;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  tfs->t_end = tfs->t_curr + pparams->duration;
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames;
+
+  // Run an OAP or FDP.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  
+  // Reset I/O triggers:
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Reset simulation parameters and function pointers.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .type = pparams->fdot_mult_type,
+    .cellwise_const = true,
+    .write_diagnostics = true,
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx->me,
+    .electron_charge = ctx->qe,
+    .electron_temp = ctx->Te0,
+    .polarization_bmag = ctx->B_p,
+    .is_static = pparams->is_static_field,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Compute initial guess of maximum stable time-step.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check.
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps))
+  {
+    if (step == 1 || step % 1 == 0)
+      gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
+
+    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    if (step == 1 || step % 1 == 0)
+      gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success)
+    {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested;
+
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end);
+
+    if (dt_init < 0.0) {
+      dt_init = status.dt_actual;
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break;
+      }
+    }
+    else {
+      num_failures = 0;
+    }
+
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr;
+  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+}
+
+int main(int argc, char **argv)
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d=0; d<ctx.cdim; d++)
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  for (int d=0; d<ctx.vdim; d++)
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,  .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c},
+    .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c},
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
+    },
+
+    .projection = {
+      .proj_id = GKYL_PROJ_BIMAXWELLIAN,
+      .density = eval_density_ion,
+      .upar = eval_upar_ion,
+      .temppar = eval_temp_par_ion,
+      .tempperp = eval_temp_perp_ion,
+      .ctx_density = &ctx,
+      .ctx_upar = &ctx,
+      .ctx_temppar = &ctx,
+      .ctx_tempperp = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions =  {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .self_nu = evalNuIon,
+      .self_nu_ctx = &ctx,
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+	.density = eval_density_ion_source,
+        .upar = eval_upar_ion_source,
+        .temp = eval_temp_ion_source,
+        .ctx_density = &ctx,
+        .ctx_upar = &ctx,
+        .ctx_temp = &ctx,
+      }, 
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .num_diag_moments = 4,
+    .diag_moments = {GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+  };
+
+  struct gkyl_gyrokinetic_field field = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx.me,
+    .electron_charge = ctx.qe,
+    .electron_temp = ctx.Te0,
+    .is_static = false, // So solvers are allocated.
+  };
+
+  // GK app
+  struct gkyl_gk app_inp = { 
+    .name = "gk_mirror_tandem_boltz_elc_poa_1x2v",
+    .cdim = ctx.cdim,
+    .lower = {ctx.z_min},
+    .upper = {ctx.z_max},
+    .cells = { cells_x[0] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+
+    .geometry = {
+      .geometry_id = GKYL_MAPC2P,
+      .world = {ctx.psi_eval, 0.0},
+      .mapc2p = mapc2p, // Mapping of computational to physical space.
+      .c2p_ctx = &ctx,
+      .bfield_func = bfield_func, // Magnetic field.
+      .bfield_ctx = &ctx
+    },
+
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+
+    .num_species = 1,
+    .species = {ion},
+
+    .field = field,
+
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0] },
+      .comm = comm,
+    },
+  };
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit=0; pit<ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    };
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  if (app_args.num_steps != INT_MAX)
+    phase_idx_end = 1;
+
+  // Loop over number of number of phases;
+  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0)
+  {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+  freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+  
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
\ No newline at end of file
diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index 40554e6328..dbedc75b6c 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -337,7 +337,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   };
   struct gkyl_loss_cone_mask_gyrokinetic *proj_mask = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
 
-  gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, mask);
+  gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, phi_m, mask);
 
   gkyl_array_copy(mask_ho, mask);
 
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
index 3848acf2ba..c4b67aab06 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
@@ -32,8 +32,11 @@ struct gkyl_loss_cone_mask_gyrokinetic_inp {
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line (1D DG expansion for 2x, scalar for 1x).
   const struct gkyl_array *bmag_wall; // Magnetic field magnitude at the wall (1D DG expansion for 2x, scalar for 1x).
   const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag at the wall (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case).
+  const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate of bmag_tandem per field line.
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays (1D for 2x, 0D for 1x).
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
+  bool is_tandem; // =True
   double mass; // Species mass.
   double charge; // Species charge.
   enum gkyl_quad_type qtype; // Quadrature rule/nodes.
@@ -69,11 +72,13 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
  * @param conf_rng Configuration-space range.
  * @param phi Electrostatic potential.
  * @param phi_m Electrostatic potential at the mirror throat (DG array on reduced grid).
+ * @param phi_tandem Electrostatic potential at the tandem mirror throat (DG array on reduced grid).
  * @param mask_out Output masking function.
  */
 void gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const struct gkyl_array *phi_m, struct gkyl_array *mask_out);
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem,
+  struct gkyl_array *mask_out);
 
 /**
  * Delete updater.
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
index 234fcad4fb..cb11caf772 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
@@ -50,6 +50,8 @@ struct gkyl_loss_cone_mask_gyrokinetic {
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
   const struct gkyl_array *bmag_wall; // Magnetic field magnitude at the wall (1D DG expansion for 2x, scalar for 1x).
   const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag at the wall (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case).
+  const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
   
@@ -58,6 +60,7 @@ struct gkyl_loss_cone_mask_gyrokinetic {
   double *bmag_max_z_scalar_gpu; // Single z-coordinate for GPU (1x case only).
   double *bmag_wall_z_scalar_gpu; // Single z-coordinate for GPU (1x case only).
   
+  bool is_tandem; // Whether we are dealing with a tandem mirror case.
   bool use_gpu; // Boolean if we are performing projection on device.
 
   loss_cone_mask_gyrokinetic_c2p_t c2p_pos; // Function transforming position comp to phys coords.
@@ -88,8 +91,11 @@ struct gkyl_loss_cone_mask_gyrokinetic {
                                       // at configuration-space quadrature nodes.
   struct gkyl_array *qDphiDbmag_quad_wall; // Array keeping q*phi/(B_wall-B)
                                       // at configuration-space quadrature nodes.
+  struct gkyl_array *qDphiDbmag_quad_tandem; // Array keeping q*(phi-phi_tandem)/(B_tandem-B)
+                                      // at configuration-space quadrature nodes.
   struct gkyl_array *Dbmag_quad; // B_max-B at configuration-space quadrature nodes.
   struct gkyl_array *Dbmag_quad_wall; // B_wall-B at configuration-space quadrature nodes.
+  struct gkyl_array *Dbmag_quad_tandem; // B_tandem-B at configuration-space quadrature nodes.
 
   struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem; // Structure of data which converts  
                                                           // stores the info to convert phase
@@ -108,7 +114,8 @@ struct gkyl_loss_cone_mask_gyrokinetic {
  */
 void 
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max, const double *bmag_wall);
+  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max,
+  const double *bmag_wall);
 
 /**
  * Compute projection of the loss cone masking function on the phase-space basis
@@ -119,10 +126,12 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *u
  * @param conf_rng Configuration-space range.
  * @param phi Electrostatic potential.
  * @param phi_m Electrostatic potential at the mirror throat (DG array on reduced grid).
+ * @param phi_tandem Electrostatic potential at the tandem mirror throat (DG array on reduced grid).
  * @param mask_out Output masking function.
  */
 void
 gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const struct gkyl_array *phi_m, struct gkyl_array *mask_out);
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem,
+  struct gkyl_array *mask_out);
 #endif
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index 2808c7106c..db950b60ff 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -156,13 +156,15 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
 
 static void
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up, 
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag)
+  const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
+  struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_max)
 {
   // Get bmag_max-bmag at quadrature nodes.
   // bmag_max is now a per-field-line array (1D for 2x, scalar for 1x).
 #ifdef GKYL_HAVE_CUDA
   if (up->use_gpu)
-    return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(up, conf_range, bmag);
+    return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(up, conf_range, bmag,
+      Dbmag_quad, bmag_max);
 #endif
 
   int cdim = up->cdim, pdim = up->pdim;
@@ -176,45 +178,92 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up,
     long linidx = gkyl_range_idx(conf_range, conf_iter.idx);
 
     const double *bmag_d = gkyl_array_cfetch(bmag, linidx);
-    double *Dbmag_quad = gkyl_array_fetch(up->Dbmag_quad, linidx);
-    double *Dbmag_quad_wall = gkyl_array_fetch(up->Dbmag_quad_wall, linidx);
+    double *Dbmag_quad_d = gkyl_array_fetch(Dbmag_quad, linidx);
 
     // Get bmag_max for this field line (psi value).
     // For 1x: bmag_max is a single value (index 0).
     // For 2x: bmag_max varies with psi (x-direction), so use conf_iter.idx[0].
     double bmag_max_val;
-    double bmag_wall_val;
     if (cdim == 1) {
       // 1x case: single value.
-      const double *bmag_max_d = gkyl_array_cfetch(up->bmag_max, 0);
+      const double *bmag_max_d = gkyl_array_cfetch(bmag_max, 0);
       bmag_max_val = bmag_max_d[0]; // Just the constant coefficient.
-
-      const double *bmag_wall_d = gkyl_array_cfetch(up->bmag_wall, 0);
-      bmag_wall_val = bmag_wall_d[0]; // Just the constant coefficient.
     }
     else {
       // 2x case: evaluate bmag_max at this psi cell.
       // The bmag_max array is 1D in psi, so we need the psi index.
       int psi_idx[1] = {conf_iter.idx[0]};
       long psi_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
-      const double *bmag_max_d = gkyl_array_cfetch(up->bmag_max, psi_linidx);
+      const double *bmag_max_d = gkyl_array_cfetch(bmag_max, psi_linidx);
       // For simplicity, evaluate at cell center (logical coord 0).
       double xc[1] = {0.0};
       bmag_max_val = up->bmag_max_basis->eval_expand(xc, bmag_max_d);
+    }
 
-      const double *bmag_wall_d = gkyl_array_cfetch(up->bmag_wall, psi_linidx);
-      bmag_wall_val = up->bmag_max_basis->eval_expand(xc, bmag_wall_d);
+    // Sum over basis 
+    for (int n=0; n<tot_quad_conf; ++n) {
+      const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
+      for (int k=0; k<num_basis_conf; ++k) {
+        Dbmag_quad_d[n] += bmag_d[k]*b_ord[k];
+      }
+      Dbmag_quad_d[n] = bmag_max_val - Dbmag_quad_d[n];
+    }
+  }
+}
+
+static void
+gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_wall(gkyl_loss_cone_mask_gyrokinetic *up, 
+  const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
+  struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_max)
+{
+  // Get bmag_max-bmag at quadrature nodes.
+  // bmag_max is now a per-field-line array (1D for 2x, scalar for 1x).
+#ifdef GKYL_HAVE_CUDA
+  if (up->use_gpu)
+    return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_wall_cu(up, conf_range, bmag,
+      Dbmag_quad, bmag_max);
+#endif
+
+  int cdim = up->cdim, pdim = up->pdim;
+
+  int tot_quad_conf = up->tot_quad_conf;
+  int num_basis_conf = up->num_basis_conf;
+
+  struct gkyl_range_iter conf_iter;
+  gkyl_range_iter_init(&conf_iter, conf_range);
+  while (gkyl_range_iter_next(&conf_iter)) {
+    long linidx = gkyl_range_idx(conf_range, conf_iter.idx);
+
+    const double *bmag_d = gkyl_array_cfetch(bmag, linidx);
+    double *Dbmag_quad_n = gkyl_array_fetch(Dbmag_quad, linidx);
+
+    // Get bmag_max for this field line (psi value).
+    // For 1x: bmag_max is a single value (index 0).
+    // For 2x: bmag_max varies with psi (x-direction), so use conf_iter.idx[0].
+    double bmag_max_val;
+    if (cdim == 1) {
+      // 1x case: single value.
+      const double *bmag_max_d = gkyl_array_cfetch(bmag_max, 0);
+      bmag_max_val = bmag_max_d[0]; // Just the constant coefficient.
+    }
+    else {
+      // 2x case: evaluate bmag_max at this psi cell.
+      // The bmag_max array is 1D in psi, so we need the psi index.
+      int psi_idx[1] = {conf_iter.idx[0]};
+      long psi_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
+      const double *bmag_max_d = gkyl_array_cfetch(bmag_max, psi_linidx);
+      // For simplicity, evaluate at cell center (logical coord 0).
+      double xc[1] = {0.0};
+      bmag_max_val = up->bmag_max_basis->eval_expand(xc, bmag_max_d);
     }
 
     // Sum over basis 
     for (int n=0; n<tot_quad_conf; ++n) {
       const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
       for (int k=0; k<num_basis_conf; ++k) {
-        Dbmag_quad[n] += bmag_d[k]*b_ord[k];
-        Dbmag_quad_wall[n] += bmag_d[k]*b_ord[k];
+        Dbmag_quad_n[n] += bmag_d[k]*b_ord[k];
       }
-      Dbmag_quad[n] = bmag_max_val - Dbmag_quad[n];
-      Dbmag_quad_wall[n] = Dbmag_quad_wall[n] - bmag_wall_val;
+      Dbmag_quad_n[n] = Dbmag_quad_n[n] - bmag_max_val;
     }
   }
 }
@@ -228,6 +277,7 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   up->vel_map = gkyl_velocity_map_acquire(inp->vel_map);
   up->mass = inp->mass;
   up->charge = inp->charge;
+  up->is_tandem = inp->is_tandem;
 
   up->cdim = inp->conf_basis->ndim;
   up->pdim = inp->phase_basis->ndim;
@@ -297,6 +347,7 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
       inp->conf_range_ext->volume*inp->vel_range->volume);
     up->qDphiDbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
     up->qDphiDbmag_quad_wall = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+    up->qDphiDbmag_tandem = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
 
     // Allocate the memory for computing the specific phase nodal to modal calculation
     struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem_ho;
@@ -359,6 +410,13 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   up->bmag_max_z_coord = gkyl_array_acquire(inp->bmag_max_z_coord);
   up->bmag_wall = gkyl_array_acquire(inp->bmag_wall);
   up->bmag_wall_z_coord = gkyl_array_acquire(inp->bmag_wall_z_coord);
+  if (up->is_tandem) {
+    up->bmag_tandem = gkyl_array_acquire(inp->bmag_tandem);
+    up->bmag_tandem_z_coord = gkyl_array_acquire(inp->bmag_tandem_z_coord);
+  } else {
+    up->bmag_tandem = gkyl_array_acquire(inp->bmag_max);
+    up->bmag_tandem_z_coord = gkyl_array_acquire(inp->bmag_max_z_coord);
+  }
   up->bmag_max_basis = inp->bmag_max_basis;
   up->bmag_max_range = inp->bmag_max_range;
 
@@ -366,15 +424,20 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   if (up->use_gpu) {
     up->Dbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
     up->Dbmag_quad_wall = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+    up->Dbmag_quad_tandem = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
   } else {
     up->Dbmag_quad = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
     up->Dbmag_quad_wall = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+    up->Dbmag_quad_tandem = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
   }
 
   gkyl_array_clear(up->Dbmag_quad, 0.0); 
   gkyl_array_clear(up->Dbmag_quad_wall, 0.0);
+  gkyl_array_clear(up->Dbmag_quad_tandem, 0.0);
   
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag);
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad, up->bmag_max);
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_wall(up, inp->conf_range, inp->bmag, up->Dbmag_quad_wall, up->bmag_wall);
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_tandem, up->bmag_tandem);
     
   return up;
 }
@@ -389,12 +452,15 @@ proj_on_basis(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array
   const double* GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
   const double* GKYL_RESTRICT func_at_ords = fun_at_ords->data;
 
-  for (int k=0; k<num_basis; ++k) f[k] = 0.0;
-  
+  for (int k=0; k<num_basis; ++k) {
+    f[k] = 0.0;
+  }
+
   for (int imu=0; imu<tot_quad; ++imu) {
     double tmp = weights[imu]*func_at_ords[imu];
-    for (int k=0; k<num_basis; ++k)
+    for (int k=0; k<num_basis; ++k) {
       f[k] += tmp*basis_at_ords[k+num_basis*imu];
+    }
   }
 }
 
@@ -408,7 +474,9 @@ nod_to_mod_reduce(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_a
   const double* GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
   const double* GKYL_RESTRICT func_at_ords = fun_at_ords->data;
 
-  for (int k=0; k<num_basis; ++k) f[k] = 0.0;
+  for (int k=0; k<num_basis; ++k) {
+    f[k] = 0.0;
+  }
   f[0] = 1.0;
   
   for (int imu=0; imu<tot_quad; ++imu) {
@@ -422,13 +490,14 @@ nod_to_mod_reduce(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_a
 void
 gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const struct gkyl_array *phi_m, struct gkyl_array *mask_out)
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, 
+  const struct gkyl_array *phi_tandem, struct gkyl_array *mask_out)
 {
 
 #ifdef GKYL_HAVE_CUDA
   if (up->use_gpu)
-    return gkyl_loss_cone_mask_gyrokinetic_advance_cu(up, phase_range, conf_range, 
-      phi, phi_m, mask_out);
+    return gkyl_loss_cone_mask_gyrokinetic_advance_cu(up, phase_range, conf_range,
+      phi, phi_m, phi_tandem, mask_out);
 #endif
 
   int cdim = up->cdim, pdim = up->pdim;
@@ -437,16 +506,21 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   int tot_quad_conf = up->tot_quad_conf;
   int num_basis_conf = up->num_basis_conf;
 
+  bool is_tandem = up->is_tandem;
+
   struct gkyl_range vel_rng;
   struct gkyl_range_iter conf_iter, vel_iter;
 
   int pidx[GKYL_MAX_DIM], rem_dir[GKYL_MAX_DIM] = { 0 };
-  for (int d=0; d<conf_range->ndim; ++d) rem_dir[d] = 1;
+  for (int d=0; d<conf_range->ndim; ++d) {
+    rem_dir[d] = 1;
+  }
 
   double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
   double phi_quad[tot_quad_conf];
   double qDphiDbmag_quad[tot_quad_conf]; // charge*(phi-phi_m)/(bmag_max-bmag[0]).
   double qDphiDbmag_quad_wall[tot_quad_conf]; // charge*(phi-phi_m)/(bmag[0]-bmag_wall).
+  double qDphiDbmag_quad_tandem[tot_quad_conf]; // charge*(phi-phi_m)/(bmag_max-bmag_tandem).
 
   // Outer loop over configuration space cells; for each
   // config-space cell inner loop walks over velocity space.
@@ -457,23 +531,29 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
     const double *phi_d = gkyl_array_cfetch(phi, linidx_conf);
     const double *Dbmag_quad = gkyl_array_cfetch(up->Dbmag_quad, linidx_conf);
     const double *Dbmag_quad_wall = gkyl_array_cfetch(up->Dbmag_quad_wall, linidx_conf);
+    const double *Dbmag_quad_tandem = is_tandem ? 
+      gkyl_array_cfetch(up->Dbmag_quad_tandem, linidx_conf) : gkyl_array_cfetch(up->Dbmag_quad, linidx_conf);
 
     // Get phi_m value for this field line.
     // For 1x: single value (phi_m is a scalar stored as p=0 DG expansion).
     // For 2x: varies with psi, evaluate at this psi cell.
-    double phi_m_val;
+    double phi_m_val, phi_tandem_m_val;
     if (cdim == 1) {
       // 1x case: single scalar value stored as p=0 DG expansion.
       const double *phi_m_d = gkyl_array_cfetch(phi_m, 0);
+      const double *phi_tandem_m_d = gkyl_array_cfetch(phi_tandem, 0);
       phi_m_val = phi_m_d[0];
+      phi_tandem_m_val = phi_tandem_m_d[0];
     } else {
       // 2x case: evaluate phi_m at this psi cell center.
       int psi_idx[1] = {conf_iter.idx[0]};
       long phi_m_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
       const double *phi_m_d = gkyl_array_cfetch(phi_m, phi_m_linidx);
+      const double *phi_tandem_m_d = gkyl_array_cfetch(phi_tandem, phi_m_linidx);
       // Evaluate at cell center (logical coord 0).
       double xc_log[1] = {0.0};
       phi_m_val = up->bmag_max_basis->eval_expand(xc_log, phi_m_d);
+      phi_tandem_m_val = up->bmag_max_basis->eval_expand(xc_log, phi_tandem_m_d);
     }
 
     // Sum over basis for given potential phi.
@@ -482,18 +562,29 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
 
       // Compute the configuration-space quadrature
       phi_quad[n] = 0.0;
-      for (int k=0; k<num_basis_conf; ++k)
+      for (int k=0; k<num_basis_conf; ++k) {
         phi_quad[n] += phi_d[k]*b_ord[k];
+      }
 
-      if (Dbmag_quad[n] > 0.0)
+      if (Dbmag_quad[n] > 0.0) {
         qDphiDbmag_quad[n] = up->charge*(phi_quad[n]-phi_m_val)/Dbmag_quad[n];
-      else
+      } else {
         qDphiDbmag_quad[n] = 0.0;
+      }
 
-      if (Dbmag_quad_wall[n] > 0.0)
+      if (Dbmag_quad_wall[n] > 0.0) {
         qDphiDbmag_quad_wall[n] = up->charge*phi_quad[n]/Dbmag_quad_wall[n];
-      else
+      } else {
         qDphiDbmag_quad_wall[n] = 0.0;
+      }
+
+      if (is_tandem) {
+        if (Dbmag_quad_tandem[n] > 0.0) {
+          qDphiDbmag_quad_tandem[n] = up->charge*(phi_quad[n]-phi_tandem_m_val)/Dbmag_quad_tandem[n];
+        } else {
+          qDphiDbmag_quad_tandem[n] = 0.0;
+        }
+      }
     }
 
     // Inner loop over velocity space.
@@ -530,30 +621,46 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         }
 
         // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
+        // KEparDbmag_wall = 0.5*mass*pow(vpar,2)/(bmag[0]-bmag_wall).
+        // KEparDbmag_tandem = 0.5*mass*pow(vpar,2)/(bmag_tandem-bmag[0]).
         double KEparDbmag = 0.0;
         double KEparDbmag_wall = 0.0;
+        double KEparDbmag_tandem = 0.0;
+
         if (Dbmag_quad[cqidx] > 0.0) {
           KEparDbmag = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad[cqidx];
         } else {
           KEparDbmag = 0.0;
         }
 
-        if (Dbmag_quad_wall[cqidx] > 0.0)
+        if (Dbmag_quad_wall[cqidx] > 0.0) {
           KEparDbmag_wall = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad_wall[cqidx];
-        else
+        } else {
           KEparDbmag_wall = 0.0;
+        }
+
+        if (Dbmag_quad_tandem[cqidx] > 0.0) {
+          KEparDbmag_tandem = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad_tandem[cqidx];
+        } else {
+          KEparDbmag_tandem = 0.0;
+        }
 
         double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad[cqidx]);
         double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall+qDphiDbmag_quad_wall[cqidx]));
+        double mu_bound_tandem = GKYL_MAX2(0.0, KEparDbmag_tandem+qDphiDbmag_quad_tandem[cqidx]);
 
         // Get the z-coordinate of bmag_max for this field line.
         // For 1x: single value (index 0).
         // For 2x: varies with psi, so use conf_iter.idx[0].
-        double bmag_max_z_val;
+        double bmag_max_z_val, bmag_tandem_z_val;
         if (cdim == 1) {
           // 1x case: single value.
           const double *bmag_max_z_d = gkyl_array_cfetch(up->bmag_max_z_coord, 0);
           bmag_max_z_val = bmag_max_z_d[0];
+          if (is_tandem) {
+            const double *bmag_tandem_z_d = gkyl_array_cfetch(up->bmag_tandem_z_coord, 0);
+            bmag_tandem_z_val = bmag_tandem_z_d[0];
+          }
         } else {
           // 2x case: evaluate bmag_max_z at this psi cell.
           int psi_idx[1] = {conf_iter.idx[0]};
@@ -562,16 +669,61 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
           // For simplicity, evaluate at cell center (logical coord 0).
           double xc[1] = {0.0};
           bmag_max_z_val = up->bmag_max_basis->eval_expand(xc, bmag_max_z_d);
+          if (is_tandem) {
+            const double *bmag_tandem_z_d = gkyl_array_cfetch(up->bmag_tandem_z_coord, bmag_max_z_linidx);
+            bmag_tandem_z_val = up->bmag_max_basis->eval_expand(xc, bmag_tandem_z_d);
+          }
         }
 
         double *fq = gkyl_array_fetch(up->fun_at_ords, pqidx);
         // xmu[cdim-1] is the z-coordinate (last config space coordinate).
-        if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_val)) {
-          fq[0] = 1.0 * up->norm_fac;
-        } else if (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val)) {
-          fq[0] = 1.0 * up->norm_fac;
+        
+        if (is_tandem) {
+          // Tandem mirror trapping condition:
+          // Calculate mu_outer (barrier to outer mirror) and mu_inner (barrier to inner tandem).
+          // A particle is trapped if mu < min(mu_outer, mu_inner).
+          // This captures particles that need to overcome the lower of the two barriers to escape.
+          
+          double mu_bound_outer = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad[cqidx]);  // Barrier to outer mirror peak
+          double mu_bound_inner = GKYL_MAX2(0.0, KEparDbmag_tandem + qDphiDbmag_quad_tandem[cqidx]);  // Barrier to inner tandem peak
+          
+          // Determine which region we're in based on position
+          bool in_outer_cell = fabs(xmu[cdim-1]) < fabs(bmag_max_z_val) &&
+                               fabs(xmu[cdim-1]) > fabs(bmag_tandem_z_val);
+          bool in_central_cell = fabs(xmu[cdim-1]) <= fabs(bmag_tandem_z_val);
+          
+          if (in_outer_cell) {
+            // Between tandem and outer mirror - check outer barrier
+            if (mu_bound_outer < xmu[cdim+1]) {
+              fq[0] = 1.0 * up->norm_fac;
+            } else {
+              fq[0] = 0.0;
+            }
+          } else if (in_central_cell) {
+            // In central cell - must overcome the maximum of both barriers
+            double mu_bound_max = GKYL_MIN2(mu_bound_outer, mu_bound_inner);
+            if (mu_bound_max < xmu[cdim+1]) {
+              fq[0] = 1.0 * up->norm_fac;
+            } else {
+              fq[0] = 0.0;
+            }
+          } else {
+            // In the outer wall region beyond outer mirror
+            if (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val)) {
+              fq[0] = 1.0 * up->norm_fac;
+            } else {
+              fq[0] = 0.0;
+            }
+          }
         } else {
-          fq[0] = 0.0;
+          // Single mirror case (original logic)
+          if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_val)) {
+            fq[0] = 1.0 * up->norm_fac;
+          } else if (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val)) {
+            fq[0] = 1.0 * up->norm_fac;
+          } else {
+            fq[0] = 0.0;
+          }
         }
       }
       // Compute DG expansion coefficients of the mask.
@@ -599,22 +751,25 @@ gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up)
   gkyl_array_release(up->fun_at_ords);
   gkyl_array_release(up->Dbmag_quad);
   gkyl_array_release(up->Dbmag_quad_wall);
+  gkyl_array_release(up->Dbmag_quad_tandem);
 
   gkyl_array_release(up->bmag_max);
   gkyl_array_release(up->bmag_max_z_coord);
   gkyl_array_release(up->bmag_wall);
   gkyl_array_release(up->bmag_wall_z_coord);
+  gkyl_array_release(up->bmag_tandem);
+  gkyl_array_release(up->bmag_tandem_z_coord);
 
   if (up->use_gpu) {
     gkyl_cu_free(up->p2c_qidx);
     gkyl_array_release(up->mask_out_quad);
     gkyl_array_release(up->qDphiDbmag_quad);
     gkyl_array_release(up->qDphiDbmag_quad_wall);
+    gkyl_array_release(up->qDphiDbmag_quad_tandem);
+
     gkyl_mat_mm_array_mem_release(up->phase_nodal_to_modal_mem);
     gkyl_cu_free(up->bmag_max_z_scalar_gpu);
-    // Note: bmag_max and bmag_max_z_coord are owned by gk_geometry, not us.
   }
-  // Note: bmag_max and bmag_max_z_coord are owned by gk_geometry, not us.
 
   gkyl_free(up);
 }

From 4229238e4d960d5681c8cf049ac13158c15541d1 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 8 Jan 2026 10:30:25 -0500
Subject: [PATCH 09/32] Refactor loss cone mask gyrokinetic code for improved
 clarity and performance

- Introduced a helper function `mkarr` to streamline array allocation for GPU and CPU.
- Removed the `gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_wall` function and integrated its logic into the main processing flow.
- Updated the GPU kernel `gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker` to compute `Dbmag_quad` directly from `bmag_peak` instead of `bmag_max`.
- Enhanced tandem mirror support by adding handling for `bmag_peak` and `phi_m` in the GPU kernels.
- Simplified the logic for determining trapped particles in the `gkyl_loss_cone_mask_gyrokinetic_ker` and `gkyl_loss_cone_mask_gyrokinetic_quad_ker` functions.
- Improved readability and maintainability by restructuring conditional checks and variable assignments.
---
 .../gkyl_loss_cone_mask_gyrokinetic_priv.h    |  16 +-
 gyrokinetic/zero/loss_cone_mask_gyrokinetic.c | 114 ++-----
 .../zero/loss_cone_mask_gyrokinetic_cu.cu     | 323 +++++++++++++-----
 3 files changed, 273 insertions(+), 180 deletions(-)

diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
index cb11caf772..f19b41ad76 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
@@ -94,7 +94,7 @@ struct gkyl_loss_cone_mask_gyrokinetic {
   struct gkyl_array *qDphiDbmag_quad_tandem; // Array keeping q*(phi-phi_tandem)/(B_tandem-B)
                                       // at configuration-space quadrature nodes.
   struct gkyl_array *Dbmag_quad; // B_max-B at configuration-space quadrature nodes.
-  struct gkyl_array *Dbmag_quad_wall; // B_wall-B at configuration-space quadrature nodes.
+  struct gkyl_array *Dbmag_quad_wall; // B-B_wall at configuration-space quadrature nodes.
   struct gkyl_array *Dbmag_quad_tandem; // B_tandem-B at configuration-space quadrature nodes.
 
   struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem; // Structure of data which converts  
@@ -104,18 +104,18 @@ struct gkyl_loss_cone_mask_gyrokinetic {
 
 #ifdef GKYL_HAVE_CUDA
 /**
- * Obtain bmag_max-bmag at conf-space quadrature nodes and store it in up->Dbmag_quad.
+ * Obtain bmag_peak-bmag at conf-space quadrature nodes and store it in Dbmag_quad.
  *
- * @param up Project on basis updater to run.
- * @param conf_rng Configuration-space range.
+ * @param up Loss cone mask updater.
+ * @param conf_range Configuration-space range.
  * @param bmag Magnetic field magnitude.
- * @param bmag_max Maximum bmag.
- * @param bmag_wall Minimum bmag.
+ * @param Dbmag_quad Output array (bmag_peak - bmag) at quadrature nodes.
+ * @param bmag_peak Peak bmag value (per-field-line array for 2x, scalar for 1x).
  */
 void 
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max,
-  const double *bmag_wall);
+  const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
+  struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_peak);
 
 /**
  * Compute projection of the loss cone masking function on the phase-space basis
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index db950b60ff..0ef9fba4cd 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -18,6 +18,14 @@
 //          = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]) + charge*(phi-phi_m)/(bmag_max-bmag[0]);
 //
 
+// allocate array (filled with zeros)
+static struct gkyl_array*
+mkarr(long nc, long size, bool use_gpu)
+{
+  return use_gpu? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size)
+    : gkyl_array_new(GKYL_DOUBLE, nc, size);
+}
+
 // Identity comp to phys coord mapping, for when user doesn't provide a map.
 static inline void
 c2p_pos_identity(const double *xcomp, double *xphys, void *ctx)
@@ -211,63 +219,6 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up,
   }
 }
 
-static void
-gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_wall(gkyl_loss_cone_mask_gyrokinetic *up, 
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
-  struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_max)
-{
-  // Get bmag_max-bmag at quadrature nodes.
-  // bmag_max is now a per-field-line array (1D for 2x, scalar for 1x).
-#ifdef GKYL_HAVE_CUDA
-  if (up->use_gpu)
-    return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_wall_cu(up, conf_range, bmag,
-      Dbmag_quad, bmag_max);
-#endif
-
-  int cdim = up->cdim, pdim = up->pdim;
-
-  int tot_quad_conf = up->tot_quad_conf;
-  int num_basis_conf = up->num_basis_conf;
-
-  struct gkyl_range_iter conf_iter;
-  gkyl_range_iter_init(&conf_iter, conf_range);
-  while (gkyl_range_iter_next(&conf_iter)) {
-    long linidx = gkyl_range_idx(conf_range, conf_iter.idx);
-
-    const double *bmag_d = gkyl_array_cfetch(bmag, linidx);
-    double *Dbmag_quad_n = gkyl_array_fetch(Dbmag_quad, linidx);
-
-    // Get bmag_max for this field line (psi value).
-    // For 1x: bmag_max is a single value (index 0).
-    // For 2x: bmag_max varies with psi (x-direction), so use conf_iter.idx[0].
-    double bmag_max_val;
-    if (cdim == 1) {
-      // 1x case: single value.
-      const double *bmag_max_d = gkyl_array_cfetch(bmag_max, 0);
-      bmag_max_val = bmag_max_d[0]; // Just the constant coefficient.
-    }
-    else {
-      // 2x case: evaluate bmag_max at this psi cell.
-      // The bmag_max array is 1D in psi, so we need the psi index.
-      int psi_idx[1] = {conf_iter.idx[0]};
-      long psi_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
-      const double *bmag_max_d = gkyl_array_cfetch(bmag_max, psi_linidx);
-      // For simplicity, evaluate at cell center (logical coord 0).
-      double xc[1] = {0.0};
-      bmag_max_val = up->bmag_max_basis->eval_expand(xc, bmag_max_d);
-    }
-
-    // Sum over basis 
-    for (int n=0; n<tot_quad_conf; ++n) {
-      const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
-      for (int k=0; k<num_basis_conf; ++k) {
-        Dbmag_quad_n[n] += bmag_d[k]*b_ord[k];
-      }
-      Dbmag_quad_n[n] = Dbmag_quad_n[n] - bmag_max_val;
-    }
-  }
-}
-
 struct gkyl_loss_cone_mask_gyrokinetic* 
 gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokinetic_inp *inp)
 {
@@ -410,34 +361,24 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   up->bmag_max_z_coord = gkyl_array_acquire(inp->bmag_max_z_coord);
   up->bmag_wall = gkyl_array_acquire(inp->bmag_wall);
   up->bmag_wall_z_coord = gkyl_array_acquire(inp->bmag_wall_z_coord);
-  if (up->is_tandem) {
-    up->bmag_tandem = gkyl_array_acquire(inp->bmag_tandem);
-    up->bmag_tandem_z_coord = gkyl_array_acquire(inp->bmag_tandem_z_coord);
-  } else {
-    up->bmag_tandem = gkyl_array_acquire(inp->bmag_max);
-    up->bmag_tandem_z_coord = gkyl_array_acquire(inp->bmag_max_z_coord);
-  }
+  up->bmag_tandem = up->is_tandem ? gkyl_array_acquire(inp->bmag_tandem) : gkyl_array_acquire(inp->bmag_max);
+  up->bmag_tandem_z_coord = up->is_tandem ? gkyl_array_acquire(inp->bmag_tandem_z_coord) : gkyl_array_acquire(inp->bmag_max_z_coord);
   up->bmag_max_basis = inp->bmag_max_basis;
   up->bmag_max_range = inp->bmag_max_range;
 
   // Allocate and obtain bmag_max-bmag at quadrature points.
-  if (up->use_gpu) {
-    up->Dbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-    up->Dbmag_quad_wall = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-    up->Dbmag_quad_tandem = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-  } else {
-    up->Dbmag_quad = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-    up->Dbmag_quad_wall = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-    up->Dbmag_quad_tandem = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-  }
+  up->Dbmag_quad = mkarr(up->tot_quad_conf, inp->conf_range_ext->volume, up->use_gpu);
+  up->Dbmag_quad_wall = mkarr(up->tot_quad_conf, inp->conf_range_ext->volume, up->use_gpu);
+  up->Dbmag_quad_tandem = mkarr(up->tot_quad_conf, inp->conf_range_ext->volume, up->use_gpu);
 
-  gkyl_array_clear(up->Dbmag_quad, 0.0); 
+  gkyl_array_clear(up->Dbmag_quad, 0.0);
   gkyl_array_clear(up->Dbmag_quad_wall, 0.0);
   gkyl_array_clear(up->Dbmag_quad_tandem, 0.0);
   
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad, up->bmag_max);
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_wall(up, inp->conf_range, inp->bmag, up->Dbmag_quad_wall, up->bmag_wall);
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_tandem, up->bmag_tandem);
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad, up->bmag_max); // bmag_max - bmag
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_wall, up->bmag_wall); // bmag_wall - bmag
+  gkyl_array_scale(up->Dbmag_quad_wall, -1.0); // bmag - bmag_wall
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_tandem, up->bmag_tandem); // bmag_tandem - bmag
     
   return up;
 }
@@ -455,7 +396,6 @@ proj_on_basis(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array
   for (int k=0; k<num_basis; ++k) {
     f[k] = 0.0;
   }
-
   for (int imu=0; imu<tot_quad; ++imu) {
     double tmp = weights[imu]*func_at_ords[imu];
     for (int k=0; k<num_basis; ++k) {
@@ -680,29 +620,23 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         
         if (is_tandem) {
           // Tandem mirror trapping condition:
-          // Calculate mu_outer (barrier to outer mirror) and mu_inner (barrier to inner tandem).
-          // A particle is trapped if mu < min(mu_outer, mu_inner).
-          // This captures particles that need to overcome the lower of the two barriers to escape.
-          
-          double mu_bound_outer = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad[cqidx]);  // Barrier to outer mirror peak
-          double mu_bound_inner = GKYL_MAX2(0.0, KEparDbmag_tandem + qDphiDbmag_quad_tandem[cqidx]);  // Barrier to inner tandem peak
-          
-          // Determine which region we're in based on position
+          // Determine which region we're in based on position.
           bool in_outer_cell = fabs(xmu[cdim-1]) < fabs(bmag_max_z_val) &&
                                fabs(xmu[cdim-1]) > fabs(bmag_tandem_z_val);
           bool in_central_cell = fabs(xmu[cdim-1]) <= fabs(bmag_tandem_z_val);
           
           if (in_outer_cell) {
             // Between tandem and outer mirror - check outer barrier
-            if (mu_bound_outer < xmu[cdim+1]) {
+            if (mu_bound < xmu[cdim+1]) {
               fq[0] = 1.0 * up->norm_fac;
             } else {
               fq[0] = 0.0;
             }
           } else if (in_central_cell) {
-            // In central cell - must overcome the maximum of both barriers
-            double mu_bound_max = GKYL_MIN2(mu_bound_outer, mu_bound_inner);
-            if (mu_bound_max < xmu[cdim+1]) {
+            // In central cell - must overcome the minimum of both barriers to escape.
+            // A particle is trapped if mu > min(mu_bound, mu_bound_tandem).
+            double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem);
+            if (mu_bound_min < xmu[cdim+1]) {
               fq[0] = 1.0 * up->norm_fac;
             } else {
               fq[0] = 0.0;
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
index 821261fb04..6508c52e71 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
@@ -16,49 +16,70 @@ extern "C" {
 #include <gkyl_mat_priv.h>
 }
 
+// Kernel to compute Dbmag_quad = bmag_peak - bmag at quadrature nodes.
+// bmag_peak is a per-field-line array (1D for 2x, scalar for 1x).
+// For 1x: bmag_peak has a single value at index 0.
+// For 2x: bmag_peak varies with psi (x-direction).
 __global__ static void
-gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(struct gkyl_range conf_range,
-  const struct gkyl_array* basis_at_ords_conf, const struct gkyl_array* bmag, const double *bmag_max,
-  const double *bmag_wall, struct gkyl_array* Dbmag_quad_d, struct gkyl_array* Dbmag_quad_wall_d)
+gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(int cdim, struct gkyl_range conf_range,
+  struct gkyl_range bmag_peak_range, const struct gkyl_array* basis_at_ords_conf,
+  const struct gkyl_array* bmag, const struct gkyl_array* bmag_peak,
+  const struct gkyl_basis* bmag_peak_basis, struct gkyl_array* Dbmag_quad_out)
 {    
   int num_basis_conf = basis_at_ords_conf->ncomp;
   int tot_quad_conf = basis_at_ords_conf->size;
 
   int cidx[GKYL_MAX_CDIM];
 
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
+  for (unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
       tid < conf_range.volume; tid += blockDim.x*gridDim.x) {
 
     gkyl_sub_range_inv_idx(&conf_range, tid, cidx);
     long linidx = gkyl_range_idx(&conf_range, cidx);
 
     const double *bmag_d = (const double*) gkyl_array_cfetch(bmag, linidx);
+    double *Dbmag_quad_d = (double*) gkyl_array_fetch(Dbmag_quad_out, linidx);
+
+    // Get bmag_peak for this field line.
+    // For 1x: single value (index 0).
+    // For 2x: varies with psi, so use cidx[0].
+    double bmag_peak_val;
+    if (cdim == 1) {
+      // 1x case: single value.
+      const double *bmag_peak_d = (const double*) gkyl_array_cfetch(bmag_peak, 0);
+      bmag_peak_val = bmag_peak_d[0]; // Just the constant coefficient.
+    } else {
+      // 2x case: evaluate bmag_peak at this psi cell.
+      int psi_idx[1] = {cidx[0]};
+      long psi_linidx = gkyl_range_idx(&bmag_peak_range, psi_idx);
+      const double *bmag_peak_d = (const double*) gkyl_array_cfetch(bmag_peak, psi_linidx);
+      // Evaluate at cell center (logical coord 0).
+      double xc[1] = {0.0};
+      bmag_peak_val = bmag_peak_basis->eval_expand(xc, bmag_peak_d);
+    }
 
-    double *bmag_quad = (double*) gkyl_array_fetch(Dbmag_quad_d, linidx);
-    double *bmag_quad_wall = (double*) gkyl_array_fetch(Dbmag_quad_wall_d, linidx);
-
+    // Sum over basis to get bmag at quadrature points, then compute difference.
     for (int n=0; n<tot_quad_conf; ++n) {
       const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, n);
 
-      for (int k=0; k<num_basis_conf; ++k){
-        bmag_quad[n] += bmag_d[k]*b_ord[k];
-        bmag_quad_wall[n] += bmag_d[k]*b_ord[k];
+      double bmag_quad = 0.0;
+      for (int k=0; k<num_basis_conf; ++k) {
+        bmag_quad += bmag_d[k]*b_ord[k];
       }
-
-      bmag_quad[n] = bmag_max[0] - bmag_quad[n];
-      bmag_quad_wall[n] = bmag_quad_wall[n] - bmag_wall[0];
+      Dbmag_quad_d[n] = bmag_peak_val - bmag_quad;
     }
   }
 }
 
 void 
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
-    const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max,
-  const double *bmag_wall)
+  const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
+  struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_peak)
 {
   int nblocks = conf_range->nblocks, nthreads = conf_range->nthreads;
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<<nblocks, nthreads>>>(*conf_range,
-    up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_max, bmag_wall, up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev);
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<<nblocks, nthreads>>>(up->cdim, *conf_range,
+    *up->bmag_max_range, up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_peak->on_dev,
+    up->bmag_max_basis, Dbmag_quad->on_dev);
 }
 
 static void
@@ -72,10 +93,17 @@ gkyl_parallelize_components_kernel_launch_dims(dim3* dimGrid, dim3* dimBlock, gk
   dimGrid->x = gkyl_int_div_up(range.volume, dimBlock->x);
 }
 
+// Kernel to compute qDphiDbmag_quad = charge*(phi-phi_m)/(bmag_max-bmag) at quadrature nodes.
+// Supports per-field-line phi_m lookup for 2x mirrors.
 __global__ static void
-gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range, 
-  const struct gkyl_array* basis_at_ords_conf, double charge, const struct gkyl_array* phi,  const double *phi_m, const struct gkyl_array* Dbmag_quad, const struct gkyl_array* Dbmag_quad_wall,
-  struct gkyl_array* qDphiDbmag_quad, struct gkyl_array* qDphiDbmag_quad_wall)
+gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(int cdim, struct gkyl_range conf_range, 
+  struct gkyl_range phi_m_range, const struct gkyl_array* basis_at_ords_conf, 
+  const struct gkyl_basis* phi_m_basis, double charge, bool is_tandem,
+  const struct gkyl_array* phi, const struct gkyl_array* phi_m, const struct gkyl_array* phi_tandem,
+  const struct gkyl_array* Dbmag_quad, const struct gkyl_array* Dbmag_quad_wall, 
+  const struct gkyl_array* Dbmag_quad_tandem,
+  struct gkyl_array* qDphiDbmag_quad, struct gkyl_array* qDphiDbmag_quad_wall,
+  struct gkyl_array* qDphiDbmag_quad_tandem)
 {
   int num_basis_conf = basis_at_ords_conf->ncomp;
 
@@ -84,7 +112,7 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range
   // 2D thread grid
   // linc2 goes from 0 to tot_quad_conf= basis_at_ords_conf->size.
   long linc2 = threadIdx.y + blockIdx.y*blockDim.y;
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
+  for (unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
       tid < conf_range.volume; tid += blockDim.x*gridDim.x) {
     gkyl_sub_range_inv_idx(&conf_range, tid, cidx);
 
@@ -93,6 +121,34 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range
     const double *phi_d = (const double*) gkyl_array_cfetch(phi, linidx);
     const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx);
     const double *Dbmag_quad_wall_d = (const double*) gkyl_array_cfetch(Dbmag_quad_wall, linidx);
+    const double *Dbmag_quad_tandem_d = is_tandem ? 
+      (const double*) gkyl_array_cfetch(Dbmag_quad_tandem, linidx) : Dbmag_quad_d;
+
+    // Get phi_m value for this field line.
+    // For 1x: single value (phi_m is a scalar stored as p=0 DG expansion).
+    // For 2x: varies with psi, evaluate at this psi cell.
+    double phi_m_val, phi_tandem_m_val;
+    if (cdim == 1) {
+      // 1x case: single scalar value stored as p=0 DG expansion.
+      const double *phi_m_d = (const double*) gkyl_array_cfetch(phi_m, 0);
+      phi_m_val = phi_m_d[0];
+      if (is_tandem) {
+        const double *phi_tandem_m_d = (const double*) gkyl_array_cfetch(phi_tandem, 0);
+        phi_tandem_m_val = phi_tandem_m_d[0];
+      }
+    } else {
+      // 2x case: evaluate phi_m at this psi cell center.
+      int psi_idx[1] = {cidx[0]};
+      long phi_m_linidx = gkyl_range_idx(&phi_m_range, psi_idx);
+      const double *phi_m_d = (const double*) gkyl_array_cfetch(phi_m, phi_m_linidx);
+      // Evaluate at cell center (logical coord 0).
+      double xc[1] = {0.0};
+      phi_m_val = phi_m_basis->eval_expand(xc, phi_m_d);
+      if (is_tandem) {
+        const double *phi_tandem_m_d = (const double*) gkyl_array_cfetch(phi_tandem, phi_m_linidx);
+        phi_tandem_m_val = phi_m_basis->eval_expand(xc, phi_tandem_m_d);
+      }
+    }
 
     // Sum over basis at configuration-space quadrature points. 
     const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, linc2);
@@ -103,8 +159,9 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range
     // Potential energy term at each quadrature point.
     double *qDphiDbmag_quad_d = (double*) gkyl_array_fetch(qDphiDbmag_quad, linidx);
     double *qDphiDbmag_quad_wall_d = (double*) gkyl_array_fetch(qDphiDbmag_quad_wall, linidx);
+    
     if (Dbmag_quad_d[linc2] > 0.0)
-      qDphiDbmag_quad_d[linc2] = charge*(phi_quad-phi_m[0])/Dbmag_quad_d[linc2];
+      qDphiDbmag_quad_d[linc2] = charge*(phi_quad-phi_m_val)/Dbmag_quad_d[linc2];
     else
       qDphiDbmag_quad_d[linc2] = 0.0;
 
@@ -112,19 +169,32 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range
       qDphiDbmag_quad_wall_d[linc2] = charge*phi_quad/Dbmag_quad_wall_d[linc2];
     else
       qDphiDbmag_quad_wall_d[linc2] = 0.0;
+
+    if (is_tandem) {
+      double *qDphiDbmag_quad_tandem_d = (double*) gkyl_array_fetch(qDphiDbmag_quad_tandem, linidx);
+      if (Dbmag_quad_tandem_d[linc2] > 0.0)
+        qDphiDbmag_quad_tandem_d[linc2] = charge*(phi_quad-phi_tandem_m_val)/Dbmag_quad_tandem_d[linc2];
+      else
+        qDphiDbmag_quad_tandem_d[linc2] = 0.0;
+    }
   }
 }
 
+// Cellwise kernel: determines if a cell is trapped or lost without quadrature.
+// Supports tandem mirrors and per-field-line z-coordinate lookup.
 __global__ static void
-gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
+gkyl_loss_cone_mask_gyrokinetic_ker(int cdim, struct gkyl_rect_grid grid_phase,
   struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
+  struct gkyl_range bmag_max_range, const struct gkyl_basis* bmag_max_basis, bool is_tandem,
   double mass, const struct gkyl_array* phase_ordinates,
-  const double *bmag_max_z_scalar, const double *bmag_wall_z_scalar,
+  const struct gkyl_array* bmag_max_z_coord, const struct gkyl_array* bmag_tandem_z_coord,
   const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* qDphiDbmag_quad_wall,
+  const struct gkyl_array* qDphiDbmag_quad_tandem,
   const struct gkyl_array* Dbmag_quad, const struct gkyl_array* Dbmag_quad_wall,
+  const struct gkyl_array* Dbmag_quad_tandem,
   const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out)
 {
-  int pdim = phase_range.ndim, cdim = conf_range.ndim;
+  int pdim = phase_range.ndim;
   int vdim = pdim-cdim;
 
   double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
@@ -132,7 +202,7 @@ gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
 
   int tot_phase_quad = phase_ordinates->size;
 
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
+  for (unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
       tid < phase_range.volume; tid += blockDim.x*gridDim.x) {
     gkyl_sub_range_inv_idx(&phase_range, tid, pidx);
 
@@ -142,8 +212,33 @@ gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
 
     const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf);
     const double *Dbmag_quad_wall_d = (const double*) gkyl_array_cfetch(Dbmag_quad_wall, linidx_conf);
+    const double *Dbmag_quad_tandem_d = is_tandem ? 
+      (const double*) gkyl_array_cfetch(Dbmag_quad_tandem, linidx_conf) : Dbmag_quad_d;
     const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf);
     const double *qDphiDbmag_quad_wall_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad_wall, linidx_conf);
+    const double *qDphiDbmag_quad_tandem_d = is_tandem ?
+      (const double*) gkyl_array_cfetch(qDphiDbmag_quad_tandem, linidx_conf) : qDphiDbmag_quad_d;
+
+    // Get z-coordinates for field-line specific values.
+    double bmag_max_z_val, bmag_tandem_z_val;
+    if (cdim == 1) {
+      const double *bmag_max_z_d = (const double*) gkyl_array_cfetch(bmag_max_z_coord, 0);
+      bmag_max_z_val = bmag_max_z_d[0];
+      if (is_tandem) {
+        const double *bmag_tandem_z_d = (const double*) gkyl_array_cfetch(bmag_tandem_z_coord, 0);
+        bmag_tandem_z_val = bmag_tandem_z_d[0];
+      }
+    } else {
+      int psi_idx[1] = {cidx[0]};
+      long psi_linidx = gkyl_range_idx(&bmag_max_range, psi_idx);
+      const double *bmag_max_z_d = (const double*) gkyl_array_cfetch(bmag_max_z_coord, psi_linidx);
+      double xc_log[1] = {0.0};
+      bmag_max_z_val = bmag_max_basis->eval_expand(xc_log, bmag_max_z_d);
+      if (is_tandem) {
+        const double *bmag_tandem_z_d = (const double*) gkyl_array_cfetch(bmag_tandem_z_coord, psi_linidx);
+        bmag_tandem_z_val = bmag_max_basis->eval_expand(xc_log, bmag_tandem_z_d);
+      }
+    }
 
     gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
     long linidx_phase = gkyl_range_idx(&phase_range, pidx);
@@ -162,7 +257,6 @@ gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
 
       // Convert comp position coordinate to phys pos coord.
       log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
-//      up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);
   
       // Convert comp velocity coordinate to phys velocity coord.
       double xcomp[1];
@@ -171,40 +265,65 @@ gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
         xmu[cdim+vd] = vmap_basis->eval_expand(xcomp, vmap_d+vd*vmap_basis->num_basis);
       }
   
-      // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
-      double KEparDbmag = 0.0, KEparDbmag_wall = 0.0;
+      // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_peak-bmag).
+      double KEparDbmag = 0.0, KEparDbmag_wall = 0.0, KEparDbmag_tandem = 0.0;
       if (Dbmag_quad_d[cqidx] > 0.0)
         KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx];
-      else
-        KEparDbmag = 0.0;
   
       if (Dbmag_quad_wall_d[cqidx] > 0.0)
         KEparDbmag_wall = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_wall_d[cqidx];
-      else
-        KEparDbmag_wall = 0.0; 
+
+      if (is_tandem && Dbmag_quad_tandem_d[cqidx] > 0.0)
+        KEparDbmag_tandem = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_tandem_d[cqidx];
 
       double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
-      double mu_bound_wall = GKYL_MAX2(0.0, KEparDbmag_wall+qDphiDbmag_quad_wall_d[cqidx]);
+      double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall+qDphiDbmag_quad_wall_d[cqidx]));
+      double mu_bound_tandem = is_tandem ? GKYL_MAX2(0.0, KEparDbmag_tandem+qDphiDbmag_quad_tandem_d[cqidx]) : 0.0;
+
+      bool is_trapped;
+      if (is_tandem) {
+        // Tandem mirror trapping condition.
+        bool in_outer_cell = fabs(xmu[cdim-1]) < fabs(bmag_max_z_val) &&
+                             fabs(xmu[cdim-1]) > fabs(bmag_tandem_z_val);
+        bool in_central_cell = fabs(xmu[cdim-1]) <= fabs(bmag_tandem_z_val);
+        
+        if (in_outer_cell) {
+          is_trapped = mu_bound < xmu[cdim+1];
+        } else if (in_central_cell) {
+          double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem);
+          is_trapped = mu_bound_min < xmu[cdim+1];
+        } else {
+          is_trapped = mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val);
+        }
+      } else {
+        // Single mirror case.
+        is_trapped = (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_val)) ||
+                     (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val));
+      }
   
-      if ( !(mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_scalar[0])) &&
-           !(mu_bound_wall < xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_scalar[0])) ) {
-              mask_d[0] = 0.0;
+      if (!is_trapped) {
+        mask_d[0] = 0.0;
         break;
       }
     }
   }
 }
 
+// Quadrature kernel: computes mask at phase-space quadrature nodes.
+// Supports tandem mirrors and per-field-line z-coordinate lookup.
 __global__ static void
-gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
+gkyl_loss_cone_mask_gyrokinetic_quad_ker(int cdim, struct gkyl_rect_grid grid_phase,
   struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
+  struct gkyl_range bmag_max_range, const struct gkyl_basis* bmag_max_basis, bool is_tandem,
   double mass, double norm_fac, const struct gkyl_array* phase_ordinates, 
-  const double *bmag_max_z_scalar, const double *bmag_wall_z_scalar,
+  const struct gkyl_array* bmag_max_z_coord, const struct gkyl_array* bmag_tandem_z_coord,
   const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* qDphiDbmag_quad_wall,
+  const struct gkyl_array* qDphiDbmag_quad_tandem,
   const struct gkyl_array* Dbmag_quad, const struct gkyl_array* Dbmag_quad_wall,
+  const struct gkyl_array* Dbmag_quad_tandem,
   const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out_quad)
 {
-  int pdim = phase_range.ndim, cdim = conf_range.ndim;
+  int pdim = phase_range.ndim;
   int vdim = pdim-cdim;
 
   double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
@@ -213,7 +332,7 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
   // 2D thread grid
   // linc2 goes from 0 to tot_quad_phase
   long linc2 = threadIdx.y + blockIdx.y*blockDim.y;
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
+  for (unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
       tid < phase_range.volume; tid += blockDim.x*gridDim.x) {
     gkyl_sub_range_inv_idx(&phase_range, tid, pidx);
 
@@ -224,8 +343,33 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
 
     const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf);
     const double *Dbmag_quad_wall_d = (const double*) gkyl_array_cfetch(Dbmag_quad_wall, linidx_conf);
+    const double *Dbmag_quad_tandem_d = is_tandem ?
+      (const double*) gkyl_array_cfetch(Dbmag_quad_tandem, linidx_conf) : Dbmag_quad_d;
     const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf);
     const double *qDphiDbmag_quad_wall_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad_wall, linidx_conf);
+    const double *qDphiDbmag_quad_tandem_d = is_tandem ?
+      (const double*) gkyl_array_cfetch(qDphiDbmag_quad_tandem, linidx_conf) : qDphiDbmag_quad_d;
+
+    // Get z-coordinates for field-line specific values.
+    double bmag_max_z_val, bmag_tandem_z_val;
+    if (cdim == 1) {
+      const double *bmag_max_z_d = (const double*) gkyl_array_cfetch(bmag_max_z_coord, 0);
+      bmag_max_z_val = bmag_max_z_d[0];
+      if (is_tandem) {
+        const double *bmag_tandem_z_d = (const double*) gkyl_array_cfetch(bmag_tandem_z_coord, 0);
+        bmag_tandem_z_val = bmag_tandem_z_d[0];
+      }
+    } else {
+      int psi_idx[1] = {cidx[0]};
+      long psi_linidx = gkyl_range_idx(&bmag_max_range, psi_idx);
+      const double *bmag_max_z_d = (const double*) gkyl_array_cfetch(bmag_max_z_coord, psi_linidx);
+      double xc_log[1] = {0.0};
+      bmag_max_z_val = bmag_max_basis->eval_expand(xc_log, bmag_max_z_d);
+      if (is_tandem) {
+        const double *bmag_tandem_z_d = (const double*) gkyl_array_cfetch(bmag_tandem_z_coord, psi_linidx);
+        bmag_tandem_z_val = bmag_max_basis->eval_expand(xc_log, bmag_tandem_z_d);
+      }
+    }
 
     gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
     long linidx_phase = gkyl_range_idx(&phase_range, pidx);
@@ -238,9 +382,7 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
     const double *xcomp_d = (const double*) gkyl_array_cfetch(phase_ordinates, linc2);
 
     // Convert comp position coordinate to phys pos coord.
-    gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
     log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
-//    up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);
 
     // Convert comp velocity coordinate to phys velocity coord.
     double xcomp[1];
@@ -249,77 +391,94 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
       xmu[cdim+vd] = vmap_basis->eval_expand(xcomp, vmap_d+vd*vmap_basis->num_basis);
     }
 
-    // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
-    double KEparDbmag = 0.0;
-    double KEparDbmag_wall = 0.0;
+    // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_peak-bmag).
+    double KEparDbmag = 0.0, KEparDbmag_wall = 0.0, KEparDbmag_tandem = 0.0;
     if (Dbmag_quad_d[cqidx] > 0.0)
       KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx];
-    else
-      KEparDbmag = 0.0;
 
-   if (Dbmag_quad_wall_d[cqidx] > 0.0)
+    if (Dbmag_quad_wall_d[cqidx] > 0.0)
       KEparDbmag_wall = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_wall_d[cqidx];
-    else
-      KEparDbmag_wall = 0.0;
+
+    if (is_tandem && Dbmag_quad_tandem_d[cqidx] > 0.0)
+      KEparDbmag_tandem = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_tandem_d[cqidx];
 
     double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
     double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall+qDphiDbmag_quad_wall_d[cqidx]));
+    double mu_bound_tandem = is_tandem ? GKYL_MAX2(0.0, KEparDbmag_tandem+qDphiDbmag_quad_tandem_d[cqidx]) : 0.0;
 
     double *fq = (double*) gkyl_array_fetch(mask_out_quad, linidx_phase);
-    if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_scalar[0]))
-      fq[linc2] = norm_fac;
-    else if (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_scalar[0]))
-      fq[linc2] = norm_fac;
-    else
-      fq[linc2] = 0.0;
+    
+    if (is_tandem) {
+      // Tandem mirror trapping condition.
+      bool in_outer_cell = fabs(xmu[cdim-1]) < fabs(bmag_max_z_val) &&
+                           fabs(xmu[cdim-1]) > fabs(bmag_tandem_z_val);
+      bool in_central_cell = fabs(xmu[cdim-1]) <= fabs(bmag_tandem_z_val);
+      
+      if (in_outer_cell) {
+        fq[linc2] = (mu_bound < xmu[cdim+1]) ? norm_fac : 0.0;
+      } else if (in_central_cell) {
+        double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem);
+        fq[linc2] = (mu_bound_min < xmu[cdim+1]) ? norm_fac : 0.0;
+      } else {
+        fq[linc2] = (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val)) ? norm_fac : 0.0;
+      }
+    } else {
+      // Single mirror case.
+      if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_val))
+        fq[linc2] = norm_fac;
+      else if (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val))
+        fq[linc2] = norm_fac;
+      else
+        fq[linc2] = 0.0;
+    }
   }
 }
 
 void
 gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const struct gkyl_array *phi_m, struct gkyl_array *mask_out)
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem,
+  struct gkyl_array *mask_out)
 {
-  // TODO: Full GPU support for phi_m as DG array needs kernel updates.
-  // For now, this works for 1x case where phi_m is a scalar (p=0 DG expansion).
-  // For 2x case, need to update kernels to do per-cell lookup.
-  
   dim3 dimGrid_conf, dimBlock_conf;
   int tot_quad_conf = up->basis_at_ords_conf->size;
   gkyl_parallelize_components_kernel_launch_dims(&dimGrid_conf, &dimBlock_conf, *conf_range, tot_quad_conf);
 
-  // For GPU, phi_m->on_dev is the device pointer to the DG array.
-  // The kernel expects a double*, so pass the underlying data for now (1x case).
-  const double *phi_m_data = (const double*) phi_m->on_dev;
-  
-  gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker<<<dimGrid_conf, dimBlock_conf>>>(*conf_range, 
-    up->basis_at_ords_conf->on_dev, up->charge, phi->on_dev, phi_m_data, up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev,
-    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev);
+  // Compute qDphiDbmag at quadrature points.
+  gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker<<<dimGrid_conf, dimBlock_conf>>>(
+    up->cdim, *conf_range, *up->bmag_max_range, 
+    up->basis_at_ords_conf->on_dev, up->bmag_max_basis, up->charge, up->is_tandem,
+    phi->on_dev, phi_m->on_dev, phi_tandem->on_dev,
+    up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
+    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev, up->qDphiDbmag_quad_tandem->on_dev);
 
   const struct gkyl_velocity_map *gvm = up->vel_map;
 
   if (up->cellwise_trap_loss) {
     // Don't do quadrature.
     int nblocks = phase_range->nblocks, nthreads = phase_range->nthreads;
-    gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks, nthreads>>>(*up->grid_phase, *phase_range, *conf_range,
-      gvm->local_ext_vel, up->mass, up->ordinates_phase->on_dev,
-      up->bmag_max_z_scalar_gpu, up->bmag_wall_z_scalar_gpu, up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
-      up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
-      gvm->vmap_basis, mask_out->on_dev);
-  }
-  else {
+    gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks, nthreads>>>(up->cdim, *up->grid_phase, *phase_range, *conf_range,
+      gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis, up->is_tandem,
+      up->mass, up->ordinates_phase->on_dev,
+      up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev, 
+      up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev, up->qDphiDbmag_quad_tandem->on_dev,
+      up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev, 
+      up->p2c_qidx, gvm->vmap->on_dev, gvm->vmap_basis, mask_out->on_dev);
+  } else {
     // Use quadrature.
     dim3 dimGrid, dimBlock;
     int tot_quad_phase = up->basis_at_ords_phase->size;
     gkyl_parallelize_components_kernel_launch_dims(&dimGrid, &dimBlock, *phase_range, tot_quad_phase);
 
-    gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid, dimBlock>>>(*up->grid_phase, *phase_range, *conf_range,
-      gvm->local_ext_vel, up->mass, up->norm_fac, up->ordinates_phase->on_dev,
-      up->bmag_max_z_scalar_gpu, up->bmag_wall_z_scalar_gpu, up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
-      up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
-      gvm->vmap_basis, up->mask_out_quad->on_dev);
+    gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid, dimBlock>>>(up->cdim, *up->grid_phase, *phase_range, *conf_range,
+      gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis, up->is_tandem,
+      up->mass, up->norm_fac, up->ordinates_phase->on_dev,
+      up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev,
+      up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev, up->qDphiDbmag_quad_tandem->on_dev,
+      up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
+      up->p2c_qidx, gvm->vmap->on_dev, gvm->vmap_basis, up->mask_out_quad->on_dev);
 
-    // Call cublas to do the matrix multiplication nodal to modal conversion
+    // Call cublas to do the matrix multiplication nodal to modal conversion.
     gkyl_mat_mm_array(up->phase_nodal_to_modal_mem, up->mask_out_quad, mask_out);
   }
 }

From aa8b733dc148e5f3288b4f7e547d1c84189f0669 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 9 Jan 2026 09:34:29 -0500
Subject: [PATCH 10/32] Update gk_species_damping to match the new capabilities
 of fdot_multiplier. Add possibility of kinetic electrons and tandem mirrors.
 The damping regression test is failing, both here and on main. They are for
 different issues. Main fails because the loss_cone updater has an issue.
 Here, it fails because it's using scale_by_cell with a multi-component array.
 I'm not sure the right way to fix this

---
 gyrokinetic/apps/gk_species_damping.c         | 76 +++++++++++++++++--
 gyrokinetic/apps/gk_species_fdot_multiplier.c | 13 ++--
 gyrokinetic/apps/gkyl_gyrokinetic_priv.h      |  5 ++
 3 files changed, 79 insertions(+), 15 deletions(-)

diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index 8142f2b829..04f0aa66f9 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -67,6 +67,7 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
 {
   damp->type = gks->info.damping.type;
   damp->evolve = false; // Whether the rate is time dependent.
+  damp->is_tandem = false; // Default to single mirror.
 
   int num_quad = gks->info.damping.num_quad? gks->info.damping.num_quad : 1; // Default is a p=0 mask.
   assert(num_quad == 1); // MF 2025/06/11: Limited to this for now.
@@ -153,8 +154,41 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
 
       damp->phi_at_bmag_max = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis, 
         damp->bmag_max_range_ext->volume);
+      damp->phi_at_bmag_tandem = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis, 
+        damp->bmag_max_range_ext->volume);
       // phi is defined as 0 at the wall
 
+      bool is_symmetric;
+      int cdim = app->cdim;
+      if (gkyl_compare_double(-app->grid.lower[cdim-1], app->grid.upper[cdim-1], 1e-12)) {
+        is_symmetric = true;
+      }
+      else if (gkyl_compare_double(app->grid.lower[cdim-1], 0.0, 1e-12)) {
+        is_symmetric = false;
+      }
+      else {
+        assert(false); // Needs either the lower bound at 0 or symmetric grid
+      }
+
+      if ( (is_symmetric && num_peaks == 5) || (!is_symmetric && num_peaks == 3) ) {
+        damp->is_tandem = false;
+      }
+      else if ((is_symmetric && num_peaks == 9) || (!is_symmetric && num_peaks == 5)) {
+        damp->is_tandem = true;
+      }
+      else {
+        assert(false); // Unsupported number of extrema for loss-cone damping
+      }
+
+      if (damp->is_tandem) {
+        damp->bmag_tandem_peak_idx = num_peaks-4;
+      } else {
+        damp->bmag_tandem_peak_idx = num_peaks-2;
+      }
+      damp->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder, damp->bmag_tandem_peak_idx);
+      damp->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder, damp->bmag_tandem_peak_idx);
+
+
       // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
         .phase_grid = &gks->grid,
@@ -167,6 +201,11 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
         .bmag = app->gk_geom->geo_int.bmag,
         .bmag_max = damp->bmag_max,
         .bmag_max_z_coord = damp->bmag_max_z_coord,
+        .bmag_wall = damp->bmag_wall,
+        .bmag_wall_z_coord = damp->bmag_wall_z_coord,
+        .bmag_tandem = damp->bmag_tandem,
+        .bmag_tandem_z_coord = damp->bmag_tandem_z_coord,
+        .is_tandem = damp->is_tandem,
         .bmag_max_basis = damp->bmag_max_basis,
         .bmag_max_range = damp->bmag_max_range,
         .mass = gks->info.mass,
@@ -201,9 +240,18 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
       // Find the potential at the mirror throat.
       gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, app->field->phi_smooth,
         damp->bmag_max_peak_idx, damp->phi_at_bmag_max);
-      // Project the loss cone mask.
-      gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-        app->field->phi_smooth, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
+      
+      if (damp->is_tandem) {
+        gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, app->field->phi_smooth,
+          damp->bmag_tandem_peak_idx, damp->phi_at_bmag_tandem);
+        // Project the loss cone mask.
+        gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
+          app->field->phi_smooth, damp->phi_at_bmag_max, damp->phi_at_bmag_tandem, damp->rate);
+      } else {
+        // Project the loss cone mask using the phi_m array.
+        gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
+          app->field->phi_smooth, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
+      }
       // Multiply by the user's scaling profile.
       gkyl_array_scale_by_cell(damp->rate, damp->scale_prof);
     }
@@ -237,9 +285,18 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
       // Allgather on phi_at_bmag_max. It's not an allgather.
       // One process has the correct one, but the others do not. Is it a bcast or a sync?
 
-      // Project the loss cone mask using the phi_m array.
-      gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-        phi, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
+      if (damp->is_tandem) {
+        gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, phi,
+          damp->bmag_tandem_peak_idx, damp->phi_at_bmag_tandem);
+        // Allgather on phi_at_bmag_tandem. It's not an allgather.
+        // One process has the correct one, but the others do not. Is it a bcast or a sync?
+        gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
+          phi, damp->phi_at_bmag_max, damp->phi_at_bmag_tandem, damp->rate);
+      } else {
+        // Project the loss cone mask using the phi_m array.
+        gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
+          phi, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
+      }
 
       // Assemble the damping term -scale_prof * mask * f.
       gkyl_array_set(f_buffer, 1.0, fin);
@@ -275,14 +332,17 @@ gk_species_damping_release(const struct gkyl_gyrokinetic_app *app, const struct
       // Nothing to release.
     }
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
-      // Note: bmag_max and bmag_max_z_coord are owned by gk_geometry, not us.
       gkyl_array_release(damp->bmag_max);
       gkyl_array_release(damp->bmag_max_z_coord);
       gkyl_array_release(damp->bmag_wall);
       gkyl_array_release(damp->bmag_wall_z_coord);
-      gkyl_array_dg_find_peaks_release(damp->bmag_peak_finder);
+      gkyl_array_release(damp->bmag_tandem);
+      gkyl_array_release(damp->bmag_tandem_z_coord);
+
       gkyl_array_release(damp->phi_at_bmag_max);
+      gkyl_array_release(damp->phi_at_bmag_tandem);
 
+      gkyl_array_dg_find_peaks_release(damp->bmag_peak_finder);
       gkyl_loss_cone_mask_gyrokinetic_release(damp->lcm_proj_op);
       gkyl_array_release(damp->scale_prof);
     }
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 076c8f0c0e..4c3eaa505e 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -208,11 +208,10 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       // Get the LOCAL_MAX peak (bmag maximum along z direction).
       int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder);
       fdmul->bmag_max_peak_idx = num_peaks-2; // Edge is num_peaks-1, so maximum is one less
-      fdmul->bmag_tandem_peak_idx = num_peaks-1; 
       fdmul->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
       fdmul->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
-      fdmul->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, fdmul->bmag_tandem_peak_idx);
-      fdmul->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, fdmul->bmag_tandem_peak_idx);
+      fdmul->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, num_peaks-1);
+      fdmul->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, num_peaks-1);
       fdmul->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(fdmul->bmag_peak_finder);
       fdmul->bmag_max_range = gkyl_array_dg_find_peaks_get_range(fdmul->bmag_peak_finder);
       fdmul->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(fdmul->bmag_peak_finder);
@@ -247,12 +246,12 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       }
 
       if (is_tandem) {
-        fdmul->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, num_peaks-4);
-        fdmul->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, num_peaks-4);
+        fdmul->bmag_tandem_peak_idx = num_peaks-4;
       } else {
-        fdmul->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, num_peaks-2);
-        fdmul->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, num_peaks-2);
+        fdmul->bmag_tandem_peak_idx = num_peaks-2;
       }
+      fdmul->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, fdmul->bmag_tandem_peak_idx);
+      fdmul->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, fdmul->bmag_tandem_peak_idx);
 
       // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
diff --git a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
index 034285e7d5..4f0af6533b 100644
--- a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
+++ b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
@@ -813,6 +813,7 @@ struct gk_source {
 struct gk_damping {
   enum gkyl_gyrokinetic_damping_type type; // Type of damping term.
   bool evolve; // Whether the source is time dependent.
+  bool is_tandem; // Whether we are doing a tandem mirror.
   struct gkyl_array *rate; // Damping rate.
   struct gkyl_array *rate_host; // Host copy for use in IO and projecting.
   struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
@@ -823,11 +824,15 @@ struct gk_damping {
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
   const struct gkyl_array *bmag_wall; // Magnetic field amplitude at the wall per field line.
   const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag_wall per field line.
+  const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case).
+  const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate of bmag_tandem per field line.
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
   const struct gkyl_range *bmag_max_range_ext; // Extended range for bmag_max arrays.
   int bmag_max_peak_idx; // Index of the LOCAL_MAX peak in the peak finder.
+  int bmag_tandem_peak_idx; // Index of the TANDEM_MIRROR peak in the peak finder.
   struct gkyl_array *phi_at_bmag_max; // Phi evaluated at all peak locations.
+  struct gkyl_array *phi_at_bmag_tandem; // Phi evaluated at tandem mirror locations.
   struct gkyl_array *scale_prof; // Conf-space scaling factor profile.
   // Functions chosen at runtime.
   void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame);

From a6bff16c58b603e270d6e755ae115ffbf192f222 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 9 Jan 2026 10:11:09 -0500
Subject: [PATCH 11/32] gk_species_damping is working on this branch too now. I
 needed to remove the aspects about the cellwise evaluation and quadrature
 points because that breaks the array_scale_by_cell method which is used

---
 gyrokinetic/apps/gk_species_damping.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index 04f0aa66f9..cfc2663df9 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -115,13 +115,6 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
       damp->evolve = true; // Since the loss cone boundary is proportional to phi(t).
 
-      // Available options:
-      //   A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
-      //   B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_const=true. Output: ncomp=1 array.
-      enum gkyl_quad_type qtype = GKYL_GAUSS_LOBATTO_QUAD;
-      int num_quad = gks->basis.poly_order+1; // This can be p+1 or 1. Must be
-                                              // at least p+1 for Gauss-Lobatto.
-
       // Create peak finder for bmag to find the mirror throat.
       // Search along the parallel (z) direction, which is the last configuration space dimension.
       int search_dir = app->cdim - 1;
@@ -210,9 +203,7 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
         .bmag_max_range = damp->bmag_max_range,
         .mass = gks->info.mass,
         .charge = gks->info.charge,
-        .qtype = qtype,
         .num_quad = num_quad,
-        .cellwise_trap_loss = true,
         .c2p_pos_func = proj_on_basis_c2p_position_func,
         .c2p_pos_func_ctx = &damp->proj_on_basis_c2p_ctx,
         .use_gpu = app->use_gpu,

From e2bc28ff188f4df3b0e217af4308726221e9fe5e Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 9 Jan 2026 10:29:41 -0500
Subject: [PATCH 12/32] Add another unit test to the loss cone mask

---
 .../unit/ctest_loss_cone_mask_gyrokinetic.c   | 390 +++++++++++++++++-
 1 file changed, 389 insertions(+), 1 deletion(-)

diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index dbedc75b6c..f1f3fb3282 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -1,5 +1,7 @@
 #include <acutest.h>
 
+#define _USE_MATH_DEFINES
+#include <math.h>
 #include <gkyl_util.h>
 #include <gkyl_array.h>
 #include <gkyl_array_ops.h>
@@ -16,6 +18,10 @@
 #include <gkyl_array_dg_find_peaks.h>
 #include <gkyl_const.h>
 
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
 struct loss_cone_mask_test_ctx {
   int cdim; // Configuration space dimensionality.
   double eV; // Elementary charge.
@@ -25,11 +31,15 @@ struct loss_cone_mask_test_ctx {
   double mass, charge; // Species mass and charge.
   double n0, T0, B0; // Reference parameters.
   double phi_fac; // phi(z=0) = phi_fac*T0/e;
+  double psi_max; // For 2x: upper limit of psi (radial coordinate).
   double z_max, vpar_max, mu_max; // Upper grid extents.
-  int Nz, Nvpar, Nmu; // Number of cells in each direction.
+  int Npsi, Nz, Nvpar, Nmu; // Number of cells in each direction.
   enum gkyl_quad_type quad_type; // Type of quadrature/nodes.
   int num_quad; // Number of quadrature points to use in projection, 1 or p+1.
   bool cellwise_trap_loss; // Whether a whole cell is either trapped or lost.
+  bool is_tandem; // Whether this is a tandem mirror configuration.
+  double B_tandem; // Field at tandem mirror (for tandem case).
+  double z_tandem; // z-coordinate of tandem mirror (for tandem case).
 };
 
 // allocate array (filled with zeros)
@@ -75,6 +85,57 @@ phi_func_1x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
   fout[0] = 0.0; //0.5 * phi_fac*T0/eV * (1.0 + cos(z));
 }
 
+// Non-zero electrostatic potential: peaked at center, zero at wall.
+void
+phi_func_1x_nonzero(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xc[0];
+
+  struct loss_cone_mask_test_ctx *params = ctx;
+  double phi_fac = params->phi_fac;
+  double T0 = params->T0;
+  double eV = params->eV;
+  double z_max = params->z_max;
+
+  // Parabolic potential profile: phi(z) = phi_fac*T0/eV * (1 - (z/z_max)^2)
+  // This gives phi=phi_fac*T0/eV at z=0, and phi=0 at z=+/-z_max.
+  fout[0] = phi_fac*T0/eV * (1.0 - pow(z/z_max, 2.0));
+}
+
+// Reference mask for nonzero phi case.
+void
+mask_ref_1x2v_nonzero_phi(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xc[0], vpar = xc[1], mu = xc[2];
+  struct loss_cone_mask_test_ctx *params = ctx;
+
+  double z_m = params->z_m;
+  double mass = params->mass;
+  double charge = params->charge;
+
+  double phi, phi_m;
+  phi_func_1x_nonzero(t, xc, &phi, ctx);
+  phi_func_1x_nonzero(t, &z_m, &phi_m, ctx);
+
+  double bfield[3], bmag;
+  double zinfl[3] = {0.0}, z_minfl[3] = {0.0};
+  zinfl[2] = z, z_minfl[2] = z_m;
+  bfield_func_3x(t, zinfl, bfield, ctx);
+  bmag = bfield[2];
+
+  double bfield_m[3], bmag_m;
+  bfield_func_3x(t, z_minfl, bfield_m, ctx);
+  bmag_m = bfield_m[2];
+
+  // mu_bound = (0.5*m*vpar^2+q*(phi-phi_m))/(B*(B_max/B-1))
+  double mu_bound = (0.5*mass*pow(vpar,2)+charge*(phi-phi_m))/(bmag*(bmag_m/bmag-1));
+  if (mu_bound < mu && fabs(z) < z_m) {
+    fout[0] = 1.0;
+  } else {
+    fout[0] = 0.0;
+  }
+}
+
 void
 mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
 {
@@ -415,17 +476,344 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 #endif  
 }
 
+// Test with non-zero electrostatic potential.
+// This tests that the trapped-passing boundary correctly accounts for
+// the q*(phi-phi_m) term in the mu_bound calculation.
+void
+test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
+{
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mass_proton = GKYL_PROTON_MASS;
+
+  // Set reference parameters.
+  struct loss_cone_mask_test_ctx ctx = {
+    .cdim = 1,
+    .eV = eV,
+    .R_m = 8.0,
+    .B_m = 4.0,
+    .z_m = M_PI/2.0,
+    .mass = 2.014*mass_proton,
+    .charge = eV,  // Positive ions.
+    .n0 = 1e18,
+    .T0 = 100*eV,
+    .phi_fac = 3.0,  // phi(z=0) = 3*T0/e = 300 V.
+    .z_max = M_PI,
+    .Nz = 8,
+    .Nvpar = 8,
+    .Nmu = 4,
+    .quad_type = GKYL_GAUSS_LOBATTO_QUAD,
+    .num_quad = 2,
+    .cellwise_trap_loss = true,
+  };
+  ctx.B0 = ctx.B_m/2.0;
+  ctx.vpar_max = 6.0*sqrt(ctx.T0/ctx.mass);
+  ctx.mu_max = 0.5*ctx.mass*pow(ctx.vpar_max,2)/ctx.B0;
+
+  double lower[] = {-ctx.z_max, -ctx.vpar_max, 0.0};
+  double upper[] = {ctx.z_max, ctx.vpar_max, ctx.mu_max};
+  int cells[] = {ctx.Nz, ctx.Nvpar, ctx.Nmu};
+  const int ndim = sizeof(cells)/sizeof(cells[0]);
+  const int cdim = ctx.cdim;
+  const int vdim = ndim - ctx.cdim;
+
+  // Grids.
+  double lower_conf[cdim], upper_conf[cdim];
+  int cells_conf[cdim];
+  for (int d=0; d<cdim; d++) {
+    lower_conf[d] = lower[d];
+    upper_conf[d] = upper[d];
+    cells_conf[d] = cells[d];
+  }
+  double lower_vel[vdim], upper_vel[vdim];
+  int cells_vel[vdim];
+  for (int d=0; d<vdim; d++) {
+    lower_vel[d] = lower[cdim+d];
+    upper_vel[d] = upper[cdim+d];
+    cells_vel[d] = cells[cdim+d];
+  }
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
+  struct gkyl_rect_grid grid_conf;
+  gkyl_rect_grid_init(&grid_conf, cdim, lower_conf, upper_conf, cells_conf);
+  struct gkyl_rect_grid grid_vel;
+  gkyl_rect_grid_init(&grid_vel, vdim, lower_vel, upper_vel, cells_vel);
+
+  // Basis functions.
+  struct gkyl_basis basis, basis_conf;
+  if (poly_order == 1) {
+    gkyl_cart_modal_gkhybrid(&basis, cdim, vdim);
+  } else {
+    gkyl_cart_modal_serendip(&basis, ndim, poly_order);
+  }
+  gkyl_cart_modal_serendip(&basis_conf, cdim, poly_order);
+
+  struct gkyl_basis *basis_on_dev, *basis_on_dev_conf;
+  if (use_gpu) {
+#ifdef GKYL_HAVE_CUDA
+    basis_on_dev = gkyl_cu_malloc(sizeof(struct gkyl_basis));
+    basis_on_dev_conf = gkyl_cu_malloc(sizeof(struct gkyl_basis));
+    if (poly_order == 1) {
+      gkyl_cart_modal_gkhybrid_cu_dev(basis_on_dev, cdim, vdim);
+    } else {
+      gkyl_cart_modal_serendip_cu_dev(basis_on_dev, ndim, poly_order);
+    }
+    gkyl_cart_modal_serendip_cu_dev(basis_on_dev_conf, cdim, poly_order);
+#endif
+  } else { 
+    basis_on_dev = &basis;
+    basis_on_dev_conf = &basis_conf;
+  }
+
+  // Ranges.
+  int ghost_conf[] = { 1, 1, 1 };
+  struct gkyl_range local_conf, local_ext_conf;
+  gkyl_create_grid_ranges(&grid_conf, ghost_conf, &local_ext_conf, &local_conf);
+
+  int ghost_vel[] = { 0, 0 };
+  struct gkyl_range local_vel, local_ext_vel;
+  gkyl_create_grid_ranges(&grid_vel, ghost_vel, &local_ext_vel, &local_vel);
+
+  int ghost[GKYL_MAX_DIM] = { 0 };
+  for (int d=0; d<cdim; d++) { ghost[d] = ghost_conf[d]; }
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  struct gkyl_position_map *pmap = gkyl_position_map_null_new();
+
+  // Initialize geometry.
+  struct gkyl_gk_geometry_inp geometry_input = {
+    .geometry_id = GKYL_MAPC2P,
+    .world = {0.0, 0.0},
+    .mapc2p = mapc2p_3x,
+    .c2p_ctx = 0,
+    .bfield_func = bfield_func_3x,
+    .bfield_ctx = &ctx,
+    .grid = grid_conf,
+    .local = local_conf,
+    .local_ext = local_ext_conf,
+    .global = local_conf,
+    .global_ext = local_ext_conf,
+    .basis = basis_conf,
+    .position_map = pmap,
+  };
+  geometry_input.geo_grid = gkyl_gk_geometry_augment_grid(grid_conf, geometry_input);
+  gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext, &geometry_input.geo_local);
+  gkyl_cart_modal_serendip(&geometry_input.geo_basis, 3, poly_order);
+  struct gk_geometry* gk_geom_3d = gkyl_gk_geometry_mapc2p_new(&geometry_input);
+  struct gk_geometry *gk_geom = gkyl_gk_geometry_deflate(gk_geom_3d, &geometry_input);
+  gkyl_gk_geometry_release(gk_geom_3d);
+  
+  // Use array_dg_find_peaks to find bmag_max.
+  int search_dir = cdim - 1;
+  struct gkyl_array_dg_find_peaks_inp peak_inp = {
+    .basis = &basis_conf,
+    .grid = &grid_conf,
+    .range = &local_conf,
+    .range_ext = &local_ext_conf,
+    .search_dir = search_dir,
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *bmag_peak_finder = 
+    gkyl_array_dg_find_peaks_new(&peak_inp, gk_geom->geo_int.bmag);
+  gkyl_array_dg_find_peaks_advance(bmag_peak_finder, gk_geom->geo_int.bmag);
+  
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(bmag_peak_finder);
+  int bmag_max_peak_idx = num_peaks - 2;
+  const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_array *bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_array *bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, num_peaks-1);
+  const struct gkyl_array *bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, num_peaks-1);
+  const struct gkyl_basis *bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(bmag_peak_finder);
+  const struct gkyl_range *bmag_max_range = gkyl_array_dg_find_peaks_get_range(bmag_peak_finder);
+  const struct gkyl_range *bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder);
+  
+  // Allocate arrays for phi evaluated at peak locations.
+  struct gkyl_array **phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array*));
+  for (int p = 0; p < num_peaks; p++) {
+    phi_at_peaks[p] = mkarr(use_gpu, bmag_max_basis->num_basis, bmag_max_range_ext->volume);
+  }
+  
+  if (use_gpu) {
+    struct gk_geometry* gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
+    gkyl_gk_geometry_release(gk_geom);
+    gk_geom = gkyl_gk_geometry_acquire(gk_geom_dev);
+    gkyl_gk_geometry_release(gk_geom_dev);
+  }
+
+  // Velocity space mapping.
+  struct gkyl_mapc2p_inp c2p_in = { };
+  struct gkyl_velocity_map *gvm = gkyl_velocity_map_new(c2p_in, grid, grid_vel,
+    local, local_ext, local_vel, local_ext_vel, use_gpu);
+
+  // Project the electrostatic potential with NON-ZERO phi.
+  struct gkyl_array *phi = mkarr(use_gpu, basis_conf.num_basis, local_ext_conf.volume);
+  struct gkyl_array *phi_ho = use_gpu ? mkarr(false, phi->ncomp, phi->size)
+                                      : gkyl_array_acquire(phi);
+
+  gkyl_eval_on_nodes *evphi = gkyl_eval_on_nodes_new(&grid_conf, &basis_conf, 1, phi_func_1x_nonzero, &ctx);
+  gkyl_eval_on_nodes_advance(evphi, 0.0, &local_conf, phi_ho);
+  gkyl_eval_on_nodes_release(evphi);
+  gkyl_array_copy(phi, phi_ho);
+
+  // Project phi onto peak locations to get phi_m.
+  gkyl_array_dg_find_peaks_project_on_peaks(bmag_peak_finder, phi, phi_at_peaks);
+  const struct gkyl_array *phi_m = phi_at_peaks[bmag_max_peak_idx];
+
+  // Basis used to project the mask.
+  struct gkyl_basis basis_mask;
+  if (ctx.num_quad == 1 || ctx.cellwise_trap_loss) {
+    gkyl_cart_modal_serendip(&basis_mask, ndim, 0);
+  } else {
+    if (poly_order == 1) {
+      gkyl_cart_modal_gkhybrid(&basis_mask, cdim, vdim);
+    } else {
+      gkyl_cart_modal_serendip(&basis_mask, ndim, poly_order);
+    }
+  }
+
+  // Create mask array.
+  struct gkyl_array *mask = mkarr(use_gpu, basis_mask.num_basis, local_ext.volume);
+  struct gkyl_array *mask_ho = use_gpu ? mkarr(false, mask->ncomp, mask->size)
+                                       : gkyl_array_acquire(mask);
+
+  // Project the loss cone mask.
+  struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
+    .phase_grid = &grid,
+    .conf_basis = &basis_conf,
+    .phase_basis = &basis,
+    .conf_range =  &local_conf,
+    .conf_range_ext = &local_ext_conf,
+    .vel_range = &local_vel, 
+    .vel_map = gvm,
+    .bmag = gk_geom->geo_int.bmag,
+    .bmag_max_z_coord = bmag_max_z_coord,
+    .bmag_max = bmag_max,
+    .bmag_wall = bmag_wall,
+    .bmag_wall_z_coord = bmag_wall_z_coord,
+    .bmag_max_basis = bmag_max_basis,
+    .bmag_max_range = bmag_max_range,
+    .mass = ctx.mass,
+    .charge = ctx.charge,
+    .qtype = ctx.quad_type,
+    .num_quad = ctx.num_quad,
+    .cellwise_trap_loss = ctx.cellwise_trap_loss,
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_loss_cone_mask_gyrokinetic *proj_mask = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
+
+  gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, phi_m, mask);
+
+  gkyl_array_copy(mask_ho, mask);
+
+  // Verify physical properties of the mask:
+  // 1. At the center (z≈0), high-mu particles should be trapped (mask=1)
+  // 2. At the wall (|z| ≈ z_max), particles should not be in the trapped region  
+  // 3. Low-mu particles near center should be passing (mask=0)
+  
+  // Check specific cells to verify correct behavior.
+  // Cell indices: [iz, ivpar, imu] where each starts at 1 in local range.
+  // Grid: z in [-pi, pi], vpar in [-vpar_max, vpar_max], mu in [0, mu_max]
+  // Central z cells are around iz=4,5 (8 cells, symmetric)
+  // High mu cells are imu=3,4 (4 cells)
+  // Low mu cells are imu=1
+  
+  int num_trapped_high_mu_center = 0;
+  int num_passing_low_mu_center = 0;
+  int total_high_mu_center = 0;
+  int total_low_mu_center = 0;
+  
+  struct gkyl_range_iter iter;
+  gkyl_range_iter_init(&iter, &local);
+  while (gkyl_range_iter_next(&iter)) {
+    int iz = iter.idx[0];
+    int imu = iter.idx[2];
+    
+    // Determine if we're at center (iz = 4 or 5 for 8 cells in [-pi, pi])
+    // and if we're at high mu (imu = 3 or 4) or low mu (imu = 1)
+    bool is_center = (iz == 4 || iz == 5);
+    bool is_high_mu = (imu == 3 || imu == 4);
+    bool is_low_mu = (imu == 1);
+    
+    long linidx = gkyl_range_idx(&local, iter.idx);
+    const double *mask_val = gkyl_array_cfetch(mask_ho, linidx);
+    
+    if (is_center && is_high_mu) {
+      total_high_mu_center++;
+      if (mask_val[0] > 0.5) { num_trapped_high_mu_center++; }
+    }
+    if (is_center && is_low_mu) {
+      total_low_mu_center++;
+      if (mask_val[0] < 0.5) { num_passing_low_mu_center++; }
+    }
+  }
+  
+  // High mu particles at center should mostly be trapped.
+  double trapped_frac = (double)num_trapped_high_mu_center / (double)total_high_mu_center;
+  TEST_CHECK(trapped_frac > 0.5);
+  if (trapped_frac <= 0.5) {
+    printf("High-mu center trapped fraction: %g (%d / %d)\n", 
+           trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
+  }
+  
+  // Low mu particles at center should mostly be passing.
+  double passing_frac = (double)num_passing_low_mu_center / (double)total_low_mu_center;
+  TEST_CHECK(passing_frac > 0.5);
+  if (passing_frac <= 0.5) {
+    printf("Low-mu center passing fraction: %g (%d / %d)\n", 
+           passing_frac, num_passing_low_mu_center, total_low_mu_center);
+  }
+
+  // Write output for debugging.
+  char fname[1024];
+  if (use_gpu) {
+    sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_nonzero_phi_p%d_dev.gkyl", poly_order);
+  } else {
+    sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_nonzero_phi_p%d_ho.gkyl", poly_order);
+  }
+  gkyl_grid_sub_array_write(&grid, &local, 0, mask_ho, fname);
+
+  // Cleanup.
+  for (int p = 0; p < num_peaks; p++) {
+    gkyl_array_release(phi_at_peaks[p]);
+  }
+  gkyl_free(phi_at_peaks);
+  gkyl_array_release(phi); 
+  gkyl_array_release(phi_ho); 
+  gkyl_array_release(mask); 
+  gkyl_array_release(mask_ho);
+  gkyl_loss_cone_mask_gyrokinetic_release(proj_mask);
+  gkyl_velocity_map_release(gvm);
+  gkyl_array_release(bmag_max);
+  gkyl_array_release(bmag_max_z_coord);
+  gkyl_array_release(bmag_wall);
+  gkyl_array_release(bmag_wall_z_coord);
+  gkyl_array_dg_find_peaks_release(bmag_peak_finder);
+  gkyl_position_map_release(pmap);
+  gkyl_gk_geometry_release(gk_geom);
+
+#ifdef GKYL_HAVE_CUDA
+  if (use_gpu) {
+    gkyl_cu_free(basis_on_dev);
+    gkyl_cu_free(basis_on_dev_conf);
+  }
+#endif  
+}
+
 void test_1x2v_p1_gk_ho() { test_1x2v_gk(1, false); }
+void test_1x2v_p1_nonzero_phi_gk_ho() { test_1x2v_nonzero_phi_gk(1, false); }
 
 #ifdef GKYL_HAVE_CUDA
 void test_1x2v_p1_gk_dev() { test_1x2v_gk(1, true); }
+void test_1x2v_p1_nonzero_phi_gk_dev() { test_1x2v_nonzero_phi_gk(1, true); }
 #endif
 
 TEST_LIST = {
   { "test_1x2v_p1_gk_ho", test_1x2v_p1_gk_ho },
+  { "test_1x2v_p1_nonzero_phi_gk_ho", test_1x2v_p1_nonzero_phi_gk_ho },
 
 #ifdef GKYL_HAVE_CUDA
   { "test_1x2v_p1_gk_dev", test_1x2v_p1_gk_dev },
+  { "test_1x2v_p1_nonzero_phi_gk_dev", test_1x2v_p1_nonzero_phi_gk_dev },
 #endif
   { NULL, NULL },
 };

From 1fb0516a6390178df551fb357773a8881633e84e Mon Sep 17 00:00:00 2001
From: mrquell <mrquell@gmail.com>
Date: Thu, 19 Feb 2026 13:56:30 -0500
Subject: [PATCH 13/32] Update variable name

---
 gyrokinetic/zero/loss_cone_mask_gyrokinetic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index 0ef9fba4cd..9c250e6c89 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -298,7 +298,7 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
       inp->conf_range_ext->volume*inp->vel_range->volume);
     up->qDphiDbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
     up->qDphiDbmag_quad_wall = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-    up->qDphiDbmag_tandem = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+    up->qDphiDbmag_quad_tandem = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
 
     // Allocate the memory for computing the specific phase nodal to modal calculation
     struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem_ho;

From ce57b6abf5488b95940c240d6bca00a64fecff0d Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 20 Feb 2026 06:54:13 -0800
Subject: [PATCH 14/32] GPU unit tests all pass. It's compute sanitizer clean.
 I will have to refactor this in the future, but it is just proof of concept
 for now to make sure it works correctly.

---
 core/unit/ctest_array_dg_find_peaks.c     | 361 +++++++++++++++-------
 core/zero/array_dg_find_peaks.c           | 206 ++++++++----
 core/zero/gkyl_array_dg_find_peaks_priv.h |   4 +
 3 files changed, 397 insertions(+), 174 deletions(-)

diff --git a/core/unit/ctest_array_dg_find_peaks.c b/core/unit/ctest_array_dg_find_peaks.c
index 2ac00cf1c0..0cc2a200e2 100644
--- a/core/unit/ctest_array_dg_find_peaks.c
+++ b/core/unit/ctest_array_dg_find_peaks.c
@@ -16,6 +16,15 @@
 #include <math.h>
 #include <stdio.h>
 
+// Helper function to create test arrays on CPU or GPU.
+static struct gkyl_array *
+mkarr(bool use_gpu, long nc, long size)
+{
+  struct gkyl_array *a = use_gpu ? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size)
+                                 : gkyl_array_new(GKYL_DOUBLE, nc, size);
+  return a;
+}
+
 // 1D test function with multiple peaks: f(z) = cos(2*pi*z/L) 
 // Has maxima at z=0, z=L and minimum at z=L/2.
 static void
@@ -100,7 +109,7 @@ test_func_quadratic_1d(double t, const double *xn, double *fout, void *ctx)
 
 // Test 1D peak finding with cos function.
 void
-test_1d_find_peaks_cos(int poly_order)
+test_1d_find_peaks_cos(int poly_order, bool use_gpu)
 {
   // Grid: z in [-1, 1] (one period of cos(2*pi*z/2)).
   double lower[] = {-1.0};
@@ -118,12 +127,16 @@ test_1d_find_peaks_cos(int poly_order)
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
-  // Project test function onto basis.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  // Project test function onto basis (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_cos, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
   gkyl_eval_on_nodes_release(ev);
 
+  // Create device copy if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+
   // Create peak finder.
   struct gkyl_array_dg_find_peaks_inp inp = {
     .basis = &basis,
@@ -131,7 +144,7 @@ test_1d_find_peaks_cos(int poly_order)
     .range = &local,
     .range_ext = &local_ext,
     .search_dir = 0,
-    .use_gpu = false,
+    .use_gpu = use_gpu,
   };
   struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
 
@@ -153,8 +166,14 @@ test_1d_find_peaks_cos(int poly_order)
 
   for (int p = 0; p < 3 && p < num_peaks; p++) {
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
-    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+    const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size);
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(vals, vals_d);
+    gkyl_array_copy(coords, coords_d);
     
     const double *val = gkyl_array_cfetch(vals, 0);
     const double *coord = gkyl_array_cfetch(coords, 0);
@@ -169,15 +188,18 @@ test_1d_find_peaks_cos(int poly_order)
 
     gkyl_array_release(coords);
     gkyl_array_release(vals);
+    gkyl_array_release(coords_d);
+    gkyl_array_release(vals_d);
   }
 
+  gkyl_array_release(f_ho);
   gkyl_array_release(f);
   gkyl_array_dg_find_peaks_release(peaks);
 }
 
 // Test 1D peak finding with mirror-like function.
 void
-test_1d_find_peaks_mirror(int poly_order)
+test_1d_find_peaks_mirror(int poly_order, bool use_gpu)
 {
   // Grid: z in [-1, 1].
   double lower[] = {-1.0};
@@ -195,12 +217,16 @@ test_1d_find_peaks_mirror(int poly_order)
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
-  // Project test function onto basis.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  // Project test function onto basis (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_mirror, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
   gkyl_eval_on_nodes_release(ev);
 
+  // Create device copy if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+
   // Create peak finder.
   struct gkyl_array_dg_find_peaks_inp inp = {
     .basis = &basis,
@@ -208,7 +234,7 @@ test_1d_find_peaks_mirror(int poly_order)
     .range = &local,
     .range_ext = &local_ext,
     .search_dir = 0,
-    .use_gpu = false,
+    .use_gpu = use_gpu,
   };
   struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
 
@@ -220,8 +246,14 @@ test_1d_find_peaks_mirror(int poly_order)
 
   for (int p = 0; p < num_peaks; p++) {
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
-    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+    const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size);
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(vals, vals_d);
+    gkyl_array_copy(coords, coords_d);
     
     const double *val = gkyl_array_cfetch(vals, 0);
     const double *coord = gkyl_array_cfetch(coords, 0);
@@ -241,15 +273,18 @@ test_1d_find_peaks_mirror(int poly_order)
     }
     gkyl_array_release(vals);
     gkyl_array_release(coords);
+    gkyl_array_release(vals_d);
+    gkyl_array_release(coords_d);
   }
 
+  gkyl_array_release(f_ho);
   gkyl_array_release(f);
   gkyl_array_dg_find_peaks_release(peaks);
 }
 
 // Test 2D peak finding.
 void
-test_2d_find_peaks(int poly_order)
+test_2d_find_peaks(int poly_order, bool use_gpu)
 {
   double lower[] = {0.0, -1.0};
   double upper[] = {1.0, 1.0};
@@ -264,12 +299,16 @@ test_2d_find_peaks(int poly_order)
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
-  // Project test function onto basis.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  // Project test function onto basis (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_mirror, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
   gkyl_eval_on_nodes_release(ev);
 
+  // Create device copy if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+
   // Create peak finder (search along z, which is direction 1).
   struct gkyl_array_dg_find_peaks_inp inp = {
     .basis = &basis,
@@ -277,7 +316,7 @@ test_2d_find_peaks(int poly_order)
     .range = &local,
     .range_ext = &local_ext,
     .search_dir = 1,  // Search along z.
-    .use_gpu = false,
+    .use_gpu = use_gpu,
   };
   struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
 
@@ -295,8 +334,14 @@ test_2d_find_peaks(int poly_order)
   // Check that values and coordinates are reasonable for each peak.
   for (int p = 0; p < num_peaks; p++) {
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
-    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+    const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size);
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(vals, vals_d);
+    gkyl_array_copy(coords, coords_d);
     
     double xc_log[1] = {0.0};
     
@@ -330,15 +375,18 @@ test_2d_find_peaks(int poly_order)
     }
     gkyl_array_release(vals);
     gkyl_array_release(coords);
+    gkyl_array_release(vals_d);
+    gkyl_array_release(coords_d);
   }
 
+  gkyl_array_release(f_ho);
   gkyl_array_release(f);
   gkyl_array_dg_find_peaks_release(peaks);
 }
 
 // Test 1D peak finding with complex oscillatory function.
 void
-test_1d_find_peaks_complex(int poly_order)
+test_1d_find_peaks_complex(int poly_order, bool use_gpu)
 {
   double lower[] = {-2.0*M_PI};
   double upper[] = {2.0*M_PI};
@@ -353,12 +401,16 @@ test_1d_find_peaks_complex(int poly_order)
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
-  // Project test function onto basis.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  // Project test function onto basis (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
   gkyl_eval_on_nodes_release(ev);
 
+  // Create device copy if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+
   // Create peak finder.
   struct gkyl_array_dg_find_peaks_inp inp = {
     .basis = &basis,
@@ -366,7 +418,7 @@ test_1d_find_peaks_complex(int poly_order)
     .range = &local,
     .range_ext = &local_ext,
     .search_dir = 0,
-    .use_gpu = false,
+    .use_gpu = use_gpu,
   };
   struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
 
@@ -396,8 +448,14 @@ test_1d_find_peaks_complex(int poly_order)
 
   for (int p = 0; p < num_peaks; p++) {
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
-    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+    const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size);
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(vals, vals_d);
+    gkyl_array_copy(coords, coords_d);
     
     const double *val = gkyl_array_cfetch(vals, 0);
     const double *coord = gkyl_array_cfetch(coords, 0);
@@ -413,15 +471,18 @@ test_1d_find_peaks_complex(int poly_order)
 
     gkyl_array_release(coords);
     gkyl_array_release(vals);
+    gkyl_array_release(coords_d);
+    gkyl_array_release(vals_d);
   }
 
+  gkyl_array_release(f_ho);
   gkyl_array_release(f);
   gkyl_array_dg_find_peaks_release(peaks);
 }
 
 // Test 2D peak finding with complex oscillatory function.
 void
-test_2d_find_peaks_complex(int poly_order)
+test_2d_find_peaks_complex(int poly_order, bool use_gpu)
 {
   // Grid: psi in [0.5, 2.0], z in [-5, 5].
   double lower[] = {0.5, -2.0*M_PI};
@@ -440,12 +501,16 @@ test_2d_find_peaks_complex(int poly_order)
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
-  // Project test function onto basis.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  // Project test function onto basis (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
   gkyl_eval_on_nodes_release(ev);
 
+  // Create device copy if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+
   // Create peak finder (search along z, which is direction 1).
   struct gkyl_array_dg_find_peaks_inp inp = {
     .basis = &basis,
@@ -453,7 +518,7 @@ test_2d_find_peaks_complex(int poly_order)
     .range = &local,
     .range_ext = &local_ext,
     .search_dir = 1,  // Search along z.
-    .use_gpu = false,
+    .use_gpu = use_gpu,
   };
   struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
 
@@ -493,8 +558,14 @@ test_2d_find_peaks_complex(int poly_order)
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
     TEST_CHECK(ptype == expected_peaks[p].type);
     
-    const struct gkyl_array *vals = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+    const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size);
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(vals, vals_d);
+    gkyl_array_copy(coords, coords_d);
     
     // Check each psi cell.
     struct gkyl_range_iter iter;
@@ -533,16 +604,19 @@ test_2d_find_peaks_complex(int poly_order)
     }
     gkyl_array_release(vals);
     gkyl_array_release(coords);
+    gkyl_array_release(vals_d);
+    gkyl_array_release(coords_d);
   }
 
   gkyl_array_release(nodes);
+  gkyl_array_release(f_ho);
   gkyl_array_release(f);
   gkyl_array_dg_find_peaks_release(peaks);
 }
 
 // Test 1D project_on_peaks with complex function.
 void
-test_1d_project_on_peaks(int poly_order)
+test_1d_project_on_peaks(int poly_order, bool use_gpu)
 {
   double lower[] = {-2.0*M_PI};
   double upper[] = {2.0*M_PI};
@@ -557,18 +631,24 @@ test_1d_project_on_peaks(int poly_order)
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
-  // Project test function for peak finding.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  // Project test function for peak finding (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
   gkyl_eval_on_nodes_release(ev);
 
   // Project quadratic function to evaluate at peaks.
-  struct gkyl_array *g = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_1d, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho);
   gkyl_eval_on_nodes_release(ev);
 
+  // Create device copies if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+  gkyl_array_copy(g, g_ho);
+
   // Create peak finder.
   struct gkyl_array_dg_find_peaks_inp inp = {
     .basis = &basis,
@@ -576,7 +656,7 @@ test_1d_project_on_peaks(int poly_order)
     .range = &local,
     .range_ext = &local_ext,
     .search_dir = 0,
-    .use_gpu = false,
+    .use_gpu = use_gpu,
   };
   struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
   gkyl_array_dg_find_peaks_advance(peaks, f);
@@ -589,14 +669,11 @@ test_1d_project_on_peaks(int poly_order)
   const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
   struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX];
   for (int p = 0; p < num_peaks; p++) {
-    g_at_peaks[p] = gkyl_array_new(GKYL_DOUBLE, out_basis->num_basis, out_range_ext->volume);
+    g_at_peaks[p] = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume);
   }
   gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks);
 
   // Verify that g evaluated at each peak matches analytical values.
-  // For 1D->0D, output is p=0, so the value is already the cell average.
-  // The cell average of a p=0 expansion is value / sqrt(volume), and
-  // for a 1D cell with volume=1, it's just the value / sqrt(1.0) = value.
   struct {
     enum gkyl_peak_type type;
     double z_expected;
@@ -612,15 +689,24 @@ test_1d_project_on_peaks(int poly_order)
     {GKYL_PEAK_EDGE_HI,     2.0*M_PI,},
   };
   for (int p = 0; p < num_peaks; p++) {
-    const double *g_val = gkyl_array_cfetch(g_at_peaks[p], 0);
+    // Copy back to host for verification.
+    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp, g_at_peaks[p]->size);
+    gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]);
+
+    const double *g_val = gkyl_array_cfetch(g_at_peaks_ho, 0);
     double z = expected_peaks[p].z_expected;
     double expected = z * z;
     TEST_CHECK(gkyl_compare_double(g_val[0], expected, 1e-12));
+    TEST_MSG("Peak %d: z=%.5f, g_at_peak=%.5f, expected=%.5f", p, z, g_val[0], expected);
+
+    gkyl_array_release(g_at_peaks_ho);
   }
 
   for (int p = 0; p < num_peaks; p++) {
     gkyl_array_release(g_at_peaks[p]);
   }
+  gkyl_array_release(f_ho);
+  gkyl_array_release(g_ho);
   gkyl_array_release(f);
   gkyl_array_release(g);
   gkyl_array_dg_find_peaks_release(peaks);
@@ -628,7 +714,7 @@ test_1d_project_on_peaks(int poly_order)
 
 // Test 2D project_on_peaks with complex function.
 void
-test_2d_project_on_peaks(int poly_order)
+test_2d_project_on_peaks(int poly_order, bool use_gpu)
 {
   double lower[] = {0.5, -2.0*M_PI};
   double upper[] = {2.0, 2.0*M_PI};
@@ -644,18 +730,24 @@ test_2d_project_on_peaks(int poly_order)
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
-  // Project test function for peak finding.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  // Project test function for peak finding (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
   gkyl_eval_on_nodes_release(ev);
 
   // Project quadratic function to evaluate at peaks: g(psi, z) = z^2 * psi^2
-  struct gkyl_array *g = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_2d, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho);
   gkyl_eval_on_nodes_release(ev);
 
+  // Create device copies if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+  gkyl_array_copy(g, g_ho);
+
   // Create peak finder (search along z, which is direction 1).
   struct gkyl_array_dg_find_peaks_inp inp = {
     .basis = &basis,
@@ -663,7 +755,7 @@ test_2d_project_on_peaks(int poly_order)
     .range = &local,
     .range_ext = &local_ext,
     .search_dir = 1,  // Search along z.
-    .use_gpu = false,
+    .use_gpu = use_gpu,
   };
   struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
   gkyl_array_dg_find_peaks_advance(peaks, f);
@@ -680,7 +772,7 @@ test_2d_project_on_peaks(int poly_order)
   // Allocate output arrays for projected values.
   struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX];
   for (int p = 0; p < num_peaks; p++) {
-    g_at_peaks[p] = gkyl_array_new(GKYL_DOUBLE, out_basis->num_basis, out_range_ext->volume);
+    g_at_peaks[p] = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume);
   }
   gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks);
 
@@ -696,7 +788,13 @@ test_2d_project_on_peaks(int poly_order)
 
   // Verify that g evaluated at each peak matches analytical values.
   for (int p = 0; p < num_peaks; p++) {
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(coords, coords_d);
+    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp, g_at_peaks[p]->size);
+    gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]);
     
     // Check each psi cell.
     struct gkyl_range_iter iter;
@@ -704,7 +802,7 @@ test_2d_project_on_peaks(int poly_order)
     while (gkyl_range_iter_next(&iter)) {
       long linidx = gkyl_range_idx(out_range, iter.idx);
       
-      const double *g_val_d = gkyl_array_cfetch(g_at_peaks[p], linidx);
+      const double *g_val_d = gkyl_array_cfetch(g_at_peaks_ho, linidx);
       const double *coord_d = gkyl_array_cfetch(coords, linidx);
       
       // Get cell center for physical psi coordinate.
@@ -729,6 +827,8 @@ test_2d_project_on_peaks(int poly_order)
       }
     }
     gkyl_array_release(coords);
+    gkyl_array_release(coords_d);
+    gkyl_array_release(g_at_peaks_ho);
   }
 
   // Clean up.
@@ -736,6 +836,8 @@ test_2d_project_on_peaks(int poly_order)
   for (int p = 0; p < num_peaks; p++) {
     gkyl_array_release(g_at_peaks[p]);
   }
+  gkyl_array_release(f_ho);
+  gkyl_array_release(g_ho);
   gkyl_array_release(f);
   gkyl_array_release(g);
   gkyl_array_dg_find_peaks_release(peaks);
@@ -744,7 +846,7 @@ test_2d_project_on_peaks(int poly_order)
 
 // Test 1D project_on_peak_idx with complex function.
 void
-test_1d_project_on_peak_idx(int poly_order)
+test_1d_project_on_peak_idx(int poly_order, bool use_gpu)
 {
   double lower[] = {-2.0*M_PI};
   double upper[] = {2.0*M_PI};
@@ -759,18 +861,24 @@ test_1d_project_on_peak_idx(int poly_order)
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
-  // Project test function for peak finding.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  // Project test function for peak finding (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
   gkyl_eval_on_nodes_release(ev);
 
   // Project quadratic function to evaluate at peaks.
-  struct gkyl_array *g = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_1d, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho);
   gkyl_eval_on_nodes_release(ev);
 
+  // Create device copies if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+  gkyl_array_copy(g, g_ho);
+
   // Create peak finder.
   struct gkyl_array_dg_find_peaks_inp inp = {
     .basis = &basis,
@@ -778,7 +886,7 @@ test_1d_project_on_peak_idx(int poly_order)
     .range = &local,
     .range_ext = &local_ext,
     .search_dir = 0,
-    .use_gpu = false,
+    .use_gpu = use_gpu,
   };
   struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
   gkyl_array_dg_find_peaks_advance(peaks, f);
@@ -789,15 +897,11 @@ test_1d_project_on_peak_idx(int poly_order)
   // Allocate output arrays for projected values.
   const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks);
   const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
-  struct gkyl_array *g_at_peaks = gkyl_array_new(GKYL_DOUBLE, out_basis->num_basis, out_range_ext->volume);
+  struct gkyl_array *g_at_peaks = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume);
 
   int chosen_idx = 1;
   gkyl_array_dg_find_peaks_project_on_peak_idx(peaks, g, chosen_idx, g_at_peaks);
 
-  // Verify that g evaluated at each peak matches analytical values.
-  // For 1D->0D, output is p=0, so the value is already the cell average.
-  // The cell average of a p=0 expansion is value / sqrt(volume), and
-  // for a 1D cell with volume=1, it's just the value / sqrt(1.0) = value.
   struct {
     enum gkyl_peak_type type;
     double z_expected;
@@ -812,13 +916,20 @@ test_1d_project_on_peak_idx(int poly_order)
     {GKYL_PEAK_LOCAL_MAX,   3.0*M_PI/2.0,},
     {GKYL_PEAK_EDGE_HI,     2.0*M_PI,},
   };
+
+  // Copy back to host for verification.
+  struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks->ncomp, g_at_peaks->size);
+  gkyl_array_copy(g_at_peaks_ho, g_at_peaks);
   
-  const double *g_val = gkyl_array_cfetch(g_at_peaks, 0);
+  const double *g_val = gkyl_array_cfetch(g_at_peaks_ho, 0);
   double z = expected_peaks[chosen_idx].z_expected;
   double expected = z * z;
   TEST_CHECK(gkyl_compare_double(g_val[0], expected, 1e-12));
 
+  gkyl_array_release(g_at_peaks_ho);
   gkyl_array_release(g_at_peaks);
+  gkyl_array_release(f_ho);
+  gkyl_array_release(g_ho);
   gkyl_array_release(f);
   gkyl_array_release(g);
   gkyl_array_dg_find_peaks_release(peaks);
@@ -826,7 +937,7 @@ test_1d_project_on_peak_idx(int poly_order)
 
 // Test 2D project_on_peak_idx with complex function.
 void
-test_2d_project_on_peak_idx(int poly_order)
+test_2d_project_on_peak_idx(int poly_order, bool use_gpu)
 {
   double lower[] = {0.5, -2.0*M_PI};
   double upper[] = {2.0, 2.0*M_PI};
@@ -842,18 +953,24 @@ test_2d_project_on_peak_idx(int poly_order)
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
-  // Project test function for peak finding.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  // Project test function for peak finding (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
   gkyl_eval_on_nodes_release(ev);
 
   // Project quadratic function to evaluate at peaks: g(psi, z) = z^2 * psi^2
-  struct gkyl_array *g = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
   ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_2d, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho);
   gkyl_eval_on_nodes_release(ev);
 
+  // Create device copies if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+  gkyl_array_copy(g, g_ho);
+
   // Create peak finder (search along z, which is direction 1).
   struct gkyl_array_dg_find_peaks_inp inp = {
     .basis = &basis,
@@ -861,7 +978,7 @@ test_2d_project_on_peak_idx(int poly_order)
     .range = &local,
     .range_ext = &local_ext,
     .search_dir = 1,  // Search along z.
-    .use_gpu = false,
+    .use_gpu = use_gpu,
   };
   struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
   gkyl_array_dg_find_peaks_advance(peaks, f);
@@ -878,7 +995,7 @@ test_2d_project_on_peak_idx(int poly_order)
   // Allocate output arrays for projected values.
   struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX];
   for (int p = 0; p < num_peaks; p++) {
-    g_at_peaks[p] = gkyl_array_new(GKYL_DOUBLE, out_basis->num_basis, out_range_ext->volume);
+    g_at_peaks[p] = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume);
   }
   gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks);
 
@@ -894,7 +1011,13 @@ test_2d_project_on_peak_idx(int poly_order)
 
   // Verify that g evaluated at each peak matches analytical values.
   for (int p = 0; p < num_peaks; p++) {
-    const struct gkyl_array *coords = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(coords, coords_d);
+    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp, g_at_peaks[p]->size);
+    gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]);
     
     // Check each psi cell.
     struct gkyl_range_iter iter;
@@ -902,7 +1025,7 @@ test_2d_project_on_peak_idx(int poly_order)
     while (gkyl_range_iter_next(&iter)) {
       long linidx = gkyl_range_idx(out_range, iter.idx);
       
-      const double *g_val_d = gkyl_array_cfetch(g_at_peaks[p], linidx);
+      const double *g_val_d = gkyl_array_cfetch(g_at_peaks_ho, linidx);
       const double *coord_d = gkyl_array_cfetch(coords, linidx);
       
       // Get cell center for physical psi coordinate.
@@ -927,6 +1050,8 @@ test_2d_project_on_peak_idx(int poly_order)
       }
     }
     gkyl_array_release(coords);
+    gkyl_array_release(coords_d);
+    gkyl_array_release(g_at_peaks_ho);
   }
 
   // Clean up.
@@ -934,41 +1059,59 @@ test_2d_project_on_peak_idx(int poly_order)
   for (int p = 0; p < num_peaks; p++) {
     gkyl_array_release(g_at_peaks[p]);
   }
+  gkyl_array_release(f_ho);
+  gkyl_array_release(g_ho);
   gkyl_array_release(f);
   gkyl_array_release(g);
   gkyl_array_dg_find_peaks_release(peaks);
 }
 
-
-
-
-
-
-
-
-
-
-
-
-void test_1d_cos_p1() { test_1d_find_peaks_cos(1); }
-void test_1d_mirror_p1() { test_1d_find_peaks_mirror(1); }
-void test_1d_complex_p1() { test_1d_find_peaks_complex(1); }
-void test_2d_p1() { test_2d_find_peaks(1); }
-void test_2d_complex_p1() { test_2d_find_peaks_complex(1); }
-void test_1d_project_p1() { test_1d_project_on_peaks(1); }
-void test_2d_project_p1() { test_2d_project_on_peaks(1); }
-void test_1d_project_idx_p1() { test_1d_project_on_peak_idx(1); }
-void test_2d_project_idx_p1() { test_2d_project_on_peak_idx(1); }
+// CPU test wrappers
+void test_1d_cos_p1_ho() { test_1d_find_peaks_cos(1, false); }
+void test_1d_mirror_p1_ho() { test_1d_find_peaks_mirror(1, false); }
+void test_1d_complex_p1_ho() { test_1d_find_peaks_complex(1, false); }
+void test_2d_p1_ho() { test_2d_find_peaks(1, false); }
+void test_2d_complex_p1_ho() { test_2d_find_peaks_complex(1, false); }
+void test_1d_project_p1_ho() { test_1d_project_on_peaks(1, false); }
+void test_2d_project_p1_ho() { test_2d_project_on_peaks(1, false); }
+void test_1d_project_idx_p1_ho() { test_1d_project_on_peak_idx(1, false); }
+void test_2d_project_idx_p1_ho() { test_2d_project_on_peak_idx(1, false); }
+
+#ifdef GKYL_HAVE_CUDA
+
+// GPU test wrappers
+void test_1d_cos_p1_dev() { test_1d_find_peaks_cos(1, true); }
+void test_1d_mirror_p1_dev() { test_1d_find_peaks_mirror(1, true); }
+void test_1d_complex_p1_dev() { test_1d_find_peaks_complex(1, true); }
+void test_2d_p1_dev() { test_2d_find_peaks(1, true); }
+void test_2d_complex_p1_dev() { test_2d_find_peaks_complex(1, true); }
+void test_1d_project_p1_dev() { test_1d_project_on_peaks(1, true); }
+void test_2d_project_p1_dev() { test_2d_project_on_peaks(1, true); }
+void test_1d_project_idx_p1_dev() { test_1d_project_on_peak_idx(1, true); }
+void test_2d_project_idx_p1_dev() { test_2d_project_on_peak_idx(1, true); }
+
+#endif
 
 TEST_LIST = {
-  {"test_1d_cos_p1", test_1d_cos_p1},
-  {"test_1d_mirror_p1", test_1d_mirror_p1},
-  {"test_1d_complex_p1", test_1d_complex_p1},
-  {"test_2d_p1", test_2d_p1},
-  {"test_2d_complex_p1", test_2d_complex_p1},
-  {"test_1d_project_p1", test_1d_project_p1},
-  {"test_2d_project_p1", test_2d_project_p1},
-  {"test_1d_project_idx_p1", test_1d_project_idx_p1},
-  // {"test_2d_project_idx_p1", test_2d_project_idx_p1},
+  {"test_1d_cos_p1", test_1d_cos_p1_ho},
+  {"test_1d_mirror_p1", test_1d_mirror_p1_ho},
+  {"test_1d_complex_p1", test_1d_complex_p1_ho},
+  {"test_2d_p1", test_2d_p1_ho},
+  {"test_2d_complex_p1", test_2d_complex_p1_ho},
+  {"test_1d_project_p1", test_1d_project_p1_ho},
+  {"test_2d_project_p1", test_2d_project_p1_ho},
+  {"test_1d_project_idx_p1", test_1d_project_idx_p1_ho},
+  {"test_2d_project_idx_p1", test_2d_project_idx_p1_ho},
+#ifdef GKYL_HAVE_CUDA
+  {"test_1d_cos_p1_gpu", test_1d_cos_p1_dev},
+  {"test_1d_mirror_p1_gpu", test_1d_mirror_p1_dev},
+  {"test_1d_complex_p1_gpu", test_1d_complex_p1_dev},
+  {"test_2d_p1_gpu", test_2d_p1_dev},
+  {"test_2d_complex_p1_gpu", test_2d_complex_p1_dev},
+  {"test_1d_project_p1_gpu", test_1d_project_p1_dev},
+  {"test_2d_project_p1_gpu", test_2d_project_p1_dev},
+  {"test_1d_project_idx_p1_gpu", test_1d_project_idx_p1_dev},
+  {"test_2d_project_idx_p1_gpu", test_2d_project_idx_p1_dev},
+#endif
   {NULL, NULL},
 };
diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c
index 877c91aff6..bd726d7b3a 100644
--- a/core/zero/array_dg_find_peaks.c
+++ b/core/zero/array_dg_find_peaks.c
@@ -13,7 +13,7 @@
  * to count the number of peaks and determine their types.
  */
 static void
-count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in,
+count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in_ho,
   int preserved_idx, int *num_peaks_out, enum gkyl_peak_type *peak_types_out)
 {
   int ndim = up->grid.ndim;
@@ -24,10 +24,6 @@ count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gk
   struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, ndim, up->basis.num_basis);
   up->basis.node_list(gkyl_array_fetch(nodes, 0));
 
-  // Copy input to host if needed.
-  struct gkyl_array *in_ho = gkyl_array_new(GKYL_DOUBLE, in->ncomp, in->size);
-  gkyl_array_copy(in_ho, in);
-
   // Determine number of nodes along search direction.
   int num_cells_search = up->range.upper[search_dir] - up->range.lower[search_dir] + 1;
   int nodes_per_cell = (poly_order == 1) ? 2 : 3;
@@ -132,7 +128,6 @@ count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gk
   gkyl_free(vals);
   gkyl_free(coords);
   gkyl_array_release(nodes);
-  gkyl_array_release(in_ho);
 }
 
 /**
@@ -277,7 +272,7 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
           search_node_idx = 2*cell_local + search_node_offset;
 
         if (!visited[search_node_idx]) {
-          double val = up->basis.eval_expand(nod_log, f_d);
+          double val = up->basis.eval_expand(nod_log, f_d); // GPU error here
           double nod_phys[GKYL_MAX_DIM];
           dg_find_peaks_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys);
 
@@ -458,23 +453,23 @@ eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
 }
 
 struct gkyl_array_dg_find_peaks*
-gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *inp, const struct gkyl_array *field)
+gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_peaks_inp, const struct gkyl_array *in)
 {
   struct gkyl_array_dg_find_peaks *up = gkyl_malloc(sizeof(*up));
 
   // Copy input parameters.
-  up->grid = *inp->grid;
-  up->basis = *inp->basis;
-  up->range = *inp->range;
-  up->range_ext = *inp->range_ext;
-  up->search_dir = inp->search_dir;
-  up->use_gpu = inp->use_gpu;
-
-  int ndim = inp->grid->ndim;
-  int poly_order = inp->basis->poly_order;
+  up->grid = *find_peaks_inp->grid;
+  up->basis = *find_peaks_inp->basis;
+  up->range = *find_peaks_inp->range;
+  up->range_ext = *find_peaks_inp->range_ext;
+  up->search_dir = find_peaks_inp->search_dir;
+  up->use_gpu = find_peaks_inp->use_gpu;
+
+  int ndim = find_peaks_inp->grid->ndim;
+  int poly_order = find_peaks_inp->basis->poly_order;
   int out_dim = ndim - 1;
 
-  assert(inp->search_dir >= 0 && inp->search_dir < ndim);
+  assert(find_peaks_inp->search_dir >= 0 && find_peaks_inp->search_dir < ndim);
 
   // Set up output grid/basis/range.
   if (out_dim == 0) {
@@ -492,20 +487,20 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *inp, con
   }
   else if (out_dim == 1) {
     // 2D -> 1D case.
-    int preserved_dir = (inp->search_dir == 0) ? 1 : 0;
+    int preserved_dir = (find_peaks_inp->search_dir == 0) ? 1 : 0;
 
-    int cells_out = inp->grid->cells[preserved_dir];
-    double lower_out = inp->grid->lower[preserved_dir];
-    double upper_out = inp->grid->upper[preserved_dir];
+    int cells_out = find_peaks_inp->grid->cells[preserved_dir];
+    double lower_out = find_peaks_inp->grid->lower[preserved_dir];
+    double upper_out = find_peaks_inp->grid->upper[preserved_dir];
 
     gkyl_rect_grid_init(&up->out_grid, 1, &lower_out, &upper_out, &cells_out);
 
-    int lower_idx[1] = {inp->range->lower[preserved_dir]};
-    int upper_idx[1] = {inp->range->upper[preserved_dir]};
+    int lower_idx[1] = {find_peaks_inp->range->lower[preserved_dir]};
+    int upper_idx[1] = {find_peaks_inp->range->upper[preserved_dir]};
     gkyl_range_init(&up->out_range, 1, lower_idx, upper_idx);
 
-    int lower_ext_idx[1] = {inp->range_ext->lower[preserved_dir]};
-    int upper_ext_idx[1] = {inp->range_ext->upper[preserved_dir]};
+    int lower_ext_idx[1] = {find_peaks_inp->range_ext->lower[preserved_dir]};
+    int upper_ext_idx[1] = {find_peaks_inp->range_ext->upper[preserved_dir]};
     gkyl_range_init(&up->out_range_ext, 1, lower_ext_idx, upper_ext_idx);
 
     gkyl_cart_modal_serendip(&up->out_basis, 1, poly_order);
@@ -520,8 +515,8 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *inp, con
   }
 
   // Store node locations for input basis.
-  up->nodes = gkyl_array_new(GKYL_DOUBLE, ndim, inp->basis->num_basis);
-  inp->basis->node_list(gkyl_array_fetch(up->nodes, 0));
+  up->nodes = gkyl_array_new(GKYL_DOUBLE, ndim, find_peaks_inp->basis->num_basis);
+  find_peaks_inp->basis->node_list(gkyl_array_fetch(up->nodes, 0));
 
   // Create nodal-to-modal converter.
   up->n2m = gkyl_nodal_ops_new(&up->out_basis, &up->out_grid, false);
@@ -529,11 +524,21 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *inp, con
   // Count peaks at middle preserved coordinate.
   int mid_preserved_idx = 0;
   if (out_dim == 1) {
-    int preserved_dir = (inp->search_dir == 0) ? 1 : 0;
-    mid_preserved_idx = (inp->range->lower[preserved_dir] + inp->range->upper[preserved_dir]) / 2;
+    int preserved_dir = (find_peaks_inp->search_dir == 0) ? 1 : 0;
+    mid_preserved_idx = (find_peaks_inp->range->lower[preserved_dir] + find_peaks_inp->range->upper[preserved_dir]) / 2;
   }
 
-  count_peaks_along_dir(up, field, mid_preserved_idx, &up->num_peaks, up->peak_types);
+
+  // Copy input to host if needed.
+  if (up->use_gpu) {
+    struct gkyl_array *field_ho = gkyl_array_new(GKYL_DOUBLE, in->ncomp, in->size);
+    gkyl_array_copy(field_ho, in);
+    count_peaks_along_dir(up, field_ho, mid_preserved_idx, &up->num_peaks, up->peak_types);
+    gkyl_array_release(field_ho);
+  }
+  else {
+    count_peaks_along_dir(up, in, mid_preserved_idx, &up->num_peaks, up->peak_types);
+  }
 
   // Allocate output arrays for each peak.
   for (int p = 0; p < up->num_peaks; p++) {
@@ -553,21 +558,37 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *inp, con
     up->out_eval_at_peaks_vals_nodal[p] = NULL;
   }
 
+  // When we are on GPU, we need host duplicate arrays because this updater is only on CPU
+  up->in_ho = NULL;
+  up->out_vals_ho = NULL; 
+  if (up->use_gpu) {
+    up->in_ho = gkyl_array_new(GKYL_DOUBLE, in->ncomp, in->size);
+    up->out_vals_ho = gkyl_array_new(GKYL_DOUBLE, up->out_vals[0]->ncomp, up->out_vals[0]->size);
+  }
+
   return up;
 }
 
 void
 gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in)
 {
-  // Needs a gpu implementation
-
   int ndim = up->grid.ndim;
   int out_dim = ndim - 1;
 
-  // Find peaks for each preserved-direction node.
-  int num_nodes_out = up->out_nrange.volume;
-  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-    find_peaks_for_preserved_node(up, in, pres_node);
+  if (up->use_gpu) {
+    gkyl_array_copy(up->in_ho, in);
+    // Find peaks for each preserved-direction node.
+    int num_nodes_out = up->out_nrange.volume;
+    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+      find_peaks_for_preserved_node(up, up->in_ho, pres_node);
+    }
+  }
+  else {
+    // Find peaks for each preserved-direction node.
+    int num_nodes_out = up->out_nrange.volume;
+    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+      find_peaks_for_preserved_node(up, in, pres_node);
+    }
   }
 
   // Transform nodal to modal for each peak.
@@ -675,28 +696,56 @@ gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
 
   // Evaluate the input array at peak locations for each preserved-direction node.
   int num_nodes_out = up->out_nrange.volume;
-  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-    for (int p = 0; p < up->num_peaks; p++) {
-      eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, p);
+  if (up->use_gpu) {
+    gkyl_array_copy(up->in_ho, in_array);
+    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+      for (int p = 0; p < up->num_peaks; p++) {
+        eval_array_at_peaks_for_preserved_node(up, up->in_ho, pres_node, up->out_eval_at_peaks_vals_nodal, p);
+      }
     }
-  }
-
-  // Transform nodal to modal for each peak.
-  if (out_dim == 0) {
-    // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
-    for (int p = 0; p < up->num_peaks; p++) {
-      double *val_m = gkyl_array_fetch(out_vals[p], 0);
-      const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[p], 0);
-      val_m[0] = val_n[0];
+    // Transform nodal to modal for each peak.
+    if (out_dim == 0) {
+      // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+      for (int p = 0; p < up->num_peaks; p++) {
+        double *val_m = gkyl_array_fetch(up->out_vals_ho, 0);
+        const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[p], 0);
+        val_m[0] = val_n[0];
+        gkyl_array_copy(out_vals[p], up->out_vals_ho);
+      }
+    }
+    else {
+      // 2D -> 1D case: use nodal-to-modal transform.
+      for (int p = 0; p < up->num_peaks; p++) {
+        gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+          &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[p], up->out_vals_ho, false);
+        gkyl_array_copy(out_vals[p], up->out_vals_ho);
+      }
     }
   }
   else {
-    // 2D -> 1D case: use nodal-to-modal transform.
-    for (int p = 0; p < up->num_peaks; p++) {
-      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-        &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[p], out_vals[p], false);
+    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+      for (int p = 0; p < up->num_peaks; p++) {
+        eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, p);
+      }
+    }
+    // Transform nodal to modal for each peak.
+    if (out_dim == 0) {
+      // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+      for (int p = 0; p < up->num_peaks; p++) {
+        double *val_m = gkyl_array_fetch(out_vals[p], 0);
+        const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[p], 0);
+        val_m[0] = val_n[0];
+      }
+    }
+    else {
+      // 2D -> 1D case: use nodal-to-modal transform.
+      for (int p = 0; p < up->num_peaks; p++) {
+        gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+          &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[p], out_vals[p], false);
+      }
     }
   }
+
 }
 
 void
@@ -710,21 +759,44 @@ gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up
 
   // Evaluate the input array at peak locations for each preserved-direction node.
   int num_nodes_out = up->out_nrange.volume;
-  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-    eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, peak_idx);
-  }
 
-  // Transform nodal to modal for each peak.
-  if (out_dim == 0) {
-    // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
-    double *val_m = gkyl_array_fetch(out_val, 0);
-    const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[peak_idx], 0);
-    val_m[0] = val_n[0];
+  if (up->use_gpu) {
+    gkyl_array_copy(up->in_ho, in_array);
+    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+      eval_array_at_peaks_for_preserved_node(up, up->in_ho, pres_node, up->out_eval_at_peaks_vals_nodal, peak_idx);
+    }
+    // Transform nodal to modal for each peak.
+    if (out_dim == 0) {
+      // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+      double *val_m = gkyl_array_fetch(up->out_vals_ho, 0);
+      const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[peak_idx], 0);
+      val_m[0] = val_n[0];
+      gkyl_array_copy(out_val, up->out_vals_ho);
+    }
+    else {
+      // 2D -> 1D case: use nodal-to-modal transform.
+      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], up->out_vals_ho, false);
+      gkyl_array_copy(out_val, up->out_vals_ho);
+    }
   }
   else {
-    // 2D -> 1D case: use nodal-to-modal transform.
-    gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-      &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], out_val, false);
+    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+      eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, peak_idx);
+    }
+
+    // Transform nodal to modal for each peak.
+    if (out_dim == 0) {
+      // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+      double *val_m = gkyl_array_fetch(out_val, 0);
+      const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[peak_idx], 0);
+      val_m[0] = val_n[0];
+    }
+    else {
+      // 2D -> 1D case: use nodal-to-modal transform.
+      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], out_val, false);
+    }
   }
 }
 
@@ -738,6 +810,10 @@ gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up)
     gkyl_array_release(up->out_coords_nodal[p]);
     gkyl_array_release(up->out_eval_at_peaks_vals_nodal[p]);
   }
+  if (up->use_gpu) {
+    gkyl_array_release(up->in_ho);
+    gkyl_array_release(up->out_vals_ho);
+  }
   gkyl_array_release(up->nodes);
   gkyl_nodal_ops_release(up->n2m);
   gkyl_free(up);
diff --git a/core/zero/gkyl_array_dg_find_peaks_priv.h b/core/zero/gkyl_array_dg_find_peaks_priv.h
index 49b51fd810..e20dd6f196 100644
--- a/core/zero/gkyl_array_dg_find_peaks_priv.h
+++ b/core/zero/gkyl_array_dg_find_peaks_priv.h
@@ -50,6 +50,10 @@ struct gkyl_array_dg_find_peaks {
   struct gkyl_array *out_coords_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Nodal peak coordinates
   struct gkyl_array *out_eval_at_peaks_vals_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Values evaluated at peaks (nodal)
 
+  // GPU implementation specific arrays
+  struct gkyl_array *in_ho; // Host copy of input array
+  struct gkyl_array *out_vals_ho; // Host copy of output values
+
   // Internal working arrays.
   struct gkyl_array *nodes;         // Node locations in logical coords
 

From a3f949e5993f3d980135a860cb840243ed1aaade Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 20 Feb 2026 10:07:07 -0800
Subject: [PATCH 15/32] rework array find peaks to have a full GPU
 implementation because the arrays need to be passed to objects like the
 loss_cone_mask, where it expects these to be GPU arrays. It's just easier to
 have this module fit the archatecture of the rest of the code, rather than
 doing something different and copying between device and host. It wouldn't
 interface well. Claude generated most of the cuda code, with strong guidence
 from Maxwell

---
 core/zero/array_dg_find_peaks.c               | 197 +++--
 core/zero/array_dg_find_peaks_cu.cu           | 581 +++++++++++++++
 core/zero/gkyl_array_dg_find_peaks.h          |  48 ++
 core/zero/gkyl_array_dg_find_peaks_priv.h     |  22 +-
 gyrokinetic/apps/gk_species_fdot_multiplier.c |  18 +-
 .../creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c | 671 +++++++-----------
 .../rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c    |   2 +-
 .../rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c  |   2 +-
 .../creg/rt_gk_wham_kinetic_poa_1x2v_p1.c     |   4 +-
 9 files changed, 1006 insertions(+), 539 deletions(-)
 create mode 100644 core/zero/array_dg_find_peaks_cu.cu

diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c
index bd726d7b3a..b16e3f1b38 100644
--- a/core/zero/array_dg_find_peaks.c
+++ b/core/zero/array_dg_find_peaks.c
@@ -3,6 +3,7 @@
 #include <string.h>
 
 #include <gkyl_alloc.h>
+#include <gkyl_alloc_flags_priv.h>
 #include <gkyl_array.h>
 #include <gkyl_array_dg_find_peaks.h>
 #include <gkyl_array_dg_find_peaks_priv.h>
@@ -521,6 +522,9 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_pea
   // Create nodal-to-modal converter.
   up->n2m = gkyl_nodal_ops_new(&up->out_basis, &up->out_grid, false);
 
+  // No device basis on CPU.
+  up->out_basis_on_dev = NULL;
+
   // Count peaks at middle preserved coordinate.
   int mid_preserved_idx = 0;
   if (out_dim == 1) {
@@ -558,37 +562,40 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_pea
     up->out_eval_at_peaks_vals_nodal[p] = NULL;
   }
 
-  // When we are on GPU, we need host duplicate arrays because this updater is only on CPU
-  up->in_ho = NULL;
-  up->out_vals_ho = NULL; 
+  up->flags = 0;
+  GKYL_CLEAR_CU_ALLOC(up->flags);
+  up->ref_count = gkyl_ref_count_init(gkyl_array_dg_find_peaks_free);
+  up->on_dev = up; // CPU object points to itself.
+
+  struct gkyl_array_dg_find_peaks *up_out = up;
+#ifdef GKYL_HAVE_CUDA
   if (up->use_gpu) {
-    up->in_ho = gkyl_array_new(GKYL_DOUBLE, in->ncomp, in->size);
-    up->out_vals_ho = gkyl_array_new(GKYL_DOUBLE, up->out_vals[0]->ncomp, up->out_vals[0]->size);
+    up_out = gkyl_array_dg_find_peaks_new_cu(up);
+    gkyl_array_dg_find_peaks_release(up);
   }
+#endif
 
-  return up;
+  return up_out;
 }
 
 void
 gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in)
 {
+#ifdef GKYL_HAVE_CUDA
+  if (up->use_gpu) {
+    gkyl_array_dg_find_peaks_advance_cu(up, in);
+    return;
+  }
+#endif
+
   int ndim = up->grid.ndim;
   int out_dim = ndim - 1;
 
-  if (up->use_gpu) {
-    gkyl_array_copy(up->in_ho, in);
-    // Find peaks for each preserved-direction node.
-    int num_nodes_out = up->out_nrange.volume;
-    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-      find_peaks_for_preserved_node(up, up->in_ho, pres_node);
-    }
-  }
-  else {
-    // Find peaks for each preserved-direction node.
-    int num_nodes_out = up->out_nrange.volume;
-    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-      find_peaks_for_preserved_node(up, in, pres_node);
-    }
+
+  // Find peaks for each preserved-direction node.
+  int num_nodes_out = up->out_nrange.volume;
+  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+    find_peaks_for_preserved_node(up, in, pres_node);
   }
 
   // Transform nodal to modal for each peak.
@@ -689,70 +696,51 @@ void
 gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
   const struct gkyl_array *in_array, struct gkyl_array **out_vals)
 {
-  // Needs a GPU implementation
+#ifdef GKYL_HAVE_CUDA
+  if (up->use_gpu) {
+    gkyl_array_dg_find_peaks_project_on_peaks_cu(up, in_array, out_vals);
+    return;
+  }
+#endif
 
   int ndim = up->grid.ndim;
   int out_dim = ndim - 1;
 
   // Evaluate the input array at peak locations for each preserved-direction node.
   int num_nodes_out = up->out_nrange.volume;
-  if (up->use_gpu) {
-    gkyl_array_copy(up->in_ho, in_array);
-    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-      for (int p = 0; p < up->num_peaks; p++) {
-        eval_array_at_peaks_for_preserved_node(up, up->in_ho, pres_node, up->out_eval_at_peaks_vals_nodal, p);
-      }
-    }
-    // Transform nodal to modal for each peak.
-    if (out_dim == 0) {
-      // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
-      for (int p = 0; p < up->num_peaks; p++) {
-        double *val_m = gkyl_array_fetch(up->out_vals_ho, 0);
-        const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[p], 0);
-        val_m[0] = val_n[0];
-        gkyl_array_copy(out_vals[p], up->out_vals_ho);
-      }
+  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+    for (int p = 0; p < up->num_peaks; p++) {
+      eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, p);
     }
-    else {
-      // 2D -> 1D case: use nodal-to-modal transform.
-      for (int p = 0; p < up->num_peaks; p++) {
-        gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-          &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[p], up->out_vals_ho, false);
-        gkyl_array_copy(out_vals[p], up->out_vals_ho);
-      }
+  }
+  // Transform nodal to modal for each peak.
+  if (out_dim == 0) {
+    // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+    for (int p = 0; p < up->num_peaks; p++) {
+      double *val_m = gkyl_array_fetch(out_vals[p], 0);
+      const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[p], 0);
+      val_m[0] = val_n[0];
     }
   }
   else {
-    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-      for (int p = 0; p < up->num_peaks; p++) {
-        eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, p);
-      }
-    }
-    // Transform nodal to modal for each peak.
-    if (out_dim == 0) {
-      // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
-      for (int p = 0; p < up->num_peaks; p++) {
-        double *val_m = gkyl_array_fetch(out_vals[p], 0);
-        const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[p], 0);
-        val_m[0] = val_n[0];
-      }
-    }
-    else {
-      // 2D -> 1D case: use nodal-to-modal transform.
-      for (int p = 0; p < up->num_peaks; p++) {
-        gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-          &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[p], out_vals[p], false);
-      }
+    // 2D -> 1D case: use nodal-to-modal transform.
+    for (int p = 0; p < up->num_peaks; p++) {
+      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[p], out_vals[p], false);
     }
   }
-
 }
 
 void
 gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up,
   const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val)
 {
-  // Needs a GPU implementation
+#ifdef GKYL_HAVE_CUDA
+  if (up->use_gpu) {
+    gkyl_array_dg_find_peaks_project_on_peak_idx_cu(up, in_array, peak_idx, out_val);
+    return;
+  }
+#endif
 
   int ndim = up->grid.ndim;
   int out_dim = ndim - 1;
@@ -760,49 +748,37 @@ gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up
   // Evaluate the input array at peak locations for each preserved-direction node.
   int num_nodes_out = up->out_nrange.volume;
 
-  if (up->use_gpu) {
-    gkyl_array_copy(up->in_ho, in_array);
-    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-      eval_array_at_peaks_for_preserved_node(up, up->in_ho, pres_node, up->out_eval_at_peaks_vals_nodal, peak_idx);
-    }
-    // Transform nodal to modal for each peak.
-    if (out_dim == 0) {
-      // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
-      double *val_m = gkyl_array_fetch(up->out_vals_ho, 0);
-      const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[peak_idx], 0);
-      val_m[0] = val_n[0];
-      gkyl_array_copy(out_val, up->out_vals_ho);
-    }
-    else {
-      // 2D -> 1D case: use nodal-to-modal transform.
-      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-        &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], up->out_vals_ho, false);
-      gkyl_array_copy(out_val, up->out_vals_ho);
-    }
+  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+    eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, peak_idx);
   }
-  else {
-    for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-      eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, peak_idx);
-    }
 
-    // Transform nodal to modal for each peak.
-    if (out_dim == 0) {
-      // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
-      double *val_m = gkyl_array_fetch(out_val, 0);
-      const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[peak_idx], 0);
-      val_m[0] = val_n[0];
-    }
-    else {
-      // 2D -> 1D case: use nodal-to-modal transform.
-      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-        &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], out_val, false);
-    }
+  // Transform nodal to modal for each peak.
+  if (out_dim == 0) {
+    // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+    double *val_m = gkyl_array_fetch(out_val, 0);
+    const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[peak_idx], 0);
+    val_m[0] = val_n[0];
+  }
+  else {
+    // 2D -> 1D case: use nodal-to-modal transform.
+    gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+      &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], out_val, false);
   }
 }
 
+struct gkyl_array_dg_find_peaks*
+gkyl_array_dg_find_peaks_acquire(const struct gkyl_array_dg_find_peaks *up)
+{
+  gkyl_ref_count_inc(&up->ref_count);
+  return (struct gkyl_array_dg_find_peaks *)up;
+}
+
 void
-gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up)
+gkyl_array_dg_find_peaks_free(const struct gkyl_ref_count *ref)
 {
+  struct gkyl_array_dg_find_peaks *up =
+    container_of(ref, struct gkyl_array_dg_find_peaks, ref_count);
+
   for (int p = 0; p < up->num_peaks; p++) {
     gkyl_array_release(up->out_vals[p]);
     gkyl_array_release(up->out_coords[p]);
@@ -810,11 +786,20 @@ gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up)
     gkyl_array_release(up->out_coords_nodal[p]);
     gkyl_array_release(up->out_eval_at_peaks_vals_nodal[p]);
   }
-  if (up->use_gpu) {
-    gkyl_array_release(up->in_ho);
-    gkyl_array_release(up->out_vals_ho);
-  }
   gkyl_array_release(up->nodes);
   gkyl_nodal_ops_release(up->n2m);
+
+  if (GKYL_IS_CU_ALLOC(up->flags)) {
+    if (up->out_basis_on_dev)
+      gkyl_cart_modal_basis_release_cu(up->out_basis_on_dev);
+    gkyl_cu_free(up->on_dev);
+  }
+
   gkyl_free(up);
 }
+
+void
+gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up)
+{
+  gkyl_ref_count_dec(&up->ref_count);
+}
diff --git a/core/zero/array_dg_find_peaks_cu.cu b/core/zero/array_dg_find_peaks_cu.cu
new file mode 100644
index 0000000000..ad1ecfde50
--- /dev/null
+++ b/core/zero/array_dg_find_peaks_cu.cu
@@ -0,0 +1,581 @@
+/* -*- c++ -*- */
+extern "C" {
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_array_dg_find_peaks.h>
+#include <gkyl_array_dg_find_peaks_priv.h>
+#include <gkyl_alloc_flags_priv.h>
+#include <gkyl_nodal_ops.h>
+#include <assert.h>
+}
+
+// Maximum number of nodes along the search direction.
+// For p=2: total_nodes = 2*num_cells + 1. With up to 512 cells this is 1025.
+#define MAX_SEARCH_NODES 1025
+
+/**
+ * CUDA kernel: find peaks along the search direction for each preserved-direction
+ * node index. One thread per preserved_node_idx.
+ *
+ * Each thread:
+ *  1. Scans all cells along the search direction, collecting nodal values/coords
+ *     into thread-local arrays.
+ *  2. Extracts peaks (EDGE_LO, LOCAL_MAX, LOCAL_MIN, EDGE_HI) and writes results
+ *     into the nodal output arrays.
+ */
+__global__ void
+gkyl_find_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in, int num_nodes_out)
+{
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < num_nodes_out; tid += blockDim.x * gridDim.x)
+  {
+    int preserved_node_idx = (int)tid;
+
+    int ndim = up->grid.ndim;
+    int search_dir = up->search_dir;
+    int poly_order = up->basis.poly_order;
+    int num_basis = up->basis.num_basis;
+
+    // Number of cells and nodes along the search direction.
+    int num_cells_search = up->range.upper[search_dir] - up->range.lower[search_dir] + 1;
+    int total_nodes_search = (poly_order == 1) ? num_cells_search + 1 : 2*num_cells_search + 1;
+
+    // Thread-local storage for values, coordinates, and visited flags.
+    double vals[MAX_SEARCH_NODES];
+    double coords[MAX_SEARCH_NODES];
+    bool visited[MAX_SEARCH_NODES];
+    for (int i = 0; i < total_nodes_search; i++) {
+      vals[i] = 0.0;
+      coords[i] = 0.0;
+      visited[i] = false;
+    }
+
+    // Preserved direction (only used for 2D).
+    int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
+
+    // Iterate along cells in the search direction.
+    for (int cell_idx = up->range.lower[search_dir];
+         cell_idx <= up->range.upper[search_dir]; cell_idx++)
+    {
+      // For 2D, determine which cells in the preserved direction
+      // contribute to this preserved_node_idx.
+      int pres_cell_start, pres_cell_end;
+      if (ndim == 1) {
+        pres_cell_start = 0;
+        pres_cell_end = 0;
+      }
+      else {
+        if (poly_order == 1) {
+          if (preserved_node_idx == 0) {
+            pres_cell_start = up->range.lower[preserved_dir];
+            pres_cell_end = up->range.lower[preserved_dir];
+          }
+          else if (preserved_node_idx == up->out_nrange.upper[0]) {
+            pres_cell_start = up->range.upper[preserved_dir];
+            pres_cell_end = up->range.upper[preserved_dir];
+          }
+          else {
+            pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+            pres_cell_end = pres_cell_start + 1;
+            if (pres_cell_end > up->range.upper[preserved_dir])
+              pres_cell_end = up->range.upper[preserved_dir];
+          }
+        }
+        else { // poly_order == 2
+          int cell_local = preserved_node_idx / 2;
+          pres_cell_start = up->range.lower[preserved_dir] + cell_local;
+          pres_cell_end = pres_cell_start;
+          if (preserved_node_idx % 2 == 0 && preserved_node_idx > 0) {
+            pres_cell_start--;
+          }
+          if (pres_cell_start < up->range.lower[preserved_dir])
+            pres_cell_start = up->range.lower[preserved_dir];
+          if (pres_cell_end > up->range.upper[preserved_dir])
+            pres_cell_end = up->range.upper[preserved_dir];
+        }
+      }
+
+      for (int pres_cell = pres_cell_start; pres_cell <= pres_cell_end; pres_cell++) {
+        // Build cell index.
+        int idx[GKYL_MAX_DIM];
+        if (ndim == 1) {
+          idx[0] = cell_idx;
+        }
+        else {
+          idx[preserved_dir] = pres_cell;
+          idx[search_dir] = cell_idx;
+        }
+
+        long linidx = gkyl_range_idx(&up->range, idx);
+        const double *f_d = (const double *)gkyl_array_cfetch(in, linidx);
+
+        double xc[GKYL_MAX_DIM];
+        gkyl_rect_grid_cell_center(&up->grid, idx, xc);
+
+        // Evaluate at each node in this cell.
+        for (int n = 0; n < num_basis; n++) {
+          const double *nod_log = (const double *)gkyl_array_cfetch(up->nodes, n);
+
+          // Check if this node belongs to our preserved_node_idx (2D only).
+          if (ndim > 1) {
+            int pres_node_offset;
+            if (poly_order == 1) {
+              pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1;
+            }
+            else {
+              if (nod_log[preserved_dir] < -0.5)
+                pres_node_offset = 0;
+              else if (nod_log[preserved_dir] > 0.5)
+                pres_node_offset = 2;
+              else
+                pres_node_offset = 1;
+            }
+            int pres_cell_local = pres_cell - up->range.lower[preserved_dir];
+            int this_pres_node;
+            if (poly_order == 1)
+              this_pres_node = pres_cell_local + pres_node_offset;
+            else
+              this_pres_node = 2*pres_cell_local + pres_node_offset;
+
+            if (this_pres_node != preserved_node_idx)
+              continue;
+          }
+
+          // Determine node offset in the search direction.
+          int search_node_offset;
+          if (poly_order == 1) {
+            search_node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
+          }
+          else {
+            if (nod_log[search_dir] < -0.5)
+              search_node_offset = 0;
+            else if (nod_log[search_dir] > 0.5)
+              search_node_offset = 2;
+            else
+              search_node_offset = 1;
+          }
+
+          int cell_local = cell_idx - up->range.lower[search_dir];
+          int search_node_idx;
+          if (poly_order == 1)
+            search_node_idx = cell_local + search_node_offset;
+          else
+            search_node_idx = 2*cell_local + search_node_offset;
+
+          if (!visited[search_node_idx]) {
+            double val = up->basis.eval_expand(nod_log, f_d);
+            double nod_phys[GKYL_MAX_DIM];
+            dg_find_peaks_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys);
+
+            vals[search_node_idx] = val;
+            coords[search_node_idx] = nod_phys[search_dir];
+            visited[search_node_idx] = true;
+          }
+        }
+      }
+    }
+
+    // Extract peaks based on peak_types and write to nodal output arrays.
+    int peak_idx = 0;
+
+    // EDGE_LO is always the first peak at index 0.
+    if (up->peak_types[peak_idx] == GKYL_PEAK_EDGE_LO) {
+      double *val_n = (double *)gkyl_array_fetch(up->out_vals_nodal[peak_idx],
+        preserved_node_idx);
+      double *coord_n = (double *)gkyl_array_fetch(up->out_coords_nodal[peak_idx],
+        preserved_node_idx);
+      val_n[0] = vals[0];
+      coord_n[0] = coords[0];
+      peak_idx++;
+    }
+
+    // Find local maxima and minima.
+    for (int i = 1; i < total_nodes_search - 1 && peak_idx < up->num_peaks - 1; i++) {
+      double prev = vals[i-1];
+      double curr = vals[i];
+      double next = vals[i+1];
+
+      bool is_max = (curr > prev && curr > next);
+      bool is_min = (curr < prev && curr < next);
+
+      if ((is_max && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MAX) ||
+          (is_min && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MIN)) {
+        double *val_n = (double *)gkyl_array_fetch(up->out_vals_nodal[peak_idx],
+          preserved_node_idx);
+        double *coord_n = (double *)gkyl_array_fetch(up->out_coords_nodal[peak_idx],
+          preserved_node_idx);
+        val_n[0] = curr;
+        coord_n[0] = coords[i];
+        peak_idx++;
+      }
+    }
+
+    // EDGE_HI is always the last peak.
+    if (peak_idx < up->num_peaks && up->peak_types[peak_idx] == GKYL_PEAK_EDGE_HI) {
+      double *val_n = (double *)gkyl_array_fetch(up->out_vals_nodal[peak_idx],
+        preserved_node_idx);
+      double *coord_n = (double *)gkyl_array_fetch(up->out_coords_nodal[peak_idx],
+        preserved_node_idx);
+      val_n[0] = vals[total_nodes_search - 1];
+      coord_n[0] = coords[total_nodes_search - 1];
+    }
+  }
+}
+
+/**
+ * CUDA kernel: evaluate an input array at peak locations for given peak indices.
+ * Writes results into out_eval_at_peaks_vals_nodal arrays on device.
+ *
+ * Thread mapping: one thread per (preserved_node_idx, peak_offset) pair.
+ * total_threads = num_nodes_out * num_peaks_to_eval.
+ *
+ * @param up Device-side updater struct
+ * @param in Device-side input array (DG field to evaluate)
+ * @param num_nodes_out Number of preserved-direction nodes
+ * @param peak_start First peak index to evaluate
+ * @param num_peaks_to_eval Number of peaks to evaluate (starting from peak_start)
+ */
+__global__ void
+gkyl_eval_at_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in, int num_nodes_out,
+  int peak_start, int num_peaks_to_eval)
+{
+  unsigned long total_threads = (unsigned long)num_nodes_out * num_peaks_to_eval;
+
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < total_threads; tid += blockDim.x * gridDim.x)
+  {
+    int preserved_node_idx = (int)(tid / num_peaks_to_eval);
+    int peak_offset = (int)(tid % num_peaks_to_eval);
+    int peak_idx = peak_start + peak_offset;
+
+    int ndim = up->grid.ndim;
+    int search_dir = up->search_dir;
+    int poly_order = up->basis.poly_order;
+    int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
+
+    // Get the peak coordinate found during advance.
+    const double *peak_coord_n = (const double *)gkyl_array_cfetch(
+      up->out_coords_nodal[peak_idx], preserved_node_idx);
+    double peak_coord_search = peak_coord_n[0];
+
+    // Determine cell index containing the peak.
+    // We compute the search-direction cell directly from the uniform grid
+    // geometry (avoids calling gkyl_rect_grid_find_cell which is not available
+    // as a device symbol).
+    int cell_idx[GKYL_MAX_DIM];
+
+    // Search direction: compute cell from coordinate on uniform grid.
+    // cell = floor((x - lower) / dx) + 1  (1-based indexing).
+    // Use pick_lower semantics: if exactly on a boundary, pick the lower cell.
+    {
+      double rel = (peak_coord_search - up->grid.lower[search_dir]) / up->grid.dx[search_dir];
+      int c = (int)rel + 1; // 1-based
+      // pick_lower: if exactly on upper boundary of cell c, rel is integer, pick c not c+1.
+      // The (int) cast truncates toward zero which gives pick_lower behavior for positive rel.
+      // Clamp to valid range.
+      if (c < up->range.lower[search_dir])
+        c = up->range.lower[search_dir];
+      if (c > up->range.upper[search_dir])
+        c = up->range.upper[search_dir];
+      cell_idx[search_dir] = c;
+    }
+
+    // For 2D: determine preserved-direction cell from preserved_node_idx.
+    if (ndim > 1) {
+      int pres_cell;
+      if (poly_order == 1) {
+        if (preserved_node_idx == 0) {
+          pres_cell = up->range.lower[preserved_dir];
+        }
+        else {
+          pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+          if (pres_cell > up->range.upper[preserved_dir])
+            pres_cell = up->range.upper[preserved_dir];
+        }
+      }
+      else {
+        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx / 2;
+      }
+      cell_idx[preserved_dir] = pres_cell;
+    }
+
+    // Fetch DG coefficients at this cell.
+    long linidx = gkyl_range_idx(&up->range_ext, cell_idx);
+    const double *f_d = (const double *)gkyl_array_cfetch(in, linidx);
+
+    // Get cell center for logical coordinate conversion.
+    double xc[GKYL_MAX_DIM];
+    gkyl_rect_grid_cell_center(&up->grid, cell_idx, xc);
+
+    // Convert peak coordinate to logical space [-1, 1].
+    double nod_log[GKYL_MAX_DIM];
+    for (int d = 0; d < ndim; d++) {
+      if (d == search_dir) {
+        nod_log[d] = 2.0 * (peak_coord_search - xc[d]) / up->grid.dx[d];
+      }
+      else if (ndim > 1) {
+        if (poly_order == 1) {
+          nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
+        }
+        else {
+          int pres_node_offset = preserved_node_idx % 3;
+          if (pres_node_offset == 0)
+            nod_log[d] = -1.0;
+          else if (pres_node_offset == 1)
+            nod_log[d] = 0.0;
+          else
+            nod_log[d] = 1.0;
+        }
+      }
+    }
+
+    // Evaluate the DG expansion and store result.
+    double val = up->basis.eval_expand(nod_log, f_d);
+    double *val_n = (double *)gkyl_array_fetch(
+      up->out_eval_at_peaks_vals_nodal[peak_idx], preserved_node_idx);
+    val_n[0] = val;
+  }
+}
+
+// Host function to launch the project_on_peaks kernel and run nodal-to-modal transforms.
+void
+gkyl_array_dg_find_peaks_project_on_peaks_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, struct gkyl_array **out_vals)
+{
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+  int num_nodes_out = up->out_nrange.volume;
+  int num_peaks = up->num_peaks;
+
+  // Launch kernel: one thread per (preserved_node, peak) pair.
+  long total_threads = (long)num_nodes_out * num_peaks;
+  int nthreads = 256;
+  int nblocks = (total_threads + nthreads - 1) / nthreads;
+
+  gkyl_eval_at_peaks_kernel<<<nblocks, nthreads>>>(
+    up->on_dev, in_array->on_dev, num_nodes_out, 0, num_peaks);
+
+  // Transform nodal to modal for each peak.
+  if (out_dim == 0) {
+    for (int p = 0; p < num_peaks; p++) {
+      gkyl_array_copy(out_vals[p], up->out_eval_at_peaks_vals_nodal[p]);
+    }
+  }
+  else {
+    for (int p = 0; p < num_peaks; p++) {
+      gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1,
+        up->out_eval_at_peaks_vals_nodal[p], out_vals[p]);
+    }
+  }
+}
+
+// Host function to launch the project_on_peak_idx kernel and run nodal-to-modal transform.
+void
+gkyl_array_dg_find_peaks_project_on_peak_idx_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val)
+{
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+  int num_nodes_out = up->out_nrange.volume;
+
+  // Launch kernel: one thread per preserved_node, single peak.
+  int nthreads = 256;
+  int nblocks = (num_nodes_out + nthreads - 1) / nthreads;
+
+  gkyl_eval_at_peaks_kernel<<<nblocks, nthreads>>>(
+    up->on_dev, in_array->on_dev, num_nodes_out, peak_idx, 1);
+
+  // Transform nodal to modal.
+  if (out_dim == 0) {
+    gkyl_array_copy(out_val, up->out_eval_at_peaks_vals_nodal[peak_idx]);
+  }
+  else {
+    gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid,
+      &up->out_nrange, &up->out_range, 1,
+      up->out_eval_at_peaks_vals_nodal[peak_idx], out_val);
+  }
+}
+
+// Host function to launch the advance kernel and run nodal-to-modal transforms.
+void
+gkyl_array_dg_find_peaks_advance_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in)
+{
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+  int num_nodes_out = up->out_nrange.volume;
+
+  // Launch the kernel: one thread per preserved node.
+  int nthreads = 256;
+  int nblocks = (num_nodes_out + nthreads - 1) / nthreads;
+
+  gkyl_find_peaks_kernel<<<nblocks, nthreads>>>(
+    up->on_dev, in->on_dev, num_nodes_out);
+
+  // Transform nodal to modal for each peak.
+  if (out_dim == 0) {
+    // 1D -> 0D case: modal = nodal (p=0, single value).
+    // Copy from nodal to modal arrays on device.
+    for (int p = 0; p < up->num_peaks; p++) {
+      gkyl_array_copy(up->out_vals[p], up->out_vals_nodal[p]);
+      gkyl_array_copy(up->out_coords[p], up->out_coords_nodal[p]);
+    }
+  }
+  else {
+    // 2D -> 1D case: use nodal-to-modal transform on GPU.
+    for (int p = 0; p < up->num_peaks; p++) {
+      gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1,
+        up->out_vals_nodal[p], up->out_vals[p]);
+      gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1,
+        up->out_coords_nodal[p], up->out_coords[p]);
+    }
+  }
+}
+
+struct gkyl_array_dg_find_peaks*
+gkyl_array_dg_find_peaks_new_cu(struct gkyl_array_dg_find_peaks *up_ho)
+{
+  struct gkyl_array_dg_find_peaks *up =
+    (struct gkyl_array_dg_find_peaks *)gkyl_malloc(sizeof(*up));
+
+  // Copy all scalar/struct fields from host object.
+  up->grid = up_ho->grid;
+  up->basis = up_ho->basis;
+  up->range = up_ho->range;
+  up->range_ext = up_ho->range_ext;
+  up->search_dir = up_ho->search_dir;
+  up->use_gpu = true;
+
+  up->out_grid = up_ho->out_grid;
+  up->out_basis = up_ho->out_basis;
+  up->out_range = up_ho->out_range;
+  up->out_range_ext = up_ho->out_range_ext;
+  up->out_nrange = up_ho->out_nrange;
+
+  up->num_peaks = up_ho->num_peaks;
+  for (int p = 0; p < GKYL_DG_FIND_PEAKS_MAX; p++)
+    up->peak_types[p] = up_ho->peak_types[p];
+
+  int ndim = up_ho->basis.ndim;
+  int poly_order = up_ho->basis.poly_order;
+  int out_dim = ndim - 1;
+
+  // Create a GPU copy of the nodes array so the kernel can access it.
+  up->nodes = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+    up_ho->nodes->ncomp, up_ho->nodes->size);
+  gkyl_array_copy(up->nodes, up_ho->nodes);
+
+  // Create GPU-enabled nodal-to-modal converter.
+  // Use the host basis (up->out_basis) here because gkyl_nodal_ops_new
+  // calls cbasis->node_list on the host.
+  up->n2m = gkyl_nodal_ops_new(&up->out_basis, &up->out_grid, true);
+
+  // Create a device-resident basis with device-callable function pointers.
+  // This is needed by gkyl_nodal_ops_n2m_cu which passes the basis pointer
+  // directly to a CUDA kernel that dereferences cbasis->nodal_to_modal().
+  if (out_dim > 0)
+    up->out_basis_on_dev = gkyl_cart_modal_serendip_cu_dev_new(1, poly_order);
+  else
+    up->out_basis_on_dev = NULL;
+
+  up->flags = 0;
+  GKYL_SET_CU_ALLOC(up->flags);
+  up->ref_count = gkyl_ref_count_init(gkyl_array_dg_find_peaks_free);
+
+  // Allocate GPU output arrays for each peak and copy data from host arrays.
+  for (int p = 0; p < up->num_peaks; p++) {
+    up->out_vals[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+      up_ho->out_vals[p]->ncomp, up_ho->out_vals[p]->size);
+    gkyl_array_copy(up->out_vals[p], up_ho->out_vals[p]);
+
+    up->out_coords[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+      up_ho->out_coords[p]->ncomp, up_ho->out_coords[p]->size);
+    gkyl_array_copy(up->out_coords[p], up_ho->out_coords[p]);
+
+    up->out_vals_nodal[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+      up_ho->out_vals_nodal[p]->ncomp, up_ho->out_vals_nodal[p]->size);
+    gkyl_array_copy(up->out_vals_nodal[p], up_ho->out_vals_nodal[p]);
+
+    up->out_coords_nodal[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+      up_ho->out_coords_nodal[p]->ncomp, up_ho->out_coords_nodal[p]->size);
+    gkyl_array_copy(up->out_coords_nodal[p], up_ho->out_coords_nodal[p]);
+
+    up->out_eval_at_peaks_vals_nodal[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+      up_ho->out_eval_at_peaks_vals_nodal[p]->ncomp,
+      up_ho->out_eval_at_peaks_vals_nodal[p]->size);
+    gkyl_array_copy(up->out_eval_at_peaks_vals_nodal[p],
+      up_ho->out_eval_at_peaks_vals_nodal[p]);
+  }
+
+  // Initialize unused peak arrays to NULL.
+  for (int p = up->num_peaks; p < GKYL_DG_FIND_PEAKS_MAX; p++) {
+    up->out_vals[p] = NULL;
+    up->out_coords[p] = NULL;
+    up->out_vals_nodal[p] = NULL;
+    up->out_coords_nodal[p] = NULL;
+    up->out_eval_at_peaks_vals_nodal[p] = NULL;
+  }
+
+  // Copy struct to device, with on_dev array pointers and device-callable
+  // basis function pointers swapped in.
+  // Save host-side array pointers and basis structs.
+  struct gkyl_array *ho_nodes = up->nodes;
+  struct gkyl_basis ho_basis = up->basis;
+  struct gkyl_basis ho_out_basis = up->out_basis;
+  struct gkyl_array *ho_out_vals[GKYL_DG_FIND_PEAKS_MAX];
+  struct gkyl_array *ho_out_coords[GKYL_DG_FIND_PEAKS_MAX];
+  struct gkyl_array *ho_out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];
+  struct gkyl_array *ho_out_coords_nodal[GKYL_DG_FIND_PEAKS_MAX];
+  struct gkyl_array *ho_out_eval_at_peaks_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];
+
+  // Swap in device-callable basis function pointers for the H2D copy.
+  gkyl_cart_modal_serendip_cu_dev(&up->basis, ndim, poly_order);
+  if (out_dim == 0)
+    gkyl_cart_modal_serendip_cu_dev(&up->out_basis, 1, 0);
+  else
+    gkyl_cart_modal_serendip_cu_dev(&up->out_basis, 1, poly_order);
+
+  // Swap nodes to its device pointer.
+  up->nodes = up->nodes->on_dev;
+
+  for (int p = 0; p < up->num_peaks; p++) {
+    ho_out_vals[p] = up->out_vals[p];
+    ho_out_coords[p] = up->out_coords[p];
+    ho_out_vals_nodal[p] = up->out_vals_nodal[p];
+    ho_out_coords_nodal[p] = up->out_coords_nodal[p];
+    ho_out_eval_at_peaks_vals_nodal[p] = up->out_eval_at_peaks_vals_nodal[p];
+
+    // Swap in device pointers for the H2D copy.
+    up->out_vals[p] = up->out_vals[p]->on_dev;
+    up->out_coords[p] = up->out_coords[p]->on_dev;
+    up->out_vals_nodal[p] = up->out_vals_nodal[p]->on_dev;
+    up->out_coords_nodal[p] = up->out_coords_nodal[p]->on_dev;
+    up->out_eval_at_peaks_vals_nodal[p] = up->out_eval_at_peaks_vals_nodal[p]->on_dev;
+  }
+
+  // Allocate device struct and copy host struct (with device pointers) to device.
+  struct gkyl_array_dg_find_peaks *up_cu =
+    (struct gkyl_array_dg_find_peaks *)gkyl_cu_malloc(sizeof(*up_cu));
+  gkyl_cu_memcpy(up_cu, up, sizeof(struct gkyl_array_dg_find_peaks), GKYL_CU_MEMCPY_H2D);
+  up->on_dev = up_cu;
+
+  // Restore host-side array pointers and basis so the returned object
+  // has usable host handles and host-callable function pointers.
+  up->nodes = ho_nodes;
+  up->basis = ho_basis;
+  up->out_basis = ho_out_basis;
+  for (int p = 0; p < up->num_peaks; p++) {
+    up->out_vals[p] = ho_out_vals[p];
+    up->out_coords[p] = ho_out_coords[p];
+    up->out_vals_nodal[p] = ho_out_vals_nodal[p];
+    up->out_coords_nodal[p] = ho_out_coords_nodal[p];
+    up->out_eval_at_peaks_vals_nodal[p] = ho_out_eval_at_peaks_vals_nodal[p];
+  }
+
+  return up;
+}
diff --git a/core/zero/gkyl_array_dg_find_peaks.h b/core/zero/gkyl_array_dg_find_peaks.h
index 28872ff53c..289a3ae3aa 100644
--- a/core/zero/gkyl_array_dg_find_peaks.h
+++ b/core/zero/gkyl_array_dg_find_peaks.h
@@ -255,3 +255,51 @@ void gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peak
  */
 void gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up);
 
+
+/**
+ * Create a new GPU peak finder updater from an already-initialized host object.
+ * Allocates GPU arrays, copies the struct to device, and returns a host-side
+ * struct with array pointers referencing device memory. Called internally by
+ * gkyl_array_dg_find_peaks_new when use_gpu is true.
+ *
+ * @param up_ho Host-side updater object (fully initialized)
+ * @return New updater pointer with GPU arrays
+ */
+struct gkyl_array_dg_find_peaks* gkyl_array_dg_find_peaks_new_cu(
+  struct gkyl_array_dg_find_peaks *up_ho);
+
+/**
+ * GPU implementation of the advance method. Launches a CUDA kernel to find
+ * peaks for each preserved-direction node, then runs nodal-to-modal transforms
+ * on device.
+ *
+ * @param up Updater object (with GPU arrays)
+ * @param in Input array (device-side DG field)
+ */
+void gkyl_array_dg_find_peaks_advance_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in);
+
+/**
+ * GPU implementation of project_on_peaks. Launches a CUDA kernel to evaluate
+ * an input array at all peak locations, then runs nodal-to-modal transforms
+ * on device.
+ *
+ * @param up Updater object (with GPU arrays)
+ * @param in_array Input array (device-side DG field)
+ * @param out_vals Output: array of evaluated values for each peak (device-side)
+ */
+void gkyl_array_dg_find_peaks_project_on_peaks_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, struct gkyl_array **out_vals);
+
+/**
+ * GPU implementation of project_on_peak_idx. Launches a CUDA kernel to evaluate
+ * an input array at a single peak location, then runs a nodal-to-modal transform
+ * on device.
+ *
+ * @param up Updater object (with GPU arrays)
+ * @param in_array Input array (device-side DG field)
+ * @param peak_idx Index of the peak to evaluate at (0 to num_peaks-1)
+ * @param out_val Output: evaluated values at the specified peak (device-side)
+ */
+void gkyl_array_dg_find_peaks_project_on_peak_idx_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val);
diff --git a/core/zero/gkyl_array_dg_find_peaks_priv.h b/core/zero/gkyl_array_dg_find_peaks_priv.h
index e20dd6f196..fa7fd24b5b 100644
--- a/core/zero/gkyl_array_dg_find_peaks_priv.h
+++ b/core/zero/gkyl_array_dg_find_peaks_priv.h
@@ -5,6 +5,7 @@
 #include <gkyl_array.h>
 #include <gkyl_array_dg_find_peaks.h>
 #include <gkyl_nodal_ops.h>
+#include <gkyl_ref_count.h>
 
 // Maximum number of peaks we can handle.
 #define GKYL_DG_FIND_PEAKS_MAX 16
@@ -13,6 +14,7 @@
  * Convert logical (reference) coordinates to computational (physical) coordinates.
  * xout[d] = xc[d] + 0.5*dx[d]*eta[d]
  */
+GKYL_CU_DH
 static inline void
 dg_find_peaks_log_to_comp(int ndim, const double *eta,
   const double *GKYL_RESTRICT dx, const double *GKYL_RESTRICT xc,
@@ -50,14 +52,26 @@ struct gkyl_array_dg_find_peaks {
   struct gkyl_array *out_coords_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Nodal peak coordinates
   struct gkyl_array *out_eval_at_peaks_vals_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Values evaluated at peaks (nodal)
 
-  // GPU implementation specific arrays
-  struct gkyl_array *in_ho; // Host copy of input array
-  struct gkyl_array *out_vals_ho; // Host copy of output values
-
   // Internal working arrays.
   struct gkyl_array *nodes;         // Node locations in logical coords
 
   // Nodal-to-modal converter.
   struct gkyl_nodal_ops *n2m;
+
+  // Device-resident basis for passing to GPU API functions (e.g. gkyl_nodal_ops_n2m_cu).
+  // Allocated via gkyl_cart_modal_serendip_cu_dev_new; NULL on CPU.
+  struct gkyl_basis *out_basis_on_dev;
+
+  uint32_t flags;
+  struct gkyl_array_dg_find_peaks *on_dev; // Pointer to device object (if GPU).
+  struct gkyl_ref_count ref_count;         // Reference counter.
 };
 
+/**
+ * Function that actually frees memory associated with this
+ * object when the number of references has decreased to zero.
+ *
+ * @param ref Reference counter for this object.
+ */
+void gkyl_array_dg_find_peaks_free(const struct gkyl_ref_count *ref);
+
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 33663b0a5c..a24ba350e2 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -78,17 +78,16 @@ void
 gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
+
+  struct gkyl_array *phi_smooth_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume);
+  gkyl_comm_array_allgather(app->comm, &app->local, &app->global, phi, phi_smooth_global);
   // Find the potential at bmag_max
-  gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, phi,
+  gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, phi_smooth_global,
     fdmul->bmag_max_peak_idx, fdmul->phi_at_bmag_max);
-  // Allgather on phi_at_bmag_max. It's not an allgather.
-  // One process has the correct one, but the others do not. Is it a bcast or a sync?
   
   if (fdmul->is_tandem) {
-    gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, phi,
+    gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, phi_smooth_global,
       fdmul->bmag_tandem_peak_idx, fdmul->phi_at_bmag_tandem);
-    // Allgather on phi_at_bmag_tandem. It's not an allgather.
-    // One process has the correct one, but the others do not. Is it a bcast or a sync?
     gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
       phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_tandem, fdmul->multiplier);
   } else {
@@ -97,8 +96,9 @@ gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, con
       phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_max, fdmul->multiplier);
   }
 
-  // Multiply out by the multplier.
+  // Multiply out by the multiplier.
   gkyl_array_scale_by_cell(out, fdmul->multiplier);
+  gkyl_array_release(phi_smooth_global);
 }
 
 void
@@ -198,8 +198,8 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       struct gkyl_array_dg_find_peaks_inp peak_inp = {
         .basis = &app->basis,
         .grid = &app->grid,
-        .range = &app->local,
-        .range_ext = &app->local_ext,
+        .range = &app->global,
+        .range_ext = &app->global_ext,
         .search_dir = search_dir,
         .use_gpu = app->use_gpu,
       };
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
index 9140f29c35..57eb3a38c2 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
@@ -33,79 +33,43 @@ struct gk_poa_phase_params {
 struct gk_mirror_ctx
 {
   int cdim, vdim; // Dimensionality.
-
   // Plasma parameters
-  double mi; // Ion mass.
-  double me; // Electron mass.
-  double qi; // Ion charge.
-  double qe; // Electron charge.
-  double Te0; // Electron temperature.
-  double Ti0; // Ion temperature.
-  double n0; // Density.
-  double B_p; // Plasma magnetic field (mirror center).
-  double beta; // Plasma beta in the center.
-  double tau; // Temperature ratio.
-
-  // Parameters controlling initial conditions.
-  double alim;
-  double alphaIC0;
-  double alphaIC1;
-  double Ti_perp0; // Reference ion perp temperature.
-  double Ti_par0; // Reference ion par temperature.
-  double Ti_perp_m; // Ion perp temperature at the throat.
-  double Ti_par_m; // Ion par temperature at the throat.
-  double cs_m; // Ion sound speed at the throat.
-
-  double nuFrac; // Fraction multiplying collision frequency.
-  double logLambdaIon; // Ion Coulomb logarithm.
-  double nuIon; // Ion-ion collision freq.
-
-  double vti; // Ion thermal speed.
-  double vte; // Electron thermal speed.
-  double c_s; // Ion sound speed.
-  double omega_ci; // Ion gyrofrequency.
-  double rho_s; // Ion sound gyroradius.
-
+  double mi;
+  double qi;
+  double me;
+  double qe;
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau;
+  double Ti0;
+  double nuFrac;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  double vti;
   double RatZeq0; // Radius of the field line at Z=0.
-  double Z_min; // Minimum axial coordinate Z.
-  double Z_max; // Maximum axial coordinate Z.
-  double z_min; // Minimum value of the position along the field line.
-  double z_max; // Maximum value of the position along the field line.
-  double psi_min; // Minimum value of the poloidal flux.
-  double psi_max; // Maximum value of the poloidal flux.
-  double psi_in, z_in; // Auxiliary psi and z.
-
-  // Magnetic equilibrium model.
-  double mcB;
-  double gamma;
-  double Z_m; // Axial coordinate at mirror throat.
-  double z_m; // Computational coordinate at mirror throat.
-
-  // Source parameters
-  double NSrcIon;
-  double lineLengthSrcIon;
-  double sigSrcIon;
-  double NSrcFloorIon;
-  double TSrc0Ion;
-  double TSrcFloorIon;
-
-  // Physical velocity space limits.
-  double vpar_min_ion, vpar_max_ion;
+  // Axial coordinate Z extents. Endure that Z=0 is not on
+  double z_min;
+  double z_max;
+  double psi_max;
+  double psi_min;
+  // Physics parameters at mirror throat
+  double vpar_max_ion;
   double mu_max_ion;
-  // Computational velocity space limits.
-  double vpar_lin_fac_inv, mu_lin_fac_inv; // Inverse factor of where linear mapping ends.
-  double vpar_pow, mu_pow; // Power of the velocity grid.
-  double vpar_min_ion_c, vpar_max_ion_c;
-  double mu_min_ion_c, mu_max_ion_c;
-
-  // Grid DOF.
-  int Nx;
+  int Npsi;
   int Nz;
   int Nvpar;
   int Nmu;
   int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
   int poly_order;
 
+  // Source parameters
+  double ion_source_amplitude;
+  double ion_source_sigma;
+  double ion_source_temp;
+
   double t_end; // End time.
   int num_frames; // Number of output frames.
   int num_phases; // Number of phases.
@@ -114,8 +78,18 @@ struct gk_mirror_ctx
   double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
   double dt_failure_tol; // Minimum allowable fraction of initial time-step.
   int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+
+  // Geometry parameters for Lorentzian mirror
+  double mcB;     // Magnetic field parameter
+  double gamma;   // Width parameter for Lorentzian profile
+  double Z_m;     // Mirror throat location
+  double Z_min;   // Minimum Z coordinate
+  double Z_max;   // Maximum Z coordinate
+  double psi_in;  // Working variable for psi integration
+  double z_in;    // Working variable for z integration
 };
 
+
 double
 psi_RZ(double RIn, double ZIn, void *ctx)
 {
@@ -123,6 +97,7 @@ psi_RZ(double RIn, double ZIn, void *ctx)
   double mcB = app->mcB;
   double gamma = app->gamma;
   double Z_m = app->Z_m;
+
   double psi = 0.5 * pow(RIn, 2.) * mcB *
                (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
                 1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
@@ -133,9 +108,13 @@ double
 R_psiZ(double psiIn, double ZIn, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  double Rout = sqrt(2.0 * psiIn / (app->mcB * 
-    (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) +
-     1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))))));
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+
+  double Rout = sqrt(2. * psiIn / (mcB * 
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+     1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))))));
   return Rout;
 }
 
@@ -143,17 +122,21 @@ void
 Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
 {
   struct gk_mirror_ctx *app = ctx;
-  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
   double mcB = app->mcB;
   double gamma = app->gamma;
   double Z_m = app->Z_m;
-  *BRad = -(1.0 / 2.0) * Rcoord * mcB *
-          (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
-            2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
-  *BZ = mcB *
-        (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
-         1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))));
-  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+
+  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
+
+  BRad[0] = -(1. / 2.) * Rcoord * mcB *
+          (-2. * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.)))
+           -2. * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
+
+  BZ[0] = mcB *
+        ( 1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.)))
+         +1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) );
+
+  Bmag[0] = sqrt(pow(BRad[0], 2) + pow(BZ[0], 2));
 }
 
 double
@@ -170,8 +153,8 @@ double
 z_psiZ(double psiIn, double ZIn, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  app->psi_in = psiIn;
   double eps = 0.0;
+  app->psi_in = psiIn;
   struct gkyl_qr_res integral;
   if (eps <= ZIn)
   {
@@ -179,7 +162,7 @@ z_psiZ(double psiIn, double ZIn, void *ctx)
   }
   else
   {
-    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14);
+    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14); 
     integral.res = -integral.res;
   }
   return integral.res;
@@ -202,7 +185,7 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   app->psi_in = psiIn;
   app->z_in = zIn;
   struct gkyl_qr_res Zout;
-  if (zIn >= 0.0)
+  if (0.0 <= zIn)
   {
     double fl = root_Z_psiz(-eps, ctx);
     double fr = root_Z_psiz(app->Z_max + eps, ctx);
@@ -217,189 +200,119 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   return Zout.res;
 }
 
+// Geometry evaluation functions for the gk app
 void
-eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
 {
-  double psi = xn[0];
-  double z = xn[1];
+  double psi = xc[0], theta = xc[1], z = xc[2];
 
-  struct gk_mirror_ctx *app = ctx;
-  double NSrc = app->NSrcIon;
-  double zSrc = app->lineLengthSrcIon;
-  double sigSrc = app->sigSrcIon;
-  double NSrcFloor = app->NSrcFloorIon;
+  double Z = Z_psiz(psi, z, ctx);
+  double R = R_psiZ(psi, Z, ctx);
 
-  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  double x = R * cos(theta);
+  double y = R * sin(theta);
 
-  if (fabs(Z) <= app->Z_m) {
-    fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2))) *
-                              exp(-pow(z - zSrc, 2) / (2.0 * pow(sigSrc, 2))));
-  } else {
-    fout[0] = 1e-16;
-  }
+  xp[0] = x;  xp[1] = y;  xp[2] = Z;
 }
 
 void
-eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
-  fout[0] = 0.0;
+  struct gk_mirror_ctx *app = ctx;
+  double z = xc[2];
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+
+  double phi = xc[1];
+  // zc are computational coords. 
+  // Set Cartesian components of magnetic field.
+  fout[0] = BRad*cos(phi);
+  fout[1] = BRad*sin(phi);
+  fout[2] = BZ;
 }
 
+// Evaluate collision frequencies
 void
-eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[1];
-
   struct gk_mirror_ctx *app = ctx;
-  double sigSrc = app->sigSrcIon;
-  double TSrc0 = app->TSrc0Ion;
-  double Tfloor = app->TSrcFloorIon;
-
-  if (fabs(z) <= 2.0 * sigSrc)
-  {
-    fout[0] = TSrc0;
-  }
-  else
-  {
-    fout[0] = Tfloor;
-  }
+  fout[0] = app->nuIon;
 }
 
-// Ion initial conditions
 void
 eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[1];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double sigma = 0.9*z_m;
-  if (fabs(z) <= sigma)
-  {
-    fout[0] = 0.5*app->n0*(1. + tanh(10. * sigma * fabs(sigma - fabs(z))));
-  }
-  else
-  {
-    fout[0] = 0.5*app->n0*exp(-5 * (fabs(sigma - fabs(z))));
-  }
+  // double b = 8;
+  // double func = (atan(-(xn[0] - 0.7) * b) - atan(-(xn[0] + 0.7) * b))/M_PI;
+  // fout[0] = 1e17*func;
+  fout[0] = 1e17;
 }
 
 void
 eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[1];
-
   struct gk_mirror_ctx *app = ctx;
-  double cs_m = app->cs_m;
-  double z_m = app->z_m;
-  double z_max = app->z_max;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = 0.0;
-  }
-  else
-  {
-    fout[0] = (fabs(z) / z) * cs_m * tanh(3 * (z_max - z_m) * fabs(fabs(z) - z_m));
-  }
+  // double b=30;
+  // double func = (-atan(-(xn[0] - 0.98) * b) - atan(-(xn[0] + 0.98) * b))/M_PI;
+  // fout[0] = 1.2e6*func;
+  fout[0] = 0.0;
 }
 
 void
-eval_temp_par_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[1];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double Ti_par0 = app->Ti_par0;
-  double Ti_par_m = app->Ti_par_m;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = Ti_par_m+(Ti_par0-Ti_par_m)*tanh(4 * fabs(z_m - fabs(z)));
-  }
-  else
-  {
-    fout[0] = Ti_par_m;
-  }
+  // double b = 5;
+  // double func = (atan(-(xn[0] - 0.7) * b) - atan(-(xn[0] + 0.7) * b))/M_PI;
+  // fout[0] = 15000*GKYL_ELEMENTARY_CHARGE*func;
+  fout[0] = app->Ti0;
 }
 
 void
-eval_temp_perp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[1];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double Ti_perp0 = app->Ti_perp0;
-  double Ti_perp_m = app->Ti_perp_m;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = Ti_perp_m - Ti_perp0*tanh(3.*fabs(z_m-fabs(z)));
+  double z = xn[1];
+  double src_amp = app->ion_source_amplitude;
+  double z_src = 0.0;
+  double src_sigma = app->ion_source_sigma;
+  double src_amp_floor = src_amp*1e-2;
+  if (fabs(z) <= 0.98)
+  { 
+    // sixth order polynomial drop of to the edge
+    fout[0] = src_amp * (1 - pow(fabs(z), 6)/0.98);
   }
   else
   {
-    fout[0] = Ti_perp_m * GKYL_MAX2(1.e-3, exp(-5. * (fabs(z_m - fabs(z)))));
+    fout[0] = 1e-16;
   }
 }
 
 void
-evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
-{
-  struct gk_mirror_ctx *app = ctx;
-  fout[0] = app->nuIon;
-}
-
-// Geometry evaluation functions for the gk app
-// mapc2p must assume a 3d input xc
-void
-mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
-{
-  double psi = xc[0];
-  double theta = xc[1];
-  double z = xc[2];
-
-  double Z = Z_psiz(psi, z, ctx);
-  double R = R_psiZ(psi, Z, ctx);
-
-  // Cartesian coordinates on plane perpendicular to Z axis.
-  double x = R * cos(theta);
-  double y = R * sin(theta);
-  xp[0] = x;
-  xp[1] = y;
-  xp[2] = Z;
-}
-
-// bmag_func must assume a 3d input xc
-void
-bmag_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double psi = xc[0];
-  double z = xc[2];
-
-  struct gk_mirror_ctx *app = ctx;
-  double Z = Z_psiz(psi, z, ctx);
-  double BRad, BZ, Bmag;
-  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-  fout[0] = Bmag;
+  fout[0] = 0.0;
 }
 
-// bfield_func must assume a 3d input xc
 void
-bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double psi = xc[0];
-  double z = xc[2];
-
   struct gk_mirror_ctx *app = ctx;
-  double Z = Z_psiz(psi, z, ctx);
-  double BRad, BZ, Bmag;
-  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-
-  double phi = xc[1];
-  // zc are computational coords. 
-  // Set Cartesian components of magnetic field.
-  fout[0] = BRad*cos(phi);
-  fout[1] = BRad*sin(phi);
-  fout[2] = BZ;
+  double z = xn[1];
+  double TSrc0 = app->ion_source_temp;
+  double Tfloor = TSrc0*1e-2;
+  if (fabs(z) <= 0.98)
+  {
+    fout[0] = TSrc0;
+  }
+  else
+  {
+    fout[0] = Tfloor;
+  }
 }
 
 void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
@@ -409,40 +322,17 @@ void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *
   double mu_max_ion = app->mu_max_ion;
 
   double cvpar = vc[0], cmu = vc[1];
-  // Linear map up to vpar_max/lin_frac_inv, then a power grid.
-  double vpar_lin_fac_inv = app->vpar_lin_fac_inv;
-  double vpar_pow = app->vpar_pow;
-  if (fabs(cvpar) <= 1.0/vpar_lin_fac_inv)
-    vp[0] = vpar_max_ion*cvpar;
-  else if (cvpar < -1.0/vpar_lin_fac_inv)
-    vp[0] = -vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow);
-  else
-    vp[0] =  vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow);
-
-//  // Quadratic mu.
-//  vp[1] = mu_max_ion*pow(cmu,2.0);
-  // Linear map up to mu_max/lin_frac_inv, then a power grid.
-  double mu_lin_fac_inv = app->mu_lin_fac_inv;
-  double mu_pow = app->mu_pow;
-//  if (cmu <= 1.0/mu_lin_fac_inv)
-//    vp[0] = mu_max_ion*cmu;
-//  else
-//    vp[0] = mu_max_ion*pow(mu_lin_fac_inv,mu_pow-1)*pow(cmu,mu_pow);
-  double w = 0.3;
-  double f = 0.012;
-  double a = mu_max_ion*(f-1.0)/(w*w-1.0);
-  double b = mu_max_ion*(w*w-f)/(w*w-1.0);
-  if (cmu <= w)
-    vp[1] = (f*mu_max_ion/w)*cmu;
-  else
-    vp[1] = a*pow(cmu,2)+b;
-
+  double b = 1.4;
+  vp[0] = vpar_max_ion*tan(cvpar*b)/tan(b);
+  // Cubic map in mu.
+  vp[1] = mu_max_ion*pow(cmu,3);
 }
 
 struct gk_mirror_ctx
 create_ctx(void)
 {
   int cdim = 2, vdim = 2; // Dimensionality.
+  int poly_order = 1;
 
   // Universal constant parameters.
   double eps0 = GKYL_EPSILON0;
@@ -462,100 +352,57 @@ create_ctx(void)
   double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
   double Ti0 = tau * Te0;
 
-  // Parameters controlling initial conditions.
-  double alim = 0.125;
-  double alphaIC0 = 2;
-  double alphaIC1 = 10;
-
-  double nuFrac = 1.0;
   // Ion-ion collision freq.
+  double nuFrac = 1.0;
   double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
   double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
                  (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
 
   // Thermal speeds.
   double vti = sqrt(Ti0 / mi);
-  double vte = sqrt(Te0 / me);
-  double c_s = sqrt(Te0 / mi);
 
-  // Gyrofrequencies and gyroradii.
-  double omega_ci = eV * B_p / mi;
-  double rho_s = c_s / omega_ci;
+  // Grid parameters
+  double vpar_max_ion = 16 * vti;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 64;
+  int Npsi = 4;
+  int Nvpar = 32; // 96 uniform
+  int Nmu = 16;  // 192 uniform
+
+  // Source parameters
+  double ion_source_amplitude = 1.e20;
+  double ion_source_sigma = 0.5;
+  double ion_source_temp = 5000. * eV;
 
   // Geometry parameters.
   double RatZeq0 = 0.10; // Radius of the field line at Z=0.
-  // Axial coordinate Z extents. Endure that Z=0 is not on
-  // the boundary of a cell (due to AD errors).
   double Z_min = -2.5;
   double Z_max =  2.5;
-
-  // Parameters controlling the magnetic equilibrium model.
-  double mcB = 6.51292;
-  double gamma = 0.124904;
+  double mcB = 3.691260;
+  double gamma = 0.226381;
   double Z_m = 0.98;
 
-  // Source parameters
-  double NSrcIon = 3.1715e23 / 8.0 / 40.0 / 2.0 * 1.25;
-  double lineLengthSrcIon = 0.0;
-  double sigSrcIon = Z_m / 4.0;
-  double NSrcFloorIon = 0.05 * NSrcIon;
-  double TSrc0Ion = Ti0 * 1.25;
-  double TSrcFloorIon = TSrc0Ion / 8.0;
-
-  // Grid parameters
-  double vpar_max_ion = 16 * vti;
-  double vpar_min_ion = -vpar_max_ion;
-  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
-
-  // Computational velocity space limits.
-  double vpar_lin_fac_inv = 4;
-  double vpar_pow = 3;
-  double vpar_min_ion_c = -1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow);
-  double vpar_max_ion_c =  1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow);
-  double mu_min_ion_c = 0.;
-  double mu_max_ion_c = 1.;
-  double mu_lin_fac_inv = 1.0/0.012;
-  double mu_pow = 2;
-//  double mu_min_ion_c = 0.0;
-//  double mu_max_ion_c = 1.0/pow(mu_lin_fac_inv,(mu_pow-1)/mu_pow);
-
-  // Grid DOF:
-  int Nx = 8;  // Number of cells in x (psi) direction.
-  int Nz = 96; // Number of cells in z direction.
-  int Nvpar = 16; // Number of cells in parallel velocity direction.
-  int Nmu = 8;  // Number of cells in mu direction.
-  int poly_order = 1;
-
-  // Initial conditions parameter.s
-  double Ti_perp0 = 10000 * eV;
-  double Ti_par0 = 7500 * eV;
-
-  // Parameters at mirror throat
-  double Ti_perp_m = 15000 * eV;
-  double Ti_par_m = 1000 * eV;
-  double z_m = 0.982544;
-  double cs_m = sqrt((Te0+3.0*Ti_par_m)/mi);
-
-  // Factor multiplying collisionless terms.
-  double alpha_oap = 0.01;
+  // POA parameters  
+  double alpha_oap = 5e-6;  // Factor multiplying collisionless terms.
   double alpha_fdp = 1.0;
-  // Duration of each phase.
-  double tau_oap = 2400.0e-9;
-  double tau_fdp = 24.0e-9;
-  double tau_fdp_extra = 2*tau_fdp;
+  double tau_oap = 0.001;  // Duration of each phase.
+  double tau_fdp = 7e-9;
+  double tau_fdp_extra = 2e-9;
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
-
+  
   // Frame counts for each phase type (specified independently)
-  int num_frames_oap = 4; // Frames per OAP phase
-  int num_frames_fdp = 4; // Frames per FDP phase
-  int num_frames_fdp_extra = 2*num_frames_fdp;  // Frames for the extra FDP phase
-
+  int num_frames_oap = 2;        // Frames per OAP phase
+  int num_frames_fdp = 2;        // Frames per FDP phase
+  int num_frames_fdp_extra = 2;  // Frames for the extra FDP phase
+  
   // Whether to evolve the field.
   bool is_static_field_oap = true;
   bool is_static_field_fdp = false;
-  // Whether to enable positivity.
+
+  // Whether positivity is enabled.
   bool is_positivity_enabled_oap = false;
-  bool is_positivity_enabled_fdp = true;
+  bool is_positivity_enabled_fdp = false;
+
   // Type of df/dt multipler.
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
@@ -586,7 +433,7 @@ create_ctx(void)
     poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
     poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
   }
-  // Add an extra, longer FDP.
+  // The final stage is an extra, longer FDP.
   poa_phases[num_phases-1].phase = GK_POA_FDP;
   poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
   poa_phases[num_phases-1].duration = tau_fdp_extra;
@@ -595,71 +442,62 @@ create_ctx(void)
   poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
   poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
 
-  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double write_phase_freq = 1; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
   double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
   double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
   int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
 
   struct gk_mirror_ctx ctx = {
-    .cdim = cdim,  .vdim = vdim,
-    .mi = mi,  .qi = qi,
-    .me = me,  .qe = qe,
-    .Te0 = Te0,  .Ti0 = Ti0,  .n0 = n0,
-    .B_p = B_p,  .beta = beta,  .tau = tau,
-    .alim = alim,
-    .alphaIC0 = alphaIC0,
-    .alphaIC1 = alphaIC1,
-    .nuFrac = nuFrac,  .logLambdaIon = logLambdaIon,  .nuIon = nuIon,
-    .vti = vti,  .vte = vte,  .c_s = c_s,
-    .omega_ci = omega_ci,  .rho_s = rho_s,
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .nuFrac = nuFrac,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
     .RatZeq0 = RatZeq0,
-    .Z_min = Z_min,  .Z_max = Z_max,
-    // Parameters controlling the magnetic equilibrium model.
-    .mcB = mcB,  .gamma = gamma,
-    .Z_m = Z_m,
-    .z_m = z_m,
-    // Initial condition parameters.
-    .Ti_perp0 = Ti_perp0,  .Ti_par0 = Ti_par0,
-    .Ti_perp_m = Ti_perp_m,  .Ti_par_m = Ti_par_m,  .cs_m = cs_m,
-    // Source parameters
-    .NSrcIon = NSrcIon,  .NSrcFloorIon = NSrcFloorIon,
-    .TSrc0Ion = TSrc0Ion,  .TSrcFloorIon = TSrcFloorIon,
-    .lineLengthSrcIon = lineLengthSrcIon,  .sigSrcIon = sigSrcIon,
-    // Physical velocity space limits.
-    .vpar_min_ion = vpar_min_ion,
     .vpar_max_ion = vpar_max_ion,
     .mu_max_ion = mu_max_ion,
-    // Computational velocity space limits.
-    .vpar_lin_fac_inv = vpar_lin_fac_inv,
-    .vpar_pow = vpar_pow,
-    .vpar_min_ion_c = vpar_min_ion_c,
-    .vpar_max_ion_c = vpar_max_ion_c,
-    .mu_lin_fac_inv = mu_lin_fac_inv,
-    .mu_pow = mu_pow,
-    .mu_min_ion_c = mu_min_ion_c,
-    .mu_max_ion_c = mu_max_ion_c,
-    // Grid DOF.
+    .Npsi = Npsi,
     .Nz = Nz,
     .Nvpar = Nvpar,
     .Nmu = Nmu,
-    .cells = {Nx, Nz, Nvpar, Nmu},
+    .cells = {Npsi, Nz, Nvpar, Nmu},
     .poly_order = poly_order,
-    // Time integration and I/O parameters.
     .t_end = t_end,
     .num_frames = num_frames,
     .num_phases = num_phases,
     .poa_phases = poa_phases,
-    .write_phase_freq     = write_phase_freq    , 
-    .int_diag_calc_freq   = int_diag_calc_freq  , 
-    .dt_failure_tol       = dt_failure_tol      , 
-    .num_failures_max     = num_failures_max    , 
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+    
+    .ion_source_amplitude = ion_source_amplitude,
+    .ion_source_sigma = ion_source_sigma,
+    .ion_source_temp = ion_source_temp,
+
+    .mcB = mcB,
+    .gamma = gamma,
+    .Z_m = Z_m,
+    .Z_min = Z_min,
+    .Z_max = Z_max,
   };
-
+  
   // Populate a couple more values in the context.
   ctx.psi_max = psi_RZ(ctx.RatZeq0, 0., &ctx);
-  ctx.psi_min = ctx.psi_max * 0.1;
-  ctx.z_min   = ctx.Z_min;
-  ctx.z_max   = ctx.Z_max;
+  ctx.psi_min  = psi_RZ(ctx.RatZeq0/10, 0., &ctx);
+  ctx.z_min    = z_psiZ(ctx.psi_max, ctx.Z_min, &ctx);
+  ctx.z_max    = z_psiZ(ctx.psi_max, ctx.Z_max, &ctx);
 
   return ctx;
 }
@@ -794,14 +632,10 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   long step = 1;
   while ((t_curr < t_end) && (step <= num_steps))
   {
-    if (step == 1 || step % 20 == 0)
-      gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
-
-    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
+    dt = t_end - t_curr; // Ensure we don't step beyond t_end.
     struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
-
-    if (step == 1 || step % 20 == 0)
-      gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+    gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
 
     if (!status.success)
     {
@@ -811,8 +645,8 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
     t_curr += status.dt_actual;
     dt = status.dt_suggested;
 
-    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual);
-    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end);
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end);
 
     if (dt_init < 0.0) {
       dt_init = status.dt_actual;
@@ -868,107 +702,112 @@ int main(int argc, char **argv)
 
   struct gkyl_gyrokinetic_species ion = {
     .name = "ion",
-    .charge = ctx.qi,  .mass = ctx.mi,
+    .charge = ctx.qi,
+    .mass = ctx.mi,
     .vdim = ctx.vdim,
-    .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c},
-    .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c},
-    .cells = { cells_v[0], cells_v[1] },
-
+    .lower = {-1.0, 0.0},
+    .upper = { 1.0, 1.0},
+    .cells = { cells_v[0], cells_v[1]},
     .polarization_density = ctx.n0,
 
-    .mapc2p = {
-      .mapping = mapc2p_vel_ion,
-      .ctx = &ctx,
-    },
-
     .projection = {
-      .proj_id = GKYL_PROJ_BIMAXWELLIAN,
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
       .density = eval_density_ion,
-      .upar = eval_upar_ion,
-      .temppar = eval_temp_par_ion,
-      .tempperp = eval_temp_perp_ion,
       .ctx_density = &ctx,
+      .upar = eval_upar_ion,
       .ctx_upar = &ctx,
-      .ctx_temppar = &ctx,
-      .ctx_tempperp = &ctx,
+      .temp = eval_temp_ion,
+      .ctx_temp = &ctx,
+    },
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
     },
 
     .collisionless = {
       .type = GKYL_GK_COLLISIONLESS_ES,
       .scale_factor = 1.0, // Will be replaced below.
+      .write_diagnostics = true,
+    },
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE,
+      .cellwise_const = true,
+      .write_diagnostics = true,
     },
 
     .collisions = {
       .collision_id = GKYL_LBO_COLLISIONS,
-      .self_nu = evalNuIon,
-      .self_nu_ctx = &ctx,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .write_diagnostics = true,
     },
-
     .source = {
       .source_id = GKYL_PROJ_SOURCE,
       .num_sources = 1,
       .projection[0] = {
         .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
-	      .density = eval_density_ion_source,
-        .upar = eval_upar_ion_source,
-        .temp = eval_temp_ion_source,
         .ctx_density = &ctx,
+        .density = eval_density_ion_source,
         .ctx_upar = &ctx,
+        .upar= eval_upar_ion_source,
         .ctx_temp = &ctx,
-      }, 
-    },
-
-    .time_rate_multiplier = {
-      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
-      .cellwise_const = true,
-      .write_diagnostics = true,
-    },
-
-    .positivity = {
-      .type = GKYL_GK_POSITIVITY_SHIFT,
-      .write_diagnostics = true,
+        .temp = eval_temp_ion_source,      
+      },
+      .diagnostics = {
+        .num_diag_moments = 6,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+      },
     },
 
     .bcs = {
-      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_REFLECT },
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_ZERO_FLUX },
       { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_ABSORB },
       { .dir = 1, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH },
       { .dir = 1, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH },
     },
-
-    .num_diag_moments = 4,
-    .diag_moments = {GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = {GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP},
+    },
   };
-
   struct gkyl_gyrokinetic_field field = {
     .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
     .electron_mass = ctx.me,
     .electron_charge = ctx.qe,
     .electron_temp = ctx.Te0,
-    .is_static = false, // So solvers are allocated.
+    .is_static = false,
   };
 
-  // GK app
-  struct gkyl_gk app_inp = { 
+  struct gkyl_gk app_inp = {  // GK app
     .name = "gk_mirror_boltz_elc_poa_2x2v_p1",
     .cdim = ctx.cdim,
-    .lower = {ctx.psi_min, ctx.z_min},
-    .upper = {ctx.psi_max, ctx.z_max},
+    .upper = {ctx.psi_max, ctx.Z_max},
+    .lower = {ctx.psi_min, ctx.Z_min},
     .cells = { cells_x[0], cells_x[1] },
     .poly_order = ctx.poly_order,
     .basis_type = app_args.basis_type,
 
     .geometry = {
-      .geometry_id = GKYL_MAPC2P,
+      .geometry_id = GKYL_GEOMETRY_MAPC2P,
       .world = {0.0},
       .mapc2p = mapc2p, // Mapping of computational to physical space.
       .c2p_ctx = &ctx,
       .bfield_func = bfield_func, // Magnetic field.
-      .bfield_ctx = &ctx
+      .bfield_ctx = &ctx,
     },
 
-    .num_periodic_dir = 1,
-    .periodic_dirs = {0},
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
 
     .num_species = 1,
     .species = {ion},
diff --git a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
index 37d64c85bd..9d0c49c37a 100644
--- a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
@@ -1160,7 +1160,7 @@ int main(int argc, char **argv)
     .basis_type = app_args.basis_type,
 
     .geometry = {
-      .geometry_id = GKYL_MAPC2P,
+      .geometry_id = GKYL_GEOMETRY_MAPC2P,
       .world = {ctx.psi_eval, 0.0},
       .mapc2p = mapc2p, // Mapping of computational to physical space.
       .c2p_ctx = &ctx,
diff --git a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
index ad3a128ae9..efa6936153 100644
--- a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
+++ b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
@@ -808,7 +808,7 @@ int main(int argc, char **argv)
     .basis_type = app_args.basis_type,
 
     .geometry = {
-      .geometry_id = GKYL_MAPC2P,
+      .geometry_id = GKYL_GEOMETRY_MAPC2P,
       .world = {ctx.psi_eval, 0.0},
       .mapc2p = mapc2p, // Mapping of computational to physical space.
       .c2p_ctx = &ctx,
diff --git a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
index 75c0a024cd..2caf2e4350 100644
--- a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
@@ -832,7 +832,7 @@ int main(int argc, char **argv)
     .zmin = -2.0,  // Z of lower boundary
     .zmax =  2.0,  // Z of upper boundary
     .include_axis = false, // Include R=0 axis in grid
-    .fl_coord = GKYL_MIRROR_GRID_GEN_PSI_CART_Z, // coordinate system for psi grid
+    .fl_coord = GKYL_GEOMETRY_MIRROR_GRID_GEN_PSI_CART_Z, // coordinate system for psi grid
   };
 
   struct gkyl_gk app_inp = {  // GK app
@@ -844,7 +844,7 @@ int main(int argc, char **argv)
     .poly_order = ctx.poly_order,
     .basis_type = app_args.basis_type,
     .geometry = {
-      .geometry_id = GKYL_MIRROR,
+      .geometry_id = GKYL_GEOMETRY_MIRROR,
       .world = {ctx.psi_eval, 0.0},
       .mirror_grid_info = grid_inp,
     },

From 017e3609abcd80bb026ea39625df2cdc2ae1898f Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 20 Feb 2026 11:32:35 -0800
Subject: [PATCH 16/32] Wrap the array_find_peaks into the loss cone mask. I
 had an issue with compute sanitizer with the array_find_peaks which was
 causing crashes in the loss cone mask. These issues are fixed. There was some
 funny business regarding the basis being on the host vs device. Refactor the
 allocations in the GPU kernels to not be inside the kernels. Instead, it's
 allocated at init time. The GPU code pulses, which is odd, but it runs. The
 2x2v POA regression test runs and is compute sanitizer clean. The other POA
 tests do not error either on GPU and are compute sanitizer clean.

---
 core/zero/array_dg_find_peaks.c               | 62 +++++++++----------
 core/zero/array_dg_find_peaks_cu.cu           | 45 +++++++++-----
 core/zero/gkyl_array_dg_find_peaks_priv.h     |  9 +++
 .../gkyl_loss_cone_mask_gyrokinetic_priv.h    |  1 +
 gyrokinetic/zero/loss_cone_mask_gyrokinetic.c | 17 ++++-
 .../zero/loss_cone_mask_gyrokinetic_cu.cu     |  8 +--
 6 files changed, 90 insertions(+), 52 deletions(-)

diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c
index b16e3f1b38..7d51a2804a 100644
--- a/core/zero/array_dg_find_peaks.c
+++ b/core/zero/array_dg_find_peaks.c
@@ -21,18 +21,11 @@ count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gk
   int search_dir = up->search_dir;
   int poly_order = up->basis.poly_order;
 
-  // Get node locations.
-  struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, ndim, up->basis.num_basis);
-  up->basis.node_list(gkyl_array_fetch(nodes, 0));
-
-  // Determine number of nodes along search direction.
-  int num_cells_search = up->range.upper[search_dir] - up->range.lower[search_dir] + 1;
-  int nodes_per_cell = (poly_order == 1) ? 2 : 3;
-  int total_nodes_search = (poly_order == 1) ? num_cells_search + 1 : 2*num_cells_search + 1;
-
-  // Allocate arrays to store values and coordinates along search direction.
-  double *vals = gkyl_malloc(sizeof(double) * total_nodes_search);
-  double *coords = gkyl_malloc(sizeof(double) * total_nodes_search);
+  int total_nodes_search = up->total_nodes_search;
+
+  // Use pre-allocated search buffers from the struct.
+  double *vals = up->search_vals;
+  double *coords = up->search_coords;
   for (int i = 0; i < total_nodes_search; i++) {
     vals[i] = 0.0;
     coords[i] = 0.0;
@@ -59,7 +52,7 @@ count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gk
 
     // Evaluate at each node in this cell.
     for (int n = 0; n < up->basis.num_basis; n++) {
-      const double *nod_log = gkyl_array_cfetch(nodes, n);
+      const double *nod_log = gkyl_array_cfetch(up->nodes, n);
       
       // Determine node offset in search direction.
       int node_offset;
@@ -125,10 +118,6 @@ count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gk
   peak_types_out[num_peaks++] = GKYL_PEAK_EDGE_HI;
 
   *num_peaks_out = num_peaks;
-
-  gkyl_free(vals);
-  gkyl_free(coords);
-  gkyl_array_release(nodes);
 }
 
 /**
@@ -143,14 +132,12 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
   int search_dir = up->search_dir;
   int poly_order = up->basis.poly_order;
 
-  // Determine number of nodes along search direction.
-  int num_cells_search = up->range.upper[search_dir] - up->range.lower[search_dir] + 1;
-  int total_nodes_search = (poly_order == 1) ? num_cells_search + 1 : 2*num_cells_search + 1;
+  int total_nodes_search = up->total_nodes_search;
 
-  // Allocate arrays to store values and coordinates along search direction.
-  double *vals = gkyl_malloc(sizeof(double) * total_nodes_search);
-  double *coords = gkyl_malloc(sizeof(double) * total_nodes_search);
-  bool *visited = gkyl_malloc(sizeof(bool) * total_nodes_search);
+  // Use pre-allocated search buffers from the struct.
+  double *vals = up->search_vals;
+  double *coords = up->search_coords;
+  bool *visited = up->search_visited;
   for (int i = 0; i < total_nodes_search; i++) {
     vals[i] = 0.0;
     coords[i] = 0.0;
@@ -323,10 +310,6 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
     val_n[0] = vals[total_nodes_search - 1];
     coord_n[0] = coords[total_nodes_search - 1];
   }
-
-  gkyl_free(vals);
-  gkyl_free(coords);
-  gkyl_free(visited);
 }
 
 /**
@@ -413,7 +396,7 @@ eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
   }
 
   // Get the DG coefficients at this cell.
-  long linidx = gkyl_range_idx(&up->range_ext, cell_idx);
+  long linidx = gkyl_range_idx(&up->range, cell_idx);
   const double *f_d = gkyl_array_cfetch(in_ho, linidx);
 
   // Get cell center.
@@ -525,6 +508,16 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_pea
   // No device basis on CPU.
   up->out_basis_on_dev = NULL;
 
+  // Compute total_nodes_search for the struct.
+  int num_cells_search = find_peaks_inp->range->upper[find_peaks_inp->search_dir]
+    - find_peaks_inp->range->lower[find_peaks_inp->search_dir] + 1;
+  up->total_nodes_search = (poly_order == 1) ? num_cells_search + 1 : 2*num_cells_search + 1;
+
+  // Pre-allocate search-direction working buffers (reused by advance).
+  up->search_vals = gkyl_malloc(sizeof(double) * up->total_nodes_search);
+  up->search_coords = gkyl_malloc(sizeof(double) * up->total_nodes_search);
+  up->search_visited = gkyl_malloc(sizeof(bool) * up->total_nodes_search);
+
   // Count peaks at middle preserved coordinate.
   int mid_preserved_idx = 0;
   if (out_dim == 1) {
@@ -790,10 +783,17 @@ gkyl_array_dg_find_peaks_free(const struct gkyl_ref_count *ref)
   gkyl_nodal_ops_release(up->n2m);
 
   if (GKYL_IS_CU_ALLOC(up->flags)) {
-    if (up->out_basis_on_dev)
-      gkyl_cart_modal_basis_release_cu(up->out_basis_on_dev);
+    gkyl_cart_modal_basis_release_cu(up->out_basis_on_dev);
+    gkyl_cu_free(up->search_vals);
+    gkyl_cu_free(up->search_coords);
+    gkyl_cu_free(up->search_visited);
     gkyl_cu_free(up->on_dev);
   }
+  else {
+    gkyl_free(up->search_vals);
+    gkyl_free(up->search_coords);
+    gkyl_free(up->search_visited);
+  }
 
   gkyl_free(up);
 }
diff --git a/core/zero/array_dg_find_peaks_cu.cu b/core/zero/array_dg_find_peaks_cu.cu
index ad1ecfde50..25b71aea77 100644
--- a/core/zero/array_dg_find_peaks_cu.cu
+++ b/core/zero/array_dg_find_peaks_cu.cu
@@ -9,10 +9,6 @@ extern "C" {
 #include <assert.h>
 }
 
-// Maximum number of nodes along the search direction.
-// For p=2: total_nodes = 2*num_cells + 1. With up to 512 cells this is 1025.
-#define MAX_SEARCH_NODES 1025
-
 /**
  * CUDA kernel: find peaks along the search direction for each preserved-direction
  * node index. One thread per preserved_node_idx.
@@ -41,10 +37,12 @@ gkyl_find_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
     int num_cells_search = up->range.upper[search_dir] - up->range.lower[search_dir] + 1;
     int total_nodes_search = (poly_order == 1) ? num_cells_search + 1 : 2*num_cells_search + 1;
 
-    // Thread-local storage for values, coordinates, and visited flags.
-    double vals[MAX_SEARCH_NODES];
-    double coords[MAX_SEARCH_NODES];
-    bool visited[MAX_SEARCH_NODES];
+    // Each thread gets its own contiguous slice of the pre-allocated
+    // search buffers. Offset = preserved_node_idx * total_nodes_search.
+    long buf_off = (long)preserved_node_idx * total_nodes_search;
+    double *vals = up->search_vals + buf_off;
+    double *coords = up->search_coords + buf_off;
+    bool *visited = up->search_visited + buf_off;
     for (int i = 0; i < total_nodes_search; i++) {
       vals[i] = 0.0;
       coords[i] = 0.0;
@@ -302,7 +300,7 @@ gkyl_eval_at_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
     }
 
     // Fetch DG coefficients at this cell.
-    long linidx = gkyl_range_idx(&up->range_ext, cell_idx);
+    long linidx = gkyl_range_idx(&up->range, cell_idx);
     const double *f_d = (const double *)gkyl_array_cfetch(in, linidx);
 
     // Get cell center for logical coordinate conversion.
@@ -483,6 +481,16 @@ gkyl_array_dg_find_peaks_new_cu(struct gkyl_array_dg_find_peaks *up_ho)
   else
     up->out_basis_on_dev = NULL;
 
+  // Pre-allocate search-direction working arrays on device.
+  // Each thread (one per preserved node) gets its own contiguous slice
+  // of total_nodes_search elements, so total size = num_nodes_out * total_nodes_search.
+  up->total_nodes_search = up_ho->total_nodes_search;
+  int num_nodes_out = up->out_nrange.volume;
+  long search_buf_len = (long)num_nodes_out * up->total_nodes_search;
+  up->search_vals = (double *)gkyl_cu_malloc(sizeof(double) * search_buf_len);
+  up->search_coords = (double *)gkyl_cu_malloc(sizeof(double) * search_buf_len);
+  up->search_visited = (bool *)gkyl_cu_malloc(sizeof(bool) * search_buf_len);
+
   up->flags = 0;
   GKYL_SET_CU_ALLOC(up->flags);
   up->ref_count = gkyl_ref_count_init(gkyl_array_dg_find_peaks_free);
@@ -533,12 +541,19 @@ gkyl_array_dg_find_peaks_new_cu(struct gkyl_array_dg_find_peaks *up_ho)
   struct gkyl_array *ho_out_coords_nodal[GKYL_DG_FIND_PEAKS_MAX];
   struct gkyl_array *ho_out_eval_at_peaks_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];
 
-  // Swap in device-callable basis function pointers for the H2D copy.
-  gkyl_cart_modal_serendip_cu_dev(&up->basis, ndim, poly_order);
-  if (out_dim == 0)
-    gkyl_cart_modal_serendip_cu_dev(&up->out_basis, 1, 0);
-  else
-    gkyl_cart_modal_serendip_cu_dev(&up->out_basis, 1, poly_order);
+  // Populate device-callable basis function pointers for the H2D copy.
+  // We allocate temporary device basis structs, initialize them with device
+  // kernels, then copy back to the host struct fields so that when the
+  // whole struct is memcpy'd H2D, it contains device-callable pointers.
+  struct gkyl_basis *tmp_basis_dev = gkyl_cart_modal_serendip_cu_dev_new(ndim, poly_order);
+  gkyl_cu_memcpy(&up->basis, tmp_basis_dev, sizeof(struct gkyl_basis), GKYL_CU_MEMCPY_D2H);
+  gkyl_cu_free(tmp_basis_dev);
+
+  int out_basis_dim = (out_dim == 0) ? 1 : 1;
+  int out_basis_po = (out_dim == 0) ? 0 : poly_order;
+  struct gkyl_basis *tmp_out_basis_dev = gkyl_cart_modal_serendip_cu_dev_new(out_basis_dim, out_basis_po);
+  gkyl_cu_memcpy(&up->out_basis, tmp_out_basis_dev, sizeof(struct gkyl_basis), GKYL_CU_MEMCPY_D2H);
+  gkyl_cu_free(tmp_out_basis_dev);
 
   // Swap nodes to its device pointer.
   up->nodes = up->nodes->on_dev;
diff --git a/core/zero/gkyl_array_dg_find_peaks_priv.h b/core/zero/gkyl_array_dg_find_peaks_priv.h
index fa7fd24b5b..1686f3291a 100644
--- a/core/zero/gkyl_array_dg_find_peaks_priv.h
+++ b/core/zero/gkyl_array_dg_find_peaks_priv.h
@@ -55,6 +55,15 @@ struct gkyl_array_dg_find_peaks {
   // Internal working arrays.
   struct gkyl_array *nodes;         // Node locations in logical coords
 
+  // Working arrays for the find-peaks scan along the search direction.
+  // On CPU these are malloc'd per call; on GPU they are pre-allocated
+  // with size (num_nodes_out * total_nodes_search) so each thread
+  // can index its own contiguous slice.
+  double *search_vals;              // Nodal values along search dir
+  double *search_coords;            // Physical coordinates along search dir
+  bool *search_visited;             // Visited flags along search dir
+  int total_nodes_search;           // Number of nodes along search dir
+
   // Nodal-to-modal converter.
   struct gkyl_nodal_ops *n2m;
 
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
index f19b41ad76..6a28771efd 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
@@ -53,6 +53,7 @@ struct gkyl_loss_cone_mask_gyrokinetic {
   const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case).
   const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
+  struct gkyl_basis *bmag_max_basis_on_dev; // Device-resident basis with device-callable function pointers.
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
   
   // GPU helper: scalar bmag_max_z value for simple 1x cases.
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index 9c250e6c89..b02101a2c4 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -249,6 +249,7 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   }
   up->use_gpu = inp->use_gpu;
   up->bmag_max_z_scalar_gpu = NULL; // Will be set for GPU case.
+  up->bmag_max_basis_on_dev = NULL; // Will be set for GPU case.
 
   if (inp->c2p_pos_func == 0) {
     up->c2p_pos = c2p_pos_identity;
@@ -337,21 +338,32 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
     
     // Allocate and set scalar bmag_max_z for GPU kernels.
     // TODO: For 2x GPU support, need to pass full arrays and do per-cell lookup.
+    // inp->bmag_max_z_coord is a GPU array, so copy to host before reading.
+    struct gkyl_array *bmag_max_z_coord_ho = gkyl_array_new(GKYL_DOUBLE,
+      inp->bmag_max_z_coord->ncomp, inp->bmag_max_z_coord->size);
+    gkyl_array_copy(bmag_max_z_coord_ho, inp->bmag_max_z_coord);
+
     double bmag_max_z_val;
     if (up->cdim == 1) {
       // 1x case: single value.
-      const double *bmag_max_z_d = gkyl_array_cfetch(inp->bmag_max_z_coord, 0);
+      const double *bmag_max_z_d = gkyl_array_cfetch(bmag_max_z_coord_ho, 0);
       bmag_max_z_val = bmag_max_z_d[0];
     } else {
       // 2x case: use the first field line's value (simplified approach).
       int psi_idx[1] = {inp->bmag_max_range->lower[0]};
       long bmag_max_z_linidx = gkyl_range_idx(inp->bmag_max_range, psi_idx);
-      const double *bmag_max_z_d = gkyl_array_cfetch(inp->bmag_max_z_coord, bmag_max_z_linidx);
+      const double *bmag_max_z_d = gkyl_array_cfetch(bmag_max_z_coord_ho, bmag_max_z_linidx);
       double xc[1] = {0.0};
       bmag_max_z_val = inp->bmag_max_basis->eval_expand(xc, bmag_max_z_d);
     }
+    gkyl_array_release(bmag_max_z_coord_ho);
     up->bmag_max_z_scalar_gpu = gkyl_cu_malloc(sizeof(double));
     gkyl_cu_memcpy(up->bmag_max_z_scalar_gpu, &bmag_max_z_val, sizeof(double), GKYL_CU_MEMCPY_H2D);
+
+    // Create a device-resident basis with device-callable function pointers
+    // for use in GPU kernels that call eval_expand.
+    up->bmag_max_basis_on_dev = gkyl_cart_modal_serendip_cu_dev_new(
+      inp->bmag_max_basis->ndim, inp->bmag_max_basis->poly_order);
   }
 #endif
 
@@ -703,6 +715,7 @@ gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up)
 
     gkyl_mat_mm_array_mem_release(up->phase_nodal_to_modal_mem);
     gkyl_cu_free(up->bmag_max_z_scalar_gpu);
+    gkyl_cu_free(up->bmag_max_basis_on_dev);
   }
 
   gkyl_free(up);
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
index 6508c52e71..717f827d49 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
@@ -79,7 +79,7 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *u
   int nblocks = conf_range->nblocks, nthreads = conf_range->nthreads;
   gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<<nblocks, nthreads>>>(up->cdim, *conf_range,
     *up->bmag_max_range, up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_peak->on_dev,
-    up->bmag_max_basis, Dbmag_quad->on_dev);
+    up->bmag_max_basis_on_dev, Dbmag_quad->on_dev);
 }
 
 static void
@@ -447,7 +447,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   // Compute qDphiDbmag at quadrature points.
   gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker<<<dimGrid_conf, dimBlock_conf>>>(
     up->cdim, *conf_range, *up->bmag_max_range, 
-    up->basis_at_ords_conf->on_dev, up->bmag_max_basis, up->charge, up->is_tandem,
+    up->basis_at_ords_conf->on_dev, up->bmag_max_basis_on_dev, up->charge, up->is_tandem,
     phi->on_dev, phi_m->on_dev, phi_tandem->on_dev,
     up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
     up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev, up->qDphiDbmag_quad_tandem->on_dev);
@@ -458,7 +458,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
     // Don't do quadrature.
     int nblocks = phase_range->nblocks, nthreads = phase_range->nthreads;
     gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks, nthreads>>>(up->cdim, *up->grid_phase, *phase_range, *conf_range,
-      gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis, up->is_tandem,
+      gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis_on_dev, up->is_tandem,
       up->mass, up->ordinates_phase->on_dev,
       up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev, 
       up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev, up->qDphiDbmag_quad_tandem->on_dev,
@@ -471,7 +471,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
     gkyl_parallelize_components_kernel_launch_dims(&dimGrid, &dimBlock, *phase_range, tot_quad_phase);
 
     gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid, dimBlock>>>(up->cdim, *up->grid_phase, *phase_range, *conf_range,
-      gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis, up->is_tandem,
+      gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis_on_dev, up->is_tandem,
       up->mass, up->norm_fac, up->ordinates_phase->on_dev,
       up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev,
       up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev, up->qDphiDbmag_quad_tandem->on_dev,

From 909d74128db31cfee3105958f4df34aa4d5c21b9 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 20 Feb 2026 13:59:22 -0800
Subject: [PATCH 17/32] Fix loss cone mask unit test. Someone didn't run make
 check when committing code to main and broke a unit test with the geometry
 enum changes

---
 gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index 012db64071..2c479c3b82 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -582,7 +582,7 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
 
   // Initialize geometry.
   struct gkyl_gk_geometry_inp geometry_input = {
-    .geometry_id = GKYL_MAPC2P,
+    .geometry_id = GKYL_GEOMETRY_MAPC2P,
     .world = {0.0, 0.0},
     .mapc2p = mapc2p_3x,
     .c2p_ctx = 0,

From c6899925d7e083d380941bb286ed105c81b0354b Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 20 Feb 2026 15:07:55 -0800
Subject: [PATCH 18/32] fdot multiplier works in parallel. Unit test fixed

---
 gyrokinetic/apps/gk_species_fdot_multiplier.c   |  7 ++++---
 .../unit/ctest_loss_cone_mask_gyrokinetic.c     | 17 +++++++++--------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 6ce3263029..2be479219b 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -204,11 +204,12 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
         .use_gpu = app->use_gpu,
       };
       // Pass a global bmag_int into the peak finder
-      struct gkyl_array *bmag_int_global = mkarr(false, 
-        app->gk_geom->geo_int.bmag->ncomp, app->gk_geom->geo_int.bmag->size);
+      struct gkyl_array *bmag_int_global = mkarr(app->use_gpu,
+        app->gk_geom->geo_int.bmag->ncomp, app->global_ext.volume);
       gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag, bmag_int_global);
+
       fdmul->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global);
-      gkyl_array_dg_find_peaks_advance(fdmul->bmag_peak_finder, app->gk_geom->geo_int.bmag);
+      gkyl_array_dg_find_peaks_advance(fdmul->bmag_peak_finder, bmag_int_global);
       gkyl_array_release(bmag_int_global);
       
       // Get the LOCAL_MAX peak (bmag maximum along z direction).
diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index 2c479c3b82..7bd1331a0e 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -407,10 +407,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   gkyl_proj_on_basis *evmask_ref = gkyl_proj_on_basis_new(&grid, &basis_mask, basis_mask.poly_order+1, 1, mask_ref_1x2v, &ctx);
   gkyl_proj_on_basis_advance(evmask_ref, 0.0, &local, mask_ref_ho);
   gkyl_proj_on_basis_release(evmask_ref);
-  if (ctx.num_quad == 1) {
-    // Rescale to deal with normalization.
-    gkyl_array_scale(mask_ref_ho, 1.0/pow(sqrt(2.0),cdim+vdim));
-  }
+
 
 //  // values to compare  at index (1, 9, 9) [remember, lower-left index is (1,1,1)]
 //  double p1_vals[] = {  
@@ -749,16 +746,20 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
   
   // High mu particles at center should mostly be trapped.
   double trapped_frac = (double)num_trapped_high_mu_center / (double)total_high_mu_center;
-  TEST_CHECK(trapped_frac > 0.5);
-  if (trapped_frac <= 0.5) {
+  printf("Trapped fraction for high-mu center particles: %g (%d / %d)\n", 
+         trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
+  TEST_CHECK(trapped_frac >= 0.5);
+  if (trapped_frac < 0.5) {
     printf("High-mu center trapped fraction: %g (%d / %d)\n", 
            trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
   }
   
   // Low mu particles at center should mostly be passing.
   double passing_frac = (double)num_passing_low_mu_center / (double)total_low_mu_center;
-  TEST_CHECK(passing_frac > 0.5);
-  if (passing_frac <= 0.5) {
+  printf("Passing fraction for low-mu center particles: %g (%d / %d)\n", 
+         passing_frac, num_passing_low_mu_center, total_low_mu_center);
+  TEST_CHECK(passing_frac >= 0.5);
+  if (passing_frac < 0.5) {
     printf("Low-mu center passing fraction: %g (%d / %d)\n", 
            passing_frac, num_passing_low_mu_center, total_low_mu_center);
   }

From eed58bc63249702df8d73aa2a222865a9aaaa40b Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Tue, 24 Feb 2026 10:26:55 -0800
Subject: [PATCH 19/32] Refactor global potential handling in damping modules
 to use shared phi_smooth_global array for improved performance and
 consistency across computations.

---
 gyrokinetic/apps/gk_species_damping.c         | 28 +++++++++----------
 gyrokinetic/apps/gk_species_fdot_multiplier.c | 17 ++++++-----
 gyrokinetic/apps/gkyl_gyrokinetic_priv.h      |  2 ++
 3 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index 2709a9dd79..9440e702f4 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -127,14 +127,16 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
       struct gkyl_array_dg_find_peaks_inp peak_inp = {
         .basis = &app->basis,
         .grid = &app->grid,
-        .range = &app->local,
-        .range_ext = &app->local_ext,
+        .range = &app->global,
+        .range_ext = &app->global_ext,
         .search_dir = search_dir,
         .use_gpu = app->use_gpu,
       };
       // Pass a global bmag_int into the peak finder
-      struct gkyl_array *bmag_int_global = mkarr(false, 
-        app->gk_geom->geo_int.bmag->ncomp, app->gk_geom->geo_int.bmag->size);
+      struct gkyl_array *bmag_int_global = mkarr(app->use_gpu,
+        app->gk_geom->geo_int.bmag->ncomp, app->global_ext.volume);
+      damp->phi_smooth_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume);
+
       gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag, bmag_int_global);
       damp->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global);
       gkyl_array_dg_find_peaks_advance(damp->bmag_peak_finder, app->gk_geom->geo_int.bmag);
@@ -276,23 +278,19 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
       gkyl_array_accumulate(rhs, -1.0, f_buffer);
     }
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
-      // Find the potential at all peak locations (including the mirror throat).
-      gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, phi,
+      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, phi, damp->phi_smooth_global);
+      // Find the potential at bmag_max
+      gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, damp->phi_smooth_global,
         damp->bmag_max_peak_idx, damp->phi_at_bmag_max);
-      // Allgather on phi_at_bmag_max. It's not an allgather.
-      // One process has the correct one, but the others do not. Is it a bcast or a sync?
 
       if (damp->is_tandem) {
-        gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, phi,
+        gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, damp->phi_smooth_global,
           damp->bmag_tandem_peak_idx, damp->phi_at_bmag_tandem);
-        // Allgather on phi_at_bmag_tandem. It's not an allgather.
-        // One process has the correct one, but the others do not. Is it a bcast or a sync?
         gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-          phi, damp->phi_at_bmag_max, damp->phi_at_bmag_tandem, damp->rate);
+          damp->phi_smooth_global, damp->phi_at_bmag_max, damp->phi_at_bmag_tandem, damp->rate);
       } else {
-        // Project the loss cone mask using the phi_m array.
         gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-          phi, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
+          damp->phi_smooth_global, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
       }
 
       // Assemble the damping term -scale_prof * mask * f.
@@ -300,7 +298,6 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
       gkyl_array_scale_by_cell(damp->rate, damp->scale_prof);
       gkyl_array_scale_by_cell(f_buffer, damp->rate);
       gkyl_array_accumulate(rhs, -1.0, f_buffer);
-
     }
 
     // Add the frequency to the CFL frequency.
@@ -339,6 +336,7 @@ gk_species_damping_release(const struct gkyl_gyrokinetic_app *app, const struct
       gkyl_array_release(damp->phi_at_bmag_max);
       gkyl_array_release(damp->phi_at_bmag_tandem);
 
+      gkyl_array_release(damp->phi_smooth_global)
       gkyl_array_dg_find_peaks_release(damp->bmag_peak_finder);
       gkyl_loss_cone_mask_gyrokinetic_release(damp->lcm_proj_op);
       gkyl_array_release(damp->scale_prof);
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 2be479219b..e61b6ac3e8 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -78,27 +78,23 @@ void
 gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
-
-  struct gkyl_array *phi_smooth_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume);
-  gkyl_comm_array_allgather(app->comm, &app->local, &app->global, phi, phi_smooth_global);
+  gkyl_comm_array_allgather(app->comm, &app->local, &app->global, phi, fdmul->phi_smooth_global);
   // Find the potential at bmag_max
-  gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, phi_smooth_global,
+  gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, fdmul->phi_smooth_global,
     fdmul->bmag_max_peak_idx, fdmul->phi_at_bmag_max);
   
   if (fdmul->is_tandem) {
-    gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, phi_smooth_global,
+    gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, fdmul->phi_smooth_global,
       fdmul->bmag_tandem_peak_idx, fdmul->phi_at_bmag_tandem);
     gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
-      phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_tandem, fdmul->multiplier);
+      fdmul->phi_smooth_global, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_tandem, fdmul->multiplier);
   } else {
-    // Project the loss cone mask using the phi_m array.
     gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
-      phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_max, fdmul->multiplier);
+      fdmul->phi_smooth_global, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_max, fdmul->multiplier);
   }
 
   // Multiply out by the multiplier.
   gkyl_array_scale_by_cell(out, fdmul->multiplier);
-  gkyl_array_release(phi_smooth_global);
 }
 
 void
@@ -206,6 +202,8 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       // Pass a global bmag_int into the peak finder
       struct gkyl_array *bmag_int_global = mkarr(app->use_gpu,
         app->gk_geom->geo_int.bmag->ncomp, app->global_ext.volume);
+      fdmul->phi_smooth_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume);
+
       gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag, bmag_int_global);
 
       fdmul->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global);
@@ -366,6 +364,7 @@ gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, const
       gkyl_array_release(fdmul->phi_at_bmag_max);
       gkyl_array_release(fdmul->phi_at_bmag_tandem);
 
+      gkyl_array_release(fdmul->phi_smooth_global);
       gkyl_array_dg_find_peaks_release(fdmul->bmag_peak_finder);
       gkyl_loss_cone_mask_gyrokinetic_release(fdmul->lcm_proj_op);
     }
diff --git a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
index a7868d0f30..becc2caeb2 100644
--- a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
+++ b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
@@ -807,6 +807,7 @@ struct gk_damping {
   struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
   struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask.
   struct gkyl_array_dg_find_peaks *bmag_peak_finder; // Finds peaks in bmag along parallel direction.
+  struct gkyl_array *phi_smooth_global; // Smoothed electrostatic potential on the global grid.
   // Per-field-line bmag_max arrays (pointers to arrays owned by bmag_peak_finder).
   const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
@@ -837,6 +838,7 @@ struct gk_fdot_multiplier {
   struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask.
   // Updater to find bmag peaks (mirror throat location).
   struct gkyl_array_dg_find_peaks *bmag_peak_finder; // Finds peaks in bmag along parallel direction.
+  struct gkyl_array *phi_smooth_global; // Smoothed electrostatic potential on the global grid.
   // Per-field-line bmag_max arrays (pointers to arrays owned by bmag_peak_finder).
   const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.

From 0821a275f28ef09a162db07cf533d3990b61b097 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Tue, 24 Feb 2026 10:27:08 -0800
Subject: [PATCH 20/32] Fix missing semicolon in gk_species_damping_release
 function for proper array release

---
 gyrokinetic/apps/gk_species_damping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index 9440e702f4..fda444698f 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -336,7 +336,7 @@ gk_species_damping_release(const struct gkyl_gyrokinetic_app *app, const struct
       gkyl_array_release(damp->phi_at_bmag_max);
       gkyl_array_release(damp->phi_at_bmag_tandem);
 
-      gkyl_array_release(damp->phi_smooth_global)
+      gkyl_array_release(damp->phi_smooth_global);
       gkyl_array_dg_find_peaks_release(damp->bmag_peak_finder);
       gkyl_loss_cone_mask_gyrokinetic_release(damp->lcm_proj_op);
       gkyl_array_release(damp->scale_prof);

From dec21d3b8f0e4ea1e29110c13f4944387e207238 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Tue, 24 Feb 2026 10:49:37 -0800
Subject: [PATCH 21/32] remove the array_reduce_dir object. This is leftover
 from an old version of doing the kinetic electron and tandem mirror. The code
 is built again to make sure nothing is affected.

---
 core/unit/ctest_array_dg_reduce_dir.c     | 394 ----------------
 core/zero/array_dg_reduce_dir.c           | 524 ----------------------
 core/zero/gkyl_array_dg_reduce_dir.h      | 158 -------
 core/zero/gkyl_array_dg_reduce_dir_priv.h |  52 ---
 4 files changed, 1128 deletions(-)
 delete mode 100644 core/unit/ctest_array_dg_reduce_dir.c
 delete mode 100644 core/zero/array_dg_reduce_dir.c
 delete mode 100644 core/zero/gkyl_array_dg_reduce_dir.h
 delete mode 100644 core/zero/gkyl_array_dg_reduce_dir_priv.h

diff --git a/core/unit/ctest_array_dg_reduce_dir.c b/core/unit/ctest_array_dg_reduce_dir.c
deleted file mode 100644
index bf53b36c88..0000000000
--- a/core/unit/ctest_array_dg_reduce_dir.c
+++ /dev/null
@@ -1,394 +0,0 @@
-#include <acutest.h>
-
-#include <gkyl_alloc.h>
-#include <gkyl_array.h>
-#include <gkyl_array_ops.h>
-#include <gkyl_array_dg_reduce_dir.h>
-#include <gkyl_array_dg_reduce_dir_priv.h>
-#include <gkyl_basis.h>
-#include <gkyl_eval_on_nodes.h>
-#include <gkyl_range.h>
-#include <gkyl_rect_decomp.h>
-#include <gkyl_rect_grid.h>
-#include <gkyl_util.h>
-
-#include <math.h>
-
-// 1D test function: f(z) = -z^2 + 1
-// Maximum at z=0 with value 1, minimum at endpoints.
-static void
-test_func_1d_parabola(double t, const double *xn, double *fout, void *ctx)
-{
-  double z = xn[0];
-  fout[0] = -z*z + 1.0;
-}
-
-// 1D test function: f(z) = sin(z) on [0, pi]
-// Maximum at z=pi/2 with value 1.
-static void
-test_func_1d_sin(double t, const double *xn, double *fout, void *ctx)
-{
-  double z = xn[0];
-  fout[0] = sin(z);
-}
-
-// 2D test function: f(psi, z) = psi * (-z^2 + 1)
-// Maximum along z is at z=0 for each psi, with value psi.
-static void
-test_func_2d_parabola(double t, const double *xn, double *fout, void *ctx)
-{
-  double psi = xn[0], z = xn[1];
-  fout[0] = psi * (-z*z + 1.0);
-}
-
-// 2D mirror-like function: f(psi, z) = B0(psi) * (1 + (R-1)*sin^2(pi*z/L))
-// Minimum at z=0 for all psi.
-static void
-test_func_2d_mirror(double t, const double *xn, double *fout, void *ctx)
-{
-  double psi = xn[0], z = xn[1];
-  double L = 2.0;
-  double B0 = 1.0 + 0.1*psi;
-  double R = 4.0;
-  double sinval = sin(M_PI * z / L);
-  fout[0] = B0 * (1.0 + (R - 1.0) * sinval * sinval);
-}
-
-// Test 1D reduction with MAX operation.
-void
-test_1d_reduce_max(int poly_order)
-{
-  // Grid: z in [-1, 1].
-  double lower[] = {-1.0};
-  double upper[] = {1.0};
-  int cells[] = {16};
-  struct gkyl_rect_grid grid;
-  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
-
-  struct gkyl_basis basis;
-  gkyl_cart_modal_serendip(&basis, 1, poly_order);
-
-  int ghost[] = {1};
-  struct gkyl_range local, local_ext;
-  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
-
-  // Project test function onto basis.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
-  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_parabola, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
-  gkyl_eval_on_nodes_release(ev);
-
-  // Create reducer.
-  struct gkyl_array_dg_reduce_dir_inp inp = {
-    .basis = &basis,
-    .grid = &grid,
-    .range = &local,
-    .range_ext = &local_ext,
-    .reduce_dir = 0,
-    .op = GKYL_REDUCE_OP_MAX,
-    .use_gpu = false,
-  };
-  struct gkyl_array_dg_reduce_dir *reducer = gkyl_array_dg_reduce_dir_new(&inp);
-
-  // Compute reduction.
-  gkyl_array_dg_reduce_dir_advance(reducer, f);
-
-  // Check results: maximum of -z^2+1 on [-1,1] is 1 at z=0.
-  const struct gkyl_array *vals = gkyl_array_dg_reduce_dir_get_vals(reducer);
-  const struct gkyl_array *coords = gkyl_array_dg_reduce_dir_get_coords(reducer);
-  
-  const double *val = gkyl_array_cfetch(vals, 0);
-  const double *coord = gkyl_array_cfetch(coords, 0);
-  
-  TEST_CHECK(gkyl_compare_double(val[0], 1.0, 1e-14));
-  TEST_CHECK(gkyl_compare_double(fabs(coord[0]), 0.0, 1e-14));
-
-  gkyl_array_release(f);
-  gkyl_array_dg_reduce_dir_release(reducer);
-}
-
-// Test 1D reduction with MIN operation.
-void
-test_1d_reduce_min(int poly_order)
-{
-  // Grid: z in [-1, 1].
-  double lower[] = {-1.0};
-  double upper[] = {1.0};
-  int cells[] = {16};
-  struct gkyl_rect_grid grid;
-  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
-
-  struct gkyl_basis basis;
-  gkyl_cart_modal_serendip(&basis, 1, poly_order);
-
-  int ghost[] = {1};
-  struct gkyl_range local, local_ext;
-  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
-
-  // Project test function onto basis.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
-  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_parabola, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
-  gkyl_eval_on_nodes_release(ev);
-
-  // Create reducer.
-  struct gkyl_array_dg_reduce_dir_inp inp = {
-    .basis = &basis,
-    .grid = &grid,
-    .range = &local,
-    .range_ext = &local_ext,
-    .reduce_dir = 0,
-    .op = GKYL_REDUCE_OP_MIN,
-    .use_gpu = false,
-  };
-  struct gkyl_array_dg_reduce_dir *reducer = gkyl_array_dg_reduce_dir_new(&inp);
-
-  // Compute reduction.
-  gkyl_array_dg_reduce_dir_advance(reducer, f);
-
-  // Check results: minimum of -z^2+1 on [-1,1] is 0 at z=±1.
-  const struct gkyl_array *vals = gkyl_array_dg_reduce_dir_get_vals(reducer);
-  const struct gkyl_array *coords = gkyl_array_dg_reduce_dir_get_coords(reducer);
-  
-  const double *val = gkyl_array_cfetch(vals, 0);
-  const double *coord = gkyl_array_cfetch(coords, 0);
-  
-  TEST_CHECK(gkyl_compare_double(val[0], 0.0, 1e-14));
-  TEST_CHECK(gkyl_compare_double(fabs(coord[0]), 1.0, 1e-14));  // Either -1 or 1.
-
-  gkyl_array_release(f);
-  gkyl_array_dg_reduce_dir_release(reducer);
-}
-
-// Test 2D reduction along z direction with MAX operation.
-void
-test_2d_reduce_max(int poly_order)
-{
-  // Grid: psi in [0.5, 2.0], z in [-1, 1].
-  double lower[] = {0.5, -1.0};
-  double upper[] = {2.0, 1.0};
-  int cells[] = {8, 16};
-  struct gkyl_rect_grid grid;
-  gkyl_rect_grid_init(&grid, 2, lower, upper, cells);
-
-  struct gkyl_basis basis;
-  gkyl_cart_modal_serendip(&basis, 2, poly_order);
-
-  int ghost[] = {1, 1};
-  struct gkyl_range local, local_ext;
-  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
-
-  // Project test function onto basis.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
-  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_parabola, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
-  gkyl_eval_on_nodes_release(ev);
-
-  // Create reducer (reduce along z, which is direction 1).
-  struct gkyl_array_dg_reduce_dir_inp inp = {
-    .basis = &basis,
-    .grid = &grid,
-    .range = &local,
-    .range_ext = &local_ext,
-    .reduce_dir = 1,
-    .op = GKYL_REDUCE_OP_MAX,
-    .use_gpu = false,
-  };
-  struct gkyl_array_dg_reduce_dir *reducer = gkyl_array_dg_reduce_dir_new(&inp);
-
-  // Compute reduction.
-  gkyl_array_dg_reduce_dir_advance(reducer, f);
-
-  // Check results.
-  const struct gkyl_basis *out_basis = gkyl_array_dg_reduce_dir_get_basis(reducer);
-  const struct gkyl_range *out_range = gkyl_array_dg_reduce_dir_get_range(reducer);
-  const struct gkyl_rect_grid *out_grid = gkyl_array_dg_reduce_dir_get_grid(reducer);
-  
-  // Access nodal arrays directly for testing.
-  const struct gkyl_array *vals_nodal = reducer->out_vals_nodal;
-  const struct gkyl_array *coords_nodal = reducer->out_coords_nodal;
-  const struct gkyl_range *out_nrange = &reducer->out_nrange;
-
-  // Check each nodal point.
-  struct gkyl_range_iter iter;
-  gkyl_range_iter_init(&iter, out_nrange);
-  while (gkyl_range_iter_next(&iter)) {
-    long linidx = gkyl_range_idx(out_nrange, iter.idx);
-    
-    const double *val_nodal = gkyl_array_cfetch(vals_nodal, linidx);
-    const double *coord_nodal = gkyl_array_cfetch(coords_nodal, linidx);
-    
-    // Compute physical psi coordinate at this nodal point.
-    // For p=1: node 0 at lower bound, node ncells at upper bound.
-    int num_cells_psi = cells[0];
-    double dpsi = (upper[0] - lower[0]) / num_cells_psi;
-    double psi_phys = lower[0] + iter.idx[0] * dpsi;
-    
-    // Expected: max of psi*(-z^2+1) over z is psi at z=0.
-    double expected_val = psi_phys;
-    double expected_coord = 0.0;
-
-    TEST_CHECK(gkyl_compare_double(val_nodal[0], expected_val, 1e-14));
-    TEST_CHECK(gkyl_compare_double(coord_nodal[0], expected_coord, 1e-14));
-  }
-
-  gkyl_array_release(f);
-  gkyl_array_dg_reduce_dir_release(reducer);
-}
-
-// Test 2D reduction with mirror-like function (finding minimum bmag).
-void
-test_2d_reduce_min_mirror(int poly_order)
-{
-  // Grid: psi in [0.0, 1.0], z in [-1, 1].
-  double lower[] = {0.0, -1.0};
-  double upper[] = {1.0, 1.0};
-  int cells[] = {4, 16};
-  struct gkyl_rect_grid grid;
-  gkyl_rect_grid_init(&grid, 2, lower, upper, cells);
-
-  struct gkyl_basis basis;
-  gkyl_cart_modal_serendip(&basis, 2, poly_order);
-
-  int ghost[] = {1, 1};
-  struct gkyl_range local, local_ext;
-  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
-
-  // Project mirror function onto basis.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
-  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_mirror, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
-  gkyl_eval_on_nodes_release(ev);
-
-  // Create reducer (reduce along z, find minimum).
-  struct gkyl_array_dg_reduce_dir_inp inp = {
-    .basis = &basis,
-    .grid = &grid,
-    .range = &local,
-    .range_ext = &local_ext,
-    .reduce_dir = 1,
-    .op = GKYL_REDUCE_OP_MIN,
-    .use_gpu = false,
-  };
-  struct gkyl_array_dg_reduce_dir *reducer = gkyl_array_dg_reduce_dir_new(&inp);
-
-  // Compute reduction.
-  gkyl_array_dg_reduce_dir_advance(reducer, f);
-
-  // Access nodal arrays directly for testing.
-  const struct gkyl_array *vals_nodal = reducer->out_vals_nodal;
-  const struct gkyl_array *coords_nodal = reducer->out_coords_nodal;
-  const struct gkyl_range *out_nrange = &reducer->out_nrange;
-
-  // Check each nodal point.
-  struct gkyl_range_iter iter;
-  gkyl_range_iter_init(&iter, out_nrange);
-  while (gkyl_range_iter_next(&iter)) {
-    long linidx = gkyl_range_idx(out_nrange, iter.idx);
-    
-    const double *val_nodal = gkyl_array_cfetch(vals_nodal, linidx);
-    const double *coord_nodal = gkyl_array_cfetch(coords_nodal, linidx);
-    
-    // Compute physical psi coordinate.
-    int num_cells_psi = cells[0];
-    double dpsi = (upper[0] - lower[0]) / num_cells_psi;
-    double psi_phys = lower[0] + iter.idx[0] * dpsi;
-    
-    // Expected: minimum of B0*(1+(R-1)*sin^2(pi*z/L)) is B0 at z=0.
-    double B0 = 1.0 + 0.1*psi_phys;
-    double expected_val = B0;
-    double expected_coord = 0.0;
-
-    TEST_CHECK(gkyl_compare_double(val_nodal[0], expected_val, 1e-14));
-    TEST_CHECK(gkyl_compare_double(coord_nodal[0], expected_coord, 1e-14));
-  }
-
-  gkyl_array_release(f);
-  gkyl_array_dg_reduce_dir_release(reducer);
-}
-
-// Test eval_at_extremum functionality.
-void
-test_2d_eval_at_extremum(int poly_order)
-{
-  // Grid: psi in [0.5, 2.0], z in [-1, 1].
-  double lower[] = {0.5, -1.0};
-  double upper[] = {2.0, 1.0};
-  int cells[] = {8, 16};
-  struct gkyl_rect_grid grid;
-  gkyl_rect_grid_init(&grid, 2, lower, upper, cells);
-
-  struct gkyl_basis basis;
-  gkyl_cart_modal_serendip(&basis, 2, poly_order);
-
-  int ghost[] = {1, 1};
-  struct gkyl_range local, local_ext;
-  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
-
-  // Project test function onto basis.
-  struct gkyl_array *f = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
-  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_parabola, NULL);
-  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f);
-  gkyl_eval_on_nodes_release(ev);
-
-  // Create reducer.
-  struct gkyl_array_dg_reduce_dir_inp inp = {
-    .basis = &basis,
-    .grid = &grid,
-    .range = &local,
-    .range_ext = &local_ext,
-    .reduce_dir = 1,
-    .op = GKYL_REDUCE_OP_MAX,
-    .use_gpu = false,
-  };
-  struct gkyl_array_dg_reduce_dir *reducer = gkyl_array_dg_reduce_dir_new(&inp);
-
-  // Compute reduction.
-  gkyl_array_dg_reduce_dir_advance(reducer, f);
-
-  // Evaluate f at the extremum coordinates.
-  const struct gkyl_range *out_range_ext = gkyl_array_dg_reduce_dir_get_range_ext(reducer);
-  const struct gkyl_basis *out_basis = gkyl_array_dg_reduce_dir_get_basis(reducer);
-  
-  struct gkyl_array *f_at_max = gkyl_array_new(GKYL_DOUBLE, out_basis->num_basis, out_range_ext->volume);
-  gkyl_array_dg_reduce_dir_eval_at_extremum(reducer, f, f_at_max);
-
-  // The value of f at its maximum should equal the maximum value.
-  const struct gkyl_array *vals = gkyl_array_dg_reduce_dir_get_vals(reducer);
-  
-  // Compare at cell centers.
-  double xc_log[1] = {0.0};
-  const struct gkyl_range *out_range = gkyl_array_dg_reduce_dir_get_range(reducer);
-  struct gkyl_range_iter iter;
-  gkyl_range_iter_init(&iter, out_range);
-  while (gkyl_range_iter_next(&iter)) {
-    long linidx = gkyl_range_idx(out_range, iter.idx);
-    
-    const double *max_val = gkyl_array_cfetch(vals, linidx);
-    const double *eval_val = gkyl_array_cfetch(f_at_max, linidx);
-    
-    double max_at_center = out_basis->eval_expand(xc_log, max_val);
-    double eval_at_center = out_basis->eval_expand(xc_log, eval_val);
-
-    TEST_CHECK(gkyl_compare_double(eval_at_center, max_at_center, 1e-14));
-  }
-
-  gkyl_array_release(f);
-  gkyl_array_release(f_at_max);
-  gkyl_array_dg_reduce_dir_release(reducer);
-}
-
-void test_1d_max_p1() { test_1d_reduce_max(1); }
-void test_1d_min_p1() { test_1d_reduce_min(1); }
-void test_2d_max_p1() { test_2d_reduce_max(1); }
-void test_2d_min_mirror_p1() { test_2d_reduce_min_mirror(1); }
-void test_2d_eval_at_extremum_p1() { test_2d_eval_at_extremum(1); }
-
-TEST_LIST = {
-  {"test_1d_max_p1", test_1d_max_p1},
-  {"test_1d_min_p1", test_1d_min_p1},
-  {"test_2d_max_p1", test_2d_max_p1},
-  {"test_2d_min_mirror_p1", test_2d_min_mirror_p1},
-  {"test_2d_eval_at_extremum_p1", test_2d_eval_at_extremum_p1},
-  {NULL, NULL},
-};
diff --git a/core/zero/array_dg_reduce_dir.c b/core/zero/array_dg_reduce_dir.c
deleted file mode 100644
index fc39963774..0000000000
--- a/core/zero/array_dg_reduce_dir.c
+++ /dev/null
@@ -1,524 +0,0 @@
-#include <assert.h>
-#include <float.h>
-#include <string.h>
-
-#include <gkyl_alloc.h>
-#include <gkyl_array.h>
-#include <gkyl_array_dg_reduce_dir.h>
-#include <gkyl_array_dg_reduce_dir_priv.h>
-#include <gkyl_nodal_ops.h>
-
-/**
- * Find the extremum along the reduction direction for a given preserved-direction
- * node index, storing results in the nodal arrays.
- */
-static void
-find_extremum_for_preserved_node(struct gkyl_array_dg_reduce_dir *up, const struct gkyl_array *in_ho,
-  int preserved_node_idx)
-{
-  int ndim = up->grid.ndim;
-  int reduce_dir = up->reduce_dir;
-  int poly_order = up->basis.poly_order;
-
-  // Determine number of nodes along reduction direction.
-  int num_cells_reduce = up->range.upper[reduce_dir] - up->range.lower[reduce_dir] + 1;
-  int total_nodes_reduce = (poly_order == 1) ? num_cells_reduce + 1 : 2*num_cells_reduce + 1;
-
-  // Allocate arrays to store values and coordinates along reduction direction.
-  double *vals = gkyl_malloc(sizeof(double) * total_nodes_reduce);
-  double *coords = gkyl_malloc(sizeof(double) * total_nodes_reduce);
-  bool *visited = gkyl_malloc(sizeof(bool) * total_nodes_reduce);
-  for (int i = 0; i < total_nodes_reduce; i++) {
-    vals[i] = 0.0;
-    coords[i] = 0.0;
-    visited[i] = false;
-  }
-
-  // For 2D, determine the preserved direction.
-  int preserved_dir = (ndim == 1) ? -1 : ((reduce_dir == 0) ? 1 : 0);
-
-  // Iterate along cells in reduction direction and collect nodal values.
-  for (int cell_idx = up->range.lower[reduce_dir]; cell_idx <= up->range.upper[reduce_dir]; cell_idx++) {
-    // For 2D, determine which cells in the preserved direction contribute to this node.
-    int pres_cell_start, pres_cell_end;
-    if (ndim == 1) {
-      pres_cell_start = 0;
-      pres_cell_end = 0;
-    }
-    else {
-      if (poly_order == 1) {
-        if (preserved_node_idx == 0) {
-          pres_cell_start = up->range.lower[preserved_dir];
-          pres_cell_end = up->range.lower[preserved_dir];
-        }
-        else if (preserved_node_idx == up->out_nrange.upper[0]) {
-          pres_cell_start = up->range.upper[preserved_dir];
-          pres_cell_end = up->range.upper[preserved_dir];
-        }
-        else {
-          pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1;
-          pres_cell_end = pres_cell_start + 1;
-          if (pres_cell_end > up->range.upper[preserved_dir]) {
-            pres_cell_end = up->range.upper[preserved_dir];
-          }
-        }
-      }
-      else {
-        int cell_local = preserved_node_idx / 2;
-        pres_cell_start = up->range.lower[preserved_dir] + cell_local;
-        pres_cell_end = pres_cell_start;
-        if (preserved_node_idx % 2 == 0 && preserved_node_idx > 0) {
-          pres_cell_start--;
-        }
-        if (pres_cell_start < up->range.lower[preserved_dir]) {
-          pres_cell_start = up->range.lower[preserved_dir];
-        }
-        if (pres_cell_end > up->range.upper[preserved_dir]) {
-          pres_cell_end = up->range.upper[preserved_dir];
-        }
-      }
-    }
-
-    for (int pres_cell = pres_cell_start; pres_cell <= pres_cell_end; pres_cell++) {
-      // Build index array for this cell.
-      int idx[GKYL_MAX_DIM];
-      if (ndim == 1) {
-        idx[0] = cell_idx;
-      }
-      else {
-        idx[preserved_dir] = pres_cell;
-        idx[reduce_dir] = cell_idx;
-      }
-
-      long linidx = gkyl_range_idx(&up->range, idx);
-      const double *f_d = gkyl_array_cfetch(in_ho, linidx);
-
-      double xc[GKYL_MAX_DIM];
-      gkyl_rect_grid_cell_center(&up->grid, idx, xc);
-
-      // Evaluate at each node in this cell.
-      for (int n = 0; n < up->basis.num_basis; n++) {
-        const double *nod_log = gkyl_array_cfetch(up->nodes, n);
-
-        // Check if this node corresponds to our preserved node index.
-        if (ndim > 1) {
-          int pres_node_offset;
-          if (poly_order == 1) {
-            pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1;
-          }
-          else {
-            if (nod_log[preserved_dir] < -0.5) {
-              pres_node_offset = 0;
-            }
-            else if (nod_log[preserved_dir] > 0.5) {
-              pres_node_offset = 2;
-            }
-            else {
-              pres_node_offset = 1;
-            }
-          }
-          int pres_cell_local = pres_cell - up->range.lower[preserved_dir];
-          int this_pres_node;
-          if (poly_order == 1) {
-            this_pres_node = pres_cell_local + pres_node_offset;
-          }
-          else {
-            this_pres_node = 2*pres_cell_local + pres_node_offset;
-          }
-
-          if (this_pres_node != preserved_node_idx) {
-            continue;
-          }
-        }
-
-        // Determine node offset in reduction direction.
-        int reduce_node_offset;
-        if (poly_order == 1) {
-          reduce_node_offset = (nod_log[reduce_dir] < 0) ? 0 : 1;
-        }
-        else {
-          if (nod_log[reduce_dir] < -0.5) {
-            reduce_node_offset = 0;
-          }
-          else if (nod_log[reduce_dir] > 0.5) {
-            reduce_node_offset = 2;
-          }
-          else {
-            reduce_node_offset = 1;
-          }
-        }
-
-        int cell_local = cell_idx - up->range.lower[reduce_dir];
-        int reduce_node_idx;
-        if (poly_order == 1) {
-          reduce_node_idx = cell_local + reduce_node_offset;
-        }
-        else {
-          reduce_node_idx = 2*cell_local + reduce_node_offset;
-        }
-
-        if (!visited[reduce_node_idx]) {
-          double val = up->basis.eval_expand(nod_log, f_d);
-          double nod_phys[GKYL_MAX_DIM];
-          dg_reduce_dir_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys);
-
-          vals[reduce_node_idx] = val;
-          coords[reduce_node_idx] = nod_phys[reduce_dir];
-          visited[reduce_node_idx] = true;
-        }
-      }
-    }
-  }
-
-  // Find the global extremum.
-  int extremum_idx = 0;
-  double extremum_val = vals[0];
-  
-  for (int i = 1; i < total_nodes_reduce; i++) {
-    bool is_better;
-    if (up->op == GKYL_REDUCE_OP_MAX) {
-      is_better = (vals[i] > extremum_val);
-    }
-    else {
-      is_better = (vals[i] < extremum_val);
-    }
-    
-    if (is_better) {
-      extremum_val = vals[i];
-      extremum_idx = i;
-    }
-  }
-
-  // Store the result.
-  double *val_n = gkyl_array_fetch(up->out_vals_nodal, preserved_node_idx);
-  double *coord_n = gkyl_array_fetch(up->out_coords_nodal, preserved_node_idx);
-  val_n[0] = extremum_val;
-  coord_n[0] = coords[extremum_idx];
-
-  gkyl_free(vals);
-  gkyl_free(coords);
-  gkyl_free(visited);
-}
-
-/**
- * Evaluate an input array at the extremum coordinate for a given preserved-direction
- * node index, storing result in the nodal output array.
- */
-static void
-eval_at_extremum_for_preserved_node(struct gkyl_array_dg_reduce_dir *up,
-  const struct gkyl_array *in_ho, int preserved_node_idx)
-{
-  int ndim = up->grid.ndim;
-  int reduce_dir = up->reduce_dir;
-  int poly_order = up->basis.poly_order;
-  int preserved_dir = (ndim == 1) ? -1 : ((reduce_dir == 0) ? 1 : 0);
-
-  // Get the extremum coordinate.
-  const double *extremum_coord_n = gkyl_array_cfetch(up->out_coords_nodal, preserved_node_idx);
-  double extremum_coord = extremum_coord_n[0];
-
-  // Find the cell containing this coordinate in the reduction direction.
-  double point[GKYL_MAX_DIM];
-  int known_idx[GKYL_MAX_DIM];
-  int cell_idx[GKYL_MAX_DIM];
-  
-  for (int d = 0; d < ndim; d++) {
-    if (d == reduce_dir) {
-      point[d] = extremum_coord;
-      known_idx[d] = -1;
-    }
-    else {
-      point[d] = 0.0;
-      known_idx[d] = -1;
-    }
-  }
-  
-  // If 2D, determine preserved direction cell from preserved_node_idx.
-  if (ndim > 1) {
-    int pres_cell;
-    if (poly_order == 1) {
-      if (preserved_node_idx == 0) {
-        pres_cell = up->range.lower[preserved_dir];
-      }
-      else {
-        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
-        if (pres_cell > up->range.upper[preserved_dir]) {
-          pres_cell = up->range.upper[preserved_dir];
-        }
-      }
-    }
-    else {
-      pres_cell = up->range.lower[preserved_dir] + preserved_node_idx / 2;
-    }
-    known_idx[preserved_dir] = pres_cell;
-    
-    int pres_cell_idx[GKYL_MAX_DIM];
-    for (int d = 0; d < ndim; d++) {
-      pres_cell_idx[d] = (d == preserved_dir) ? pres_cell : 1;
-    }
-    double xc_pres[GKYL_MAX_DIM];
-    gkyl_rect_grid_cell_center(&up->grid, pres_cell_idx, xc_pres);
-    point[preserved_dir] = xc_pres[preserved_dir];
-  }
-  
-  gkyl_rect_grid_find_cell(&up->grid, point, true, known_idx, cell_idx);
-
-  // Clamp cell_idx to interior range.
-  for (int d = 0; d < up->grid.ndim; d++) {
-    if (cell_idx[d] < up->range.lower[d]) {
-      cell_idx[d] = up->range.lower[d];
-    }
-    if (cell_idx[d] > up->range.upper[d]) {
-      cell_idx[d] = up->range.upper[d];
-    }
-  }
-
-  // Get the DG coefficients at this cell.
-  long linidx = gkyl_range_idx(&up->range_ext, cell_idx);
-  const double *f_d = gkyl_array_cfetch(in_ho, linidx);
-
-  // Get cell center.
-  double xc[GKYL_MAX_DIM];
-  gkyl_rect_grid_cell_center(&up->grid, cell_idx, xc);
-
-  // Convert extremum coordinate to logical space.
-  double nod_log[GKYL_MAX_DIM];
-  for (int d = 0; d < ndim; d++) {
-    if (d == reduce_dir) {
-      nod_log[d] = 2.0 * (extremum_coord - xc[d]) / up->grid.dx[d];
-    }
-    else if (ndim > 1) {
-      if (poly_order == 1) {
-        nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
-      }
-      else {
-        int pres_node_offset = preserved_node_idx % 3;
-        if (pres_node_offset == 0) {
-          nod_log[d] = -1.0;
-        }
-        else if (pres_node_offset == 1) {
-          nod_log[d] = 0.0;
-        }
-        else {
-          nod_log[d] = 1.0;
-        }
-      }
-    }
-  }
-
-  // Evaluate the DG expansion at this logical coordinate.
-  double val = up->basis.eval_expand(nod_log, f_d);
-
-  // Store the result.
-  double *val_n = gkyl_array_fetch(up->out_eval_nodal, preserved_node_idx);
-  val_n[0] = val;
-}
-
-struct gkyl_array_dg_reduce_dir*
-gkyl_array_dg_reduce_dir_new(const struct gkyl_array_dg_reduce_dir_inp *inp)
-{
-  struct gkyl_array_dg_reduce_dir *up = gkyl_malloc(sizeof(*up));
-
-  // Copy input parameters.
-  up->grid = *inp->grid;
-  up->basis = *inp->basis;
-  up->range = *inp->range;
-  up->range_ext = *inp->range_ext;
-  up->reduce_dir = inp->reduce_dir;
-  up->op = inp->op;
-  up->use_gpu = inp->use_gpu;
-
-  int ndim = inp->grid->ndim;
-  int poly_order = inp->basis->poly_order;
-  int out_dim = ndim - 1;
-
-  assert(inp->reduce_dir >= 0 && inp->reduce_dir < ndim);
-
-  // Set up output grid/basis/range.
-  if (out_dim == 0) {
-    // 1D -> 0D case.
-    int cells_1d[1] = {1};
-    double lower_1d[1] = {0.0};
-    double upper_1d[1] = {1.0};
-    gkyl_rect_grid_init(&up->out_grid, 1, lower_1d, upper_1d, cells_1d);
-    gkyl_range_init(&up->out_range, 1, (int[]){1}, (int[]){1});
-    gkyl_range_init(&up->out_range_ext, 1, (int[]){0}, (int[]){2});
-    gkyl_cart_modal_serendip(&up->out_basis, 1, 0);
-
-    int nodes_shape[1] = {1};
-    gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape);
-  }
-  else if (out_dim == 1) {
-    // 2D -> 1D case.
-    int preserved_dir = (inp->reduce_dir == 0) ? 1 : 0;
-
-    int cells_out = inp->grid->cells[preserved_dir];
-    double lower_out = inp->grid->lower[preserved_dir];
-    double upper_out = inp->grid->upper[preserved_dir];
-
-    gkyl_rect_grid_init(&up->out_grid, 1, &lower_out, &upper_out, &cells_out);
-
-    int lower_idx[1] = {inp->range->lower[preserved_dir]};
-    int upper_idx[1] = {inp->range->upper[preserved_dir]};
-    gkyl_range_init(&up->out_range, 1, lower_idx, upper_idx);
-
-    int lower_ext_idx[1] = {inp->range_ext->lower[preserved_dir]};
-    int upper_ext_idx[1] = {inp->range_ext->upper[preserved_dir]};
-    gkyl_range_init(&up->out_range_ext, 1, lower_ext_idx, upper_ext_idx);
-
-    gkyl_cart_modal_serendip(&up->out_basis, 1, poly_order);
-
-    int num_nodes = (poly_order == 1) ? gkyl_range_shape(&up->out_range, 0) + 1
-                                      : 2*gkyl_range_shape(&up->out_range, 0) + 1;
-    int nodes_shape[1] = {num_nodes};
-    gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape);
-  }
-  else {
-    assert(false); // Unsupported dimension
-  }
-
-  // Store node locations for input basis.
-  up->nodes = gkyl_array_new(GKYL_DOUBLE, ndim, inp->basis->num_basis);
-  inp->basis->node_list(gkyl_array_fetch(up->nodes, 0));
-
-  // Create nodal-to-modal converter.
-  up->n2m = gkyl_nodal_ops_new(&up->out_basis, &up->out_grid, false);
-
-  // Allocate output arrays.
-  up->out_vals = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, up->out_range_ext.volume);
-  up->out_coords = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, up->out_range_ext.volume);
-  up->out_vals_nodal = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
-  up->out_coords_nodal = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
-  up->out_eval_nodal = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
-
-  return up;
-}
-
-void
-gkyl_array_dg_reduce_dir_advance(struct gkyl_array_dg_reduce_dir *up, const struct gkyl_array *in)
-{
-  // Needs a GPU implementation.
-
-  int ndim = up->grid.ndim;
-  int out_dim = ndim - 1;
-
-  // Find extremum for each preserved-direction node.
-  int num_nodes_out = up->out_nrange.volume;
-  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-    find_extremum_for_preserved_node(up, in, pres_node);
-  }
-
-  // Transform nodal to modal.
-  if (out_dim == 0) {
-    // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
-    double *val_m = gkyl_array_fetch(up->out_vals, 0);
-    double *coord_m = gkyl_array_fetch(up->out_coords, 0);
-    const double *val_n = gkyl_array_cfetch(up->out_vals_nodal, 0);
-    const double *coord_n = gkyl_array_cfetch(up->out_coords_nodal, 0);
-    val_m[0] = val_n[0];
-    coord_m[0] = coord_n[0];
-  }
-  else {
-    // 2D -> 1D case: use nodal-to-modal transform.
-    gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-      &up->out_nrange, &up->out_range, 1, up->out_vals_nodal, up->out_vals, false);
-    gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-      &up->out_nrange, &up->out_range, 1, up->out_coords_nodal, up->out_coords, false);
-  }
-}
-
-void
-gkyl_array_dg_reduce_dir_eval_at_extremum(struct gkyl_array_dg_reduce_dir *up,
-  const struct gkyl_array *in_array, struct gkyl_array *out_val)
-{
-  // Needs a GPU implementation.
-
-  int ndim = up->grid.ndim;
-  int out_dim = ndim - 1;
-
-  // Evaluate the input array at extremum locations for each preserved-direction node.
-  int num_nodes_out = up->out_nrange.volume;
-  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-    eval_at_extremum_for_preserved_node(up, in_array, pres_node);
-  }
-
-  // Transform nodal to modal.
-  if (out_dim == 0) {
-    // 1D -> 0D case: modal = nodal.
-    double *val_m = gkyl_array_fetch(out_val, 0);
-    const double *val_n = gkyl_array_cfetch(up->out_eval_nodal, 0);
-    val_m[0] = val_n[0];
-  }
-  else {
-    // 2D -> 1D case: use nodal-to-modal transform.
-    gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-      &up->out_nrange, &up->out_range, 1, up->out_eval_nodal, out_val, false);
-  }
-}
-
-const struct gkyl_basis*
-gkyl_array_dg_reduce_dir_get_basis(const struct gkyl_array_dg_reduce_dir *up)
-{
-  return &up->out_basis;
-}
-
-const struct gkyl_rect_grid*
-gkyl_array_dg_reduce_dir_get_grid(const struct gkyl_array_dg_reduce_dir *up)
-{
-  return &up->out_grid;
-}
-
-const struct gkyl_range*
-gkyl_array_dg_reduce_dir_get_range(const struct gkyl_array_dg_reduce_dir *up)
-{
-  return &up->out_range;
-}
-
-const struct gkyl_range*
-gkyl_array_dg_reduce_dir_get_range_ext(const struct gkyl_array_dg_reduce_dir *up)
-{
-  return &up->out_range_ext;
-}
-
-const struct gkyl_range*
-gkyl_array_dg_reduce_dir_get_nodal_range(const struct gkyl_array_dg_reduce_dir *up)
-{
-  return &up->out_nrange;
-}
-
-const struct gkyl_array*
-gkyl_array_dg_reduce_dir_get_vals(const struct gkyl_array_dg_reduce_dir *up)
-{
-  return up->out_vals;
-}
-
-const struct gkyl_array*
-gkyl_array_dg_reduce_dir_get_vals_nodal(const struct gkyl_array_dg_reduce_dir *up)
-{
-  return up->out_vals_nodal;
-}
-
-const struct gkyl_array*
-gkyl_array_dg_reduce_dir_get_coords(const struct gkyl_array_dg_reduce_dir *up)
-{
-  return up->out_coords;
-}
-
-const struct gkyl_array*
-gkyl_array_dg_reduce_dir_get_coords_nodal(const struct gkyl_array_dg_reduce_dir *up)
-{
-  return up->out_coords_nodal;
-}
-
-void
-gkyl_array_dg_reduce_dir_release(struct gkyl_array_dg_reduce_dir *up)
-{
-  gkyl_array_release(up->out_vals);
-  gkyl_array_release(up->out_coords);
-  gkyl_array_release(up->out_vals_nodal);
-  gkyl_array_release(up->out_coords_nodal);
-  gkyl_array_release(up->out_eval_nodal);
-  gkyl_array_release(up->nodes);
-  gkyl_nodal_ops_release(up->n2m);
-  gkyl_free(up);
-}
diff --git a/core/zero/gkyl_array_dg_reduce_dir.h b/core/zero/gkyl_array_dg_reduce_dir.h
deleted file mode 100644
index cb91ca7cf6..0000000000
--- a/core/zero/gkyl_array_dg_reduce_dir.h
+++ /dev/null
@@ -1,158 +0,0 @@
-#pragma once
-
-#include <gkyl_array.h>
-#include <gkyl_basis.h>
-#include <gkyl_range.h>
-#include <gkyl_rect_grid.h>
-
-/**
- * Reduce a DG array along one direction, computing max/min and argmax/argmin.
- * 
- * For a 2D input array f(psi, z), reducing along z (dir=1) gives:
- *   out_val(psi) = max_z f(psi, z)    or    min_z f(psi, z)
- *   out_coord(psi) = argmax_z f(psi, z)  or  argmin_z f(psi, z)
- * 
- * For a 1D input array f(z), reducing along z (dir=0) gives scalars:
- *   out_val = max f   or   min f
- *   out_coord = argmax f  or  argmin f
- * 
- * The reduction is computed by sampling the field at nodal points along the
- * reduction direction and finding the global extremum.
- */
-typedef struct gkyl_array_dg_reduce_dir gkyl_array_dg_reduce_dir;
-
-/** Type of reduction operation. */
-enum gkyl_reduce_op {
-  GKYL_REDUCE_OP_MAX,   // Find maximum
-  GKYL_REDUCE_OP_MIN,   // Find minimum
-};
-
-/** Input parameters for dg_reduce_dir updater. */
-struct gkyl_array_dg_reduce_dir_inp {
-  const struct gkyl_basis *basis;       // Input basis (N-dimensional)
-  const struct gkyl_rect_grid *grid;    // Input grid
-  const struct gkyl_range *range;       // Input range (local)
-  const struct gkyl_range *range_ext;   // Input extended range
-  int reduce_dir;                       // Direction to reduce (0-indexed)
-  enum gkyl_reduce_op op;               // Reduction operation (MAX or MIN)
-  bool use_gpu;                         // Whether to run on GPU
-};
-
-/**
- * Create a new directional reduction updater.
- * 
- * @param inp Input parameters
- * @return New updater pointer
- */
-struct gkyl_array_dg_reduce_dir* gkyl_array_dg_reduce_dir_new(
-  const struct gkyl_array_dg_reduce_dir_inp *inp);
-
-/**
- * Compute the reduction. For each point along the preserved dimensions,
- * find the maximum or minimum along the reduction direction.
- * 
- * @param up Updater object
- * @param in Input array (N-dimensional DG field)
- */
-void gkyl_array_dg_reduce_dir_advance(struct gkyl_array_dg_reduce_dir *up,
-  const struct gkyl_array *in);
-
-/**
- * Evaluate a field at the extremum coordinates.
- * For example, if we found z_max(psi) = argmax_z f(psi,z), this evaluates
- * g(psi, z_max(psi)) for any field g.
- * 
- * @param up Updater object
- * @param in_array Input field to evaluate (N-dimensional)
- * @param out_val Output values at extremum coordinates (N-1 dimensional)
- */
-void gkyl_array_dg_reduce_dir_eval_at_extremum(struct gkyl_array_dg_reduce_dir *up,
-  const struct gkyl_array *in_array, struct gkyl_array *out_val);
-
-/**
- * Get the output basis ((N-1)-dimensional, or p=0 1D for 1D->0D).
- * 
- * @param up Updater object
- * @return Pointer to output basis
- */
-const struct gkyl_basis* gkyl_array_dg_reduce_dir_get_basis(
-  const struct gkyl_array_dg_reduce_dir *up);
-
-/**
- * Get the output grid.
- * 
- * @param up Updater object
- * @return Pointer to output grid
- */
-const struct gkyl_rect_grid* gkyl_array_dg_reduce_dir_get_grid(
-  const struct gkyl_array_dg_reduce_dir *up);
-
-/**
- * Get the output range.
- * 
- * @param up Updater object
- * @return Pointer to output range
- */
-const struct gkyl_range* gkyl_array_dg_reduce_dir_get_range(
-  const struct gkyl_array_dg_reduce_dir *up);
-
-/**
- * Get the output extended range.
- * 
- * @param up Updater object
- * @return Pointer to output extended range
- */
-const struct gkyl_range* gkyl_array_dg_reduce_dir_get_range_ext(
-  const struct gkyl_array_dg_reduce_dir *up);
-
-/**
- * Get the output nodal range.
- * 
- * @param up Updater object
- * @return Pointer to output nodal range
- */
-const struct gkyl_range* gkyl_array_dg_reduce_dir_get_nodal_range(
-  const struct gkyl_array_dg_reduce_dir *up);
-
-/**
- * Get the output array containing extremal values (modal DG expansion).
- * 
- * @param up Updater object
- * @return Pointer to output values array
- */
-const struct gkyl_array* gkyl_array_dg_reduce_dir_get_vals(
-  const struct gkyl_array_dg_reduce_dir *up);
-
-/**
- * Get the output array containing extremal values (nodal representation).
- * 
- * @param up Updater object
- * @return Pointer to output nodal values array
- */
-const struct gkyl_array* gkyl_array_dg_reduce_dir_get_vals_nodal(
-  const struct gkyl_array_dg_reduce_dir *up);
-
-/**
- * Get the output array containing coordinates of extrema (modal DG expansion).
- * 
- * @param up Updater object
- * @return Pointer to output coordinates array
- */
-const struct gkyl_array* gkyl_array_dg_reduce_dir_get_coords(
-  const struct gkyl_array_dg_reduce_dir *up);
-
-/**
- * Get the output array containing coordinates of extrema (nodal representation).
- * 
- * @param up Updater object
- * @return Pointer to output nodal coordinates array
- */
-const struct gkyl_array* gkyl_array_dg_reduce_dir_get_coords_nodal(
-  const struct gkyl_array_dg_reduce_dir *up);
-
-/**
- * Release the updater and all internal arrays.
- * 
- * @param up Updater to delete
- */
-void gkyl_array_dg_reduce_dir_release(struct gkyl_array_dg_reduce_dir *up);
diff --git a/core/zero/gkyl_array_dg_reduce_dir_priv.h b/core/zero/gkyl_array_dg_reduce_dir_priv.h
deleted file mode 100644
index bd7f40518e..0000000000
--- a/core/zero/gkyl_array_dg_reduce_dir_priv.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#pragma once
-
-#include <float.h>
-#include <gkyl_alloc.h>
-#include <gkyl_array.h>
-#include <gkyl_array_dg_reduce_dir.h>
-#include <gkyl_nodal_ops.h>
-
-/**
- * Convert logical (reference) coordinates to computational (physical) coordinates.
- * xout[d] = xc[d] + 0.5*dx[d]*eta[d]
- */
-static inline void
-dg_reduce_dir_log_to_comp(int ndim, const double *eta,
-  const double *GKYL_RESTRICT dx, const double *GKYL_RESTRICT xc,
-  double *GKYL_RESTRICT xout)
-{
-  for (int d = 0; d < ndim; ++d)
-    xout[d] = 0.5*dx[d]*eta[d] + xc[d];
-}
-
-/** Internal struct for dg_reduce_dir updater. */
-struct gkyl_array_dg_reduce_dir {
-  // Input parameters (copies).
-  struct gkyl_rect_grid grid;       // Input grid (copy)
-  struct gkyl_basis basis;          // Input basis (copy)
-  struct gkyl_range range;          // Input local range (copy)
-  struct gkyl_range range_ext;      // Input extended range (copy)
-  int reduce_dir;                   // Direction to reduce
-  enum gkyl_reduce_op op;           // Reduction operation (MAX or MIN)
-  bool use_gpu;
-
-  // Output grid/basis/range (owned).
-  struct gkyl_rect_grid out_grid;   // Output grid (N-1 dim, or 1D 1-cell for 1D->0D)
-  struct gkyl_basis out_basis;      // Output basis (N-1 dim, or p=0 1D for 1D->0D)
-  struct gkyl_range out_range;      // Output range
-  struct gkyl_range out_range_ext;  // Output extended range
-  struct gkyl_range out_nrange;     // Nodal range for output
-
-  // Output arrays (owned).
-  struct gkyl_array *out_vals;          // Extremal values (modal DG)
-  struct gkyl_array *out_coords;        // Extremal coordinates (modal DG)
-  struct gkyl_array *out_vals_nodal;    // Nodal extremal values
-  struct gkyl_array *out_coords_nodal;  // Nodal extremal coordinates
-  struct gkyl_array *out_eval_nodal;    // Nodal array for eval_at_extremum
-
-  // Internal working arrays.
-  struct gkyl_array *nodes;         // Node locations in logical coords
-
-  // Nodal-to-modal converter.
-  struct gkyl_nodal_ops *n2m;
-};

From 6a4931af36f11e1b289b1842bf3395cf137d67fe Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Tue, 24 Feb 2026 14:48:35 -0500
Subject: [PATCH 22/32] Uncrustify format the files this PR modifies and also
 depricate all p=2 relevant code for the peak finders

---
 core/unit/ctest_array_dg_find_peaks.c         | 417 +++++++++++-------
 core/zero/array_dg_find_peaks.c               | 240 ++++------
 core/zero/array_dg_find_peaks_cu.cu           | 132 ++----
 core/zero/gkyl_array_dg_find_peaks.h          |  97 ++--
 core/zero/gkyl_array_dg_find_peaks_priv.h     |   6 +-
 gyrokinetic/apps/gk_species_damping.c         | 132 +++---
 gyrokinetic/apps/gk_species_fdot_multiplier.c | 147 +++---
 .../creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c | 253 +++++------
 .../rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c    | 355 +++++++--------
 .../rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c  | 275 ++++++------
 .../creg/rt_gk_wham_kinetic_poa_1x2v_p1.c     | 274 ++++++------
 .../unit/ctest_loss_cone_mask_gyrokinetic.c   | 361 ++++++++-------
 .../zero/gkyl_loss_cone_mask_gyrokinetic.h    |  12 +-
 .../gkyl_loss_cone_mask_gyrokinetic_priv.h    |  33 +-
 gyrokinetic/zero/loss_cone_mask_gyrokinetic.c | 302 +++++++------
 .../zero/loss_cone_mask_gyrokinetic_cu.cu     | 415 +++++++++--------
 16 files changed, 1801 insertions(+), 1650 deletions(-)

diff --git a/core/unit/ctest_array_dg_find_peaks.c b/core/unit/ctest_array_dg_find_peaks.c
index 0cc2a200e2..98861f457d 100644
--- a/core/unit/ctest_array_dg_find_peaks.c
+++ b/core/unit/ctest_array_dg_find_peaks.c
@@ -17,7 +17,7 @@
 #include <stdio.h>
 
 // Helper function to create test arrays on CPU or GPU.
-static struct gkyl_array *
+static struct gkyl_array*
 mkarr(bool use_gpu, long nc, long size)
 {
   struct gkyl_array *a = use_gpu ? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size)
@@ -25,7 +25,7 @@ mkarr(bool use_gpu, long nc, long size)
   return a;
 }
 
-// 1D test function with multiple peaks: f(z) = cos(2*pi*z/L) 
+// 1D test function with multiple peaks: f(z) = cos(2*pi*z/L)
 // Has maxima at z=0, z=L and minimum at z=L/2.
 static void
 test_func_1d_cos(double t, const double *xn, double *fout, void *ctx)
@@ -56,7 +56,7 @@ test_func_2d_cos(double t, const double *xn, double *fout, void *ctx)
 {
   double psi = xn[0], z = xn[1];
   double L = 2.0;
-  fout[0] = (1.0 + 0.1*psi) * cos(2.0 * M_PI * z / L);
+  fout[0] = (1.0 + 0.1 * psi) * cos(2.0 * M_PI * z / L);
 }
 
 // 2D mirror-like function: peaks at z = +/- z_m(psi).
@@ -65,7 +65,7 @@ test_func_2d_mirror(double t, const double *xn, double *fout, void *ctx)
 {
   double psi = xn[0], z = xn[1];
   double L = 2.0;
-  double B0 = 1.0 + 0.1*psi;  // Varies with psi.
+  double B0 = 1.0 + 0.1 * psi;  // Varies with psi.
   double R = 4.0;
   double sinval = sin(M_PI * z / L);
   fout[0] = B0 * (1.0 + (R - 1.0) * sinval * sinval);
@@ -112,9 +112,9 @@ void
 test_1d_find_peaks_cos(int poly_order, bool use_gpu)
 {
   // Grid: z in [-1, 1] (one period of cos(2*pi*z/2)).
-  double lower[] = {-1.0};
-  double upper[] = {1.0};
-  int cells[] = {16};
+  double lower[] = { -1.0 };
+  double upper[] = { 1.0 };
+  int cells[] = { 16 };
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
 
@@ -123,7 +123,7 @@ test_1d_find_peaks_cos(int poly_order, bool use_gpu)
   gkyl_cart_modal_serendip(&basis, 1, poly_order);
 
   // Ranges.
-  int ghost[] = {1};
+  int ghost[] = { 1 };
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -159,9 +159,9 @@ test_1d_find_peaks_cos(int poly_order, bool use_gpu)
     enum gkyl_peak_type type;
     double z_expected;
   } expected_peaks[] = {
-    {GKYL_PEAK_EDGE_LO,    -1.0,},
-    {GKYL_PEAK_LOCAL_MAX,   0.0,},
-    {GKYL_PEAK_EDGE_HI,     1.0,},
+    { GKYL_PEAK_EDGE_LO, -1.0, },
+    { GKYL_PEAK_LOCAL_MAX, 0.0, },
+    { GKYL_PEAK_EDGE_HI, 1.0, },
   };
 
   for (int p = 0; p < 3 && p < num_peaks; p++) {
@@ -174,10 +174,10 @@ test_1d_find_peaks_cos(int poly_order, bool use_gpu)
     struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
     gkyl_array_copy(vals, vals_d);
     gkyl_array_copy(coords, coords_d);
-    
+
     const double *val = gkyl_array_cfetch(vals, 0);
     const double *coord = gkyl_array_cfetch(coords, 0);
-    
+
     double z = coord[0];
     double expected_val[1];
     test_func_1d_cos(0.0, &z, expected_val, NULL);
@@ -202,9 +202,9 @@ void
 test_1d_find_peaks_mirror(int poly_order, bool use_gpu)
 {
   // Grid: z in [-1, 1].
-  double lower[] = {-1.0};
-  double upper[] = {1.0};
-  int cells[] = {16};
+  double lower[] = { -1.0 };
+  double upper[] = { 1.0 };
+  int cells[] = { 16 };
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
 
@@ -213,7 +213,7 @@ test_1d_find_peaks_mirror(int poly_order, bool use_gpu)
   gkyl_cart_modal_serendip(&basis, 1, poly_order);
 
   // Ranges.
-  int ghost[] = {1};
+  int ghost[] = { 1 };
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -254,10 +254,10 @@ test_1d_find_peaks_mirror(int poly_order, bool use_gpu)
     struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
     gkyl_array_copy(vals, vals_d);
     gkyl_array_copy(coords, coords_d);
-    
+
     const double *val = gkyl_array_cfetch(vals, 0);
     const double *coord = gkyl_array_cfetch(coords, 0);
-    
+
     // Check specific peaks.
     if (ptype == GKYL_PEAK_EDGE_LO) {
       TEST_CHECK(gkyl_compare_double(val[0], 4.0, 1e-15));
@@ -286,16 +286,16 @@ test_1d_find_peaks_mirror(int poly_order, bool use_gpu)
 void
 test_2d_find_peaks(int poly_order, bool use_gpu)
 {
-  double lower[] = {0.0, -1.0};
-  double upper[] = {1.0, 1.0};
-  int cells[] = {4, 16};
+  double lower[] = { 0.0, -1.0 };
+  double upper[] = { 1.0, 1.0 };
+  int cells[] = { 4, 16 };
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, 2, lower, upper, cells);
 
   struct gkyl_basis basis;
   gkyl_cart_modal_serendip(&basis, 2, poly_order);
 
-  int ghost[] = {1, 1};
+  int ghost[] = { 1, 1 };
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -342,34 +342,36 @@ test_2d_find_peaks(int poly_order, bool use_gpu)
     struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
     gkyl_array_copy(vals, vals_d);
     gkyl_array_copy(coords, coords_d);
-    
-    double xc_log[1] = {0.0};
-    
+
+    double xc_log[1] = { 0.0 };
+
     // Check first and last psi cells.
-    for (int cell_idx = out_range->lower[0]; cell_idx <= out_range->upper[0]; 
-         cell_idx += (out_range->upper[0] - out_range->lower[0])) {
-      long linidx = gkyl_range_idx(out_range, (int[]){cell_idx});
+    for (int cell_idx = out_range->lower[0]; cell_idx <= out_range->upper[0];
+      cell_idx += (out_range->upper[0] - out_range->lower[0])) {
+      long linidx = gkyl_range_idx(out_range, (int[]){ cell_idx });
       const double *val_d = gkyl_array_cfetch(vals, linidx);
       const double *coord_d = gkyl_array_cfetch(coords, linidx);
 
       double val_at_center = out_basis->eval_expand(xc_log, val_d);
       double coord_at_center = out_basis->eval_expand(xc_log, coord_d);
       double psi_phys = out_grid->lower[0] + (cell_idx - 0.5) * out_grid->dx[0];
-      
+
       // Compute expected value at detected coordinate.
-      double xn[2] = {psi_phys, coord_at_center};
+      double xn[2] = { psi_phys, coord_at_center };
       double expected_val[1];
       test_func_2d_mirror(0.0, xn, expected_val, NULL);
-      
+
       // Check value matches analytical function.
       TEST_CHECK(gkyl_compare_double(val_at_center, expected_val[0], 1e-15));
-      
+
       // Check that coordinate matches expected peak location.
       if (ptype == GKYL_PEAK_EDGE_LO) {
         TEST_CHECK(fabs(coord_at_center - (-1.0)) < 1e-15);
-      } else if (ptype == GKYL_PEAK_LOCAL_MIN) {
+      }
+      else if (ptype == GKYL_PEAK_LOCAL_MIN) {
         TEST_CHECK(fabs(coord_at_center) < 1e-15);
-      } else if (ptype == GKYL_PEAK_EDGE_HI) {
+      }
+      else if (ptype == GKYL_PEAK_EDGE_HI) {
         TEST_CHECK(fabs(coord_at_center - 1.0) < 1e-15);
       }
     }
@@ -388,16 +390,16 @@ test_2d_find_peaks(int poly_order, bool use_gpu)
 void
 test_1d_find_peaks_complex(int poly_order, bool use_gpu)
 {
-  double lower[] = {-2.0*M_PI};
-  double upper[] = {2.0*M_PI};
-  int cells[] = {64};  // Need fine resolution to capture oscillations.
+  double lower[] = { -2.0 * M_PI };
+  double upper[] = { 2.0 * M_PI };
+  int cells[] = { 64 };  // Need fine resolution to capture oscillations.
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
 
   struct gkyl_basis basis;
   gkyl_cart_modal_serendip(&basis, 1, poly_order);
 
-  int ghost[] = {1};
+  int ghost[] = { 1 };
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -427,7 +429,7 @@ test_1d_find_peaks_complex(int poly_order, bool use_gpu)
 
   // Check results.
   int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
-  
+
   TEST_CHECK(num_peaks == 9);
 
   // Define expected peak locations and types.
@@ -435,15 +437,15 @@ test_1d_find_peaks_complex(int poly_order, bool use_gpu)
     enum gkyl_peak_type type;
     double z_expected;
   } expected_peaks[] = {
-    {GKYL_PEAK_EDGE_LO,    -2.0*M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,  -3.0*M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,  -M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,  -M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,   0.0,},
-    {GKYL_PEAK_LOCAL_MAX,   M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,   M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,   3.0*M_PI/2.0,},
-    {GKYL_PEAK_EDGE_HI,     2.0*M_PI,},
+    { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, -M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, 0.0, },
+    { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, },
   };
 
   for (int p = 0; p < num_peaks; p++) {
@@ -456,10 +458,10 @@ test_1d_find_peaks_complex(int poly_order, bool use_gpu)
     struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
     gkyl_array_copy(vals, vals_d);
     gkyl_array_copy(coords, coords_d);
-    
+
     const double *val = gkyl_array_cfetch(vals, 0);
     const double *coord = gkyl_array_cfetch(coords, 0);
-    
+
     double z = coord[0];
     double expected_val[1];
     test_func_1d_complex(0.0, &z, expected_val, NULL);
@@ -485,9 +487,9 @@ void
 test_2d_find_peaks_complex(int poly_order, bool use_gpu)
 {
   // Grid: psi in [0.5, 2.0], z in [-5, 5].
-  double lower[] = {0.5, -2.0*M_PI};
-  double upper[] = {2.0, 2.0*M_PI};
-  int cells[] = {16, 64};
+  double lower[] = { 0.5, -2.0 * M_PI };
+  double upper[] = { 2.0, 2.0 * M_PI };
+  int cells[] = { 16, 64 };
   int ndim = 2;
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
@@ -497,7 +499,7 @@ test_2d_find_peaks_complex(int poly_order, bool use_gpu)
   gkyl_cart_modal_serendip(&basis, ndim, poly_order);
 
   // Ranges.
-  int ghost[] = {1, 1};
+  int ghost[] = { 1, 1 };
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -538,15 +540,15 @@ test_2d_find_peaks_complex(int poly_order, bool use_gpu)
     enum gkyl_peak_type type;
     double z_expected;
   } expected_peaks[] = {
-    {GKYL_PEAK_EDGE_LO,    -2.0*M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,  -3.0*M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,  -M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,  -M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,   0.0,},
-    {GKYL_PEAK_LOCAL_MAX,   M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,   M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,   3.0*M_PI/2.0,},
-    {GKYL_PEAK_EDGE_HI,     2.0*M_PI,},
+    { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, -M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, 0.0, },
+    { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, },
   };
 
   // Get node locations for output basis.
@@ -557,7 +559,7 @@ test_2d_find_peaks_complex(int poly_order, bool use_gpu)
   for (int p = 0; p < num_peaks; p++) {
     enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
     TEST_CHECK(ptype == expected_peaks[p].type);
-    
+
     const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
     const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
 
@@ -566,34 +568,34 @@ test_2d_find_peaks_complex(int poly_order, bool use_gpu)
     struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
     gkyl_array_copy(vals, vals_d);
     gkyl_array_copy(coords, coords_d);
-    
+
     // Check each psi cell.
     struct gkyl_range_iter iter;
     gkyl_range_iter_init(&iter, out_range);
     while (gkyl_range_iter_next(&iter)) {
       long linidx = gkyl_range_idx(out_range, iter.idx);
-      
+
       const double *val_d = gkyl_array_cfetch(vals, linidx);
       const double *coord_d = gkyl_array_cfetch(coords, linidx);
-      
+
       // Get cell center for physical psi coordinate.
       double xc_out[1];
-      gkyl_rect_grid_cell_center(out_grid, (int[]){iter.idx[0]}, xc_out);
+      gkyl_rect_grid_cell_center(out_grid, (int[]){ iter.idx[0] }, xc_out);
       double psi_phys = xc_out[0];
-      
+
       // Evaluate at each nodal point in this cell.
       for (int n = 0; n < out_basis->num_basis; n++) {
         const double *nod_log = gkyl_array_cfetch(nodes, n);
         double val_at_node = out_basis->eval_expand(nod_log, val_d);
         double z_at_node = out_basis->eval_expand(nod_log, coord_d);
-        
+
         // Compute physical psi coordinate at this node.
         // dx/2 away from the center is the nodal location.
         double nod_phys[1];
-        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0]/2.0;
-        
+        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0] / 2.0;
+
         // Compute expected value at detected coordinates.
-        double xn[2] = {nod_phys[0], z_at_node};
+        double xn[2] = { nod_phys[0], z_at_node };
         double expected_val[1];
         test_func_2d_complex(0.0, xn, expected_val, NULL);
 
@@ -618,16 +620,16 @@ test_2d_find_peaks_complex(int poly_order, bool use_gpu)
 void
 test_1d_project_on_peaks(int poly_order, bool use_gpu)
 {
-  double lower[] = {-2.0*M_PI};
-  double upper[] = {2.0*M_PI};
-  int cells[] = {64};
+  double lower[] = { -2.0 * M_PI };
+  double upper[] = { 2.0 * M_PI };
+  int cells[] = { 64 };
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
 
   struct gkyl_basis basis;
   gkyl_cart_modal_serendip(&basis, 1, poly_order);
 
-  int ghost[] = {1};
+  int ghost[] = { 1 };
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -678,19 +680,20 @@ test_1d_project_on_peaks(int poly_order, bool use_gpu)
     enum gkyl_peak_type type;
     double z_expected;
   } expected_peaks[] = {
-    {GKYL_PEAK_EDGE_LO,    -2.0*M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,  -3.0*M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,  -M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,  -M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,   0.0,},
-    {GKYL_PEAK_LOCAL_MAX,   M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,   M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,   3.0*M_PI/2.0,},
-    {GKYL_PEAK_EDGE_HI,     2.0*M_PI,},
+    { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, -M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, 0.0, },
+    { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, },
   };
   for (int p = 0; p < num_peaks; p++) {
     // Copy back to host for verification.
-    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp, g_at_peaks[p]->size);
+    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp,
+      g_at_peaks[p]->size);
     gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]);
 
     const double *g_val = gkyl_array_cfetch(g_at_peaks_ho, 0);
@@ -716,9 +719,9 @@ test_1d_project_on_peaks(int poly_order, bool use_gpu)
 void
 test_2d_project_on_peaks(int poly_order, bool use_gpu)
 {
-  double lower[] = {0.5, -2.0*M_PI};
-  double upper[] = {2.0, 2.0*M_PI};
-  int cells[] = {16, 64};
+  double lower[] = { 0.5, -2.0 * M_PI };
+  double upper[] = { 2.0, 2.0 * M_PI };
+  int cells[] = { 16, 64 };
   int ndim = 2;
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
@@ -726,7 +729,7 @@ test_2d_project_on_peaks(int poly_order, bool use_gpu)
   struct gkyl_basis basis;
   gkyl_cart_modal_serendip(&basis, ndim, poly_order);
 
-  int ghost[] = {1, 1};
+  int ghost[] = { 1, 1 };
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -778,8 +781,8 @@ test_2d_project_on_peaks(int poly_order, bool use_gpu)
 
   // Define expected peak locations (same as before).
   double expected_z_peaks[] = {
-    -2.0*M_PI, -3.0*M_PI/2.0, -M_PI, -M_PI/2.0, 0.0,
-    M_PI/2.0, M_PI, 3.0*M_PI/2.0, 2.0*M_PI
+    -2.0 * M_PI, -3.0 * M_PI / 2.0, -M_PI, -M_PI / 2.0, 0.0,
+    M_PI / 2.0, M_PI, 3.0 * M_PI / 2.0, 2.0 * M_PI
   };
 
   // Get node locations for output basis.
@@ -793,33 +796,34 @@ test_2d_project_on_peaks(int poly_order, bool use_gpu)
     // Copy back to host for verification.
     struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
     gkyl_array_copy(coords, coords_d);
-    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp, g_at_peaks[p]->size);
+    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp,
+      g_at_peaks[p]->size);
     gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]);
-    
+
     // Check each psi cell.
     struct gkyl_range_iter iter;
     gkyl_range_iter_init(&iter, out_range);
     while (gkyl_range_iter_next(&iter)) {
       long linidx = gkyl_range_idx(out_range, iter.idx);
-      
+
       const double *g_val_d = gkyl_array_cfetch(g_at_peaks_ho, linidx);
       const double *coord_d = gkyl_array_cfetch(coords, linidx);
-      
+
       // Get cell center for physical psi coordinate.
       double xc_out[1];
-      gkyl_rect_grid_cell_center(out_grid, (int[]){iter.idx[0]}, xc_out);
-      
+      gkyl_rect_grid_cell_center(out_grid, (int[]){ iter.idx[0] }, xc_out);
+
       // Evaluate at each nodal point in this cell.
       for (int n = 0; n < out_basis->num_basis; n++) {
         const double *nod_log = gkyl_array_cfetch(nodes, n);
         double g_at_node = out_basis->eval_expand(nod_log, g_val_d);
         double z_at_node = out_basis->eval_expand(nod_log, coord_d);
-        
+
         // Compute physical psi coordinate at this node.
         double nod_phys[1];
-        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0]/2.0;
+        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0] / 2.0;
         double psi = nod_phys[0];
-        
+
         // Analytical value: g(psi, z) = z^2 * psi^2
         double expected = z_at_node * z_at_node * psi * psi;
         TEST_CHECK(fabs(z_at_node - expected_z_peaks[p]) < 1e-15);
@@ -843,21 +847,20 @@ test_2d_project_on_peaks(int poly_order, bool use_gpu)
   gkyl_array_dg_find_peaks_release(peaks);
 }
 
-
 // Test 1D project_on_peak_idx with complex function.
 void
 test_1d_project_on_peak_idx(int poly_order, bool use_gpu)
 {
-  double lower[] = {-2.0*M_PI};
-  double upper[] = {2.0*M_PI};
-  int cells[] = {64};
+  double lower[] = { -2.0 * M_PI };
+  double upper[] = { 2.0 * M_PI };
+  int cells[] = { 64 };
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
 
   struct gkyl_basis basis;
   gkyl_cart_modal_serendip(&basis, 1, poly_order);
 
-  int ghost[] = {1};
+  int ghost[] = { 1 };
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -906,21 +909,22 @@ test_1d_project_on_peak_idx(int poly_order, bool use_gpu)
     enum gkyl_peak_type type;
     double z_expected;
   } expected_peaks[] = {
-    {GKYL_PEAK_EDGE_LO,    -2.0*M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,  -3.0*M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,  -M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,  -M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,   0.0,},
-    {GKYL_PEAK_LOCAL_MAX,   M_PI/2.0,},
-    {GKYL_PEAK_LOCAL_MIN,   M_PI,},
-    {GKYL_PEAK_LOCAL_MAX,   3.0*M_PI/2.0,},
-    {GKYL_PEAK_EDGE_HI,     2.0*M_PI,},
+    { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, -M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, 0.0, },
+    { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, },
   };
 
   // Copy back to host for verification.
-  struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks->ncomp, g_at_peaks->size);
+  struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks->ncomp,
+    g_at_peaks->size);
   gkyl_array_copy(g_at_peaks_ho, g_at_peaks);
-  
+
   const double *g_val = gkyl_array_cfetch(g_at_peaks_ho, 0);
   double z = expected_peaks[chosen_idx].z_expected;
   double expected = z * z;
@@ -939,9 +943,9 @@ test_1d_project_on_peak_idx(int poly_order, bool use_gpu)
 void
 test_2d_project_on_peak_idx(int poly_order, bool use_gpu)
 {
-  double lower[] = {0.5, -2.0*M_PI};
-  double upper[] = {2.0, 2.0*M_PI};
-  int cells[] = {16, 64};
+  double lower[] = { 0.5, -2.0 * M_PI };
+  double upper[] = { 2.0, 2.0 * M_PI };
+  int cells[] = { 16, 64 };
   int ndim = 2;
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
@@ -949,7 +953,7 @@ test_2d_project_on_peak_idx(int poly_order, bool use_gpu)
   struct gkyl_basis basis;
   gkyl_cart_modal_serendip(&basis, ndim, poly_order);
 
-  int ghost[] = {1, 1};
+  int ghost[] = { 1, 1 };
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -1001,8 +1005,8 @@ test_2d_project_on_peak_idx(int poly_order, bool use_gpu)
 
   // Define expected peak locations (same as before).
   double expected_z_peaks[] = {
-    -2.0*M_PI, -3.0*M_PI/2.0, -M_PI, -M_PI/2.0, 0.0,
-    M_PI/2.0, M_PI, 3.0*M_PI/2.0, 2.0*M_PI
+    -2.0 * M_PI, -3.0 * M_PI / 2.0, -M_PI, -M_PI / 2.0, 0.0,
+    M_PI / 2.0, M_PI, 3.0 * M_PI / 2.0, 2.0 * M_PI
   };
 
   // Get node locations for output basis.
@@ -1016,33 +1020,34 @@ test_2d_project_on_peak_idx(int poly_order, bool use_gpu)
     // Copy back to host for verification.
     struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
     gkyl_array_copy(coords, coords_d);
-    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp, g_at_peaks[p]->size);
+    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp,
+      g_at_peaks[p]->size);
     gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]);
-    
+
     // Check each psi cell.
     struct gkyl_range_iter iter;
     gkyl_range_iter_init(&iter, out_range);
     while (gkyl_range_iter_next(&iter)) {
       long linidx = gkyl_range_idx(out_range, iter.idx);
-      
+
       const double *g_val_d = gkyl_array_cfetch(g_at_peaks_ho, linidx);
       const double *coord_d = gkyl_array_cfetch(coords, linidx);
-      
+
       // Get cell center for physical psi coordinate.
       double xc_out[1];
-      gkyl_rect_grid_cell_center(out_grid, (int[]){iter.idx[0]}, xc_out);
-      
+      gkyl_rect_grid_cell_center(out_grid, (int[]){ iter.idx[0] }, xc_out);
+
       // Evaluate at each nodal point in this cell.
       for (int n = 0; n < out_basis->num_basis; n++) {
         const double *nod_log = gkyl_array_cfetch(nodes, n);
         double g_at_node = out_basis->eval_expand(nod_log, g_val_d);
         double z_at_node = out_basis->eval_expand(nod_log, coord_d);
-        
+
         // Compute physical psi coordinate at this node.
         double nod_phys[1];
-        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0]/2.0;
+        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0] / 2.0;
         double psi = nod_phys[0];
-        
+
         // Analytical value: g(psi, z) = z^2 * psi^2
         double expected = z_at_node * z_at_node * psi * psi;
         TEST_CHECK(fabs(z_at_node - expected_z_peaks[p]) < 1e-15);
@@ -1067,51 +1072,121 @@ test_2d_project_on_peak_idx(int poly_order, bool use_gpu)
 }
 
 // CPU test wrappers
-void test_1d_cos_p1_ho() { test_1d_find_peaks_cos(1, false); }
-void test_1d_mirror_p1_ho() { test_1d_find_peaks_mirror(1, false); }
-void test_1d_complex_p1_ho() { test_1d_find_peaks_complex(1, false); }
-void test_2d_p1_ho() { test_2d_find_peaks(1, false); }
-void test_2d_complex_p1_ho() { test_2d_find_peaks_complex(1, false); }
-void test_1d_project_p1_ho() { test_1d_project_on_peaks(1, false); }
-void test_2d_project_p1_ho() { test_2d_project_on_peaks(1, false); }
-void test_1d_project_idx_p1_ho() { test_1d_project_on_peak_idx(1, false); }
-void test_2d_project_idx_p1_ho() { test_2d_project_on_peak_idx(1, false); }
+void test_1d_cos_p1_ho()
+{
+  test_1d_find_peaks_cos(1, false);
+}
+
+void test_1d_mirror_p1_ho()
+{
+  test_1d_find_peaks_mirror(1, false);
+}
+
+void test_1d_complex_p1_ho()
+{
+  test_1d_find_peaks_complex(1, false);
+}
+
+void test_2d_p1_ho()
+{
+  test_2d_find_peaks(1, false);
+}
+
+void test_2d_complex_p1_ho()
+{
+  test_2d_find_peaks_complex(1, false);
+}
+
+void test_1d_project_p1_ho()
+{
+  test_1d_project_on_peaks(1, false);
+}
+
+void test_2d_project_p1_ho()
+{
+  test_2d_project_on_peaks(1, false);
+}
+
+void test_1d_project_idx_p1_ho()
+{
+  test_1d_project_on_peak_idx(1, false);
+}
+
+void test_2d_project_idx_p1_ho()
+{
+  test_2d_project_on_peak_idx(1, false);
+}
 
 #ifdef GKYL_HAVE_CUDA
 
 // GPU test wrappers
-void test_1d_cos_p1_dev() { test_1d_find_peaks_cos(1, true); }
-void test_1d_mirror_p1_dev() { test_1d_find_peaks_mirror(1, true); }
-void test_1d_complex_p1_dev() { test_1d_find_peaks_complex(1, true); }
-void test_2d_p1_dev() { test_2d_find_peaks(1, true); }
-void test_2d_complex_p1_dev() { test_2d_find_peaks_complex(1, true); }
-void test_1d_project_p1_dev() { test_1d_project_on_peaks(1, true); }
-void test_2d_project_p1_dev() { test_2d_project_on_peaks(1, true); }
-void test_1d_project_idx_p1_dev() { test_1d_project_on_peak_idx(1, true); }
-void test_2d_project_idx_p1_dev() { test_2d_project_on_peak_idx(1, true); }
+void test_1d_cos_p1_dev()
+{
+  test_1d_find_peaks_cos(1, true);
+}
+
+void test_1d_mirror_p1_dev()
+{
+  test_1d_find_peaks_mirror(1, true);
+}
+
+void test_1d_complex_p1_dev()
+{
+  test_1d_find_peaks_complex(1, true);
+}
+
+void test_2d_p1_dev()
+{
+  test_2d_find_peaks(1, true);
+}
+
+void test_2d_complex_p1_dev()
+{
+  test_2d_find_peaks_complex(1, true);
+}
+
+void test_1d_project_p1_dev()
+{
+  test_1d_project_on_peaks(1, true);
+}
+
+void test_2d_project_p1_dev()
+{
+  test_2d_project_on_peaks(1, true);
+}
+
+void test_1d_project_idx_p1_dev()
+{
+  test_1d_project_on_peak_idx(1, true);
+}
+
+void test_2d_project_idx_p1_dev()
+{
+  test_2d_project_on_peak_idx(1, true);
+}
 
 #endif
 
 TEST_LIST = {
-  {"test_1d_cos_p1", test_1d_cos_p1_ho},
-  {"test_1d_mirror_p1", test_1d_mirror_p1_ho},
-  {"test_1d_complex_p1", test_1d_complex_p1_ho},
-  {"test_2d_p1", test_2d_p1_ho},
-  {"test_2d_complex_p1", test_2d_complex_p1_ho},
-  {"test_1d_project_p1", test_1d_project_p1_ho},
-  {"test_2d_project_p1", test_2d_project_p1_ho},
-  {"test_1d_project_idx_p1", test_1d_project_idx_p1_ho},
-  {"test_2d_project_idx_p1", test_2d_project_idx_p1_ho},
+  { "test_1d_cos_p1", test_1d_cos_p1_ho },
+  { "test_1d_mirror_p1", test_1d_mirror_p1_ho },
+  { "test_1d_complex_p1", test_1d_complex_p1_ho },
+  { "test_2d_p1", test_2d_p1_ho },
+  { "test_2d_complex_p1", test_2d_complex_p1_ho },
+  { "test_1d_project_p1", test_1d_project_p1_ho },
+  { "test_2d_project_p1", test_2d_project_p1_ho },
+  { "test_1d_project_idx_p1", test_1d_project_idx_p1_ho },
+  { "test_2d_project_idx_p1", test_2d_project_idx_p1_ho },
 #ifdef GKYL_HAVE_CUDA
-  {"test_1d_cos_p1_gpu", test_1d_cos_p1_dev},
-  {"test_1d_mirror_p1_gpu", test_1d_mirror_p1_dev},
-  {"test_1d_complex_p1_gpu", test_1d_complex_p1_dev},
-  {"test_2d_p1_gpu", test_2d_p1_dev},
-  {"test_2d_complex_p1_gpu", test_2d_complex_p1_dev},
-  {"test_1d_project_p1_gpu", test_1d_project_p1_dev},
-  {"test_2d_project_p1_gpu", test_2d_project_p1_dev},
-  {"test_1d_project_idx_p1_gpu", test_1d_project_idx_p1_dev},
-  {"test_2d_project_idx_p1_gpu", test_2d_project_idx_p1_dev},
+  { "test_1d_cos_p1_gpu", test_1d_cos_p1_dev },
+  { "test_1d_mirror_p1_gpu", test_1d_mirror_p1_dev },
+  { "test_1d_complex_p1_gpu", test_1d_complex_p1_dev },
+  { "test_2d_p1_gpu", test_2d_p1_dev },
+  { "test_2d_complex_p1_gpu", test_2d_complex_p1_dev },
+  { "test_1d_project_p1_gpu", test_1d_project_p1_dev },
+  { "test_2d_project_p1_gpu", test_2d_project_p1_dev },
+  { "test_1d_project_idx_p1_gpu", test_1d_project_idx_p1_dev },
+  { "test_2d_project_idx_p1_gpu", test_2d_project_idx_p1_dev },
 #endif
-  {NULL, NULL},
+  { NULL, NULL },
 };
diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c
index 7d51a2804a..d55087e188 100644
--- a/core/zero/array_dg_find_peaks.c
+++ b/core/zero/array_dg_find_peaks.c
@@ -19,7 +19,6 @@ count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gk
 {
   int ndim = up->grid.ndim;
   int search_dir = up->search_dir;
-  int poly_order = up->basis.poly_order;
 
   int total_nodes_search = up->total_nodes_search;
 
@@ -32,7 +31,9 @@ count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gk
   }
 
   // Iterate along cells in search direction and collect nodal values.
-  for (int cell_idx = up->range.lower[search_dir]; cell_idx <= up->range.upper[search_dir]; cell_idx++) {
+  for (int cell_idx = up->range.lower[search_dir];
+    cell_idx <= up->range.upper[search_dir];
+    cell_idx++) {
     // Build index array for this cell.
     int idx[GKYL_MAX_DIM];
     if (ndim == 1) {
@@ -53,27 +54,13 @@ count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gk
     // Evaluate at each node in this cell.
     for (int n = 0; n < up->basis.num_basis; n++) {
       const double *nod_log = gkyl_array_cfetch(up->nodes, n);
-      
+
       // Determine node offset in search direction.
-      int node_offset;
-      if (poly_order == 1) {
-        node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
-      }
-      else {
-        if (nod_log[search_dir] < -0.5)
-          node_offset = 0;
-        else if (nod_log[search_dir] > 0.5)
-          node_offset = 2;
-        else
-          node_offset = 1;
-      }
+      int node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
 
       int cell_local = cell_idx - up->range.lower[search_dir];
-      int search_node_idx;
-      if (poly_order == 1)
-        search_node_idx = cell_local + node_offset;
-      else
-        search_node_idx = 2*cell_local + node_offset;
+
+      int search_node_idx = cell_local + node_offset;
 
       double val = up->basis.eval_expand(nod_log, f_d);
       double nod_phys[GKYL_MAX_DIM];
@@ -91,15 +78,15 @@ count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gk
   // Now scan the values to find peaks.
   // A peak is: EDGE_LO at index 0, EDGE_HI at last index, LOCAL_MAX/MIN in between.
   int num_peaks = 0;
-  
+
   // Always add lower edge.
   peak_types_out[num_peaks++] = GKYL_PEAK_EDGE_LO;
 
   // Scan for local maxima and minima (indices 1 to total_nodes_search-2).
   for (int i = 1; i < total_nodes_search - 1; i++) {
-    double prev = vals[i-1];
+    double prev = vals[i - 1];
     double curr = vals[i];
-    double next = vals[i+1];
+    double next = vals[i + 1];
 
     if (curr > prev && curr > next) {
       // Local maximum.
@@ -130,7 +117,6 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
 {
   int ndim = up->grid.ndim;
   int search_dir = up->search_dir;
-  int poly_order = up->basis.poly_order;
 
   int total_nodes_search = up->total_nodes_search;
 
@@ -148,7 +134,9 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
   int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
 
   // Iterate along cells in search direction and collect nodal values.
-  for (int cell_idx = up->range.lower[search_dir]; cell_idx <= up->range.upper[search_dir]; cell_idx++) {
+  for (int cell_idx = up->range.lower[search_dir];
+    cell_idx <= up->range.upper[search_dir];
+    cell_idx++) {
     // For 2D, we need to iterate over cells in the preserved direction that
     // contribute to this preserved node index.
     int pres_cell_start, pres_cell_end;
@@ -158,35 +146,20 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
     }
     else {
       // Determine which cells contribute to this preserved node.
-      if (poly_order == 1) {
-        // Node i is shared by cells i and i+1 (0-indexed from lower).
-        // preserved_node_idx 0 is only in cell lower[preserved_dir].
-        // preserved_node_idx N is only in cell upper[preserved_dir].
-        if (preserved_node_idx == 0) {
-          pres_cell_start = up->range.lower[preserved_dir];
-          pres_cell_end = up->range.lower[preserved_dir];
-        }
-        else if (preserved_node_idx == up->out_nrange.upper[0]) {
-          pres_cell_start = up->range.upper[preserved_dir];
-          pres_cell_end = up->range.upper[preserved_dir];
-        }
-        else {
-          pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1;
-          pres_cell_end = pres_cell_start + 1;
-          if (pres_cell_end > up->range.upper[preserved_dir])
-            pres_cell_end = up->range.upper[preserved_dir];
-        }
+      // Node i is shared by cells i and i+1 (0-indexed from lower).
+      // preserved_node_idx 0 is only in cell lower[preserved_dir].
+      // preserved_node_idx N is only in cell upper[preserved_dir].
+      if (preserved_node_idx == 0) {
+        pres_cell_start = up->range.lower[preserved_dir];
+        pres_cell_end = up->range.lower[preserved_dir];
       }
-      else { // poly_order == 2
-        // Similar logic for p=2 nodes.
-        int cell_local = preserved_node_idx / 2;
-        pres_cell_start = up->range.lower[preserved_dir] + cell_local;
-        pres_cell_end = pres_cell_start;
-        if (preserved_node_idx % 2 == 0 && preserved_node_idx > 0) {
-          pres_cell_start--;
-        }
-        if (pres_cell_start < up->range.lower[preserved_dir])
-          pres_cell_start = up->range.lower[preserved_dir];
+      else if (preserved_node_idx == up->out_nrange.upper[0]) {
+        pres_cell_start = up->range.upper[preserved_dir];
+        pres_cell_end = up->range.upper[preserved_dir];
+      }
+      else {
+        pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+        pres_cell_end = pres_cell_start + 1;
         if (pres_cell_end > up->range.upper[preserved_dir])
           pres_cell_end = up->range.upper[preserved_dir];
       }
@@ -215,49 +188,18 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
 
         // Check if this node corresponds to our preserved node index.
         if (ndim > 1) {
-          int pres_node_offset;
-          if (poly_order == 1) {
-            pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1;
-          }
-          else {
-            if (nod_log[preserved_dir] < -0.5)
-              pres_node_offset = 0;
-            else if (nod_log[preserved_dir] > 0.5)
-              pres_node_offset = 2;
-            else
-              pres_node_offset = 1;
-          }
+          int pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1;
           int pres_cell_local = pres_cell - up->range.lower[preserved_dir];
-          int this_pres_node;
-          if (poly_order == 1)
-            this_pres_node = pres_cell_local + pres_node_offset;
-          else
-            this_pres_node = 2*pres_cell_local + pres_node_offset;
-
+          int this_pres_node = pres_cell_local + pres_node_offset;
           if (this_pres_node != preserved_node_idx)
             continue;
         }
 
         // Determine node offset in search direction.
-        int search_node_offset;
-        if (poly_order == 1) {
-          search_node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
-        }
-        else {
-          if (nod_log[search_dir] < -0.5)
-            search_node_offset = 0;
-          else if (nod_log[search_dir] > 0.5)
-            search_node_offset = 2;
-          else
-            search_node_offset = 1;
-        }
+        int search_node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
 
         int cell_local = cell_idx - up->range.lower[search_dir];
-        int search_node_idx;
-        if (poly_order == 1)
-          search_node_idx = cell_local + search_node_offset;
-        else
-          search_node_idx = 2*cell_local + search_node_offset;
+        int search_node_idx = cell_local + search_node_offset;
 
         if (!visited[search_node_idx]) {
           double val = up->basis.eval_expand(nod_log, f_d); // GPU error here
@@ -274,7 +216,7 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
 
   // Now extract peaks based on peak_types.
   int peak_idx = 0;
-  
+
   // EDGE_LO is always first peak at index 0.
   if (up->peak_types[peak_idx] == GKYL_PEAK_EDGE_LO) {
     double *val_n = gkyl_array_fetch(up->out_vals_nodal[peak_idx], preserved_node_idx);
@@ -286,15 +228,15 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
 
   // Find local maxima and minima.
   for (int i = 1; i < total_nodes_search - 1 && peak_idx < up->num_peaks - 1; i++) {
-    double prev = vals[i-1];
+    double prev = vals[i - 1];
     double curr = vals[i];
-    double next = vals[i+1];
+    double next = vals[i + 1];
 
     bool is_max = (curr > prev && curr > next);
     bool is_min = (curr < prev && curr < next);
 
     if ((is_max && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MAX) ||
-        (is_min && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MIN)) {
+      (is_min && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MIN)) {
       double *val_n = gkyl_array_fetch(up->out_vals_nodal[peak_idx], preserved_node_idx);
       double *coord_n = gkyl_array_fetch(up->out_coords_nodal[peak_idx], preserved_node_idx);
       val_n[0] = curr;
@@ -318,15 +260,16 @@ find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct
  */
 static void
 eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
-  const struct gkyl_array *in_ho, int preserved_node_idx, struct gkyl_array **out_vals_nodal, int peak_idx)
+  const struct gkyl_array *in_ho, int preserved_node_idx, struct gkyl_array **out_vals_nodal,
+  int peak_idx)
 {
   int ndim = up->grid.ndim;
   int search_dir = up->search_dir;
-  int poly_order = up->basis.poly_order;
   int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
 
   // Get the peak coordinate that was found during find_peaks.
-  const double *peak_coord_n = gkyl_array_cfetch(up->out_coords_nodal[peak_idx], preserved_node_idx);
+  const double *peak_coord_n = gkyl_array_cfetch(up->out_coords_nodal[peak_idx],
+    preserved_node_idx);
   double peak_coord_search = peak_coord_n[0];
 
   // Find the cell containing this coordinate in the search direction.
@@ -334,7 +277,7 @@ eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
   double point[GKYL_MAX_DIM];
   int known_idx[GKYL_MAX_DIM];
   int cell_idx[GKYL_MAX_DIM];
-  
+
   for (int d = 0; d < ndim; d++) {
     if (d == search_dir) {
       point[d] = peak_coord_search;
@@ -346,33 +289,28 @@ eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
       known_idx[d] = -1;
     }
   }
-  
+
   // If 2D, we need to determine preserved direction cell from preserved_node_idx.
   // For p=1 with N cells (1-based indexing), nodal points map as:
-  //   Node 0 -> cell 1, logical coord -1 (left edge of first cell)
-  //   Node k (1 <= k <= N) -> cell k, logical coord +1 (right edge of cell k)
+  // Node 0 -> cell 1, logical coord -1 (left edge of first cell)
+  // Node k (1 <= k <= N) -> cell k, logical coord +1 (right edge of cell k)
   // This ensures proper continuity at shared cell boundaries.
   if (ndim > 1) {
     int pres_cell;
-    if (poly_order == 1) {
-      if (preserved_node_idx == 0) {
-        // First node: evaluate at left edge of first cell.
-        pres_cell = up->range.lower[preserved_dir];
-      }
-      else {
-        // All other nodes (1 to N): evaluate at right edge of cell with index = node_idx.
-        // Clamp to upper bound for safety.
-        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
-        if (pres_cell > up->range.upper[preserved_dir]) {
-          pres_cell = up->range.upper[preserved_dir];
-        }
-      }
+    if (preserved_node_idx == 0) {
+      // First node: evaluate at left edge of first cell.
+      pres_cell = up->range.lower[preserved_dir];
     }
     else {
-      pres_cell = up->range.lower[preserved_dir] + preserved_node_idx / 2;
+      // All other nodes (1 to N): evaluate at right edge of cell with index = node_idx.
+      // Clamp to upper bound for safety.
+      pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+      if (pres_cell > up->range.upper[preserved_dir]) {
+        pres_cell = up->range.upper[preserved_dir];
+      }
     }
     known_idx[preserved_dir] = pres_cell;
-    
+
     // Set the coordinate in preserved direction to the cell center.
     int pres_cell_idx[GKYL_MAX_DIM];
     for (int d = 0; d < ndim; d++) {
@@ -382,7 +320,7 @@ eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
     gkyl_rect_grid_cell_center(&up->grid, pres_cell_idx, xc_pres);
     point[preserved_dir] = xc_pres[preserved_dir];
   }
-  
+
   gkyl_rect_grid_find_cell(&up->grid, point, true, known_idx, cell_idx);
 
   // Clamp cell_idx to interior range (avoid ghost cells).
@@ -412,19 +350,8 @@ eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
     }
     else if (ndim > 1) {
       // In preserved direction, use the node position in the cell.
-      // For p=1: node 0 is at left edge (-1), all others at right edge (+1).
-      if (poly_order == 1) {
-        nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
-      }
-      else {
-        int pres_node_offset = preserved_node_idx % 3;
-        if (pres_node_offset == 0)
-          nod_log[d] = -1.0;
-        else if (pres_node_offset == 1)
-          nod_log[d] = 0.0;
-        else
-          nod_log[d] = 1.0;
-      }
+      // Node 0 is at left edge (-1), all others at right edge (+1).
+      nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
     }
   }
 
@@ -437,7 +364,8 @@ eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
 }
 
 struct gkyl_array_dg_find_peaks*
-gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_peaks_inp, const struct gkyl_array *in)
+gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_peaks_inp,
+  const struct gkyl_array *in)
 {
   struct gkyl_array_dg_find_peaks *up = gkyl_malloc(sizeof(*up));
 
@@ -454,19 +382,20 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_pea
   int out_dim = ndim - 1;
 
   assert(find_peaks_inp->search_dir >= 0 && find_peaks_inp->search_dir < ndim);
+  assert(poly_order == 1); // gkyl_array_dg_find_peaks: only p=1 is supported
 
   // Set up output grid/basis/range.
   if (out_dim == 0) {
     // 1D -> 0D case.
-    int cells_1d[1] = {1};
-    double lower_1d[1] = {0.0};
-    double upper_1d[1] = {1.0};
+    int cells_1d[1] = { 1 };
+    double lower_1d[1] = { 0.0 };
+    double upper_1d[1] = { 1.0 };
     gkyl_rect_grid_init(&up->out_grid, 1, lower_1d, upper_1d, cells_1d);
-    gkyl_range_init(&up->out_range, 1, (int[]){1}, (int[]){1});
-    gkyl_range_init(&up->out_range_ext, 1, (int[]){0}, (int[]){2});
+    gkyl_range_init(&up->out_range, 1, (int[]){ 1 }, (int[]){ 1 });
+    gkyl_range_init(&up->out_range_ext, 1, (int[]){ 0 }, (int[]){ 2 });
     gkyl_cart_modal_serendip(&up->out_basis, 1, 0);
 
-    int nodes_shape[1] = {1};
+    int nodes_shape[1] = { 1 };
     gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape);
   }
   else if (out_dim == 1) {
@@ -479,23 +408,22 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_pea
 
     gkyl_rect_grid_init(&up->out_grid, 1, &lower_out, &upper_out, &cells_out);
 
-    int lower_idx[1] = {find_peaks_inp->range->lower[preserved_dir]};
-    int upper_idx[1] = {find_peaks_inp->range->upper[preserved_dir]};
+    int lower_idx[1] = { find_peaks_inp->range->lower[preserved_dir] };
+    int upper_idx[1] = { find_peaks_inp->range->upper[preserved_dir] };
     gkyl_range_init(&up->out_range, 1, lower_idx, upper_idx);
 
-    int lower_ext_idx[1] = {find_peaks_inp->range_ext->lower[preserved_dir]};
-    int upper_ext_idx[1] = {find_peaks_inp->range_ext->upper[preserved_dir]};
+    int lower_ext_idx[1] = { find_peaks_inp->range_ext->lower[preserved_dir] };
+    int upper_ext_idx[1] = { find_peaks_inp->range_ext->upper[preserved_dir] };
     gkyl_range_init(&up->out_range_ext, 1, lower_ext_idx, upper_ext_idx);
 
     gkyl_cart_modal_serendip(&up->out_basis, 1, poly_order);
 
-    int num_nodes = (poly_order == 1) ? gkyl_range_shape(&up->out_range, 0) + 1
-                                      : 2*gkyl_range_shape(&up->out_range, 0) + 1;
+    int num_nodes = gkyl_range_shape(&up->out_range, 0) + 1;
     int nodes_shape[1] = {num_nodes};
     gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape);
   }
   else {
-    assert(false && "dg_find_peaks: only 1D->0D and 2D->1D supported");
+    assert(false); // dg_find_peaks: only 1D->0D and 2D->1D supported
   }
 
   // Store node locations for input basis.
@@ -511,7 +439,7 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_pea
   // Compute total_nodes_search for the struct.
   int num_cells_search = find_peaks_inp->range->upper[find_peaks_inp->search_dir]
     - find_peaks_inp->range->lower[find_peaks_inp->search_dir] + 1;
-  up->total_nodes_search = (poly_order == 1) ? num_cells_search + 1 : 2*num_cells_search + 1;
+  up->total_nodes_search = num_cells_search + 1;
 
   // Pre-allocate search-direction working buffers (reused by advance).
   up->search_vals = gkyl_malloc(sizeof(double) * up->total_nodes_search);
@@ -522,10 +450,10 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_pea
   int mid_preserved_idx = 0;
   if (out_dim == 1) {
     int preserved_dir = (find_peaks_inp->search_dir == 0) ? 1 : 0;
-    mid_preserved_idx = (find_peaks_inp->range->lower[preserved_dir] + find_peaks_inp->range->upper[preserved_dir]) / 2;
+    mid_preserved_idx = (find_peaks_inp->range->lower[preserved_dir] +
+      find_peaks_inp->range->upper[preserved_dir]) / 2;
   }
 
-
   // Copy input to host if needed.
   if (up->use_gpu) {
     struct gkyl_array *field_ho = gkyl_array_new(GKYL_DOUBLE, in->ncomp, in->size);
@@ -539,8 +467,10 @@ gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_pea
 
   // Allocate output arrays for each peak.
   for (int p = 0; p < up->num_peaks; p++) {
-    up->out_vals[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, up->out_range_ext.volume);
-    up->out_coords[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, up->out_range_ext.volume);
+    up->out_vals[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis,
+      up->out_range_ext.volume);
+    up->out_coords[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis,
+      up->out_range_ext.volume);
     up->out_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
     up->out_coords_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
     up->out_eval_at_peaks_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
@@ -584,7 +514,6 @@ gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, const stru
   int ndim = up->grid.ndim;
   int out_dim = ndim - 1;
 
-
   // Find peaks for each preserved-direction node.
   int num_nodes_out = up->out_nrange.volume;
   for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
@@ -679,7 +608,8 @@ gkyl_array_dg_find_peaks_acquire_coords(const struct gkyl_array_dg_find_peaks *u
 }
 
 const struct gkyl_array*
-gkyl_array_dg_find_peaks_acquire_coords_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+gkyl_array_dg_find_peaks_acquire_coords_nodal(const struct gkyl_array_dg_find_peaks *up,
+  int peak_idx)
 {
   assert(peak_idx >= 0 && peak_idx < up->num_peaks);
   return gkyl_array_acquire(up->out_coords_nodal[peak_idx]);
@@ -703,7 +633,8 @@ gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
   int num_nodes_out = up->out_nrange.volume;
   for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
     for (int p = 0; p < up->num_peaks; p++) {
-      eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, p);
+      eval_array_at_peaks_for_preserved_node(up, in_array, pres_node,
+        up->out_eval_at_peaks_vals_nodal, p);
     }
   }
   // Transform nodal to modal for each peak.
@@ -719,7 +650,8 @@ gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
     // 2D -> 1D case: use nodal-to-modal transform.
     for (int p = 0; p < up->num_peaks; p++) {
       gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-        &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[p], out_vals[p], false);
+        &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[p], out_vals[p],
+        false);
     }
   }
 }
@@ -742,7 +674,8 @@ gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up
   int num_nodes_out = up->out_nrange.volume;
 
   for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
-    eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, up->out_eval_at_peaks_vals_nodal, peak_idx);
+    eval_array_at_peaks_for_preserved_node(up, in_array, pres_node,
+      up->out_eval_at_peaks_vals_nodal, peak_idx);
   }
 
   // Transform nodal to modal for each peak.
@@ -755,7 +688,8 @@ gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up
   else {
     // 2D -> 1D case: use nodal-to-modal transform.
     gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
-      &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], out_val, false);
+      &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], out_val,
+      false);
   }
 }
 
diff --git a/core/zero/array_dg_find_peaks_cu.cu b/core/zero/array_dg_find_peaks_cu.cu
index 25b71aea77..9581f9a371 100644
--- a/core/zero/array_dg_find_peaks_cu.cu
+++ b/core/zero/array_dg_find_peaks_cu.cu
@@ -24,18 +24,17 @@ gkyl_find_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
   const struct gkyl_array *in, int num_nodes_out)
 {
   for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
-    tid < num_nodes_out; tid += blockDim.x * gridDim.x)
-  {
+    tid < num_nodes_out; tid += blockDim.x * gridDim.x) {
     int preserved_node_idx = (int)tid;
 
     int ndim = up->grid.ndim;
     int search_dir = up->search_dir;
-    int poly_order = up->basis.poly_order;
     int num_basis = up->basis.num_basis;
 
     // Number of cells and nodes along the search direction.
     int num_cells_search = up->range.upper[search_dir] - up->range.lower[search_dir] + 1;
-    int total_nodes_search = (poly_order == 1) ? num_cells_search + 1 : 2*num_cells_search + 1;
+
+    int total_nodes_search = num_cells_search + 1;
 
     // Each thread gets its own contiguous slice of the pre-allocated
     // search buffers. Offset = preserved_node_idx * total_nodes_search.
@@ -54,8 +53,7 @@ gkyl_find_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
 
     // Iterate along cells in the search direction.
     for (int cell_idx = up->range.lower[search_dir];
-         cell_idx <= up->range.upper[search_dir]; cell_idx++)
-    {
+      cell_idx <= up->range.upper[search_dir]; cell_idx++) {
       // For 2D, determine which cells in the preserved direction
       // contribute to this preserved_node_idx.
       int pres_cell_start, pres_cell_end;
@@ -64,31 +62,20 @@ gkyl_find_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
         pres_cell_end = 0;
       }
       else {
-        if (poly_order == 1) {
-          if (preserved_node_idx == 0) {
-            pres_cell_start = up->range.lower[preserved_dir];
-            pres_cell_end = up->range.lower[preserved_dir];
-          }
-          else if (preserved_node_idx == up->out_nrange.upper[0]) {
-            pres_cell_start = up->range.upper[preserved_dir];
-            pres_cell_end = up->range.upper[preserved_dir];
-          }
-          else {
-            pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1;
-            pres_cell_end = pres_cell_start + 1;
-            if (pres_cell_end > up->range.upper[preserved_dir])
-              pres_cell_end = up->range.upper[preserved_dir];
-          }
+        // Node i is shared by cells i and i+1 (0-indexed from lower).
+        // preserved_node_idx 0 is only in cell lower[preserved_dir].
+        // preserved_node_idx N is only in cell upper[preserved_dir].
+        if (preserved_node_idx == 0) {
+          pres_cell_start = up->range.lower[preserved_dir];
+          pres_cell_end = up->range.lower[preserved_dir];
         }
-        else { // poly_order == 2
-          int cell_local = preserved_node_idx / 2;
-          pres_cell_start = up->range.lower[preserved_dir] + cell_local;
-          pres_cell_end = pres_cell_start;
-          if (preserved_node_idx % 2 == 0 && preserved_node_idx > 0) {
-            pres_cell_start--;
-          }
-          if (pres_cell_start < up->range.lower[preserved_dir])
-            pres_cell_start = up->range.lower[preserved_dir];
+        else if (preserved_node_idx == up->out_nrange.upper[0]) {
+          pres_cell_start = up->range.upper[preserved_dir];
+          pres_cell_end = up->range.upper[preserved_dir];
+        }
+        else {
+          pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+          pres_cell_end = pres_cell_start + 1;
           if (pres_cell_end > up->range.upper[preserved_dir])
             pres_cell_end = up->range.upper[preserved_dir];
         }
@@ -117,49 +104,21 @@ gkyl_find_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
 
           // Check if this node belongs to our preserved_node_idx (2D only).
           if (ndim > 1) {
-            int pres_node_offset;
-            if (poly_order == 1) {
-              pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1;
-            }
-            else {
-              if (nod_log[preserved_dir] < -0.5)
-                pres_node_offset = 0;
-              else if (nod_log[preserved_dir] > 0.5)
-                pres_node_offset = 2;
-              else
-                pres_node_offset = 1;
-            }
+            int pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1;
             int pres_cell_local = pres_cell - up->range.lower[preserved_dir];
-            int this_pres_node;
-            if (poly_order == 1)
-              this_pres_node = pres_cell_local + pres_node_offset;
-            else
-              this_pres_node = 2*pres_cell_local + pres_node_offset;
+
+            int this_pres_node = pres_cell_local + pres_node_offset;
 
             if (this_pres_node != preserved_node_idx)
               continue;
           }
 
           // Determine node offset in the search direction.
-          int search_node_offset;
-          if (poly_order == 1) {
-            search_node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
-          }
-          else {
-            if (nod_log[search_dir] < -0.5)
-              search_node_offset = 0;
-            else if (nod_log[search_dir] > 0.5)
-              search_node_offset = 2;
-            else
-              search_node_offset = 1;
-          }
+          int search_node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
 
           int cell_local = cell_idx - up->range.lower[search_dir];
-          int search_node_idx;
-          if (poly_order == 1)
-            search_node_idx = cell_local + search_node_offset;
-          else
-            search_node_idx = 2*cell_local + search_node_offset;
+
+          int search_node_idx = cell_local + search_node_offset;
 
           if (!visited[search_node_idx]) {
             double val = up->basis.eval_expand(nod_log, f_d);
@@ -190,15 +149,15 @@ gkyl_find_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
 
     // Find local maxima and minima.
     for (int i = 1; i < total_nodes_search - 1 && peak_idx < up->num_peaks - 1; i++) {
-      double prev = vals[i-1];
+      double prev = vals[i - 1];
       double curr = vals[i];
-      double next = vals[i+1];
+      double next = vals[i + 1];
 
       bool is_max = (curr > prev && curr > next);
       bool is_min = (curr < prev && curr < next);
 
       if ((is_max && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MAX) ||
-          (is_min && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MIN)) {
+        (is_min && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MIN)) {
         double *val_n = (double *)gkyl_array_fetch(up->out_vals_nodal[peak_idx],
           preserved_node_idx);
         double *coord_n = (double *)gkyl_array_fetch(up->out_coords_nodal[peak_idx],
@@ -242,15 +201,13 @@ gkyl_eval_at_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
   unsigned long total_threads = (unsigned long)num_nodes_out * num_peaks_to_eval;
 
   for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
-    tid < total_threads; tid += blockDim.x * gridDim.x)
-  {
+    tid < total_threads; tid += blockDim.x * gridDim.x) {
     int preserved_node_idx = (int)(tid / num_peaks_to_eval);
     int peak_offset = (int)(tid % num_peaks_to_eval);
     int peak_idx = peak_start + peak_offset;
 
     int ndim = up->grid.ndim;
     int search_dir = up->search_dir;
-    int poly_order = up->basis.poly_order;
     int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
 
     // Get the peak coordinate found during advance.
@@ -283,18 +240,13 @@ gkyl_eval_at_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
     // For 2D: determine preserved-direction cell from preserved_node_idx.
     if (ndim > 1) {
       int pres_cell;
-      if (poly_order == 1) {
-        if (preserved_node_idx == 0) {
-          pres_cell = up->range.lower[preserved_dir];
-        }
-        else {
-          pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
-          if (pres_cell > up->range.upper[preserved_dir])
-            pres_cell = up->range.upper[preserved_dir];
-        }
+      if (preserved_node_idx == 0) {
+        pres_cell = up->range.lower[preserved_dir];
       }
       else {
-        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx / 2;
+        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+        if (pres_cell > up->range.upper[preserved_dir])
+          pres_cell = up->range.upper[preserved_dir];
       }
       cell_idx[preserved_dir] = pres_cell;
     }
@@ -314,18 +266,8 @@ gkyl_eval_at_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
         nod_log[d] = 2.0 * (peak_coord_search - xc[d]) / up->grid.dx[d];
       }
       else if (ndim > 1) {
-        if (poly_order == 1) {
-          nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
-        }
-        else {
-          int pres_node_offset = preserved_node_idx % 3;
-          if (pres_node_offset == 0)
-            nod_log[d] = -1.0;
-          else if (pres_node_offset == 1)
-            nod_log[d] = 0.0;
-          else
-            nod_log[d] = 1.0;
-        }
+        // Node 0 is at left edge (-1), all others at right edge (+1).
+        nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
       }
     }
 
@@ -456,8 +398,9 @@ gkyl_array_dg_find_peaks_new_cu(struct gkyl_array_dg_find_peaks *up_ho)
   up->out_nrange = up_ho->out_nrange;
 
   up->num_peaks = up_ho->num_peaks;
-  for (int p = 0; p < GKYL_DG_FIND_PEAKS_MAX; p++)
+  for (int p = 0; p < GKYL_DG_FIND_PEAKS_MAX; p++) {
     up->peak_types[p] = up_ho->peak_types[p];
+  }
 
   int ndim = up_ho->basis.ndim;
   int poly_order = up_ho->basis.poly_order;
@@ -551,7 +494,8 @@ gkyl_array_dg_find_peaks_new_cu(struct gkyl_array_dg_find_peaks *up_ho)
 
   int out_basis_dim = (out_dim == 0) ? 1 : 1;
   int out_basis_po = (out_dim == 0) ? 0 : poly_order;
-  struct gkyl_basis *tmp_out_basis_dev = gkyl_cart_modal_serendip_cu_dev_new(out_basis_dim, out_basis_po);
+  struct gkyl_basis *tmp_out_basis_dev = gkyl_cart_modal_serendip_cu_dev_new(out_basis_dim,
+    out_basis_po);
   gkyl_cu_memcpy(&up->out_basis, tmp_out_basis_dev, sizeof(struct gkyl_basis), GKYL_CU_MEMCPY_D2H);
   gkyl_cu_free(tmp_out_basis_dev);
 
diff --git a/core/zero/gkyl_array_dg_find_peaks.h b/core/zero/gkyl_array_dg_find_peaks.h
index 289a3ae3aa..ec866a713c 100644
--- a/core/zero/gkyl_array_dg_find_peaks.h
+++ b/core/zero/gkyl_array_dg_find_peaks.h
@@ -8,22 +8,22 @@
 /**
  * Find all peaks (local maxima, local minima, and boundary values) of a DG
  * field along one direction.
- * 
+ *
  * For a 2D input array f(psi, z), finding peaks along z (dir=1) gives arrays:
  *   out_val[k](psi) = value of k-th peak along z for each psi
  *   out_coord[k](psi) = z-coordinate of k-th peak for each psi
- * 
+ *
  * For a 1D input array f(z), finding peaks along z (dir=0) gives scalars:
  *   out_val[k] = value of k-th peak
  *   out_coord[k] = z-coordinate of k-th peak
- * 
+ *
  * Peaks are detected by sampling the field at nodal points along the search
  * direction and identifying:
  *   - EDGE_LO: Value at the lower boundary of the domain
  *   - LOCAL_MAX: Points where f increases then decreases
- *   - LOCAL_MIN: Points where f decreases then increases  
+ *   - LOCAL_MIN: Points where f decreases then increases
  *   - EDGE_HI: Value at the upper boundary of the domain
- * 
+ *
  * The number of peaks is determined by scanning along the search direction
  * at a middle preserved-direction coordinate.
  */
@@ -52,7 +52,7 @@ struct gkyl_array_dg_find_peaks_inp {
  * scanning the input field along the search direction at a middle coordinate.
  * This must be called AFTER the input field is initialized, as it scans the
  * field to determine the number of peaks.
- * 
+ *
  * @param inp Input parameters
  * @param field Input field to scan for peak count determination
  * @return New updater pointer
@@ -63,15 +63,16 @@ struct gkyl_array_dg_find_peaks* gkyl_array_dg_find_peaks_new(
 /**
  * Compute the peaks. For each point along the preserved dimensions,
  * find all peaks along the search direction.
- * 
+ *
  * @param up Updater object
  * @param in Input array (N-dimensional DG field)
  */
-void gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in);
+void gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in);
 
 /**
  * Get the number of peaks found.
- * 
+ *
  * @param up Updater object
  * @return Number of peaks
  */
@@ -79,48 +80,53 @@ int gkyl_array_dg_find_peaks_num_peaks(const struct gkyl_array_dg_find_peaks *up
 
 /**
  * Get the type of a specific peak (EDGE_LO, LOCAL_MAX, LOCAL_MIN, EDGE_HI).
- * 
+ *
  * @param up Updater object
  * @param peak_idx Index of the peak (0 to num_peaks-1)
  * @return Type of the peak
  */
-enum gkyl_peak_type gkyl_array_dg_find_peaks_get_type(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+enum gkyl_peak_type gkyl_array_dg_find_peaks_get_type(const struct gkyl_array_dg_find_peaks *up,
+  int peak_idx);
 
 /**
  * Get the output basis ((N-1)-dimensional, or p=0 1D for 1D->0D).
- * 
+ *
  * @param up Updater object
  * @return Pointer to output basis
  */
-const struct gkyl_basis* gkyl_array_dg_find_peaks_get_basis(const struct gkyl_array_dg_find_peaks *up);
+const struct gkyl_basis* gkyl_array_dg_find_peaks_get_basis(
+  const struct gkyl_array_dg_find_peaks *up);
 
 /**
  * Get the output grid.
- * 
+ *
  * @param up Updater object
  * @return Pointer to output grid
  */
-const struct gkyl_rect_grid* gkyl_array_dg_find_peaks_get_grid(const struct gkyl_array_dg_find_peaks *up);
+const struct gkyl_rect_grid* gkyl_array_dg_find_peaks_get_grid(
+  const struct gkyl_array_dg_find_peaks *up);
 
 /**
  * Get the output range.
- * 
+ *
  * @param up Updater object
  * @return Pointer to output range
  */
-const struct gkyl_range* gkyl_array_dg_find_peaks_get_range(const struct gkyl_array_dg_find_peaks *up);
+const struct gkyl_range* gkyl_array_dg_find_peaks_get_range(
+  const struct gkyl_array_dg_find_peaks *up);
 
 /**
  * Get the output extended range.
- * 
+ *
  * @param up Updater object
  * @return Pointer to output extended range
  */
-const struct gkyl_range* gkyl_array_dg_find_peaks_get_range_ext(const struct gkyl_array_dg_find_peaks *up);
+const struct gkyl_range* gkyl_array_dg_find_peaks_get_range_ext(
+  const struct gkyl_array_dg_find_peaks *up);
 
 /**
  * Get the output nodal range.
- * 
+ *
  * @param up Updater object
  * @return Pointer to output nodal range
  */
@@ -129,61 +135,65 @@ gkyl_array_dg_find_peaks_get_nodal_range(const struct gkyl_array_dg_find_peaks *
 
 /**
  * Get the output array containing peak values for a specific peak.
- * 
+ *
  * @param up Updater object
  * @param peak_idx Index of the peak (0 to num_peaks-1)
  * @return Pointer to output values array (modal DG expansion)
  */
-const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_vals(
+  const struct gkyl_array_dg_find_peaks *up, int peak_idx);
 
 /**
  * Get the output array containing peak values in nodal basis for a specific peak.
- * 
+ *
  * @param up Updater object
  * @param peak_idx Index of the peak (0 to num_peaks-1)
  * @return Pointer to output values array (nodal DG expansion)
  */
-const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_vals_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_vals_nodal(
+  const struct gkyl_array_dg_find_peaks *up, int peak_idx);
 
 /**
  * Get the output array containing coordinates of a specific peak.
- * 
+ *
  * @param up Updater object
  * @param peak_idx Index of the peak (0 to num_peaks-1)
  * @return Pointer to output coordinates array (modal DG expansion)
  */
-const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords(
+  const struct gkyl_array_dg_find_peaks *up, int peak_idx);
 
 /**
  * Get the output array containing coordinates in nodal basis of a specific peak.
- * 
+ *
  * @param up Updater object
  * @param peak_idx Index of the peak (0 to num_peaks-1)
  * @return Pointer to output coordinates array (nodal DG expansion)
  */
-const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords_nodal(
+  const struct gkyl_array_dg_find_peaks *up, int peak_idx);
 
 /**
  * Project (evaluate) an arbitrary array onto the peak locations previously
  * found by gkyl_array_dg_find_peaks_advance.
- * 
+ *
  * For a 1D case with 5 peaks, this evaluates the input array at those 5 peak
  * locations and returns the values.
- * 
+ *
  * For a 2D case with peaks along lines (e.g., psi vs z with peaks in z),
  * this evaluates the input array along the contours defined by the peak
  * locations for each psi.
- * 
+ *
  * The peak locations must have been previously computed via
  * gkyl_array_dg_find_peaks_advance. This method evaluates the provided array
  * at those same locations.
- * 
+ *
  * Example usage:
  * @code
  * // 1. Find peaks in bmag along z direction
  * struct gkyl_array_dg_find_peaks *peak_finder = gkyl_array_dg_find_peaks_new(&inp, bmag);
  * gkyl_array_dg_find_peaks_advance(peak_finder, bmag);
- * 
+ *
  * // 2. Get bmag_max (LOCAL_MAX peak) location and value
  * int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peak_finder);
  * int bmag_max_idx = -1;
@@ -195,17 +205,17 @@ const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords_nodal(const str
  * }
  * const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(peak_finder, bmag_max_idx);
  * const struct gkyl_array *z_max = gkyl_array_dg_find_peaks_acquire_coords(peak_finder, bmag_max_idx);
- * 
+ *
  * // 3. Evaluate phi at the same locations where bmag has peaks
  * struct gkyl_array *phi_at_peaks[num_peaks];
  * for (int p = 0; p < num_peaks; p++) {
  *   phi_at_peaks[p] = gkyl_array_new(GKYL_DOUBLE, out_basis.num_basis, out_range_ext.volume);
  * }
  * gkyl_array_dg_find_peaks_project_on_peaks(peak_finder, phi, phi_at_peaks);
- * 
+ *
  * // 4. Now phi_at_peaks[bmag_max_idx] contains phi evaluated at the mirror throat
  * @endcode
- * 
+ *
  * @param up Updater object (must have run advance first)
  * @param in_array Array to evaluate at peak locations (same grid/basis as original field)
  * @param out_vals Output: array of evaluated values for each peak
@@ -217,28 +227,28 @@ void gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *
 /**
  * Project (evaluate) an arbitrary array onto a single peak location previously
  * found by gkyl_array_dg_find_peaks_advance.
- * 
+ *
  * This is a more efficient version of gkyl_array_dg_find_peaks_project_on_peaks
  * when you only need the evaluation at one specific peak (e.g., only at the
  * mirror throat LOCAL_MAX peak).
- * 
+ *
  * Example usage:
  * @code
  * // 1. Find peaks in bmag along z direction
  * struct gkyl_array_dg_find_peaks *peak_finder = gkyl_array_dg_find_peaks_new(&inp, bmag);
  * gkyl_array_dg_find_peaks_advance(peak_finder, bmag);
- * 
+ *
  * // 2. Find the LOCAL_MAX peak index
  * int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peak_finder);
  * int bmag_max_idx = num_peaks - 2; // Assuming standard ordering
- * 
+ *
  * // 3. Evaluate phi only at the mirror throat (bmag_max location)
  * struct gkyl_array *phi_m = gkyl_array_new(GKYL_DOUBLE, out_basis.num_basis, out_range_ext.volume);
  * gkyl_array_dg_find_peaks_project_on_peak_idx(peak_finder, phi, bmag_max_idx, phi_m);
- * 
+ *
  * // 4. Now phi_m contains phi evaluated at the mirror throat
  * @endcode
- * 
+ *
  * @param up Updater object (must have run advance first)
  * @param in_array Array to evaluate at peak location (same grid/basis as original field)
  * @param peak_idx Index of the peak to evaluate at (0 to num_peaks-1)
@@ -250,12 +260,11 @@ void gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peak
 
 /**
  * Release the updater and all internal arrays.
- * 
+ *
  * @param up Updater to delete
  */
 void gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up);
 
-
 /**
  * Create a new GPU peak finder updater from an already-initialized host object.
  * Allocates GPU arrays, copies the struct to device, and returns a host-side
diff --git a/core/zero/gkyl_array_dg_find_peaks_priv.h b/core/zero/gkyl_array_dg_find_peaks_priv.h
index 1686f3291a..f8695a569e 100644
--- a/core/zero/gkyl_array_dg_find_peaks_priv.h
+++ b/core/zero/gkyl_array_dg_find_peaks_priv.h
@@ -20,8 +20,9 @@ dg_find_peaks_log_to_comp(int ndim, const double *eta,
   const double *GKYL_RESTRICT dx, const double *GKYL_RESTRICT xc,
   double *GKYL_RESTRICT xout)
 {
-  for (int d = 0; d < ndim; ++d)
-    xout[d] = 0.5*dx[d]*eta[d] + xc[d];
+  for (int d = 0; d < ndim; ++d) {
+    xout[d] = 0.5 * dx[d] * eta[d] + xc[d];
+  }
 }
 
 /** Internal struct for dg_find_peaks updater. */
@@ -83,4 +84,3 @@ struct gkyl_array_dg_find_peaks {
  * @param ref Reference counter for this object.
  */
 void gkyl_array_dg_find_peaks_free(const struct gkyl_ref_count *ref);
-
diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index fda444698f..fb916f59ca 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -13,12 +13,14 @@ proj_on_basis_c2p_position_func(const double *xcomp, double *xphys, void *ctx)
 }
 
 void
-gk_species_damping_write_disabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write_disabled(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
 }
 
 void
-gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write_enabled(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
   struct timespec wst = gkyl_wall_clock();
   // DG metadata for damping rate.
@@ -26,19 +28,21 @@ gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *g
     { .key = "poly_order", .elem_type = GKYL_MP_UNSIGNED_INT, .uval = 0 },
     { .key = "basis_type", .elem_type = GKYL_MP_STRING, .cval = "serendipity" },
   };
-  int mpe_drate_len = sizeof(mpe_drate)/sizeof(mpe_drate[0]);
+  int mpe_drate_len = sizeof(mpe_drate) / sizeof(mpe_drate[0]);
   // Update app basic metada with time/frame.
   gkyl_msgpack_map_elem_set_double(app->io_meta_basic_len, app->io_meta_basic, "time", tm);
   gkyl_msgpack_map_elem_set_uint(app->io_meta_basic_len, app->io_meta_basic, "frame", frame);
   // Package metadata.
-  int io_meta_len[] = {app->io_meta_basic_len, mpe_drate_len, app->gk_geom->io_meta_len};
-  const struct gkyl_msgpack_map_elem* io_meta[] = {app->io_meta_basic, mpe_drate, app->gk_geom->io_meta};
-  struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len)/sizeof(int), io_meta_len, io_meta);
+  int io_meta_len[] = { app->io_meta_basic_len, mpe_drate_len, app->gk_geom->io_meta_len };
+  const struct gkyl_msgpack_map_elem *io_meta[] = { app->io_meta_basic, mpe_drate,
+                                                    app->gk_geom->io_meta };
+  struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len) / sizeof(int),
+    io_meta_len, io_meta);
 
   // Write out the damping rate.
   const char *fmt = "%s-%s_damping_rate_%d.gkyl";
   int sz = gkyl_calc_strlen(fmt, app->name, gks->info.name, frame);
-  char fileNm[sz+1]; // ensures no buffer overflow
+  char fileNm[sz + 1]; // ensures no buffer overflow
   snprintf(fileNm, sizeof fileNm, fmt, app->name, gks->info.name, frame);
 
   // Copy data from device to host before writing it out.
@@ -48,12 +52,13 @@ gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *g
   gkyl_comm_array_write(gks->comm, &gks->grid, &gks->local, mt, gks->damping.rate_host, fileNm);
   app->stat.n_io += 1;
 
-  gkyl_msgpack_data_release(mt); 
+  gkyl_msgpack_data_release(mt);
   app->stat.species_diag_io_tm += gkyl_time_diff_now_sec(wst);
 }
 
 void
-gk_species_damping_write_init_only(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write_init_only(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
   gk_species_damping_write_enabled(app, gks, tm, frame);
   gks->damping.write_func = gk_species_damping_write_disabled;
@@ -81,42 +86,40 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
   // Default function pointers.
   damp->write_func = gk_species_damping_write_disabled;
 
-
   damp->proj_on_basis_c2p_ctx.cdim = app->cdim;
   damp->proj_on_basis_c2p_ctx.vdim = gks->local_vel.ndim;
   damp->proj_on_basis_c2p_ctx.vel_map = gks->vel_map;
   damp->proj_on_basis_c2p_ctx.pos_map = app->position_map;
 
-
   if (damp->type) {
     // Allocate rate array.
-    damp->rate = mkarr(app->use_gpu, num_quad==1? 1 : gks->basis.num_basis, gks->local_ext.volume);
+    damp->rate = mkarr(app->use_gpu, num_quad == 1? 1 : gks->basis.num_basis,
+      gks->local_ext.volume);
     damp->rate_host = damp->rate;
     if (app->use_gpu)
-      damp->rate_host = mkarr(false, damp->rate->ncomp, damp->rate->size); 
+      damp->rate_host = mkarr(false, damp->rate->ncomp, damp->rate->size);
 
     if (damp->type == GKYL_GK_DAMPING_USER_INPUT) {
       struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
       proj_on_basis_c2p_ctx.cdim = app->cdim;
       proj_on_basis_c2p_ctx.vdim = gks->local_vel.ndim;
       proj_on_basis_c2p_ctx.vel_map = gks->vel_map;
-      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew( &(struct gkyl_proj_on_basis_inp) {
-          .grid = &gks->grid,
-          .basis = &gks->basis,
-          .num_quad = num_quad,
-          .num_ret_vals = 1,
-          .eval = gks->info.damping.rate_profile,
-          .ctx = gks->info.damping.rate_profile_ctx,
-          .c2p_func = proj_on_basis_c2p_phase_func,
-          .c2p_func_ctx = &proj_on_basis_c2p_ctx,
-        }
-      );
+      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew(&(struct gkyl_proj_on_basis_inp) {
+        .grid = &gks->grid,
+        .basis = &gks->basis,
+        .num_quad = num_quad,
+        .num_ret_vals = 1,
+        .eval = gks->info.damping.rate_profile,
+        .ctx = gks->info.damping.rate_profile_ctx,
+        .c2p_func = proj_on_basis_c2p_phase_func,
+        .c2p_func_ctx = &proj_on_basis_c2p_ctx,
+      });
       gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, damp->rate_host);
       gkyl_proj_on_basis_release(projup);
       gkyl_array_copy(damp->rate, damp->rate_host);
 
       if (num_quad == 1)
-        gkyl_array_scale_range(damp->rate, 1.0/pow(sqrt(2.0),gks->grid.ndim), &gks->local);
+        gkyl_array_scale_range(damp->rate, 1.0 / pow(sqrt(2.0), gks->grid.ndim), &gks->local);
     }
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
       damp->evolve = true; // Since the loss cone boundary is proportional to phi(t).
@@ -137,34 +140,39 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
         app->gk_geom->geo_int.bmag->ncomp, app->global_ext.volume);
       damp->phi_smooth_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume);
 
-      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag, bmag_int_global);
+      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag,
+        bmag_int_global);
       damp->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global);
       gkyl_array_dg_find_peaks_advance(damp->bmag_peak_finder, app->gk_geom->geo_int.bmag);
       gkyl_array_release(bmag_int_global);
-      
+
       // Get the LOCAL_MAX peak (bmag maximum along z direction).
       int num_peaks = gkyl_array_dg_find_peaks_num_peaks(damp->bmag_peak_finder);
-      damp->bmag_max_peak_idx = num_peaks-2; // Edge is num_peaks-1, so maximum is one less
-      damp->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder, damp->bmag_max_peak_idx);
-      damp->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder, damp->bmag_max_peak_idx);
-      damp->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder, num_peaks-1);
-      damp->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder, num_peaks-1);
+      damp->bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less
+      damp->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder,
+        damp->bmag_max_peak_idx);
+      damp->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder,
+        damp->bmag_max_peak_idx);
+      damp->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder,
+        num_peaks - 1);
+      damp->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder,
+        num_peaks - 1);
       damp->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(damp->bmag_peak_finder);
       damp->bmag_max_range = gkyl_array_dg_find_peaks_get_range(damp->bmag_peak_finder);
       damp->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(damp->bmag_peak_finder);
 
-      damp->phi_at_bmag_max = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis, 
+      damp->phi_at_bmag_max = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis,
         damp->bmag_max_range_ext->volume);
-      damp->phi_at_bmag_tandem = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis, 
+      damp->phi_at_bmag_tandem = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis,
         damp->bmag_max_range_ext->volume);
       // phi is defined as 0 at the wall
 
       bool is_symmetric;
       int cdim = app->cdim;
-      if (gkyl_compare_double(-app->grid.lower[cdim-1], app->grid.upper[cdim-1], 1e-12)) {
+      if (gkyl_compare_double(-app->grid.lower[cdim - 1], app->grid.upper[cdim - 1], 1e-12)) {
         is_symmetric = true;
       }
-      else if (gkyl_compare_double(app->grid.lower[cdim-1], 0.0, 1e-12)) {
+      else if (gkyl_compare_double(app->grid.lower[cdim - 1], 0.0, 1e-12)) {
         is_symmetric = false;
       }
       else {
@@ -182,22 +190,24 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
       }
 
       if (damp->is_tandem) {
-        damp->bmag_tandem_peak_idx = num_peaks-4;
-      } else {
-        damp->bmag_tandem_peak_idx = num_peaks-2;
+        damp->bmag_tandem_peak_idx = num_peaks - 4;
       }
-      damp->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder, damp->bmag_tandem_peak_idx);
-      damp->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder, damp->bmag_tandem_peak_idx);
-
+      else {
+        damp->bmag_tandem_peak_idx = num_peaks - 2;
+      }
+      damp->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder,
+        damp->bmag_tandem_peak_idx);
+      damp->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder,
+        damp->bmag_tandem_peak_idx);
 
       // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
         .phase_grid = &gks->grid,
         .conf_basis = &app->basis,
         .phase_basis = &gks->basis,
-        .conf_range =  &app->local,
+        .conf_range = &app->local,
         .conf_range_ext = &app->local_ext,
-        .vel_range = &gks->local_vel, 
+        .vel_range = &gks->local_vel,
         .vel_map = gks->vel_map,
         .bmag = app->gk_geom->geo_int.bmag,
         .bmag_max = damp->bmag_max,
@@ -216,21 +226,25 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
         .c2p_pos_func_ctx = &damp->proj_on_basis_c2p_ctx,
         .use_gpu = app->use_gpu,
       };
-      damp->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
+      damp->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
 
       // Project the conf-space rate profile provided.
-      struct gkyl_array *scale_prof_high_order = mkarr(app->use_gpu, gks->basis.num_basis, gks->local_ext.volume);
-      struct gkyl_array *scale_prof_high_order_ho = app->use_gpu? mkarr(false, scale_prof_high_order->ncomp, scale_prof_high_order->size)
+      struct gkyl_array *scale_prof_high_order = mkarr(app->use_gpu, gks->basis.num_basis,
+        gks->local_ext.volume);
+      struct gkyl_array *scale_prof_high_order_ho = app->use_gpu? mkarr(false,
+        scale_prof_high_order->ncomp, scale_prof_high_order->size)
                                                      : gkyl_array_acquire(scale_prof_high_order);
-      
-      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_new(&gks->grid, &gks->basis, num_quad, 1, 
+
+      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_new(&gks->grid, &gks->basis, num_quad, 1,
         gks->info.damping.rate_profile, gks->info.damping.rate_profile_ctx);
       gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, scale_prof_high_order_ho);
       gkyl_proj_on_basis_release(projup);
       gkyl_array_copy(scale_prof_high_order, scale_prof_high_order_ho);
 
-      damp->scale_prof = mkarr(app->use_gpu, num_quad == 1? 1 : gks->basis.num_basis, gks->local_ext.volume);
-      gkyl_array_set_offset(damp->scale_prof, pow(sqrt(2.0),gks->grid.ndim), scale_prof_high_order, 0);
+      damp->scale_prof = mkarr(app->use_gpu, num_quad == 1? 1 : gks->basis.num_basis,
+        gks->local_ext.volume);
+      gkyl_array_set_offset(damp->scale_prof, pow(sqrt(2.0), gks->grid.ndim), scale_prof_high_order,
+        0);
 
       gkyl_array_release(scale_prof_high_order_ho);
       gkyl_array_release(scale_prof_high_order);
@@ -239,14 +253,15 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
       // Find the potential at the mirror throat.
       gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, app->field->phi_smooth,
         damp->bmag_max_peak_idx, damp->phi_at_bmag_max);
-      
+
       if (damp->is_tandem) {
         gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, app->field->phi_smooth,
           damp->bmag_tandem_peak_idx, damp->phi_at_bmag_tandem);
         // Project the loss cone mask.
         gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
           app->field->phi_smooth, damp->phi_at_bmag_max, damp->phi_at_bmag_tandem, damp->rate);
-      } else {
+      }
+      else {
         // Project the loss cone mask using the phi_m array.
         gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
           app->field->phi_smooth, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
@@ -266,7 +281,8 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
 }
 
 void
-gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *gks, struct gk_damping *damp, 
+gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+  struct gk_damping *damp,
   const struct gkyl_array *phi, const struct gkyl_array *fin, struct gkyl_array *f_buffer,
   struct gkyl_array *rhs, struct gkyl_array *cflrate)
 {
@@ -284,11 +300,13 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
         damp->bmag_max_peak_idx, damp->phi_at_bmag_max);
 
       if (damp->is_tandem) {
-        gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, damp->phi_smooth_global,
+        gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder,
+          damp->phi_smooth_global,
           damp->bmag_tandem_peak_idx, damp->phi_at_bmag_tandem);
         gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
           damp->phi_smooth_global, damp->phi_at_bmag_max, damp->phi_at_bmag_tandem, damp->rate);
-      } else {
+      }
+      else {
         gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
           damp->phi_smooth_global, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
       }
@@ -308,7 +326,7 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
 }
 
 void
-gk_species_damping_write(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm, int frame)
 {
   gks->damping.write_func(app, gks, tm, frame);
 }
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index e61b6ac3e8..fce3d9d568 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -6,12 +6,14 @@
 #include <gkyl_loss_cone_mask_gyrokinetic.h>
 
 void
-gk_species_fdot_multiplier_write_disabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_fdot_multiplier_write_disabled(gkyl_gyrokinetic_app *app, struct gk_species *gks,
+  double tm, int frame)
 {
 }
 
 void
-gk_species_fdot_multiplier_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_fdot_multiplier_write_enabled(gkyl_gyrokinetic_app *app, struct gk_species *gks,
+  double tm, int frame)
 {
   struct timespec wst = gkyl_wall_clock();
   // DG metadata for multiplier.
@@ -19,34 +21,38 @@ gk_species_fdot_multiplier_write_enabled(gkyl_gyrokinetic_app* app, struct gk_sp
     { .key = "poly_order", .elem_type = GKYL_MP_UNSIGNED_INT, .uval = 0 },
     { .key = "basis_type", .elem_type = GKYL_MP_STRING, .cval = "serendipity" },
   };
-  int mpe_mult_len = sizeof(mpe_mult)/sizeof(mpe_mult[0]);
+  int mpe_mult_len = sizeof(mpe_mult) / sizeof(mpe_mult[0]);
   // Update app basic metada with time/frame.
   gkyl_msgpack_map_elem_set_double(app->io_meta_basic_len, app->io_meta_basic, "time", tm);
   gkyl_msgpack_map_elem_set_uint(app->io_meta_basic_len, app->io_meta_basic, "frame", frame);
   // Package metadata.
-  int io_meta_len[] = {app->io_meta_basic_len, mpe_mult_len, app->gk_geom->io_meta_len};
-  const struct gkyl_msgpack_map_elem* io_meta[] = {app->io_meta_basic, mpe_mult, app->gk_geom->io_meta};
-  struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len)/sizeof(int), io_meta_len, io_meta);
+  int io_meta_len[] = { app->io_meta_basic_len, mpe_mult_len, app->gk_geom->io_meta_len };
+  const struct gkyl_msgpack_map_elem *io_meta[] = { app->io_meta_basic, mpe_mult,
+                                                    app->gk_geom->io_meta };
+  struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len) / sizeof(int),
+    io_meta_len, io_meta);
 
   // Write out the multiplicative function.
   const char *fmt = "%s-%s_fdot_multiplier_%d.gkyl";
   int sz = gkyl_calc_strlen(fmt, app->name, gks->info.name, frame);
-  char fileNm[sz+1]; // ensures no buffer overflow
+  char fileNm[sz + 1]; // ensures no buffer overflow
   snprintf(fileNm, sizeof fileNm, fmt, app->name, gks->info.name, frame);
 
   // Copy data from device to host before writing it out.
   if (app->use_gpu)
     gkyl_array_copy(gks->fdot_mult.multiplier_host, gks->fdot_mult.multiplier);
 
-  gkyl_comm_array_write(gks->comm, &gks->grid, &gks->local, mt, gks->fdot_mult.multiplier_host, fileNm);
+  gkyl_comm_array_write(gks->comm, &gks->grid, &gks->local, mt, gks->fdot_mult.multiplier_host,
+    fileNm);
   app->stat.n_io += 1;
 
-  gkyl_msgpack_data_release(mt); 
+  gkyl_msgpack_data_release(mt);
   app->stat.species_diag_io_tm += gkyl_time_diff_now_sec(wst);
 }
 
 void
-gk_species_fdot_multiplier_write_init_only(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_fdot_multiplier_write_init_only(gkyl_gyrokinetic_app *app, struct gk_species *gks,
+  double tm, int frame)
 {
   gk_species_fdot_multiplier_write_enabled(app, gks, tm, frame);
   gks->fdot_mult.write_func = gk_species_fdot_multiplier_write_disabled;
@@ -61,7 +67,8 @@ gk_species_fdot_multiplier_advance_mult(gkyl_gyrokinetic_app *app, const struct
 }
 
 void
-gk_species_fdot_multiplier_advance_omegaH_mult(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+gk_species_fdot_multiplier_advance_omegaH_mult(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, double *out)
 {
   // Multiply out by the multplier.
@@ -69,26 +76,30 @@ gk_species_fdot_multiplier_advance_omegaH_mult(gkyl_gyrokinetic_app *app, const
 }
 
 void
-gk_species_fdot_multiplier_advance_omegaH_disabled(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+gk_species_fdot_multiplier_advance_omegaH_disabled(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, double *out)
 {
 }
 
 void
-gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
   gkyl_comm_array_allgather(app->comm, &app->local, &app->global, phi, fdmul->phi_smooth_global);
   // Find the potential at bmag_max
   gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, fdmul->phi_smooth_global,
     fdmul->bmag_max_peak_idx, fdmul->phi_at_bmag_max);
-  
+
   if (fdmul->is_tandem) {
     gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, fdmul->phi_smooth_global,
       fdmul->bmag_tandem_peak_idx, fdmul->phi_at_bmag_tandem);
     gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
-      fdmul->phi_smooth_global, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_tandem, fdmul->multiplier);
-  } else {
+      fdmul->phi_smooth_global, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_tandem,
+      fdmul->multiplier);
+  }
+  else {
     gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
       fdmul->phi_smooth_global, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_max, fdmul->multiplier);
   }
@@ -145,7 +156,8 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
 
     // Allocate multiplier array.
     fdmul->multiplier = mkarr(app->use_gpu, basis_mult.num_basis, gks->local_ext.volume);
-    fdmul->multiplier_host = app->use_gpu? mkarr(false, fdmul->multiplier->ncomp, fdmul->multiplier->size)
+    fdmul->multiplier_host = app->use_gpu? mkarr(false, fdmul->multiplier->ncomp,
+      fdmul->multiplier->size)
                                          : gkyl_array_acquire(fdmul->multiplier);
 
     // Context for c2p function passed to proj_on_basis.
@@ -156,17 +168,16 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
 
     if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_USER_INPUT) {
 
-      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew( &(struct gkyl_proj_on_basis_inp) {
-          .grid = &gks->grid,
-          .basis = &basis_mult,
-          .num_quad = basis_mult.poly_order+1,
-          .num_ret_vals = 1,
-          .eval = gks->info.time_rate_multiplier.profile,
-          .ctx = gks->info.time_rate_multiplier.profile_ctx,
-          .c2p_func = proj_on_basis_c2p_phase_func,
-          .c2p_func_ctx = &fdmul->proj_on_basis_c2p_ctx,
-        }
-      );
+      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew(&(struct gkyl_proj_on_basis_inp) {
+        .grid = &gks->grid,
+        .basis = &basis_mult,
+        .num_quad = basis_mult.poly_order + 1,
+        .num_ret_vals = 1,
+        .eval = gks->info.time_rate_multiplier.profile,
+        .ctx = gks->info.time_rate_multiplier.profile_ctx,
+        .c2p_func = proj_on_basis_c2p_phase_func,
+        .c2p_func_ctx = &fdmul->proj_on_basis_c2p_ctx,
+      });
       gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, fdmul->multiplier_host);
       gkyl_proj_on_basis_release(projup);
       gkyl_array_copy(fdmul->multiplier, fdmul->multiplier_host);
@@ -178,15 +189,14 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
         fdmul->write_func = gk_species_fdot_multiplier_write_init_only;
       else
         gkyl_array_release(fdmul->multiplier_host);
-
     }
     else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE) {
       // Available options:
-      //   A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
-      //   B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_const=true. Output: ncomp=1 array.
+      // A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
+      // B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_const=true. Output: ncomp=1 array.
       enum gkyl_quad_type qtype = GKYL_GAUSS_LOBATTO_QUAD;
-      int num_quad = gks->basis.poly_order+1; // This can be p+1 or 1. Must be
-                                              // at least p+1 for Gauss-Lobatto.
+      int num_quad = gks->basis.poly_order + 1; // This can be p+1 or 1. Must be
+                                                // at least p+1 for Gauss-Lobatto.
 
       // Create peak finder for bmag to find the mirror throat.
       // Search along the parallel (z) direction, which is the last configuration space dimension.
@@ -204,36 +214,40 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
         app->gk_geom->geo_int.bmag->ncomp, app->global_ext.volume);
       fdmul->phi_smooth_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume);
 
-      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag, bmag_int_global);
+      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag,
+        bmag_int_global);
 
       fdmul->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global);
       gkyl_array_dg_find_peaks_advance(fdmul->bmag_peak_finder, bmag_int_global);
       gkyl_array_release(bmag_int_global);
-      
+
       // Get the LOCAL_MAX peak (bmag maximum along z direction).
       int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder);
-      fdmul->bmag_max_peak_idx = num_peaks-2; // Edge is num_peaks-1, so maximum is one less
-      fdmul->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
-      fdmul->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, fdmul->bmag_max_peak_idx);
-      fdmul->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, num_peaks-1);
-      fdmul->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, num_peaks-1);
+      fdmul->bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less
+      fdmul->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder,
+        fdmul->bmag_max_peak_idx);
+      fdmul->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder,
+        fdmul->bmag_max_peak_idx);
+      fdmul->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder,
+        num_peaks - 1);
+      fdmul->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder,
+        num_peaks - 1);
       fdmul->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(fdmul->bmag_peak_finder);
       fdmul->bmag_max_range = gkyl_array_dg_find_peaks_get_range(fdmul->bmag_peak_finder);
       fdmul->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(fdmul->bmag_peak_finder);
 
-      fdmul->phi_at_bmag_max = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis, 
+      fdmul->phi_at_bmag_max = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis,
         fdmul->bmag_max_range_ext->volume);
-      fdmul->phi_at_bmag_tandem = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis, 
+      fdmul->phi_at_bmag_tandem = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis,
         fdmul->bmag_max_range_ext->volume);
       // phi is defined as 0 at the wall
 
-
       bool is_symmetric, is_tandem;
       int cdim = app->cdim;
-      if (gkyl_compare_double(-app->grid.lower[cdim-1], app->grid.upper[cdim-1], 1e-12)) {
+      if (gkyl_compare_double(-app->grid.lower[cdim - 1], app->grid.upper[cdim - 1], 1e-12)) {
         is_symmetric = true;
       }
-      else if (gkyl_compare_double(app->grid.lower[cdim-1], 0.0, 1e-12)){
+      else if (gkyl_compare_double(app->grid.lower[cdim - 1], 0.0, 1e-12)) {
         is_symmetric = false;
       }
       else {
@@ -251,21 +265,24 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
       }
 
       if (is_tandem) {
-        fdmul->bmag_tandem_peak_idx = num_peaks-4;
-      } else {
-        fdmul->bmag_tandem_peak_idx = num_peaks-2;
+        fdmul->bmag_tandem_peak_idx = num_peaks - 4;
       }
-      fdmul->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, fdmul->bmag_tandem_peak_idx);
-      fdmul->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, fdmul->bmag_tandem_peak_idx);
+      else {
+        fdmul->bmag_tandem_peak_idx = num_peaks - 2;
+      }
+      fdmul->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder,
+        fdmul->bmag_tandem_peak_idx);
+      fdmul->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder,
+        fdmul->bmag_tandem_peak_idx);
 
       // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
         .phase_grid = &gks->grid,
         .conf_basis = &app->basis,
         .phase_basis = &gks->basis,
-        .conf_range =  &app->local,
+        .conf_range = &app->local,
         .conf_range_ext = &app->local_ext,
-        .vel_range = &gks->local_vel, 
+        .vel_range = &gks->local_vel,
         .vel_map = gks->vel_map,
         .bmag = app->gk_geom->geo_int.bmag,
         .bmag_max = fdmul->bmag_max,
@@ -286,14 +303,15 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
         .c2p_pos_func_ctx = &fdmul->proj_on_basis_c2p_ctx,
         .use_gpu = app->use_gpu,
       };
-      fdmul->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
+      fdmul->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
 
       fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_loss_cone_mult;
       fdmul->advance_times_omegaH_func = gk_species_fdot_multiplier_advance_omegaH_mult;
       fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_mult;
       if (fdmul->write_diagnostics) {
         fdmul->write_func = gk_species_fdot_multiplier_write_enabled;
-      } else {
+      }
+      else {
         gkyl_array_release(fdmul->multiplier_host);
       }
     }
@@ -301,7 +319,8 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
 }
 
 void
-gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
   struct timespec wst = gkyl_wall_clock();
@@ -311,9 +330,9 @@ gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, const st
   app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst);
 }
 
-
 void
-gk_species_fdot_multiplier_advance_times_omegaH(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+gk_species_fdot_multiplier_advance_times_omegaH(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, double *out)
 {
   struct timespec wst = gkyl_wall_clock();
@@ -322,9 +341,10 @@ gk_species_fdot_multiplier_advance_times_omegaH(gkyl_gyrokinetic_app *app, const
 
   app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst);
 }
-  
+
 void
-gk_species_fdot_multiplier_advance_times_rate(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+gk_species_fdot_multiplier_advance_times_rate(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
   struct timespec wst = gkyl_wall_clock();
@@ -332,17 +352,18 @@ gk_species_fdot_multiplier_advance_times_rate(gkyl_gyrokinetic_app *app, const s
   fdmul->advance_times_rate_func(app, gks, fdmul, phi, out);
 
   app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst);
-  
 }
 
 void
-gk_species_fdot_multiplier_write(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_fdot_multiplier_write(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
   gks->fdot_mult.write_func(app, gks, tm, frame);
 }
 
 void
-gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, const struct gk_fdot_multiplier *fdmul)
+gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app,
+  const struct gk_fdot_multiplier *fdmul)
 {
   if (fdmul->type) {
     gkyl_array_release(fdmul->multiplier);
@@ -372,7 +393,7 @@ gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, const
 }
 
 void
-gk_species_fdot_multiplier_reset(gkyl_gyrokinetic_app* app, double tm, struct gk_species *gks,
+gk_species_fdot_multiplier_reset(gkyl_gyrokinetic_app *app, double tm, struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp)
 {
   gk_species_fdot_multiplier_release(app, fdmul);
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
index 57eb3a38c2..662b4bbfa5 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
@@ -30,8 +30,7 @@ struct gk_poa_phase_params {
 };
 
 // Define the context of the simulation. This is basically all the globals
-struct gk_mirror_ctx
-{
+struct gk_mirror_ctx {
   int cdim, vdim; // Dimensionality.
   // Plasma parameters
   double mi;
@@ -89,7 +88,6 @@ struct gk_mirror_ctx
   double z_in;    // Working variable for z integration
 };
 
-
 double
 psi_RZ(double RIn, double ZIn, void *ctx)
 {
@@ -99,8 +97,8 @@ psi_RZ(double RIn, double ZIn, void *ctx)
   double Z_m = app->Z_m;
 
   double psi = 0.5 * pow(RIn, 2.) * mcB *
-               (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
-                1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
   return psi;
 }
 
@@ -112,9 +110,9 @@ R_psiZ(double psiIn, double ZIn, void *ctx)
   double gamma = app->gamma;
   double Z_m = app->Z_m;
 
-  double Rout = sqrt(2. * psiIn / (mcB * 
+  double Rout = sqrt(2. * psiIn / (mcB *
     (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
-     1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))))));
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))))));
   return Rout;
 }
 
@@ -129,12 +127,12 @@ Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, doubl
   double Rcoord = R_psiZ(psiIn, ZIn, ctx);
 
   BRad[0] = -(1. / 2.) * Rcoord * mcB *
-          (-2. * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.)))
-           -2. * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
+    (-2. * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
+    2. * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
 
   BZ[0] = mcB *
-        ( 1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.)))
-         +1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) );
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) );
 
   Bmag[0] = sqrt(pow(BRad[0], 2) + pow(BZ[0], 2));
 }
@@ -156,13 +154,11 @@ z_psiZ(double psiIn, double ZIn, void *ctx)
   double eps = 0.0;
   app->psi_in = psiIn;
   struct gkyl_qr_res integral;
-  if (eps <= ZIn)
-  {
+  if (eps <= ZIn) {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14);
   }
-  else
-  {
-    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14); 
+  else {
+    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14);
     integral.res = -integral.res;
   }
   return integral.res;
@@ -185,14 +181,12 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   app->psi_in = psiIn;
   app->z_in = zIn;
   struct gkyl_qr_res Zout;
-  if (0.0 <= zIn)
-  {
+  if (0.0 <= zIn) {
     double fl = root_Z_psiz(-eps, ctx);
     double fr = root_Z_psiz(app->Z_max + eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14);
   }
-  else
-  {
+  else {
     double fl = root_Z_psiz(app->Z_min - eps, ctx);
     double fr = root_Z_psiz(eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14);
@@ -213,7 +207,7 @@ mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
   double x = R * cos(theta);
   double y = R * sin(theta);
 
-  xp[0] = x;  xp[1] = y;  xp[2] = Z;
+  xp[0] = x; xp[1] = y; xp[2] = Z;
 }
 
 void
@@ -227,10 +221,10 @@ bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
   Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
 
   double phi = xc[1];
-  // zc are computational coords. 
+  // zc are computational coords.
   // Set Cartesian components of magnetic field.
-  fout[0] = BRad*cos(phi);
-  fout[1] = BRad*sin(phi);
+  fout[0] = BRad * cos(phi);
+  fout[1] = BRad * sin(phi);
   fout[2] = BZ;
 }
 
@@ -273,49 +267,48 @@ eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fo
 }
 
 void
-eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double z = xn[1];
   double src_amp = app->ion_source_amplitude;
   double z_src = 0.0;
   double src_sigma = app->ion_source_sigma;
-  double src_amp_floor = src_amp*1e-2;
-  if (fabs(z) <= 0.98)
-  { 
+  double src_amp_floor = src_amp * 1e-2;
+  if (fabs(z) <= 0.98) {
     // sixth order polynomial drop of to the edge
-    fout[0] = src_amp * (1 - pow(fabs(z), 6)/0.98);
+    fout[0] = src_amp * (1 - pow(fabs(z), 6) / 0.98);
   }
-  else
-  {
+  else {
     fout[0] = 1e-16;
   }
 }
 
 void
-eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   fout[0] = 0.0;
 }
 
 void
-eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double z = xn[1];
   double TSrc0 = app->ion_source_temp;
-  double Tfloor = TSrc0*1e-2;
-  if (fabs(z) <= 0.98)
-  {
+  double Tfloor = TSrc0 * 1e-2;
+  if (fabs(z) <= 0.98) {
     fout[0] = TSrc0;
   }
-  else
-  {
+  else {
     fout[0] = Tfloor;
   }
 }
 
-void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double vpar_max_ion = app->vpar_max_ion;
@@ -323,9 +316,9 @@ void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *
 
   double cvpar = vc[0], cmu = vc[1];
   double b = 1.4;
-  vp[0] = vpar_max_ion*tan(cvpar*b)/tan(b);
+  vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b);
   // Cubic map in mu.
-  vp[1] = mu_max_ion*pow(cmu,3);
+  vp[1] = mu_max_ion * pow(cmu, 3);
 }
 
 struct gk_mirror_ctx
@@ -356,7 +349,7 @@ create_ctx(void)
   double nuFrac = 1.0;
   double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
   double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
-                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
 
   // Thermal speeds.
   double vti = sqrt(Ti0 / mi);
@@ -377,24 +370,24 @@ create_ctx(void)
   // Geometry parameters.
   double RatZeq0 = 0.10; // Radius of the field line at Z=0.
   double Z_min = -2.5;
-  double Z_max =  2.5;
+  double Z_max = 2.5;
   double mcB = 3.691260;
   double gamma = 0.226381;
   double Z_m = 0.98;
 
-  // POA parameters  
+  // POA parameters
   double alpha_oap = 5e-6;  // Factor multiplying collisionless terms.
   double alpha_fdp = 1.0;
   double tau_oap = 0.001;  // Duration of each phase.
   double tau_fdp = 7e-9;
   double tau_fdp_extra = 2e-9;
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
-  
+
   // Frame counts for each phase type (specified independently)
   int num_frames_oap = 2;        // Frames per OAP phase
   int num_frames_fdp = 2;        // Frames per FDP phase
   int num_frames_fdp_extra = 2;  // Frames for the extra FDP phase
-  
+
   // Whether to evolve the field.
   bool is_static_field_oap = true;
   bool is_static_field_fdp = false;
@@ -408,39 +401,40 @@ create_ctx(void)
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
 
   // Calculate phase structure
-  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
-  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
-  int num_phases = 2*num_cycles + 1;
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2 * num_cycles + 1;
   int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
 
-  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
-  for (int i=0; i<(num_phases-1)/2; i++) {
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
     // OAPs.
-    poa_phases[2*i].phase = GK_POA_OAP;
-    poa_phases[2*i].num_frames = num_frames_oap;
-    poa_phases[2*i].duration = tau_oap;
-    poa_phases[2*i].alpha = alpha_oap;
-    poa_phases[2*i].is_static_field = is_static_field_oap;
-    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
-    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
 
     // FDPs.
-    poa_phases[2*i+1].phase = GK_POA_FDP;
-    poa_phases[2*i+1].num_frames = num_frames_fdp;
-    poa_phases[2*i+1].duration = tau_fdp;
-    poa_phases[2*i+1].alpha = alpha_fdp;
-    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
-    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
-    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
   }
   // The final stage is an extra, longer FDP.
-  poa_phases[num_phases-1].phase = GK_POA_FDP;
-  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
-  poa_phases[num_phases-1].duration = tau_fdp_extra;
-  poa_phases[num_phases-1].alpha = alpha_fdp;
-  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
-  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
-  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
 
   double write_phase_freq = 1; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
   double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
@@ -471,7 +465,7 @@ create_ctx(void)
     .Nz = Nz,
     .Nvpar = Nvpar,
     .Nmu = Nmu,
-    .cells = {Npsi, Nz, Nvpar, Nmu},
+    .cells = { Npsi, Nz, Nvpar, Nmu },
     .poly_order = poly_order,
     .t_end = t_end,
     .num_frames = num_frames,
@@ -481,7 +475,7 @@ create_ctx(void)
     .int_diag_calc_freq = int_diag_calc_freq,
     .dt_failure_tol = dt_failure_tol,
     .num_failures_max = num_failures_max,
-    
+
     .ion_source_amplitude = ion_source_amplitude,
     .ion_source_sigma = ion_source_sigma,
     .ion_source_temp = ion_source_temp,
@@ -492,12 +486,12 @@ create_ctx(void)
     .Z_min = Z_min,
     .Z_max = Z_max,
   };
-  
+
   // Populate a couple more values in the context.
   ctx.psi_max = psi_RZ(ctx.RatZeq0, 0., &ctx);
-  ctx.psi_min  = psi_RZ(ctx.RatZeq0/10, 0., &ctx);
-  ctx.z_min    = z_psiZ(ctx.psi_max, ctx.Z_min, &ctx);
-  ctx.z_max    = z_psiZ(ctx.psi_max, ctx.Z_max, &ctx);
+  ctx.psi_min = psi_RZ(ctx.RatZeq0 / 10, 0., &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_max, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_max, ctx.Z_max, &ctx);
 
   return ctx;
 }
@@ -509,25 +503,25 @@ release_ctx(struct gk_mirror_ctx *ctx)
 }
 
 void
-calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
   double t_curr, bool force_calc, double dt)
 {
   if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
     gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
     gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
 
-    if ( !(dt < 0.0) )
+    if (!(dt < 0.0) )
       gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
   }
 }
 
 void
-write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
-  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
 {
   bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
   if (trig_now_conf || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
     gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
 
     gkyl_gyrokinetic_app_write_field_energy(app);
@@ -537,7 +531,7 @@ write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
 
   bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
   if (trig_now_phase || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
 
     gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
   }
@@ -559,7 +553,7 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   double t_end = tfs->t_end;
   int frame_curr = tfs->frame_curr;
   int num_frames = tfs->num_frames;
-  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
 
   // Prevent division by zero when frame_curr equals num_frames
   int frames_remaining = num_frames - frame_curr;
@@ -573,15 +567,16 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   trig_write_phase->tcurr = t_curr;
   trig_write_phase->curr = frame_curr;
 
-  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  int diag_frames = GKYL_MAX2(frames_remaining,
+    (num_int_diag_calc / num_frames) * frames_remaining);
   trig_calc_intdiag->dt = time_remaining / diag_frames;
   trig_calc_intdiag->tcurr = t_curr;
   trig_calc_intdiag->curr = frame_curr;
 }
 
-void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
   struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
-  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
   struct gk_poa_phase_params *pparams)
 {
   tfs->t_end = tfs->t_curr + pparams->duration;
@@ -590,7 +585,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   // Run an OAP or FDP.
   double t_curr = tfs->t_curr;
   double t_end = tfs->t_end;
-  
+
   // Reset I/O triggers:
   reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
 
@@ -630,15 +625,13 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   int num_failures = 0, num_failures_max = ctx->num_failures_max;
 
   long step = 1;
-  while ((t_curr < t_end) && (step <= num_steps))
-  {
+  while ((t_curr < t_end) && (step <= num_steps)) {
     gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
     dt = t_end - t_curr; // Ensure we don't step beyond t_end.
     struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
     gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
 
-    if (!status.success)
-    {
+    if (!status.success) {
       gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
       break;
     }
@@ -658,8 +651,10 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
       gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
       gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
       if (num_failures >= num_failures_max) {
-        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
-        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
         calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
         write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
         break;
@@ -673,7 +668,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   }
 
   tfs->t_curr = t_curr;
-  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
 }
 
 int main(int argc, char **argv)
@@ -692,10 +687,12 @@ int main(int argc, char **argv)
   struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
 
   int cells_x[ctx.cdim], cells_v[ctx.vdim];
-  for (int d=0; d<ctx.cdim; d++)
+  for (int d = 0; d < ctx.cdim; d++) {
     cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
-  for (int d=0; d<ctx.vdim; d++)
-    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
 
   // Construct communicator for use in app.
   struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
@@ -705,9 +702,9 @@ int main(int argc, char **argv)
     .charge = ctx.qi,
     .mass = ctx.mi,
     .vdim = ctx.vdim,
-    .lower = {-1.0, 0.0},
-    .upper = { 1.0, 1.0},
-    .cells = { cells_v[0], cells_v[1]},
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
+    .cells = { cells_v[0], cells_v[1] },
     .polarization_density = ctx.n0,
 
     .projection = {
@@ -746,17 +743,18 @@ int main(int argc, char **argv)
       .source_id = GKYL_PROJ_SOURCE,
       .num_sources = 1,
       .projection[0] = {
-        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
         .ctx_density = &ctx,
         .density = eval_density_ion_source,
         .ctx_upar = &ctx,
-        .upar= eval_upar_ion_source,
+        .upar = eval_upar_ion_source,
         .ctx_temp = &ctx,
-        .temp = eval_temp_ion_source,      
+        .temp = eval_temp_ion_source,
       },
       .diagnostics = {
         .num_diag_moments = 6,
-        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN },
         .num_integrated_diag_moments = 1,
         .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
       },
@@ -770,14 +768,16 @@ int main(int argc, char **argv)
     },
     .write_omega_cfl = true,
     .num_diag_moments = 8,
-    .diag_moments = {GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
     .num_integrated_diag_moments = 1,
     .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
     .time_rate_diagnostics = true,
 
     .boundary_flux_diagnostics = {
       .num_integrated_diag_moments = 1,
-      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP},
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
     },
   };
   struct gkyl_gyrokinetic_field field = {
@@ -791,15 +791,15 @@ int main(int argc, char **argv)
   struct gkyl_gk app_inp = {  // GK app
     .name = "gk_mirror_boltz_elc_poa_2x2v_p1",
     .cdim = ctx.cdim,
-    .upper = {ctx.psi_max, ctx.Z_max},
-    .lower = {ctx.psi_min, ctx.Z_min},
+    .upper = { ctx.psi_max, ctx.Z_max },
+    .lower = { ctx.psi_min, ctx.Z_min },
     .cells = { cells_x[0], cells_x[1] },
     .poly_order = ctx.poly_order,
     .basis_type = app_args.basis_type,
 
     .geometry = {
       .geometry_id = GKYL_GEOMETRY_MAPC2P,
-      .world = {0.0},
+      .world = { 0.0 },
       .mapc2p = mapc2p, // Mapping of computational to physical space.
       .c2p_ctx = &ctx,
       .bfield_func = bfield_func, // Magnetic field.
@@ -810,7 +810,7 @@ int main(int argc, char **argv)
     .periodic_dirs = {},
 
     .num_species = 1,
-    .species = {ion},
+    .species = { ion },
 
     .field = field,
 
@@ -836,10 +836,12 @@ int main(int argc, char **argv)
 
   int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
   if (app_args.is_restart) {
-    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
 
     if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
-      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
       goto freeresources;
     }
 
@@ -850,14 +852,15 @@ int main(int argc, char **argv)
     double time_count = 0.0;
     int frame_count = 0;
     int pit_curr = 0;
-    for (int pit=0; pit<ctx.num_phases; pit++) {
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
       time_count += ctx.poa_phases[pit].duration;
       frame_count += ctx.poa_phases[pit].num_frames;
       if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
         pit_curr = pit;
         break;
       }
-    };
+    }
+    ;
     phase_idx_init = pit_curr;
 
     // Change the duration and number frames so this phase reaches the expected
@@ -883,10 +886,11 @@ int main(int argc, char **argv)
     phase_idx_end = 1;
 
   // Loop over number of number of phases;
-  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
     gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
     struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
-    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
   }
 
   gkyl_gyrokinetic_app_stat_write(app);
@@ -896,21 +900,22 @@ int main(int argc, char **argv)
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
-  if (stat.nstage_2_fail > 0)
-  {
-    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
-    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
   }
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
   gkyl_gyrokinetic_app_print_timings(app, stdout);
 
-  freeresources:
+freeresources:
   // simulation complete, free app
   gkyl_gyrokinetic_app_release(app);
   gkyl_gyrokinetic_comms_release(comm);
   release_ctx(&ctx);
-  
+
 #ifdef GKYL_HAVE_MPI
   if (app_args.use_mpi)
     MPI_Finalize();
diff --git a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
index 9d0c49c37a..fe62ae50e7 100644
--- a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
@@ -30,8 +30,7 @@ struct gk_poa_phase_params {
 };
 
 // Define the context of the simulation. This is basically all the globals
-struct gk_mirror_ctx
-{
+struct gk_mirror_ctx {
   int cdim, vdim; // Dimensionality.
 
   // Plasma parameters
@@ -134,8 +133,8 @@ psi_RZ(double RIn, double ZIn, void *ctx)
   double gamma = app->gamma;
   double Z_m = app->Z_m;
   double psi = 0.5 * pow(RIn, 2.) * mcB *
-               (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
-                1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
   return psi;
 }
 
@@ -143,9 +142,9 @@ double
 R_psiZ(double psiIn, double ZIn, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  double Rout = sqrt(2.0 * psiIn / (app->mcB * 
+  double Rout = sqrt(2.0 * psiIn / (app->mcB *
     (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) +
-     1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))))));
+    1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))))));
   return Rout;
 }
 
@@ -158,11 +157,11 @@ Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, doubl
   double gamma = app->gamma;
   double Z_m = app->Z_m;
   *BRad = -(1.0 / 2.0) * Rcoord * mcB *
-          (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
-            2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
+    (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
+    2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
   *BZ = mcB *
-        (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
-         1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))));
+    (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
+    1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))));
   *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
 }
 
@@ -183,12 +182,10 @@ z_psiZ(double psiIn, double ZIn, void *ctx)
   app->psi_in = psiIn;
   double eps = 0.0;
   struct gkyl_qr_res integral;
-  if (eps <= ZIn)
-  {
+  if (eps <= ZIn) {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14);
   }
-  else
-  {
+  else {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14);
     integral.res = -integral.res;
   }
@@ -212,14 +209,12 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   app->psi_in = psiIn;
   app->z_in = zIn;
   struct gkyl_qr_res Zout;
-  if (zIn >= 0.0)
-  {
+  if (zIn >= 0.0) {
     double fl = root_Z_psiz(-eps, ctx);
     double fr = root_Z_psiz(app->Z_max + eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14);
   }
-  else
-  {
+  else {
     double fl = root_Z_psiz(app->Z_min - eps, ctx);
     double fr = root_Z_psiz(eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14);
@@ -229,7 +224,8 @@ Z_psiz(double psiIn, double zIn, void *ctx)
 
 // -- Source functions.
 void
-eval_density_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_density_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
@@ -239,25 +235,25 @@ eval_density_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_R
   double zSrc = app->lineLengthSrcElc;
   double sigSrc = app->sigSrcElc;
   double NSrcFloor = app->NSrcFloorElc;
-  if (fabs(Z) <= app->Z_m)
-  {
+  if (fabs(Z) <= app->Z_m) {
     fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2.))) *
-                                  exp(-1 * pow((z - zSrc), 2) / (2.0 * pow(sigSrc, 2.))));
+      exp(-1 * pow((z - zSrc), 2) / (2.0 * pow(sigSrc, 2.))));
   }
-  else
-  {
+  else {
     fout[0] = 1e-16;
   }
 }
 
 void
-eval_upar_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_upar_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   fout[0] = 0.0;
 }
 
 void
-eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
@@ -265,12 +261,10 @@ eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_REST
   double sigSrc = app->sigSrcElc;
   double TSrc0 = app->TSrc0Elc;
   double Tfloor = app->TSrcFloorElc;
-  if (fabs(z) <= 2.0 * sigSrc)
-  {
+  if (fabs(z) <= 2.0 * sigSrc) {
     fout[0] = TSrc0;
   }
-  else
-  {
+  else {
     fout[0] = Tfloor;
   }
 }
@@ -286,13 +280,11 @@ eval_density_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTR
   double zSrc = app->lineLengthSrcIon;
   double sigSrc = app->sigSrcIon;
   double NSrcFloor = app->NSrcFloorIon;
-  if (fabs(Z) <= app->Z_m)
-  {
+  if (fabs(Z) <= app->Z_m) {
     fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2))) *
-                                  exp(-1 * pow((z - zSrc), 2) / (2.0 * pow(sigSrc, 2))));
+      exp(-1 * pow((z - zSrc), 2) / (2.0 * pow(sigSrc, 2))));
   }
-  else
-  {
+  else {
     fout[0] = 1e-16;
   }
 }
@@ -304,7 +296,8 @@ eval_upar_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT
 }
 
 void
-eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
@@ -312,12 +305,10 @@ eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_REST
   double sigSrc = app->sigSrcIon;
   double TSrc0 = app->TSrc0Ion;
   double Tfloor = app->TSrcFloorIon;
-  if (fabs(z) <= 2.0 * sigSrc)
-  {
+  if (fabs(z) <= 2.0 * sigSrc) {
     fout[0] = TSrc0;
   }
-  else
-  {
+  else {
     fout[0] = Tfloor;
   }
 }
@@ -333,16 +324,13 @@ eval_density_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT
   double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
   double BRad, BZ, Bmag;
   Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-  if (fabs(Z) <= app->Z_bt)
-  {
+  if (fabs(Z) <= app->Z_bt) {
     fout[0] = app->n0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC0 / 2.);
   }
-  else if (fabs(Z) <= app->Z_m)
-  {
+  else if (fabs(Z) <= app->Z_m) {
     fout[0] = app->n0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC1 / 2.);
   }
-  else
-  {
+  else {
     fout[0] = app->n_m * sqrt(Bmag / app->B_m);
   }
 }
@@ -353,16 +341,13 @@ eval_upar_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fo
   struct gk_mirror_ctx *app = ctx;
   double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
   double z = xn[0];
-  if (fabs(z) <= app->z_m)
-  {
+  if (fabs(z) <= app->z_m) {
     fout[0] = 0.0;
   }
-  else if (z > app->z_m)
-  {
+  else if (z > app->z_m) {
     fout[0] = app->cs_m * (z - app->z_m);
   }
-  else
-  {
+  else {
     fout[0] = app->cs_m * (z + app->z_m);
   }
 }
@@ -377,16 +362,13 @@ eval_temp_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fo
   double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
   double BRad, BZ, Bmag;
   Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-  if (fabs(Z) <= app->Z_bt)
-  {
+  if (fabs(Z) <= app->Z_bt) {
     fout[0] = app->Te0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC0 / 2.);
   }
-  else if (fabs(Z) <= app->Z_m)
-  {
+  else if (fabs(Z) <= app->Z_m) {
     fout[0] = app->Te0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC1 / 2.);
   }
-  else
-  {
+  else {
     fout[0] = app->Te_m * sqrt(Bmag / app->B_m);
   }
 }
@@ -402,16 +384,13 @@ eval_density(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fou
   double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
   double BRad, BZ, Bmag;
   Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-  if (fabs(Z) <= app->Z_bt)
-  {
+  if (fabs(Z) <= app->Z_bt) {
     fout[0] = app->n0 * pow(1.0 - pow((R - app->R_bt) / app->alim, 2), app->alphaIC0 / 2);
   }
-  else if (fabs(Z) <= app->Z_m)
-  {
+  else if (fabs(Z) <= app->Z_m) {
     fout[0] = app->n0 * pow(1.0 - pow((R - app->R_bt) / app->alim, 2), app->alphaIC1 / 2);
   }
-  else
-  {
+  else {
     fout[0] = app->n_m * sqrt(Bmag / app->B_m);
   }
 }
@@ -422,17 +401,14 @@ eval_upar(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
   struct gk_mirror_ctx *app = ctx;
   double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
   double z = xn[0];
-  if (fabs(z) <= app->z_m)
-  {
+  if (fabs(z) <= app->z_m) {
     fout[0] = 0.0;
   }
-  else if (z > app->z_m)
-  {
-    fout[0] = app->cs_m * (z - app->z_m); //* (z -  / app->z_m);
+  else if (z > app->z_m) {
+    fout[0] = app->cs_m * (z - app->z_m); // * (z -  / app->z_m);
   }
-  else
-  {
-    fout[0] = app->cs_m * (z + app->z_m); //* (z + app->z_m) / app->z_m;
+  else {
+    fout[0] = app->cs_m * (z + app->z_m); // * (z + app->z_m) / app->z_m;
   }
 }
 
@@ -446,21 +422,17 @@ eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fo
   double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
   double BRad, BZ, Bmag;
   Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-  if (fabs(Z) <= app->Z_bt)
-  {
+  if (fabs(Z) <= app->Z_bt) {
     fout[0] = app->Ti0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2)), app->alphaIC0 / 2);
   }
-  else if (fabs(Z) <= app->Z_m)
-  {
+  else if (fabs(Z) <= app->Z_m) {
     fout[0] = app->Ti0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2)), app->alphaIC1 / 2);
   }
-  else
-  {
+  else {
     fout[0] = app->Ti_m * sqrt(Bmag / app->B_m);
   }
 }
 
-
 // Potential initial condition
 void
 eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
@@ -469,14 +441,12 @@ eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT f
   double z = xn[0];
   double z_m = 0.98;
   double z_max = app->z_max;
-  double sigma = 0.2*z_m;
+  double sigma = 0.2 * z_m;
   double center_potential = 8.0 * app->Te0 / app->qi;
-  if (fabs(z) <= sigma)
-  {
+  if (fabs(z) <= sigma) {
     fout[0] = center_potential;
   }
-  else
-  {
+  else {
     fout[0] = center_potential * (1 - (fabs(z) - sigma) / (z_max - sigma));
   }
 }
@@ -541,10 +511,10 @@ bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
   Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
 
   double phi = xc[1];
-  // zc are computational coords. 
+  // zc are computational coords.
   // Set Cartesian components of magnetic field.
-  fout[0] = BRad*cos(phi);
-  fout[1] = BRad*sin(phi);
+  fout[0] = BRad * cos(phi);
+  fout[1] = BRad * sin(phi);
   fout[2] = BZ;
 }
 
@@ -581,11 +551,11 @@ create_ctx(void)
   // Electron-electron collision freq.
   double logLambdaElc = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Te0 / eV);
   double nuElc = nuFrac * logLambdaElc * pow(eV, 4.) * n0 /
-                 (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.));
+    (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.));
   // Ion-ion collision freq.
   double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
   double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
-                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
 
   // Thermal speeds.
   double vti = sqrt(Ti0 / mi);
@@ -604,7 +574,7 @@ create_ctx(void)
   // Axial coordinate Z extents. Endure that Z=0 is not on
   // the boundary of a cell (due to AD errors).
   double Z_min = -2.5;
-  double Z_max =  2.5;
+  double Z_max = 2.5;
 
   // Parameters controlling the magnetic equilibrium model.
   double mcB = 6.51292;
@@ -658,13 +628,13 @@ create_ctx(void)
   // Duration of each phase.
   double tau_oap = 1e-7;
   double tau_fdp = 3e-10;
-  double tau_fdp_extra = 2*tau_fdp;
+  double tau_fdp_extra = 2 * tau_fdp;
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
 
   // Frame counts for each phase type (specified independently)
   int num_frames_oap = 4; // Frames per OAP phase
   int num_frames_fdp = 4; // Frames per FDP phase
-  int num_frames_fdp_extra = 2*num_frames_fdp;  // Frames for the extra FDP phase
+  int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
 
   // Whether to evolve the field.
   bool is_static_field_oap = true;
@@ -677,39 +647,40 @@ create_ctx(void)
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
 
   // Calculate phase structure
-  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
-  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
-  int num_phases = 2*num_cycles + 1;
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2 * num_cycles + 1;
   int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
 
-  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
-  for (int i=0; i<(num_phases-1)/2; i++) {
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
     // OAPs.
-    poa_phases[2*i].phase = GK_POA_OAP;
-    poa_phases[2*i].num_frames = num_frames_oap;
-    poa_phases[2*i].duration = tau_oap;
-    poa_phases[2*i].alpha = alpha_oap;
-    poa_phases[2*i].is_static_field = is_static_field_oap;
-    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
-    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
 
     // FDPs.
-    poa_phases[2*i+1].phase = GK_POA_FDP;
-    poa_phases[2*i+1].num_frames = num_frames_fdp;
-    poa_phases[2*i+1].duration = tau_fdp;
-    poa_phases[2*i+1].alpha = alpha_fdp;
-    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
-    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
-    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
   }
   // Add an extra, longer FDP.
-  poa_phases[num_phases-1].phase = GK_POA_FDP;
-  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
-  poa_phases[num_phases-1].duration = tau_fdp_extra;
-  poa_phases[num_phases-1].alpha = alpha_fdp;
-  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
-  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
-  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
 
   double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
   double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
@@ -743,7 +714,7 @@ create_ctx(void)
     .c_s = c_s,
     .omega_ci = omega_ci,
     .rho_s = rho_s,
-    .kperp = kperp, 
+    .kperp = kperp,
     .RatZeq0 = RatZeq0,
     .Z_min = Z_min,
     .Z_max = Z_max,
@@ -780,22 +751,22 @@ create_ctx(void)
     .Nz = Nz,
     .Nvpar = Nvpar,
     .Nmu = Nmu,
-    .cells = {Nz, Nvpar, Nmu},
+    .cells = { Nz, Nvpar, Nmu },
     .poly_order = poly_order,
     .t_end = t_end,
     .num_frames = num_frames,
     .num_phases = num_phases,
     .poa_phases = poa_phases,
-    .write_phase_freq     = write_phase_freq    , 
-    .int_diag_calc_freq   = int_diag_calc_freq  , 
-    .dt_failure_tol       = dt_failure_tol      , 
-    .num_failures_max     = num_failures_max    , 
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
   };
 
   // Populate a couple more values in the context.
   ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
-  ctx.z_min    = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
-  ctx.z_max    = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
 
   return ctx;
 }
@@ -807,25 +778,25 @@ release_ctx(struct gk_mirror_ctx *ctx)
 }
 
 void
-calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
   double t_curr, bool force_calc, double dt)
 {
   if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
     gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
     gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
 
-    if ( !(dt < 0.0) )
+    if (!(dt < 0.0) )
       gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
   }
 }
 
 void
-write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
-  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
 {
   bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
   if (trig_now_conf || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
     gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
 
     gkyl_gyrokinetic_app_write_field_energy(app);
@@ -835,7 +806,7 @@ write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
 
   bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
   if (trig_now_phase || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
 
     gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
   }
@@ -857,7 +828,7 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   double t_end = tfs->t_end;
   int frame_curr = tfs->frame_curr;
   int num_frames = tfs->num_frames;
-  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
 
   // Prevent division by zero when frame_curr equals num_frames
   int frames_remaining = num_frames - frame_curr;
@@ -871,15 +842,16 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   trig_write_phase->tcurr = t_curr;
   trig_write_phase->curr = frame_curr;
 
-  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  int diag_frames = GKYL_MAX2(frames_remaining,
+    (num_int_diag_calc / num_frames) * frames_remaining);
   trig_calc_intdiag->dt = time_remaining / diag_frames;
   trig_calc_intdiag->tcurr = t_curr;
   trig_calc_intdiag->curr = frame_curr;
 }
 
-void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
   struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
-  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
   struct gk_poa_phase_params *pparams)
 {
   tfs->t_end = tfs->t_curr + pparams->duration;
@@ -888,7 +860,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   // Run an OAP or FDP.
   double t_curr = tfs->t_curr;
   double t_end = tfs->t_end;
-  
+
   // Reset I/O triggers:
   reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
 
@@ -928,8 +900,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   int num_failures = 0, num_failures_max = ctx->num_failures_max;
 
   long step = 1;
-  while ((t_curr < t_end) && (step <= num_steps))
-  {
+  while ((t_curr < t_end) && (step <= num_steps)) {
     if (step == 1 || step % 1 == 0)
       gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
 
@@ -939,8 +910,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
     if (step == 1 || step % 1 == 0)
       gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
 
-    if (!status.success)
-    {
+    if (!status.success) {
       gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
       break;
     }
@@ -960,8 +930,10 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
       gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
       gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
       if (num_failures >= num_failures_max) {
-        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
-        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
         calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
         write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
         break;
@@ -975,7 +947,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   }
 
   tfs->t_curr = t_curr;
-  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
 }
 
 int main(int argc, char **argv)
@@ -994,10 +966,12 @@ int main(int argc, char **argv)
   struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
 
   int cells_x[ctx.cdim], cells_v[ctx.vdim];
-  for (int d=0; d<ctx.cdim; d++)
+  for (int d = 0; d < ctx.cdim; d++) {
     cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
-  for (int d=0; d<ctx.vdim; d++)
-    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
 
   // Construct communicator for use in app.
   struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
@@ -1007,20 +981,20 @@ int main(int argc, char **argv)
     .charge = ctx.qe,
     .mass = ctx.me,
     .vdim = ctx.vdim,
-    .lower = {-ctx.vpar_max_elc, 0.0},
-    .upper = {ctx.vpar_max_elc, ctx.mu_max_elc},
+    .lower = { -ctx.vpar_max_elc, 0.0 },
+    .upper = { ctx.vpar_max_elc, ctx.mu_max_elc },
     .cells = { cells_v[0], cells_v[1] },
 
     .polarization_density = ctx.n0,
 
     .projection = {
-      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
       .ctx_density = &ctx,
       .density = eval_density_elc,
       .ctx_upar = &ctx,
-      .upar= eval_upar_elc,
+      .upar = eval_upar_elc,
       .ctx_temp = &ctx,
-      .temp = eval_temp_elc,      
+      .temp = eval_temp_elc,
     },
 
     .collisionless = {
@@ -1028,7 +1002,7 @@ int main(int argc, char **argv)
       .scale_factor = 1.0, // Will be replaced below.
     },
 
-    .collisions =  {
+    .collisions = {
       .collision_id = GKYL_LBO_COLLISIONS,
       .den_ref = ctx.n0,
       .temp_ref = ctx.Te0,
@@ -1040,14 +1014,14 @@ int main(int argc, char **argv)
       .source_id = GKYL_PROJ_SOURCE,
       .num_sources = 1,
       .projection[0] = {
-        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
         .ctx_density = &ctx,
         .density = eval_density_elc_source,
         .ctx_upar = &ctx,
-        .upar= eval_upar_elc_source,
+        .upar = eval_upar_elc_source,
         .ctx_temp = &ctx,
-        .temp = eval_temp_elc_source,      
-      }, 
+        .temp = eval_temp_elc_source,
+      },
     },
 
     .time_rate_multiplier = {
@@ -1068,7 +1042,9 @@ int main(int argc, char **argv)
 
     .write_omega_cfl = true,
     .num_diag_moments = 8,
-    .diag_moments = {GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+    .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                      GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP,
+                      GKYL_F_MOMENT_BIMAXWELLIAN },
   };
 
   struct gkyl_gyrokinetic_species ion = {
@@ -1076,21 +1052,21 @@ int main(int argc, char **argv)
     .charge = ctx.qi,
     .mass = ctx.mi,
     .vdim = ctx.vdim,
-    .lower = {-ctx.vpar_max_ion, 0.0},
-    .upper = { ctx.vpar_max_ion, ctx.mu_max_ion},
+    .lower = { -ctx.vpar_max_ion, 0.0 },
+    .upper = { ctx.vpar_max_ion, ctx.mu_max_ion },
     .cells = { cells_v[0], cells_v[1] },
     .scale_with_polarization = true,
 
     .polarization_density = ctx.n0,
 
     .projection = {
-      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
       .ctx_density = &ctx,
       .density = eval_density,
       .ctx_upar = &ctx,
-      .upar= eval_upar,
+      .upar = eval_upar,
       .ctx_temp = &ctx,
-      .temp = eval_temp_ion,      
+      .temp = eval_temp_ion,
     },
 
     .collisionless = {
@@ -1098,7 +1074,7 @@ int main(int argc, char **argv)
       .scale_factor = 1.0, // Will be replaced below.
     },
 
-    .collisions =  {
+    .collisions = {
       .collision_id = GKYL_LBO_COLLISIONS,
       .den_ref = ctx.n0,
       .temp_ref = ctx.Ti0,
@@ -1110,14 +1086,14 @@ int main(int argc, char **argv)
       .source_id = GKYL_PROJ_SOURCE,
       .num_sources = 1,
       .projection[0] = {
-        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
         .ctx_density = &ctx,
         .density = eval_density_source,
         .ctx_upar = &ctx,
-        .upar= eval_upar_source,
+        .upar = eval_upar_source,
         .ctx_temp = &ctx,
-        .temp = eval_temp_ion_source,      
-      }, 
+        .temp = eval_temp_ion_source,
+      },
     },
 
     .time_rate_multiplier = {
@@ -1138,7 +1114,9 @@ int main(int argc, char **argv)
 
     .write_omega_cfl = true,
     .num_diag_moments = 8,
-    .diag_moments = {GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+    .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                      GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP,
+                      GKYL_F_MOMENT_BIMAXWELLIAN },
   };
 
   struct gkyl_gyrokinetic_field field = {
@@ -1150,18 +1128,18 @@ int main(int argc, char **argv)
   };
 
   // GK app
-  struct gkyl_gk app_inp = { 
+  struct gkyl_gk app_inp = {
     .name = "gk_mirror_kinetic_elc_poa_1x2v_p1",
     .cdim = ctx.cdim,
-    .lower = {ctx.z_min},
-    .upper = {ctx.z_max},
+    .lower = { ctx.z_min },
+    .upper = { ctx.z_max },
     .cells = { cells_x[0] },
     .poly_order = ctx.poly_order,
     .basis_type = app_args.basis_type,
 
     .geometry = {
       .geometry_id = GKYL_GEOMETRY_MAPC2P,
-      .world = {ctx.psi_eval, 0.0},
+      .world = { ctx.psi_eval, 0.0 },
       .mapc2p = mapc2p, // Mapping of computational to physical space.
       .c2p_ctx = &ctx,
       .bfield_func = bfield_func, // Magnetic field.
@@ -1171,7 +1149,7 @@ int main(int argc, char **argv)
     .num_periodic_dir = 0,
     .periodic_dirs = {},
     .num_species = 2,
-    .species = {elc, ion},
+    .species = { elc, ion },
     .field = field,
 
     .parallelism = {
@@ -1196,10 +1174,12 @@ int main(int argc, char **argv)
 
   int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
   if (app_args.is_restart) {
-    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
 
     if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
-      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
       goto freeresources;
     }
 
@@ -1210,14 +1190,15 @@ int main(int argc, char **argv)
     double time_count = 0.0;
     int frame_count = 0;
     int pit_curr = 0;
-    for (int pit=0; pit<ctx.num_phases; pit++) {
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
       time_count += ctx.poa_phases[pit].duration;
       frame_count += ctx.poa_phases[pit].num_frames;
       if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
         pit_curr = pit;
         break;
       }
-    };
+    }
+    ;
     phase_idx_init = pit_curr;
 
     // Change the duration and number frames so this phase reaches the expected
@@ -1243,10 +1224,11 @@ int main(int argc, char **argv)
     phase_idx_end = 1;
 
   // Loop over number of number of phases;
-  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
     gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
     struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
-    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
   }
 
   gkyl_gyrokinetic_app_stat_write(app);
@@ -1256,21 +1238,22 @@ int main(int argc, char **argv)
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
-  if (stat.nstage_2_fail > 0)
-  {
-    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
-    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
   }
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
   gkyl_gyrokinetic_app_print_timings(app, stdout);
 
-  freeresources:
+freeresources:
   // simulation complete, free app
   gkyl_gyrokinetic_app_release(app);
   gkyl_gyrokinetic_comms_release(comm);
   release_ctx(&ctx);
-  
+
 #ifdef GKYL_HAVE_MPI
   if (app_args.use_mpi)
     MPI_Finalize();
diff --git a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
index efa6936153..e0ddd1c38b 100644
--- a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
+++ b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
@@ -30,8 +30,7 @@ struct gk_poa_phase_params {
 };
 
 // Define the context of the simulation. This is basically all the globals
-struct gk_mirror_ctx
-{
+struct gk_mirror_ctx {
   int cdim, vdim; // Dimensionality.
 
   // Plasma parameters
@@ -46,7 +45,6 @@ struct gk_mirror_ctx
   double beta; // Plasma beta in the center.
   double tau; // Temperature ratio.
 
- 
   double Ti_perp0; // Reference ion perp temperature.
   double Ti_par0; // Reference ion par temperature.
   double cs_m; // Ion sound speed at the throat.
@@ -111,10 +109,10 @@ psi_RZ(double RIn, double ZIn, void *ctx)
   double gamma = app->gamma;
   double Z_m = app->Z_m;
   double psi = 0.5 * pow(RIn, 2.) * mcB *
-               (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
-                1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) + 
-                2. / (M_PI * gamma * (1. + pow((ZIn - 2*Z_m) / gamma, 2.))) +
-                2. / (M_PI * gamma * (1. + pow((ZIn + 2*Z_m) / gamma, 2.))));
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) +
+    2. / (M_PI * gamma * (1. + pow((ZIn - 2 * Z_m) / gamma, 2.))) +
+    2. / (M_PI * gamma * (1. + pow((ZIn + 2 * Z_m) / gamma, 2.))));
   return psi;
 }
 
@@ -122,11 +120,11 @@ double
 R_psiZ(double psiIn, double ZIn, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  double Rout = sqrt(2.0 * psiIn / (app->mcB * 
+  double Rout = sqrt(2.0 * psiIn / (app->mcB *
     (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) +
-     1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))) +
-     2.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - 2*app->Z_m) / app->gamma, 2.))) +
-     2.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + 2*app->Z_m) / app->gamma, 2.)))
+    1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))) +
+    2.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - 2 * app->Z_m) / app->gamma, 2.))) +
+    2.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + 2 * app->Z_m) / app->gamma, 2.)))
     )));
   return Rout;
 }
@@ -140,17 +138,19 @@ Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, doubl
   double gamma = app->gamma;
   double Z_m = app->Z_m;
   *BRad = -(1.0 / 2.0) * Rcoord * mcB *
-          (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) +
-           -2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))) +
-           -4.0 * (ZIn - 2*Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - 2*Z_m) / gamma, 2.), 2.))) +
-           -4.0 * (ZIn + 2*Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + 2*Z_m) / gamma, 2.), 2.)))
-          );
+    (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) +
+    -2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))) +
+    -4.0 * (ZIn - 2 * Z_m) / (M_PI * pow(gamma,
+      3.) * (pow(1.0 + pow((ZIn - 2 * Z_m) / gamma, 2.), 2.))) +
+    -4.0 * (ZIn + 2 * Z_m) / (M_PI * pow(gamma,
+      3.) * (pow(1.0 + pow((ZIn + 2 * Z_m) / gamma, 2.), 2.)))
+    );
   *BZ = mcB *
-        (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
-         1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))) +
-         2.0 / (M_PI * gamma * (1.0 + pow((ZIn - 2*Z_m) / gamma, 2.))) +
-         2.0 / (M_PI * gamma * (1.0 + pow((ZIn + 2*Z_m) / gamma, 2.)))
-        );
+    (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
+    1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))) +
+    2.0 / (M_PI * gamma * (1.0 + pow((ZIn - 2 * Z_m) / gamma, 2.))) +
+    2.0 / (M_PI * gamma * (1.0 + pow((ZIn + 2 * Z_m) / gamma, 2.)))
+    );
   *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
 }
 
@@ -171,12 +171,10 @@ z_psiZ(double psiIn, double ZIn, void *ctx)
   app->psi_in = psiIn;
   double eps = 0.0;
   struct gkyl_qr_res integral;
-  if (eps <= ZIn)
-  {
+  if (eps <= ZIn) {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14);
   }
-  else
-  {
+  else {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14);
     integral.res = -integral.res;
   }
@@ -200,14 +198,12 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   app->psi_in = psiIn;
   app->z_in = zIn;
   struct gkyl_qr_res Zout;
-  if (zIn >= 0.0)
-  {
+  if (zIn >= 0.0) {
     double fl = root_Z_psiz(-eps, ctx);
     double fr = root_Z_psiz(app->Z_max + eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14);
   }
-  else
-  {
+  else {
     double fl = root_Z_psiz(app->Z_min - eps, ctx);
     double fr = root_Z_psiz(eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14);
@@ -216,20 +212,23 @@ Z_psiz(double psiIn, double zIn, void *ctx)
 }
 
 void
-eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   fout[0] = app->NSrcIon;
 }
 
 void
-eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   fout[0] = 0.0;
 }
 
 void
-eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   fout[0] = app->TSrc0Ion;
@@ -303,14 +302,14 @@ bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
   Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
 
   double phi = xc[1];
-  // zc are computational coords. 
+  // zc are computational coords.
   // Set Cartesian components of magnetic field.
-  fout[0] = BRad*cos(phi);
-  fout[1] = BRad*sin(phi);
+  fout[0] = BRad * cos(phi);
+  fout[1] = BRad * sin(phi);
   fout[2] = BZ;
 }
 
-void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double vpar_max_ion = app->vpar_max_ion;
@@ -318,9 +317,9 @@ void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *
 
   double cvpar = vc[0], cmu = vc[1];
   double b = 1.4;
-  vp[0] = vpar_max_ion*tan(cvpar*b)/tan(b);
+  vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b);
   // Cubic map in mu.
-  vp[1] = mu_max_ion*pow(cmu,3);
+  vp[1] = mu_max_ion * pow(cmu, 3);
 }
 
 struct gk_mirror_ctx
@@ -350,7 +349,7 @@ create_ctx(void)
   // Ion-ion collision freq.
   double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
   double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
-                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
 
   // Thermal speeds.
   double vti = sqrt(Ti0 / mi);
@@ -366,7 +365,7 @@ create_ctx(void)
   // Axial coordinate Z extents. Endure that Z=0 is not on
   // the boundary of a cell (due to AD errors).
   double Z_min = -3.0;
-  double Z_max =  3.0;
+  double Z_max = 3.0;
 
   // Parameters controlling the magnetic equilibrium model.
   double mcB = 1;
@@ -384,7 +383,7 @@ create_ctx(void)
 
   // Computational velocity space limits.
   double vpar_min_ion_c = -1.0;
-  double vpar_max_ion_c =  1.0;
+  double vpar_max_ion_c = 1.0;
   double mu_min_ion_c = 0.;
   double mu_max_ion_c = 1.;
 
@@ -404,13 +403,13 @@ create_ctx(void)
   // Duration of each phase.
   double tau_oap = 5e-7;
   double tau_fdp = 3e-9;
-  double tau_fdp_extra = 2*tau_fdp;
+  double tau_fdp_extra = 2 * tau_fdp;
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
 
   // Frame counts for each phase type (specified independently)
   int num_frames_oap = 4; // Frames per OAP phase
   int num_frames_fdp = 4; // Frames per FDP phase
-  int num_frames_fdp_extra = 2*num_frames_fdp;  // Frames for the extra FDP phase
+  int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
 
   // Whether to evolve the field.
   bool is_static_field_oap = true;
@@ -423,39 +422,40 @@ create_ctx(void)
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
 
   // Calculate phase structure
-  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
-  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
-  int num_phases = 2*num_cycles + 1;
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2 * num_cycles + 1;
   int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
 
-  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
-  for (int i=0; i<(num_phases-1)/2; i++) {
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
     // OAPs.
-    poa_phases[2*i].phase = GK_POA_OAP;
-    poa_phases[2*i].num_frames = num_frames_oap;
-    poa_phases[2*i].duration = tau_oap;
-    poa_phases[2*i].alpha = alpha_oap;
-    poa_phases[2*i].is_static_field = is_static_field_oap;
-    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
-    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
 
     // FDPs.
-    poa_phases[2*i+1].phase = GK_POA_FDP;
-    poa_phases[2*i+1].num_frames = num_frames_fdp;
-    poa_phases[2*i+1].duration = tau_fdp;
-    poa_phases[2*i+1].alpha = alpha_fdp;
-    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
-    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
-    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
   }
   // Add an extra, longer FDP.
-  poa_phases[num_phases-1].phase = GK_POA_FDP;
-  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
-  poa_phases[num_phases-1].duration = tau_fdp_extra;
-  poa_phases[num_phases-1].alpha = alpha_fdp;
-  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
-  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
-  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
 
   double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
   double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
@@ -463,21 +463,21 @@ create_ctx(void)
   int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
 
   struct gk_mirror_ctx ctx = {
-    .cdim = cdim,  .vdim = vdim,
-    .mi = mi,  .qi = qi,
-    .me = me,  .qe = qe,
-    .Te0 = Te0,  .Ti0 = Ti0,  .n0 = n0,
-    .B_p = B_p,  .beta = beta,  .tau = tau,
-    .nuFrac = nuFrac,  .logLambdaIon = logLambdaIon,  .nuIon = nuIon,
-    .vti = vti,  .vte = vte,  .c_s = c_s,
-    .omega_ci = omega_ci,  .rho_s = rho_s,
+    .cdim = cdim, .vdim = vdim,
+    .mi = mi, .qi = qi,
+    .me = me, .qe = qe,
+    .Te0 = Te0, .Ti0 = Ti0, .n0 = n0,
+    .B_p = B_p, .beta = beta, .tau = tau,
+    .nuFrac = nuFrac, .logLambdaIon = logLambdaIon, .nuIon = nuIon,
+    .vti = vti, .vte = vte, .c_s = c_s,
+    .omega_ci = omega_ci, .rho_s = rho_s,
     .RatZeq0 = RatZeq0,
-    .Z_min = Z_min,  .Z_max = Z_max,
+    .Z_min = Z_min, .Z_max = Z_max,
     // Parameters controlling the magnetic equilibrium model.
-    .mcB = mcB,  .gamma = gamma,
+    .mcB = mcB, .gamma = gamma,
     .Z_m = Z_m,
     // Initial condition parameters.
-    .Ti_perp0 = Ti_perp0,  .Ti_par0 = Ti_par0,
+    .Ti_perp0 = Ti_perp0, .Ti_par0 = Ti_par0,
     // Source parameters
     .NSrcIon = NSrcIon,
     .TSrc0Ion = TSrc0Ion,
@@ -494,23 +494,23 @@ create_ctx(void)
     .Nz = Nz,
     .Nvpar = Nvpar,
     .Nmu = Nmu,
-    .cells = {Nz, Nvpar, Nmu},
+    .cells = { Nz, Nvpar, Nmu },
     .poly_order = poly_order,
     // Time integration and I/O parameters.
     .t_end = t_end,
     .num_frames = num_frames,
     .num_phases = num_phases,
     .poa_phases = poa_phases,
-    .write_phase_freq     = write_phase_freq    , 
-    .int_diag_calc_freq   = int_diag_calc_freq  , 
-    .dt_failure_tol       = dt_failure_tol      , 
-    .num_failures_max     = num_failures_max    , 
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
   };
 
   // Populate a couple more values in the context.
   ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
-  ctx.z_min    = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
-  ctx.z_max    = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
 
   return ctx;
 }
@@ -522,25 +522,25 @@ release_ctx(struct gk_mirror_ctx *ctx)
 }
 
 void
-calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
   double t_curr, bool force_calc, double dt)
 {
   if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
     gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
     gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
 
-    if ( !(dt < 0.0) )
+    if (!(dt < 0.0) )
       gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
   }
 }
 
 void
-write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
-  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
 {
   bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
   if (trig_now_conf || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
     gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
 
     gkyl_gyrokinetic_app_write_field_energy(app);
@@ -550,7 +550,7 @@ write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
 
   bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
   if (trig_now_phase || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
 
     gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
   }
@@ -572,7 +572,7 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   double t_end = tfs->t_end;
   int frame_curr = tfs->frame_curr;
   int num_frames = tfs->num_frames;
-  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
 
   // Prevent division by zero when frame_curr equals num_frames
   int frames_remaining = num_frames - frame_curr;
@@ -586,15 +586,16 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   trig_write_phase->tcurr = t_curr;
   trig_write_phase->curr = frame_curr;
 
-  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  int diag_frames = GKYL_MAX2(frames_remaining,
+    (num_int_diag_calc / num_frames) * frames_remaining);
   trig_calc_intdiag->dt = time_remaining / diag_frames;
   trig_calc_intdiag->tcurr = t_curr;
   trig_calc_intdiag->curr = frame_curr;
 }
 
-void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
   struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
-  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
   struct gk_poa_phase_params *pparams)
 {
   tfs->t_end = tfs->t_curr + pparams->duration;
@@ -603,7 +604,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   // Run an OAP or FDP.
   double t_curr = tfs->t_curr;
   double t_end = tfs->t_end;
-  
+
   // Reset I/O triggers:
   reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
 
@@ -643,8 +644,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   int num_failures = 0, num_failures_max = ctx->num_failures_max;
 
   long step = 1;
-  while ((t_curr < t_end) && (step <= num_steps))
-  {
+  while ((t_curr < t_end) && (step <= num_steps)) {
     if (step == 1 || step % 1 == 0)
       gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
 
@@ -654,8 +654,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
     if (step == 1 || step % 1 == 0)
       gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
 
-    if (!status.success)
-    {
+    if (!status.success) {
       gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
       break;
     }
@@ -675,8 +674,10 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
       gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
       gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
       if (num_failures >= num_failures_max) {
-        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
-        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
         calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
         write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
         break;
@@ -690,7 +691,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   }
 
   tfs->t_curr = t_curr;
-  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
 }
 
 int main(int argc, char **argv)
@@ -709,20 +710,22 @@ int main(int argc, char **argv)
   struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
 
   int cells_x[ctx.cdim], cells_v[ctx.vdim];
-  for (int d=0; d<ctx.cdim; d++)
+  for (int d = 0; d < ctx.cdim; d++) {
     cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
-  for (int d=0; d<ctx.vdim; d++)
-    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
 
   // Construct communicator for use in app.
   struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
 
   struct gkyl_gyrokinetic_species ion = {
     .name = "ion",
-    .charge = ctx.qi,  .mass = ctx.mi,
+    .charge = ctx.qi, .mass = ctx.mi,
     .vdim = ctx.vdim,
-    .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c},
-    .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c},
+    .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c },
+    .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c },
     .cells = { cells_v[0], cells_v[1] },
 
     .polarization_density = ctx.n0,
@@ -749,7 +752,7 @@ int main(int argc, char **argv)
       .scale_factor = 1.0, // Will be replaced below.
     },
 
-    .collisions =  {
+    .collisions = {
       .collision_id = GKYL_LBO_COLLISIONS,
       .self_nu = evalNuIon,
       .self_nu_ctx = &ctx,
@@ -759,14 +762,14 @@ int main(int argc, char **argv)
       .source_id = GKYL_PROJ_SOURCE,
       .num_sources = 1,
       .projection[0] = {
-        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
-	.density = eval_density_ion_source,
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .density = eval_density_ion_source,
         .upar = eval_upar_ion_source,
         .temp = eval_temp_ion_source,
         .ctx_density = &ctx,
         .ctx_upar = &ctx,
         .ctx_temp = &ctx,
-      }, 
+      },
     },
 
     .time_rate_multiplier = {
@@ -786,7 +789,8 @@ int main(int argc, char **argv)
     },
 
     .num_diag_moments = 4,
-    .diag_moments = {GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+    .diag_moments = { GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_BIMAXWELLIAN },
   };
 
   struct gkyl_gyrokinetic_field field = {
@@ -798,18 +802,18 @@ int main(int argc, char **argv)
   };
 
   // GK app
-  struct gkyl_gk app_inp = { 
+  struct gkyl_gk app_inp = {
     .name = "gk_mirror_tandem_boltz_elc_poa_1x2v",
     .cdim = ctx.cdim,
-    .lower = {ctx.z_min},
-    .upper = {ctx.z_max},
+    .lower = { ctx.z_min },
+    .upper = { ctx.z_max },
     .cells = { cells_x[0] },
     .poly_order = ctx.poly_order,
     .basis_type = app_args.basis_type,
 
     .geometry = {
       .geometry_id = GKYL_GEOMETRY_MAPC2P,
-      .world = {ctx.psi_eval, 0.0},
+      .world = { ctx.psi_eval, 0.0 },
       .mapc2p = mapc2p, // Mapping of computational to physical space.
       .c2p_ctx = &ctx,
       .bfield_func = bfield_func, // Magnetic field.
@@ -820,7 +824,7 @@ int main(int argc, char **argv)
     .periodic_dirs = {},
 
     .num_species = 1,
-    .species = {ion},
+    .species = { ion },
 
     .field = field,
 
@@ -846,10 +850,12 @@ int main(int argc, char **argv)
 
   int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
   if (app_args.is_restart) {
-    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
 
     if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
-      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
       goto freeresources;
     }
 
@@ -860,14 +866,15 @@ int main(int argc, char **argv)
     double time_count = 0.0;
     int frame_count = 0;
     int pit_curr = 0;
-    for (int pit=0; pit<ctx.num_phases; pit++) {
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
       time_count += ctx.poa_phases[pit].duration;
       frame_count += ctx.poa_phases[pit].num_frames;
       if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
         pit_curr = pit;
         break;
       }
-    };
+    }
+    ;
     phase_idx_init = pit_curr;
 
     // Change the duration and number frames so this phase reaches the expected
@@ -893,10 +900,11 @@ int main(int argc, char **argv)
     phase_idx_end = 1;
 
   // Loop over number of number of phases;
-  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
     gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
     struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
-    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
   }
 
   gkyl_gyrokinetic_app_stat_write(app);
@@ -906,21 +914,22 @@ int main(int argc, char **argv)
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
-  if (stat.nstage_2_fail > 0)
-  {
-    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
-    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
   }
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
   gkyl_gyrokinetic_app_print_timings(app, stdout);
 
-  freeresources:
+freeresources:
   // simulation complete, free app
   gkyl_gyrokinetic_app_release(app);
   gkyl_gyrokinetic_comms_release(comm);
   release_ctx(&ctx);
-  
+
 #ifdef GKYL_HAVE_MPI
   if (app_args.use_mpi)
     MPI_Finalize();
diff --git a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
index 2caf2e4350..a43d12323b 100644
--- a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
@@ -34,8 +34,7 @@ struct gk_poa_phase_params {
 };
 
 // Define the context of the simulation. This is basically all the globals
-struct gk_mirror_ctx
-{
+struct gk_mirror_ctx {
   int cdim, vdim; // Dimensionality.
   // Plasma parameters
   double mi;
@@ -101,7 +100,6 @@ struct gk_mirror_ctx
   double elc_source_temp;
 };
 
-
 void
 eval_density(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
@@ -138,13 +136,11 @@ eval_density_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTR
   double src_amp = app->source_amplitude;
   double z_src = 0.0;
   double src_sigma = app->source_sigma;
-  double src_amp_floor = src_amp*1e-2;
-  if (fabs(z) <= 1.0)
-  {
+  double src_amp_floor = src_amp * 1e-2;
+  if (fabs(z) <= 1.0) {
     fout[0] = src_amp * (1 - pow(fabs(z), 6));
   }
-  else
-  {
+  else {
     fout[0] = 1e-16;
   }
 }
@@ -156,35 +152,33 @@ eval_upar_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT
 }
 
 void
-eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double z = xn[0];
   double TSrc0 = app->ion_source_temp;
-  double Tfloor = TSrc0*1e-2;
-  if (fabs(z) <= 1.0)
-  {
+  double Tfloor = TSrc0 * 1e-2;
+  if (fabs(z) <= 1.0) {
     fout[0] = TSrc0;
   }
-  else
-  {
+  else {
     fout[0] = Tfloor;
   }
 }
 
 void
-eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double z = xn[0];
   double TSrc0 = app->elc_source_temp; // Using same temp as ion source for simplicity
-  double Tfloor = TSrc0*1e-2;
-  if (fabs(z) <= 1.0)
-  {
+  double Tfloor = TSrc0 * 1e-2;
+  if (fabs(z) <= 1.0) {
     fout[0] = TSrc0;
   }
-  else
-  {
+  else {
     fout[0] = Tfloor;
   }
 }
@@ -197,19 +191,17 @@ eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT f
   double z = xn[0];
   double z_m = 0.98;
   double z_max = app->z_max;
-  double sigma = 0.2*z_m;
+  double sigma = 0.2 * z_m;
   double center_potential = 8.0 * app->Te0 / app->qi;
-  if (fabs(z) <= sigma)
-  {
+  if (fabs(z) <= sigma) {
     fout[0] = center_potential;
   }
-  else
-  {
+  else {
     fout[0] = center_potential * (1 - (fabs(z) - sigma) / (z_max - sigma));
   }
 }
 
-void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double vpar_max_ion = app->vpar_max_ion;
@@ -217,20 +209,20 @@ void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *
 
   double cvpar = vc[0], cmu = vc[1];
   double b = 1.45;
-  double linear_velocity_threshold = 1./6.;
-  double frac_linear = 1/b*atan(linear_velocity_threshold*tan(b));
+  double linear_velocity_threshold = 1. / 6.;
+  double frac_linear = 1 / b * atan(linear_velocity_threshold * tan(b));
   if (fabs(cvpar) < frac_linear) {
-    double func_frac = tan(frac_linear*b) / tan(b);
-    vp[0] = vpar_max_ion*func_frac*cvpar/frac_linear;
+    double func_frac = tan(frac_linear * b) / tan(b);
+    vp[0] = vpar_max_ion * func_frac * cvpar / frac_linear;
   }
   else {
-    vp[0] = vpar_max_ion*tan(cvpar*b)/tan(b);
+    vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b);
   }
   // Quadratic map in mu.
-  vp[1] = mu_max_ion*pow(cmu,3);
+  vp[1] = mu_max_ion * pow(cmu, 3);
 }
 
-void mapc2p_vel_elc(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+void mapc2p_vel_elc(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double vpar_max_elc = app->vpar_max_elc;
@@ -238,17 +230,17 @@ void mapc2p_vel_elc(double t, const double *vc, double* GKYL_RESTRICT vp, void *
 
   double cvpar = vc[0], cmu = vc[1];
   double b = 1.45;
-  double linear_velocity_threshold = 1./6.;
-  double frac_linear = 1/b*atan(linear_velocity_threshold*tan(b));
+  double linear_velocity_threshold = 1. / 6.;
+  double frac_linear = 1 / b * atan(linear_velocity_threshold * tan(b));
   if (fabs(cvpar) < frac_linear) {
-    double func_frac = tan(frac_linear*b) / tan(b);
-    vp[0] = vpar_max_elc*func_frac*cvpar/frac_linear;
+    double func_frac = tan(frac_linear * b) / tan(b);
+    vp[0] = vpar_max_elc * func_frac * cvpar / frac_linear;
   }
   else {
-    vp[0] = vpar_max_elc*tan(cvpar*b)/tan(b);
+    vp[0] = vpar_max_elc * tan(cvpar * b) / tan(b);
   }
   // Quadratic map in mu.
-  vp[1] = mu_max_elc*pow(cmu,3.0/2.0);
+  vp[1] = mu_max_elc * pow(cmu, 3.0 / 2.0);
 }
 
 struct gk_mirror_ctx
@@ -281,15 +273,15 @@ create_ctx(void)
   double alphaIC1 = 10;
 
   double nuFrac = 1.0;
-  double elc_nuFrac = 1/5.489216862238348;
+  double elc_nuFrac = 1 / 5.489216862238348;
   // Electron-electron collision freq.
   double logLambdaElc = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Te0 / eV);
   double nuElc = elc_nuFrac * nuFrac * logLambdaElc * pow(eV, 4.) * n0 /
-                 (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.));
+    (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.));
   // Ion-ion collision freq.
   double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
   double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
-                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
 
   // Thermal speeds.
   double vti = sqrt(Ti0 / mi);
@@ -305,8 +297,8 @@ create_ctx(void)
 
   // Geometry parameters.
   double z_min = -2.0;
-  double z_max =  2.0;
-  double psi_eval= 1e-3;
+  double z_max = 2.0;
+  double psi_eval = 1e-3;
 
   // Grid parameters
   double vpar_max_elc = 30 * vte;
@@ -324,13 +316,13 @@ create_ctx(void)
   // Duration of each phase.
   double tau_oap = 1.5e-8;
   double tau_fdp = 1.5e-10;
-  double tau_fdp_extra = 2*tau_fdp;
+  double tau_fdp_extra = 2 * tau_fdp;
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
 
   // Frame counts for each phase type (specified independently)
   int num_frames_oap = 4; // Frames per OAP phase
   int num_frames_fdp = 4; // Frames per FDP phase
-  int num_frames_fdp_extra = 2*num_frames_fdp;  // Frames for the extra FDP phase
+  int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
 
   // Whether to evolve the field.
   bool is_static_field_oap = true;
@@ -343,39 +335,40 @@ create_ctx(void)
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
 
   // Calculate phase structure
-  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
-  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
-  int num_phases = 2*num_cycles + 1;
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2 * num_cycles + 1;
   int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
 
-  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
-  for (int i=0; i<(num_phases-1)/2; i++) {
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
     // OAPs.
-    poa_phases[2*i].phase = GK_POA_OAP;
-    poa_phases[2*i].num_frames = num_frames_oap;
-    poa_phases[2*i].duration = tau_oap;
-    poa_phases[2*i].alpha = alpha_oap;
-    poa_phases[2*i].is_static_field = is_static_field_oap;
-    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
-    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
 
     // FDPs.
-    poa_phases[2*i+1].phase = GK_POA_FDP;
-    poa_phases[2*i+1].num_frames = num_frames_fdp;
-    poa_phases[2*i+1].duration = tau_fdp;
-    poa_phases[2*i+1].alpha = alpha_fdp;
-    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
-    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
-    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
   }
   // Add an extra, longer FDP.
-  poa_phases[num_phases-1].phase = GK_POA_FDP;
-  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
-  poa_phases[num_phases-1].duration = tau_fdp_extra;
-  poa_phases[num_phases-1].alpha = alpha_fdp;
-  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
-  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
-  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
 
   double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
   double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
@@ -425,16 +418,16 @@ create_ctx(void)
     .Nz = Nz,
     .Nvpar = Nvpar,
     .Nmu = Nmu,
-    .cells = {Nz, Nvpar, Nmu},
+    .cells = { Nz, Nvpar, Nmu },
     .poly_order = poly_order,
     .t_end = t_end,
     .num_frames = num_frames,
     .num_phases = num_phases,
     .poa_phases = poa_phases,
-    .write_phase_freq     = write_phase_freq    , 
-    .int_diag_calc_freq   = int_diag_calc_freq  , 
-    .dt_failure_tol       = dt_failure_tol      , 
-    .num_failures_max     = num_failures_max    , 
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
   };
 
   return ctx;
@@ -447,25 +440,25 @@ release_ctx(struct gk_mirror_ctx *ctx)
 }
 
 void
-calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
   double t_curr, bool force_calc, double dt)
 {
   if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
     gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
     gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
 
-    if ( !(dt < 0.0) )
+    if (!(dt < 0.0) )
       gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
   }
 }
 
 void
-write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
-  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
 {
   bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
   if (trig_now_conf || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
     gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
 
     gkyl_gyrokinetic_app_write_field_energy(app);
@@ -475,7 +468,7 @@ write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
 
   bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
   if (trig_now_phase || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
 
     gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
   }
@@ -497,7 +490,7 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   double t_end = tfs->t_end;
   int frame_curr = tfs->frame_curr;
   int num_frames = tfs->num_frames;
-  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
 
   // Prevent division by zero when frame_curr equals num_frames
   int frames_remaining = num_frames - frame_curr;
@@ -511,15 +504,16 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   trig_write_phase->tcurr = t_curr;
   trig_write_phase->curr = frame_curr;
 
-  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  int diag_frames = GKYL_MAX2(frames_remaining,
+    (num_int_diag_calc / num_frames) * frames_remaining);
   trig_calc_intdiag->dt = time_remaining / diag_frames;
   trig_calc_intdiag->tcurr = t_curr;
   trig_calc_intdiag->curr = frame_curr;
 }
 
-void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
   struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
-  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
   struct gk_poa_phase_params *pparams)
 {
   tfs->t_end = tfs->t_curr + pparams->duration;
@@ -528,7 +522,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   // Run an OAP or FDP.
   double t_curr = tfs->t_curr;
   double t_end = tfs->t_end;
-  
+
   // Reset I/O triggers:
   reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
 
@@ -571,8 +565,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   int num_failures = 0, num_failures_max = ctx->num_failures_max;
 
   long step = 1;
-  while ((t_curr < t_end) && (step <= num_steps))
-  {
+  while ((t_curr < t_end) && (step <= num_steps)) {
     gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
 
     dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
@@ -580,8 +573,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
 
     gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
 
-    if (!status.success)
-    {
+    if (!status.success) {
       gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
       break;
     }
@@ -601,8 +593,10 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
       gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
       gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
       if (num_failures >= num_failures_max) {
-        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
-        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
         calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
         write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
         break;
@@ -615,7 +609,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   }
 
   tfs->t_curr = t_curr;
-  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
 }
 
 int main(int argc, char **argv)
@@ -634,10 +628,12 @@ int main(int argc, char **argv)
   struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
 
   int cells_x[ctx.cdim], cells_v[ctx.vdim];
-  for (int d=0; d<ctx.cdim; d++)
+  for (int d = 0; d < ctx.cdim; d++) {
     cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
-  for (int d=0; d<ctx.vdim; d++)
-    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
 
   // Construct communicator for use in app.
   struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
@@ -647,8 +643,8 @@ int main(int argc, char **argv)
     .charge = ctx.qe,
     .mass = ctx.me,
     .vdim = ctx.vdim,
-    .lower = {-1.0, 0.0},
-    .upper = { 1.0, 1.0},
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
     .cells = { cells_v[0], cells_v[1] },
 
     .polarization_density = ctx.n0,
@@ -673,7 +669,7 @@ int main(int argc, char **argv)
       .scale_factor = 1.0, // Will be replaced below.
     },
 
-    .collisions =  {
+    .collisions = {
       .collision_id = GKYL_LBO_COLLISIONS,
       .den_ref = ctx.n0,
       .temp_ref = ctx.Te0,
@@ -686,17 +682,18 @@ int main(int argc, char **argv)
       .source_id = GKYL_PROJ_SOURCE,
       .num_sources = 1,
       .projection[0] = {
-        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
         .ctx_density = &ctx,
         .density = eval_density_source,
         .ctx_upar = &ctx,
-        .upar= eval_upar_source,
+        .upar = eval_upar_source,
         .ctx_temp = &ctx,
-        .temp = eval_temp_elc_source,      
+        .temp = eval_temp_elc_source,
       },
       .diagnostics = {
         .num_diag_moments = 5,
-        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP },
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP },
         .num_integrated_diag_moments = 1,
         .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
       }
@@ -720,7 +717,9 @@ int main(int argc, char **argv)
 
     .write_omega_cfl = true,
     .num_diag_moments = 8,
-    .diag_moments = {GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
     .num_integrated_diag_moments = 1,
     .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
     .time_rate_diagnostics = true,
@@ -736,9 +735,9 @@ int main(int argc, char **argv)
     .charge = ctx.qi,
     .mass = ctx.mi,
     .vdim = ctx.vdim,
-    .lower = {-1.0, 0.0},
-    .upper = { 1.0, 1.0},
-    .cells = { cells_v[0], cells_v[1]},
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
+    .cells = { cells_v[0], cells_v[1] },
     .polarization_density = ctx.n0,
     .scale_with_polarization = true,
 
@@ -757,12 +756,11 @@ int main(int argc, char **argv)
       .ctx = &ctx,
     },
 
-
     .collisionless = {
       .type = GKYL_GK_COLLISIONLESS_ES,
       .scale_factor = 1.0, // Will be replaced below.
     },
-    
+
     .time_rate_multiplier = {
       .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
       .cellwise_const = true,
@@ -780,17 +778,18 @@ int main(int argc, char **argv)
       .source_id = GKYL_PROJ_SOURCE,
       .num_sources = 1,
       .projection[0] = {
-        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
         .ctx_density = &ctx,
         .density = eval_density_source,
         .ctx_upar = &ctx,
-        .upar= eval_upar_source,
+        .upar = eval_upar_source,
         .ctx_temp = &ctx,
-        .temp = eval_temp_ion_source,      
+        .temp = eval_temp_ion_source,
       },
       .diagnostics = {
         .num_diag_moments = 6,
-        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_HAMILTONIAN},
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_HAMILTONIAN },
         .num_integrated_diag_moments = 1,
         .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
       },
@@ -807,14 +806,16 @@ int main(int argc, char **argv)
     },
     .write_omega_cfl = true,
     .num_diag_moments = 8,
-    .diag_moments = {GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
     .num_integrated_diag_moments = 1,
     .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
     .time_rate_diagnostics = true,
 
     .boundary_flux_diagnostics = {
       .num_integrated_diag_moments = 1,
-      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP},
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
     },
   };
   struct gkyl_gyrokinetic_field field = {
@@ -830,7 +831,7 @@ int main(int argc, char **argv)
     .filename_psi = "gyrokinetic/data/unit/wham_hires.geqdsk_psi.gkyl", // psi file to use
     .rclose = 0.2, // closest R to region of interest
     .zmin = -2.0,  // Z of lower boundary
-    .zmax =  2.0,  // Z of upper boundary
+    .zmax = 2.0,   // Z of upper boundary
     .include_axis = false, // Include R=0 axis in grid
     .fl_coord = GKYL_GEOMETRY_MIRROR_GRID_GEN_PSI_CART_Z, // coordinate system for psi grid
   };
@@ -838,20 +839,20 @@ int main(int argc, char **argv)
   struct gkyl_gk app_inp = {  // GK app
     .name = "gk_wham_kinetic_poa_1x2v_p1",
     .cdim = ctx.cdim,
-    .lower = {ctx.z_min},
-    .upper = {ctx.z_max},
+    .lower = { ctx.z_min },
+    .upper = { ctx.z_max },
     .cells = { cells_x[0] },
     .poly_order = ctx.poly_order,
     .basis_type = app_args.basis_type,
     .geometry = {
       .geometry_id = GKYL_GEOMETRY_MIRROR,
-      .world = {ctx.psi_eval, 0.0},
+      .world = { ctx.psi_eval, 0.0 },
       .mirror_grid_info = grid_inp,
     },
     .num_periodic_dir = 0,
     .periodic_dirs = {},
     .num_species = 2,
-    .species = {elc, ion},
+    .species = { elc, ion },
     .field = field,
     .parallelism = {
       .use_gpu = app_args.use_gpu,
@@ -875,10 +876,12 @@ int main(int argc, char **argv)
 
   int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
   if (app_args.is_restart) {
-    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
 
     if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
-      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
       goto freeresources;
     }
 
@@ -889,14 +892,15 @@ int main(int argc, char **argv)
     double time_count = 0.0;
     int frame_count = 0;
     int pit_curr = 0;
-    for (int pit=0; pit<ctx.num_phases; pit++) {
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
       time_count += ctx.poa_phases[pit].duration;
       frame_count += ctx.poa_phases[pit].num_frames;
       if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
         pit_curr = pit;
         break;
       }
-    };
+    }
+    ;
     phase_idx_init = pit_curr;
 
     // Change the duration and number frames so this phase reaches the expected
@@ -922,9 +926,10 @@ int main(int argc, char **argv)
     phase_idx_end = 1;
 
   // Loop over number of number of phases;
-  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
     struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
-    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
   }
 
   gkyl_gyrokinetic_app_stat_write(app);
@@ -934,21 +939,22 @@ int main(int argc, char **argv)
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
-  if (stat.nstage_2_fail > 0)
-  {
-    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
-    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
   }
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
   gkyl_gyrokinetic_app_print_timings(app, stdout);
 
-  freeresources:
+freeresources:
   // simulation complete, free app
   gkyl_gyrokinetic_app_release(app);
   gkyl_gyrokinetic_comms_release(comm);
   release_ctx(&ctx);
-  
+
 #ifdef GKYL_HAVE_MPI
   if (app_args.use_mpi)
     MPI_Finalize();
diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index 7bd1331a0e..f3e4281caf 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -46,19 +46,19 @@ struct loss_cone_mask_test_ctx {
 static struct gkyl_array*
 mkarr(bool use_gpu, long nc, long size)
 {
-  struct gkyl_array* a = use_gpu? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size)
-	                        : gkyl_array_new(GKYL_DOUBLE, nc, size);
+  struct gkyl_array *a = use_gpu? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size)
+                          : gkyl_array_new(GKYL_DOUBLE, nc, size);
   return a;
 }
 
 void
-mapc2p_3x(double t, const double *xc, double* GKYL_RESTRICT xp, void *ctx)
+mapc2p_3x(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
 {
   xp[0] = xc[0]; xp[1] = xc[1]; xp[2] = xc[2];
 }
 
 void
-bfield_func_3x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+bfield_func_3x(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
   double x = xc[0], y = xc[1], z = xc[2];
 
@@ -68,12 +68,12 @@ bfield_func_3x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx
 
   fout[0] = 0.0;
   fout[1] = 0.0;
-  fout[2] = B_m * (1.0 - ((R_m-1.0)/R_m)*pow(cos(z), 2.0));
-//  fout[0] = (B_m/R_m) * (1.0 + (R_m-1.0)*pow(sin(z), 2.0));
+  fout[2] = B_m * (1.0 - ((R_m - 1.0) / R_m) * pow(cos(z), 2.0));
+// fout[0] = (B_m/R_m) * (1.0 + (R_m-1.0)*pow(sin(z), 2.0));
 }
 
 void
-phi_func_1x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+phi_func_1x(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
   double z = xc[0];
 
@@ -82,12 +82,12 @@ phi_func_1x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
   double T0 = params->T0;
   double eV = params->eV;
 
-  fout[0] = 0.0; //0.5 * phi_fac*T0/eV * (1.0 + cos(z));
+  fout[0] = 0.0; // 0.5 * phi_fac*T0/eV * (1.0 + cos(z));
 }
 
 // Non-zero electrostatic potential: peaked at center, zero at wall.
 void
-phi_func_1x_nonzero(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+phi_func_1x_nonzero(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
   double z = xc[0];
 
@@ -99,12 +99,12 @@ phi_func_1x_nonzero(double t, const double *xc, double* GKYL_RESTRICT fout, void
 
   // Parabolic potential profile: phi(z) = phi_fac*T0/eV * (1 - (z/z_max)^2)
   // This gives phi=phi_fac*T0/eV at z=0, and phi=0 at z=+/-z_max.
-  fout[0] = phi_fac*T0/eV * (1.0 - pow(z/z_max, 2.0));
+  fout[0] = phi_fac * T0 / eV * (1.0 - pow(z / z_max, 2.0));
 }
 
 // Reference mask for nonzero phi case.
 void
-mask_ref_1x2v_nonzero_phi(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+mask_ref_1x2v_nonzero_phi(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
   double z = xc[0], vpar = xc[1], mu = xc[2];
   struct loss_cone_mask_test_ctx *params = ctx;
@@ -118,7 +118,7 @@ mask_ref_1x2v_nonzero_phi(double t, const double *xc, double* GKYL_RESTRICT fout
   phi_func_1x_nonzero(t, &z_m, &phi_m, ctx);
 
   double bfield[3], bmag;
-  double zinfl[3] = {0.0}, z_minfl[3] = {0.0};
+  double zinfl[3] = { 0.0 }, z_minfl[3] = { 0.0 };
   zinfl[2] = z, z_minfl[2] = z_m;
   bfield_func_3x(t, zinfl, bfield, ctx);
   bmag = bfield[2];
@@ -128,16 +128,18 @@ mask_ref_1x2v_nonzero_phi(double t, const double *xc, double* GKYL_RESTRICT fout
   bmag_m = bfield_m[2];
 
   // mu_bound = (0.5*m*vpar^2+q*(phi-phi_m))/(B*(B_max/B-1))
-  double mu_bound = (0.5*mass*pow(vpar,2)+charge*(phi-phi_m))/(bmag*(bmag_m/bmag-1));
+  double mu_bound = (0.5 * mass * pow(vpar,
+    2) + charge * (phi - phi_m)) / (bmag * (bmag_m / bmag - 1));
   if (mu_bound < mu && fabs(z) < z_m) {
     fout[0] = 1.0;
-  } else {
+  }
+  else {
     fout[0] = 0.0;
   }
 }
 
 void
-mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+mask_ref_1x2v(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
   double z = xc[0], vpar = xc[1], mu = xc[2];
   struct loss_cone_mask_test_ctx *params = ctx;
@@ -151,7 +153,7 @@ mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
   phi_func_1x(t, &z_m, &phi_m, ctx);
 
   double bfield[3], bmag;
-  double zinfl[3] = {0.0}, z_minfl[3] = {0.0};
+  double zinfl[3] = { 0.0 }, z_minfl[3] = { 0.0 };
   zinfl[2] = z, z_minfl[2] = z_m;
   bfield_func_3x(t, zinfl, bfield, ctx);
   bmag = bfield[2];
@@ -161,7 +163,8 @@ mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
   bmag_m = bfield_m[2];
 
   // mu_bound = (0.5*m*vpar^2+q*(phi-phi_m))/(B*(B_max/B-1))
-  double mu_bound = (0.5*mass*pow(vpar,2)+charge*(phi-phi_m))/(bmag*(bmag_m/bmag-1));
+  double mu_bound = (0.5 * mass * pow(vpar,
+    2) + charge * (phi - phi_m)) / (bmag * (bmag_m / bmag - 1));
   if (mu_bound < mu && fabs(z) < z_m)
     fout[0] = 1.0;
   else
@@ -181,11 +184,11 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .eV = eV,
     .R_m = 8.0,
     .B_m = 4.0,
-    .z_m = M_PI/2.0,
-    .mass = 2.014*mass_proton,
+    .z_m = M_PI / 2.0,
+    .mass = 2.014 * mass_proton,
     .charge = eV,
     .n0 = 1e18,
-    .T0 = 100*eV,
+    .T0 = 100 * eV,
     .phi_fac = 3.0,
     .z_max = M_PI,
     .Nz = 8,
@@ -195,31 +198,32 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .num_quad = 2,
     .cellwise_trap_loss = true,
   };
-  ctx.B0 = ctx.B_m/2.0;
-  ctx.vpar_max = 6.0*sqrt(ctx.T0/ctx.mass);
-  ctx.mu_max = 0.5*ctx.mass*pow(ctx.vpar_max,2)/ctx.B0;
+  ctx.B0 = ctx.B_m / 2.0;
+  ctx.vpar_max = 6.0 * sqrt(ctx.T0 / ctx.mass);
+  ctx.mu_max = 0.5 * ctx.mass * pow(ctx.vpar_max, 2) / ctx.B0;
 
   double mass = ctx.mass;
-  double lower[] = {-ctx.z_max, -ctx.vpar_max, 0.0}, upper[] = {ctx.z_max, ctx.vpar_max, ctx.mu_max};
-  int cells[] = {ctx.Nz, ctx.Nvpar, ctx.Nmu};
-  const int ndim = sizeof(cells)/sizeof(cells[0]);
+  double lower[] = { -ctx.z_max, -ctx.vpar_max, 0.0 },
+    upper[] = { ctx.z_max, ctx.vpar_max, ctx.mu_max };
+  int cells[] = { ctx.Nz, ctx.Nvpar, ctx.Nmu };
+  const int ndim = sizeof(cells) / sizeof(cells[0]);
   const int cdim = ctx.cdim;
-  const int vdim = ndim-ctx.cdim;
+  const int vdim = ndim - ctx.cdim;
 
   // Grids.
   double lower_conf[cdim], upper_conf[cdim];
   int cells_conf[cdim];
-  for (int d=0; d<cdim; d++) {
+  for (int d = 0; d < cdim; d++) {
     lower_conf[d] = lower[d];
     upper_conf[d] = upper[d];
     cells_conf[d] = cells[d];
   }
   double lower_vel[vdim], upper_vel[vdim];
   int cells_vel[vdim];
-  for (int d=0; d<vdim; d++) {
-    lower_vel[d] = lower[cdim+d];
-    upper_vel[d] = upper[cdim+d];
-    cells_vel[d] = cells[cdim+d];
+  for (int d = 0; d < vdim; d++) {
+    lower_vel[d] = lower[cdim + d];
+    upper_vel[d] = upper[cdim + d];
+    cells_vel[d] = cells[cdim + d];
   }
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
@@ -230,7 +234,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 
   // Basis functions.
   struct gkyl_basis basis, basis_conf;
-  if (poly_order == 1) 
+  if (poly_order == 1)
     gkyl_cart_modal_gkhybrid(&basis, cdim, vdim);
   else
     gkyl_cart_modal_serendip(&basis, ndim, poly_order);
@@ -241,14 +245,14 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 #ifdef GKYL_HAVE_CUDA
     basis_on_dev = gkyl_cu_malloc(sizeof(struct gkyl_basis));
     basis_on_dev_conf = gkyl_cu_malloc(sizeof(struct gkyl_basis));
-    if (poly_order == 1) 
+    if (poly_order == 1)
       gkyl_cart_modal_gkhybrid_cu_dev(basis_on_dev, cdim, vdim);
     else
       gkyl_cart_modal_serendip_cu_dev(basis_on_dev, ndim, poly_order);
     gkyl_cart_modal_serendip_cu_dev(basis_on_dev_conf, cdim, poly_order);
 #endif
   }
-  else { 
+  else {
     basis_on_dev = &basis;
     basis_on_dev_conf = &basis_conf;
   }
@@ -263,7 +267,9 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   gkyl_create_grid_ranges(&grid_vel, ghost_vel, &local_ext_vel, &local_vel);
 
   int ghost[GKYL_MAX_DIM] = { 0 };
-  for (int d=0; d<cdim; d++) ghost[d] = ghost_conf[d];
+  for (int d = 0; d < cdim; d++) {
+    ghost[d] = ghost_conf[d];
+  }
   struct gkyl_range local, local_ext; // local, local-ext phase-space ranges
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -272,7 +278,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   // Initialize geometry
   struct gkyl_gk_geometry_inp geometry_input = {
     .geometry_id = GKYL_GEOMETRY_MAPC2P,
-    .world = {0.0, 0.0},
+    .world = { 0.0, 0.0 },
     .mapc2p = mapc2p_3x, // mapping of computational to physical space
     .c2p_ctx = 0,
     .bfield_func = bfield_func_3x, // magnetic field magnitude
@@ -286,14 +292,15 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .position_map = pmap,
   };
   geometry_input.geo_grid = gkyl_gk_geometry_augment_grid(grid_conf, geometry_input);
-  gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext, &geometry_input.geo_local);
+  gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext,
+    &geometry_input.geo_local);
   gkyl_cart_modal_serendip(&geometry_input.geo_basis, 3, poly_order);
-  struct gk_geometry* gk_geom_3d;
+  struct gk_geometry *gk_geom_3d;
   gk_geom_3d = gkyl_gk_geometry_mapc2p_new(&geometry_input);
   // Deflate geometry if necessary.
   struct gk_geometry *gk_geom = gkyl_gk_geometry_deflate(gk_geom_3d, &geometry_input);
   gkyl_gk_geometry_release(gk_geom_3d);
-  
+
   // Use array_dg_find_peaks to find bmag_max along the z direction.
   // Search along the parallel (z) direction, which is the last configuration space dimension.
   int search_dir = cdim - 1;
@@ -305,30 +312,35 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .search_dir = search_dir,
     .use_gpu = use_gpu,
   };
-  struct gkyl_array_dg_find_peaks *bmag_peak_finder = 
+  struct gkyl_array_dg_find_peaks *bmag_peak_finder =
     gkyl_array_dg_find_peaks_new(&peak_inp, gk_geom->geo_int.bmag);
   gkyl_array_dg_find_peaks_advance(bmag_peak_finder, gk_geom->geo_int.bmag);
-  
+
   // Get the LOCAL_MAX peak (bmag maximum along z direction).
   int num_peaks = gkyl_array_dg_find_peaks_num_peaks(bmag_peak_finder);
   int bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less
-  const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, bmag_max_peak_idx);
-  const struct gkyl_array *bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, bmag_max_peak_idx);
-  const struct gkyl_array *bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, num_peaks-1); // First peak is wall
-  const struct gkyl_array *bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, num_peaks-1);
+  const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder,
+    bmag_max_peak_idx);
+  const struct gkyl_array *bmag_max_z_coord =
+    gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_array *bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder,
+    num_peaks - 1);                                                                                          // First peak is wall
+  const struct gkyl_array *bmag_wall_z_coord =
+    gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, num_peaks - 1);
   const struct gkyl_basis *bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(bmag_peak_finder);
   const struct gkyl_range *bmag_max_range = gkyl_array_dg_find_peaks_get_range(bmag_peak_finder);
-  const struct gkyl_range *bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder);
-  
+  const struct gkyl_range *bmag_max_range_ext =
+    gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder);
+
   // Allocate arrays for phi evaluated at all peak locations.
-  struct gkyl_array **phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array*));
+  struct gkyl_array **phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array *));
   for (int p = 0; p < num_peaks; p++) {
     phi_at_peaks[p] = mkarr(use_gpu, bmag_max_basis->num_basis, bmag_max_range_ext->volume);
   }
-  
+
   // If we are on the gpu, copy from host
   if (use_gpu) {
-    struct gk_geometry* gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
+    struct gk_geometry *gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
     gkyl_gk_geometry_release(gk_geom);
     gk_geom = gkyl_gk_geometry_acquire(gk_geom_dev);
     gkyl_gk_geometry_release(gk_geom_dev);
@@ -342,7 +354,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   // Project the electostatic potential.
   struct gkyl_array *phi = mkarr(use_gpu, basis_conf.num_basis, local_ext_conf.volume);
   struct gkyl_array *phi_ho = use_gpu? mkarr(false, phi->ncomp, phi->size)
-	                             : gkyl_array_acquire(phi);
+                               : gkyl_array_acquire(phi);
 
   gkyl_eval_on_nodes *evphi = gkyl_eval_on_nodes_new(&grid_conf, &basis_conf, 1, phi_func_1x, &ctx);
   gkyl_eval_on_nodes_advance(evphi, 0.0, &local_conf, phi_ho);
@@ -351,7 +363,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 
   // Project phi onto peak locations to get phi_m at the mirror throat.
   gkyl_array_dg_find_peaks_project_on_peaks(bmag_peak_finder, phi, phi_at_peaks);
-  
+
   // Get phi at the mirror throat (bmag_max peak location).
   const struct gkyl_array *phi_m = phi_at_peaks[bmag_max_peak_idx];
 
@@ -359,10 +371,12 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   struct gkyl_basis basis_mask;
   if (ctx.num_quad == 1 || ctx.cellwise_trap_loss) {
     gkyl_cart_modal_serendip(&basis_mask, ndim, 0);
-  } else {
+  }
+  else {
     if (poly_order == 1) {
       gkyl_cart_modal_gkhybrid(&basis_mask, cdim, vdim);
-    } else {
+    }
+    else {
       gkyl_cart_modal_serendip(&basis_mask, ndim, poly_order);
     }
   }
@@ -370,7 +384,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   // Create mask array.
   struct gkyl_array *mask = mkarr(use_gpu, basis_mask.num_basis, local_ext.volume);
   struct gkyl_array *mask_ho = use_gpu? mkarr(false, mask->ncomp, mask->size)
-	                              : gkyl_array_acquire(mask);
+                                : gkyl_array_acquire(mask);
 
   // Project the loss cone mask.
   // Use bmag_max and bmag_max_z_coord arrays from find_peaks.
@@ -378,9 +392,9 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .phase_grid = &grid,
     .conf_basis = &basis_conf,
     .phase_basis = &basis,
-    .conf_range =  &local_conf,
+    .conf_range = &local_conf,
     .conf_range_ext = &local_ext_conf,
-    .vel_range = &local_vel, 
+    .vel_range = &local_vel,
     .vel_map = gvm,
     .bmag = gk_geom->geo_int.bmag,
     .bmag_max_z_coord = bmag_max_z_coord,
@@ -396,7 +410,8 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .cellwise_trap_loss = ctx.cellwise_trap_loss,
     .use_gpu = use_gpu,
   };
-  struct gkyl_loss_cone_mask_gyrokinetic *proj_mask = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
+  struct gkyl_loss_cone_mask_gyrokinetic *proj_mask =
+    gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
 
   gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, phi_m, mask);
 
@@ -404,39 +419,40 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 
   // Project expected mask.
   struct gkyl_array *mask_ref_ho = mkarr(false, basis_mask.num_basis, local_ext.volume);
-  gkyl_proj_on_basis *evmask_ref = gkyl_proj_on_basis_new(&grid, &basis_mask, basis_mask.poly_order+1, 1, mask_ref_1x2v, &ctx);
+  gkyl_proj_on_basis *evmask_ref = gkyl_proj_on_basis_new(&grid, &basis_mask,
+    basis_mask.poly_order + 1, 1, mask_ref_1x2v, &ctx);
   gkyl_proj_on_basis_advance(evmask_ref, 0.0, &local, mask_ref_ho);
   gkyl_proj_on_basis_release(evmask_ref);
 
-
-//  // values to compare  at index (1, 9, 9) [remember, lower-left index is (1,1,1)]
-//  double p1_vals[] = {  
-//     7.2307139183122714e-03, 0.0000000000000000e+00, 1.9198293226362615e-04, -7.7970439910196674e-04, 0.0000000000000000e+00, 0.0000000000000000e+00,
-//    -2.0701958137127286e-05, 0.0000000000000000e+00, -1.4953406100022537e-04, 0.0000000000000000e+00, 1.6124599381836546e-05, 0.0000000000000000e+00,
-//    -8.2719200283232917e-19, 0.0000000000000000e+00, -3.4806248503322844e-20, 0.0000000000000000e+00, };
-//  double p2_vals[] = { 
-//    7.2307468609012666e-03, 0.0000000000000000e+00, 1.9198380692343289e-04, -7.8092230706225602e-04, 0.0000000000000000e+00, 0.0000000000000000e+00,
-//    -2.0734294852987710e-05, 3.6591823321385775e-18, -1.4953474226616330e-04, 3.7739922227981074e-05, 0.0000000000000000e+00, 7.0473141211557788e-19,
-//    0.0000000000000000e+00, -4.8789097761847700e-19, 1.6149786206441256e-05, 0.0000000000000000e+00, 1.0020339643610290e-06, 5.4210108624275222e-20,
-//    0.0000000000000000e+00, 0.0000000000000000e+00 };
+//// values to compare  at index (1, 9, 9) [remember, lower-left index is (1,1,1)]
+// double p1_vals[] = {
+// 7.2307139183122714e-03, 0.0000000000000000e+00, 1.9198293226362615e-04, -7.7970439910196674e-04, 0.0000000000000000e+00, 0.0000000000000000e+00,
+// -2.0701958137127286e-05, 0.0000000000000000e+00, -1.4953406100022537e-04, 0.0000000000000000e+00, 1.6124599381836546e-05, 0.0000000000000000e+00,
+// -8.2719200283232917e-19, 0.0000000000000000e+00, -3.4806248503322844e-20, 0.0000000000000000e+00, };
+// double p2_vals[] = {
+// 7.2307468609012666e-03, 0.0000000000000000e+00, 1.9198380692343289e-04, -7.8092230706225602e-04, 0.0000000000000000e+00, 0.0000000000000000e+00,
+// -2.0734294852987710e-05, 3.6591823321385775e-18, -1.4953474226616330e-04, 3.7739922227981074e-05, 0.0000000000000000e+00, 7.0473141211557788e-19,
+// 0.0000000000000000e+00, -4.8789097761847700e-19, 1.6149786206441256e-05, 0.0000000000000000e+00, 1.0020339643610290e-06, 5.4210108624275222e-20,
+// 0.0000000000000000e+00, 0.0000000000000000e+00 };
 //
-//  const double *fv = gkyl_array_cfetch(distf, gkyl_range_idx(&local_ext, (int[3]) { 1, 9, 9 }));
-//  if (poly_order == 1) {
-//    for (int i=0; i<basis.num_basis; ++i) {
-//      TEST_CHECK( gkyl_compare_double(p1_vals[i], fv[i], 1e-2) );
-//    }
-//  }
+// const double *fv = gkyl_array_cfetch(distf, gkyl_range_idx(&local_ext, (int[3]) { 1, 9, 9 }));
+// if (poly_order == 1) {
+// for (int i=0; i<basis.num_basis; ++i) {
+// TEST_CHECK( gkyl_compare_double(p1_vals[i], fv[i], 1e-2) );
+// }
+// }
 //
-//  if (poly_order == 2) {
-//    for (int i=0; i<basis.num_basis; ++i)
-//      TEST_CHECK( gkyl_compare_double(p2_vals[i], fv[i], 1e-2) );
-//  }
+// if (poly_order == 2) {
+// for (int i=0; i<basis.num_basis; ++i)
+// TEST_CHECK( gkyl_compare_double(p2_vals[i], fv[i], 1e-2) );
+// }
 
   // Write mask to file.
   char fname[1024];
   if (use_gpu) {
     sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_dev.gkyl", poly_order);
-  } else {
+  }
+  else {
     sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_ho.gkyl", poly_order);
   }
   gkyl_grid_sub_array_write(&grid, &local, 0, mask_ho, fname);
@@ -449,9 +465,9 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     gkyl_array_release(phi_at_peaks[p]);
   }
   gkyl_free(phi_at_peaks);
-  gkyl_array_release(phi); 
-  gkyl_array_release(phi_ho); 
-  gkyl_array_release(mask); 
+  gkyl_array_release(phi);
+  gkyl_array_release(phi_ho);
+  gkyl_array_release(mask);
   gkyl_array_release(mask_ho);
   gkyl_array_release(mask_ref_ho);
   gkyl_loss_cone_mask_gyrokinetic_release(proj_mask);
@@ -470,7 +486,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     gkyl_cu_free(basis_on_dev);
     gkyl_cu_free(basis_on_dev_conf);
   }
-#endif  
+#endif
 }
 
 // Test with non-zero electrostatic potential.
@@ -488,11 +504,11 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
     .eV = eV,
     .R_m = 8.0,
     .B_m = 4.0,
-    .z_m = M_PI/2.0,
-    .mass = 2.014*mass_proton,
+    .z_m = M_PI / 2.0,
+    .mass = 2.014 * mass_proton,
     .charge = eV,  // Positive ions.
     .n0 = 1e18,
-    .T0 = 100*eV,
+    .T0 = 100 * eV,
     .phi_fac = 3.0,  // phi(z=0) = 3*T0/e = 300 V.
     .z_max = M_PI,
     .Nz = 8,
@@ -502,31 +518,31 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
     .num_quad = 2,
     .cellwise_trap_loss = true,
   };
-  ctx.B0 = ctx.B_m/2.0;
-  ctx.vpar_max = 6.0*sqrt(ctx.T0/ctx.mass);
-  ctx.mu_max = 0.5*ctx.mass*pow(ctx.vpar_max,2)/ctx.B0;
-
-  double lower[] = {-ctx.z_max, -ctx.vpar_max, 0.0};
-  double upper[] = {ctx.z_max, ctx.vpar_max, ctx.mu_max};
-  int cells[] = {ctx.Nz, ctx.Nvpar, ctx.Nmu};
-  const int ndim = sizeof(cells)/sizeof(cells[0]);
+  ctx.B0 = ctx.B_m / 2.0;
+  ctx.vpar_max = 6.0 * sqrt(ctx.T0 / ctx.mass);
+  ctx.mu_max = 0.5 * ctx.mass * pow(ctx.vpar_max, 2) / ctx.B0;
+
+  double lower[] = { -ctx.z_max, -ctx.vpar_max, 0.0 };
+  double upper[] = { ctx.z_max, ctx.vpar_max, ctx.mu_max };
+  int cells[] = { ctx.Nz, ctx.Nvpar, ctx.Nmu };
+  const int ndim = sizeof(cells) / sizeof(cells[0]);
   const int cdim = ctx.cdim;
   const int vdim = ndim - ctx.cdim;
 
   // Grids.
   double lower_conf[cdim], upper_conf[cdim];
   int cells_conf[cdim];
-  for (int d=0; d<cdim; d++) {
+  for (int d = 0; d < cdim; d++) {
     lower_conf[d] = lower[d];
     upper_conf[d] = upper[d];
     cells_conf[d] = cells[d];
   }
   double lower_vel[vdim], upper_vel[vdim];
   int cells_vel[vdim];
-  for (int d=0; d<vdim; d++) {
-    lower_vel[d] = lower[cdim+d];
-    upper_vel[d] = upper[cdim+d];
-    cells_vel[d] = cells[cdim+d];
+  for (int d = 0; d < vdim; d++) {
+    lower_vel[d] = lower[cdim + d];
+    upper_vel[d] = upper[cdim + d];
+    cells_vel[d] = cells[cdim + d];
   }
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
@@ -539,7 +555,8 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
   struct gkyl_basis basis, basis_conf;
   if (poly_order == 1) {
     gkyl_cart_modal_gkhybrid(&basis, cdim, vdim);
-  } else {
+  }
+  else {
     gkyl_cart_modal_serendip(&basis, ndim, poly_order);
   }
   gkyl_cart_modal_serendip(&basis_conf, cdim, poly_order);
@@ -551,12 +568,14 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
     basis_on_dev_conf = gkyl_cu_malloc(sizeof(struct gkyl_basis));
     if (poly_order == 1) {
       gkyl_cart_modal_gkhybrid_cu_dev(basis_on_dev, cdim, vdim);
-    } else {
+    }
+    else {
       gkyl_cart_modal_serendip_cu_dev(basis_on_dev, ndim, poly_order);
     }
     gkyl_cart_modal_serendip_cu_dev(basis_on_dev_conf, cdim, poly_order);
 #endif
-  } else { 
+  }
+  else {
     basis_on_dev = &basis;
     basis_on_dev_conf = &basis_conf;
   }
@@ -571,7 +590,9 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
   gkyl_create_grid_ranges(&grid_vel, ghost_vel, &local_ext_vel, &local_vel);
 
   int ghost[GKYL_MAX_DIM] = { 0 };
-  for (int d=0; d<cdim; d++) { ghost[d] = ghost_conf[d]; }
+  for (int d = 0; d < cdim; d++) {
+    ghost[d] = ghost_conf[d];
+  }
   struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -580,7 +601,7 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
   // Initialize geometry.
   struct gkyl_gk_geometry_inp geometry_input = {
     .geometry_id = GKYL_GEOMETRY_MAPC2P,
-    .world = {0.0, 0.0},
+    .world = { 0.0, 0.0 },
     .mapc2p = mapc2p_3x,
     .c2p_ctx = 0,
     .bfield_func = bfield_func_3x,
@@ -594,12 +615,13 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
     .position_map = pmap,
   };
   geometry_input.geo_grid = gkyl_gk_geometry_augment_grid(grid_conf, geometry_input);
-  gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext, &geometry_input.geo_local);
+  gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext,
+    &geometry_input.geo_local);
   gkyl_cart_modal_serendip(&geometry_input.geo_basis, 3, poly_order);
-  struct gk_geometry* gk_geom_3d = gkyl_gk_geometry_mapc2p_new(&geometry_input);
+  struct gk_geometry *gk_geom_3d = gkyl_gk_geometry_mapc2p_new(&geometry_input);
   struct gk_geometry *gk_geom = gkyl_gk_geometry_deflate(gk_geom_3d, &geometry_input);
   gkyl_gk_geometry_release(gk_geom_3d);
-  
+
   // Use array_dg_find_peaks to find bmag_max.
   int search_dir = cdim - 1;
   struct gkyl_array_dg_find_peaks_inp peak_inp = {
@@ -610,28 +632,33 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
     .search_dir = search_dir,
     .use_gpu = use_gpu,
   };
-  struct gkyl_array_dg_find_peaks *bmag_peak_finder = 
+  struct gkyl_array_dg_find_peaks *bmag_peak_finder =
     gkyl_array_dg_find_peaks_new(&peak_inp, gk_geom->geo_int.bmag);
   gkyl_array_dg_find_peaks_advance(bmag_peak_finder, gk_geom->geo_int.bmag);
-  
+
   int num_peaks = gkyl_array_dg_find_peaks_num_peaks(bmag_peak_finder);
   int bmag_max_peak_idx = num_peaks - 2;
-  const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, bmag_max_peak_idx);
-  const struct gkyl_array *bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, bmag_max_peak_idx);
-  const struct gkyl_array *bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, num_peaks-1);
-  const struct gkyl_array *bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, num_peaks-1);
+  const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder,
+    bmag_max_peak_idx);
+  const struct gkyl_array *bmag_max_z_coord =
+    gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_array *bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder,
+    num_peaks - 1);
+  const struct gkyl_array *bmag_wall_z_coord =
+    gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, num_peaks - 1);
   const struct gkyl_basis *bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(bmag_peak_finder);
   const struct gkyl_range *bmag_max_range = gkyl_array_dg_find_peaks_get_range(bmag_peak_finder);
-  const struct gkyl_range *bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder);
-  
+  const struct gkyl_range *bmag_max_range_ext =
+    gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder);
+
   // Allocate arrays for phi evaluated at peak locations.
-  struct gkyl_array **phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array*));
+  struct gkyl_array **phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array *));
   for (int p = 0; p < num_peaks; p++) {
     phi_at_peaks[p] = mkarr(use_gpu, bmag_max_basis->num_basis, bmag_max_range_ext->volume);
   }
-  
+
   if (use_gpu) {
-    struct gk_geometry* gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
+    struct gk_geometry *gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
     gkyl_gk_geometry_release(gk_geom);
     gk_geom = gkyl_gk_geometry_acquire(gk_geom_dev);
     gkyl_gk_geometry_release(gk_geom_dev);
@@ -647,7 +674,8 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
   struct gkyl_array *phi_ho = use_gpu ? mkarr(false, phi->ncomp, phi->size)
                                       : gkyl_array_acquire(phi);
 
-  gkyl_eval_on_nodes *evphi = gkyl_eval_on_nodes_new(&grid_conf, &basis_conf, 1, phi_func_1x_nonzero, &ctx);
+  gkyl_eval_on_nodes *evphi = gkyl_eval_on_nodes_new(&grid_conf, &basis_conf, 1,
+    phi_func_1x_nonzero, &ctx);
   gkyl_eval_on_nodes_advance(evphi, 0.0, &local_conf, phi_ho);
   gkyl_eval_on_nodes_release(evphi);
   gkyl_array_copy(phi, phi_ho);
@@ -660,10 +688,12 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
   struct gkyl_basis basis_mask;
   if (ctx.num_quad == 1 || ctx.cellwise_trap_loss) {
     gkyl_cart_modal_serendip(&basis_mask, ndim, 0);
-  } else {
+  }
+  else {
     if (poly_order == 1) {
       gkyl_cart_modal_gkhybrid(&basis_mask, cdim, vdim);
-    } else {
+    }
+    else {
       gkyl_cart_modal_serendip(&basis_mask, ndim, poly_order);
     }
   }
@@ -678,9 +708,9 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
     .phase_grid = &grid,
     .conf_basis = &basis_conf,
     .phase_basis = &basis,
-    .conf_range =  &local_conf,
+    .conf_range = &local_conf,
     .conf_range_ext = &local_ext_conf,
-    .vel_range = &local_vel, 
+    .vel_range = &local_vel,
     .vel_map = gvm,
     .bmag = gk_geom->geo_int.bmag,
     .bmag_max_z_coord = bmag_max_z_coord,
@@ -696,7 +726,8 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
     .cellwise_trap_loss = ctx.cellwise_trap_loss,
     .use_gpu = use_gpu,
   };
-  struct gkyl_loss_cone_mask_gyrokinetic *proj_mask = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
+  struct gkyl_loss_cone_mask_gyrokinetic *proj_mask =
+    gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
 
   gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, phi_m, mask);
 
@@ -704,71 +735,76 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
 
   // Verify physical properties of the mask:
   // 1. At the center (z≈0), high-mu particles should be trapped (mask=1)
-  // 2. At the wall (|z| ≈ z_max), particles should not be in the trapped region  
+  // 2. At the wall (|z| ≈ z_max), particles should not be in the trapped region
   // 3. Low-mu particles near center should be passing (mask=0)
-  
+
   // Check specific cells to verify correct behavior.
   // Cell indices: [iz, ivpar, imu] where each starts at 1 in local range.
   // Grid: z in [-pi, pi], vpar in [-vpar_max, vpar_max], mu in [0, mu_max]
   // Central z cells are around iz=4,5 (8 cells, symmetric)
   // High mu cells are imu=3,4 (4 cells)
   // Low mu cells are imu=1
-  
+
   int num_trapped_high_mu_center = 0;
   int num_passing_low_mu_center = 0;
   int total_high_mu_center = 0;
   int total_low_mu_center = 0;
-  
+
   struct gkyl_range_iter iter;
   gkyl_range_iter_init(&iter, &local);
   while (gkyl_range_iter_next(&iter)) {
     int iz = iter.idx[0];
     int imu = iter.idx[2];
-    
+
     // Determine if we're at center (iz = 4 or 5 for 8 cells in [-pi, pi])
     // and if we're at high mu (imu = 3 or 4) or low mu (imu = 1)
     bool is_center = (iz == 4 || iz == 5);
     bool is_high_mu = (imu == 3 || imu == 4);
     bool is_low_mu = (imu == 1);
-    
+
     long linidx = gkyl_range_idx(&local, iter.idx);
     const double *mask_val = gkyl_array_cfetch(mask_ho, linidx);
-    
+
     if (is_center && is_high_mu) {
       total_high_mu_center++;
-      if (mask_val[0] > 0.5) { num_trapped_high_mu_center++; }
+      if (mask_val[0] > 0.5) {
+        num_trapped_high_mu_center++;
+      }
     }
     if (is_center && is_low_mu) {
       total_low_mu_center++;
-      if (mask_val[0] < 0.5) { num_passing_low_mu_center++; }
+      if (mask_val[0] < 0.5) {
+        num_passing_low_mu_center++;
+      }
     }
   }
-  
+
   // High mu particles at center should mostly be trapped.
   double trapped_frac = (double)num_trapped_high_mu_center / (double)total_high_mu_center;
-  printf("Trapped fraction for high-mu center particles: %g (%d / %d)\n", 
-         trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
+  printf("Trapped fraction for high-mu center particles: %g (%d / %d)\n",
+    trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
   TEST_CHECK(trapped_frac >= 0.5);
   if (trapped_frac < 0.5) {
-    printf("High-mu center trapped fraction: %g (%d / %d)\n", 
-           trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
+    printf("High-mu center trapped fraction: %g (%d / %d)\n",
+      trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
   }
-  
+
   // Low mu particles at center should mostly be passing.
   double passing_frac = (double)num_passing_low_mu_center / (double)total_low_mu_center;
-  printf("Passing fraction for low-mu center particles: %g (%d / %d)\n", 
-         passing_frac, num_passing_low_mu_center, total_low_mu_center);
+  printf("Passing fraction for low-mu center particles: %g (%d / %d)\n",
+    passing_frac, num_passing_low_mu_center, total_low_mu_center);
   TEST_CHECK(passing_frac >= 0.5);
   if (passing_frac < 0.5) {
-    printf("Low-mu center passing fraction: %g (%d / %d)\n", 
-           passing_frac, num_passing_low_mu_center, total_low_mu_center);
+    printf("Low-mu center passing fraction: %g (%d / %d)\n",
+      passing_frac, num_passing_low_mu_center, total_low_mu_center);
   }
 
   // Write output for debugging.
   char fname[1024];
   if (use_gpu) {
     sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_nonzero_phi_p%d_dev.gkyl", poly_order);
-  } else {
+  }
+  else {
     sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_nonzero_phi_p%d_ho.gkyl", poly_order);
   }
   gkyl_grid_sub_array_write(&grid, &local, 0, mask_ho, fname);
@@ -778,9 +814,9 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
     gkyl_array_release(phi_at_peaks[p]);
   }
   gkyl_free(phi_at_peaks);
-  gkyl_array_release(phi); 
-  gkyl_array_release(phi_ho); 
-  gkyl_array_release(mask); 
+  gkyl_array_release(phi);
+  gkyl_array_release(phi_ho);
+  gkyl_array_release(mask);
   gkyl_array_release(mask_ho);
   gkyl_loss_cone_mask_gyrokinetic_release(proj_mask);
   gkyl_velocity_map_release(gvm);
@@ -797,15 +833,30 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
     gkyl_cu_free(basis_on_dev);
     gkyl_cu_free(basis_on_dev_conf);
   }
-#endif  
+#endif
 }
 
-void test_1x2v_p1_gk_ho() { test_1x2v_gk(1, false); }
-void test_1x2v_p1_nonzero_phi_gk_ho() { test_1x2v_nonzero_phi_gk(1, false); }
+void test_1x2v_p1_gk_ho()
+{
+  test_1x2v_gk(1, false);
+}
+
+void test_1x2v_p1_nonzero_phi_gk_ho()
+{
+  test_1x2v_nonzero_phi_gk(1, false);
+}
 
 #ifdef GKYL_HAVE_CUDA
-void test_1x2v_p1_gk_dev() { test_1x2v_gk(1, true); }
-void test_1x2v_p1_nonzero_phi_gk_dev() { test_1x2v_nonzero_phi_gk(1, true); }
+void test_1x2v_p1_gk_dev()
+{
+  test_1x2v_gk(1, true);
+}
+
+void test_1x2v_p1_nonzero_phi_gk_dev()
+{
+  test_1x2v_nonzero_phi_gk(1, true);
+}
+
 #endif
 
 TEST_LIST = {
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
index c4b67aab06..08ecea8f56 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
@@ -14,9 +14,9 @@ typedef struct gkyl_loss_cone_mask_gyrokinetic gkyl_loss_cone_mask_gyrokinetic;
 typedef void (*loss_cone_mask_gyrokinetic_c2p_t)(const double *xcomp, double *xphys, void *ctx);
 
 // Available options:
-//   A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
-//   B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=true. Output: ncomp=1 array.
-//   C) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=false. Output: ncomp=phase_basis.ncomp array.
+// A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
+// B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=true. Output: ncomp=1 array.
+// C) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=false. Output: ncomp=phase_basis.ncomp array.
 
 // Inputs packaged as a struct.
 struct gkyl_loss_cone_mask_gyrokinetic_inp {
@@ -44,7 +44,7 @@ struct gkyl_loss_cone_mask_gyrokinetic_inp {
   bool cellwise_trap_loss; // =True takes a whole cell to be either trapped or passing,
                            // so not high-order distinction within the cell is made.
   loss_cone_mask_gyrokinetic_c2p_t c2p_pos_func; // Function that transforms a set of cdim
-                                    // position-space computational coordinates to physical ones.
+  // position-space computational coordinates to physical ones.
   void *c2p_pos_func_ctx; // Context for c2p_pos_func.
   bool use_gpu; // Whether to run on GPU.
 };
@@ -61,7 +61,7 @@ struct gkyl_loss_cone_mask_gyrokinetic_inp {
  * @param inp Input parameters defined in gkyl_loss_cone_mask_gyrokinetic_inp struct.
  * @return New updater pointer.
  */
-struct gkyl_loss_cone_mask_gyrokinetic* 
+struct gkyl_loss_cone_mask_gyrokinetic*
 gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokinetic_inp *inp);
 
 /**
@@ -85,4 +85,4 @@ void gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up
  *
  * @param up Updater to delete.
  */
-void gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up);
+void gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic *up);
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
index 6a28771efd..6fab5f7ed0 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
@@ -8,26 +8,30 @@
 #include <gkyl_mat.h>
 #include <gkyl_mat_priv.h>
 #include <gkyl_range.h>
-#include <gkyl_rect_grid.h> 
+#include <gkyl_rect_grid.h>
 #include <gkyl_util.h>
 #include <assert.h>
 
 GKYL_CU_DH
 static inline void
 log_to_comp(int ndim, const double *eta,
-  const double * GKYL_RESTRICT dx, const double * GKYL_RESTRICT xc,
-  double* GKYL_RESTRICT xout)
+  const double *GKYL_RESTRICT dx, const double *GKYL_RESTRICT xc,
+  double *GKYL_RESTRICT xout)
 {
-  for (int d=0; d<ndim; ++d) xout[d] = 0.5*dx[d]*eta[d]+xc[d];
+  for (int d = 0; d < ndim; ++d) {
+    xout[d] = 0.5 * dx[d] * eta[d] + xc[d];
+  }
 }
 
 static inline void
 copy_idx_arrays(int cdim, int pdim, const int *cidx, const int *vidx, int *out)
 {
-  for (int i=0; i<cdim; ++i)
+  for (int i = 0; i < cdim; ++i) {
     out[i] = cidx[i];
-  for (int i=cdim; i<pdim; ++i)
-    out[i] = vidx[i-cdim];
+  }
+  for (int i = cdim; i < pdim; ++i) {
+    out[i] = vidx[i - cdim];
+  }
 }
 
 struct gkyl_loss_cone_mask_gyrokinetic {
@@ -44,7 +48,7 @@ struct gkyl_loss_cone_mask_gyrokinetic {
 
   double mass; // Species mass.
   double charge; // Species charge.
-  
+
   // Per-field-line bmag_max arrays (1D for 2x, scalar for 1x).
   const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
   const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
@@ -55,12 +59,12 @@ struct gkyl_loss_cone_mask_gyrokinetic {
   const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
   struct gkyl_basis *bmag_max_basis_on_dev; // Device-resident basis with device-callable function pointers.
   const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
-  
+
   // GPU helper: scalar bmag_max_z value for simple 1x cases.
   // TODO: For 2x GPU support, need to pass full arrays and do per-cell lookup.
   double *bmag_max_z_scalar_gpu; // Single z-coordinate for GPU (1x case only).
   double *bmag_wall_z_scalar_gpu; // Single z-coordinate for GPU (1x case only).
-  
+
   bool is_tandem; // Whether we are dealing with a tandem mirror case.
   bool use_gpu; // Boolean if we are performing projection on device.
 
@@ -91,19 +95,20 @@ struct gkyl_loss_cone_mask_gyrokinetic {
   struct gkyl_array *qDphiDbmag_quad; // Array keeping q*(phi-phi_m)/(B_max-B)
                                       // at configuration-space quadrature nodes.
   struct gkyl_array *qDphiDbmag_quad_wall; // Array keeping q*phi/(B_wall-B)
-                                      // at configuration-space quadrature nodes.
+  // at configuration-space quadrature nodes.
   struct gkyl_array *qDphiDbmag_quad_tandem; // Array keeping q*(phi-phi_tandem)/(B_tandem-B)
-                                      // at configuration-space quadrature nodes.
+  // at configuration-space quadrature nodes.
   struct gkyl_array *Dbmag_quad; // B_max-B at configuration-space quadrature nodes.
   struct gkyl_array *Dbmag_quad_wall; // B-B_wall at configuration-space quadrature nodes.
   struct gkyl_array *Dbmag_quad_tandem; // B_tandem-B at configuration-space quadrature nodes.
 
-  struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem; // Structure of data which converts  
+  struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem; // Structure of data which converts
                                                           // stores the info to convert phase
                                                           // space nodal to modal gkyl arrays.
 };
 
 #ifdef GKYL_HAVE_CUDA
+
 /**
  * Obtain bmag_peak-bmag at conf-space quadrature nodes and store it in Dbmag_quad.
  *
@@ -113,7 +118,7 @@ struct gkyl_loss_cone_mask_gyrokinetic {
  * @param Dbmag_quad Output array (bmag_peak - bmag) at quadrature nodes.
  * @param bmag_peak Peak bmag value (per-field-line array for 2x, scalar for 1x).
  */
-void 
+void
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
   struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_peak);
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index b02101a2c4..7f6627b2a5 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -14,8 +14,8 @@
 
 //
 // mu_bound = (0.5*mass*pow(vpar,2)+charge*Delta_phi)/(bmag[0]*(Rm-1));
-//          = 0.5*mass*pow(vpar,2)/(bmag[0]*(Rm-1)) + charge*Delta_phi/(bmag[0]*(Rm-1));
-//          = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]) + charge*(phi-phi_m)/(bmag_max-bmag[0]);
+// = 0.5*mass*pow(vpar,2)/(bmag[0]*(Rm-1)) + charge*Delta_phi/(bmag[0]*(Rm-1));
+// = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]) + charge*(phi-phi_m)/(bmag_max-bmag[0]);
 //
 
 // allocate array (filled with zeros)
@@ -32,7 +32,9 @@ c2p_pos_identity(const double *xcomp, double *xphys, void *ctx)
 {
   struct gkyl_loss_cone_mask_gyrokinetic *up = ctx;
   int cdim = up->cdim;
-  for (int d=0; d<cdim; d++) xphys[d] = xcomp[d];
+  for (int d = 0; d < cdim; d++) {
+    xphys[d] = xcomp[d];
+  }
 }
 
 // create range to loop over quadrature points.
@@ -40,8 +42,12 @@ static inline struct gkyl_range
 get_qrange(int cdim, int dim, int num_quad, int num_quad_v, bool *is_vdim_p2)
 {
   int qshape[GKYL_MAX_DIM];
-  for (int i=0; i<cdim; ++i) qshape[i] = num_quad;
-  for (int i=cdim; i<dim; ++i) qshape[i] = is_vdim_p2[i-cdim] ? num_quad_v : num_quad;
+  for (int i = 0; i < cdim; ++i) {
+    qshape[i] = num_quad;
+  }
+  for (int i = cdim; i < dim; ++i) {
+    qshape[i] = is_vdim_p2[i - cdim] ? num_quad_v : num_quad;
+  }
   struct gkyl_range qrange;
   gkyl_range_init_from_shape(&qrange, dim, qshape);
   return qrange;
@@ -57,9 +63,9 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
   int ndim = basis->ndim;
   int num_quad_v = num_quad;
   // Hybrid basis have p=2 in velocity space.
-  bool is_vdim_p2[2] = {false};  // 2 is the max vdim for GK.
+  bool is_vdim_p2[2] = { false };  // 2 is the max vdim for GK.
   if (num_quad > 1 && basis->b_type == GKYL_BASIS_MODAL_GKHYBRID) {
-    num_quad_v = num_quad+1;
+    num_quad_v = num_quad + 1;
     is_vdim_p2[0] = true;  // only vpar is quadratic in GK hybrid.
   }
 
@@ -108,7 +114,7 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
   if (use_gpu) {
     *ordinates = gkyl_array_cu_dev_new(GKYL_DOUBLE, ndim, tot_quad);
     *weights = gkyl_array_cu_dev_new(GKYL_DOUBLE, 1, tot_quad);
-  } 
+  }
   else {
     *ordinates = gkyl_array_new(GKYL_DOUBLE, ndim, tot_quad);
     *weights = gkyl_array_new(GKYL_DOUBLE, 1, tot_quad);
@@ -119,25 +125,29 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
 
   while (gkyl_range_iter_next(&iter)) {
     int node = gkyl_range_idx(&qrange, iter.idx);
-    
+
     // set ordinates
     double *ord = gkyl_array_fetch(ordinates_ho, node);
-    for (int i=0; i<cdim; ++i)
-      ord[i] = ordinates1[iter.idx[i]-qrange.lower[i]];
+    for (int i = 0; i < cdim; ++i) {
+      ord[i] = ordinates1[iter.idx[i] - qrange.lower[i]];
+    }
+
+    for (int i = cdim; i < ndim; ++i) {
+      ord[i] = is_vdim_p2[i - cdim] ?
+        ordinates1_v[iter.idx[i] - qrange.lower[i]] : ordinates1[iter.idx[i] - qrange.lower[i]];
+    }
 
-    for (int i=cdim; i<ndim; ++i)
-      ord[i] = is_vdim_p2[i-cdim] ? 
-        ordinates1_v[iter.idx[i]-qrange.lower[i]] : ordinates1[iter.idx[i]-qrange.lower[i]];
-    
     // set weights
     double *wgt = gkyl_array_fetch(weights_ho, node);
     wgt[0] = 1.0;
-    for (int i=0; i<cdim; ++i)
-      wgt[0] *= weights1[iter.idx[i]-qrange.lower[i]];
+    for (int i = 0; i < cdim; ++i) {
+      wgt[0] *= weights1[iter.idx[i] - qrange.lower[i]];
+    }
 
-    for (int i=cdim; i<ndim; ++i)
-      wgt[0] *= is_vdim_p2[i-cdim] ? 
-        weights1_v[iter.idx[i]-qrange.lower[i]] : weights1[iter.idx[i]-qrange.lower[i]];
+    for (int i = cdim; i < ndim; ++i) {
+      wgt[0] *= is_vdim_p2[i - cdim] ?
+        weights1_v[iter.idx[i] - qrange.lower[i]] : weights1[iter.idx[i] - qrange.lower[i]];
+    }
   }
 
   // Pre-compute basis functions at ordinates.
@@ -147,8 +157,9 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
   else
     *basis_at_ords = gkyl_array_new(GKYL_DOUBLE, basis->num_basis, tot_quad);
 
-  for (int n=0; n<tot_quad; ++n)
+  for (int n = 0; n < tot_quad; ++n) {
     basis->eval(gkyl_array_fetch(ordinates_ho, n), gkyl_array_fetch(basis_at_ords_ho, n));
+  }
 
   // Copy host array to device array.
   gkyl_array_copy(*ordinates, ordinates_ho);
@@ -163,7 +174,7 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
 }
 
 static void
-gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up, 
+gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
   struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_max)
 {
@@ -200,26 +211,26 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up,
     else {
       // 2x case: evaluate bmag_max at this psi cell.
       // The bmag_max array is 1D in psi, so we need the psi index.
-      int psi_idx[1] = {conf_iter.idx[0]};
+      int psi_idx[1] = { conf_iter.idx[0] };
       long psi_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
       const double *bmag_max_d = gkyl_array_cfetch(bmag_max, psi_linidx);
       // For simplicity, evaluate at cell center (logical coord 0).
-      double xc[1] = {0.0};
+      double xc[1] = { 0.0 };
       bmag_max_val = up->bmag_max_basis->eval_expand(xc, bmag_max_d);
     }
 
-    // Sum over basis 
-    for (int n=0; n<tot_quad_conf; ++n) {
+    // Sum over basis
+    for (int n = 0; n < tot_quad_conf; ++n) {
       const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
-      for (int k=0; k<num_basis_conf; ++k) {
-        Dbmag_quad_d[n] += bmag_d[k]*b_ord[k];
+      for (int k = 0; k < num_basis_conf; ++k) {
+        Dbmag_quad_d[n] += bmag_d[k] * b_ord[k];
       }
       Dbmag_quad_d[n] = bmag_max_val - Dbmag_quad_d[n];
     }
   }
 }
 
-struct gkyl_loss_cone_mask_gyrokinetic* 
+struct gkyl_loss_cone_mask_gyrokinetic*
 gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokinetic_inp *inp)
 {
   gkyl_loss_cone_mask_gyrokinetic *up = gkyl_malloc(sizeof(*up));
@@ -234,10 +245,10 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   up->pdim = inp->phase_basis->ndim;
 
   up->cellwise_trap_loss = inp->cellwise_trap_loss;
-  int num_quad = inp->num_quad? inp->num_quad : inp->phase_basis->poly_order+1;
+  int num_quad = inp->num_quad? inp->num_quad : inp->phase_basis->poly_order + 1;
   up->norm_fac = 1;
   if (!up->cellwise_trap_loss)
-    up->norm_fac = num_quad == 1? 1.0/pow(sqrt(2.0),up->pdim) : 1.0;
+    up->norm_fac = num_quad == 1? 1.0 / pow(sqrt(2.0), up->pdim) : 1.0;
 
   if (num_quad == 1) {
     up->num_basis_conf = 1;
@@ -274,9 +285,9 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   // create a map between phase-space and conf-space ordinates.
   int num_quad_v = num_quad;  // Hybrid basis have p=2 in velocity space.
   // hybrid basis have p=2 in velocity space.
-  bool is_vdim_p2[2] = {false};  // 2 is the max vdim for GK.
+  bool is_vdim_p2[2] = { false };  // 2 is the max vdim for GK.
   if (num_quad > 1 && inp->phase_basis->b_type == GKYL_BASIS_MODAL_GKHYBRID) {
-    num_quad_v = num_quad+1;
+    num_quad_v = num_quad + 1;
     is_vdim_p2[0] = true;  // only vpar is quadratic in GK hybrid.
   }
   up->conf_qrange = get_qrange(up->cdim, up->cdim, num_quad, num_quad_v, is_vdim_p2);
@@ -290,32 +301,39 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
     // Allocate device copies of arrays needed for quadrature.
 
     int p2c_qidx_ho[up->phase_qrange.volume];
-    up->p2c_qidx = (int*) gkyl_cu_malloc(sizeof(int)*up->phase_qrange.volume);
+    up->p2c_qidx = (int *)gkyl_cu_malloc(sizeof(int) * up->phase_qrange.volume);
 
     // Allocate mask_quad at phase-space quadrature points.
     // Dbmag_quad at configuration-space quadrature points.
     // qDphiDbmag_quad, the term proportional to (phi-phi_m)/(bmag_max-bmag), at quadrature points.
     up->mask_out_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_phase,
-      inp->conf_range_ext->volume*inp->vel_range->volume);
-    up->qDphiDbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-    up->qDphiDbmag_quad_wall = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-    up->qDphiDbmag_quad_tandem = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+      inp->conf_range_ext->volume * inp->vel_range->volume);
+    up->qDphiDbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf,
+      inp->conf_range_ext->volume);
+    up->qDphiDbmag_quad_wall = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf,
+      inp->conf_range_ext->volume);
+    up->qDphiDbmag_quad_tandem = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf,
+      inp->conf_range_ext->volume);
 
     // Allocate the memory for computing the specific phase nodal to modal calculation
     struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem_ho;
-    phase_nodal_to_modal_mem_ho = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase, 1.0, 0.0, 
+    phase_nodal_to_modal_mem_ho = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase,
+      1.0, 0.0,
       GKYL_NO_TRANS, GKYL_NO_TRANS, false);
 
     // Compute the matrix A for the phase nodal to modal memory
-    const double *phase_w = (const double*) up->weights_phase->data;
-    const double *phaseb_o = (const double*) up->basis_at_ords_phase->data;
-    for (int n=0; n<up->tot_quad_phase; ++n) {
-      for (int k=0; k<up->num_basis_phase; ++k)
-        gkyl_mat_set(phase_nodal_to_modal_mem_ho->A, k, n, phase_w[n]*phaseb_o[k+up->num_basis_phase*n]);
+    const double *phase_w = (const double *)up->weights_phase->data;
+    const double *phaseb_o = (const double *)up->basis_at_ords_phase->data;
+    for (int n = 0; n < up->tot_quad_phase; ++n) {
+      for (int k = 0; k < up->num_basis_phase; ++k) {
+        gkyl_mat_set(phase_nodal_to_modal_mem_ho->A, k, n,
+          phase_w[n] * phaseb_o[k + up->num_basis_phase * n]);
+      }
     }
-    
+
     // Copy to device
-    up->phase_nodal_to_modal_mem = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase, 1.0, 0.0, 
+    up->phase_nodal_to_modal_mem = gkyl_mat_mm_array_mem_new(up->num_basis_phase,
+      up->tot_quad_phase, 1.0, 0.0,
       GKYL_NO_TRANS, GKYL_NO_TRANS, up->use_gpu);
     gkyl_mat_copy(up->phase_nodal_to_modal_mem->A, phase_nodal_to_modal_mem_ho->A);
     gkyl_mat_mm_array_mem_release(phase_nodal_to_modal_mem_ho);
@@ -329,13 +347,14 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
       &up->ordinates_phase, &up->weights_phase, &up->basis_at_ords_phase, up->use_gpu);
 
     int pidx[GKYL_MAX_DIM];
-    for (int n=0; n<up->tot_quad_phase; ++n) {
+    for (int n = 0; n < up->tot_quad_phase; ++n) {
       gkyl_range_inv_idx(&up->phase_qrange, n, pidx);
       int cqidx = gkyl_range_idx(&up->conf_qrange, pidx);
       p2c_qidx_ho[n] = cqidx;
     }
-    gkyl_cu_memcpy(up->p2c_qidx, p2c_qidx_ho, sizeof(int)*up->phase_qrange.volume, GKYL_CU_MEMCPY_H2D);
-    
+    gkyl_cu_memcpy(up->p2c_qidx, p2c_qidx_ho, sizeof(int) * up->phase_qrange.volume,
+      GKYL_CU_MEMCPY_H2D);
+
     // Allocate and set scalar bmag_max_z for GPU kernels.
     // TODO: For 2x GPU support, need to pass full arrays and do per-cell lookup.
     // inp->bmag_max_z_coord is a GPU array, so copy to host before reading.
@@ -348,12 +367,13 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
       // 1x case: single value.
       const double *bmag_max_z_d = gkyl_array_cfetch(bmag_max_z_coord_ho, 0);
       bmag_max_z_val = bmag_max_z_d[0];
-    } else {
+    }
+    else {
       // 2x case: use the first field line's value (simplified approach).
-      int psi_idx[1] = {inp->bmag_max_range->lower[0]};
+      int psi_idx[1] = { inp->bmag_max_range->lower[0] };
       long bmag_max_z_linidx = gkyl_range_idx(inp->bmag_max_range, psi_idx);
       const double *bmag_max_z_d = gkyl_array_cfetch(bmag_max_z_coord_ho, bmag_max_z_linidx);
-      double xc[1] = {0.0};
+      double xc[1] = { 0.0 };
       bmag_max_z_val = inp->bmag_max_basis->eval_expand(xc, bmag_max_z_d);
     }
     gkyl_array_release(bmag_max_z_coord_ho);
@@ -373,8 +393,11 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   up->bmag_max_z_coord = gkyl_array_acquire(inp->bmag_max_z_coord);
   up->bmag_wall = gkyl_array_acquire(inp->bmag_wall);
   up->bmag_wall_z_coord = gkyl_array_acquire(inp->bmag_wall_z_coord);
-  up->bmag_tandem = up->is_tandem ? gkyl_array_acquire(inp->bmag_tandem) : gkyl_array_acquire(inp->bmag_max);
-  up->bmag_tandem_z_coord = up->is_tandem ? gkyl_array_acquire(inp->bmag_tandem_z_coord) : gkyl_array_acquire(inp->bmag_max_z_coord);
+  up->bmag_tandem =
+    up->is_tandem ? gkyl_array_acquire(inp->bmag_tandem) : gkyl_array_acquire(inp->bmag_max);
+  up->bmag_tandem_z_coord =
+    up->is_tandem ? gkyl_array_acquire(inp->bmag_tandem_z_coord) :
+    gkyl_array_acquire(inp->bmag_max_z_coord);
   up->bmag_max_basis = inp->bmag_max_basis;
   up->bmag_max_range = inp->bmag_max_range;
 
@@ -386,52 +409,57 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   gkyl_array_clear(up->Dbmag_quad, 0.0);
   gkyl_array_clear(up->Dbmag_quad_wall, 0.0);
   gkyl_array_clear(up->Dbmag_quad_tandem, 0.0);
-  
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad, up->bmag_max); // bmag_max - bmag
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_wall, up->bmag_wall); // bmag_wall - bmag
+
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad,
+    up->bmag_max);                                                                                          // bmag_max - bmag
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_wall,
+    up->bmag_wall);                                                                                               // bmag_wall - bmag
   gkyl_array_scale(up->Dbmag_quad_wall, -1.0); // bmag - bmag_wall
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_tandem, up->bmag_tandem); // bmag_tandem - bmag
-    
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_tandem,
+    up->bmag_tandem);                                                                                                 // bmag_tandem - bmag
+
   return up;
 }
 
 static void
-proj_on_basis(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords, double* f)
+proj_on_basis(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords,
+  double *f)
 {
   int num_basis = up->num_basis_phase;
   int tot_quad = up->tot_quad_phase;
 
-  const double* GKYL_RESTRICT weights = up->weights_phase->data;
-  const double* GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
-  const double* GKYL_RESTRICT func_at_ords = fun_at_ords->data;
+  const double *GKYL_RESTRICT weights = up->weights_phase->data;
+  const double *GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
+  const double *GKYL_RESTRICT func_at_ords = fun_at_ords->data;
 
-  for (int k=0; k<num_basis; ++k) {
+  for (int k = 0; k < num_basis; ++k) {
     f[k] = 0.0;
   }
-  for (int imu=0; imu<tot_quad; ++imu) {
-    double tmp = weights[imu]*func_at_ords[imu];
-    for (int k=0; k<num_basis; ++k) {
-      f[k] += tmp*basis_at_ords[k+num_basis*imu];
+  for (int imu = 0; imu < tot_quad; ++imu) {
+    double tmp = weights[imu] * func_at_ords[imu];
+    for (int k = 0; k < num_basis; ++k) {
+      f[k] += tmp * basis_at_ords[k + num_basis * imu];
     }
   }
 }
 
 static void
-nod_to_mod_reduce(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords, double* f)
+nod_to_mod_reduce(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords,
+  double *f)
 {
   int num_basis = up->num_basis_phase;
   int tot_quad = up->tot_quad_phase;
 
-  const double* GKYL_RESTRICT weights = up->weights_phase->data;
-  const double* GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
-  const double* GKYL_RESTRICT func_at_ords = fun_at_ords->data;
+  const double *GKYL_RESTRICT weights = up->weights_phase->data;
+  const double *GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
+  const double *GKYL_RESTRICT func_at_ords = fun_at_ords->data;
 
-  for (int k=0; k<num_basis; ++k) {
+  for (int k = 0; k < num_basis; ++k) {
     f[k] = 0.0;
   }
   f[0] = 1.0;
-  
-  for (int imu=0; imu<tot_quad; ++imu) {
+
+  for (int imu = 0; imu < tot_quad; ++imu) {
     if (func_at_ords[imu] < 1e-14) {
       f[0] = 0.0;
       break;
@@ -442,7 +470,7 @@ nod_to_mod_reduce(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_a
 void
 gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const struct gkyl_array *phi_m, 
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m,
   const struct gkyl_array *phi_tandem, struct gkyl_array *mask_out)
 {
 
@@ -453,7 +481,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
 #endif
 
   int cdim = up->cdim, pdim = up->pdim;
-  int vdim = pdim-cdim;
+  int vdim = pdim - cdim;
 
   int tot_quad_conf = up->tot_quad_conf;
   int num_basis_conf = up->num_basis_conf;
@@ -464,11 +492,11 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   struct gkyl_range_iter conf_iter, vel_iter;
 
   int pidx[GKYL_MAX_DIM], rem_dir[GKYL_MAX_DIM] = { 0 };
-  for (int d=0; d<conf_range->ndim; ++d) {
+  for (int d = 0; d < conf_range->ndim; ++d) {
     rem_dir[d] = 1;
   }
 
-  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
+  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = { 0.0 };
   double phi_quad[tot_quad_conf];
   double qDphiDbmag_quad[tot_quad_conf]; // charge*(phi-phi_m)/(bmag_max-bmag[0]).
   double qDphiDbmag_quad_wall[tot_quad_conf]; // charge*(phi-phi_m)/(bmag[0]-bmag_wall).
@@ -483,8 +511,9 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
     const double *phi_d = gkyl_array_cfetch(phi, linidx_conf);
     const double *Dbmag_quad = gkyl_array_cfetch(up->Dbmag_quad, linidx_conf);
     const double *Dbmag_quad_wall = gkyl_array_cfetch(up->Dbmag_quad_wall, linidx_conf);
-    const double *Dbmag_quad_tandem = is_tandem ? 
-      gkyl_array_cfetch(up->Dbmag_quad_tandem, linidx_conf) : gkyl_array_cfetch(up->Dbmag_quad, linidx_conf);
+    const double *Dbmag_quad_tandem = is_tandem ?
+      gkyl_array_cfetch(up->Dbmag_quad_tandem, linidx_conf) : gkyl_array_cfetch(up->Dbmag_quad,
+      linidx_conf);
 
     // Get phi_m value for this field line.
     // For 1x: single value (phi_m is a scalar stored as p=0 DG expansion).
@@ -496,44 +525,49 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
       const double *phi_tandem_m_d = gkyl_array_cfetch(phi_tandem, 0);
       phi_m_val = phi_m_d[0];
       phi_tandem_m_val = phi_tandem_m_d[0];
-    } else {
+    }
+    else {
       // 2x case: evaluate phi_m at this psi cell center.
-      int psi_idx[1] = {conf_iter.idx[0]};
+      int psi_idx[1] = { conf_iter.idx[0] };
       long phi_m_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
       const double *phi_m_d = gkyl_array_cfetch(phi_m, phi_m_linidx);
       const double *phi_tandem_m_d = gkyl_array_cfetch(phi_tandem, phi_m_linidx);
       // Evaluate at cell center (logical coord 0).
-      double xc_log[1] = {0.0};
+      double xc_log[1] = { 0.0 };
       phi_m_val = up->bmag_max_basis->eval_expand(xc_log, phi_m_d);
       phi_tandem_m_val = up->bmag_max_basis->eval_expand(xc_log, phi_tandem_m_d);
     }
 
     // Sum over basis for given potential phi.
-    for (int n=0; n<tot_quad_conf; ++n) {
+    for (int n = 0; n < tot_quad_conf; ++n) {
       const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
 
       // Compute the configuration-space quadrature
       phi_quad[n] = 0.0;
-      for (int k=0; k<num_basis_conf; ++k) {
-        phi_quad[n] += phi_d[k]*b_ord[k];
+      for (int k = 0; k < num_basis_conf; ++k) {
+        phi_quad[n] += phi_d[k] * b_ord[k];
       }
 
       if (Dbmag_quad[n] > 0.0) {
-        qDphiDbmag_quad[n] = up->charge*(phi_quad[n]-phi_m_val)/Dbmag_quad[n];
-      } else {
+        qDphiDbmag_quad[n] = up->charge * (phi_quad[n] - phi_m_val) / Dbmag_quad[n];
+      }
+      else {
         qDphiDbmag_quad[n] = 0.0;
       }
 
       if (Dbmag_quad_wall[n] > 0.0) {
-        qDphiDbmag_quad_wall[n] = up->charge*phi_quad[n]/Dbmag_quad_wall[n];
-      } else {
+        qDphiDbmag_quad_wall[n] = up->charge * phi_quad[n] / Dbmag_quad_wall[n];
+      }
+      else {
         qDphiDbmag_quad_wall[n] = 0.0;
       }
 
       if (is_tandem) {
         if (Dbmag_quad_tandem[n] > 0.0) {
-          qDphiDbmag_quad_tandem[n] = up->charge*(phi_quad[n]-phi_tandem_m_val)/Dbmag_quad_tandem[n];
-        } else {
+          qDphiDbmag_quad_tandem[n] = up->charge * (phi_quad[n] - phi_tandem_m_val) /
+            Dbmag_quad_tandem[n];
+        }
+        else {
           qDphiDbmag_quad_tandem[n] = 0.0;
         }
       }
@@ -543,7 +577,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
     gkyl_range_deflate(&vel_rng, phase_range, rem_dir, conf_iter.idx);
     gkyl_range_iter_no_split_init(&vel_iter, &vel_rng);
     while (gkyl_range_iter_next(&vel_iter)) {
-      
+
       copy_idx_arrays(conf_range->ndim, phase_range->ndim, conf_iter.idx, vel_iter.idx, pidx);
       long linidx_phase = gkyl_range_idx(&vel_rng, vel_iter.idx);
 
@@ -568,8 +602,9 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         const double *vmap_d = gkyl_array_cfetch(gvm->vmap, linidx_vel);
         double xcomp[1];
         for (int vd = 0; vd < vdim; vd++) {
-          xcomp[0] = xcomp_d[cdim+vd];
-          xmu[cdim+vd] = gvm->vmap_basis->eval_expand(xcomp, vmap_d+vd*gvm->vmap_basis->num_basis);
+          xcomp[0] = xcomp_d[cdim + vd];
+          xmu[cdim + vd] = gvm->vmap_basis->eval_expand(xcomp,
+            vmap_d + vd * gvm->vmap_basis->num_basis);
         }
 
         // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
@@ -580,26 +615,29 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         double KEparDbmag_tandem = 0.0;
 
         if (Dbmag_quad[cqidx] > 0.0) {
-          KEparDbmag = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad[cqidx];
-        } else {
+          KEparDbmag = 0.5 * up->mass * pow(xmu[cdim], 2.0) / Dbmag_quad[cqidx];
+        }
+        else {
           KEparDbmag = 0.0;
         }
 
         if (Dbmag_quad_wall[cqidx] > 0.0) {
-          KEparDbmag_wall = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad_wall[cqidx];
-        } else {
+          KEparDbmag_wall = 0.5 * up->mass * pow(xmu[cdim], 2.0) / Dbmag_quad_wall[cqidx];
+        }
+        else {
           KEparDbmag_wall = 0.0;
         }
 
         if (Dbmag_quad_tandem[cqidx] > 0.0) {
-          KEparDbmag_tandem = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad_tandem[cqidx];
-        } else {
+          KEparDbmag_tandem = 0.5 * up->mass * pow(xmu[cdim], 2.0) / Dbmag_quad_tandem[cqidx];
+        }
+        else {
           KEparDbmag_tandem = 0.0;
         }
 
-        double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad[cqidx]);
-        double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall+qDphiDbmag_quad_wall[cqidx]));
-        double mu_bound_tandem = GKYL_MAX2(0.0, KEparDbmag_tandem+qDphiDbmag_quad_tandem[cqidx]);
+        double mu_bound = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad[cqidx]);
+        double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall + qDphiDbmag_quad_wall[cqidx]));
+        double mu_bound_tandem = GKYL_MAX2(0.0, KEparDbmag_tandem + qDphiDbmag_quad_tandem[cqidx]);
 
         // Get the z-coordinate of bmag_max for this field line.
         // For 1x: single value (index 0).
@@ -613,61 +651,71 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
             const double *bmag_tandem_z_d = gkyl_array_cfetch(up->bmag_tandem_z_coord, 0);
             bmag_tandem_z_val = bmag_tandem_z_d[0];
           }
-        } else {
+        }
+        else {
           // 2x case: evaluate bmag_max_z at this psi cell.
-          int psi_idx[1] = {conf_iter.idx[0]};
+          int psi_idx[1] = { conf_iter.idx[0] };
           long bmag_max_z_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
           const double *bmag_max_z_d = gkyl_array_cfetch(up->bmag_max_z_coord, bmag_max_z_linidx);
           // For simplicity, evaluate at cell center (logical coord 0).
-          double xc[1] = {0.0};
+          double xc[1] = { 0.0 };
           bmag_max_z_val = up->bmag_max_basis->eval_expand(xc, bmag_max_z_d);
           if (is_tandem) {
-            const double *bmag_tandem_z_d = gkyl_array_cfetch(up->bmag_tandem_z_coord, bmag_max_z_linidx);
+            const double *bmag_tandem_z_d = gkyl_array_cfetch(up->bmag_tandem_z_coord,
+              bmag_max_z_linidx);
             bmag_tandem_z_val = up->bmag_max_basis->eval_expand(xc, bmag_tandem_z_d);
           }
         }
 
         double *fq = gkyl_array_fetch(up->fun_at_ords, pqidx);
         // xmu[cdim-1] is the z-coordinate (last config space coordinate).
-        
+
         if (is_tandem) {
           // Tandem mirror trapping condition:
           // Determine which region we're in based on position.
-          bool in_outer_cell = fabs(xmu[cdim-1]) < fabs(bmag_max_z_val) &&
-                               fabs(xmu[cdim-1]) > fabs(bmag_tandem_z_val);
-          bool in_central_cell = fabs(xmu[cdim-1]) <= fabs(bmag_tandem_z_val);
-          
+          bool in_outer_cell = fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val) &&
+            fabs(xmu[cdim - 1]) > fabs(bmag_tandem_z_val);
+          bool in_central_cell = fabs(xmu[cdim - 1]) <= fabs(bmag_tandem_z_val);
+
           if (in_outer_cell) {
             // Between tandem and outer mirror - check outer barrier
-            if (mu_bound < xmu[cdim+1]) {
+            if (mu_bound < xmu[cdim + 1]) {
               fq[0] = 1.0 * up->norm_fac;
-            } else {
+            }
+            else {
               fq[0] = 0.0;
             }
-          } else if (in_central_cell) {
+          }
+          else if (in_central_cell) {
             // In central cell - must overcome the minimum of both barriers to escape.
             // A particle is trapped if mu > min(mu_bound, mu_bound_tandem).
             double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem);
-            if (mu_bound_min < xmu[cdim+1]) {
+            if (mu_bound_min < xmu[cdim + 1]) {
               fq[0] = 1.0 * up->norm_fac;
-            } else {
+            }
+            else {
               fq[0] = 0.0;
             }
-          } else {
+          }
+          else {
             // In the outer wall region beyond outer mirror
-            if (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val)) {
+            if (mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val)) {
               fq[0] = 1.0 * up->norm_fac;
-            } else {
+            }
+            else {
               fq[0] = 0.0;
             }
           }
-        } else {
+        }
+        else {
           // Single mirror case (original logic)
-          if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_val)) {
+          if (mu_bound < xmu[cdim + 1] && fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val)) {
             fq[0] = 1.0 * up->norm_fac;
-          } else if (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val)) {
+          }
+          else if (mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val)) {
             fq[0] = 1.0 * up->norm_fac;
-          } else {
+          }
+          else {
             fq[0] = 0.0;
           }
         }
@@ -682,7 +730,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
 }
 
 void
-gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up)
+gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic *up)
 {
   gkyl_velocity_map_release(up->vel_map);
 
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
index 717f827d49..693b5ff4f1 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
@@ -22,23 +22,23 @@ extern "C" {
 // For 2x: bmag_peak varies with psi (x-direction).
 __global__ static void
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(int cdim, struct gkyl_range conf_range,
-  struct gkyl_range bmag_peak_range, const struct gkyl_array* basis_at_ords_conf,
-  const struct gkyl_array* bmag, const struct gkyl_array* bmag_peak,
-  const struct gkyl_basis* bmag_peak_basis, struct gkyl_array* Dbmag_quad_out)
-{    
+  struct gkyl_range bmag_peak_range, const struct gkyl_array *basis_at_ords_conf,
+  const struct gkyl_array *bmag, const struct gkyl_array *bmag_peak,
+  const struct gkyl_basis *bmag_peak_basis, struct gkyl_array *Dbmag_quad_out)
+{
   int num_basis_conf = basis_at_ords_conf->ncomp;
   int tot_quad_conf = basis_at_ords_conf->size;
 
   int cidx[GKYL_MAX_CDIM];
 
-  for (unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < conf_range.volume; tid += blockDim.x*gridDim.x) {
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < conf_range.volume; tid += blockDim.x * gridDim.x) {
 
     gkyl_sub_range_inv_idx(&conf_range, tid, cidx);
     long linidx = gkyl_range_idx(&conf_range, cidx);
 
-    const double *bmag_d = (const double*) gkyl_array_cfetch(bmag, linidx);
-    double *Dbmag_quad_d = (double*) gkyl_array_fetch(Dbmag_quad_out, linidx);
+    const double *bmag_d = (const double *)gkyl_array_cfetch(bmag, linidx);
+    double *Dbmag_quad_d = (double *)gkyl_array_fetch(Dbmag_quad_out, linidx);
 
     // Get bmag_peak for this field line.
     // For 1x: single value (index 0).
@@ -46,64 +46,67 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(int cdim, struct gkyl_range co
     double bmag_peak_val;
     if (cdim == 1) {
       // 1x case: single value.
-      const double *bmag_peak_d = (const double*) gkyl_array_cfetch(bmag_peak, 0);
+      const double *bmag_peak_d = (const double *)gkyl_array_cfetch(bmag_peak, 0);
       bmag_peak_val = bmag_peak_d[0]; // Just the constant coefficient.
-    } else {
+    }
+    else {
       // 2x case: evaluate bmag_peak at this psi cell.
-      int psi_idx[1] = {cidx[0]};
+      int psi_idx[1] = { cidx[0] };
       long psi_linidx = gkyl_range_idx(&bmag_peak_range, psi_idx);
-      const double *bmag_peak_d = (const double*) gkyl_array_cfetch(bmag_peak, psi_linidx);
+      const double *bmag_peak_d = (const double *)gkyl_array_cfetch(bmag_peak, psi_linidx);
       // Evaluate at cell center (logical coord 0).
-      double xc[1] = {0.0};
+      double xc[1] = { 0.0 };
       bmag_peak_val = bmag_peak_basis->eval_expand(xc, bmag_peak_d);
     }
 
     // Sum over basis to get bmag at quadrature points, then compute difference.
-    for (int n=0; n<tot_quad_conf; ++n) {
-      const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, n);
+    for (int n = 0; n < tot_quad_conf; ++n) {
+      const double *b_ord = (const double *)gkyl_array_cfetch(basis_at_ords_conf, n);
 
       double bmag_quad = 0.0;
-      for (int k=0; k<num_basis_conf; ++k) {
-        bmag_quad += bmag_d[k]*b_ord[k];
+      for (int k = 0; k < num_basis_conf; ++k) {
+        bmag_quad += bmag_d[k] * b_ord[k];
       }
       Dbmag_quad_d[n] = bmag_peak_val - bmag_quad;
     }
   }
 }
 
-void 
+void
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
   struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_peak)
 {
   int nblocks = conf_range->nblocks, nthreads = conf_range->nthreads;
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<<nblocks, nthreads>>>(up->cdim, *conf_range,
-    *up->bmag_max_range, up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_peak->on_dev,
-    up->bmag_max_basis_on_dev, Dbmag_quad->on_dev);
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<<nblocks,
+    nthreads>>>(up->cdim, *conf_range,
+  *up->bmag_max_range, up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_peak->on_dev,
+  up->bmag_max_basis_on_dev, Dbmag_quad->on_dev);
 }
 
 static void
-gkyl_parallelize_components_kernel_launch_dims(dim3* dimGrid, dim3* dimBlock, gkyl_range range, int ncomp)
+gkyl_parallelize_components_kernel_launch_dims(dim3 *dimGrid, dim3 *dimBlock, gkyl_range range,
+  int ncomp)
 {
-  // Create a 2D thread grid so we launch ncomp*range.volume number of threads 
+  // Create a 2D thread grid so we launch ncomp*range.volume number of threads
   // so we can parallelize over components too
   dimBlock->y = ncomp; // ncomp *must* be less than 256
   dimGrid->y = 1;
-  dimBlock->x = GKYL_DEFAULT_NUM_THREADS/ncomp;
+  dimBlock->x = GKYL_DEFAULT_NUM_THREADS / ncomp;
   dimGrid->x = gkyl_int_div_up(range.volume, dimBlock->x);
 }
 
 // Kernel to compute qDphiDbmag_quad = charge*(phi-phi_m)/(bmag_max-bmag) at quadrature nodes.
 // Supports per-field-line phi_m lookup for 2x mirrors.
 __global__ static void
-gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(int cdim, struct gkyl_range conf_range, 
-  struct gkyl_range phi_m_range, const struct gkyl_array* basis_at_ords_conf, 
-  const struct gkyl_basis* phi_m_basis, double charge, bool is_tandem,
-  const struct gkyl_array* phi, const struct gkyl_array* phi_m, const struct gkyl_array* phi_tandem,
-  const struct gkyl_array* Dbmag_quad, const struct gkyl_array* Dbmag_quad_wall, 
-  const struct gkyl_array* Dbmag_quad_tandem,
-  struct gkyl_array* qDphiDbmag_quad, struct gkyl_array* qDphiDbmag_quad_wall,
-  struct gkyl_array* qDphiDbmag_quad_tandem)
+gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(int cdim, struct gkyl_range conf_range,
+  struct gkyl_range phi_m_range, const struct gkyl_array *basis_at_ords_conf,
+  const struct gkyl_basis *phi_m_basis, double charge, bool is_tandem,
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem,
+  const struct gkyl_array *Dbmag_quad, const struct gkyl_array *Dbmag_quad_wall,
+  const struct gkyl_array *Dbmag_quad_tandem,
+  struct gkyl_array *qDphiDbmag_quad, struct gkyl_array *qDphiDbmag_quad_wall,
+  struct gkyl_array *qDphiDbmag_quad_tandem)
 {
   int num_basis_conf = basis_at_ords_conf->ncomp;
 
@@ -111,18 +114,18 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(int cdim, struct gkyl_range
 
   // 2D thread grid
   // linc2 goes from 0 to tot_quad_conf= basis_at_ords_conf->size.
-  long linc2 = threadIdx.y + blockIdx.y*blockDim.y;
-  for (unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < conf_range.volume; tid += blockDim.x*gridDim.x) {
+  long linc2 = threadIdx.y + blockIdx.y * blockDim.y;
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < conf_range.volume; tid += blockDim.x * gridDim.x) {
     gkyl_sub_range_inv_idx(&conf_range, tid, cidx);
 
     long linidx = gkyl_range_idx(&conf_range, cidx);
 
-    const double *phi_d = (const double*) gkyl_array_cfetch(phi, linidx);
-    const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx);
-    const double *Dbmag_quad_wall_d = (const double*) gkyl_array_cfetch(Dbmag_quad_wall, linidx);
-    const double *Dbmag_quad_tandem_d = is_tandem ? 
-      (const double*) gkyl_array_cfetch(Dbmag_quad_tandem, linidx) : Dbmag_quad_d;
+    const double *phi_d = (const double *)gkyl_array_cfetch(phi, linidx);
+    const double *Dbmag_quad_d = (const double *)gkyl_array_cfetch(Dbmag_quad, linidx);
+    const double *Dbmag_quad_wall_d = (const double *)gkyl_array_cfetch(Dbmag_quad_wall, linidx);
+    const double *Dbmag_quad_tandem_d = is_tandem ?
+      (const double *)gkyl_array_cfetch(Dbmag_quad_tandem, linidx) : Dbmag_quad_d;
 
     // Get phi_m value for this field line.
     // For 1x: single value (phi_m is a scalar stored as p=0 DG expansion).
@@ -130,50 +133,53 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(int cdim, struct gkyl_range
     double phi_m_val, phi_tandem_m_val;
     if (cdim == 1) {
       // 1x case: single scalar value stored as p=0 DG expansion.
-      const double *phi_m_d = (const double*) gkyl_array_cfetch(phi_m, 0);
+      const double *phi_m_d = (const double *)gkyl_array_cfetch(phi_m, 0);
       phi_m_val = phi_m_d[0];
       if (is_tandem) {
-        const double *phi_tandem_m_d = (const double*) gkyl_array_cfetch(phi_tandem, 0);
+        const double *phi_tandem_m_d = (const double *)gkyl_array_cfetch(phi_tandem, 0);
         phi_tandem_m_val = phi_tandem_m_d[0];
       }
-    } else {
+    }
+    else {
       // 2x case: evaluate phi_m at this psi cell center.
-      int psi_idx[1] = {cidx[0]};
+      int psi_idx[1] = { cidx[0] };
       long phi_m_linidx = gkyl_range_idx(&phi_m_range, psi_idx);
-      const double *phi_m_d = (const double*) gkyl_array_cfetch(phi_m, phi_m_linidx);
+      const double *phi_m_d = (const double *)gkyl_array_cfetch(phi_m, phi_m_linidx);
       // Evaluate at cell center (logical coord 0).
-      double xc[1] = {0.0};
+      double xc[1] = { 0.0 };
       phi_m_val = phi_m_basis->eval_expand(xc, phi_m_d);
       if (is_tandem) {
-        const double *phi_tandem_m_d = (const double*) gkyl_array_cfetch(phi_tandem, phi_m_linidx);
+        const double *phi_tandem_m_d = (const double *)gkyl_array_cfetch(phi_tandem, phi_m_linidx);
         phi_tandem_m_val = phi_m_basis->eval_expand(xc, phi_tandem_m_d);
       }
     }
 
-    // Sum over basis at configuration-space quadrature points. 
-    const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, linc2);
+    // Sum over basis at configuration-space quadrature points.
+    const double *b_ord = (const double *)gkyl_array_cfetch(basis_at_ords_conf, linc2);
     double phi_quad = 0;
-    for (int k=0; k<num_basis_conf; ++k)
-      phi_quad += phi_d[k]*b_ord[k];
+    for (int k = 0; k < num_basis_conf; ++k) {
+      phi_quad += phi_d[k] * b_ord[k];
+    }
 
     // Potential energy term at each quadrature point.
-    double *qDphiDbmag_quad_d = (double*) gkyl_array_fetch(qDphiDbmag_quad, linidx);
-    double *qDphiDbmag_quad_wall_d = (double*) gkyl_array_fetch(qDphiDbmag_quad_wall, linidx);
-    
+    double *qDphiDbmag_quad_d = (double *)gkyl_array_fetch(qDphiDbmag_quad, linidx);
+    double *qDphiDbmag_quad_wall_d = (double *)gkyl_array_fetch(qDphiDbmag_quad_wall, linidx);
+
     if (Dbmag_quad_d[linc2] > 0.0)
-      qDphiDbmag_quad_d[linc2] = charge*(phi_quad-phi_m_val)/Dbmag_quad_d[linc2];
+      qDphiDbmag_quad_d[linc2] = charge * (phi_quad - phi_m_val) / Dbmag_quad_d[linc2];
     else
       qDphiDbmag_quad_d[linc2] = 0.0;
 
     if (Dbmag_quad_wall_d[linc2] > 0.0)
-      qDphiDbmag_quad_wall_d[linc2] = charge*phi_quad/Dbmag_quad_wall_d[linc2];
+      qDphiDbmag_quad_wall_d[linc2] = charge * phi_quad / Dbmag_quad_wall_d[linc2];
     else
       qDphiDbmag_quad_wall_d[linc2] = 0.0;
 
     if (is_tandem) {
-      double *qDphiDbmag_quad_tandem_d = (double*) gkyl_array_fetch(qDphiDbmag_quad_tandem, linidx);
+      double *qDphiDbmag_quad_tandem_d = (double *)gkyl_array_fetch(qDphiDbmag_quad_tandem, linidx);
       if (Dbmag_quad_tandem_d[linc2] > 0.0)
-        qDphiDbmag_quad_tandem_d[linc2] = charge*(phi_quad-phi_tandem_m_val)/Dbmag_quad_tandem_d[linc2];
+        qDphiDbmag_quad_tandem_d[linc2] = charge * (phi_quad - phi_tandem_m_val) /
+          Dbmag_quad_tandem_d[linc2];
       else
         qDphiDbmag_quad_tandem_d[linc2] = 0.0;
     }
@@ -185,122 +191,136 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(int cdim, struct gkyl_range
 __global__ static void
 gkyl_loss_cone_mask_gyrokinetic_ker(int cdim, struct gkyl_rect_grid grid_phase,
   struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
-  struct gkyl_range bmag_max_range, const struct gkyl_basis* bmag_max_basis, bool is_tandem,
-  double mass, const struct gkyl_array* phase_ordinates,
-  const struct gkyl_array* bmag_max_z_coord, const struct gkyl_array* bmag_tandem_z_coord,
-  const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* qDphiDbmag_quad_wall,
-  const struct gkyl_array* qDphiDbmag_quad_tandem,
-  const struct gkyl_array* Dbmag_quad, const struct gkyl_array* Dbmag_quad_wall,
-  const struct gkyl_array* Dbmag_quad_tandem,
-  const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out)
+  struct gkyl_range bmag_max_range, const struct gkyl_basis *bmag_max_basis, bool is_tandem,
+  double mass, const struct gkyl_array *phase_ordinates,
+  const struct gkyl_array *bmag_max_z_coord, const struct gkyl_array *bmag_tandem_z_coord,
+  const struct gkyl_array *qDphiDbmag_quad, const struct gkyl_array *qDphiDbmag_quad_wall,
+  const struct gkyl_array *qDphiDbmag_quad_tandem,
+  const struct gkyl_array *Dbmag_quad, const struct gkyl_array *Dbmag_quad_wall,
+  const struct gkyl_array *Dbmag_quad_tandem,
+  const int *p2c_qidx, struct gkyl_array *vmap, struct gkyl_basis *vmap_basis,
+  struct gkyl_array *mask_out)
 {
   int pdim = phase_range.ndim;
-  int vdim = pdim-cdim;
+  int vdim = pdim - cdim;
 
-  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
+  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = { 0.0 };
   int pidx[GKYL_MAX_DIM], cidx[GKYL_MAX_CDIM], vidx[2];
 
   int tot_phase_quad = phase_ordinates->size;
 
-  for (unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < phase_range.volume; tid += blockDim.x*gridDim.x) {
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < phase_range.volume; tid += blockDim.x * gridDim.x) {
     gkyl_sub_range_inv_idx(&phase_range, tid, pidx);
 
     // Get configuration-space linear index.
-    for (unsigned int k = 0; k < cdim; k++) cidx[k] = pidx[k];
+    for (unsigned int k = 0; k < cdim; k++) {
+      cidx[k] = pidx[k];
+    }
     long linidx_conf = gkyl_range_idx(&conf_range, cidx);
 
-    const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf);
-    const double *Dbmag_quad_wall_d = (const double*) gkyl_array_cfetch(Dbmag_quad_wall, linidx_conf);
-    const double *Dbmag_quad_tandem_d = is_tandem ? 
-      (const double*) gkyl_array_cfetch(Dbmag_quad_tandem, linidx_conf) : Dbmag_quad_d;
-    const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf);
-    const double *qDphiDbmag_quad_wall_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad_wall, linidx_conf);
+    const double *Dbmag_quad_d = (const double *)gkyl_array_cfetch(Dbmag_quad, linidx_conf);
+    const double *Dbmag_quad_wall_d = (const double *)gkyl_array_cfetch(Dbmag_quad_wall,
+      linidx_conf);
+    const double *Dbmag_quad_tandem_d = is_tandem ?
+      (const double *)gkyl_array_cfetch(Dbmag_quad_tandem, linidx_conf) : Dbmag_quad_d;
+    const double *qDphiDbmag_quad_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad,
+      linidx_conf);
+    const double *qDphiDbmag_quad_wall_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad_wall,
+      linidx_conf);
     const double *qDphiDbmag_quad_tandem_d = is_tandem ?
-      (const double*) gkyl_array_cfetch(qDphiDbmag_quad_tandem, linidx_conf) : qDphiDbmag_quad_d;
+      (const double *)gkyl_array_cfetch(qDphiDbmag_quad_tandem, linidx_conf) : qDphiDbmag_quad_d;
 
     // Get z-coordinates for field-line specific values.
     double bmag_max_z_val, bmag_tandem_z_val;
     if (cdim == 1) {
-      const double *bmag_max_z_d = (const double*) gkyl_array_cfetch(bmag_max_z_coord, 0);
+      const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, 0);
       bmag_max_z_val = bmag_max_z_d[0];
       if (is_tandem) {
-        const double *bmag_tandem_z_d = (const double*) gkyl_array_cfetch(bmag_tandem_z_coord, 0);
+        const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord, 0);
         bmag_tandem_z_val = bmag_tandem_z_d[0];
       }
-    } else {
-      int psi_idx[1] = {cidx[0]};
+    }
+    else {
+      int psi_idx[1] = { cidx[0] };
       long psi_linidx = gkyl_range_idx(&bmag_max_range, psi_idx);
-      const double *bmag_max_z_d = (const double*) gkyl_array_cfetch(bmag_max_z_coord, psi_linidx);
-      double xc_log[1] = {0.0};
+      const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, psi_linidx);
+      double xc_log[1] = { 0.0 };
       bmag_max_z_val = bmag_max_basis->eval_expand(xc_log, bmag_max_z_d);
       if (is_tandem) {
-        const double *bmag_tandem_z_d = (const double*) gkyl_array_cfetch(bmag_tandem_z_coord, psi_linidx);
+        const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord,
+          psi_linidx);
         bmag_tandem_z_val = bmag_max_basis->eval_expand(xc_log, bmag_tandem_z_d);
       }
     }
 
     gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
     long linidx_phase = gkyl_range_idx(&phase_range, pidx);
-    double *mask_d = (double*) gkyl_array_fetch(mask_out, linidx_phase);
+    double *mask_d = (double *)gkyl_array_fetch(mask_out, linidx_phase);
 
-    for (int d = cdim; d < pdim; d++) vidx[d-cdim] = pidx[d];
+    for (int d = cdim; d < pdim; d++) {
+      vidx[d - cdim] = pidx[d];
+    }
     long linidx_vel = gkyl_range_idx(&vel_range, vidx);
-    const double *vmap_d = (const double*) gkyl_array_cfetch(vmap, linidx_vel);
+    const double *vmap_d = (const double *)gkyl_array_cfetch(vmap, linidx_vel);
 
     mask_d[0] = 1.0; // In this case the mask has ncomp=1.
 
-    for (int n=0; n<tot_phase_quad; ++n) {
+    for (int n = 0; n < tot_phase_quad; ++n) {
       int cqidx = p2c_qidx[n];
 
-      const double *xcomp_d = (const double*) gkyl_array_cfetch(phase_ordinates, n);
+      const double *xcomp_d = (const double *)gkyl_array_cfetch(phase_ordinates, n);
 
       // Convert comp position coordinate to phys pos coord.
       log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
-  
+
       // Convert comp velocity coordinate to phys velocity coord.
       double xcomp[1];
       for (int vd = 0; vd < vdim; vd++) {
-        xcomp[0] = xcomp_d[cdim+vd];
-        xmu[cdim+vd] = vmap_basis->eval_expand(xcomp, vmap_d+vd*vmap_basis->num_basis);
+        xcomp[0] = xcomp_d[cdim + vd];
+        xmu[cdim + vd] = vmap_basis->eval_expand(xcomp, vmap_d + vd * vmap_basis->num_basis);
       }
-  
+
       // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_peak-bmag).
       double KEparDbmag = 0.0, KEparDbmag_wall = 0.0, KEparDbmag_tandem = 0.0;
       if (Dbmag_quad_d[cqidx] > 0.0)
-        KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx];
-  
+        KEparDbmag = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_d[cqidx];
+
       if (Dbmag_quad_wall_d[cqidx] > 0.0)
-        KEparDbmag_wall = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_wall_d[cqidx];
+        KEparDbmag_wall = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_wall_d[cqidx];
 
       if (is_tandem && Dbmag_quad_tandem_d[cqidx] > 0.0)
-        KEparDbmag_tandem = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_tandem_d[cqidx];
+        KEparDbmag_tandem = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_tandem_d[cqidx];
 
-      double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
-      double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall+qDphiDbmag_quad_wall_d[cqidx]));
-      double mu_bound_tandem = is_tandem ? GKYL_MAX2(0.0, KEparDbmag_tandem+qDphiDbmag_quad_tandem_d[cqidx]) : 0.0;
+      double mu_bound = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad_d[cqidx]);
+      double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall + qDphiDbmag_quad_wall_d[cqidx]));
+      double mu_bound_tandem = is_tandem ? GKYL_MAX2(0.0,
+        KEparDbmag_tandem + qDphiDbmag_quad_tandem_d[cqidx]) : 0.0;
 
       bool is_trapped;
       if (is_tandem) {
         // Tandem mirror trapping condition.
-        bool in_outer_cell = fabs(xmu[cdim-1]) < fabs(bmag_max_z_val) &&
-                             fabs(xmu[cdim-1]) > fabs(bmag_tandem_z_val);
-        bool in_central_cell = fabs(xmu[cdim-1]) <= fabs(bmag_tandem_z_val);
-        
+        bool in_outer_cell = fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val) &&
+          fabs(xmu[cdim - 1]) > fabs(bmag_tandem_z_val);
+        bool in_central_cell = fabs(xmu[cdim - 1]) <= fabs(bmag_tandem_z_val);
+
         if (in_outer_cell) {
-          is_trapped = mu_bound < xmu[cdim+1];
-        } else if (in_central_cell) {
+          is_trapped = mu_bound < xmu[cdim + 1];
+        }
+        else if (in_central_cell) {
           double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem);
-          is_trapped = mu_bound_min < xmu[cdim+1];
-        } else {
-          is_trapped = mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val);
+          is_trapped = mu_bound_min < xmu[cdim + 1];
         }
-      } else {
+        else {
+          is_trapped = mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val);
+        }
+      }
+      else {
         // Single mirror case.
-        is_trapped = (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_val)) ||
-                     (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val));
+        is_trapped = (mu_bound < xmu[cdim + 1] && fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val)) ||
+          (mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val));
       }
-  
+
       if (!is_trapped) {
         mask_d[0] = 0.0;
         break;
@@ -314,59 +334,67 @@ gkyl_loss_cone_mask_gyrokinetic_ker(int cdim, struct gkyl_rect_grid grid_phase,
 __global__ static void
 gkyl_loss_cone_mask_gyrokinetic_quad_ker(int cdim, struct gkyl_rect_grid grid_phase,
   struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
-  struct gkyl_range bmag_max_range, const struct gkyl_basis* bmag_max_basis, bool is_tandem,
-  double mass, double norm_fac, const struct gkyl_array* phase_ordinates, 
-  const struct gkyl_array* bmag_max_z_coord, const struct gkyl_array* bmag_tandem_z_coord,
-  const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* qDphiDbmag_quad_wall,
-  const struct gkyl_array* qDphiDbmag_quad_tandem,
-  const struct gkyl_array* Dbmag_quad, const struct gkyl_array* Dbmag_quad_wall,
-  const struct gkyl_array* Dbmag_quad_tandem,
-  const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out_quad)
+  struct gkyl_range bmag_max_range, const struct gkyl_basis *bmag_max_basis, bool is_tandem,
+  double mass, double norm_fac, const struct gkyl_array *phase_ordinates,
+  const struct gkyl_array *bmag_max_z_coord, const struct gkyl_array *bmag_tandem_z_coord,
+  const struct gkyl_array *qDphiDbmag_quad, const struct gkyl_array *qDphiDbmag_quad_wall,
+  const struct gkyl_array *qDphiDbmag_quad_tandem,
+  const struct gkyl_array *Dbmag_quad, const struct gkyl_array *Dbmag_quad_wall,
+  const struct gkyl_array *Dbmag_quad_tandem,
+  const int *p2c_qidx, struct gkyl_array *vmap, struct gkyl_basis *vmap_basis,
+  struct gkyl_array *mask_out_quad)
 {
   int pdim = phase_range.ndim;
-  int vdim = pdim-cdim;
+  int vdim = pdim - cdim;
 
-  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
+  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = { 0.0 };
   int pidx[GKYL_MAX_DIM], cidx[GKYL_MAX_CDIM], vidx[2];
 
   // 2D thread grid
   // linc2 goes from 0 to tot_quad_phase
-  long linc2 = threadIdx.y + blockIdx.y*blockDim.y;
-  for (unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < phase_range.volume; tid += blockDim.x*gridDim.x) {
+  long linc2 = threadIdx.y + blockIdx.y * blockDim.y;
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < phase_range.volume; tid += blockDim.x * gridDim.x) {
     gkyl_sub_range_inv_idx(&phase_range, tid, pidx);
 
     // Get configuration-space linear index.
-    for (unsigned int k = 0; k < cdim; k++) cidx[k] = pidx[k];
+    for (unsigned int k = 0; k < cdim; k++) {
+      cidx[k] = pidx[k];
+    }
 
     long linidx_conf = gkyl_range_idx(&conf_range, cidx);
 
-    const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf);
-    const double *Dbmag_quad_wall_d = (const double*) gkyl_array_cfetch(Dbmag_quad_wall, linidx_conf);
+    const double *Dbmag_quad_d = (const double *)gkyl_array_cfetch(Dbmag_quad, linidx_conf);
+    const double *Dbmag_quad_wall_d = (const double *)gkyl_array_cfetch(Dbmag_quad_wall,
+      linidx_conf);
     const double *Dbmag_quad_tandem_d = is_tandem ?
-      (const double*) gkyl_array_cfetch(Dbmag_quad_tandem, linidx_conf) : Dbmag_quad_d;
-    const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf);
-    const double *qDphiDbmag_quad_wall_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad_wall, linidx_conf);
+      (const double *)gkyl_array_cfetch(Dbmag_quad_tandem, linidx_conf) : Dbmag_quad_d;
+    const double *qDphiDbmag_quad_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad,
+      linidx_conf);
+    const double *qDphiDbmag_quad_wall_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad_wall,
+      linidx_conf);
     const double *qDphiDbmag_quad_tandem_d = is_tandem ?
-      (const double*) gkyl_array_cfetch(qDphiDbmag_quad_tandem, linidx_conf) : qDphiDbmag_quad_d;
+      (const double *)gkyl_array_cfetch(qDphiDbmag_quad_tandem, linidx_conf) : qDphiDbmag_quad_d;
 
     // Get z-coordinates for field-line specific values.
     double bmag_max_z_val, bmag_tandem_z_val;
     if (cdim == 1) {
-      const double *bmag_max_z_d = (const double*) gkyl_array_cfetch(bmag_max_z_coord, 0);
+      const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, 0);
       bmag_max_z_val = bmag_max_z_d[0];
       if (is_tandem) {
-        const double *bmag_tandem_z_d = (const double*) gkyl_array_cfetch(bmag_tandem_z_coord, 0);
+        const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord, 0);
         bmag_tandem_z_val = bmag_tandem_z_d[0];
       }
-    } else {
-      int psi_idx[1] = {cidx[0]};
+    }
+    else {
+      int psi_idx[1] = { cidx[0] };
       long psi_linidx = gkyl_range_idx(&bmag_max_range, psi_idx);
-      const double *bmag_max_z_d = (const double*) gkyl_array_cfetch(bmag_max_z_coord, psi_linidx);
-      double xc_log[1] = {0.0};
+      const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, psi_linidx);
+      double xc_log[1] = { 0.0 };
       bmag_max_z_val = bmag_max_basis->eval_expand(xc_log, bmag_max_z_d);
       if (is_tandem) {
-        const double *bmag_tandem_z_d = (const double*) gkyl_array_cfetch(bmag_tandem_z_coord, psi_linidx);
+        const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord,
+          psi_linidx);
         bmag_tandem_z_val = bmag_max_basis->eval_expand(xc_log, bmag_tandem_z_d);
       }
     }
@@ -375,11 +403,13 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(int cdim, struct gkyl_rect_grid grid_ph
     long linidx_phase = gkyl_range_idx(&phase_range, pidx);
 
     int cqidx = p2c_qidx[linc2];
-    for (int d = cdim; d < pdim; d++) vidx[d-cdim] = pidx[d];
+    for (int d = cdim; d < pdim; d++) {
+      vidx[d - cdim] = pidx[d];
+    }
 
     long linidx_vel = gkyl_range_idx(&vel_range, vidx);
-    const double *vmap_d = (const double*) gkyl_array_cfetch(vmap, linidx_vel);
-    const double *xcomp_d = (const double*) gkyl_array_cfetch(phase_ordinates, linc2);
+    const double *vmap_d = (const double *)gkyl_array_cfetch(vmap, linidx_vel);
+    const double *xcomp_d = (const double *)gkyl_array_cfetch(phase_ordinates, linc2);
 
     // Convert comp position coordinate to phys pos coord.
     log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
@@ -387,46 +417,51 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(int cdim, struct gkyl_rect_grid grid_ph
     // Convert comp velocity coordinate to phys velocity coord.
     double xcomp[1];
     for (int vd = 0; vd < vdim; vd++) {
-      xcomp[0] = xcomp_d[cdim+vd];
-      xmu[cdim+vd] = vmap_basis->eval_expand(xcomp, vmap_d+vd*vmap_basis->num_basis);
+      xcomp[0] = xcomp_d[cdim + vd];
+      xmu[cdim + vd] = vmap_basis->eval_expand(xcomp, vmap_d + vd * vmap_basis->num_basis);
     }
 
     // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_peak-bmag).
     double KEparDbmag = 0.0, KEparDbmag_wall = 0.0, KEparDbmag_tandem = 0.0;
     if (Dbmag_quad_d[cqidx] > 0.0)
-      KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx];
+      KEparDbmag = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_d[cqidx];
 
     if (Dbmag_quad_wall_d[cqidx] > 0.0)
-      KEparDbmag_wall = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_wall_d[cqidx];
+      KEparDbmag_wall = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_wall_d[cqidx];
 
     if (is_tandem && Dbmag_quad_tandem_d[cqidx] > 0.0)
-      KEparDbmag_tandem = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_tandem_d[cqidx];
+      KEparDbmag_tandem = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_tandem_d[cqidx];
 
-    double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
-    double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall+qDphiDbmag_quad_wall_d[cqidx]));
-    double mu_bound_tandem = is_tandem ? GKYL_MAX2(0.0, KEparDbmag_tandem+qDphiDbmag_quad_tandem_d[cqidx]) : 0.0;
+    double mu_bound = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad_d[cqidx]);
+    double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall + qDphiDbmag_quad_wall_d[cqidx]));
+    double mu_bound_tandem = is_tandem ? GKYL_MAX2(0.0,
+      KEparDbmag_tandem + qDphiDbmag_quad_tandem_d[cqidx]) : 0.0;
+
+    double *fq = (double *)gkyl_array_fetch(mask_out_quad, linidx_phase);
 
-    double *fq = (double*) gkyl_array_fetch(mask_out_quad, linidx_phase);
-    
     if (is_tandem) {
       // Tandem mirror trapping condition.
-      bool in_outer_cell = fabs(xmu[cdim-1]) < fabs(bmag_max_z_val) &&
-                           fabs(xmu[cdim-1]) > fabs(bmag_tandem_z_val);
-      bool in_central_cell = fabs(xmu[cdim-1]) <= fabs(bmag_tandem_z_val);
-      
+      bool in_outer_cell = fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val) &&
+        fabs(xmu[cdim - 1]) > fabs(bmag_tandem_z_val);
+      bool in_central_cell = fabs(xmu[cdim - 1]) <= fabs(bmag_tandem_z_val);
+
       if (in_outer_cell) {
-        fq[linc2] = (mu_bound < xmu[cdim+1]) ? norm_fac : 0.0;
-      } else if (in_central_cell) {
+        fq[linc2] = (mu_bound < xmu[cdim + 1]) ? norm_fac : 0.0;
+      }
+      else if (in_central_cell) {
         double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem);
-        fq[linc2] = (mu_bound_min < xmu[cdim+1]) ? norm_fac : 0.0;
-      } else {
-        fq[linc2] = (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val)) ? norm_fac : 0.0;
+        fq[linc2] = (mu_bound_min < xmu[cdim + 1]) ? norm_fac : 0.0;
+      }
+      else {
+        fq[linc2] = (mu_bound_wall > xmu[cdim + 1] &&
+          fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val)) ? norm_fac : 0.0;
       }
-    } else {
+    }
+    else {
       // Single mirror case.
-      if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_z_val))
+      if (mu_bound < xmu[cdim + 1] && fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val))
         fq[linc2] = norm_fac;
-      else if (mu_bound_wall > xmu[cdim+1] && fabs(xmu[cdim-1]) >= fabs(bmag_max_z_val))
+      else if (mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val))
         fq[linc2] = norm_fac;
       else
         fq[linc2] = 0.0;
@@ -442,41 +477,49 @@ gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
 {
   dim3 dimGrid_conf, dimBlock_conf;
   int tot_quad_conf = up->basis_at_ords_conf->size;
-  gkyl_parallelize_components_kernel_launch_dims(&dimGrid_conf, &dimBlock_conf, *conf_range, tot_quad_conf);
+  gkyl_parallelize_components_kernel_launch_dims(&dimGrid_conf, &dimBlock_conf, *conf_range,
+    tot_quad_conf);
 
   // Compute qDphiDbmag at quadrature points.
   gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker<<<dimGrid_conf, dimBlock_conf>>>(
-    up->cdim, *conf_range, *up->bmag_max_range, 
+    up->cdim, *conf_range, *up->bmag_max_range,
     up->basis_at_ords_conf->on_dev, up->bmag_max_basis_on_dev, up->charge, up->is_tandem,
     phi->on_dev, phi_m->on_dev, phi_tandem->on_dev,
     up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
-    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev, up->qDphiDbmag_quad_tandem->on_dev);
+    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
+    up->qDphiDbmag_quad_tandem->on_dev);
 
   const struct gkyl_velocity_map *gvm = up->vel_map;
 
   if (up->cellwise_trap_loss) {
     // Don't do quadrature.
     int nblocks = phase_range->nblocks, nthreads = phase_range->nthreads;
-    gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks, nthreads>>>(up->cdim, *up->grid_phase, *phase_range, *conf_range,
-      gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis_on_dev, up->is_tandem,
-      up->mass, up->ordinates_phase->on_dev,
-      up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev, 
-      up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev, up->qDphiDbmag_quad_tandem->on_dev,
-      up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev, 
-      up->p2c_qidx, gvm->vmap->on_dev, gvm->vmap_basis, mask_out->on_dev);
-  } else {
+    gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks,
+      nthreads>>>(up->cdim, *up->grid_phase, *phase_range, *conf_range,
+    gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis_on_dev, up->is_tandem,
+    up->mass, up->ordinates_phase->on_dev,
+    up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev,
+    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
+    up->qDphiDbmag_quad_tandem->on_dev,
+    up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
+    up->p2c_qidx, gvm->vmap->on_dev, gvm->vmap_basis, mask_out->on_dev);
+  }
+  else {
     // Use quadrature.
     dim3 dimGrid, dimBlock;
     int tot_quad_phase = up->basis_at_ords_phase->size;
-    gkyl_parallelize_components_kernel_launch_dims(&dimGrid, &dimBlock, *phase_range, tot_quad_phase);
-
-    gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid, dimBlock>>>(up->cdim, *up->grid_phase, *phase_range, *conf_range,
-      gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis_on_dev, up->is_tandem,
-      up->mass, up->norm_fac, up->ordinates_phase->on_dev,
-      up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev,
-      up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev, up->qDphiDbmag_quad_tandem->on_dev,
-      up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
-      up->p2c_qidx, gvm->vmap->on_dev, gvm->vmap_basis, up->mask_out_quad->on_dev);
+    gkyl_parallelize_components_kernel_launch_dims(&dimGrid, &dimBlock, *phase_range,
+      tot_quad_phase);
+
+    gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid,
+      dimBlock>>>(up->cdim, *up->grid_phase, *phase_range, *conf_range,
+    gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis_on_dev, up->is_tandem,
+    up->mass, up->norm_fac, up->ordinates_phase->on_dev,
+    up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev,
+    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
+    up->qDphiDbmag_quad_tandem->on_dev,
+    up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
+    up->p2c_qidx, gvm->vmap->on_dev, gvm->vmap_basis, up->mask_out_quad->on_dev);
 
     // Call cublas to do the matrix multiplication nodal to modal conversion.
     gkyl_mat_mm_array(up->phase_nodal_to_modal_mem, up->mask_out_quad, mask_out);

From 062d869a9d40f2c8340ebb44e41874a283841ee9 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 6 Mar 2026 11:23:36 -0800
Subject: [PATCH 23/32] Fix an issue with the non-uniform grids in the 2x2v
 nonuniform wham regression test (and in my production simulations)

---
 .../creg/rt_gk_wham_nonuniformx_2x2v_p1.c     |  2 +-
 gyrokinetic/zero/gkyl_position_map_priv.h     | 74 +++++++++++++------
 2 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c
index fb7ffd7519..7f310f9fcc 100644
--- a/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c
@@ -813,7 +813,7 @@ int main(int argc, char **argv)
         .map_strength = 0.5,
         .maximum_slope_at_min_B = 2,
         .gaussian_std = 0.2,
-        .gaussian_max_integration_width = 1.0,
+        .gaussian_max_integration_width = 0.5,
       },
     },
 
diff --git a/gyrokinetic/zero/gkyl_position_map_priv.h b/gyrokinetic/zero/gkyl_position_map_priv.h
index ec730c26ef..0822b014fd 100644
--- a/gyrokinetic/zero/gkyl_position_map_priv.h
+++ b/gyrokinetic/zero/gkyl_position_map_priv.h
@@ -276,13 +276,13 @@ calc_bmag_global_derivative(double theta, void *ctx)
   double fout[3];
   xh[0] = gpm->constB_ctx->psi;
   xh[1] = gpm->constB_ctx->alpha;
-  xh[2] = theta - h;
+  xh[2] = theta + h;
   gkyl_calc_bmag_global(0.0, xh, fout, bmag_ctx);
   double Bmag_plus = fout[0];
-  xh[2] = theta - 2*h;
+  xh[2] = theta - h;
   gkyl_calc_bmag_global(0.0, xh, fout, bmag_ctx);
   double Bmag_minus = fout[0];
-  return (Bmag_plus - Bmag_minus) / (h);
+  return (Bmag_plus - Bmag_minus) / (2*h);
 }
 
 /**
@@ -314,15 +314,24 @@ find_B_field_extrema(struct gkyl_position_map *gpm)
   double *theta_extrema = gkyl_malloc(sizeof(double) * (npts + 1));
   double *bmag_extrema = gkyl_malloc(sizeof(double) * (npts + 1));
 
-  for (int i = 0; i <= npts; i++){
+  for (int i = 1; i < npts; i++){
     double theta = theta_lo + i * theta_dxi;
     xp[Z_IDX] = theta;
     gkyl_calc_bmag_global(0.0, xp, &bmag_vals[i], bmag_ctx);
     dbmag_vals[i] = calc_bmag_global_derivative(theta, gpm);
-    if (i==0) continue;
 
-    // Minima
-    if (dbmag_vals[i] > 0 && dbmag_vals[i-1] < 0){
+    // Near-zero derivative: B is locally flat here, record as a minimum.
+    // Use continue so this is mutually exclusive with the sign-change checks below.
+    if (fabs(dbmag_vals[i]) < 1e-10) {
+      theta_extrema[extrema] = theta;
+      bmag_extrema[extrema] = bmag_vals[i];
+      extrema++;
+      continue;
+    }
+
+    // Minima via sign change. Guard on |dbmag[i-1]| to avoid a double-record if the
+    // previous point was already captured by the near-zero branch above.
+    if (dbmag_vals[i] > 0 && dbmag_vals[i-1] < 0 && fabs(dbmag_vals[i-1]) >= 1e-10){
       if (bmag_vals[i] < bmag_vals[i-1])
       {
         theta_extrema[extrema] = theta;
@@ -337,8 +346,8 @@ find_B_field_extrema(struct gkyl_position_map *gpm)
       }
     }
 
-    // Maxima
-    if (dbmag_vals[i] < 0 && dbmag_vals[i-1] > 0){
+    // Maxima via sign change. Guard on |dbmag[i-1]| for the same reason.
+    if (dbmag_vals[i] < 0 && dbmag_vals[i-1] > 0 && fabs(dbmag_vals[i-1]) >= 1e-10){
       if (bmag_vals[i] > bmag_vals[i-1])
       {
         theta_extrema[extrema] = theta;
@@ -375,30 +384,53 @@ find_B_field_extrema(struct gkyl_position_map *gpm)
 
   // Left edge
   if (bmag_extrema[0] > bmag_extrema[1])
-  {    gpm->constB_ctx->min_or_max[0] = 1;  } // Maximum
+  {
+    gpm->constB_ctx->min_or_max[0] = 1;  // Maximum
+  }
   else if (bmag_extrema[0] < bmag_extrema[1])
-  {    gpm->constB_ctx->min_or_max[0] = 0;  } // Minimum
+  {
+    gpm->constB_ctx->min_or_max[0] = 0;  // Minimum
+  }
   else
-  {    printf("Error: Extrema is not an extrema. Position_map optimization failed\n");  }
+  {
+    printf("Error: Extrema[0] is not an extrema (bmag[0]=%.6g == bmag[1]=%.6g). "
+      "Position_map optimization failed\n", bmag_extrema[0], bmag_extrema[1]);
+  }
 
   // Middle points
   for (int i = 1; i < extrema - 1; i++)
   {
     if (bmag_extrema[i] > bmag_extrema[i-1] && bmag_extrema[i] > bmag_extrema[i+1])
-    {      gpm->constB_ctx->min_or_max[i] = 1;    } // Maximum
+    {
+      gpm->constB_ctx->min_or_max[i] = 1;  // Maximum
+    }
     else if (bmag_extrema[i] < bmag_extrema[i-1] && bmag_extrema[i] < bmag_extrema[i+1])
-    {      gpm->constB_ctx->min_or_max[i] = 0;    } // Minimum
+    {
+      gpm->constB_ctx->min_or_max[i] = 0;  // Minimum
+    }
     else
-    {      printf("Error: Extrema is not an extrema. Position_map optimization failed\n");  }
+    {
+      printf("Error: Extrema[%d] is not an extrema (bmag[%d-1]=%.6g, bmag[%d]=%.6g, bmag[%d+1]=%.6g). "
+        "Position_map optimization failed\n",
+        i, i, bmag_extrema[i-1], i, bmag_extrema[i], i, bmag_extrema[i+1]);
+    }
   }
 
   // Right edge
   if (bmag_extrema[extrema-1] > bmag_extrema[extrema-2])
-  {    gpm->constB_ctx->min_or_max[extrema-1] = 1; } // Maximum
+  {
+    gpm->constB_ctx->min_or_max[extrema-1] = 1; // Maximum
+  }
   else if (bmag_extrema[extrema-1] < bmag_extrema[extrema-2])
-  {    gpm->constB_ctx->min_or_max[extrema-1] = 0; } // Minimum
-  else  
-  {    printf("Error: Extrema is not an extrema. Position_map optimization failed\n");  }
+  {
+    gpm->constB_ctx->min_or_max[extrema-1] = 0; // Minimum
+  }
+  else
+  {
+    printf("Error: Extrema[%d] (right edge) is not an extrema (bmag[%d-1]=%.6g, bmag[%d]=%.6g). "
+      "Position_map optimization failed\n",
+      extrema-1, extrema-1, bmag_extrema[extrema-2], extrema-1, bmag_extrema[extrema-1]);
+  }
 
   // Free mallocs
   gkyl_free(bmag_vals);
@@ -454,7 +486,7 @@ refine_B_field_extrema(struct gkyl_position_map *gpm)
     else if (bmag_cent < bmag_left && bmag_cent < bmag_right)
     { is_maximum = false; } // Local minima
     else
-    { printf("Error: Extrema is not an extrema. Position_map optimization failed\n");
+    { // printf("Error: Extrema is not an extrema. Position_map optimization failed\n");
       break;
     }
 
@@ -655,7 +687,7 @@ position_map_constB_z_numeric(double t, const double *xn, double *fout, void *ct
         return;
       }
       else {
-        fprintf(stderr, "Warning: Unexpected interval evaluation state in position_map_constB_z_numeric. Using theta directly.\n");
+        // fprintf(stderr, "Warning: Unexpected interval evaluation state in position_map_constB_z_numeric. Using theta directly.\n");
         fout[0] = theta;
         return;
       }

From d687d0fffc19720b317b50864bf14049c6dabc4d Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 12 Mar 2026 13:42:43 -0700
Subject: [PATCH 24/32] Fix two egregious mistakes in using the gk_run methods
 for the POA scheme. There is no POA scheme in the run methods.

---
 .../creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c | 99 ++++++++++++++++---
 .../creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c   | 98 +++++++++++++++---
 2 files changed, 165 insertions(+), 32 deletions(-)

diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
index 5a56930575..024ae5ff13 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
@@ -7,7 +7,6 @@
 #include <gkyl_eqn_type.h>
 #include <gkyl_fem_poisson_bctype.h>
 #include <gkyl_gyrokinetic.h>
-#include <gkyl_gyrokinetic_run.h>
 #include <gkyl_math.h>
 
 #include <rt_arg_parse.h>
@@ -980,25 +979,93 @@ int main(int argc, char **argv)
   };
 
   // Create app object.
-  // Set app output name from the executable name (argv[0]).
   snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
-  struct gkyl_gyrokinetic_run_inp run_inp = {
-    .app_inp = app_inp,
-    .time_stepping = {
-      .t_end           = ctx.t_end,
-      .num_frames      = ctx.num_frames,
-      .write_phase_freq = ctx.write_phase_freq,
-      .int_diag_calc_num = (int)(ctx.int_diag_calc_freq * ctx.num_frames),
-      .dt_failure_tol  = ctx.dt_failure_tol,
-      .num_failures_max = ctx.num_failures_max,
-      .is_restart      = app_args.is_restart,
-      .restart_frame   = app_args.restart_frame,
-      .num_steps       = app_args.num_steps,
-    },
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
   };
 
-  gkyl_gyrokinetic_run_simulation(&run_inp);
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit=0; pit<ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    };
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  if (app_args.num_steps != INT_MAX)
+    phase_idx_end = 1;
+
+  // Loop over number of number of phases;
+  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0)
+  {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
 
+  freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
   gkyl_gyrokinetic_comms_release(comm);
   release_ctx(&ctx);
   
diff --git a/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c
index 097113721a..d93aca3164 100644
--- a/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c
@@ -11,7 +11,6 @@
 #include <gkyl_fem_parproj.h>
 #include <gkyl_fem_poisson_bctype.h>
 #include <gkyl_gyrokinetic.h>
-#include <gkyl_gyrokinetic_run.h>
 #include <gkyl_math.h>
 
 #include <rt_arg_parse.h>
@@ -687,25 +686,92 @@ int main(int argc, char **argv)
   };
 
   // Create app object.
-  // Set app output name from the executable name (argv[0]).
   snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
-  struct gkyl_gyrokinetic_run_inp run_inp = {
-    .app_inp = app_inp,
-    .time_stepping = {
-      .t_end           = ctx.t_end,
-      .num_frames      = ctx.num_frames,
-      .write_phase_freq = ctx.write_phase_freq,
-      .int_diag_calc_num = (int)(ctx.int_diag_calc_freq * ctx.num_frames),
-      .dt_failure_tol  = ctx.dt_failure_tol,
-      .num_failures_max = ctx.num_failures_max,
-      .is_restart      = app_args.is_restart,
-      .restart_frame   = app_args.restart_frame,
-      .num_steps       = app_args.num_steps,
-    },
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
   };
 
-  gkyl_gyrokinetic_run_simulation(&run_inp);
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit=0; pit<ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    };
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  if (app_args.num_steps != INT_MAX)
+    phase_idx_end = 1;
+
+  // Loop over number of number of phases;
+  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0)
+  {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
 
+  freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
   gkyl_gyrokinetic_comms_release(comm);
   release_ctx(&ctx);
 

From 2d39244e070c70c08c8d65c3c11d3a9918c3be9f Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 12 Mar 2026 13:56:57 -0700
Subject: [PATCH 25/32] More dramatic potential for this regression test

---
 .../creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c      | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
index 662b4bbfa5..bb8339fe3f 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
@@ -240,19 +240,14 @@ void
 eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  // double b = 8;
-  // double func = (atan(-(xn[0] - 0.7) * b) - atan(-(xn[0] + 0.7) * b))/M_PI;
-  // fout[0] = 1e17*func;
-  fout[0] = 1e17;
+  double z = xn[1];
+  fout[0] = 1e17 * exp(-2 * pow(fabs(z), 2));
 }
 
 void
 eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  // double b=30;
-  // double func = (-atan(-(xn[0] - 0.98) * b) - atan(-(xn[0] + 0.98) * b))/M_PI;
-  // fout[0] = 1.2e6*func;
   fout[0] = 0.0;
 }
 
@@ -260,9 +255,6 @@ void
 eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  // double b = 5;
-  // double func = (atan(-(xn[0] - 0.7) * b) - atan(-(xn[0] + 0.7) * b))/M_PI;
-  // fout[0] = 15000*GKYL_ELEMENTARY_CHARGE*func;
   fout[0] = app->Ti0;
 }
 
@@ -277,7 +269,6 @@ eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_R
   double src_sigma = app->ion_source_sigma;
   double src_amp_floor = src_amp * 1e-2;
   if (fabs(z) <= 0.98) {
-    // sixth order polynomial drop of to the edge
     fout[0] = src_amp * (1 - pow(fabs(z), 6) / 0.98);
   }
   else {
@@ -317,7 +308,6 @@ void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *
   double cvpar = vc[0], cmu = vc[1];
   double b = 1.4;
   vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b);
-  // Cubic map in mu.
   vp[1] = mu_max_ion * pow(cmu, 3);
 }
 

From 724fb7faca339ee9a7478268dd356734b2f0ad87 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 12 Mar 2026 14:00:59 -0700
Subject: [PATCH 26/32] Add commented out calls to c2p_pos in
 loss_cone_mask_gyrokinetic kernels

---
 gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
index 693b5ff4f1..b32224d502 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
@@ -274,6 +274,9 @@ gkyl_loss_cone_mask_gyrokinetic_ker(int cdim, struct gkyl_rect_grid grid_phase,
       // Convert comp position coordinate to phys pos coord.
       log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
 
+      // Nonuniform spatial grids are NOT implemented on GPU
+      // up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);
+
       // Convert comp velocity coordinate to phys velocity coord.
       double xcomp[1];
       for (int vd = 0; vd < vdim; vd++) {
@@ -414,6 +417,9 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(int cdim, struct gkyl_rect_grid grid_ph
     // Convert comp position coordinate to phys pos coord.
     log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
 
+    // Nonuniform spatial grids are NOT implemented on GPU
+    // up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);
+
     // Convert comp velocity coordinate to phys velocity coord.
     double xcomp[1];
     for (int vd = 0; vd < vdim; vd++) {

From 6ae3332121a2a6c38780752f2442cb4ee85a5365 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Thu, 12 Mar 2026 17:57:59 -0400
Subject: [PATCH 27/32] Update the 1x2v boltz elc mirror poa to resemble the
 2x2v case for debugging

---
 .../creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c | 795 +++++++-----------
 1 file changed, 313 insertions(+), 482 deletions(-)

diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
index 024ae5ff13..83d13d24df 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
@@ -30,80 +30,44 @@ struct gk_poa_phase_params {
 };
 
 // Define the context of the simulation. This is basically all the globals
-struct gk_mirror_ctx
-{
+struct gk_mirror_ctx {
   int cdim, vdim; // Dimensionality.
-
   // Plasma parameters
-  double mi; // Ion mass.
-  double me; // Electron mass.
-  double qi; // Ion charge.
-  double qe; // Electron charge.
-  double Te0; // Electron temperature.
-  double Ti0; // Ion temperature.
-  double n0; // Density.
-  double B_p; // Plasma magnetic field (mirror center).
-  double beta; // Plasma beta in the center.
-  double tau; // Temperature ratio.
-
-  // Parameters controlling initial conditions.
-  double alim;
-  double alphaIC0;
-  double alphaIC1;
-  double Ti_perp0; // Reference ion perp temperature.
-  double Ti_par0; // Reference ion par temperature.
-  double Ti_perp_m; // Ion perp temperature at the throat.
-  double Ti_par_m; // Ion par temperature at the throat.
-  double cs_m; // Ion sound speed at the throat.
-
-  double nuFrac; // Fraction multiplying collision frequency.
-  double logLambdaIon; // Ion Coulomb logarithm.
-  double nuIon; // Ion-ion collision freq.
-
-  double vti; // Ion thermal speed.
-  double vte; // Electron thermal speed.
-  double c_s; // Ion sound speed.
-  double omega_ci; // Ion gyrofrequency.
-  double rho_s; // Ion sound gyroradius.
-
+  double mi;
+  double qi;
+  double me;
+  double qe;
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau;
+  double Ti0;
+  double nuFrac;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  double vti;
   double RatZeq0; // Radius of the field line at Z=0.
-  double Z_min; // Minimum axial coordinate Z.
-  double Z_max; // Maximum axial coordinate Z.
-  double z_min; // Minimum value of the position along the field line.
-  double z_max; // Maximum value of the position along the field line.
-  double psi_eval; // Psi (poloidal flux) of the field line.
-  double psi_in, z_in; // Auxiliary psi and z.
-
-  // Magnetic equilibrium model.
-  double mcB;
-  double gamma;
-  double Z_m; // Axial coordinate at mirror throat.
-  double z_m; // Computational coordinate at mirror throat.
-
-  // Source parameters
-  double NSrcIon;
-  double lineLengthSrcIon;
-  double sigSrcIon;
-  double NSrcFloorIon;
-  double TSrc0Ion;
-  double TSrcFloorIon;
-
-  // Physical velocity space limits.
-  double vpar_min_ion, vpar_max_ion;
+  // Axial coordinate Z extents. Endure that Z=0 is not on
+  double z_min;
+  double z_max;
+  double psi_eval;
+  // Physics parameters at mirror throat
+  double vpar_max_ion;
   double mu_max_ion;
-  // Computational velocity space limits.
-  double vpar_lin_fac_inv, mu_lin_fac_inv; // Inverse factor of where linear mapping ends.
-  double vpar_pow, mu_pow; // Power of the velocity grid.
-  double vpar_min_ion_c, vpar_max_ion_c;
-  double mu_min_ion_c, mu_max_ion_c;
-
-  // Grid DOF.
+  int Npsi;
   int Nz;
   int Nvpar;
   int Nmu;
   int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
   int poly_order;
 
+  // Source parameters
+  double ion_source_amplitude;
+  double ion_source_sigma;
+  double ion_source_temp;
+
   double t_end; // End time.
   int num_frames; // Number of output frames.
   int num_phases; // Number of phases.
@@ -112,6 +76,15 @@ struct gk_mirror_ctx
   double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
   double dt_failure_tol; // Minimum allowable fraction of initial time-step.
   int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+
+  // Geometry parameters for Lorentzian mirror
+  double mcB;     // Magnetic field parameter
+  double gamma;   // Width parameter for Lorentzian profile
+  double Z_m;     // Mirror throat location
+  double Z_min;   // Minimum Z coordinate
+  double Z_max;   // Maximum Z coordinate
+  double psi_in;  // Working variable for psi integration
+  double z_in;    // Working variable for z integration
 };
 
 double
@@ -121,9 +94,10 @@ psi_RZ(double RIn, double ZIn, void *ctx)
   double mcB = app->mcB;
   double gamma = app->gamma;
   double Z_m = app->Z_m;
+
   double psi = 0.5 * pow(RIn, 2.) * mcB *
-               (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
-                1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
   return psi;
 }
 
@@ -131,9 +105,13 @@ double
 R_psiZ(double psiIn, double ZIn, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  double Rout = sqrt(2.0 * psiIn / (app->mcB * 
-    (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) +
-     1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))))));
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+
+  double Rout = sqrt(2. * psiIn / (mcB *
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))))));
   return Rout;
 }
 
@@ -141,17 +119,21 @@ void
 Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
 {
   struct gk_mirror_ctx *app = ctx;
-  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
   double mcB = app->mcB;
   double gamma = app->gamma;
   double Z_m = app->Z_m;
-  *BRad = -(1.0 / 2.0) * Rcoord * mcB *
-          (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
-            2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
-  *BZ = mcB *
-        (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
-         1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))));
-  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+
+  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
+
+  BRad[0] = -(1. / 2.) * Rcoord * mcB *
+    (-2. * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
+    2. * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
+
+  BZ[0] = mcB *
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) );
+
+  Bmag[0] = sqrt(pow(BRad[0], 2) + pow(BZ[0], 2));
 }
 
 double
@@ -168,15 +150,13 @@ double
 z_psiZ(double psiIn, double ZIn, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  app->psi_in = psiIn;
   double eps = 0.0;
+  app->psi_in = psiIn;
   struct gkyl_qr_res integral;
-  if (eps <= ZIn)
-  {
+  if (eps <= ZIn) {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14);
   }
-  else
-  {
+  else {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14);
     integral.res = -integral.res;
   }
@@ -200,14 +180,12 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   app->psi_in = psiIn;
   app->z_in = zIn;
   struct gkyl_qr_res Zout;
-  if (zIn >= 0.0)
-  {
+  if (0.0 <= zIn) {
     double fl = root_Z_psiz(-eps, ctx);
     double fr = root_Z_psiz(app->Z_max + eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14);
   }
-  else
-  {
+  else {
     double fl = root_Z_psiz(app->Z_min - eps, ctx);
     double fr = root_Z_psiz(eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14);
@@ -215,235 +193,128 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   return Zout.res;
 }
 
+// Geometry evaluation functions for the gk app
 void
-eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
 {
-  double z = xn[0];
+  double psi = xc[0], theta = xc[1], z = xc[2];
 
-  struct gk_mirror_ctx *app = ctx;
-  double NSrc = app->NSrcIon;
-  double zSrc = app->lineLengthSrcIon;
-  double sigSrc = app->sigSrcIon;
-  double NSrcFloor = app->NSrcFloorIon;
+  double Z = Z_psiz(psi, z, ctx);
+  double R = R_psiZ(psi, Z, ctx);
 
-  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
-  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  double x = R * cos(theta);
+  double y = R * sin(theta);
 
-  if (fabs(Z) <= app->Z_m)
-  {
-    fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2))) *
-                              exp(-pow(z - zSrc, 2) / (2.0 * pow(sigSrc, 2))));
-  }
-  else
-  {
-    fout[0] = 1e-16;
-  }
+  xp[0] = x; xp[1] = y; xp[2] = Z;
 }
 
 void
-eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
-  fout[0] = 0.0;
+  struct gk_mirror_ctx *app = ctx;
+  double z = xc[2];
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+
+  double phi = xc[1];
+  // zc are computational coords.
+  // Set Cartesian components of magnetic field.
+  fout[0] = BRad * cos(phi);
+  fout[1] = BRad * sin(phi);
+  fout[2] = BZ;
 }
 
+// Evaluate collision frequencies
 void
-eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double sigSrc = app->sigSrcIon;
-  double TSrc0 = app->TSrc0Ion;
-  double Tfloor = app->TSrcFloorIon;
-
-  if (fabs(z) <= 2.0 * sigSrc)
-  {
-    fout[0] = TSrc0;
-  }
-  else
-  {
-    fout[0] = Tfloor;
-  }
+  fout[0] = app->nuIon;
 }
 
-// Ion initial conditions
 void
 eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double sigma = 0.9*z_m;
-  if (fabs(z) <= sigma)
-  {
-    fout[0] = 0.5*app->n0*(1. + tanh(10. * sigma * fabs(sigma - fabs(z))));
-  }
-  else
-  {
-    fout[0] = 0.5*app->n0*exp(-5 * (fabs(sigma - fabs(z))));
-  }
+  double z = xn[0];
+  fout[0] = 1e17 * exp(-2 * pow(fabs(z), 2));
 }
 
 void
 eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double cs_m = app->cs_m;
-  double z_m = app->z_m;
-  double z_max = app->z_max;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = 0.0;
-  }
-  else
-  {
-    fout[0] = (fabs(z) / z) * cs_m * tanh(3 * (z_max - z_m) * fabs(fabs(z) - z_m));
-  }
+  fout[0] = 0.0;
 }
 
 void
-eval_temp_par_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double Ti_par0 = app->Ti_par0;
-  double Ti_par_m = app->Ti_par_m;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = Ti_par_m+(Ti_par0-Ti_par_m)*tanh(4 * fabs(z_m - fabs(z)));
-  }
-  else
-  {
-    fout[0] = Ti_par_m;
-  }
+  fout[0] = app->Ti0;
 }
 
 void
-eval_temp_perp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double Ti_perp0 = app->Ti_perp0;
-  double Ti_perp_m = app->Ti_perp_m;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = Ti_perp_m - Ti_perp0*tanh(3.*fabs(z_m-fabs(z)));
+  double z = xn[0];
+  double src_amp = app->ion_source_amplitude;
+  double z_src = 0.0;
+  double src_sigma = app->ion_source_sigma;
+  double src_amp_floor = src_amp * 1e-2;
+  if (fabs(z) <= 0.98) {
+    fout[0] = src_amp * (1 - pow(fabs(z), 6) / 0.98);
   }
-  else
-  {
-    fout[0] = Ti_perp_m * GKYL_MAX2(1.e-3, exp(-5. * (fabs(z_m - fabs(z)))));
+  else {
+    fout[0] = 1e-16;
   }
 }
 
 void
-evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
-{
-  struct gk_mirror_ctx *app = ctx;
-  fout[0] = app->nuIon;
-}
-
-// Geometry evaluation functions for the gk app
-// mapc2p must assume a 3d input xc
-void
-mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
-{
-  double psi = xc[0];
-  double theta = xc[1];
-  double z = xc[2];
-
-  double Z = Z_psiz(psi, z, ctx);
-  double R = R_psiZ(psi, Z, ctx);
-
-  // Cartesian coordinates on plane perpendicular to Z axis.
-  double x = R * cos(theta);
-  double y = R * sin(theta);
-  xp[0] = x;
-  xp[1] = y;
-  xp[2] = Z;
-}
-
-// bmag_func must assume a 3d input xc
-void
-bmag_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
-  double z = xc[2];
-
-  struct gk_mirror_ctx *app = ctx;
-  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
-  double Z = Z_psiz(psi, z, ctx);
-  double BRad, BZ, Bmag;
-  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-  fout[0] = Bmag;
+  fout[0] = 0.0;
 }
 
-// bfield_func must assume a 3d input xc
 void
-bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
-  double z = xc[2];
-
   struct gk_mirror_ctx *app = ctx;
-  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
-  double Z = Z_psiz(psi, z, ctx);
-  double BRad, BZ, Bmag;
-  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-
-  double phi = xc[1];
-  // zc are computational coords. 
-  // Set Cartesian components of magnetic field.
-  fout[0] = BRad*cos(phi);
-  fout[1] = BRad*sin(phi);
-  fout[2] = BZ;
+  double z = xn[0];
+  double TSrc0 = app->ion_source_temp;
+  double Tfloor = TSrc0 * 1e-2;
+  if (fabs(z) <= 0.98) {
+    fout[0] = TSrc0;
+  }
+  else {
+    fout[0] = Tfloor;
+  }
 }
 
-void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double vpar_max_ion = app->vpar_max_ion;
   double mu_max_ion = app->mu_max_ion;
 
   double cvpar = vc[0], cmu = vc[1];
-  // Linear map up to vpar_max/lin_frac_inv, then a power grid.
-  double vpar_lin_fac_inv = app->vpar_lin_fac_inv;
-  double vpar_pow = app->vpar_pow;
-  if (fabs(cvpar) <= 1.0/vpar_lin_fac_inv)
-    vp[0] = vpar_max_ion*cvpar;
-  else if (cvpar < -1.0/vpar_lin_fac_inv)
-    vp[0] = -vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow);
-  else
-    vp[0] =  vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow);
-
-//  // Quadratic mu.
-//  vp[1] = mu_max_ion*pow(cmu,2.0);
-  // Linear map up to mu_max/lin_frac_inv, then a power grid.
-  double mu_lin_fac_inv = app->mu_lin_fac_inv;
-  double mu_pow = app->mu_pow;
-//  if (cmu <= 1.0/mu_lin_fac_inv)
-//    vp[0] = mu_max_ion*cmu;
-//  else
-//    vp[0] = mu_max_ion*pow(mu_lin_fac_inv,mu_pow-1)*pow(cmu,mu_pow);
-  double w = 0.3;
-  double f = 0.012;
-  double a = mu_max_ion*(f-1.0)/(w*w-1.0);
-  double b = mu_max_ion*(w*w-f)/(w*w-1.0);
-  if (cmu <= w)
-    vp[1] = (f*mu_max_ion/w)*cmu;
-  else
-    vp[1] = a*pow(cmu,2)+b;
-
+  double b = 1.4;
+  vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b);
+  vp[1] = mu_max_ion * pow(cmu, 3);
 }
 
 struct gk_mirror_ctx
 create_ctx(void)
 {
   int cdim = 1, vdim = 2; // Dimensionality.
+  int poly_order = 1;
 
   // Universal constant parameters.
   double eps0 = GKYL_EPSILON0;
@@ -463,202 +334,150 @@ create_ctx(void)
   double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
   double Ti0 = tau * Te0;
 
-  // Parameters controlling initial conditions.
-  double alim = 0.125;
-  double alphaIC0 = 2;
-  double alphaIC1 = 10;
-
-  double nuFrac = 1.0;
   // Ion-ion collision freq.
+  double nuFrac = 1.0;
   double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
   double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
-                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
 
   // Thermal speeds.
   double vti = sqrt(Ti0 / mi);
-  double vte = sqrt(Te0 / me);
-  double c_s = sqrt(Te0 / mi);
-
-  // Gyrofrequencies and gyroradii.
-  double omega_ci = eV * B_p / mi;
-  double rho_s = c_s / omega_ci;
-
-  // Geometry parameters.
-  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
-  // Axial coordinate Z extents. Endure that Z=0 is not on
-  // the boundary of a cell (due to AD errors).
-  double Z_min = -2.5;
-  double Z_max =  2.5;
-
-  // Parameters controlling the magnetic equilibrium model.
-  double mcB = 6.51292;
-  double gamma = 0.124904;
-  double Z_m = 0.98;
-
-  // Source parameters
-  double NSrcIon = 3.1715e23 / 8.0 / 40.0 / 2.0 * 1.25;
-  double lineLengthSrcIon = 0.0;
-  double sigSrcIon = Z_m / 4.0;
-  double NSrcFloorIon = 0.05 * NSrcIon;
-  double TSrc0Ion = Ti0 * 1.25;
-  double TSrcFloorIon = TSrc0Ion / 8.0;
 
   // Grid parameters
   double vpar_max_ion = 16 * vti;
-  double vpar_min_ion = -vpar_max_ion;
   double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 64;
+  int Nvpar = 32; // 96 uniform
+  int Nmu = 16;  // 192 uniform
 
-  // Computational velocity space limits.
-  double vpar_lin_fac_inv = 4;
-  double vpar_pow = 3;
-  double vpar_min_ion_c = -1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow);
-  double vpar_max_ion_c =  1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow);
-  double mu_min_ion_c = 0.;
-  double mu_max_ion_c = 1.;
-  double mu_lin_fac_inv = 1.0/0.012;
-  double mu_pow = 2;
-//  double mu_min_ion_c = 0.0;
-//  double mu_max_ion_c = 1.0/pow(mu_lin_fac_inv,(mu_pow-1)/mu_pow);
-
-  // Grid DOF:
-  int Nz = 192; // Number of cells in z direction.
-  int Nvpar = 48; // Number of cells in parallel velocity direction.
-  int Nmu = 16;  // Number of cells in mu direction.
-  int poly_order = 1;
-
-  // Initial conditions parameter.s
-  double Ti_perp0 = 10000 * eV;
-  double Ti_par0 = 7500 * eV;
+  // Source parameters
+  double ion_source_amplitude = 1.e20;
+  double ion_source_sigma = 0.5;
+  double ion_source_temp = 5000. * eV;
 
-  // Parameters at mirror throat
-  double Ti_perp_m = 15000 * eV;
-  double Ti_par_m = 1000 * eV;
-  double z_m = 0.982544;
-  double cs_m = sqrt((Te0+3.0*Ti_par_m)/mi);
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  double Z_min = -2.5;
+  double Z_max = 2.5;
+  double mcB = 3.691260;
+  double gamma = 0.226381;
+  double Z_m = 0.98;
 
-  // Factor multiplying collisionless terms.
-  double alpha_oap = 0.01;
+  // POA parameters
+  double alpha_oap = 5e-6;  // Factor multiplying collisionless terms.
   double alpha_fdp = 1.0;
-  // Duration of each phase.
-  double tau_oap = 2400.0e-9;
-  double tau_fdp = 24.0e-9;
-  double tau_fdp_extra = 2*tau_fdp;
+  double tau_oap = 0.001;  // Duration of each phase.
+  double tau_fdp = 7e-9;
+  double tau_fdp_extra = 2e-9;
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
 
   // Frame counts for each phase type (specified independently)
-  int num_frames_oap = 4; // Frames per OAP phase
-  int num_frames_fdp = 4; // Frames per FDP phase
-  int num_frames_fdp_extra = 2*num_frames_fdp;  // Frames for the extra FDP phase
+  int num_frames_oap = 2;        // Frames per OAP phase
+  int num_frames_fdp = 2;        // Frames per FDP phase
+  int num_frames_fdp_extra = 2;  // Frames for the extra FDP phase
 
   // Whether to evolve the field.
   bool is_static_field_oap = true;
   bool is_static_field_fdp = false;
-  // Whether to enable positivity.
+
+  // Whether positivity is enabled.
   bool is_positivity_enabled_oap = false;
   bool is_positivity_enabled_fdp = true;
+
   // Type of df/dt multipler.
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
 
   // Calculate phase structure
-  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
-  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
-  int num_phases = 2*num_cycles + 1;
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2 * num_cycles + 1;
   int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
 
-  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
-  for (int i=0; i<(num_phases-1)/2; i++) {
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
     // OAPs.
-    poa_phases[2*i].phase = GK_POA_OAP;
-    poa_phases[2*i].num_frames = num_frames_oap;
-    poa_phases[2*i].duration = tau_oap;
-    poa_phases[2*i].alpha = alpha_oap;
-    poa_phases[2*i].is_static_field = is_static_field_oap;
-    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
-    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
 
     // FDPs.
-    poa_phases[2*i+1].phase = GK_POA_FDP;
-    poa_phases[2*i+1].num_frames = num_frames_fdp;
-    poa_phases[2*i+1].duration = tau_fdp;
-    poa_phases[2*i+1].alpha = alpha_fdp;
-    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
-    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
-    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
   }
-  // Add an extra, longer FDP.
-  poa_phases[num_phases-1].phase = GK_POA_FDP;
-  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
-  poa_phases[num_phases-1].duration = tau_fdp_extra;
-  poa_phases[num_phases-1].alpha = alpha_fdp;
-  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
-  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
-  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
-
-  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  // The final stage is an extra, longer FDP.
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 1; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
   double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
   double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
   int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
 
   struct gk_mirror_ctx ctx = {
-    .cdim = cdim,  .vdim = vdim,
-    .mi = mi,  .qi = qi,
-    .me = me,  .qe = qe,
-    .Te0 = Te0,  .Ti0 = Ti0,  .n0 = n0,
-    .B_p = B_p,  .beta = beta,  .tau = tau,
-    .alim = alim,
-    .alphaIC0 = alphaIC0,
-    .alphaIC1 = alphaIC1,
-    .nuFrac = nuFrac,  .logLambdaIon = logLambdaIon,  .nuIon = nuIon,
-    .vti = vti,  .vte = vte,  .c_s = c_s,
-    .omega_ci = omega_ci,  .rho_s = rho_s,
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .nuFrac = nuFrac,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
     .RatZeq0 = RatZeq0,
-    .Z_min = Z_min,  .Z_max = Z_max,
-    // Parameters controlling the magnetic equilibrium model.
-    .mcB = mcB,  .gamma = gamma,
-    .Z_m = Z_m,
-    .z_m = z_m,
-    // Initial condition parameters.
-    .Ti_perp0 = Ti_perp0,  .Ti_par0 = Ti_par0,
-    .Ti_perp_m = Ti_perp_m,  .Ti_par_m = Ti_par_m,  .cs_m = cs_m,
-    // Source parameters
-    .NSrcIon = NSrcIon,  .NSrcFloorIon = NSrcFloorIon,
-    .TSrc0Ion = TSrc0Ion,  .TSrcFloorIon = TSrcFloorIon,
-    .lineLengthSrcIon = lineLengthSrcIon,  .sigSrcIon = sigSrcIon,
-    // Physical velocity space limits.
-    .vpar_min_ion = vpar_min_ion,
     .vpar_max_ion = vpar_max_ion,
     .mu_max_ion = mu_max_ion,
-    // Computational velocity space limits.
-    .vpar_lin_fac_inv = vpar_lin_fac_inv,
-    .vpar_pow = vpar_pow,
-    .vpar_min_ion_c = vpar_min_ion_c,
-    .vpar_max_ion_c = vpar_max_ion_c,
-    .mu_lin_fac_inv = mu_lin_fac_inv,
-    .mu_pow = mu_pow,
-    .mu_min_ion_c = mu_min_ion_c,
-    .mu_max_ion_c = mu_max_ion_c,
-    // Grid DOF.
     .Nz = Nz,
     .Nvpar = Nvpar,
     .Nmu = Nmu,
-    .cells = {Nz, Nvpar, Nmu},
+    .cells = { Nz, Nvpar, Nmu },
     .poly_order = poly_order,
-    // Time integration and I/O parameters.
     .t_end = t_end,
     .num_frames = num_frames,
     .num_phases = num_phases,
     .poa_phases = poa_phases,
-    .write_phase_freq     = write_phase_freq    , 
-    .int_diag_calc_freq   = int_diag_calc_freq  , 
-    .dt_failure_tol       = dt_failure_tol      , 
-    .num_failures_max     = num_failures_max    , 
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+
+    .ion_source_amplitude = ion_source_amplitude,
+    .ion_source_sigma = ion_source_sigma,
+    .ion_source_temp = ion_source_temp,
+
+    .mcB = mcB,
+    .gamma = gamma,
+    .Z_m = Z_m,
+    .Z_min = Z_min,
+    .Z_max = Z_max,
   };
 
   // Populate a couple more values in the context.
   ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
-  ctx.z_min    = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
-  ctx.z_max    = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
 
   return ctx;
 }
@@ -670,25 +489,25 @@ release_ctx(struct gk_mirror_ctx *ctx)
 }
 
 void
-calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
   double t_curr, bool force_calc, double dt)
 {
   if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
     gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
     gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
 
-    if ( !(dt < 0.0) )
+    if (!(dt < 0.0) )
       gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
   }
 }
 
 void
-write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
-  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
 {
   bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
   if (trig_now_conf || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
     gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
 
     gkyl_gyrokinetic_app_write_field_energy(app);
@@ -698,7 +517,7 @@ write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
 
   bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
   if (trig_now_phase || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
 
     gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
   }
@@ -720,7 +539,7 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   double t_end = tfs->t_end;
   int frame_curr = tfs->frame_curr;
   int num_frames = tfs->num_frames;
-  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
 
   // Prevent division by zero when frame_curr equals num_frames
   int frames_remaining = num_frames - frame_curr;
@@ -734,15 +553,16 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   trig_write_phase->tcurr = t_curr;
   trig_write_phase->curr = frame_curr;
 
-  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  int diag_frames = GKYL_MAX2(frames_remaining,
+    (num_int_diag_calc / num_frames) * frames_remaining);
   trig_calc_intdiag->dt = time_remaining / diag_frames;
   trig_calc_intdiag->tcurr = t_curr;
   trig_calc_intdiag->curr = frame_curr;
 }
 
-void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
   struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
-  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
   struct gk_poa_phase_params *pparams)
 {
   tfs->t_end = tfs->t_curr + pparams->duration;
@@ -751,7 +571,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   // Run an OAP or FDP.
   double t_curr = tfs->t_curr;
   double t_end = tfs->t_end;
-  
+
   // Reset I/O triggers:
   reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
 
@@ -791,27 +611,21 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   int num_failures = 0, num_failures_max = ctx->num_failures_max;
 
   long step = 1;
-  while ((t_curr < t_end) && (step <= num_steps))
-  {
-    if (step == 1 || step % 20 == 0)
-      gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
-
-    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+  while ((t_curr < t_end) && (step <= num_steps)) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
+    dt = t_end - t_curr; // Ensure we don't step beyond t_end.
     struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+    gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
 
-    if (step == 1 || step % 20 == 0)
-      gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
-
-    if (!status.success)
-    {
+    if (!status.success) {
       gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
       break;
     }
     t_curr += status.dt_actual;
     dt = status.dt_suggested;
 
-    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual);
-    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end);
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end);
 
     if (dt_init < 0.0) {
       dt_init = status.dt_actual;
@@ -823,8 +637,10 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
       gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
       gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
       if (num_failures >= num_failures_max) {
-        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
-        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
         calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
         write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
         break;
@@ -838,7 +654,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   }
 
   tfs->t_curr = t_curr;
-  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
 }
 
 int main(int argc, char **argv)
@@ -857,96 +673,106 @@ int main(int argc, char **argv)
   struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
 
   int cells_x[ctx.cdim], cells_v[ctx.vdim];
-  for (int d=0; d<ctx.cdim; d++)
+  for (int d = 0; d < ctx.cdim; d++) {
     cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
-  for (int d=0; d<ctx.vdim; d++)
-    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
 
   // Construct communicator for use in app.
   struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
 
   struct gkyl_gyrokinetic_species ion = {
     .name = "ion",
-    .charge = ctx.qi,  .mass = ctx.mi,
+    .charge = ctx.qi,
+    .mass = ctx.mi,
     .vdim = ctx.vdim,
-    .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c},
-    .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c},
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
     .cells = { cells_v[0], cells_v[1] },
-
     .polarization_density = ctx.n0,
 
-    .mapc2p = {
-      .mapping = mapc2p_vel_ion,
-      .ctx = &ctx,
-    },
-
     .projection = {
-      .proj_id = GKYL_PROJ_BIMAXWELLIAN,
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
       .density = eval_density_ion,
-      .upar = eval_upar_ion,
-      .temppar = eval_temp_par_ion,
-      .tempperp = eval_temp_perp_ion,
       .ctx_density = &ctx,
+      .upar = eval_upar_ion,
       .ctx_upar = &ctx,
-      .ctx_temppar = &ctx,
-      .ctx_tempperp = &ctx,
+      .temp = eval_temp_ion,
+      .ctx_temp = &ctx,
+    },
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
     },
 
     .collisionless = {
       .type = GKYL_GK_COLLISIONLESS_ES,
       .scale_factor = 1.0, // Will be replaced below.
+      .write_diagnostics = true,
+    },
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE,
+      .cellwise_const = true,
+      .write_diagnostics = true,
     },
 
-    .collisions =  {
+    .collisions = {
       .collision_id = GKYL_LBO_COLLISIONS,
-      .self_nu = evalNuIon,
-      .self_nu_ctx = &ctx,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .write_diagnostics = true,
     },
-
     .source = {
       .source_id = GKYL_PROJ_SOURCE,
       .num_sources = 1,
       .projection[0] = {
-        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
-	.density = eval_density_ion_source,
-        .upar = eval_upar_ion_source,
-        .temp = eval_temp_ion_source,
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
         .ctx_density = &ctx,
+        .density = eval_density_ion_source,
         .ctx_upar = &ctx,
+        .upar = eval_upar_ion_source,
         .ctx_temp = &ctx,
-      }, 
-    },
-
-    .time_rate_multiplier = {
-      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
-      .cellwise_const = true,
-      .write_diagnostics = true,
-    },
-
-    .positivity = {
-      .type = GKYL_GK_POSITIVITY_SHIFT,
-      .write_diagnostics = true,
+        .temp = eval_temp_ion_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 6,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+      },
     },
 
     .bcs = {
       { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
       { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
     },
-
-    .num_diag_moments = 4,
-    .diag_moments = {GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    },
   };
-
   struct gkyl_gyrokinetic_field field = {
     .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
     .electron_mass = ctx.me,
     .electron_charge = ctx.qe,
     .electron_temp = ctx.Te0,
-    .is_static = false, // So solvers are allocated.
+    .is_static = false,
   };
 
-  // GK app
-  struct gkyl_gk app_inp = { 
+  struct gkyl_gk app_inp = {  // GK app
     .cdim = ctx.cdim,
     .lower = {ctx.z_min},
     .upper = {ctx.z_max},
@@ -960,14 +786,14 @@ int main(int argc, char **argv)
       .mapc2p = mapc2p, // Mapping of computational to physical space.
       .c2p_ctx = &ctx,
       .bfield_func = bfield_func, // Magnetic field.
-      .bfield_ctx = &ctx
+      .bfield_ctx = &ctx,
     },
 
     .num_periodic_dir = 0,
     .periodic_dirs = {},
 
     .num_species = 1,
-    .species = {ion},
+    .species = { ion },
 
     .field = field,
 
@@ -994,10 +820,12 @@ int main(int argc, char **argv)
 
   int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
   if (app_args.is_restart) {
-    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
 
     if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
-      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
       goto freeresources;
     }
 
@@ -1008,14 +836,15 @@ int main(int argc, char **argv)
     double time_count = 0.0;
     int frame_count = 0;
     int pit_curr = 0;
-    for (int pit=0; pit<ctx.num_phases; pit++) {
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
       time_count += ctx.poa_phases[pit].duration;
       frame_count += ctx.poa_phases[pit].num_frames;
       if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
         pit_curr = pit;
         break;
       }
-    };
+    }
+    ;
     phase_idx_init = pit_curr;
 
     // Change the duration and number frames so this phase reaches the expected
@@ -1041,10 +870,11 @@ int main(int argc, char **argv)
     phase_idx_end = 1;
 
   // Loop over number of number of phases;
-  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
     gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
     struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
-    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
   }
 
   gkyl_gyrokinetic_app_stat_write(app);
@@ -1054,21 +884,22 @@ int main(int argc, char **argv)
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
-  if (stat.nstage_2_fail > 0)
-  {
-    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
-    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
   }
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
   gkyl_gyrokinetic_app_print_timings(app, stdout);
 
-  freeresources:
+freeresources:
   // simulation complete, free app
   gkyl_gyrokinetic_app_release(app);
   gkyl_gyrokinetic_comms_release(comm);
   release_ctx(&ctx);
-  
+
 #ifdef GKYL_HAVE_MPI
   if (app_args.use_mpi)
     MPI_Finalize();

From baad16f8a547d53f59e2398a9d28fa4650a76800 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 13 Mar 2026 11:25:10 -0400
Subject: [PATCH 28/32] I found a parallelism bug. I was passing the global
 allgathered phi into the loss_cone_mask_advance, but the itterators are
 local, so they were itterating over the local ranges, which was offsetting
 phi when it is evaluated. It should instead be using the local phi.
 Furthermore, I cleaned up the unit tests a little.

I also realize that we should not be calling up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);. This is because the spatial coordinates are only used to find if we are beyond the mirror throat and in the expander. The mirror throat is evaluated in computational coordinates, so we should compare computational to computational coordinates. I'm not sure if this is a mistake on main because in this branch, I re-did how the mirror throat is found using the find_peaks operator.
---
 gyrokinetic/apps/gk_species_fdot_multiplier.c       | 4 ++--
 gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c | 9 ++++-----
 gyrokinetic/zero/loss_cone_mask_gyrokinetic.c       | 4 +++-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index fce3d9d568..9ee9fa34af 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -96,12 +96,12 @@ gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app,
     gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, fdmul->phi_smooth_global,
       fdmul->bmag_tandem_peak_idx, fdmul->phi_at_bmag_tandem);
     gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
-      fdmul->phi_smooth_global, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_tandem,
+      phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_tandem,
       fdmul->multiplier);
   }
   else {
     gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
-      fdmul->phi_smooth_global, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_max, fdmul->multiplier);
+      phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_max, fdmul->multiplier);
   }
 
   // Multiply out by the multiplier.
diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index 62561d1dd0..f7eaab745a 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -291,7 +291,6 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .global = local_conf,
     .global_ext = local_ext_conf,
     .basis = basis_conf,
-    .position_map = pmap,
   };
   geometry_input.geo_grid = gkyl_gk_geometry_augment_grid(grid_conf, geometry_input);
   gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext,
@@ -784,8 +783,8 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
 
   // High mu particles at center should mostly be trapped.
   double trapped_frac = (double)num_trapped_high_mu_center / (double)total_high_mu_center;
-  printf("Trapped fraction for high-mu center particles: %g (%d / %d)\n",
-    trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
+  // printf("Trapped fraction for high-mu center particles: %g (%d / %d)\n",
+  //   trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
   TEST_CHECK(trapped_frac >= 0.5);
   if (trapped_frac < 0.5) {
     printf("High-mu center trapped fraction: %g (%d / %d)\n",
@@ -794,8 +793,8 @@ test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
 
   // Low mu particles at center should mostly be passing.
   double passing_frac = (double)num_passing_low_mu_center / (double)total_low_mu_center;
-  printf("Passing fraction for low-mu center particles: %g (%d / %d)\n",
-    passing_frac, num_passing_low_mu_center, total_low_mu_center);
+  // printf("Passing fraction for low-mu center particles: %g (%d / %d)\n",
+  //   passing_frac, num_passing_low_mu_center, total_low_mu_center);
   TEST_CHECK(passing_frac >= 0.5);
   if (passing_frac < 0.5) {
     printf("Low-mu center passing fraction: %g (%d / %d)\n",
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index 7f6627b2a5..830042091c 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -594,7 +594,9 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         // Convert comp position coordinate to phys pos coord.
         gkyl_rect_grid_cell_center(up->grid_phase, pidx, xc);
         log_to_comp(up->cdim, xcomp_d, up->grid_phase->dx, xc, xmu);
-        up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);
+        // up->c2p_pos(xmu, xmu, up->c2p_pos_ctx); 
+        // I don't think this operation should happen because the z coodinate is only used
+        // for comparing to the location of maximum bmag, which is in computational coordinates
 
         // Convert comp velocity coordinate to phys velocity coord.
         const struct gkyl_velocity_map *gvm = up->vel_map;

From 6d95ed7b3d6a3a71a13dfc1518233ebb7a69c751 Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 13 Mar 2026 09:31:34 -0700
Subject: [PATCH 29/32] Reduce the number of frames in the regression tests

---
 gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c     | 4 ++--
 gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c     | 4 ++--
 gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c   | 4 ++--
 gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c | 4 ++--
 gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c         | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
index 83d13d24df..d9a3dc2bcd 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
@@ -372,8 +372,8 @@ create_ctx(void)
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
 
   // Frame counts for each phase type (specified independently)
-  int num_frames_oap = 2;        // Frames per OAP phase
-  int num_frames_fdp = 2;        // Frames per FDP phase
+  int num_frames_oap = 1;        // Frames per OAP phase
+  int num_frames_fdp = 1;        // Frames per FDP phase
   int num_frames_fdp_extra = 2;  // Frames for the extra FDP phase
 
   // Whether to evolve the field.
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
index bb8339fe3f..0c67c2b484 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
@@ -374,8 +374,8 @@ create_ctx(void)
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
 
   // Frame counts for each phase type (specified independently)
-  int num_frames_oap = 2;        // Frames per OAP phase
-  int num_frames_fdp = 2;        // Frames per FDP phase
+  int num_frames_oap = 1;        // Frames per OAP phase
+  int num_frames_fdp = 1;        // Frames per FDP phase
   int num_frames_fdp_extra = 2;  // Frames for the extra FDP phase
 
   // Whether to evolve the field.
diff --git a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
index fe62ae50e7..03ffe9943a 100644
--- a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
@@ -632,8 +632,8 @@ create_ctx(void)
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
 
   // Frame counts for each phase type (specified independently)
-  int num_frames_oap = 4; // Frames per OAP phase
-  int num_frames_fdp = 4; // Frames per FDP phase
+  int num_frames_oap = 1; // Frames per OAP phase
+  int num_frames_fdp = 1; // Frames per FDP phase
   int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
 
   // Whether to evolve the field.
diff --git a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
index e0ddd1c38b..dd7c7fe6be 100644
--- a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
+++ b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
@@ -407,8 +407,8 @@ create_ctx(void)
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
 
   // Frame counts for each phase type (specified independently)
-  int num_frames_oap = 4; // Frames per OAP phase
-  int num_frames_fdp = 4; // Frames per FDP phase
+  int num_frames_oap = 1; // Frames per OAP phase
+  int num_frames_fdp = 1; // Frames per FDP phase
   int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
 
   // Whether to evolve the field.
diff --git a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
index a43d12323b..b6fba11687 100644
--- a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
@@ -320,8 +320,8 @@ create_ctx(void)
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
 
   // Frame counts for each phase type (specified independently)
-  int num_frames_oap = 4; // Frames per OAP phase
-  int num_frames_fdp = 4; // Frames per FDP phase
+  int num_frames_oap = 1; // Frames per OAP phase
+  int num_frames_fdp = 1; // Frames per FDP phase
   int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
 
   // Whether to evolve the field.

From 443c76c81bd0af0e43a795a040305139c1eb4afa Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 13 Mar 2026 09:34:02 -0700
Subject: [PATCH 30/32] Add sprintf to the app name

---
 gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c     | 4 +++-
 gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c     | 3 +++
 gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c   | 3 +++
 gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c | 3 +++
 gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c       | 4 +++-
 gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c         | 3 +++
 6 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
index d9a3dc2bcd..e8e12e017a 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
@@ -804,8 +804,10 @@ int main(int argc, char **argv)
     },
   };
 
-  // Create app object.
+  // Set app output name from the executable name (argv[0]).
   snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+  
+  // Create app object.
   gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
 
   // Triggers for IO.
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
index 0c67c2b484..cd44c494f0 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
@@ -811,6 +811,9 @@ int main(int argc, char **argv)
     },
   };
 
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+  
   // Create app object.
   gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
 
diff --git a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
index 03ffe9943a..0ce0adf5b5 100644
--- a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
@@ -1159,6 +1159,9 @@ int main(int argc, char **argv)
     },
   };
 
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+  
   // Create app object.
   gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
 
diff --git a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
index dd7c7fe6be..7663554d9f 100644
--- a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
+++ b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
@@ -835,6 +835,9 @@ int main(int argc, char **argv)
     },
   };
 
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+  
   // Create app object.
   gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
 
diff --git a/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c
index dbda0e1f61..3da11c59be 100644
--- a/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c
@@ -685,8 +685,10 @@ int main(int argc, char **argv)
     },
   };
 
-  // Create app object.
+  // Set app output name from the executable name (argv[0]).
   snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+  
+  // Create app object.
   gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
 
   // Triggers for IO.
diff --git a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
index b6fba11687..5ba8cae6da 100644
--- a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
@@ -861,6 +861,9 @@ int main(int argc, char **argv)
     },
   };
 
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+  
   // Create app object.
   gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
 

From 122f3bef183ecda039b9845aa6a9b8dd2b49184b Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Fri, 13 Mar 2026 13:02:18 -0400
Subject: [PATCH 31/32] Remove c2p from the loss cone mask, as it shouldn't be
 there since everything is done in computational coordinates

---
 gyrokinetic/apps/gk_species_fdot_multiplier.c |  2 --
 .../zero/gkyl_loss_cone_mask_gyrokinetic.h    |  6 -----
 .../gkyl_loss_cone_mask_gyrokinetic_priv.h    |  3 ---
 gyrokinetic/zero/loss_cone_mask_gyrokinetic.c | 23 -------------------
 .../zero/loss_cone_mask_gyrokinetic_cu.cu     |  6 -----
 5 files changed, 40 deletions(-)

diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 9ee9fa34af..0fbd4002ae 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -299,8 +299,6 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
         .qtype = qtype,
         .num_quad = num_quad,
         .cellwise_trap_loss = cellwise_const,
-        .c2p_pos_func = proj_on_basis_c2p_position_func,
-        .c2p_pos_func_ctx = &fdmul->proj_on_basis_c2p_ctx,
         .use_gpu = app->use_gpu,
       };
       fdmul->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
index 08ecea8f56..7bb388349f 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
@@ -10,9 +10,6 @@
 // Object type.
 typedef struct gkyl_loss_cone_mask_gyrokinetic gkyl_loss_cone_mask_gyrokinetic;
 
-// Type of function expected for the ctp_pos_func input.
-typedef void (*loss_cone_mask_gyrokinetic_c2p_t)(const double *xcomp, double *xphys, void *ctx);
-
 // Available options:
 // A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
 // B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=true. Output: ncomp=1 array.
@@ -43,9 +40,6 @@ struct gkyl_loss_cone_mask_gyrokinetic_inp {
   int num_quad; // Number of quad points in each direction to use (default: poly_order+1).
   bool cellwise_trap_loss; // =True takes a whole cell to be either trapped or passing,
                            // so not high-order distinction within the cell is made.
-  loss_cone_mask_gyrokinetic_c2p_t c2p_pos_func; // Function that transforms a set of cdim
-  // position-space computational coordinates to physical ones.
-  void *c2p_pos_func_ctx; // Context for c2p_pos_func.
   bool use_gpu; // Whether to run on GPU.
 };
 
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
index 6fab5f7ed0..68cc2579fb 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
@@ -68,9 +68,6 @@ struct gkyl_loss_cone_mask_gyrokinetic {
   bool is_tandem; // Whether we are dealing with a tandem mirror case.
   bool use_gpu; // Boolean if we are performing projection on device.
 
-  loss_cone_mask_gyrokinetic_c2p_t c2p_pos; // Function transforming position comp to phys coords.
-  void *c2p_pos_ctx; // Context for the c2p_pos mapping.
-
   bool cellwise_trap_loss; // Whether a whole cell is trapped/lost, or whether
                            // high-order distinction within a cell is allowed.
   struct gkyl_range conf_qrange; // Range of Configuration-space ordinates.
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index 830042091c..072c142aa0 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -26,17 +26,6 @@ mkarr(long nc, long size, bool use_gpu)
     : gkyl_array_new(GKYL_DOUBLE, nc, size);
 }
 
-// Identity comp to phys coord mapping, for when user doesn't provide a map.
-static inline void
-c2p_pos_identity(const double *xcomp, double *xphys, void *ctx)
-{
-  struct gkyl_loss_cone_mask_gyrokinetic *up = ctx;
-  int cdim = up->cdim;
-  for (int d = 0; d < cdim; d++) {
-    xphys[d] = xcomp[d];
-  }
-}
-
 // create range to loop over quadrature points.
 static inline struct gkyl_range
 get_qrange(int cdim, int dim, int num_quad, int num_quad_v, bool *is_vdim_p2)
@@ -262,15 +251,6 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   up->bmag_max_z_scalar_gpu = NULL; // Will be set for GPU case.
   up->bmag_max_basis_on_dev = NULL; // Will be set for GPU case.
 
-  if (inp->c2p_pos_func == 0) {
-    up->c2p_pos = c2p_pos_identity;
-    up->c2p_pos_ctx = up;
-  }
-  else {
-    up->c2p_pos = inp->c2p_pos_func;
-    up->c2p_pos_ctx = inp->c2p_pos_func_ctx;
-  }
-
   // Initialize data needed for conf-space quadrature.
   up->tot_quad_conf = init_quad_values(up->cdim, inp->conf_basis, inp->qtype, num_quad,
     &up->ordinates_conf, &up->weights_conf, &up->basis_at_ords_conf, false);
@@ -594,9 +574,6 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         // Convert comp position coordinate to phys pos coord.
         gkyl_rect_grid_cell_center(up->grid_phase, pidx, xc);
         log_to_comp(up->cdim, xcomp_d, up->grid_phase->dx, xc, xmu);
-        // up->c2p_pos(xmu, xmu, up->c2p_pos_ctx); 
-        // I don't think this operation should happen because the z coodinate is only used
-        // for comparing to the location of maximum bmag, which is in computational coordinates
 
         // Convert comp velocity coordinate to phys velocity coord.
         const struct gkyl_velocity_map *gvm = up->vel_map;
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
index b32224d502..693b5ff4f1 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
@@ -274,9 +274,6 @@ gkyl_loss_cone_mask_gyrokinetic_ker(int cdim, struct gkyl_rect_grid grid_phase,
       // Convert comp position coordinate to phys pos coord.
       log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
 
-      // Nonuniform spatial grids are NOT implemented on GPU
-      // up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);
-
       // Convert comp velocity coordinate to phys velocity coord.
       double xcomp[1];
       for (int vd = 0; vd < vdim; vd++) {
@@ -417,9 +414,6 @@ gkyl_loss_cone_mask_gyrokinetic_quad_ker(int cdim, struct gkyl_rect_grid grid_ph
     // Convert comp position coordinate to phys pos coord.
     log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
 
-    // Nonuniform spatial grids are NOT implemented on GPU
-    // up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);
-
     // Convert comp velocity coordinate to phys velocity coord.
     double xcomp[1];
     for (int vd = 0; vd < vdim; vd++) {

From f8f799bc7131adfe684a2f42a2c94717569e100a Mon Sep 17 00:00:00 2001
From: Maxwell-Rosen <mrquell@gmail.com>
Date: Sat, 14 Mar 2026 11:50:14 -0400
Subject: [PATCH 32/32] Remove c2p context from damping

---
 gyrokinetic/apps/gk_species_damping.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index fb916f59ca..a8de75530c 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -222,8 +222,6 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
         .mass = gks->info.mass,
         .charge = gks->info.charge,
         .num_quad = num_quad,
-        .c2p_pos_func = proj_on_basis_c2p_position_func,
-        .c2p_pos_func_ctx = &damp->proj_on_basis_c2p_ctx,
         .use_gpu = app->use_gpu,
       };
       damp->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);