diff --git a/core/unit/ctest_array_dg_find_peaks.c b/core/unit/ctest_array_dg_find_peaks.c
new file mode 100644
index 000000000..98861f457
--- /dev/null
+++ b/core/unit/ctest_array_dg_find_peaks.c
@@ -0,0 +1,1192 @@
+#include <acutest.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_array_ops.h>
+#include <gkyl_array_rio.h>
+#include <gkyl_array_dg_find_peaks.h>
+#include <gkyl_array_dg_find_peaks_priv.h>
+#include <gkyl_basis.h>
+#include <gkyl_eval_on_nodes.h>
+#include <gkyl_range.h>
+#include <gkyl_rect_decomp.h>
+#include <gkyl_rect_grid.h>
+#include <gkyl_util.h>
+
+#include <math.h>
+#include <stdio.h>
+
+// Allocate a GKYL_DOUBLE array with nc components and size entries; uses gkyl_array_cu_dev_new when use_gpu is set.
+static struct gkyl_array*
+mkarr(bool use_gpu, long nc, long size)
+{
+  if (use_gpu)
+    return gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size);
+  return gkyl_array_new(GKYL_DOUBLE, nc, size);
+}
+
+// 1D cosine with several extrema: f(z) = cos(2*pi*z/L), L = 2 (maxima at z=0 and z=L, minimum at z=L/2).
+// Reads xn[0] only; t and ctx are unused. The value is written to fout[0].
+static void
+test_func_1d_cos(double t, const double *xn, double *fout, void *ctx)
+{
+  const double period = 2.0;  // L in the formula above.
+  const double zc = xn[0];
+  *fout = cos(2.0 * M_PI * zc / period);
+}
+
+// Mirror-like magnetic-field profile in 1D:
+// f(z) = B0 * (1 + (R-1)*sin^2(pi*z/L)) with B0 = 1, R = 4, L = 2.
+// Minimum (B0) at z=0; maxima (R*B0) at z=-L/2 and z=L/2, the mirror throats.
+static void
+test_func_1d_mirror(double t, const double *xn, double *fout, void *ctx)
+{
+  const double b_min = 1.0;         // B0: value at the minimum.
+  const double mirror_ratio = 4.0;  // R: maximum-to-minimum ratio.
+  const double len = 2.0;           // L: length scale in the sine argument.
+  const double zc = xn[0];
+  const double s = sin(M_PI * zc / len);
+  *fout = b_min * (1.0 + (mirror_ratio - 1.0) * s * s);
+}
+
+// Cosine along z with a psi-dependent amplitude: f(psi, z) = (1 + 0.1*psi) * cos(2*pi*z/L), L = 2.
+// xn = {psi, z}; the peak heights change mildly with psi.
+static void
+test_func_2d_cos(double t, const double *xn, double *fout, void *ctx)
+{
+  const double period = 2.0;
+  const double amp = 1.0 + 0.1 * xn[0];
+  *fout = amp * cos(2.0 * M_PI * xn[1] / period);
+}
+
+// 2D mirror-like profile: f(psi, z) = B0(psi) * (1 + (R-1)*sin^2(pi*z/L)), B0 = 1 + 0.1*psi, R = 4, L = 2.
+static void
+test_func_2d_mirror(double t, const double *xn, double *fout, void *ctx)
+{
+  const double mirror_ratio = 4.0;
+  const double len = 2.0;
+  const double psi_c = xn[0], zc = xn[1];
+  const double b_min = 1.0 + 0.1 * psi_c;  // Only the amplitude depends on psi.
+  const double s = sin(M_PI * zc / len);
+  *fout = b_min * (1.0 + (mirror_ratio - 1.0) * s * s);
+}
+
+// Oscillatory 1D target with a Gaussian envelope: f(z) = (sin^2(z) + 0.1) * exp(-z^2/100).
+// Interior extrema lie near z = 0, +/-pi/2, +/-pi, +/-3*pi/2; a domain holding all seven has 9 peaks counting its edges.
+static void
+test_func_1d_complex(double t, const double *xn, double *fout, void *ctx)
+{
+  const double zc = xn[0];
+  const double s = sin(zc);
+  *fout = (s * s + 0.1) * exp(-zc * zc / 100.0);
+}
+
+// 2D variant of the oscillatory target: f(psi, z) = (sin^2(z) + 0.1) * exp(-z^2/100) * psi.
+// xn = {psi, z}; the z-profile is scaled linearly by psi, so peak values grow with psi.
+static void
+test_func_2d_complex(double t, const double *xn, double *fout, void *ctx)
+{
+  const double psi_c = xn[0], zc = xn[1];
+  const double s = sin(zc);
+  *fout = (s * s + 0.1) * exp(-zc * zc / 100.0) * psi_c;
+}
+
+// Smooth field sampled at the detected peaks: g(psi, z) = z^2 * psi^2, with xn = {psi, z}.
+static void
+test_func_quadratic_2d(double t, const double *xn, double *fout, void *ctx)
+{
+  const double psi_c = xn[0], zc = xn[1];
+  *fout = zc * zc * psi_c * psi_c;
+}
+
+// 1D counterpart of the quadratic field: g(z) = z^2, with z = xn[0].
+static void
+test_func_quadratic_1d(double t, const double *xn, double *fout, void *ctx)
+{
+  const double zc = xn[0];
+  *fout = zc * zc;
+}
+
+// 1D peak search on cos(pi*z) over z in [-1, 1]: expects both domain edges plus the interior maximum at z=0.
+void
+test_1d_find_peaks_cos(int poly_order, bool use_gpu)
+{
+  // One full period of cos(2*pi*z/2), z in [-1, 1], on 16 cells.
+  int ncell[] = { 16 };
+  double z_lo[] = { -1.0 };
+  double z_hi[] = { 1.0 };
+  struct gkyl_rect_grid zgrid;
+  gkyl_rect_grid_init(&zgrid, 1, z_lo, z_hi, ncell);
+
+  // Modal serendipity basis of the requested polynomial order.
+  struct gkyl_basis dg_basis;
+  gkyl_cart_modal_serendip(&dg_basis, 1, poly_order);
+
+  // Interior range and the ghost-extended range (ghost = 1).
+  int nghost[] = { 1 };
+  struct gkyl_range rng, rng_ext;
+  gkyl_create_grid_ranges(&zgrid, nghost, &rng_ext, &rng);
+
+  // The nodal projection of the target is always built in a host array.
+  struct gkyl_array *fld_host = gkyl_array_new(GKYL_DOUBLE, dg_basis.num_basis, rng_ext.volume);
+  gkyl_eval_on_nodes *proj = gkyl_eval_on_nodes_new(&zgrid, &dg_basis, 1, test_func_1d_cos, NULL);
+  gkyl_eval_on_nodes_advance(proj, 0.0, &rng, fld_host);
+  gkyl_eval_on_nodes_release(proj);
+
+  // Working copy given to the peak finder, allocated through mkarr according to use_gpu.
+  struct gkyl_array *fld = mkarr(use_gpu, dg_basis.num_basis, rng_ext.volume);
+  gkyl_array_copy(fld, fld_host);
+
+  // Peak finder scanning along direction 0.
+  struct gkyl_array_dg_find_peaks_inp finder_inp = {
+    .use_gpu = use_gpu,
+    .search_dir = 0,
+    .grid = &zgrid,
+    .basis = &dg_basis,
+    .range = &rng,
+    .range_ext = &rng_ext,
+  };
+  struct gkyl_array_dg_find_peaks *finder = gkyl_array_dg_find_peaks_new(&finder_inp, fld);
+
+  // Run the search.
+  gkyl_array_dg_find_peaks_advance(finder, fld);
+
+  // cos(pi*z) on [-1,1]: EDGE_LO at z=-1, LOCAL_MAX at z=0, EDGE_HI at z=1.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(finder);
+  TEST_CHECK(num_peaks == 3);
+
+  struct {
+    enum gkyl_peak_type type;
+    double z_expected;
+  } expected_peaks[] = {
+    { .type = GKYL_PEAK_EDGE_LO, .z_expected = -1.0 },
+    { .type = GKYL_PEAK_LOCAL_MAX, .z_expected = 0.0 },
+    { .type = GKYL_PEAK_EDGE_HI, .z_expected = 1.0 },
+  };
+
+  for (int p = 0; p < num_peaks && p < 3; ++p) {
+    enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(finder, p);
+    const struct gkyl_array *coords_dev = gkyl_array_dg_find_peaks_acquire_coords(finder, p);
+    const struct gkyl_array *vals_dev = gkyl_array_dg_find_peaks_acquire_vals(finder, p);
+
+    // Host mirrors of the finder output so the data can be read here.
+    struct gkyl_array *coords_host = gkyl_array_new(GKYL_DOUBLE, coords_dev->ncomp, coords_dev->size);
+    struct gkyl_array *vals_host = gkyl_array_new(GKYL_DOUBLE, vals_dev->ncomp, vals_dev->size);
+    gkyl_array_copy(coords_host, coords_dev);
+    gkyl_array_copy(vals_host, vals_dev);
+
+    const double *coord = gkyl_array_cfetch(coords_host, 0);
+    const double *val = gkyl_array_cfetch(vals_host, 0);
+
+    // Analytic value of the target at the reported peak coordinate.
+    double expected_val[1];
+    test_func_1d_cos(0.0, coord, expected_val, NULL);
+
+    TEST_CHECK(ptype == expected_peaks[p].type);
+    TEST_CHECK(fabs(coord[0] - expected_peaks[p].z_expected) < 0.1);
+    TEST_CHECK(gkyl_compare_double(val[0], expected_val[0], 0.2));
+
+    gkyl_array_release(vals_host);
+    gkyl_array_release(coords_host);
+    gkyl_array_release(vals_dev);
+    gkyl_array_release(coords_dev);
+  }
+
+  gkyl_array_release(fld_host);
+  gkyl_array_release(fld);
+  gkyl_array_dg_find_peaks_release(finder);
+}
+
+// 1D peak search on the mirror profile over z in [-1, 1]: checks the two edge maxima and the minimum at z=0.
+void
+test_1d_find_peaks_mirror(int poly_order, bool use_gpu)
+{
+  // z in [-1, 1] on 16 cells.
+  int ncell[] = { 16 };
+  double z_lo[] = { -1.0 };
+  double z_hi[] = { 1.0 };
+  struct gkyl_rect_grid zgrid;
+  gkyl_rect_grid_init(&zgrid, 1, z_lo, z_hi, ncell);
+
+  // Modal serendipity basis of the requested polynomial order.
+  struct gkyl_basis dg_basis;
+  gkyl_cart_modal_serendip(&dg_basis, 1, poly_order);
+
+  // Interior range and the ghost-extended range (ghost = 1).
+  int nghost[] = { 1 };
+  struct gkyl_range rng, rng_ext;
+  gkyl_create_grid_ranges(&zgrid, nghost, &rng_ext, &rng);
+
+  // The nodal projection of the target is always built in a host array.
+  struct gkyl_array *fld_host = gkyl_array_new(GKYL_DOUBLE, dg_basis.num_basis, rng_ext.volume);
+  gkyl_eval_on_nodes *proj = gkyl_eval_on_nodes_new(&zgrid, &dg_basis, 1, test_func_1d_mirror, NULL);
+  gkyl_eval_on_nodes_advance(proj, 0.0, &rng, fld_host);
+  gkyl_eval_on_nodes_release(proj);
+
+  // Working copy given to the peak finder, allocated through mkarr according to use_gpu.
+  struct gkyl_array *fld = mkarr(use_gpu, dg_basis.num_basis, rng_ext.volume);
+  gkyl_array_copy(fld, fld_host);
+
+  // Peak finder scanning along direction 0.
+  struct gkyl_array_dg_find_peaks_inp finder_inp = {
+    .use_gpu = use_gpu,
+    .search_dir = 0,
+    .grid = &zgrid,
+    .basis = &dg_basis,
+    .range = &rng,
+    .range_ext = &rng_ext,
+  };
+  struct gkyl_array_dg_find_peaks *finder = gkyl_array_dg_find_peaks_new(&finder_inp, fld);
+
+  // Run the search.
+  gkyl_array_dg_find_peaks_advance(finder, fld);
+
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(finder);
+  TEST_CHECK(num_peaks == 3);
+
+  for (int p = 0; p < num_peaks; ++p) {
+    enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(finder, p);
+    const struct gkyl_array *coords_dev = gkyl_array_dg_find_peaks_acquire_coords(finder, p);
+    const struct gkyl_array *vals_dev = gkyl_array_dg_find_peaks_acquire_vals(finder, p);
+
+    // Host mirrors of the finder output so the data can be read here.
+    struct gkyl_array *coords_host = gkyl_array_new(GKYL_DOUBLE, coords_dev->ncomp, coords_dev->size);
+    struct gkyl_array *vals_host = gkyl_array_new(GKYL_DOUBLE, vals_dev->ncomp, vals_dev->size);
+    gkyl_array_copy(coords_host, coords_dev);
+    gkyl_array_copy(vals_host, vals_dev);
+
+    const double *coord = gkyl_array_cfetch(coords_host, 0);
+    const double *val = gkyl_array_cfetch(vals_host, 0);
+
+    // Expected: value 4 at either edge (z=-1, z=1) and value 1 at the minimum z=0; other types are not checked.
+    if (ptype == GKYL_PEAK_EDGE_LO) {
+      TEST_CHECK(gkyl_compare_double(val[0], 4.0, 1e-15));
+      TEST_CHECK(fabs(coord[0] - (-1.0)) < 1e-15);
+    }
+    else if (ptype == GKYL_PEAK_LOCAL_MIN) {
+      TEST_CHECK(gkyl_compare_double(val[0], 1.0, 1e-15));
+      TEST_CHECK(fabs(coord[0]) < 1e-15);
+    }
+    else if (ptype == GKYL_PEAK_EDGE_HI) {
+      TEST_CHECK(gkyl_compare_double(val[0], 4.0, 1e-15));
+      TEST_CHECK(fabs(coord[0] - 1.0) < 1e-15);
+    }
+    gkyl_array_release(coords_host);
+    gkyl_array_release(vals_host);
+    gkyl_array_release(coords_dev);
+    gkyl_array_release(vals_dev);
+  }
+
+  gkyl_array_release(fld_host);
+  gkyl_array_release(fld);
+  gkyl_array_dg_find_peaks_release(finder);
+}
+
+// 2D peak search along z (direction 1) on the psi-scaled mirror profile; verifies the first and last psi cells.
+void
+test_2d_find_peaks(int poly_order, bool use_gpu)
+{
+  double lower[] = { 0.0, -1.0 };  // {psi, z} lower bounds.
+  double upper[] = { 1.0, 1.0 };   // {psi, z} upper bounds.
+  int cells[] = { 4, 16 };
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 2, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 2, poly_order);
+
+  int ghost[] = { 1, 1 };
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_mirror, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create device copy if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+
+  // Create peak finder (search along z, which is direction 1).
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 1,  // Search along z.
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+
+  // Compute peaks.
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results: Mirror function should have 3 peaks along z.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 3);
+
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks);
+  const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks);
+
+  // Check that values and coordinates are reasonable for each peak.
+  for (int p = 0; p < num_peaks; p++) {
+    enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
+    const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size);
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(vals, vals_d);
+    gkyl_array_copy(coords, coords_d);
+
+    double xc_log[1] = { 0.0 };  // Logical cell center at which the expansions are evaluated.
+    // Check first and last psi cells; the stride is clamped to 1 so a one-cell range cannot loop forever.
+    int psi_stride = out_range->upper[0] - out_range->lower[0];
+    if (psi_stride < 1) { psi_stride = 1; }
+    for (int cell_idx = out_range->lower[0]; cell_idx <= out_range->upper[0]; cell_idx += psi_stride) {
+      long linidx = gkyl_range_idx(out_range, (int[]){ cell_idx });
+      const double *val_d = gkyl_array_cfetch(vals, linidx);
+      const double *coord_d = gkyl_array_cfetch(coords, linidx);
+
+      double val_at_center = out_basis->eval_expand(xc_log, val_d);
+      double coord_at_center = out_basis->eval_expand(xc_log, coord_d);
+      double psi_phys = out_grid->lower[0] + (cell_idx - 0.5) * out_grid->dx[0];
+
+      // Compute expected value at detected coordinate.
+      double xn[2] = { psi_phys, coord_at_center };
+      double expected_val[1];
+      test_func_2d_mirror(0.0, xn, expected_val, NULL);
+
+      // Check value matches analytical function.
+      TEST_CHECK(gkyl_compare_double(val_at_center, expected_val[0], 1e-15));
+
+      // Check that coordinate matches expected peak location.
+      if (ptype == GKYL_PEAK_EDGE_LO) {
+        TEST_CHECK(fabs(coord_at_center - (-1.0)) < 1e-15);
+      }
+      else if (ptype == GKYL_PEAK_LOCAL_MIN) {
+        TEST_CHECK(fabs(coord_at_center) < 1e-15);
+      }
+      else if (ptype == GKYL_PEAK_EDGE_HI) {
+        TEST_CHECK(fabs(coord_at_center - 1.0) < 1e-15);
+      }
+    }
+    gkyl_array_release(vals);
+    gkyl_array_release(coords);
+    gkyl_array_release(vals_d);
+    gkyl_array_release(coords_d);
+  }
+
+  gkyl_array_release(f_ho);
+  gkyl_array_release(f);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// 1D peak search on the oscillatory target over z in [-2*pi, 2*pi]: expects 9 peaks (two edges, seven interior).
+void
+test_1d_find_peaks_complex(int poly_order, bool use_gpu)
+{
+  double lower[] = { -2.0 * M_PI };
+  double upper[] = { 2.0 * M_PI };
+  int cells[] = { 64 };  // Need fine resolution to capture oscillations.
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 1, poly_order);
+
+  int ghost[] = { 1 };
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create device copy if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+
+  // Create peak finder.
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 0,
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+
+  // Compute peaks.
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+
+  TEST_CHECK(num_peaks == 9);
+
+  // Expected peak types and locations; the loop below is clamped to this table because TEST_CHECK does not abort.
+  struct {
+    enum gkyl_peak_type type;
+    double z_expected;
+  } expected_peaks[] = {
+    { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, -M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, 0.0, },
+    { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, },
+  };
+  const int num_expected = (int)(sizeof(expected_peaks) / sizeof(expected_peaks[0]));
+  for (int p = 0; p < num_peaks && p < num_expected; p++) {
+    enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
+    const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size);
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(vals, vals_d);
+    gkyl_array_copy(coords, coords_d);
+
+    const double *val = gkyl_array_cfetch(vals, 0);
+    const double *coord = gkyl_array_cfetch(coords, 0);
+
+    double z = coord[0];  // Reported peak location; the analytic target is evaluated there.
+    double expected_val[1];
+    test_func_1d_complex(0.0, &z, expected_val, NULL);
+
+    TEST_CHECK(ptype == expected_peaks[p].type);
+    TEST_CHECK(fabs(coord[0] - expected_peaks[p].z_expected) < 1e-15);
+    double rel_error = fabs(val[0] - expected_val[0]) / fabs(expected_val[0]);
+    TEST_CHECK(rel_error < 1e-15);
+
+    gkyl_array_release(coords);
+    gkyl_array_release(vals);
+    gkyl_array_release(coords_d);
+    gkyl_array_release(vals_d);
+  }
+
+  gkyl_array_release(f_ho);
+  gkyl_array_release(f);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// 2D peak search along z (direction 1) on the psi-scaled oscillatory target.
+// Expects the same 9 peaks as the 1D case and verifies value and location at every node of every psi cell.
+void
+test_2d_find_peaks_complex(int poly_order, bool use_gpu)
+{
+  // Grid: psi in [0.5, 2.0], z in [-2*pi, 2*pi].
+  double lower[] = { 0.5, -2.0 * M_PI };
+  double upper[] = { 2.0, 2.0 * M_PI };
+  int cells[] = { 16, 64 };
+  int ndim = 2;
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
+
+  // Basis.
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, ndim, poly_order);
+
+  // Ranges.
+  int ghost[] = { 1, 1 };
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function onto basis (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create device copy if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+
+  // Create peak finder (search along z, which is direction 1).
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 1,  // Search along z.
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+
+  // Compute peaks.
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 9);
+
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks);
+  const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks);
+
+  // Define expected peak locations and types (same as 1D).
+  struct {
+    enum gkyl_peak_type type;
+    double z_expected;
+  } expected_peaks[] = {
+    { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, -M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, 0.0, },
+    { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, },
+  };
+  const int num_expected = (int)(sizeof(expected_peaks) / sizeof(expected_peaks[0]));
+  // Get node locations for output basis.
+  struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, out_basis->ndim, out_basis->num_basis);
+  out_basis->node_list(gkyl_array_fetch(nodes, 0));
+
+  // Check peak types and validate values at each psi cell; clamped to the table since TEST_CHECK does not abort.
+  for (int p = 0; p < num_peaks && p < num_expected; p++) {
+    enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p);
+    TEST_CHECK(ptype == expected_peaks[p].type);
+
+    const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p);
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size);
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(vals, vals_d);
+    gkyl_array_copy(coords, coords_d);
+
+    // Check each psi cell.
+    struct gkyl_range_iter iter;
+    gkyl_range_iter_init(&iter, out_range);
+    while (gkyl_range_iter_next(&iter)) {
+      long linidx = gkyl_range_idx(out_range, iter.idx);
+
+      const double *val_d = gkyl_array_cfetch(vals, linidx);
+      const double *coord_d = gkyl_array_cfetch(coords, linidx);
+
+      // Get cell center for physical psi coordinate.
+      double xc_out[1];
+      gkyl_rect_grid_cell_center(out_grid, (int[]){ iter.idx[0] }, xc_out);
+
+      // Evaluate at each nodal point in this cell.
+      for (int n = 0; n < out_basis->num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(nodes, n);
+        double val_at_node = out_basis->eval_expand(nod_log, val_d);
+        double z_at_node = out_basis->eval_expand(nod_log, coord_d);
+
+        // Compute physical psi coordinate at this node.
+        // The logical node coordinate is scaled by dx/2 and offset from the cell center.
+        double nod_phys[1];
+        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0] / 2.0;
+
+        // Compute expected value at detected coordinates.
+        double xn[2] = { nod_phys[0], z_at_node };
+        double expected_val[1];
+        test_func_2d_complex(0.0, xn, expected_val, NULL);
+
+        TEST_CHECK(fabs(z_at_node - expected_peaks[p].z_expected) < 1e-15);
+        double rel_error = fabs(val_at_node - expected_val[0]) / fabs(expected_val[0]);
+        TEST_CHECK(rel_error < 1e-15);
+      }
+    }
+    gkyl_array_release(vals);
+    gkyl_array_release(coords);
+    gkyl_array_release(vals_d);
+    gkyl_array_release(coords_d);
+  }
+
+  gkyl_array_release(nodes);
+  gkyl_array_release(f_ho);
+  gkyl_array_release(f);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// 1D project_on_peaks: finds the peaks of the oscillatory target, then checks g(z) = z^2 evaluated at each peak.
+void
+test_1d_project_on_peaks(int poly_order, bool use_gpu)
+{
+  double lower[] = { -2.0 * M_PI };
+  double upper[] = { 2.0 * M_PI };
+  int cells[] = { 64 };
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 1, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 1, poly_order);
+
+  int ghost[] = { 1 };
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function for peak finding (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Project quadratic function to evaluate at peaks.
+  struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_1d, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create device copies if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+  gkyl_array_copy(g, g_ho);
+
+  // Create peak finder.
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 0,
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 9);
+
+  // Allocate output arrays for projected values.
+  const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks);
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX];
+  for (int p = 0; p < num_peaks; p++) {
+    g_at_peaks[p] = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume);
+  }
+  gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks);
+
+  // Verify that g evaluated at each peak matches analytical values.
+  struct {
+    enum gkyl_peak_type type;
+    double z_expected;
+  } expected_peaks[] = {
+    { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, -M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, 0.0, },
+    { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, },
+    { GKYL_PEAK_LOCAL_MIN, M_PI, },
+    { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, },
+    { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, },
+  };
+  const int num_expected = (int)(sizeof(expected_peaks) / sizeof(expected_peaks[0]));
+  for (int p = 0; p < num_peaks && p < num_expected; p++) {
+    // Copy back to host for verification; the loop is clamped to the table since TEST_CHECK does not abort.
+    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp,
+      g_at_peaks[p]->size);
+    gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]);
+
+    const double *g_val = gkyl_array_cfetch(g_at_peaks_ho, 0);
+    double z = expected_peaks[p].z_expected;
+    double expected = z * z;
+    TEST_CHECK(gkyl_compare_double(g_val[0], expected, 1e-12));
+    TEST_MSG("Peak %d: z=%.5f, g_at_peak=%.5f, expected=%.5f", p, z, g_val[0], expected);
+    gkyl_array_release(g_at_peaks_ho);
+  }
+
+  for (int p = 0; p < num_peaks; p++) {
+    gkyl_array_release(g_at_peaks[p]);
+  }
+  gkyl_array_release(f_ho);
+  gkyl_array_release(g_ho);
+  gkyl_array_release(f);
+  gkyl_array_release(g);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// 2D project_on_peaks: finds peaks along z of the oscillatory target, then checks g = z^2 * psi^2 at every peak node.
+void
+test_2d_project_on_peaks(int poly_order, bool use_gpu)
+{
+  double lower[] = { 0.5, -2.0 * M_PI };
+  double upper[] = { 2.0, 2.0 * M_PI };
+  int cells[] = { 16, 64 };
+  int ndim = 2;
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, ndim, poly_order);
+
+  int ghost[] = { 1, 1 };
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function for peak finding (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Project quadratic function to evaluate at peaks: g(psi, z) = z^2 * psi^2
+  struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_2d, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create device copies if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+  gkyl_array_copy(g, g_ho);
+
+  // Create peak finder (search along z, which is direction 1).
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 1,  // Search along z.
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 9);
+
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks);
+  const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks);
+  const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks);
+
+  // Allocate output arrays for projected values.
+  struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX];
+  for (int p = 0; p < num_peaks; p++) {
+    g_at_peaks[p] = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume);
+  }
+  gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks);
+
+  // Define expected peak locations (same as before).
+  double expected_z_peaks[] = {
+    -2.0 * M_PI, -3.0 * M_PI / 2.0, -M_PI, -M_PI / 2.0, 0.0,
+    M_PI / 2.0, M_PI, 3.0 * M_PI / 2.0, 2.0 * M_PI
+  };
+  const int num_expected = (int)(sizeof(expected_z_peaks) / sizeof(expected_z_peaks[0]));
+  // Get node locations for output basis.
+  struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, out_basis->ndim, out_basis->num_basis);
+  out_basis->node_list(gkyl_array_fetch(nodes, 0));
+
+  // Verify g at each peak against analytical values; clamped to the table since TEST_CHECK does not abort.
+  for (int p = 0; p < num_peaks && p < num_expected; p++) {
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(coords, coords_d);
+    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp,
+      g_at_peaks[p]->size);
+    gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]);
+
+    // Check each psi cell.
+    struct gkyl_range_iter iter;
+    gkyl_range_iter_init(&iter, out_range);
+    while (gkyl_range_iter_next(&iter)) {
+      long linidx = gkyl_range_idx(out_range, iter.idx);
+
+      const double *g_val_d = gkyl_array_cfetch(g_at_peaks_ho, linidx);
+      const double *coord_d = gkyl_array_cfetch(coords, linidx);
+
+      // Get cell center for physical psi coordinate.
+      double xc_out[1];
+      gkyl_rect_grid_cell_center(out_grid, (int[]){ iter.idx[0] }, xc_out);
+
+      // Evaluate at each nodal point in this cell.
+      for (int n = 0; n < out_basis->num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(nodes, n);
+        double g_at_node = out_basis->eval_expand(nod_log, g_val_d);
+        double z_at_node = out_basis->eval_expand(nod_log, coord_d);
+
+        // Compute physical psi coordinate at this node (logical coordinate scaled by dx/2 about the cell center).
+        double nod_phys[1];
+        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0] / 2.0;
+        double psi = nod_phys[0];
+
+        // Analytical value: g(psi, z) = z^2 * psi^2
+        double expected = z_at_node * z_at_node * psi * psi;
+        TEST_CHECK(fabs(z_at_node - expected_z_peaks[p]) < 1e-15);
+        TEST_CHECK(gkyl_compare_double(g_at_node, expected, 1e-15));
+      }
+    }
+    gkyl_array_release(coords);
+    gkyl_array_release(coords_d);
+    gkyl_array_release(g_at_peaks_ho);
+  }
+
+  // Clean up.
+  gkyl_array_release(nodes);
+  for (int p = 0; p < num_peaks; p++) {
+    gkyl_array_release(g_at_peaks[p]);
+  }
+  gkyl_array_release(f_ho);
+  gkyl_array_release(g_ho);
+  gkyl_array_release(f);
+  gkyl_array_release(g);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// Test project_on_peak_idx in 1D: find the peaks of test_func_1d_complex,
+// project test_func_quadratic_1d onto one chosen peak, and compare the result
+// with z^2 evaluated at that peak's expected location.
+void
+test_1d_project_on_peak_idx(int poly_order, bool use_gpu)
+{
+  // Grid on [-2*pi, 2*pi] with 64 cells and one ghost layer.
+  double xlo[] = { -2.0 * M_PI };
+  double xhi[] = { 2.0 * M_PI };
+  int ncells[] = { 64 };
+  int nghost[] = { 1 };
+
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, 1, xlo, xhi, ncells);
+
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, nghost, &local_ext, &local);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, 1, poly_order);
+
+  // Host arrays: f is searched for peaks, g is the field sampled at a peak.
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+
+  gkyl_eval_on_nodes *proj_f = gkyl_eval_on_nodes_new(&grid, &basis, 1,
+    test_func_1d_complex, NULL);
+  gkyl_eval_on_nodes_advance(proj_f, 0.0, &local, f_ho);
+  gkyl_eval_on_nodes_release(proj_f);
+
+  gkyl_eval_on_nodes *proj_g = gkyl_eval_on_nodes_new(&grid, &basis, 1,
+    test_func_quadratic_1d, NULL);
+  gkyl_eval_on_nodes_advance(proj_g, 0.0, &local, g_ho);
+  gkyl_eval_on_nodes_release(proj_g);
+
+  // Working copies allocated through mkarr (host or device, per use_gpu).
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+  struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(g, g_ho);
+
+  // Build the peak finder along direction 0 and locate the peaks of f.
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 0,
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 9);
+
+  // Expected peaks in scan order (type and location along z).
+  struct {
+    enum gkyl_peak_type type;
+    double z;
+  } peak_table[] = {
+    { GKYL_PEAK_EDGE_LO, -2.0 * M_PI },
+    { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0 },
+    { GKYL_PEAK_LOCAL_MIN, -M_PI },
+    { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0 },
+    { GKYL_PEAK_LOCAL_MIN, 0.0 },
+    { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0 },
+    { GKYL_PEAK_LOCAL_MIN, M_PI },
+    { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0 },
+    { GKYL_PEAK_EDGE_HI, 2.0 * M_PI },
+  };
+
+  // Project g onto a single peak, selected by its index.
+  int chosen_idx = 1;
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks);
+  struct gkyl_array *g_at_peaks = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume);
+  gkyl_array_dg_find_peaks_project_on_peak_idx(peaks, g, chosen_idx, g_at_peaks);
+
+  // Bring the projection back to the host before inspecting it.
+  struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks->ncomp,
+    g_at_peaks->size);
+  gkyl_array_copy(g_at_peaks_ho, g_at_peaks);
+
+  // The projected value is compared with z^2 at the chosen peak.
+  double z = peak_table[chosen_idx].z;
+  double expected = z * z;
+  const double *g_val = gkyl_array_cfetch(g_at_peaks_ho, 0);
+  TEST_CHECK(gkyl_compare_double(g_val[0], expected, 1e-12));
+
+  // Clean up.
+  gkyl_array_release(g_at_peaks_ho);
+  gkyl_array_release(g_at_peaks);
+  gkyl_array_release(g);
+  gkyl_array_release(f);
+  gkyl_array_release(g_ho);
+  gkyl_array_release(f_ho);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// Test 2D project_on_peak_idx with complex function.
+//
+// Peaks of test_func_2d_complex are located along z (direction 1). The field
+// g(psi, z) = z^2 * psi^2 is then projected onto every peak, one peak at a
+// time, through gkyl_array_dg_find_peaks_project_on_peak_idx. Each projection
+// is compared node-by-node against the analytical value.
+void
+test_2d_project_on_peak_idx(int poly_order, bool use_gpu)
+{
+  double lower[] = { 0.5, -2.0 * M_PI };
+  double upper[] = { 2.0, 2.0 * M_PI };
+  int cells[] = { 16, 64 };
+  int ndim = 2;
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
+
+  struct gkyl_basis basis;
+  gkyl_cart_modal_serendip(&basis, ndim, poly_order);
+
+  int ghost[] = { 1, 1 };
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  // Project test function for peak finding (always on host first).
+  struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Project quadratic function to evaluate at peaks: g(psi, z) = z^2 * psi^2
+  struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume);
+  ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_2d, NULL);
+  gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho);
+  gkyl_eval_on_nodes_release(ev);
+
+  // Create device copies if needed.
+  struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume);
+  gkyl_array_copy(f, f_ho);
+  gkyl_array_copy(g, g_ho);
+
+  // Create peak finder (search along z, which is direction 1).
+  struct gkyl_array_dg_find_peaks_inp inp = {
+    .basis = &basis,
+    .grid = &grid,
+    .range = &local,
+    .range_ext = &local_ext,
+    .search_dir = 1,  // Search along z.
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f);
+  gkyl_array_dg_find_peaks_advance(peaks, f);
+
+  // Check results.
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks);
+  TEST_CHECK(num_peaks == 9);
+
+  const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks);
+  const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks);
+  const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks);
+  const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks);
+
+  // Allocate one output array per peak and project g onto each peak
+  // individually. project_on_peak_idx is the routine under test here; the
+  // all-peaks routine is covered by test_2d_project_on_peaks.
+  struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX];
+  for (int p = 0; p < num_peaks; p++) {
+    g_at_peaks[p] = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume);
+    gkyl_array_dg_find_peaks_project_on_peak_idx(peaks, g, p, g_at_peaks[p]);
+  }
+
+  // Define expected peak locations (same as before).
+  double expected_z_peaks[] = {
+    -2.0 * M_PI, -3.0 * M_PI / 2.0, -M_PI, -M_PI / 2.0, 0.0,
+    M_PI / 2.0, M_PI, 3.0 * M_PI / 2.0, 2.0 * M_PI
+  };
+
+  // Never index past the expected table, even if the peak count check above
+  // failed and the test keeps running.
+  int num_expected = (int) (sizeof(expected_z_peaks) / sizeof(expected_z_peaks[0]));
+  int num_verify = (num_peaks < num_expected) ? num_peaks : num_expected;
+
+  // Get node locations for output basis.
+  struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, out_basis->ndim, out_basis->num_basis);
+  out_basis->node_list(gkyl_array_fetch(nodes, 0));
+
+  // Verify that g evaluated at each peak matches analytical values.
+  for (int p = 0; p < num_verify; p++) {
+    const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p);
+
+    // Copy back to host for verification.
+    struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size);
+    gkyl_array_copy(coords, coords_d);
+    struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp,
+      g_at_peaks[p]->size);
+    gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]);
+
+    // Check each psi cell.
+    struct gkyl_range_iter iter;
+    gkyl_range_iter_init(&iter, out_range);
+    while (gkyl_range_iter_next(&iter)) {
+      long linidx = gkyl_range_idx(out_range, iter.idx);
+
+      const double *g_val_d = gkyl_array_cfetch(g_at_peaks_ho, linidx);
+      const double *coord_d = gkyl_array_cfetch(coords, linidx);
+
+      // Get cell center for physical psi coordinate.
+      double xc_out[1];
+      gkyl_rect_grid_cell_center(out_grid, (int[]){ iter.idx[0] }, xc_out);
+
+      // Evaluate at each nodal point in this cell.
+      for (int n = 0; n < out_basis->num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(nodes, n);
+        double g_at_node = out_basis->eval_expand(nod_log, g_val_d);
+        double z_at_node = out_basis->eval_expand(nod_log, coord_d);
+
+        // Compute physical psi coordinate at this node.
+        double nod_phys[1];
+        nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0] / 2.0;
+        double psi = nod_phys[0];
+
+        // Analytical value: g(psi, z) = z^2 * psi^2
+        double expected = z_at_node * z_at_node * psi * psi;
+        TEST_CHECK(fabs(z_at_node - expected_z_peaks[p]) < 1e-15);
+        TEST_CHECK(gkyl_compare_double(g_at_node, expected, 1e-15));
+      }
+    }
+    gkyl_array_release(coords);
+    gkyl_array_release(coords_d);
+    gkyl_array_release(g_at_peaks_ho);
+  }
+
+  // Clean up.
+  gkyl_array_release(nodes);
+  for (int p = 0; p < num_peaks; p++) {
+    gkyl_array_release(g_at_peaks[p]);
+  }
+  gkyl_array_release(f_ho);
+  gkyl_array_release(g_ho);
+  gkyl_array_release(f);
+  gkyl_array_release(g);
+  gkyl_array_dg_find_peaks_release(peaks);
+}
+
+// CPU test wrappers: each one runs a test driver with poly_order = 1 and
+// use_gpu = false. They are declared with (void) so that they are proper
+// prototypes rather than declarations with unspecified parameters.
+void test_1d_cos_p1_ho(void)
+{
+  test_1d_find_peaks_cos(1, false);
+}
+
+void test_1d_mirror_p1_ho(void)
+{
+  test_1d_find_peaks_mirror(1, false);
+}
+
+void test_1d_complex_p1_ho(void)
+{
+  test_1d_find_peaks_complex(1, false);
+}
+
+void test_2d_p1_ho(void)
+{
+  test_2d_find_peaks(1, false);
+}
+
+void test_2d_complex_p1_ho(void)
+{
+  test_2d_find_peaks_complex(1, false);
+}
+
+void test_1d_project_p1_ho(void)
+{
+  test_1d_project_on_peaks(1, false);
+}
+
+void test_2d_project_p1_ho(void)
+{
+  test_2d_project_on_peaks(1, false);
+}
+
+void test_1d_project_idx_p1_ho(void)
+{
+  test_1d_project_on_peak_idx(1, false);
+}
+
+void test_2d_project_idx_p1_ho(void)
+{
+  test_2d_project_on_peak_idx(1, false);
+}
+
+#ifdef GKYL_HAVE_CUDA
+
+// GPU test wrappers: each one runs a test driver with poly_order = 1 and
+// use_gpu = true. Only compiled when GKYL_HAVE_CUDA is defined. They are
+// declared with (void) so that they are proper prototypes.
+void test_1d_cos_p1_dev(void)
+{
+  test_1d_find_peaks_cos(1, true);
+}
+
+void test_1d_mirror_p1_dev(void)
+{
+  test_1d_find_peaks_mirror(1, true);
+}
+
+void test_1d_complex_p1_dev(void)
+{
+  test_1d_find_peaks_complex(1, true);
+}
+
+void test_2d_p1_dev(void)
+{
+  test_2d_find_peaks(1, true);
+}
+
+void test_2d_complex_p1_dev(void)
+{
+  test_2d_find_peaks_complex(1, true);
+}
+
+void test_1d_project_p1_dev(void)
+{
+  test_1d_project_on_peaks(1, true);
+}
+
+void test_2d_project_p1_dev(void)
+{
+  test_2d_project_on_peaks(1, true);
+}
+
+void test_1d_project_idx_p1_dev(void)
+{
+  test_1d_project_on_peak_idx(1, true);
+}
+
+void test_2d_project_idx_p1_dev(void)
+{
+  test_2d_project_on_peak_idx(1, true);
+}
+
+#endif
+
+// Test registry: each entry pairs a test name with the wrapper that runs it.
+// The host (CPU) wrappers are always listed; the "_gpu" entries are added
+// only when GKYL_HAVE_CUDA is defined. The { NULL, NULL } entry ends the list.
+TEST_LIST = {
+  { "test_1d_cos_p1", test_1d_cos_p1_ho },
+  { "test_1d_mirror_p1", test_1d_mirror_p1_ho },
+  { "test_1d_complex_p1", test_1d_complex_p1_ho },
+  { "test_2d_p1", test_2d_p1_ho },
+  { "test_2d_complex_p1", test_2d_complex_p1_ho },
+  { "test_1d_project_p1", test_1d_project_p1_ho },
+  { "test_2d_project_p1", test_2d_project_p1_ho },
+  { "test_1d_project_idx_p1", test_1d_project_idx_p1_ho },
+  { "test_2d_project_idx_p1", test_2d_project_idx_p1_ho },
+#ifdef GKYL_HAVE_CUDA
+  { "test_1d_cos_p1_gpu", test_1d_cos_p1_dev },
+  { "test_1d_mirror_p1_gpu", test_1d_mirror_p1_dev },
+  { "test_1d_complex_p1_gpu", test_1d_complex_p1_dev },
+  { "test_2d_p1_gpu", test_2d_p1_dev },
+  { "test_2d_complex_p1_gpu", test_2d_complex_p1_dev },
+  { "test_1d_project_p1_gpu", test_1d_project_p1_dev },
+  { "test_2d_project_p1_gpu", test_2d_project_p1_dev },
+  { "test_1d_project_idx_p1_gpu", test_1d_project_idx_p1_dev },
+  { "test_2d_project_idx_p1_gpu", test_2d_project_idx_p1_dev },
+#endif
+  { NULL, NULL },
+};
diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c
new file mode 100644
index 000000000..d55087e18
--- /dev/null
+++ b/core/zero/array_dg_find_peaks.c
@@ -0,0 +1,739 @@
+#include <assert.h>
+#include <float.h>
+#include <string.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_alloc_flags_priv.h>
+#include <gkyl_array.h>
+#include <gkyl_array_dg_find_peaks.h>
+#include <gkyl_array_dg_find_peaks_priv.h>
+#include <gkyl_nodal_ops.h>
+
+/**
+ * Scan along the search direction at a fixed preserved-direction cell index
+ * to count the number of peaks and determine their types.
+ *
+ * Nodal values of the field are gathered into the updater's search buffers
+ * (one entry per node along the search direction) and then scanned. The first
+ * and last nodes are always reported as GKYL_PEAK_EDGE_LO and
+ * GKYL_PEAK_EDGE_HI; every strict interior extremum is reported as
+ * GKYL_PEAK_LOCAL_MAX or GKYL_PEAK_LOCAL_MIN.
+ *
+ * @param up Peak finder; supplies grid, basis, range, node list and the
+ *   search buffers (search_vals, search_coords, search_visited), whose
+ *   contents are overwritten.
+ * @param in_ho Array holding the DG expansion to scan (read with
+ *   gkyl_array_cfetch, so it must be host-accessible).
+ * @param preserved_idx Cell index in the non-search direction; unused in 1D.
+ * @param num_peaks_out On output, number of peaks found (edges included).
+ * @param peak_types_out On output, peak types in scan order. Writes are
+ *   guarded by asserts against GKYL_DG_FIND_PEAKS_MAX entries.
+ */
+static void
+count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in_ho,
+  int preserved_idx, int *num_peaks_out, enum gkyl_peak_type *peak_types_out)
+{
+  int ndim = up->grid.ndim;
+  int search_dir = up->search_dir;
+
+  int total_nodes_search = up->total_nodes_search;
+
+  // Use pre-allocated search buffers from the struct. The visited flags record
+  // which search nodes already hold a sample, so a node shared by two cells
+  // keeps the first value seen even when that value and its coordinate are
+  // both exactly zero. This is the same bookkeeping used by
+  // find_peaks_for_preserved_node, so counting and extraction agree.
+  double *vals = up->search_vals;
+  double *coords = up->search_coords;
+  bool *visited = up->search_visited;
+  for (int i = 0; i < total_nodes_search; i++) {
+    vals[i] = 0.0;
+    coords[i] = 0.0;
+    visited[i] = false;
+  }
+
+  // Iterate along cells in search direction and collect nodal values.
+  for (int cell_idx = up->range.lower[search_dir];
+    cell_idx <= up->range.upper[search_dir];
+    cell_idx++) {
+    // Build index array for this cell.
+    int idx[GKYL_MAX_DIM];
+    if (ndim == 1) {
+      idx[0] = cell_idx;
+    }
+    else {
+      int preserved_dir = (search_dir == 0) ? 1 : 0;
+      idx[preserved_dir] = preserved_idx;
+      idx[search_dir] = cell_idx;
+    }
+
+    long linidx = gkyl_range_idx(&up->range, idx);
+    const double *f_d = gkyl_array_cfetch(in_ho, linidx);
+
+    double xc[GKYL_MAX_DIM];
+    gkyl_rect_grid_cell_center(&up->grid, idx, xc);
+
+    // Zero-based position of this cell along the search direction.
+    int cell_local = cell_idx - up->range.lower[search_dir];
+
+    // Evaluate at each node in this cell.
+    for (int n = 0; n < up->basis.num_basis; n++) {
+      const double *nod_log = gkyl_array_cfetch(up->nodes, n);
+
+      // Node offset in search direction: 0 for the lower node, 1 for the upper.
+      int node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
+      int search_node_idx = cell_local + node_offset;
+
+      // Only store the first sample seen for each search node
+      // (avoid duplicates at cell boundaries).
+      if (visited[search_node_idx]) {
+        continue;
+      }
+
+      double nod_phys[GKYL_MAX_DIM];
+      dg_find_peaks_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys);
+
+      vals[search_node_idx] = up->basis.eval_expand(nod_log, f_d);
+      coords[search_node_idx] = nod_phys[search_dir];
+      visited[search_node_idx] = true;
+    }
+  }
+
+  // Now scan the values to find peaks.
+  // A peak is: EDGE_LO at index 0, EDGE_HI at last index, LOCAL_MAX/MIN in between.
+  int num_peaks = 0;
+
+  // Always add lower edge.
+  peak_types_out[num_peaks++] = GKYL_PEAK_EDGE_LO;
+
+  // Scan for local maxima and minima (indices 1 to total_nodes_search-2).
+  // Comparisons are strict, so flat plateaus are not reported.
+  for (int i = 1; i < total_nodes_search - 1; i++) {
+    double prev = vals[i - 1];
+    double curr = vals[i];
+    double next = vals[i + 1];
+
+    if (curr > prev && curr > next) {
+      // Local maximum.
+      assert(num_peaks < GKYL_DG_FIND_PEAKS_MAX);
+      peak_types_out[num_peaks++] = GKYL_PEAK_LOCAL_MAX;
+    }
+    else if (curr < prev && curr < next) {
+      // Local minimum.
+      assert(num_peaks < GKYL_DG_FIND_PEAKS_MAX);
+      peak_types_out[num_peaks++] = GKYL_PEAK_LOCAL_MIN;
+    }
+  }
+
+  // Always add upper edge.
+  assert(num_peaks < GKYL_DG_FIND_PEAKS_MAX);
+  peak_types_out[num_peaks++] = GKYL_PEAK_EDGE_HI;
+
+  *num_peaks_out = num_peaks;
+}
+
+/**
+ * Find all peaks along the search direction for a given preserved-direction
+ * node index, storing results in the nodal arrays.
+ *
+ * The field is sampled at the basis nodes along the search direction into the
+ * updater's search buffers. The samples are then matched, in order, against
+ * the peak types recorded in up->peak_types. For each match the value and the
+ * search-direction coordinate are written to up->out_vals_nodal[k] and
+ * up->out_coords_nodal[k] at entry preserved_node_idx.
+ *
+ * @param up Peak finder; its search buffers and nodal output arrays are
+ *   overwritten.
+ * @param in_ho Array holding the DG expansion to scan (read with
+ *   gkyl_array_cfetch, so it must be host-accessible).
+ * @param preserved_node_idx Zero-based node index in the preserved direction
+ *   (0 in the 1D case).
+ */
+static void
+find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in_ho,
+  int preserved_node_idx)
+{
+  int ndim = up->grid.ndim;
+  int search_dir = up->search_dir;
+
+  int total_nodes_search = up->total_nodes_search;
+
+  // Use pre-allocated search buffers from the struct.
+  double *vals = up->search_vals;
+  double *coords = up->search_coords;
+  bool *visited = up->search_visited;
+  for (int i = 0; i < total_nodes_search; i++) {
+    vals[i] = 0.0;
+    coords[i] = 0.0;
+    visited[i] = false;
+  }
+
+  // For 2D, the preserved direction is the one that is not searched.
+  int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
+
+  // Determine which preserved-direction cells contribute to this node. This
+  // does not depend on the search-direction cell, so it is computed once.
+  // Node i is shared by cells i and i+1 (0-indexed from lower).
+  // preserved_node_idx 0 is only in cell lower[preserved_dir].
+  // preserved_node_idx N is only in cell upper[preserved_dir].
+  int pres_cell_start = 0, pres_cell_end = 0;
+  if (ndim > 1) {
+    if (preserved_node_idx == 0) {
+      pres_cell_start = up->range.lower[preserved_dir];
+      pres_cell_end = up->range.lower[preserved_dir];
+    }
+    else if (preserved_node_idx == up->out_nrange.upper[0]) {
+      pres_cell_start = up->range.upper[preserved_dir];
+      pres_cell_end = up->range.upper[preserved_dir];
+    }
+    else {
+      pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+      pres_cell_end = pres_cell_start + 1;
+      if (pres_cell_end > up->range.upper[preserved_dir]) {
+        pres_cell_end = up->range.upper[preserved_dir];
+      }
+    }
+  }
+
+  // Iterate along cells in search direction and collect nodal values.
+  for (int cell_idx = up->range.lower[search_dir];
+    cell_idx <= up->range.upper[search_dir];
+    cell_idx++) {
+    int cell_local = cell_idx - up->range.lower[search_dir];
+
+    for (int pres_cell = pres_cell_start; pres_cell <= pres_cell_end; pres_cell++) {
+      // Build index array for this cell.
+      int idx[GKYL_MAX_DIM];
+      if (ndim == 1) {
+        idx[0] = cell_idx;
+      }
+      else {
+        idx[preserved_dir] = pres_cell;
+        idx[search_dir] = cell_idx;
+      }
+
+      long linidx = gkyl_range_idx(&up->range, idx);
+      const double *f_d = gkyl_array_cfetch(in_ho, linidx);
+
+      double xc[GKYL_MAX_DIM];
+      gkyl_rect_grid_cell_center(&up->grid, idx, xc);
+
+      // Evaluate at each node in this cell.
+      for (int n = 0; n < up->basis.num_basis; n++) {
+        const double *nod_log = gkyl_array_cfetch(up->nodes, n);
+
+        // Skip nodes that do not sit on the requested preserved node.
+        if (ndim > 1) {
+          int pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1;
+          int pres_cell_local = pres_cell - up->range.lower[preserved_dir];
+          int this_pres_node = pres_cell_local + pres_node_offset;
+          if (this_pres_node != preserved_node_idx) {
+            continue;
+          }
+        }
+
+        // Determine node offset in search direction.
+        int search_node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
+        int search_node_idx = cell_local + search_node_offset;
+
+        // Keep only the first sample seen for each search node.
+        if (!visited[search_node_idx]) {
+          double val = up->basis.eval_expand(nod_log, f_d);
+          double nod_phys[GKYL_MAX_DIM];
+          dg_find_peaks_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys);
+
+          vals[search_node_idx] = val;
+          coords[search_node_idx] = nod_phys[search_dir];
+          visited[search_node_idx] = true;
+        }
+      }
+    }
+  }
+
+  // Now extract peaks based on peak_types.
+  int peak_idx = 0;
+
+  // EDGE_LO is always first peak at index 0.
+  if (up->peak_types[peak_idx] == GKYL_PEAK_EDGE_LO) {
+    double *val_n = gkyl_array_fetch(up->out_vals_nodal[peak_idx], preserved_node_idx);
+    double *coord_n = gkyl_array_fetch(up->out_coords_nodal[peak_idx], preserved_node_idx);
+    val_n[0] = vals[0];
+    coord_n[0] = coords[0];
+    peak_idx++;
+  }
+
+  // Find local maxima and minima, matching them in order against the
+  // expected interior peak types.
+  for (int i = 1; i < total_nodes_search - 1 && peak_idx < up->num_peaks - 1; i++) {
+    double prev = vals[i - 1];
+    double curr = vals[i];
+    double next = vals[i + 1];
+
+    bool is_max = (curr > prev && curr > next);
+    bool is_min = (curr < prev && curr < next);
+
+    if ((is_max && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MAX) ||
+      (is_min && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MIN)) {
+      double *val_n = gkyl_array_fetch(up->out_vals_nodal[peak_idx], preserved_node_idx);
+      double *coord_n = gkyl_array_fetch(up->out_coords_nodal[peak_idx], preserved_node_idx);
+      val_n[0] = curr;
+      coord_n[0] = coords[i];
+      peak_idx++;
+    }
+  }
+
+  // EDGE_HI is always the last peak. Write it to the last slot regardless of
+  // how many interior peaks were matched at this preserved node, so the upper
+  // edge entry is never left unset when this node has fewer interior extrema
+  // than were counted at construction.
+  int last_peak = up->num_peaks - 1;
+  if (last_peak >= 0 && up->peak_types[last_peak] == GKYL_PEAK_EDGE_HI) {
+    double *val_n = gkyl_array_fetch(up->out_vals_nodal[last_peak], preserved_node_idx);
+    double *coord_n = gkyl_array_fetch(up->out_coords_nodal[last_peak], preserved_node_idx);
+    val_n[0] = vals[total_nodes_search - 1];
+    coord_n[0] = coords[total_nodes_search - 1];
+  }
+}
+
+/**
+ * Evaluate an input array at one peak location for a given preserved-direction
+ * node index and store the value in the matching nodal output array.
+ *
+ * The peak's search-direction coordinate is read from
+ * up->out_coords_nodal[peak_idx], the containing cell is looked up on the
+ * grid, and the DG expansion in that cell is evaluated at the corresponding
+ * logical coordinate.
+ *
+ * @param up Peak finder holding grid, basis, range and peak coordinates.
+ * @param in_ho Array with the DG expansion to evaluate (read with
+ *   gkyl_array_cfetch).
+ * @param preserved_node_idx Zero-based node index in the preserved direction.
+ * @param out_vals_nodal Nodal output arrays, one per peak; entry peak_idx is
+ *   written at preserved_node_idx.
+ * @param peak_idx Index of the peak to evaluate at.
+ */
+static void
+eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_ho, int preserved_node_idx, struct gkyl_array **out_vals_nodal,
+  int peak_idx)
+{
+  const int ndim = up->grid.ndim;
+  const int search_dir = up->search_dir;
+  const int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
+
+  // Search-direction coordinate of this peak, as stored during find_peaks.
+  const double *stored_coord = gkyl_array_cfetch(up->out_coords_nodal[peak_idx],
+    preserved_node_idx);
+  const double peak_coord_search = stored_coord[0];
+
+  // Query point for the cell lookup. Only the search-direction coordinate is
+  // meaningful at this stage; no cell index is known yet.
+  double point[GKYL_MAX_DIM];
+  int known_idx[GKYL_MAX_DIM];
+  for (int d = 0; d < ndim; d++) {
+    point[d] = (d == search_dir) ? peak_coord_search : 0.0;
+    known_idx[d] = -1;
+  }
+
+  if (ndim > 1) {
+    // Map the preserved node to a cell: node 0 uses the first cell (evaluated
+    // at its left edge); node k >= 1 uses cell lower + k - 1 (evaluated at its
+    // right edge), clamped to the upper bound of the range.
+    const int pres_lo = up->range.lower[preserved_dir];
+    const int pres_hi = up->range.upper[preserved_dir];
+    int pres_cell = pres_lo;
+    if (preserved_node_idx != 0) {
+      pres_cell = pres_lo + preserved_node_idx - 1;
+      if (pres_cell > pres_hi) {
+        pres_cell = pres_hi;
+      }
+    }
+    known_idx[preserved_dir] = pres_cell;
+
+    // Use that cell's center as the preserved-direction coordinate of the
+    // query point. The other entries of the index are placeholders.
+    int center_idx[GKYL_MAX_DIM];
+    for (int d = 0; d < ndim; d++) {
+      center_idx[d] = 1;
+    }
+    center_idx[preserved_dir] = pres_cell;
+
+    double center[GKYL_MAX_DIM];
+    gkyl_rect_grid_cell_center(&up->grid, center_idx, center);
+    point[preserved_dir] = center[preserved_dir];
+  }
+
+  int cell_idx[GKYL_MAX_DIM];
+  gkyl_rect_grid_find_cell(&up->grid, point, true, known_idx, cell_idx);
+
+  // Keep the cell inside the interior range (avoid ghost cells).
+  for (int d = 0; d < ndim; d++) {
+    int c = cell_idx[d];
+    if (c < up->range.lower[d]) {
+      c = up->range.lower[d];
+    }
+    if (c > up->range.upper[d]) {
+      c = up->range.upper[d];
+    }
+    cell_idx[d] = c;
+  }
+
+  // Cell center, needed to map the peak coordinate to logical space.
+  double xc[GKYL_MAX_DIM];
+  gkyl_rect_grid_cell_center(&up->grid, cell_idx, xc);
+
+  // Logical evaluation point: the peak coordinate mapped to [-1, 1] in the
+  // search direction, and the cell edge (-1 for node 0, +1 otherwise) in the
+  // preserved direction.
+  double nod_log[GKYL_MAX_DIM];
+  for (int d = 0; d < ndim; d++) {
+    if (d == search_dir) {
+      nod_log[d] = 2.0 * (peak_coord_search - xc[d]) / up->grid.dx[d];
+    }
+    else if (ndim > 1) {
+      nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
+    }
+  }
+
+  // Evaluate the DG expansion of this cell and store the result.
+  const double *f_d = gkyl_array_cfetch(in_ho, gkyl_range_idx(&up->range, cell_idx));
+  double *val_n = gkyl_array_fetch(out_vals_nodal[peak_idx], preserved_node_idx);
+  val_n[0] = up->basis.eval_expand(nod_log, f_d);
+}
+
+/**
+ * Create a new peak finder.
+ *
+ * Copies the grid, basis and ranges from the input struct, builds the output
+ * grid/basis/ranges (one dimension lower than the input), allocates the
+ * search buffers, counts the peaks of `in` along the search direction at the
+ * middle preserved-direction cell, and allocates one set of output arrays per
+ * peak. Only poly_order == 1 and the 1D->0D and 2D->1D cases are accepted;
+ * these are enforced with assert.
+ *
+ * @param find_peaks_inp Input parameters (grid, basis, range, range_ext,
+ *   search_dir, use_gpu).
+ * @param in Field used to count the peaks and determine their types. It is
+ *   copied to a temporary host array first when use_gpu is set.
+ * @return New peak finder. When GKYL_HAVE_CUDA is defined and use_gpu is set,
+ *   this is the object returned by gkyl_array_dg_find_peaks_new_cu.
+ */
+struct gkyl_array_dg_find_peaks*
+gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_peaks_inp,
+  const struct gkyl_array *in)
+{
+  struct gkyl_array_dg_find_peaks *up = gkyl_malloc(sizeof(*up));
+
+  // Copy input parameters.
+  up->grid = *find_peaks_inp->grid;
+  up->basis = *find_peaks_inp->basis;
+  up->range = *find_peaks_inp->range;
+  up->range_ext = *find_peaks_inp->range_ext;
+  up->search_dir = find_peaks_inp->search_dir;
+  up->use_gpu = find_peaks_inp->use_gpu;
+
+  int ndim = find_peaks_inp->grid->ndim;
+  int poly_order = find_peaks_inp->basis->poly_order;
+  int out_dim = ndim - 1;
+
+  assert(find_peaks_inp->search_dir >= 0 && find_peaks_inp->search_dir < ndim);
+  assert(poly_order == 1); // gkyl_array_dg_find_peaks: only p=1 is supported
+
+  // Set up output grid/basis/range.
+  if (out_dim == 0) {
+    // 1D -> 0D case: a single-cell placeholder grid on [0, 1] with a p=0
+    // basis and a single output node.
+    int cells_1d[1] = { 1 };
+    double lower_1d[1] = { 0.0 };
+    double upper_1d[1] = { 1.0 };
+    gkyl_rect_grid_init(&up->out_grid, 1, lower_1d, upper_1d, cells_1d);
+    gkyl_range_init(&up->out_range, 1, (int[]){ 1 }, (int[]){ 1 });
+    gkyl_range_init(&up->out_range_ext, 1, (int[]){ 0 }, (int[]){ 2 });
+    gkyl_cart_modal_serendip(&up->out_basis, 1, 0);
+
+    int nodes_shape[1] = { 1 };
+    gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape);
+  }
+  else if (out_dim == 1) {
+    // 2D -> 1D case: the output grid and ranges are the input ones restricted
+    // to the preserved (non-search) direction.
+    int preserved_dir = (find_peaks_inp->search_dir == 0) ? 1 : 0;
+
+    int cells_out = find_peaks_inp->grid->cells[preserved_dir];
+    double lower_out = find_peaks_inp->grid->lower[preserved_dir];
+    double upper_out = find_peaks_inp->grid->upper[preserved_dir];
+
+    gkyl_rect_grid_init(&up->out_grid, 1, &lower_out, &upper_out, &cells_out);
+
+    int lower_idx[1] = { find_peaks_inp->range->lower[preserved_dir] };
+    int upper_idx[1] = { find_peaks_inp->range->upper[preserved_dir] };
+    gkyl_range_init(&up->out_range, 1, lower_idx, upper_idx);
+
+    int lower_ext_idx[1] = { find_peaks_inp->range_ext->lower[preserved_dir] };
+    int upper_ext_idx[1] = { find_peaks_inp->range_ext->upper[preserved_dir] };
+    gkyl_range_init(&up->out_range_ext, 1, lower_ext_idx, upper_ext_idx);
+
+    gkyl_cart_modal_serendip(&up->out_basis, 1, poly_order);
+
+    // One more node than cells in the preserved direction.
+    int num_nodes = gkyl_range_shape(&up->out_range, 0) + 1;
+    int nodes_shape[1] = {num_nodes};
+    gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape);
+  }
+  else {
+    assert(false); // dg_find_peaks: only 1D->0D and 2D->1D supported
+  }
+
+  // Store node locations for input basis.
+  up->nodes = gkyl_array_new(GKYL_DOUBLE, ndim, find_peaks_inp->basis->num_basis);
+  find_peaks_inp->basis->node_list(gkyl_array_fetch(up->nodes, 0));
+
+  // Create nodal-to-modal converter.
+  up->n2m = gkyl_nodal_ops_new(&up->out_basis, &up->out_grid, false);
+
+  // No device basis on CPU.
+  up->out_basis_on_dev = NULL;
+
+  // Compute total_nodes_search for the struct: number of cells along the
+  // search direction plus one.
+  int num_cells_search = find_peaks_inp->range->upper[find_peaks_inp->search_dir]
+    - find_peaks_inp->range->lower[find_peaks_inp->search_dir] + 1;
+  up->total_nodes_search = num_cells_search + 1;
+
+  // Pre-allocate search-direction working buffers (reused by advance).
+  // These must exist before count_peaks_along_dir is called below.
+  up->search_vals = gkyl_malloc(sizeof(double) * up->total_nodes_search);
+  up->search_coords = gkyl_malloc(sizeof(double) * up->total_nodes_search);
+  up->search_visited = gkyl_malloc(sizeof(bool) * up->total_nodes_search);
+
+  // Count peaks at middle preserved coordinate (cell index 0 is passed in the
+  // 1D case).
+  int mid_preserved_idx = 0;
+  if (out_dim == 1) {
+    int preserved_dir = (find_peaks_inp->search_dir == 0) ? 1 : 0;
+    mid_preserved_idx = (find_peaks_inp->range->lower[preserved_dir] +
+      find_peaks_inp->range->upper[preserved_dir]) / 2;
+  }
+
+  // Copy input to host if needed; the counting pass reads the field on the host.
+  if (up->use_gpu) {
+    struct gkyl_array *field_ho = gkyl_array_new(GKYL_DOUBLE, in->ncomp, in->size);
+    gkyl_array_copy(field_ho, in);
+    count_peaks_along_dir(up, field_ho, mid_preserved_idx, &up->num_peaks, up->peak_types);
+    gkyl_array_release(field_ho);
+  }
+  else {
+    count_peaks_along_dir(up, in, mid_preserved_idx, &up->num_peaks, up->peak_types);
+  }
+
+  // Allocate output arrays for each peak: modal values/coordinates on the
+  // extended output range, and single-component nodal arrays on the nodal range.
+  for (int p = 0; p < up->num_peaks; p++) {
+    up->out_vals[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis,
+      up->out_range_ext.volume);
+    up->out_coords[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis,
+      up->out_range_ext.volume);
+    up->out_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+    up->out_coords_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+    up->out_eval_at_peaks_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume);
+  }
+
+  // Initialize unused peak arrays to NULL.
+  for (int p = up->num_peaks; p < GKYL_DG_FIND_PEAKS_MAX; p++) {
+    up->out_vals[p] = NULL;
+    up->out_coords[p] = NULL;
+    up->out_vals_nodal[p] = NULL;
+    up->out_coords_nodal[p] = NULL;
+    up->out_eval_at_peaks_vals_nodal[p] = NULL;
+  }
+
+  up->flags = 0;
+  GKYL_CLEAR_CU_ALLOC(up->flags);
+  up->ref_count = gkyl_ref_count_init(gkyl_array_dg_find_peaks_free);
+  up->on_dev = up; // CPU object points to itself.
+
+  // On GPU builds with use_gpu set, hand the host object to the CUDA
+  // constructor and then release the host object; the CUDA object is returned.
+  struct gkyl_array_dg_find_peaks *up_out = up;
+#ifdef GKYL_HAVE_CUDA
+  if (up->use_gpu) {
+    up_out = gkyl_array_dg_find_peaks_new_cu(up);
+    gkyl_array_dg_find_peaks_release(up);
+  }
+#endif
+
+  return up_out;
+}
+
+/**
+ * Locate the peaks of `in` and fill the updater's output arrays.
+ *
+ * On GPU builds with use_gpu set, the work is delegated to
+ * gkyl_array_dg_find_peaks_advance_cu. Otherwise the peaks are found for each
+ * preserved-direction node, and the nodal values and coordinates of every
+ * peak are converted to modal form in up->out_vals and up->out_coords.
+ *
+ * @param up Peak finder to update.
+ * @param in Field whose peaks are searched.
+ */
+void
+gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in)
+{
+#ifdef GKYL_HAVE_CUDA
+  if (up->use_gpu) {
+    gkyl_array_dg_find_peaks_advance_cu(up, in);
+    return;
+  }
+#endif
+
+  // Nodal pass: one scan along the search direction per preserved node.
+  const int num_pres_nodes = up->out_nrange.volume;
+  for (int inode = 0; inode < num_pres_nodes; inode++) {
+    find_peaks_for_preserved_node(up, in, inode);
+  }
+
+  // Nodal -> modal conversion, peak by peak.
+  const bool scalar_output = (up->grid.ndim - 1 == 0);
+  for (int ipeak = 0; ipeak < up->num_peaks; ipeak++) {
+    if (scalar_output) {
+      // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+      const double *nodal_val = gkyl_array_cfetch(up->out_vals_nodal[ipeak], 0);
+      const double *nodal_coord = gkyl_array_cfetch(up->out_coords_nodal[ipeak], 0);
+      double *modal_val = gkyl_array_fetch(up->out_vals[ipeak], 0);
+      double *modal_coord = gkyl_array_fetch(up->out_coords[ipeak], 0);
+      modal_val[0] = nodal_val[0];
+      modal_coord[0] = nodal_coord[0];
+    }
+    else {
+      // 2D -> 1D case: use nodal-to-modal transform.
+      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1, up->out_vals_nodal[ipeak], up->out_vals[ipeak],
+        false);
+      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1, up->out_coords_nodal[ipeak], up->out_coords[ipeak],
+        false);
+    }
+  }
+}
+
+/**
+ * Return the number of peaks stored in the peak finder (up->num_peaks).
+ */
+int
+gkyl_array_dg_find_peaks_num_peaks(const struct gkyl_array_dg_find_peaks *up)
+{
+  return up->num_peaks;
+}
+
+/**
+ * Return the type of the peak with index peak_idx.
+ * peak_idx must satisfy 0 <= peak_idx < num_peaks (checked with assert).
+ */
+enum gkyl_peak_type
+gkyl_array_dg_find_peaks_get_type(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+  return up->peak_types[peak_idx];
+}
+
+/**
+ * Return a pointer to the output basis stored inside the peak finder.
+ * The pointer refers to a member of *up, not to a copy.
+ */
+const struct gkyl_basis*
+gkyl_array_dg_find_peaks_get_basis(const struct gkyl_array_dg_find_peaks *up)
+{
+  return &up->out_basis;
+}
+
+/**
+ * Return a pointer to the output grid stored inside the peak finder.
+ * The pointer refers to a member of *up, not to a copy.
+ */
+const struct gkyl_rect_grid*
+gkyl_array_dg_find_peaks_get_grid(const struct gkyl_array_dg_find_peaks *up)
+{
+  return &up->out_grid;
+}
+
+/**
+ * Return a pointer to the output range stored inside the peak finder.
+ * The pointer refers to a member of *up, not to a copy.
+ */
+const struct gkyl_range*
+gkyl_array_dg_find_peaks_get_range(const struct gkyl_array_dg_find_peaks *up)
+{
+  return &up->out_range;
+}
+
+/**
+ * Return a pointer to the extended output range stored inside the peak
+ * finder. The pointer refers to a member of *up, not to a copy.
+ */
+const struct gkyl_range*
+gkyl_array_dg_find_peaks_get_range_ext(const struct gkyl_array_dg_find_peaks *up)
+{
+  return &up->out_range_ext;
+}
+
+/**
+ * Return a pointer to the nodal output range stored inside the peak finder.
+ * The pointer refers to a member of *up, not to a copy.
+ */
+const struct gkyl_range*
+gkyl_array_dg_find_peaks_get_nodal_range(const struct gkyl_array_dg_find_peaks *up)
+{
+  return &up->out_nrange;
+}
+
+// Return gkyl_array_acquire() applied to the modal peak-value array of
+// peak peak_idx. peak_idx must lie in [0, num_peaks) (checked with assert).
+const struct gkyl_array*
+gkyl_array_dg_find_peaks_acquire_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+  const struct gkyl_array *vals = up->out_vals[peak_idx];
+  return gkyl_array_acquire(vals);
+}
+
+// Return gkyl_array_acquire() applied to the nodal peak-value array of
+// peak peak_idx. peak_idx must lie in [0, num_peaks) (checked with assert).
+const struct gkyl_array*
+gkyl_array_dg_find_peaks_acquire_vals_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+  const struct gkyl_array *vals_nodal = up->out_vals_nodal[peak_idx];
+  return gkyl_array_acquire(vals_nodal);
+}
+
+// Return gkyl_array_acquire() applied to the modal peak-coordinate array of
+// peak peak_idx. peak_idx must lie in [0, num_peaks) (checked with assert).
+const struct gkyl_array*
+gkyl_array_dg_find_peaks_acquire_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+  const struct gkyl_array *coords = up->out_coords[peak_idx];
+  return gkyl_array_acquire(coords);
+}
+
+// Return gkyl_array_acquire() applied to the nodal peak-coordinate array of
+// peak peak_idx. peak_idx must lie in [0, num_peaks) (checked with assert).
+const struct gkyl_array*
+gkyl_array_dg_find_peaks_acquire_coords_nodal(const struct gkyl_array_dg_find_peaks *up,
+  int peak_idx)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+  const struct gkyl_array *coords_nodal = up->out_coords_nodal[peak_idx];
+  return gkyl_array_acquire(coords_nodal);
+}
+
+// Evaluate in_array at the peak locations held by the updater and write one
+// result array per peak.
+//
+// up:       peak-finder updater; its out_eval_at_peaks_vals_nodal arrays are
+//           used as nodal scratch storage.
+// in_array: DG field to evaluate.
+// out_vals: num_peaks output arrays; entry p receives the result for peak p.
+void
+gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, struct gkyl_array **out_vals)
+{
+#ifdef GKYL_HAVE_CUDA
+  // GPU updaters are handled entirely by the CUDA implementation.
+  if (up->use_gpu) {
+    gkyl_array_dg_find_peaks_project_on_peaks_cu(up, in_array, out_vals);
+    return;
+  }
+#endif
+
+  int nnodes = up->out_nrange.volume;
+  bool scalar_output = (up->grid.ndim - 1 == 0);
+
+  // Nodal pass: for every preserved-direction node, evaluate at every peak.
+  for (int node = 0; node < nnodes; node++) {
+    for (int k = 0; k < up->num_peaks; k++) {
+      eval_array_at_peaks_for_preserved_node(up, in_array, node,
+        up->out_eval_at_peaks_vals_nodal, k);
+    }
+  }
+
+  // Modal pass: convert each peak's nodal data into the caller's array.
+  for (int k = 0; k < up->num_peaks; k++) {
+    if (scalar_output) {
+      // 1D -> 0D: a single value, so modal and nodal coincide.
+      const double *nodal = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[k], 0);
+      double *modal = gkyl_array_fetch(out_vals[k], 0);
+      modal[0] = nodal[0];
+    }
+    else {
+      // 2D -> 1D: run the nodal-to-modal transform.
+      gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[k], out_vals[k],
+        false);
+    }
+  }
+}
+
+// Evaluate in_array at the location of a single peak and write the result.
+//
+// up:       peak-finder updater; out_eval_at_peaks_vals_nodal[peak_idx] is
+//           used as nodal scratch storage.
+// in_array: DG field to evaluate.
+// peak_idx: which peak to evaluate at; must lie in [0, num_peaks). This is
+//           asserted here, matching the other peak_idx accessors, because
+//           the value indexes per-peak arrays directly.
+// out_val:  output array receiving the result for that peak.
+void
+gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+
+#ifdef GKYL_HAVE_CUDA
+  if (up->use_gpu) {
+    gkyl_array_dg_find_peaks_project_on_peak_idx_cu(up, in_array, peak_idx, out_val);
+    return;
+  }
+#endif
+
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+
+  // Evaluate the input array at the peak location for each preserved-direction node.
+  int num_nodes_out = up->out_nrange.volume;
+
+  for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) {
+    eval_array_at_peaks_for_preserved_node(up, in_array, pres_node,
+      up->out_eval_at_peaks_vals_nodal, peak_idx);
+  }
+
+  // Transform nodal to modal for the requested peak.
+  if (out_dim == 0) {
+    // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function).
+    double *val_m = gkyl_array_fetch(out_val, 0);
+    const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[peak_idx], 0);
+    val_m[0] = val_n[0];
+  }
+  else {
+    // 2D -> 1D case: use nodal-to-modal transform.
+    gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid,
+      &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], out_val,
+      false);
+  }
+}
+
+// Increment the updater's reference count and return the same object as a
+// non-const pointer.
+struct gkyl_array_dg_find_peaks*
+gkyl_array_dg_find_peaks_acquire(const struct gkyl_array_dg_find_peaks *up)
+{
+  struct gkyl_array_dg_find_peaks *shared = (struct gkyl_array_dg_find_peaks *)up;
+  gkyl_ref_count_inc(&up->ref_count);
+  return shared;
+}
+
+// Destructor registered with the reference count: recovers the updater from
+// its embedded ref_count member and frees everything it owns.
+void
+gkyl_array_dg_find_peaks_free(const struct gkyl_ref_count *ref)
+{
+  struct gkyl_array_dg_find_peaks *self =
+    container_of(ref, struct gkyl_array_dg_find_peaks, ref_count);
+
+  // Per-peak output and scratch arrays.
+  int p = 0;
+  while (p < self->num_peaks) {
+    gkyl_array_release(self->out_vals[p]);
+    gkyl_array_release(self->out_coords[p]);
+    gkyl_array_release(self->out_vals_nodal[p]);
+    gkyl_array_release(self->out_coords_nodal[p]);
+    gkyl_array_release(self->out_eval_at_peaks_vals_nodal[p]);
+    p++;
+  }
+
+  gkyl_array_release(self->nodes);
+  gkyl_nodal_ops_release(self->n2m);
+
+  if (!GKYL_IS_CU_ALLOC(self->flags)) {
+    // Host object: search buffers were allocated on the host.
+    gkyl_free(self->search_vals);
+    gkyl_free(self->search_coords);
+    gkyl_free(self->search_visited);
+  }
+  else {
+    // Device-flagged object: device basis, device search buffers, device struct copy.
+    gkyl_cart_modal_basis_release_cu(self->out_basis_on_dev);
+    gkyl_cu_free(self->search_vals);
+    gkyl_cu_free(self->search_coords);
+    gkyl_cu_free(self->search_visited);
+    gkyl_cu_free(self->on_dev);
+  }
+
+  gkyl_free(self);
+}
+
+// Decrement the updater's reference count.
+void
+gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up)
+{
+  struct gkyl_ref_count *rc = &up->ref_count;
+  gkyl_ref_count_dec(rc);
+}
diff --git a/core/zero/array_dg_find_peaks_cu.cu b/core/zero/array_dg_find_peaks_cu.cu
new file mode 100644
index 000000000..9581f9a37
--- /dev/null
+++ b/core/zero/array_dg_find_peaks_cu.cu
@@ -0,0 +1,540 @@
+/* -*- c++ -*- */
+extern "C" {
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_array_dg_find_peaks.h>
+#include <gkyl_array_dg_find_peaks_priv.h>
+#include <gkyl_alloc_flags_priv.h>
+#include <gkyl_nodal_ops.h>
+#include <assert.h>
+}
+
+/**
+ * CUDA kernel: find peaks along the search direction for each preserved-direction
+ * node index. One preserved_node_idx per loop iteration; threads stride over
+ * the node indices with a grid-stride loop.
+ *
+ * For each preserved_node_idx:
+ *  1. Scans all cells along the search direction, collecting nodal values/coords
+ *     into this node's slice of the pre-allocated search buffers
+ *     (up->search_vals, up->search_coords, up->search_visited).
+ *  2. Extracts peaks (EDGE_LO, LOCAL_MAX, LOCAL_MIN, EDGE_HI) and writes results
+ *     into the nodal output arrays (up->out_vals_nodal, up->out_coords_nodal).
+ *
+ * @param up Device-side updater struct
+ * @param in Device-side input array (DG field to search)
+ * @param num_nodes_out Number of preserved-direction nodes to process
+ */
+__global__ void
+gkyl_find_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in, int num_nodes_out)
+{
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < num_nodes_out; tid += blockDim.x * gridDim.x) {
+    int preserved_node_idx = (int)tid;
+
+    int ndim = up->grid.ndim;
+    int search_dir = up->search_dir;
+    int num_basis = up->basis.num_basis;
+
+    // Number of cells and nodes along the search direction.
+    int num_cells_search = up->range.upper[search_dir] - up->range.lower[search_dir] + 1;
+
+    // NOTE(review): recomputed locally as cells + 1 rather than read from
+    // up->total_nodes_search, which sized the buffers; confirm they always agree.
+    int total_nodes_search = num_cells_search + 1;
+
+    // Each thread gets its own contiguous slice of the pre-allocated
+    // search buffers. Offset = preserved_node_idx * total_nodes_search.
+    long buf_off = (long)preserved_node_idx * total_nodes_search;
+    double *vals = up->search_vals + buf_off;
+    double *coords = up->search_coords + buf_off;
+    bool *visited = up->search_visited + buf_off;
+    // Reset the slice so results from a previous launch are not reused.
+    for (int i = 0; i < total_nodes_search; i++) {
+      vals[i] = 0.0;
+      coords[i] = 0.0;
+      visited[i] = false;
+    }
+
+    // Preserved direction (only used for 2D).
+    int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
+
+    // Iterate along cells in the search direction.
+    for (int cell_idx = up->range.lower[search_dir];
+      cell_idx <= up->range.upper[search_dir]; cell_idx++) {
+      // For 2D, determine which cells in the preserved direction
+      // contribute to this preserved_node_idx.
+      int pres_cell_start, pres_cell_end;
+      if (ndim == 1) {
+        pres_cell_start = 0;
+        pres_cell_end = 0;
+      }
+      else {
+        // Node i is shared by cells i and i+1 (0-indexed from lower).
+        // preserved_node_idx 0 is only in cell lower[preserved_dir].
+        // preserved_node_idx N is only in cell upper[preserved_dir].
+        if (preserved_node_idx == 0) {
+          pres_cell_start = up->range.lower[preserved_dir];
+          pres_cell_end = up->range.lower[preserved_dir];
+        }
+        else if (preserved_node_idx == up->out_nrange.upper[0]) {
+          pres_cell_start = up->range.upper[preserved_dir];
+          pres_cell_end = up->range.upper[preserved_dir];
+        }
+        else {
+          pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+          pres_cell_end = pres_cell_start + 1;
+          if (pres_cell_end > up->range.upper[preserved_dir])
+            pres_cell_end = up->range.upper[preserved_dir];
+        }
+      }
+
+      for (int pres_cell = pres_cell_start; pres_cell <= pres_cell_end; pres_cell++) {
+        // Build cell index.
+        int idx[GKYL_MAX_DIM];
+        if (ndim == 1) {
+          idx[0] = cell_idx;
+        }
+        else {
+          idx[preserved_dir] = pres_cell;
+          idx[search_dir] = cell_idx;
+        }
+
+        long linidx = gkyl_range_idx(&up->range, idx);
+        const double *f_d = (const double *)gkyl_array_cfetch(in, linidx);
+
+        double xc[GKYL_MAX_DIM];
+        gkyl_rect_grid_cell_center(&up->grid, idx, xc);
+
+        // Evaluate at each node in this cell.
+        for (int n = 0; n < num_basis; n++) {
+          const double *nod_log = (const double *)gkyl_array_cfetch(up->nodes, n);
+
+          // Check if this node belongs to our preserved_node_idx (2D only).
+          if (ndim > 1) {
+            // A negative logical coordinate maps to the cell's lower node (offset 0),
+            // anything else to its upper node (offset 1).
+            int pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1;
+            int pres_cell_local = pres_cell - up->range.lower[preserved_dir];
+
+            int this_pres_node = pres_cell_local + pres_node_offset;
+
+            if (this_pres_node != preserved_node_idx)
+              continue;
+          }
+
+          // Determine node offset in the search direction.
+          int search_node_offset = (nod_log[search_dir] < 0) ? 0 : 1;
+
+          int cell_local = cell_idx - up->range.lower[search_dir];
+
+          int search_node_idx = cell_local + search_node_offset;
+
+          // A node shared by several cells keeps the value from the first
+          // cell that reaches it.
+          if (!visited[search_node_idx]) {
+            double val = up->basis.eval_expand(nod_log, f_d);
+            double nod_phys[GKYL_MAX_DIM];
+            dg_find_peaks_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys);
+
+            vals[search_node_idx] = val;
+            coords[search_node_idx] = nod_phys[search_dir];
+            visited[search_node_idx] = true;
+          }
+        }
+      }
+    }
+
+    // Extract peaks based on peak_types and write to nodal output arrays.
+    int peak_idx = 0;
+
+    // EDGE_LO is always the first peak at index 0.
+    if (up->peak_types[peak_idx] == GKYL_PEAK_EDGE_LO) {
+      double *val_n = (double *)gkyl_array_fetch(up->out_vals_nodal[peak_idx],
+        preserved_node_idx);
+      double *coord_n = (double *)gkyl_array_fetch(up->out_coords_nodal[peak_idx],
+        preserved_node_idx);
+      val_n[0] = vals[0];
+      coord_n[0] = coords[0];
+      peak_idx++;
+    }
+
+    // Find local maxima and minima.
+    // An extremum is accepted only when its kind matches the type expected at
+    // the current peak_idx; strict inequalities mean flat plateaus do not count.
+    for (int i = 1; i < total_nodes_search - 1 && peak_idx < up->num_peaks - 1; i++) {
+      double prev = vals[i - 1];
+      double curr = vals[i];
+      double next = vals[i + 1];
+
+      bool is_max = (curr > prev && curr > next);
+      bool is_min = (curr < prev && curr < next);
+
+      if ((is_max && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MAX) ||
+        (is_min && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MIN)) {
+        double *val_n = (double *)gkyl_array_fetch(up->out_vals_nodal[peak_idx],
+          preserved_node_idx);
+        double *coord_n = (double *)gkyl_array_fetch(up->out_coords_nodal[peak_idx],
+          preserved_node_idx);
+        val_n[0] = curr;
+        coord_n[0] = coords[i];
+        peak_idx++;
+      }
+    }
+
+    // EDGE_HI is always the last peak.
+    // NOTE(review): if fewer interior extrema were matched above than peak_types
+    // expects, peak_idx does not point at the EDGE_HI slot and nothing is written
+    // for the remaining peaks at this node; confirm this is intended.
+    if (peak_idx < up->num_peaks && up->peak_types[peak_idx] == GKYL_PEAK_EDGE_HI) {
+      double *val_n = (double *)gkyl_array_fetch(up->out_vals_nodal[peak_idx],
+        preserved_node_idx);
+      double *coord_n = (double *)gkyl_array_fetch(up->out_coords_nodal[peak_idx],
+        preserved_node_idx);
+      val_n[0] = vals[total_nodes_search - 1];
+      coord_n[0] = coords[total_nodes_search - 1];
+    }
+  }
+}
+
+/**
+ * CUDA kernel: evaluate an input array at peak locations for given peak indices.
+ * Writes results into out_eval_at_peaks_vals_nodal arrays on device.
+ *
+ * Thread mapping: one (preserved_node_idx, peak_offset) pair per loop
+ * iteration, with threads striding over the pairs.
+ * total_threads = num_nodes_out * num_peaks_to_eval.
+ *
+ * A peak lying on a cell boundary in the search direction is evaluated in the
+ * lower of the two adjacent cells (pick_lower), within a small tolerance, so
+ * round-off in the stored coordinate cannot flip the chosen cell.
+ *
+ * @param up Device-side updater struct
+ * @param in Device-side input array (DG field to evaluate)
+ * @param num_nodes_out Number of preserved-direction nodes
+ * @param peak_start First peak index to evaluate
+ * @param num_peaks_to_eval Number of peaks to evaluate (starting from peak_start)
+ */
+__global__ void
+gkyl_eval_at_peaks_kernel(const struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in, int num_nodes_out,
+  int peak_start, int num_peaks_to_eval)
+{
+  unsigned long total_threads = (unsigned long)num_nodes_out * num_peaks_to_eval;
+
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < total_threads; tid += blockDim.x * gridDim.x) {
+    int preserved_node_idx = (int)(tid / num_peaks_to_eval);
+    int peak_offset = (int)(tid % num_peaks_to_eval);
+    int peak_idx = peak_start + peak_offset;
+
+    int ndim = up->grid.ndim;
+    int search_dir = up->search_dir;
+    int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0);
+
+    // Get the peak coordinate found during advance.
+    const double *peak_coord_n = (const double *)gkyl_array_cfetch(
+      up->out_coords_nodal[peak_idx], preserved_node_idx);
+    double peak_coord_search = peak_coord_n[0];
+
+    // Determine cell index containing the peak.
+    // We compute the search-direction cell directly from the uniform grid
+    // geometry (avoids calling gkyl_rect_grid_find_cell which is not available
+    // as a device symbol).
+    int cell_idx[GKYL_MAX_DIM];
+
+    // Search direction: compute cell from coordinate on uniform grid.
+    // With 1-based cells, cell k covers rel in [k-1, k].
+    {
+      double rel = (peak_coord_search - up->grid.lower[search_dir]) / up->grid.dx[search_dir];
+      // Tolerance, in units of cell widths, for treating rel as sitting on a
+      // cell boundary despite floating-point round-off.
+      const double boundary_tol = 1.0e-10;
+      int whole = (int)rel;
+      // pick_lower: if rel is (within tolerance) the integer k, the point is the
+      // upper boundary of cell k, so choose k. Otherwise the point is strictly
+      // inside cell whole + 1.
+      int c = (rel - whole > boundary_tol) ? whole + 1 : whole;
+      // Clamp to valid range (this also maps the domain's lower edge, where
+      // the rule above yields lower - 1, onto the first cell).
+      if (c < up->range.lower[search_dir])
+        c = up->range.lower[search_dir];
+      if (c > up->range.upper[search_dir])
+        c = up->range.upper[search_dir];
+      cell_idx[search_dir] = c;
+    }
+
+    // For 2D: determine preserved-direction cell from preserved_node_idx.
+    if (ndim > 1) {
+      int pres_cell;
+      if (preserved_node_idx == 0) {
+        pres_cell = up->range.lower[preserved_dir];
+      }
+      else {
+        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+        if (pres_cell > up->range.upper[preserved_dir])
+          pres_cell = up->range.upper[preserved_dir];
+      }
+      cell_idx[preserved_dir] = pres_cell;
+    }
+
+    // Fetch DG coefficients at this cell.
+    long linidx = gkyl_range_idx(&up->range, cell_idx);
+    const double *f_d = (const double *)gkyl_array_cfetch(in, linidx);
+
+    // Get cell center for logical coordinate conversion.
+    double xc[GKYL_MAX_DIM];
+    gkyl_rect_grid_cell_center(&up->grid, cell_idx, xc);
+
+    // Convert peak coordinate to logical space [-1, 1].
+    double nod_log[GKYL_MAX_DIM];
+    for (int d = 0; d < ndim; d++) {
+      if (d == search_dir) {
+        nod_log[d] = 2.0 * (peak_coord_search - xc[d]) / up->grid.dx[d];
+      }
+      else if (ndim > 1) {
+        // Node 0 is at left edge (-1), all others at right edge (+1).
+        nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
+      }
+    }
+
+    // Evaluate the DG expansion and store result.
+    double val = up->basis.eval_expand(nod_log, f_d);
+    double *val_n = (double *)gkyl_array_fetch(
+      up->out_eval_at_peaks_vals_nodal[peak_idx], preserved_node_idx);
+    val_n[0] = val;
+  }
+}
+
+// Host-side driver for the GPU path: evaluates in_array at every stored peak
+// with gkyl_eval_at_peaks_kernel, then fills out_vals[p] for each peak p.
+void
+gkyl_array_dg_find_peaks_project_on_peaks_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, struct gkyl_array **out_vals)
+{
+  const int block_size = 256;
+  int npeaks = up->num_peaks;
+  int nnodes = up->out_nrange.volume;
+
+  // One work item per (preserved node, peak) pair; round the block count up.
+  long work_items = (long)nnodes * npeaks;
+  int grid_size = (work_items + block_size - 1) / block_size;
+
+  gkyl_eval_at_peaks_kernel<<<grid_size, block_size>>>(
+    up->on_dev, in_array->on_dev, nnodes, 0, npeaks);
+
+  // A 1D input gives a 0D output, where the nodal data is copied directly;
+  // otherwise the nodal data goes through the nodal-to-modal transform.
+  bool scalar_output = (up->grid.ndim == 1);
+  for (int p = 0; p < npeaks; p++) {
+    if (scalar_output) {
+      gkyl_array_copy(out_vals[p], up->out_eval_at_peaks_vals_nodal[p]);
+    }
+    else {
+      gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1,
+        up->out_eval_at_peaks_vals_nodal[p], out_vals[p]);
+    }
+  }
+}
+
+// Host function to launch the evaluation kernel for a single peak and run the
+// nodal-to-modal transform on its result.
+//
+// peak_idx must lie in [0, num_peaks): it indexes the per-peak arrays on both
+// host and device, so it is asserted before anything is launched.
+void
+gkyl_array_dg_find_peaks_project_on_peak_idx_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val)
+{
+  assert(peak_idx >= 0 && peak_idx < up->num_peaks);
+
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+  int num_nodes_out = up->out_nrange.volume;
+
+  // Launch kernel: one thread per preserved_node, single peak.
+  int nthreads = 256;
+  int nblocks = (num_nodes_out + nthreads - 1) / nthreads;
+
+  gkyl_eval_at_peaks_kernel<<<nblocks, nthreads>>>(
+    up->on_dev, in_array->on_dev, num_nodes_out, peak_idx, 1);
+
+  // Transform nodal to modal.
+  if (out_dim == 0) {
+    // 1D -> 0D case: copy the nodal result directly.
+    gkyl_array_copy(out_val, up->out_eval_at_peaks_vals_nodal[peak_idx]);
+  }
+  else {
+    gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid,
+      &up->out_nrange, &up->out_range, 1,
+      up->out_eval_at_peaks_vals_nodal[peak_idx], out_val);
+  }
+}
+
+// Host-side driver for the GPU peak search: runs gkyl_find_peaks_kernel on
+// the input field, then fills the updater's out_vals/out_coords arrays from
+// the nodal results.
+void
+gkyl_array_dg_find_peaks_advance_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in)
+{
+  const int block_size = 256;
+  int nnodes = up->out_nrange.volume;
+
+  // One thread per preserved-direction node; round the block count up.
+  int grid_size = (nnodes + block_size - 1) / block_size;
+
+  gkyl_find_peaks_kernel<<<grid_size, block_size>>>(
+    up->on_dev, in->on_dev, nnodes);
+
+  // A 1D input gives a 0D output (single value per peak), where the nodal
+  // arrays are copied directly; otherwise they go through the GPU
+  // nodal-to-modal transform.
+  bool scalar_output = (up->grid.ndim == 1);
+  for (int p = 0; p < up->num_peaks; p++) {
+    if (scalar_output) {
+      gkyl_array_copy(up->out_vals[p], up->out_vals_nodal[p]);
+      gkyl_array_copy(up->out_coords[p], up->out_coords_nodal[p]);
+    }
+    else {
+      gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1,
+        up->out_vals_nodal[p], up->out_vals[p]);
+      gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1,
+        up->out_coords_nodal[p], up->out_coords[p]);
+    }
+  }
+}
+
+// Build a GPU-enabled updater from an already-constructed host updater.
+//
+// The returned object is a host-resident struct whose arrays are device
+// arrays initialized from up_ho's arrays, and whose on_dev member points to a
+// device copy of the struct holding device array pointers and device-callable
+// basis function pointers. up_ho is only read.
+//
+// The statement order below matters: the host struct is temporarily patched
+// with device pointers, copied to the device, and then restored.
+struct gkyl_array_dg_find_peaks*
+gkyl_array_dg_find_peaks_new_cu(struct gkyl_array_dg_find_peaks *up_ho)
+{
+  struct gkyl_array_dg_find_peaks *up =
+    (struct gkyl_array_dg_find_peaks *)gkyl_malloc(sizeof(*up));
+
+  // Copy all scalar/struct fields from host object.
+  up->grid = up_ho->grid;
+  up->basis = up_ho->basis;
+  up->range = up_ho->range;
+  up->range_ext = up_ho->range_ext;
+  up->search_dir = up_ho->search_dir;
+  up->use_gpu = true;
+
+  up->out_grid = up_ho->out_grid;
+  up->out_basis = up_ho->out_basis;
+  up->out_range = up_ho->out_range;
+  up->out_range_ext = up_ho->out_range_ext;
+  up->out_nrange = up_ho->out_nrange;
+
+  up->num_peaks = up_ho->num_peaks;
+  for (int p = 0; p < GKYL_DG_FIND_PEAKS_MAX; p++) {
+    up->peak_types[p] = up_ho->peak_types[p];
+  }
+
+  int ndim = up_ho->basis.ndim;
+  int poly_order = up_ho->basis.poly_order;
+  int out_dim = ndim - 1;
+
+  // Create a GPU copy of the nodes array so the kernel can access it.
+  up->nodes = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+    up_ho->nodes->ncomp, up_ho->nodes->size);
+  gkyl_array_copy(up->nodes, up_ho->nodes);
+
+  // Create GPU-enabled nodal-to-modal converter.
+  // Use the host basis (up->out_basis) here because gkyl_nodal_ops_new
+  // calls cbasis->node_list on the host.
+  up->n2m = gkyl_nodal_ops_new(&up->out_basis, &up->out_grid, true);
+
+  // Create a device-resident basis with device-callable function pointers.
+  // This is needed by gkyl_nodal_ops_n2m_cu which passes the basis pointer
+  // directly to a CUDA kernel that dereferences cbasis->nodal_to_modal().
+  if (out_dim > 0)
+    up->out_basis_on_dev = gkyl_cart_modal_serendip_cu_dev_new(1, poly_order);
+  else
+    up->out_basis_on_dev = NULL;
+
+  // Pre-allocate search-direction working arrays on device.
+  // Each thread (one per preserved node) gets its own contiguous slice
+  // of total_nodes_search elements, so total size = num_nodes_out * total_nodes_search.
+  up->total_nodes_search = up_ho->total_nodes_search;
+  int num_nodes_out = up->out_nrange.volume;
+  long search_buf_len = (long)num_nodes_out * up->total_nodes_search;
+  up->search_vals = (double *)gkyl_cu_malloc(sizeof(double) * search_buf_len);
+  up->search_coords = (double *)gkyl_cu_malloc(sizeof(double) * search_buf_len);
+  up->search_visited = (bool *)gkyl_cu_malloc(sizeof(bool) * search_buf_len);
+
+  // Mark the object as device-allocated and register its destructor.
+  up->flags = 0;
+  GKYL_SET_CU_ALLOC(up->flags);
+  up->ref_count = gkyl_ref_count_init(gkyl_array_dg_find_peaks_free);
+
+  // Allocate GPU output arrays for each peak and copy data from host arrays.
+  for (int p = 0; p < up->num_peaks; p++) {
+    up->out_vals[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+      up_ho->out_vals[p]->ncomp, up_ho->out_vals[p]->size);
+    gkyl_array_copy(up->out_vals[p], up_ho->out_vals[p]);
+
+    up->out_coords[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+      up_ho->out_coords[p]->ncomp, up_ho->out_coords[p]->size);
+    gkyl_array_copy(up->out_coords[p], up_ho->out_coords[p]);
+
+    up->out_vals_nodal[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+      up_ho->out_vals_nodal[p]->ncomp, up_ho->out_vals_nodal[p]->size);
+    gkyl_array_copy(up->out_vals_nodal[p], up_ho->out_vals_nodal[p]);
+
+    up->out_coords_nodal[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+      up_ho->out_coords_nodal[p]->ncomp, up_ho->out_coords_nodal[p]->size);
+    gkyl_array_copy(up->out_coords_nodal[p], up_ho->out_coords_nodal[p]);
+
+    up->out_eval_at_peaks_vals_nodal[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE,
+      up_ho->out_eval_at_peaks_vals_nodal[p]->ncomp,
+      up_ho->out_eval_at_peaks_vals_nodal[p]->size);
+    gkyl_array_copy(up->out_eval_at_peaks_vals_nodal[p],
+      up_ho->out_eval_at_peaks_vals_nodal[p]);
+  }
+
+  // Initialize unused peak arrays to NULL.
+  for (int p = up->num_peaks; p < GKYL_DG_FIND_PEAKS_MAX; p++) {
+    up->out_vals[p] = NULL;
+    up->out_coords[p] = NULL;
+    up->out_vals_nodal[p] = NULL;
+    up->out_coords_nodal[p] = NULL;
+    up->out_eval_at_peaks_vals_nodal[p] = NULL;
+  }
+
+  // Copy struct to device, with on_dev array pointers and device-callable
+  // basis function pointers swapped in.
+  // Save host-side array pointers and basis structs.
+  struct gkyl_array *ho_nodes = up->nodes;
+  struct gkyl_basis ho_basis = up->basis;
+  struct gkyl_basis ho_out_basis = up->out_basis;
+  struct gkyl_array *ho_out_vals[GKYL_DG_FIND_PEAKS_MAX];
+  struct gkyl_array *ho_out_coords[GKYL_DG_FIND_PEAKS_MAX];
+  struct gkyl_array *ho_out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];
+  struct gkyl_array *ho_out_coords_nodal[GKYL_DG_FIND_PEAKS_MAX];
+  struct gkyl_array *ho_out_eval_at_peaks_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];
+
+  // Populate device-callable basis function pointers for the H2D copy.
+  // We allocate temporary device basis structs, initialize them with device
+  // kernels, then copy back to the host struct fields so that when the
+  // whole struct is memcpy'd H2D, it contains device-callable pointers.
+  struct gkyl_basis *tmp_basis_dev = gkyl_cart_modal_serendip_cu_dev_new(ndim, poly_order);
+  gkyl_cu_memcpy(&up->basis, tmp_basis_dev, sizeof(struct gkyl_basis), GKYL_CU_MEMCPY_D2H);
+  gkyl_cu_free(tmp_basis_dev);
+
+  // NOTE(review): both branches of this ternary yield 1, so the output basis
+  // dimension is always 1; only the polynomial order depends on out_dim.
+  int out_basis_dim = (out_dim == 0) ? 1 : 1;
+  int out_basis_po = (out_dim == 0) ? 0 : poly_order;
+  struct gkyl_basis *tmp_out_basis_dev = gkyl_cart_modal_serendip_cu_dev_new(out_basis_dim,
+    out_basis_po);
+  gkyl_cu_memcpy(&up->out_basis, tmp_out_basis_dev, sizeof(struct gkyl_basis), GKYL_CU_MEMCPY_D2H);
+  gkyl_cu_free(tmp_out_basis_dev);
+
+  // Swap nodes to its device pointer.
+  up->nodes = up->nodes->on_dev;
+
+  for (int p = 0; p < up->num_peaks; p++) {
+    ho_out_vals[p] = up->out_vals[p];
+    ho_out_coords[p] = up->out_coords[p];
+    ho_out_vals_nodal[p] = up->out_vals_nodal[p];
+    ho_out_coords_nodal[p] = up->out_coords_nodal[p];
+    ho_out_eval_at_peaks_vals_nodal[p] = up->out_eval_at_peaks_vals_nodal[p];
+
+    // Swap in device pointers for the H2D copy.
+    up->out_vals[p] = up->out_vals[p]->on_dev;
+    up->out_coords[p] = up->out_coords[p]->on_dev;
+    up->out_vals_nodal[p] = up->out_vals_nodal[p]->on_dev;
+    up->out_coords_nodal[p] = up->out_coords_nodal[p]->on_dev;
+    up->out_eval_at_peaks_vals_nodal[p] = up->out_eval_at_peaks_vals_nodal[p]->on_dev;
+  }
+
+  // Allocate device struct and copy host struct (with device pointers) to device.
+  struct gkyl_array_dg_find_peaks *up_cu =
+    (struct gkyl_array_dg_find_peaks *)gkyl_cu_malloc(sizeof(*up_cu));
+  gkyl_cu_memcpy(up_cu, up, sizeof(struct gkyl_array_dg_find_peaks), GKYL_CU_MEMCPY_H2D);
+  // Set after the copy, so the device copy does not carry this pointer.
+  up->on_dev = up_cu;
+
+  // Restore host-side array pointers and basis so the returned object
+  // has usable host handles and host-callable function pointers.
+  up->nodes = ho_nodes;
+  up->basis = ho_basis;
+  up->out_basis = ho_out_basis;
+  for (int p = 0; p < up->num_peaks; p++) {
+    up->out_vals[p] = ho_out_vals[p];
+    up->out_coords[p] = ho_out_coords[p];
+    up->out_vals_nodal[p] = ho_out_vals_nodal[p];
+    up->out_coords_nodal[p] = ho_out_coords_nodal[p];
+    up->out_eval_at_peaks_vals_nodal[p] = ho_out_eval_at_peaks_vals_nodal[p];
+  }
+
+  return up;
+}
diff --git a/core/zero/gkyl_array_dg_find_peaks.h b/core/zero/gkyl_array_dg_find_peaks.h
new file mode 100644
index 000000000..ec866a713
--- /dev/null
+++ b/core/zero/gkyl_array_dg_find_peaks.h
@@ -0,0 +1,314 @@
+#pragma once
+
+#include <gkyl_array.h>
+#include <gkyl_basis.h>
+#include <gkyl_range.h>
+#include <gkyl_rect_grid.h>
+
+/**
+ * Find all peaks (local maxima, local minima, and boundary values) of a DG
+ * field along one direction.
+ *
+ * For a 2D input array f(psi, z), finding peaks along z (dir=1) gives arrays:
+ *   out_val[k](psi) = value of k-th peak along z for each psi
+ *   out_coord[k](psi) = z-coordinate of k-th peak for each psi
+ *
+ * For a 1D input array f(z), finding peaks along z (dir=0) gives scalars:
+ *   out_val[k] = value of k-th peak
+ *   out_coord[k] = z-coordinate of k-th peak
+ *
+ * Peaks are detected by sampling the field at nodal points along the search
+ * direction and identifying:
+ *   - EDGE_LO: Value at the lower boundary of the domain
+ *   - LOCAL_MAX: Points where f increases then decreases
+ *   - LOCAL_MIN: Points where f decreases then increases
+ *   - EDGE_HI: Value at the upper boundary of the domain
+ *
+ * The number of peaks is determined by scanning along the search direction
+ * at a middle preserved-direction coordinate.
+ */
+typedef struct gkyl_array_dg_find_peaks gkyl_array_dg_find_peaks;
+
+/** Types of peaks that can be found. */
+enum gkyl_peak_type {
+  GKYL_PEAK_EDGE_LO,    // Value at lower boundary
+  GKYL_PEAK_LOCAL_MAX,  // Local maximum
+  GKYL_PEAK_LOCAL_MIN,  // Local minimum
+  GKYL_PEAK_EDGE_HI,    // Value at upper boundary
+};
+
+/** Input parameters for dg_find_peaks updater. */
+struct gkyl_array_dg_find_peaks_inp {
+  const struct gkyl_basis *basis;       // Input basis (N-dimensional)
+  const struct gkyl_rect_grid *grid;    // Input grid
+  const struct gkyl_range *range;       // Input range (need not be rank-local)
+  const struct gkyl_range *range_ext;   // Input extended range
+  int search_dir;                       // Direction to search for peaks (0-indexed)
+  bool use_gpu;                         // Whether to run on GPU
+};
+
+/**
+ * Create a new peak finder updater. The number of peaks is determined by
+ * scanning the input field along the search direction at a middle coordinate.
+ * This must be called AFTER the input field is initialized, as it scans the
+ * field to determine the number of peaks.
+ *
+ * @param inp Input parameters
+ * @param field Input field to scan for peak count determination
+ * @return New updater pointer
+ */
+struct gkyl_array_dg_find_peaks* gkyl_array_dg_find_peaks_new(
+  const struct gkyl_array_dg_find_peaks_inp *inp, const struct gkyl_array *field);
+
+/**
+ * Compute the peaks. For each point along the preserved dimensions,
+ * find all peaks along the search direction.
+ *
+ * @param up Updater object
+ * @param in Input array (N-dimensional DG field)
+ */
+void gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in);
+
+/**
+ * Get the number of peaks found.
+ *
+ * @param up Updater object
+ * @return Number of peaks
+ */
+int gkyl_array_dg_find_peaks_num_peaks(const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the type of a specific peak (EDGE_LO, LOCAL_MAX, LOCAL_MIN, EDGE_HI).
+ *
+ * @param up Updater object
+ * @param peak_idx Index of the peak (0 to num_peaks-1)
+ * @return Type of the peak
+ */
+enum gkyl_peak_type gkyl_array_dg_find_peaks_get_type(const struct gkyl_array_dg_find_peaks *up,
+  int peak_idx);
+
+/**
+ * Get the output basis ((N-1)-dimensional, or p=0 1D for 1D->0D).
+ *
+ * @param up Updater object
+ * @return Pointer to output basis
+ */
+const struct gkyl_basis* gkyl_array_dg_find_peaks_get_basis(
+  const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the output grid.
+ *
+ * @param up Updater object
+ * @return Pointer to output grid
+ */
+const struct gkyl_rect_grid* gkyl_array_dg_find_peaks_get_grid(
+  const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the output range.
+ *
+ * @param up Updater object
+ * @return Pointer to output range
+ */
+const struct gkyl_range* gkyl_array_dg_find_peaks_get_range(
+  const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the output extended range.
+ *
+ * @param up Updater object
+ * @return Pointer to output extended range
+ */
+const struct gkyl_range* gkyl_array_dg_find_peaks_get_range_ext(
+  const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the output nodal range.
+ *
+ * @param up Updater object
+ * @return Pointer to output nodal range
+ */
+const struct gkyl_range*
+gkyl_array_dg_find_peaks_get_nodal_range(const struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Get the output array containing peak values for a specific peak.
+ *
+ * @param up Updater object
+ * @param peak_idx Index of the peak (0 to num_peaks-1)
+ * @return Pointer to output values array (modal DG expansion)
+ */
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_vals(
+  const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+
+/**
+ * Get the output array containing peak values in nodal basis for a specific peak.
+ *
+ * @param up Updater object
+ * @param peak_idx Index of the peak (0 to num_peaks-1)
+ * @return Pointer to output values array (nodal DG expansion)
+ */
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_vals_nodal(
+  const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+
+/**
+ * Get the output array containing coordinates of a specific peak.
+ *
+ * @param up Updater object
+ * @param peak_idx Index of the peak (0 to num_peaks-1)
+ * @return Pointer to output coordinates array (modal DG expansion)
+ */
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords(
+  const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+
+/**
+ * Get the output array containing coordinates in nodal basis of a specific peak.
+ *
+ * @param up Updater object
+ * @param peak_idx Index of the peak (0 to num_peaks-1)
+ * @return Pointer to output coordinates array (nodal DG expansion)
+ */
+const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords_nodal(
+  const struct gkyl_array_dg_find_peaks *up, int peak_idx);
+
+/**
+ * Project (evaluate) an arbitrary array onto the peak locations previously
+ * found by gkyl_array_dg_find_peaks_advance.
+ *
+ * For a 1D case with 5 peaks, this evaluates the input array at those 5 peak
+ * locations and returns the values.
+ *
+ * For a 2D case with peaks along lines (e.g., psi vs z with peaks in z),
+ * this evaluates the input array along the contours defined by the peak
+ * locations for each psi.
+ *
+ * The peak locations must have been previously computed via
+ * gkyl_array_dg_find_peaks_advance. This method evaluates the provided array
+ * at those same locations.
+ *
+ * Example usage:
+ * @code
+ * // 1. Find peaks in bmag along z direction
+ * struct gkyl_array_dg_find_peaks *peak_finder = gkyl_array_dg_find_peaks_new(&inp, bmag);
+ * gkyl_array_dg_find_peaks_advance(peak_finder, bmag);
+ *
+ * // 2. Get bmag_max (LOCAL_MAX peak) location and value
+ * int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peak_finder);
+ * int bmag_max_idx = -1;
+ * for (int p = 0; p < num_peaks; p++) {
+ *   if (gkyl_array_dg_find_peaks_get_type(peak_finder, p) == GKYL_PEAK_LOCAL_MAX) {
+ *     bmag_max_idx = p;
+ *     break;
+ *   }
+ * }
+ * const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(peak_finder, bmag_max_idx);
+ * const struct gkyl_array *z_max = gkyl_array_dg_find_peaks_acquire_coords(peak_finder, bmag_max_idx);
+ *
+ * // 3. Evaluate phi at the same locations where bmag has peaks
+ * struct gkyl_array *phi_at_peaks[num_peaks];
+ * for (int p = 0; p < num_peaks; p++) {
+ *   phi_at_peaks[p] = gkyl_array_new(GKYL_DOUBLE, out_basis.num_basis, out_range_ext.volume);
+ * }
+ * gkyl_array_dg_find_peaks_project_on_peaks(peak_finder, phi, phi_at_peaks);
+ *
+ * // 4. Now phi_at_peaks[bmag_max_idx] contains phi evaluated at the mirror throat
+ * @endcode
+ *
+ * @param up Updater object (must have run advance first)
+ * @param in_array Array to evaluate at peak locations (same grid/basis as original field)
+ * @param out_vals Output: array of evaluated values for each peak
+ *                 (must be pre-allocated with num_peaks elements, each matching out_range_ext)
+ */
+void gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, struct gkyl_array **out_vals);
+
+/**
+ * Project (evaluate) an arbitrary array onto a single peak location previously
+ * found by gkyl_array_dg_find_peaks_advance.
+ *
+ * This is a more efficient version of gkyl_array_dg_find_peaks_project_on_peaks
+ * when you only need the evaluation at one specific peak (e.g., only at the
+ * mirror throat LOCAL_MAX peak).
+ *
+ * Example usage:
+ * @code
+ * // 1. Find peaks in bmag along z direction
+ * struct gkyl_array_dg_find_peaks *peak_finder = gkyl_array_dg_find_peaks_new(&inp, bmag);
+ * gkyl_array_dg_find_peaks_advance(peak_finder, bmag);
+ *
+ * // 2. Find the LOCAL_MAX peak index
+ * int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peak_finder);
+ * int bmag_max_idx = num_peaks - 2; // Assuming standard ordering
+ *
+ * // 3. Evaluate phi only at the mirror throat (bmag_max location)
+ * struct gkyl_array *phi_m = gkyl_array_new(GKYL_DOUBLE, out_basis.num_basis, out_range_ext.volume);
+ * gkyl_array_dg_find_peaks_project_on_peak_idx(peak_finder, phi, bmag_max_idx, phi_m);
+ *
+ * // 4. Now phi_m contains phi evaluated at the mirror throat
+ * @endcode
+ *
+ * @param up Updater object (must have run advance first)
+ * @param in_array Array to evaluate at peak location (same grid/basis as original field)
+ * @param peak_idx Index of the peak to evaluate at (0 to num_peaks-1)
+ * @param out_val Output: evaluated values at the specified peak
+ *                (must be pre-allocated to match out_range_ext)
+ */
+void gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val);
+
+/**
+ * Release the updater and all internal arrays.
+ *
+ * @param up Updater to delete
+ */
+void gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up);
+
+/**
+ * Create a new GPU peak finder updater from an already-initialized host object.
+ * Allocates GPU arrays and copies the struct, with device array pointers swapped
+ * in, to the device (stored in on_dev). The returned host-side struct has its
+ * host array handles restored afterwards. Called internally by
+ * gkyl_array_dg_find_peaks_new when use_gpu is true.
+ *
+ * @param up_ho Host-side updater object (fully initialized)
+ * @return New updater pointer with GPU arrays
+ */
+struct gkyl_array_dg_find_peaks* gkyl_array_dg_find_peaks_new_cu(
+  struct gkyl_array_dg_find_peaks *up_ho);
+
+/**
+ * GPU implementation of the advance method. Launches a CUDA kernel to find
+ * peaks for each preserved-direction node, then runs nodal-to-modal transforms
+ * on device.
+ *
+ * @param up Updater object (with GPU arrays)
+ * @param in Input array (device-side DG field)
+ */
+void gkyl_array_dg_find_peaks_advance_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in);
+
+/**
+ * GPU implementation of project_on_peaks. Launches a CUDA kernel to evaluate
+ * an input array at all peak locations, then runs nodal-to-modal transforms
+ * on device.
+ *
+ * @param up Updater object (with GPU arrays)
+ * @param in_array Input array (device-side DG field)
+ * @param out_vals Output: array of evaluated values for each peak (device-side)
+ */
+void gkyl_array_dg_find_peaks_project_on_peaks_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, struct gkyl_array **out_vals);
+
+/**
+ * GPU implementation of project_on_peak_idx. Launches a CUDA kernel to evaluate
+ * an input array at a single peak location, then runs a nodal-to-modal transform
+ * on device.
+ *
+ * @param up Updater object (with GPU arrays)
+ * @param in_array Input array (device-side DG field)
+ * @param peak_idx Index of the peak to evaluate at (0 to num_peaks-1)
+ * @param out_val Output: evaluated values at the specified peak (device-side)
+ */
+void gkyl_array_dg_find_peaks_project_on_peak_idx_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val);
diff --git a/core/zero/gkyl_array_dg_find_peaks_priv.h b/core/zero/gkyl_array_dg_find_peaks_priv.h
new file mode 100644
index 000000000..f8695a569
--- /dev/null
+++ b/core/zero/gkyl_array_dg_find_peaks_priv.h
@@ -0,0 +1,86 @@
+#pragma once
+
+#include <float.h>
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_array_dg_find_peaks.h>
+#include <gkyl_nodal_ops.h>
+#include <gkyl_ref_count.h>
+
+// Maximum number of peaks we can handle.
+#define GKYL_DG_FIND_PEAKS_MAX 16
+
+/**
+ * Map logical (reference) coordinates eta to computational coordinates,
+ * direction by direction: xout[d] = xc[d] + 0.5*dx[d]*eta[d] for d < ndim.
+ */
+GKYL_CU_DH
+static inline void
+dg_find_peaks_log_to_comp(int ndim, const double *eta,
+  const double *GKYL_RESTRICT dx, const double *GKYL_RESTRICT xc,
+  double *GKYL_RESTRICT xout)
+{
+  for (int dir = 0; dir != ndim && dir < ndim; ++dir) {
+    xout[dir] = xc[dir] + 0.5 * dx[dir] * eta[dir];
+  }
+}
+
+/** Internal struct for dg_find_peaks updater. */
+struct gkyl_array_dg_find_peaks {
+  // Input parameters (copies).
+  struct gkyl_rect_grid grid;       // Input grid (copy)
+  struct gkyl_basis basis;          // Input basis (copy)
+  struct gkyl_range range;          // Input local range (copy)
+  struct gkyl_range range_ext;      // Input extended range (copy)
+  int search_dir;                   // Direction to search for peaks
+  bool use_gpu;                     // Whether to run on GPU
+
+  // Output grid/basis/range (owned).
+  struct gkyl_rect_grid out_grid;   // Output grid (N-1 dim, or 1D 1-cell for 1D->0D)
+  struct gkyl_basis out_basis;      // Output basis (N-1 dim, or p=0 1D for 1D->0D)
+  struct gkyl_range out_range;      // Output range
+  struct gkyl_range out_range_ext;  // Output extended range
+  struct gkyl_range out_nrange;     // Nodal range for output
+
+  // Peak information.
+  int num_peaks;                              // Number of peaks detected (arrays below hold at most GKYL_DG_FIND_PEAKS_MAX)
+  enum gkyl_peak_type peak_types[GKYL_DG_FIND_PEAKS_MAX]; // Type of each peak
+
+  // Output arrays (owned) - one per peak.
+  struct gkyl_array *out_vals[GKYL_DG_FIND_PEAKS_MAX];        // Peak values (modal DG)
+  struct gkyl_array *out_coords[GKYL_DG_FIND_PEAKS_MAX];      // Peak coordinates (modal DG)
+  struct gkyl_array *out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX];  // Nodal peak values
+  struct gkyl_array *out_coords_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Nodal peak coordinates
+  struct gkyl_array *out_eval_at_peaks_vals_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Values evaluated at peaks (nodal)
+
+  // Internal working arrays.
+  struct gkyl_array *nodes;         // Node locations in logical coords
+
+  // Working arrays for the find-peaks scan along the search direction.
+  // On CPU these are malloc'd per call; on GPU they are pre-allocated
+  // with size (num_nodes_out * total_nodes_search) so each thread
+  // can index its own contiguous slice.
+  double *search_vals;              // Nodal values along search dir
+  double *search_coords;            // Physical coordinates along search dir
+  bool *search_visited;             // Visited flags along search dir
+  int total_nodes_search;           // Number of nodes along search dir
+
+  // Nodal-to-modal converter.
+  struct gkyl_nodal_ops *n2m;
+
+  // Device-resident basis for passing to GPU API functions (e.g. gkyl_nodal_ops_n2m_cu).
+  // Allocated via gkyl_cart_modal_serendip_cu_dev_new; NULL on CPU.
+  struct gkyl_basis *out_basis_on_dev;
+
+  uint32_t flags;                          // NOTE(review): meaning not visible in this header - confirm.
+  struct gkyl_array_dg_find_peaks *on_dev; // Pointer to device object (if GPU).
+  struct gkyl_ref_count ref_count;         // Reference counter.
+};
+
+/**
+ * Function that actually frees memory associated with this
+ * object when the number of references has decreased to zero.
+ *
+ * @param ref Reference counter for this object.
+ */
+void gkyl_array_dg_find_peaks_free(const struct gkyl_ref_count *ref);
diff --git a/gyrokinetic/apps/gk_species.c b/gyrokinetic/apps/gk_species.c
index 3345748d9..ad19b0b6d 100644
--- a/gyrokinetic/apps/gk_species.c
+++ b/gyrokinetic/apps/gk_species.c
@@ -136,6 +136,8 @@ gk_species_rhs_dynamic(gkyl_gyrokinetic_app *app, struct gk_species *species,
   
   // Enforce the omega_H constraint on dt.
   double dt_omegaH = gk_species_omegaH_dt(app, species, fin);
+  // dt_omegaH is passed by pointer so the fdot multiplier can rescale it before it limits dt_out.
+  gk_species_fdot_multiplier_advance_times_omegaH(app, species, &species->fdot_mult, &dt_omegaH);
   dt_out = fmin(dt_out, dt_omegaH);
 
   app->stat.species_omega_cfl_tm += gkyl_time_diff_now_sec(tm);
diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index cbc907ddf..a8de75530 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -1,16 +1,26 @@
 #include <assert.h>
-#include <gkyl_gyrokinetic_priv.h>
-#include <gkyl_loss_cone_mask_gyrokinetic.h>
 #include <gkyl_alloc.h>
+#include <gkyl_array_dg_find_peaks.h>
 #include <gkyl_dg_basis_ops.h>
+#include <gkyl_gyrokinetic_priv.h>
+#include <gkyl_loss_cone_mask_gyrokinetic.h>
+
+static void
+proj_on_basis_c2p_position_func(const double *xcomp, double *xphys, void *ctx)
+{
+  struct gk_proj_on_basis_c2p_func_ctx *c2p_ctx = ctx; // ctx carries the position map (pos_map).
+  gkyl_position_map_eval_mc2nu(c2p_ctx->pos_map, xcomp, xphys); // Presumably xcomp in, xphys out.
+}
 
 void
-gk_species_damping_write_disabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write_disabled(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
 }
 
 void
-gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write_enabled(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
   struct timespec wst = gkyl_wall_clock();
   // DG metadata for damping rate.
@@ -18,19 +28,21 @@ gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *g
     { .key = "poly_order", .elem_type = GKYL_MP_UNSIGNED_INT, .uval = 0 },
     { .key = "basis_type", .elem_type = GKYL_MP_STRING, .cval = "serendipity" },
   };
-  int mpe_drate_len = sizeof(mpe_drate)/sizeof(mpe_drate[0]);
+  int mpe_drate_len = sizeof(mpe_drate) / sizeof(mpe_drate[0]);
   // Update app basic metada with time/frame.
   gkyl_msgpack_map_elem_set_double(app->io_meta_basic_len, app->io_meta_basic, "time", tm);
   gkyl_msgpack_map_elem_set_uint(app->io_meta_basic_len, app->io_meta_basic, "frame", frame);
   // Package metadata.
-  int io_meta_len[] = {app->io_meta_basic_len, mpe_drate_len, app->gk_geom->io_meta_len};
-  const struct gkyl_msgpack_map_elem* io_meta[] = {app->io_meta_basic, mpe_drate, app->gk_geom->io_meta};
-  struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len)/sizeof(int), io_meta_len, io_meta);
+  int io_meta_len[] = { app->io_meta_basic_len, mpe_drate_len, app->gk_geom->io_meta_len };
+  const struct gkyl_msgpack_map_elem *io_meta[] = { app->io_meta_basic, mpe_drate,
+                                                    app->gk_geom->io_meta };
+  struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len) / sizeof(int),
+    io_meta_len, io_meta);
 
   // Write out the damping rate.
   const char *fmt = "%s-%s_damping_rate_%d.gkyl";
   int sz = gkyl_calc_strlen(fmt, app->name, gks->info.name, frame);
-  char fileNm[sz+1]; // ensures no buffer overflow
+  char fileNm[sz + 1]; // ensures no buffer overflow
   snprintf(fileNm, sizeof fileNm, fmt, app->name, gks->info.name, frame);
 
   // Copy data from device to host before writing it out.
@@ -40,12 +52,13 @@ gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *g
   gkyl_comm_array_write(gks->comm, &gks->grid, &gks->local, mt, gks->damping.rate_host, fileNm);
   app->stat.n_io += 1;
 
-  gkyl_msgpack_data_release(mt); 
+  gkyl_msgpack_data_release(mt);
   app->stat.species_diag_io_tm += gkyl_time_diff_now_sec(wst);
 }
 
 void
-gk_species_damping_write_init_only(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write_init_only(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
   gk_species_damping_write_enabled(app, gks, tm, frame);
   gks->damping.write_func = gk_species_damping_write_disabled;
@@ -65,6 +78,7 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
 {
   damp->type = gks->info.damping.type;
   damp->evolve = false; // Whether the rate is time dependent.
+  damp->is_tandem = false; // Default to single mirror.
 
   int num_quad = gks->info.damping.num_quad? gks->info.damping.num_quad : 1; // Default is a p=0 mask.
   assert(num_quad == 1); // MF 2025/06/11: Limited to this for now.
@@ -72,125 +86,184 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
   // Default function pointers.
   damp->write_func = gk_species_damping_write_disabled;
 
+  damp->proj_on_basis_c2p_ctx.cdim = app->cdim;
+  damp->proj_on_basis_c2p_ctx.vdim = gks->local_vel.ndim;
+  damp->proj_on_basis_c2p_ctx.vel_map = gks->vel_map;
+  damp->proj_on_basis_c2p_ctx.pos_map = app->position_map;
+
   if (damp->type) {
     // Allocate rate array.
-    damp->rate = mkarr(app->use_gpu, num_quad==1? 1 : gks->basis.num_basis, gks->local_ext.volume);
+    damp->rate = mkarr(app->use_gpu, num_quad == 1? 1 : gks->basis.num_basis,
+      gks->local_ext.volume);
     damp->rate_host = damp->rate;
     if (app->use_gpu)
-      damp->rate_host = mkarr(false, damp->rate->ncomp, damp->rate->size); 
+      damp->rate_host = mkarr(false, damp->rate->ncomp, damp->rate->size);
 
     if (damp->type == GKYL_GK_DAMPING_USER_INPUT) {
       struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
       proj_on_basis_c2p_ctx.cdim = app->cdim;
       proj_on_basis_c2p_ctx.vdim = gks->local_vel.ndim;
       proj_on_basis_c2p_ctx.vel_map = gks->vel_map;
-      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew( &(struct gkyl_proj_on_basis_inp) {
-          .grid = &gks->grid,
-          .basis = &gks->basis,
-          .num_quad = num_quad,
-          .num_ret_vals = 1,
-          .eval = gks->info.damping.rate_profile,
-          .ctx = gks->info.damping.rate_profile_ctx,
-          .c2p_func = proj_on_basis_c2p_phase_func,
-          .c2p_func_ctx = &proj_on_basis_c2p_ctx,
-        }
-      );
+      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew(&(struct gkyl_proj_on_basis_inp) {
+        .grid = &gks->grid,
+        .basis = &gks->basis,
+        .num_quad = num_quad,
+        .num_ret_vals = 1,
+        .eval = gks->info.damping.rate_profile,
+        .ctx = gks->info.damping.rate_profile_ctx,
+        .c2p_func = proj_on_basis_c2p_phase_func,
+        .c2p_func_ctx = &proj_on_basis_c2p_ctx,
+      });
       gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, damp->rate_host);
       gkyl_proj_on_basis_release(projup);
       gkyl_array_copy(damp->rate, damp->rate_host);
 
       if (num_quad == 1)
-        gkyl_array_scale_range(damp->rate, 1.0/pow(sqrt(2.0),gks->grid.ndim), &gks->local);
+        gkyl_array_scale_range(damp->rate, 1.0 / pow(sqrt(2.0), gks->grid.ndim), &gks->local);
     }
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
       damp->evolve = true; // Since the loss cone boundary is proportional to phi(t).
 
-      // Maximum bmag and its location.
-      // NOTE: if the same max bmag occurs at multiple locations,
-      // bmag_max_coord may have different values on different MPI processes.
-      double bmag_max_coord_ho[GKYL_MAX_CDIM];
-      double bmag_max_ho = gkyl_gk_geometry_reduce_arg_bmag(app->gk_geom, GKYL_MAX, bmag_max_coord_ho);
-      double bmag_max_local = bmag_max_ho;
-      double bmag_max_global;
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, &bmag_max_local, &bmag_max_global);
-      double bmag_max_coord_local[app->cdim], bmag_max_coord_global[app->cdim];
-      if (fabs(bmag_max_ho - bmag_max_global) < 1e-16) {
-        for (int d=0; d<app->cdim; d++)
-          bmag_max_coord_local[d] = bmag_max_coord_ho[d];
+      // Create peak finder for bmag to find the mirror throat.
+      // Search along the parallel (z) direction, which is the last configuration space dimension.
+      int search_dir = app->cdim - 1;
+      struct gkyl_array_dg_find_peaks_inp peak_inp = {
+        .basis = &app->basis,
+        .grid = &app->grid,
+        .range = &app->global,
+        .range_ext = &app->global_ext,
+        .search_dir = search_dir,
+        .use_gpu = app->use_gpu,
+      };
+      // The peak finder spans the global range, so both its setup and advance need the gathered bmag.
+      struct gkyl_array *bmag_int_global = mkarr(app->use_gpu,
+        app->gk_geom->geo_int.bmag->ncomp, app->global_ext.volume);
+      damp->phi_smooth_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume);
+
+      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag,
+        bmag_int_global);
+      damp->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global);
+      gkyl_array_dg_find_peaks_advance(damp->bmag_peak_finder, bmag_int_global);
+      gkyl_array_release(bmag_int_global);
+
+      // Get the LOCAL_MAX peak (bmag maximum along z direction).
+      int num_peaks = gkyl_array_dg_find_peaks_num_peaks(damp->bmag_peak_finder);
+      damp->bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less
+      damp->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder,
+        damp->bmag_max_peak_idx);
+      damp->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder,
+        damp->bmag_max_peak_idx);
+      damp->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder,
+        num_peaks - 1);
+      damp->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder,
+        num_peaks - 1);
+      damp->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(damp->bmag_peak_finder);
+      damp->bmag_max_range = gkyl_array_dg_find_peaks_get_range(damp->bmag_peak_finder);
+      damp->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(damp->bmag_peak_finder);
+
+      damp->phi_at_bmag_max = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis,
+        damp->bmag_max_range_ext->volume);
+      damp->phi_at_bmag_tandem = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis,
+        damp->bmag_max_range_ext->volume);
+      // phi is defined as 0 at the wall
+
+      bool is_symmetric = false; // Defined up front: the assert below vanishes under NDEBUG.
+      int cdim = app->cdim;
+      if (gkyl_compare_double(-app->grid.lower[cdim - 1], app->grid.upper[cdim - 1], 1e-12)) {
+        is_symmetric = true; // Parallel grid is symmetric about 0.
+      }
+      else if (gkyl_compare_double(app->grid.lower[cdim - 1], 0.0, 1e-12)) {
+        is_symmetric = false; // Parallel grid has its lower bound at 0.
       }
       else {
-        for (int d=0; d<app->cdim; d++)
-          bmag_max_coord_local[d] = -DBL_MAX;
+        assert(false); // Needs either the lower bound at 0 or symmetric grid
       }
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, app->cdim, bmag_max_coord_local, bmag_max_coord_global);
 
-      if (app->use_gpu) {
-        damp->bmag_max = gkyl_cu_malloc(sizeof(double));
-        damp->bmag_max_coord = gkyl_cu_malloc(app->cdim*sizeof(double));
-	gkyl_cu_memcpy(damp->bmag_max, &bmag_max_global, sizeof(double), GKYL_CU_MEMCPY_H2D);
-	gkyl_cu_memcpy(damp->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double), GKYL_CU_MEMCPY_H2D);
+      if ( (is_symmetric && num_peaks == 5) || (!is_symmetric && num_peaks == 3) ) {
+        damp->is_tandem = false;
+      }
+      else if ((is_symmetric && num_peaks == 9) || (!is_symmetric && num_peaks == 5)) {
+        damp->is_tandem = true;
       }
       else {
-        damp->bmag_max = gkyl_malloc(sizeof(double));
-        damp->bmag_max_coord = gkyl_malloc(app->cdim*sizeof(double));
-	memcpy(damp->bmag_max, &bmag_max_global, sizeof(double));
-	memcpy(damp->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double));
+        assert(false); // Unsupported number of extrema for loss-cone damping
       }
 
-      // Electrostatic potential at bmag_max_coord.
-      if (app->use_gpu) {
-        damp->phi_m = gkyl_cu_malloc(sizeof(double));
-        damp->phi_m_global = gkyl_cu_malloc(sizeof(double));
+      if (damp->is_tandem) {
+        damp->bmag_tandem_peak_idx = num_peaks - 4;
       }
       else {
-        damp->phi_m = gkyl_malloc(sizeof(double));
-        damp->phi_m_global = gkyl_malloc(sizeof(double));
+        damp->bmag_tandem_peak_idx = num_peaks - 2;
       }
+      damp->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder,
+        damp->bmag_tandem_peak_idx);
+      damp->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder,
+        damp->bmag_tandem_peak_idx);
 
       // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
         .phase_grid = &gks->grid,
         .conf_basis = &app->basis,
         .phase_basis = &gks->basis,
-        .conf_range =  &app->local,
+        .conf_range = &app->local,
         .conf_range_ext = &app->local_ext,
-        .vel_range = &gks->local_vel, 
+        .vel_range = &gks->local_vel,
         .vel_map = gks->vel_map,
         .bmag = app->gk_geom->geo_int.bmag,
         .bmag_max = damp->bmag_max,
-        .bmag_max_loc = damp->bmag_max_coord,
+        .bmag_max_z_coord = damp->bmag_max_z_coord,
+        .bmag_wall = damp->bmag_wall,
+        .bmag_wall_z_coord = damp->bmag_wall_z_coord,
+        .bmag_tandem = damp->bmag_tandem,
+        .bmag_tandem_z_coord = damp->bmag_tandem_z_coord,
+        .is_tandem = damp->is_tandem,
+        .bmag_max_basis = damp->bmag_max_basis,
+        .bmag_max_range = damp->bmag_max_range,
         .mass = gks->info.mass,
         .charge = gks->info.charge,
         .num_quad = num_quad,
         .use_gpu = app->use_gpu,
       };
-      damp->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
+      damp->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
 
       // Project the conf-space rate profile provided.
-      struct gkyl_array *scale_prof_high_order = mkarr(app->use_gpu, gks->basis.num_basis, gks->local_ext.volume);
-      struct gkyl_array *scale_prof_high_order_ho = app->use_gpu? mkarr(false, scale_prof_high_order->ncomp, scale_prof_high_order->size)
+      struct gkyl_array *scale_prof_high_order = mkarr(app->use_gpu, gks->basis.num_basis,
+        gks->local_ext.volume);
+      struct gkyl_array *scale_prof_high_order_ho = app->use_gpu? mkarr(false,
+        scale_prof_high_order->ncomp, scale_prof_high_order->size)
                                                      : gkyl_array_acquire(scale_prof_high_order);
-      
-      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_new(&gks->grid, &gks->basis, num_quad, 1, 
+
+      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_new(&gks->grid, &gks->basis, num_quad, 1,
         gks->info.damping.rate_profile, gks->info.damping.rate_profile_ctx);
       gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, scale_prof_high_order_ho);
       gkyl_proj_on_basis_release(projup);
       gkyl_array_copy(scale_prof_high_order, scale_prof_high_order_ho);
 
-      damp->scale_prof = mkarr(app->use_gpu, num_quad == 1? 1 : gks->basis.num_basis, gks->local_ext.volume);
-      gkyl_array_set_offset(damp->scale_prof, pow(sqrt(2.0),gks->grid.ndim), scale_prof_high_order, 0);
+      damp->scale_prof = mkarr(app->use_gpu, num_quad == 1? 1 : gks->basis.num_basis,
+        gks->local_ext.volume);
+      gkyl_array_set_offset(damp->scale_prof, pow(sqrt(2.0), gks->grid.ndim), scale_prof_high_order,
+        0);
 
       gkyl_array_release(scale_prof_high_order_ho);
       gkyl_array_release(scale_prof_high_order);
 
       // Compute the initial damping rate (assuming phi=0 because phi hasn't been computed).
       // Find the potential at the mirror throat.
-      gkyl_dg_basis_ops_eval_array_at_coord_comp(app->field->phi_smooth, damp->bmag_max_coord,
-        app->basis_on_dev, &app->grid, &app->local, damp->phi_m);
-      gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, damp->phi_m, damp->phi_m_global);
-      // Project the loss cone mask.
-      gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-        app->field->phi_smooth, damp->phi_m_global, damp->rate);
+      // The peak finder spans the global range, so it needs a globally gathered phi.
+      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->field->phi_smooth,
+        damp->phi_smooth_global);
+      gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, damp->phi_smooth_global,
+        damp->bmag_max_peak_idx, damp->phi_at_bmag_max);
+      // Without a tandem cell the tandem argument aliases the potential at bmag_max.
+      struct gkyl_array *phi_tandem = damp->phi_at_bmag_max;
+      if (damp->is_tandem) {
+        gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder,
+          damp->phi_smooth_global, damp->bmag_tandem_peak_idx, damp->phi_at_bmag_tandem);
+        phi_tandem = damp->phi_at_bmag_tandem;
+      }
+      // Project the loss cone mask.
+      gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
+        app->field->phi_smooth, damp->phi_at_bmag_max, phi_tandem, damp->rate);
       // Multiply by the user's scaling profile.
       gkyl_array_scale_by_cell(damp->rate, damp->scale_prof);
     }
@@ -206,7 +279,8 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
 }
 
 void
-gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *gks, struct gk_damping *damp, 
+gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+  struct gk_damping *damp,
   const struct gkyl_array *phi, const struct gkyl_array *fin, struct gkyl_array *f_buffer,
   struct gkyl_array *rhs, struct gkyl_array *cflrate)
 {
@@ -218,21 +292,28 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
       gkyl_array_accumulate(rhs, -1.0, f_buffer);
     }
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
-      // Find the potential at the mirror throat.
-      gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, damp->bmag_max_coord,
-        app->basis_on_dev, &app->grid, &app->local, damp->phi_m);
-      gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, damp->phi_m, damp->phi_m_global);
-
-      // Project the loss cone mask.
-      gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-        phi, damp->phi_m_global, damp->rate);
+      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, phi, damp->phi_smooth_global);
+      // Find the potential at bmag_max
+      gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, damp->phi_smooth_global,
+        damp->bmag_max_peak_idx, damp->phi_at_bmag_max);
+
+      if (damp->is_tandem) {
+        gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder,
+          damp->phi_smooth_global, damp->bmag_tandem_peak_idx, damp->phi_at_bmag_tandem);
+        // The mask is built over the local range, so pass the local phi (not the global copy).
+        gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
+          phi, damp->phi_at_bmag_max, damp->phi_at_bmag_tandem, damp->rate);
+      }
+      else {
+        gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
+          phi, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate);
+      }
 
       // Assemble the damping term -scale_prof * mask * f.
       gkyl_array_set(f_buffer, 1.0, fin);
       gkyl_array_scale_by_cell(damp->rate, damp->scale_prof);
       gkyl_array_scale_by_cell(f_buffer, damp->rate);
       gkyl_array_accumulate(rhs, -1.0, f_buffer);
-
     }
 
     // Add the frequency to the CFL frequency.
@@ -243,7 +324,7 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
 }
 
 void
-gk_species_damping_write(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm, int frame)
 {
   gks->damping.write_func(app, gks, tm, frame);
 }
@@ -253,25 +334,26 @@ gk_species_damping_release(const struct gkyl_gyrokinetic_app *app, const struct
 {
   if (damp->type) {
     gkyl_array_release(damp->rate);
-    if (app->use_gpu)
+    if (app->use_gpu) {
       gkyl_array_release(damp->rate_host);
+    }
 
     if (damp->type == GKYL_GK_DAMPING_USER_INPUT) {
       // Nothing to release.
     }
     else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
-      if (app->use_gpu) {
-        gkyl_cu_free(damp->bmag_max);
-        gkyl_cu_free(damp->bmag_max_coord);
-        gkyl_cu_free(damp->phi_m);
-        gkyl_cu_free(damp->phi_m_global);
-      }
-      else {
-        gkyl_free(damp->bmag_max);
-        gkyl_free(damp->bmag_max_coord);
-        gkyl_free(damp->phi_m);
-        gkyl_free(damp->phi_m_global);
-      }
+      gkyl_array_release(damp->bmag_max);
+      gkyl_array_release(damp->bmag_max_z_coord);
+      gkyl_array_release(damp->bmag_wall);
+      gkyl_array_release(damp->bmag_wall_z_coord);
+      gkyl_array_release(damp->bmag_tandem);
+      gkyl_array_release(damp->bmag_tandem_z_coord);
+
+      gkyl_array_release(damp->phi_at_bmag_max);
+      gkyl_array_release(damp->phi_at_bmag_tandem);
+
+      gkyl_array_release(damp->phi_smooth_global);
+      gkyl_array_dg_find_peaks_release(damp->bmag_peak_finder);
       gkyl_loss_cone_mask_gyrokinetic_release(damp->lcm_proj_op);
       gkyl_array_release(damp->scale_prof);
     }
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 0a983aeaa..0fbd4002a 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -1,16 +1,19 @@
 #include <assert.h>
-#include <gkyl_gyrokinetic_priv.h>
-#include <gkyl_loss_cone_mask_gyrokinetic.h>
 #include <gkyl_alloc.h>
+#include <gkyl_array_dg_find_peaks.h>
 #include <gkyl_dg_basis_ops.h>
+#include <gkyl_gyrokinetic_priv.h>
+#include <gkyl_loss_cone_mask_gyrokinetic.h>
 
 void
-gk_species_fdot_multiplier_write_disabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_fdot_multiplier_write_disabled(gkyl_gyrokinetic_app *app, struct gk_species *gks,
+  double tm, int frame)
 {
 }
 
 void
-gk_species_fdot_multiplier_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_fdot_multiplier_write_enabled(gkyl_gyrokinetic_app *app, struct gk_species *gks,
+  double tm, int frame)
 {
   struct timespec wst = gkyl_wall_clock();
   // DG metadata for multiplier.
@@ -18,34 +21,38 @@ gk_species_fdot_multiplier_write_enabled(gkyl_gyrokinetic_app* app, struct gk_sp
     { .key = "poly_order", .elem_type = GKYL_MP_UNSIGNED_INT, .uval = 0 },
     { .key = "basis_type", .elem_type = GKYL_MP_STRING, .cval = "serendipity" },
   };
-  int mpe_mult_len = sizeof(mpe_mult)/sizeof(mpe_mult[0]);
+  int mpe_mult_len = sizeof(mpe_mult) / sizeof(mpe_mult[0]);
   // Update app basic metada with time/frame.
   gkyl_msgpack_map_elem_set_double(app->io_meta_basic_len, app->io_meta_basic, "time", tm);
   gkyl_msgpack_map_elem_set_uint(app->io_meta_basic_len, app->io_meta_basic, "frame", frame);
   // Package metadata.
-  int io_meta_len[] = {app->io_meta_basic_len, mpe_mult_len, app->gk_geom->io_meta_len};
-  const struct gkyl_msgpack_map_elem* io_meta[] = {app->io_meta_basic, mpe_mult, app->gk_geom->io_meta};
-  struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len)/sizeof(int), io_meta_len, io_meta);
+  int io_meta_len[] = { app->io_meta_basic_len, mpe_mult_len, app->gk_geom->io_meta_len };
+  const struct gkyl_msgpack_map_elem *io_meta[] = { app->io_meta_basic, mpe_mult,
+                                                    app->gk_geom->io_meta };
+  struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len) / sizeof(int),
+    io_meta_len, io_meta);
 
   // Write out the multiplicative function.
   const char *fmt = "%s-%s_fdot_multiplier_%d.gkyl";
   int sz = gkyl_calc_strlen(fmt, app->name, gks->info.name, frame);
-  char fileNm[sz+1]; // ensures no buffer overflow
+  char fileNm[sz + 1]; // ensures no buffer overflow
   snprintf(fileNm, sizeof fileNm, fmt, app->name, gks->info.name, frame);
 
   // Copy data from device to host before writing it out.
   if (app->use_gpu)
     gkyl_array_copy(gks->fdot_mult.multiplier_host, gks->fdot_mult.multiplier);
 
-  gkyl_comm_array_write(gks->comm, &gks->grid, &gks->local, mt, gks->fdot_mult.multiplier_host, fileNm);
+  gkyl_comm_array_write(gks->comm, &gks->grid, &gks->local, mt, gks->fdot_mult.multiplier_host,
+    fileNm);
   app->stat.n_io += 1;
 
-  gkyl_msgpack_data_release(mt); 
+  gkyl_msgpack_data_release(mt);
   app->stat.species_diag_io_tm += gkyl_time_diff_now_sec(wst);
 }
 
 void
-gk_species_fdot_multiplier_write_init_only(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_fdot_multiplier_write_init_only(gkyl_gyrokinetic_app *app, struct gk_species *gks,
+  double tm, int frame)
 {
   gk_species_fdot_multiplier_write_enabled(app, gks, tm, frame);
   gks->fdot_mult.write_func = gk_species_fdot_multiplier_write_disabled;
@@ -60,19 +67,44 @@ gk_species_fdot_multiplier_advance_mult(gkyl_gyrokinetic_app *app, const struct
 }
 
 void
-gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
-  struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
+gk_species_fdot_multiplier_advance_omegaH_mult(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, double *out)
 {
-  // Find the potential at the mirror throat.
-  gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, fdmul->bmag_max_coord,
-    app->basis_on_dev, &app->grid, &app->local, fdmul->phi_m);
-  gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, fdmul->phi_m, fdmul->phi_m_global);
+  // Rescale the omegaH rate: divide it by the collisionless scale factor.
+  out[0] = out[0] / gks->collisionless.scale_fac;
+}
 
-  // Project the loss cone mask.
-  gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
-    phi, fdmul->phi_m_global, fdmul->multiplier);
+void
+gk_species_fdot_multiplier_advance_omegaH_disabled(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, double *out)
+{
+}
 
-  // Multiply out by the multplier.
+void
+gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
+{
+  gkyl_comm_array_allgather(app->comm, &app->local, &app->global, phi, fdmul->phi_smooth_global);
+  // Project the gathered (global) potential at the bmag_max peak.
+  gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, fdmul->phi_smooth_global,
+    fdmul->bmag_max_peak_idx, fdmul->phi_at_bmag_max);
+
+  if (fdmul->is_tandem) {
+    gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, fdmul->phi_smooth_global,
+      fdmul->bmag_tandem_peak_idx, fdmul->phi_at_bmag_tandem);
+    gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
+      phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_tandem,
+      fdmul->multiplier);
+  }
+  else {
+    gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
+      phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_max, fdmul->multiplier);
+  }
+
+  // Scale out cell-by-cell by the loss-cone multiplier.
   gkyl_array_scale_by_cell(out, fdmul->multiplier);
 }
 
@@ -108,6 +140,7 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
   // Default function pointers.
   fdmul->write_func = gk_species_fdot_multiplier_write_disabled;
   fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_disabled;
+  fdmul->advance_times_omegaH_func = gk_species_fdot_multiplier_advance_omegaH_disabled;
   fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_disabled;
 
   if (fdmul->type) {
@@ -123,7 +156,8 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
 
     // Allocate multiplier array.
     fdmul->multiplier = mkarr(app->use_gpu, basis_mult.num_basis, gks->local_ext.volume);
-    fdmul->multiplier_host = app->use_gpu? mkarr(false, fdmul->multiplier->ncomp, fdmul->multiplier->size)
+    fdmul->multiplier_host = app->use_gpu? mkarr(false, fdmul->multiplier->ncomp,
+      fdmul->multiplier->size)
                                          : gkyl_array_acquire(fdmul->multiplier);
 
     // Context for c2p function passed to proj_on_basis.
@@ -134,114 +168,157 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec
 
     if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_USER_INPUT) {
 
-      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew( &(struct gkyl_proj_on_basis_inp) {
-          .grid = &gks->grid,
-          .basis = &basis_mult,
-          .num_quad = basis_mult.poly_order+1,
-          .num_ret_vals = 1,
-          .eval = gks->info.time_rate_multiplier.profile,
-          .ctx = gks->info.time_rate_multiplier.profile_ctx,
-          .c2p_func = proj_on_basis_c2p_phase_func,
-          .c2p_func_ctx = &fdmul->proj_on_basis_c2p_ctx,
-        }
-      );
+      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew(&(struct gkyl_proj_on_basis_inp) {
+        .grid = &gks->grid,
+        .basis = &basis_mult,
+        .num_quad = basis_mult.poly_order + 1,
+        .num_ret_vals = 1,
+        .eval = gks->info.time_rate_multiplier.profile,
+        .ctx = gks->info.time_rate_multiplier.profile_ctx,
+        .c2p_func = proj_on_basis_c2p_phase_func,
+        .c2p_func_ctx = &fdmul->proj_on_basis_c2p_ctx,
+      });
       gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, fdmul->multiplier_host);
       gkyl_proj_on_basis_release(projup);
       gkyl_array_copy(fdmul->multiplier, fdmul->multiplier_host);
 
       fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_mult;
+      fdmul->advance_times_omegaH_func = gk_species_fdot_multiplier_advance_omegaH_mult;
       fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_mult;
       if (fdmul->write_diagnostics)
         fdmul->write_func = gk_species_fdot_multiplier_write_init_only;
       else
         gkyl_array_release(fdmul->multiplier_host);
-
     }
     else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE) {
       // Available options:
-      //   A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
-      //   B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_const=true. Output: ncomp=1 array.
+      // A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
+      // B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_const=true. Output: ncomp=1 array.
       enum gkyl_quad_type qtype = GKYL_GAUSS_LOBATTO_QUAD;
-      int num_quad = gks->basis.poly_order+1; // This can be p+1 or 1. Must be
-                                              // at leat p+1 for Gauss-Lobatto.
-
-      // Maximum bmag and its location.
-      // NOTE: if the same max bmag occurs at multiple locations,
-      // bmag_max_coord may have different values on different MPI processes.
-      double bmag_max_coord_ho[GKYL_MAX_CDIM];
-      double bmag_max_ho = gkyl_gk_geometry_reduce_arg_bmag(app->gk_geom, GKYL_MAX, bmag_max_coord_ho);
-      double bmag_max_local = bmag_max_ho;
-      double bmag_max_global;
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, &bmag_max_local, &bmag_max_global);
-      double bmag_max_coord_local[app->cdim], bmag_max_coord_global[app->cdim];
-      if (fabs(bmag_max_ho - bmag_max_global) < 1e-16) {
-        for (int d=0; d<app->cdim; d++)
-          bmag_max_coord_local[d] = bmag_max_coord_ho[d];
+      int num_quad = gks->basis.poly_order + 1; // This can be p+1 or 1. Must be
+                                                // at least p+1 for Gauss-Lobatto.
+
+      // Create peak finder for bmag to find the mirror throat.
+      // Search along the parallel (z) direction, which is the last configuration space dimension.
+      int search_dir = app->cdim - 1;
+      struct gkyl_array_dg_find_peaks_inp peak_inp = {
+        .basis = &app->basis,
+        .grid = &app->grid,
+        .range = &app->global,
+        .range_ext = &app->global_ext,
+        .search_dir = search_dir,
+        .use_gpu = app->use_gpu,
+      };
+      // Pass a global bmag_int into the peak finder
+      struct gkyl_array *bmag_int_global = mkarr(app->use_gpu,
+        app->gk_geom->geo_int.bmag->ncomp, app->global_ext.volume);
+      fdmul->phi_smooth_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume);
+
+      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag,
+        bmag_int_global);
+
+      fdmul->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global);
+      gkyl_array_dg_find_peaks_advance(fdmul->bmag_peak_finder, bmag_int_global);
+      gkyl_array_release(bmag_int_global);
+
+      // Get the LOCAL_MAX peak (bmag maximum along z direction).
+      int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder);
+      fdmul->bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less
+      fdmul->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder,
+        fdmul->bmag_max_peak_idx);
+      fdmul->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder,
+        fdmul->bmag_max_peak_idx);
+      fdmul->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder,
+        num_peaks - 1);
+      fdmul->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder,
+        num_peaks - 1);
+      fdmul->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(fdmul->bmag_peak_finder);
+      fdmul->bmag_max_range = gkyl_array_dg_find_peaks_get_range(fdmul->bmag_peak_finder);
+      fdmul->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(fdmul->bmag_peak_finder);
+
+      fdmul->phi_at_bmag_max = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis,
+        fdmul->bmag_max_range_ext->volume);
+      fdmul->phi_at_bmag_tandem = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis,
+        fdmul->bmag_max_range_ext->volume);
+      // phi is defined as 0 at the wall
+
+      bool is_symmetric = false, is_tandem = false; // Defined even if asserts compile out.
+      int cdim = app->cdim;
+      if (gkyl_compare_double(-app->grid.lower[cdim - 1], app->grid.upper[cdim - 1], 1e-12)) {
+        is_symmetric = true;
+      }
+      else if (gkyl_compare_double(app->grid.lower[cdim - 1], 0.0, 1e-12)) {
+        is_symmetric = false;
       }
       else {
-        for (int d=0; d<app->cdim; d++)
-          bmag_max_coord_local[d] = -DBL_MAX;
+        assert(false); // Needs either the lower bound at 0 or symmetric grid
       }
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, app->cdim, bmag_max_coord_local, bmag_max_coord_global);
 
-      if (app->use_gpu) {
-        fdmul->bmag_max = gkyl_cu_malloc(sizeof(double));
-        fdmul->bmag_max_coord = gkyl_cu_malloc(app->cdim*sizeof(double));
-	gkyl_cu_memcpy(fdmul->bmag_max, &bmag_max_global, sizeof(double), GKYL_CU_MEMCPY_H2D);
-	gkyl_cu_memcpy(fdmul->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double), GKYL_CU_MEMCPY_H2D);
+      if ( (is_symmetric && num_peaks == 5) || (!is_symmetric && num_peaks == 3) ) {
+        is_tandem = false;
+      }
+      else if ((is_symmetric && num_peaks == 9) || (!is_symmetric && num_peaks == 5)) {
+        is_tandem = true;
       }
       else {
-        fdmul->bmag_max = gkyl_malloc(sizeof(double));
-        fdmul->bmag_max_coord = gkyl_malloc(app->cdim*sizeof(double));
-	memcpy(fdmul->bmag_max, &bmag_max_global, sizeof(double));
-	memcpy(fdmul->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double));
+        assert(false); // Unsupported number of extrema for loss-cone multiplier
       }
 
-      // Electrostatic potential at bmag_max_coord.
-      if (app->use_gpu) {
-        fdmul->phi_m = gkyl_cu_malloc(sizeof(double));
-        fdmul->phi_m_global = gkyl_cu_malloc(sizeof(double));
+      if (is_tandem) {
+        fdmul->bmag_tandem_peak_idx = num_peaks - 4;
       }
       else {
-        fdmul->phi_m = gkyl_malloc(sizeof(double));
-        fdmul->phi_m_global = gkyl_malloc(sizeof(double));
+        fdmul->bmag_tandem_peak_idx = num_peaks - 2;
       }
+      fdmul->is_tandem = is_tandem; // The loss-cone advance branches on this stored flag.
+      struct gkyl_array_dg_find_peaks *pf = fdmul->bmag_peak_finder;
+      fdmul->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(pf, fdmul->bmag_tandem_peak_idx);
+      fdmul->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(pf, fdmul->bmag_tandem_peak_idx);
 
       // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
         .phase_grid = &gks->grid,
         .conf_basis = &app->basis,
         .phase_basis = &gks->basis,
-        .conf_range =  &app->local,
+        .conf_range = &app->local,
         .conf_range_ext = &app->local_ext,
-        .vel_range = &gks->local_vel, 
+        .vel_range = &gks->local_vel,
         .vel_map = gks->vel_map,
         .bmag = app->gk_geom->geo_int.bmag,
         .bmag_max = fdmul->bmag_max,
-        .bmag_max_loc = fdmul->bmag_max_coord,
+        .bmag_max_z_coord = fdmul->bmag_max_z_coord,
+        .bmag_wall = fdmul->bmag_wall,
+        .bmag_wall_z_coord = fdmul->bmag_wall_z_coord,
+        .bmag_tandem = fdmul->bmag_tandem,
+        .bmag_tandem_z_coord = fdmul->bmag_tandem_z_coord,
+        .is_tandem = is_tandem,
+        .bmag_max_basis = fdmul->bmag_max_basis,
+        .bmag_max_range = fdmul->bmag_max_range,
         .mass = gks->info.mass,
         .charge = gks->info.charge,
         .qtype = qtype,
         .num_quad = num_quad,
         .cellwise_trap_loss = cellwise_const,
-        .c2p_pos_func = proj_on_basis_c2p_position_func,
-        .c2p_pos_func_ctx = &fdmul->proj_on_basis_c2p_ctx,
         .use_gpu = app->use_gpu,
       };
-      fdmul->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
+      fdmul->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
 
       fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_loss_cone_mult;
+      fdmul->advance_times_omegaH_func = gk_species_fdot_multiplier_advance_omegaH_mult;
       fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_mult;
-      if (fdmul->write_diagnostics)
+      if (fdmul->write_diagnostics) {
         fdmul->write_func = gk_species_fdot_multiplier_write_enabled;
-      else
+      }
+      else {
         gkyl_array_release(fdmul->multiplier_host);
+      }
     }
   }
 }
 
 void
-gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
   struct timespec wst = gkyl_wall_clock();
@@ -250,9 +327,22 @@ gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, const st
 
   app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst);
 }
-  
+
 void
-gk_species_fdot_multiplier_advance_times_rate(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+gk_species_fdot_multiplier_advance_times_omegaH(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, double *out)
+{
+  struct timespec wst = gkyl_wall_clock();
+
+  fdmul->advance_times_omegaH_func(app, gks, fdmul, out);
+
+  app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst);
+}
+
+void
+gk_species_fdot_multiplier_advance_times_rate(gkyl_gyrokinetic_app *app,
+  const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out)
 {
   struct timespec wst = gkyl_wall_clock();
@@ -260,46 +350,48 @@ gk_species_fdot_multiplier_advance_times_rate(gkyl_gyrokinetic_app *app, const s
   fdmul->advance_times_rate_func(app, gks, fdmul, phi, out);
 
   app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst);
-  
 }
 
 void
-gk_species_fdot_multiplier_write(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_fdot_multiplier_write(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
   gks->fdot_mult.write_func(app, gks, tm, frame);
 }
 
 void
-gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, const struct gk_fdot_multiplier *fdmul)
+gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app,
+  const struct gk_fdot_multiplier *fdmul)
 {
   if (fdmul->type) {
     gkyl_array_release(fdmul->multiplier);
-    if (fdmul->write_diagnostics)
+    if (fdmul->write_diagnostics) {
       gkyl_array_release(fdmul->multiplier_host);
+    }
 
-    if (fdmul->type == GKYL_GK_DAMPING_USER_INPUT) {
+    if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_USER_INPUT) {
       // Nothing to release.
     }
-    else if (fdmul->type == GKYL_GK_DAMPING_LOSS_CONE) {
-      if (app->use_gpu) {
-        gkyl_cu_free(fdmul->bmag_max);
-        gkyl_cu_free(fdmul->bmag_max_coord);
-        gkyl_cu_free(fdmul->phi_m);
-        gkyl_cu_free(fdmul->phi_m_global);
-      }
-      else {
-        gkyl_free(fdmul->bmag_max);
-        gkyl_free(fdmul->bmag_max_coord);
-        gkyl_free(fdmul->phi_m);
-        gkyl_free(fdmul->phi_m_global);
-      }
+    else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE) {
+      gkyl_array_release(fdmul->bmag_max);
+      gkyl_array_release(fdmul->bmag_max_z_coord);
+      gkyl_array_release(fdmul->bmag_wall);
+      gkyl_array_release(fdmul->bmag_wall_z_coord);
+      gkyl_array_release(fdmul->bmag_tandem);
+      gkyl_array_release(fdmul->bmag_tandem_z_coord);
+
+      gkyl_array_release(fdmul->phi_at_bmag_max);
+      gkyl_array_release(fdmul->phi_at_bmag_tandem);
+
+      gkyl_array_release(fdmul->phi_smooth_global);
+      gkyl_array_dg_find_peaks_release(fdmul->bmag_peak_finder);
       gkyl_loss_cone_mask_gyrokinetic_release(fdmul->lcm_proj_op);
     }
   }
 }
 
 void
-gk_species_fdot_multiplier_reset(gkyl_gyrokinetic_app* app, double tm, struct gk_species *gks,
+gk_species_fdot_multiplier_reset(gkyl_gyrokinetic_app *app, double tm, struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp)
 {
   gk_species_fdot_multiplier_release(app, fdmul);
diff --git a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
index ae971282b..c49da3a1a 100644
--- a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
+++ b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
@@ -801,12 +801,27 @@ struct gk_source {
 struct gk_damping {
   enum gkyl_gyrokinetic_damping_type type; // Type of damping term.
   bool evolve; // Whether the source is time dependent.
+  bool is_tandem; // Whether we are doing a tandem mirror.
   struct gkyl_array *rate; // Damping rate.
   struct gkyl_array *rate_host; // Host copy for use in IO and projecting.
+  struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
   struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask.
-  double *bmag_max; // Maximum magnetic field amplitude.
-  double *bmag_max_coord; // Location of bmag_max.
-  double *phi_m, *phi_m_global; // Electrostatic potential at bmag_max.
+  struct gkyl_array_dg_find_peaks *bmag_peak_finder; // Finds peaks in bmag along parallel direction.
+  struct gkyl_array *phi_smooth_global; // Allgathered copy of phi used by the peak projections.
+  // Per-field-line peak arrays; each one is released with gkyl_array_release on teardown.
+  const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
+  const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
+  const struct gkyl_array *bmag_wall; // Magnetic field amplitude at the wall per field line.
+  const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag_wall per field line.
+  const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case).
+  const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate of bmag_tandem per field line.
+  const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
+  const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
+  const struct gkyl_range *bmag_max_range_ext; // Extended range for bmag_max arrays.
+  int bmag_max_peak_idx; // Index of the LOCAL_MAX peak in the peak finder.
+  int bmag_tandem_peak_idx; // Index of the TANDEM_MIRROR peak in the peak finder.
+  struct gkyl_array *phi_at_bmag_max; // Phi projected at the bmag_max peak (bmag_max_peak_idx).
+  struct gkyl_array *phi_at_bmag_tandem; // Phi projected at the tandem peak (if is_tandem).
   struct gkyl_array *scale_prof; // Conf-space scaling factor profile.
   // Functions chosen at runtime.
   void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame);
@@ -816,19 +831,36 @@ struct gk_fdot_multiplier {
   enum gkyl_gyrokinetic_fdot_multiplier_type type; // Type of multiplicative function term.
   bool write_diagnostics; // Whether to write diagnostics out.
   bool evolve; // Whether the multiplicative function is time dependent.
+  bool is_tandem; // Whether we are doing a tandem mirror
   struct gkyl_array *multiplier; // Damping rate.
   struct gkyl_array *multiplier_host; // Host copy for use in IO and projecting.
   struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
   struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask.
-  double *bmag_max; // Maximum magnetic field amplitude.
-  double *bmag_max_coord; // Location of bmag_max.
-  double *phi_m, *phi_m_global; // Electrostatic potential at bmag_max.
+  // Updater to find bmag peaks (mirror throat location).
+  struct gkyl_array_dg_find_peaks *bmag_peak_finder; // Finds peaks in bmag along parallel direction.
+  struct gkyl_array *phi_smooth_global; // Allgathered copy of phi on the global extended range.
+  // Peak arrays: references acquired from bmag_peak_finder, released with gkyl_array_release.
+  const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
+  const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
+  const struct gkyl_array *bmag_wall; // Magnetic field amplitude at the wall per field line.
+  const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag_wall per field line.
+  const struct gkyl_array *bmag_tandem; // Field at the tandem peak (the bmag_max peak if not tandem).
+  const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate of bmag_tandem per field line.
+  const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays (from the peak finder).
+  const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays (from the peak finder).
+  const struct gkyl_range *bmag_max_range_ext; // Extended range for bmag_max arrays.
+  int bmag_max_peak_idx; // Peak-finder index of the bmag maximum (num_peaks-2).
+  int bmag_tandem_peak_idx; // Peak-finder index of the tandem peak (num_peaks-4, else num_peaks-2).
+  struct gkyl_array *phi_at_bmag_max; // Phi projected at the bmag_max peak.
+  struct gkyl_array *phi_at_bmag_tandem; // Phi projected at the tandem peak (only if is_tandem).
   // Functions chosen at runtime.
   void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame);
   void (*advance_times_rate_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
     struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out);
   void (*advance_times_cfl_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
     struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out);
+  void (*advance_times_omegaH_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, double *out);
 };
 
 struct gk_heating {
@@ -2830,6 +2862,17 @@ void gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk
 void gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
   struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out);
 
+/**
+ * Multiply the omegaH rate.
+ *
+ * @param app gyrokinetic app object.
+ * @param gks Species object.
+ * @param fdmul Species df/dt multiplier object.
+ * @param out omegaH rate to multiply.
+ */
+void gk_species_fdot_multiplier_advance_times_omegaH(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+  struct gk_fdot_multiplier *fdmul, double *out);
+
 /**
  * Multiply df/dt.
  *
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
index 87c13efe3..e8e12e017 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
@@ -30,80 +30,44 @@ struct gk_poa_phase_params {
 };
 
 // Define the context of the simulation. This is basically all the globals
-struct gk_mirror_ctx
-{
+struct gk_mirror_ctx {
   int cdim, vdim; // Dimensionality.
-
   // Plasma parameters
-  double mi; // Ion mass.
-  double me; // Electron mass.
-  double qi; // Ion charge.
-  double qe; // Electron charge.
-  double Te0; // Electron temperature.
-  double Ti0; // Ion temperature.
-  double n0; // Density.
-  double B_p; // Plasma magnetic field (mirror center).
-  double beta; // Plasma beta in the center.
-  double tau; // Temperature ratio.
-
-  // Parameters controlling initial conditions.
-  double alim;
-  double alphaIC0;
-  double alphaIC1;
-  double Ti_perp0; // Reference ion perp temperature.
-  double Ti_par0; // Reference ion par temperature.
-  double Ti_perp_m; // Ion perp temperature at the throat.
-  double Ti_par_m; // Ion par temperature at the throat.
-  double cs_m; // Ion sound speed at the throat.
-
-  double nuFrac; // Fraction multiplying collision frequency.
-  double logLambdaIon; // Ion Coulomb logarithm.
-  double nuIon; // Ion-ion collision freq.
-
-  double vti; // Ion thermal speed.
-  double vte; // Electron thermal speed.
-  double c_s; // Ion sound speed.
-  double omega_ci; // Ion gyrofrequency.
-  double rho_s; // Ion sound gyroradius.
-
+  double mi;
+  double qi;
+  double me;
+  double qe;
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau;
+  double Ti0;
+  double nuFrac;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  double vti;
   double RatZeq0; // Radius of the field line at Z=0.
-  double Z_min; // Minimum axial coordinate Z.
-  double Z_max; // Maximum axial coordinate Z.
-  double z_min; // Minimum value of the position along the field line.
-  double z_max; // Maximum value of the position along the field line.
-  double psi_eval; // Psi (poloidal flux) of the field line.
-  double psi_in, z_in; // Auxiliary psi and z.
-
-  // Magnetic equilibrium model.
-  double mcB;
-  double gamma;
-  double Z_m; // Axial coordinate at mirror throat.
-  double z_m; // Computational coordinate at mirror throat.
-
-  // Source parameters
-  double NSrcIon;
-  double lineLengthSrcIon;
-  double sigSrcIon;
-  double NSrcFloorIon;
-  double TSrc0Ion;
-  double TSrcFloorIon;
-
-  // Physical velocity space limits.
-  double vpar_min_ion, vpar_max_ion;
+  // Axial coordinate Z extents. Ensure that Z=0 is not on the boundary of a cell (due to AD errors).
+  double z_min;
+  double z_max;
+  double psi_eval;
+  // Physical velocity space limits.
+  double vpar_max_ion;
   double mu_max_ion;
-  // Computational velocity space limits.
-  double vpar_lin_fac_inv, mu_lin_fac_inv; // Inverse factor of where linear mapping ends.
-  double vpar_pow, mu_pow; // Power of the velocity grid.
-  double vpar_min_ion_c, vpar_max_ion_c;
-  double mu_min_ion_c, mu_max_ion_c;
-
-  // Grid DOF.
+  int Npsi;
   int Nz;
   int Nvpar;
   int Nmu;
   int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
   int poly_order;
 
+  // Source parameters
+  double ion_source_amplitude;
+  double ion_source_sigma;
+  double ion_source_temp;
+
   double t_end; // End time.
   int num_frames; // Number of output frames.
   int num_phases; // Number of phases.
@@ -112,6 +76,15 @@ struct gk_mirror_ctx
   double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
   double dt_failure_tol; // Minimum allowable fraction of initial time-step.
   int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+
+  // Geometry parameters for Lorentzian mirror
+  double mcB;     // Magnetic field parameter
+  double gamma;   // Width parameter for Lorentzian profile
+  double Z_m;     // Mirror throat location
+  double Z_min;   // Minimum Z coordinate
+  double Z_max;   // Maximum Z coordinate
+  double psi_in;  // Working variable for psi integration
+  double z_in;    // Working variable for z integration
 };
 
 double
@@ -121,9 +94,10 @@ psi_RZ(double RIn, double ZIn, void *ctx)
   double mcB = app->mcB;
   double gamma = app->gamma;
   double Z_m = app->Z_m;
+
   double psi = 0.5 * pow(RIn, 2.) * mcB *
-               (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
-                1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
   return psi;
 }
 
@@ -131,9 +105,13 @@ double
 R_psiZ(double psiIn, double ZIn, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  double Rout = sqrt(2.0 * psiIn / (app->mcB * 
-    (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) +
-     1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))))));
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+
+  double Rout = sqrt(2. * psiIn / (mcB *
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))))));
   return Rout;
 }
 
@@ -141,17 +119,21 @@ void
 Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
 {
   struct gk_mirror_ctx *app = ctx;
-  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
   double mcB = app->mcB;
   double gamma = app->gamma;
   double Z_m = app->Z_m;
-  *BRad = -(1.0 / 2.0) * Rcoord * mcB *
-          (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
-            2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
-  *BZ = mcB *
-        (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
-         1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))));
-  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+
+  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
+
+  BRad[0] = -(1. / 2.) * Rcoord * mcB *
+    (-2. * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
+    2. * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
+
+  BZ[0] = mcB *
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) );
+
+  Bmag[0] = sqrt(pow(BRad[0], 2) + pow(BZ[0], 2));
 }
 
 double
@@ -168,15 +150,13 @@ double
 z_psiZ(double psiIn, double ZIn, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  app->psi_in = psiIn;
   double eps = 0.0;
+  app->psi_in = psiIn;
   struct gkyl_qr_res integral;
-  if (eps <= ZIn)
-  {
+  if (eps <= ZIn) {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14);
   }
-  else
-  {
+  else {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14);
     integral.res = -integral.res;
   }
@@ -200,14 +180,12 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   app->psi_in = psiIn;
   app->z_in = zIn;
   struct gkyl_qr_res Zout;
-  if (zIn >= 0.0)
-  {
+  if (0.0 <= zIn) {
     double fl = root_Z_psiz(-eps, ctx);
     double fr = root_Z_psiz(app->Z_max + eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14);
   }
-  else
-  {
+  else {
     double fl = root_Z_psiz(app->Z_min - eps, ctx);
     double fr = root_Z_psiz(eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14);
@@ -215,235 +193,128 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   return Zout.res;
 }
 
+// Geometry evaluation functions for the gk app
 void
-eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
 {
-  double z = xn[0];
+  double psi = xc[0], theta = xc[1], z = xc[2];
 
-  struct gk_mirror_ctx *app = ctx;
-  double NSrc = app->NSrcIon;
-  double zSrc = app->lineLengthSrcIon;
-  double sigSrc = app->sigSrcIon;
-  double NSrcFloor = app->NSrcFloorIon;
+  double Z = Z_psiz(psi, z, ctx);
+  double R = R_psiZ(psi, Z, ctx);
 
-  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
-  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  double x = R * cos(theta);
+  double y = R * sin(theta);
 
-  if (fabs(Z) <= app->Z_m)
-  {
-    fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2))) *
-                              exp(-pow(z - zSrc, 2) / (2.0 * pow(sigSrc, 2))));
-  }
-  else
-  {
-    fout[0] = 1e-16;
-  }
+  xp[0] = x; xp[1] = y; xp[2] = Z;
 }
 
 void
-eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
-  fout[0] = 0.0;
+  struct gk_mirror_ctx *app = ctx;
+  double z = xc[2];
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+
+  double phi = xc[1];
+  // xc are computational coords.
+  // Set Cartesian components of magnetic field.
+  fout[0] = BRad * cos(phi);
+  fout[1] = BRad * sin(phi);
+  fout[2] = BZ;
 }
 
+// Evaluate collision frequencies
 void
-eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double sigSrc = app->sigSrcIon;
-  double TSrc0 = app->TSrc0Ion;
-  double Tfloor = app->TSrcFloorIon;
-
-  if (fabs(z) <= 2.0 * sigSrc)
-  {
-    fout[0] = TSrc0;
-  }
-  else
-  {
-    fout[0] = Tfloor;
-  }
+  fout[0] = app->nuIon;
 }
 
-// Ion initial conditions
 void
 eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double sigma = 0.9*z_m;
-  if (fabs(z) <= sigma)
-  {
-    fout[0] = 0.5*app->n0*(1. + tanh(10. * sigma * fabs(sigma - fabs(z))));
-  }
-  else
-  {
-    fout[0] = 0.5*app->n0*exp(-5 * (fabs(sigma - fabs(z))));
-  }
+  double z = xn[0];
+  fout[0] = 1e17 * exp(-2 * pow(fabs(z), 2));
 }
 
 void
 eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double cs_m = app->cs_m;
-  double z_m = app->z_m;
-  double z_max = app->z_max;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = 0.0;
-  }
-  else
-  {
-    fout[0] = (fabs(z) / z) * cs_m * tanh(3 * (z_max - z_m) * fabs(fabs(z) - z_m));
-  }
+  fout[0] = 0.0;
 }
 
 void
-eval_temp_par_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double Ti_par0 = app->Ti_par0;
-  double Ti_par_m = app->Ti_par_m;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = Ti_par_m+(Ti_par0-Ti_par_m)*tanh(4 * fabs(z_m - fabs(z)));
-  }
-  else
-  {
-    fout[0] = Ti_par_m;
-  }
+  fout[0] = app->Ti0;
 }
 
 void
-eval_temp_perp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double Ti_perp0 = app->Ti_perp0;
-  double Ti_perp_m = app->Ti_perp_m;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = Ti_perp_m - Ti_perp0*tanh(3.*fabs(z_m-fabs(z)));
+  double z = xn[0];
+  double src_amp = app->ion_source_amplitude;
+  double z_src = 0.0;
+  double src_sigma = app->ion_source_sigma;
+  double src_amp_floor = src_amp * 1e-2;
+  if (fabs(z) <= 0.98) {
+    fout[0] = src_amp * (1 - pow(fabs(z), 6) / 0.98);
   }
-  else
-  {
-    fout[0] = Ti_perp_m * GKYL_MAX2(1.e-3, exp(-5. * (fabs(z_m - fabs(z)))));
+  else {
+    fout[0] = 1e-16;
   }
 }
 
 void
-evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
-{
-  struct gk_mirror_ctx *app = ctx;
-  fout[0] = app->nuIon;
-}
-
-// Geometry evaluation functions for the gk app
-// mapc2p must assume a 3d input xc
-void
-mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
-{
-  double psi = xc[0];
-  double theta = xc[1];
-  double z = xc[2];
-
-  double Z = Z_psiz(psi, z, ctx);
-  double R = R_psiZ(psi, Z, ctx);
-
-  // Cartesian coordinates on plane perpendicular to Z axis.
-  double x = R * cos(theta);
-  double y = R * sin(theta);
-  xp[0] = x;
-  xp[1] = y;
-  xp[2] = Z;
-}
-
-// bmag_func must assume a 3d input xc
-void
-bmag_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
-  double z = xc[2];
-
-  struct gk_mirror_ctx *app = ctx;
-  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
-  double Z = Z_psiz(psi, z, ctx);
-  double BRad, BZ, Bmag;
-  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-  fout[0] = Bmag;
+  fout[0] = 0.0;
 }
 
-// bfield_func must assume a 3d input xc
 void
-bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
-  double z = xc[2];
-
   struct gk_mirror_ctx *app = ctx;
-  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
-  double Z = Z_psiz(psi, z, ctx);
-  double BRad, BZ, Bmag;
-  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-
-  double phi = xc[1];
-  // zc are computational coords. 
-  // Set Cartesian components of magnetic field.
-  fout[0] = BRad*cos(phi);
-  fout[1] = BRad*sin(phi);
-  fout[2] = BZ;
+  double z = xn[0];
+  double TSrc0 = app->ion_source_temp;
+  double Tfloor = TSrc0 * 1e-2;
+  if (fabs(z) <= 0.98) {
+    fout[0] = TSrc0;
+  }
+  else {
+    fout[0] = Tfloor;
+  }
 }
 
-void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double vpar_max_ion = app->vpar_max_ion;
   double mu_max_ion = app->mu_max_ion;
 
   double cvpar = vc[0], cmu = vc[1];
-  // Linear map up to vpar_max/lin_frac_inv, then a power grid.
-  double vpar_lin_fac_inv = app->vpar_lin_fac_inv;
-  double vpar_pow = app->vpar_pow;
-  if (fabs(cvpar) <= 1.0/vpar_lin_fac_inv)
-    vp[0] = vpar_max_ion*cvpar;
-  else if (cvpar < -1.0/vpar_lin_fac_inv)
-    vp[0] = -vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow);
-  else
-    vp[0] =  vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow);
-
-//  // Quadratic mu.
-//  vp[1] = mu_max_ion*pow(cmu,2.0);
-  // Linear map up to mu_max/lin_frac_inv, then a power grid.
-  double mu_lin_fac_inv = app->mu_lin_fac_inv;
-  double mu_pow = app->mu_pow;
-//  if (cmu <= 1.0/mu_lin_fac_inv)
-//    vp[0] = mu_max_ion*cmu;
-//  else
-//    vp[0] = mu_max_ion*pow(mu_lin_fac_inv,mu_pow-1)*pow(cmu,mu_pow);
-  double w = 0.3;
-  double f = 0.012;
-  double a = mu_max_ion*(f-1.0)/(w*w-1.0);
-  double b = mu_max_ion*(w*w-f)/(w*w-1.0);
-  if (cmu <= w)
-    vp[1] = (f*mu_max_ion/w)*cmu;
-  else
-    vp[1] = a*pow(cmu,2)+b;
-
+  double b = 1.4;
+  vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b);
+  vp[1] = mu_max_ion * pow(cmu, 3);
 }
 
 struct gk_mirror_ctx
 create_ctx(void)
 {
   int cdim = 1, vdim = 2; // Dimensionality.
+  int poly_order = 1;
 
   // Universal constant parameters.
   double eps0 = GKYL_EPSILON0;
@@ -463,202 +334,150 @@ create_ctx(void)
   double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
   double Ti0 = tau * Te0;
 
-  // Parameters controlling initial conditions.
-  double alim = 0.125;
-  double alphaIC0 = 2;
-  double alphaIC1 = 10;
-
-  double nuFrac = 1.0;
   // Ion-ion collision freq.
+  double nuFrac = 1.0;
   double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
   double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
-                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
 
   // Thermal speeds.
   double vti = sqrt(Ti0 / mi);
-  double vte = sqrt(Te0 / me);
-  double c_s = sqrt(Te0 / mi);
-
-  // Gyrofrequencies and gyroradii.
-  double omega_ci = eV * B_p / mi;
-  double rho_s = c_s / omega_ci;
-
-  // Geometry parameters.
-  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
-  // Axial coordinate Z extents. Endure that Z=0 is not on
-  // the boundary of a cell (due to AD errors).
-  double Z_min = -2.5;
-  double Z_max =  2.5;
-
-  // Parameters controlling the magnetic equilibrium model.
-  double mcB = 6.51292;
-  double gamma = 0.124904;
-  double Z_m = 0.98;
-
-  // Source parameters
-  double NSrcIon = 3.1715e23 / 8.0 / 40.0 / 2.0 * 1.25;
-  double lineLengthSrcIon = 0.0;
-  double sigSrcIon = Z_m / 4.0;
-  double NSrcFloorIon = 0.05 * NSrcIon;
-  double TSrc0Ion = Ti0 * 1.25;
-  double TSrcFloorIon = TSrc0Ion / 8.0;
 
   // Grid parameters
   double vpar_max_ion = 16 * vti;
-  double vpar_min_ion = -vpar_max_ion;
   double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 64;
+  int Nvpar = 32; // 96 uniform
+  int Nmu = 16;  // 192 uniform
 
-  // Computational velocity space limits.
-  double vpar_lin_fac_inv = 4;
-  double vpar_pow = 3;
-  double vpar_min_ion_c = -1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow);
-  double vpar_max_ion_c =  1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow);
-  double mu_min_ion_c = 0.;
-  double mu_max_ion_c = 1.;
-  double mu_lin_fac_inv = 1.0/0.012;
-  double mu_pow = 2;
-//  double mu_min_ion_c = 0.0;
-//  double mu_max_ion_c = 1.0/pow(mu_lin_fac_inv,(mu_pow-1)/mu_pow);
-
-  // Grid DOF:
-  int Nz = 64; // Number of cells in z direction.
-  int Nvpar = 32; // Number of cells in parallel velocity direction.
-  int Nmu = 16;  // Number of cells in mu direction.
-  int poly_order = 1;
-
-  // Initial conditions parameter.s
-  double Ti_perp0 = 10000 * eV;
-  double Ti_par0 = 7500 * eV;
+  // Source parameters
+  double ion_source_amplitude = 1.e20;
+  double ion_source_sigma = 0.5;
+  double ion_source_temp = 5000. * eV;
 
-  // Parameters at mirror throat
-  double Ti_perp_m = 15000 * eV;
-  double Ti_par_m = 1000 * eV;
-  double z_m = 0.982544;
-  double cs_m = sqrt((Te0+3.0*Ti_par_m)/mi);
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  double Z_min = -2.5;
+  double Z_max = 2.5;
+  double mcB = 3.691260;
+  double gamma = 0.226381;
+  double Z_m = 0.98;
 
-  // Factor multiplying collisionless terms.
-  double alpha_oap = 0.01;
+  // POA parameters
+  double alpha_oap = 5e-6;  // Factor multiplying collisionless terms.
   double alpha_fdp = 1.0;
-  // Duration of each phase.
-  double tau_oap = 2400.0e-9;
-  double tau_fdp = 24.0e-9;
-  double tau_fdp_extra = 2*tau_fdp;
+  double tau_oap = 0.001;  // Duration of each phase.
+  double tau_fdp = 7e-9;
+  double tau_fdp_extra = 2e-9;
   int num_cycles = 2; // Number of OAP+FDP cycles to run.
 
   // Frame counts for each phase type (specified independently)
-  int num_frames_oap = 1; // Frames per OAP phase
-  int num_frames_fdp = 1; // Frames per FDP phase
-  int num_frames_fdp_extra = 2*num_frames_fdp;  // Frames for the extra FDP phase
+  int num_frames_oap = 1;        // Frames per OAP phase
+  int num_frames_fdp = 1;        // Frames per FDP phase
+  int num_frames_fdp_extra = 2;  // Frames for the extra FDP phase
 
   // Whether to evolve the field.
   bool is_static_field_oap = true;
   bool is_static_field_fdp = false;
-  // Whether to enable positivity.
+
+  // Whether positivity is enabled.
   bool is_positivity_enabled_oap = false;
   bool is_positivity_enabled_fdp = true;
+
   // Type of df/dt multipler.
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
 
   // Calculate phase structure
-  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
-  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
-  int num_phases = 2*num_cycles + 1;
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2 * num_cycles + 1;
   int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
 
-  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
-  for (int i=0; i<(num_phases-1)/2; i++) {
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
     // OAPs.
-    poa_phases[2*i].phase = GK_POA_OAP;
-    poa_phases[2*i].num_frames = num_frames_oap;
-    poa_phases[2*i].duration = tau_oap;
-    poa_phases[2*i].alpha = alpha_oap;
-    poa_phases[2*i].is_static_field = is_static_field_oap;
-    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
-    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
 
     // FDPs.
-    poa_phases[2*i+1].phase = GK_POA_FDP;
-    poa_phases[2*i+1].num_frames = num_frames_fdp;
-    poa_phases[2*i+1].duration = tau_fdp;
-    poa_phases[2*i+1].alpha = alpha_fdp;
-    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
-    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
-    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
   }
-  // Add an extra, longer FDP.
-  poa_phases[num_phases-1].phase = GK_POA_FDP;
-  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
-  poa_phases[num_phases-1].duration = tau_fdp_extra;
-  poa_phases[num_phases-1].alpha = alpha_fdp;
-  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
-  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
-  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
-
-  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  // The final stage is an extra FDP of duration tau_fdp_extra.
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 1; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
   double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
   double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
   int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
 
   struct gk_mirror_ctx ctx = {
-    .cdim = cdim,  .vdim = vdim,
-    .mi = mi,  .qi = qi,
-    .me = me,  .qe = qe,
-    .Te0 = Te0,  .Ti0 = Ti0,  .n0 = n0,
-    .B_p = B_p,  .beta = beta,  .tau = tau,
-    .alim = alim,
-    .alphaIC0 = alphaIC0,
-    .alphaIC1 = alphaIC1,
-    .nuFrac = nuFrac,  .logLambdaIon = logLambdaIon,  .nuIon = nuIon,
-    .vti = vti,  .vte = vte,  .c_s = c_s,
-    .omega_ci = omega_ci,  .rho_s = rho_s,
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .nuFrac = nuFrac,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
     .RatZeq0 = RatZeq0,
-    .Z_min = Z_min,  .Z_max = Z_max,
-    // Parameters controlling the magnetic equilibrium model.
-    .mcB = mcB,  .gamma = gamma,
-    .Z_m = Z_m,
-    .z_m = z_m,
-    // Initial condition parameters.
-    .Ti_perp0 = Ti_perp0,  .Ti_par0 = Ti_par0,
-    .Ti_perp_m = Ti_perp_m,  .Ti_par_m = Ti_par_m,  .cs_m = cs_m,
-    // Source parameters
-    .NSrcIon = NSrcIon,  .NSrcFloorIon = NSrcFloorIon,
-    .TSrc0Ion = TSrc0Ion,  .TSrcFloorIon = TSrcFloorIon,
-    .lineLengthSrcIon = lineLengthSrcIon,  .sigSrcIon = sigSrcIon,
-    // Physical velocity space limits.
-    .vpar_min_ion = vpar_min_ion,
     .vpar_max_ion = vpar_max_ion,
     .mu_max_ion = mu_max_ion,
-    // Computational velocity space limits.
-    .vpar_lin_fac_inv = vpar_lin_fac_inv,
-    .vpar_pow = vpar_pow,
-    .vpar_min_ion_c = vpar_min_ion_c,
-    .vpar_max_ion_c = vpar_max_ion_c,
-    .mu_lin_fac_inv = mu_lin_fac_inv,
-    .mu_pow = mu_pow,
-    .mu_min_ion_c = mu_min_ion_c,
-    .mu_max_ion_c = mu_max_ion_c,
-    // Grid DOF.
     .Nz = Nz,
     .Nvpar = Nvpar,
     .Nmu = Nmu,
-    .cells = {Nz, Nvpar, Nmu},
+    .cells = { Nz, Nvpar, Nmu },
     .poly_order = poly_order,
-    // Time integration and I/O parameters.
     .t_end = t_end,
     .num_frames = num_frames,
     .num_phases = num_phases,
     .poa_phases = poa_phases,
-    .write_phase_freq     = write_phase_freq    , 
-    .int_diag_calc_freq   = int_diag_calc_freq  , 
-    .dt_failure_tol       = dt_failure_tol      , 
-    .num_failures_max     = num_failures_max    , 
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+
+    .ion_source_amplitude = ion_source_amplitude,
+    .ion_source_sigma = ion_source_sigma,
+    .ion_source_temp = ion_source_temp,
+
+    .mcB = mcB,
+    .gamma = gamma,
+    .Z_m = Z_m,
+    .Z_min = Z_min,
+    .Z_max = Z_max,
   };
 
   // Populate a couple more values in the context.
   ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
-  ctx.z_min    = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
-  ctx.z_max    = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
 
   return ctx;
 }
@@ -670,25 +489,25 @@ release_ctx(struct gk_mirror_ctx *ctx)
 }
 
 void
-calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
   double t_curr, bool force_calc, double dt)
 {
   if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
     gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
     gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
 
-    if ( !(dt < 0.0) )
+    if (!(dt < 0.0) )
       gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
   }
 }
 
 void
-write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
-  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
 {
   bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
   if (trig_now_conf || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
     gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
 
     gkyl_gyrokinetic_app_write_field_energy(app);
@@ -698,7 +517,7 @@ write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
 
   bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
   if (trig_now_phase || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
 
     gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
   }
@@ -720,7 +539,7 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   double t_end = tfs->t_end;
   int frame_curr = tfs->frame_curr;
   int num_frames = tfs->num_frames;
-  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
 
   // Prevent division by zero when frame_curr equals num_frames
   int frames_remaining = num_frames - frame_curr;
@@ -734,15 +553,16 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   trig_write_phase->tcurr = t_curr;
   trig_write_phase->curr = frame_curr;
 
-  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  int diag_frames = GKYL_MAX2(frames_remaining,
+    (num_int_diag_calc / num_frames) * frames_remaining);
   trig_calc_intdiag->dt = time_remaining / diag_frames;
   trig_calc_intdiag->tcurr = t_curr;
   trig_calc_intdiag->curr = frame_curr;
 }
 
-void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
   struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
-  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
   struct gk_poa_phase_params *pparams)
 {
   tfs->t_end = tfs->t_curr + pparams->duration;
@@ -751,7 +571,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   // Run an OAP or FDP.
   double t_curr = tfs->t_curr;
   double t_end = tfs->t_end;
-  
+
   // Reset I/O triggers:
   reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
 
@@ -791,27 +611,21 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   int num_failures = 0, num_failures_max = ctx->num_failures_max;
 
   long step = 1;
-  while ((t_curr < t_end) && (step <= num_steps))
-  {
-    if (step == 1 || step % 20 == 0)
-      gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
-
-    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+  while ((t_curr < t_end) && (step <= num_steps)) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
+    dt = t_end - t_curr; // Ensure we don't step beyond t_end.
     struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+    gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
 
-    if (step == 1 || step % 20 == 0)
-      gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
-
-    if (!status.success)
-    {
+    if (!status.success) {
       gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
       break;
     }
     t_curr += status.dt_actual;
     dt = status.dt_suggested;
 
-    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual);
-    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end);
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end);
 
     if (dt_init < 0.0) {
       dt_init = status.dt_actual;
@@ -823,8 +637,10 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
       gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
       gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
       if (num_failures >= num_failures_max) {
-        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
-        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
         calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
         write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
         break;
@@ -838,7 +654,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   }
 
   tfs->t_curr = t_curr;
-  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
 }
 
 int main(int argc, char **argv)
@@ -857,96 +673,106 @@ int main(int argc, char **argv)
   struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
 
   int cells_x[ctx.cdim], cells_v[ctx.vdim];
-  for (int d=0; d<ctx.cdim; d++)
+  for (int d = 0; d < ctx.cdim; d++) {
     cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
-  for (int d=0; d<ctx.vdim; d++)
-    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
 
   // Construct communicator for use in app.
   struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
 
   struct gkyl_gyrokinetic_species ion = {
     .name = "ion",
-    .charge = ctx.qi,  .mass = ctx.mi,
+    .charge = ctx.qi,
+    .mass = ctx.mi,
     .vdim = ctx.vdim,
-    .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c},
-    .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c},
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
     .cells = { cells_v[0], cells_v[1] },
-
     .polarization_density = ctx.n0,
 
-    .mapc2p = {
-      .mapping = mapc2p_vel_ion,
-      .ctx = &ctx,
-    },
-
     .projection = {
-      .proj_id = GKYL_PROJ_BIMAXWELLIAN,
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
       .density = eval_density_ion,
-      .upar = eval_upar_ion,
-      .temppar = eval_temp_par_ion,
-      .tempperp = eval_temp_perp_ion,
       .ctx_density = &ctx,
+      .upar = eval_upar_ion,
       .ctx_upar = &ctx,
-      .ctx_temppar = &ctx,
-      .ctx_tempperp = &ctx,
+      .temp = eval_temp_ion,
+      .ctx_temp = &ctx,
+    },
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
     },
 
     .collisionless = {
       .type = GKYL_GK_COLLISIONLESS_ES,
       .scale_factor = 1.0, // Will be replaced below.
+      .write_diagnostics = true,
+    },
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE,
+      .cellwise_const = true,
+      .write_diagnostics = true,
     },
 
-    .collisions =  {
+    .collisions = {
       .collision_id = GKYL_LBO_COLLISIONS,
-      .self_nu = evalNuIon,
-      .self_nu_ctx = &ctx,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .write_diagnostics = true,
     },
-
     .source = {
       .source_id = GKYL_PROJ_SOURCE,
       .num_sources = 1,
       .projection[0] = {
-        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
-	.density = eval_density_ion_source,
-        .upar = eval_upar_ion_source,
-        .temp = eval_temp_ion_source,
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
         .ctx_density = &ctx,
+        .density = eval_density_ion_source,
         .ctx_upar = &ctx,
+        .upar = eval_upar_ion_source,
         .ctx_temp = &ctx,
-      }, 
-    },
-
-    .time_rate_multiplier = {
-      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
-      .cellwise_const = true,
-      .write_diagnostics = true,
-    },
-
-    .positivity = {
-      .type = GKYL_GK_POSITIVITY_SHIFT,
-      .write_diagnostics = true,
+        .temp = eval_temp_ion_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 6,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+      },
     },
 
     .bcs = {
       { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
       { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
     },
-
-    .num_diag_moments = 4,
-    .diag_moments = {GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    },
   };
-
   struct gkyl_gyrokinetic_field field = {
     .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
     .electron_mass = ctx.me,
     .electron_charge = ctx.qe,
     .electron_temp = ctx.Te0,
-    .is_static = false, // So solvers are allocated.
+    .is_static = false,
   };
 
-  // GK app
-  struct gkyl_gk app_inp = { 
+  struct gkyl_gk app_inp = {  // GK app
     .cdim = ctx.cdim,
     .lower = {ctx.z_min},
     .upper = {ctx.z_max},
@@ -960,14 +786,14 @@ int main(int argc, char **argv)
       .mapc2p = mapc2p, // Mapping of computational to physical space.
       .c2p_ctx = &ctx,
       .bfield_func = bfield_func, // Magnetic field.
-      .bfield_ctx = &ctx
+      .bfield_ctx = &ctx,
     },
 
     .num_periodic_dir = 0,
     .periodic_dirs = {},
 
     .num_species = 1,
-    .species = {ion},
+    .species = { ion },
 
     .field = field,
 
@@ -978,9 +804,10 @@ int main(int argc, char **argv)
     },
   };
 
-  // Create app object.
   // Set app output name from the executable name (argv[0]).
   snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+  
+  // Create app object.
   gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
 
   // Triggers for IO.
@@ -995,10 +822,12 @@ int main(int argc, char **argv)
 
   int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
   if (app_args.is_restart) {
-    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
 
     if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
-      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
       goto freeresources;
     }
 
@@ -1009,14 +838,15 @@ int main(int argc, char **argv)
     double time_count = 0.0;
     int frame_count = 0;
     int pit_curr = 0;
-    for (int pit=0; pit<ctx.num_phases; pit++) {
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
       time_count += ctx.poa_phases[pit].duration;
       frame_count += ctx.poa_phases[pit].num_frames;
       if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
         pit_curr = pit;
         break;
       }
-    };
+    }
+    ;
     phase_idx_init = pit_curr;
 
     // Change the duration and number frames so this phase reaches the expected
@@ -1042,10 +872,11 @@ int main(int argc, char **argv)
     phase_idx_end = 1;
 
   // Loop over number of number of phases;
-  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
     gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
     struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
-    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
   }
 
   gkyl_gyrokinetic_app_stat_write(app);
@@ -1055,21 +886,22 @@ int main(int argc, char **argv)
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
-  if (stat.nstage_2_fail > 0)
-  {
-    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
-    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
   }
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
   gkyl_gyrokinetic_app_print_timings(app, stdout);
 
-  freeresources:
+freeresources:
   // simulation complete, free app
   gkyl_gyrokinetic_app_release(app);
   gkyl_gyrokinetic_comms_release(comm);
   release_ctx(&ctx);
-  
+
 #ifdef GKYL_HAVE_MPI
   if (app_args.use_mpi)
     MPI_Finalize();
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
new file mode 100644
index 000000000..cd44c494f
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
@@ -0,0 +1,917 @@
+#include <math.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State of the pseudo orbit-averaged (POA) integrator; also tags the kind of each phase.
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Integration has not started yet.
+  GK_POA_OAP, // Orbit-averaged phase (OAP).
+  GK_POA_FDP, // Full-dynamics phase (FDP).
+  GK_POA_COMPLETED, // Simulation has finished.
+};
+
+struct gk_poa_phase_params { // Settings applied to the app for one phase of the POA cycle.
+  enum gk_poa_state phase; // Kind of phase (OAP or FDP).
+  int num_frames; // Number of output frames written during this phase.
+  double duration; // Duration of this phase (added to the current time to get its end time).
+  double alpha; // Scale factor applied to the collisionless terms during this phase.
+  bool is_static_field; // If true the field is flagged static (i.e. not evolved) in this phase.
+  bool is_positivity_enabled; // Whether the positivity shift is enabled in this phase.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multiplier.
+};
+
+// Context of the simulation: all parameters shared by the init, geometry and driver functions.
+struct gk_mirror_ctx {
+  int cdim, vdim; // Configuration- and velocity-space dimensionality.
+  // Plasma parameters.
+  double mi; // Ion mass.
+  double qi; // Ion charge.
+  double me; // Electron mass.
+  double qe; // Electron charge.
+  double Te0; // Reference electron temperature.
+  double n0; // Reference density.
+  double B_p; // Reference magnetic field (used for beta, mu_max_ion and polarization_bmag).
+  double beta; // Plasma beta used to derive tau.
+  double tau; // Ion-to-electron temperature ratio (Ti0 = tau*Te0).
+  double Ti0; // Reference ion temperature.
+  double nuFrac; // Factor multiplying the ion-ion collision frequency.
+  // Ion-ion collision freq.
+  double logLambdaIon; // Coulomb logarithm entering nuIon.
+  double nuIon; // Ion-ion collision frequency.
+  double vti; // Ion thermal speed, sqrt(Ti0/mi).
+  double RatZeq0; // Radius of the field line at Z=0.
+  // Extents of the field-line coordinate z and of psi (z_min/z_max are derived from Z_min/Z_max).
+  double z_min; // z_psiZ(psi_max, Z_min).
+  double z_max; // z_psiZ(psi_max, Z_max).
+  double psi_max; // psi_RZ(RatZeq0, 0).
+  double psi_min; // psi_RZ(RatZeq0/10, 0).
+  // Velocity-space extents used by mapc2p_vel_ion, and grid resolution.
+  double vpar_max_ion; // Maximum parallel velocity of the ion grid.
+  double mu_max_ion; // Maximum magnetic moment of the ion grid.
+  int Npsi; // Number of cells in psi.
+  int Nz; // Number of cells in z.
+  int Nvpar; // Number of cells in vpar.
+  int Nmu; // Number of cells in mu.
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order; // Polynomial order of the basis.
+
+  // Source parameters
+  double ion_source_amplitude; // Amplitude of the ion source density profile.
+  double ion_source_sigma; // Source width parameter (not read by the source functions shown here).
+  double ion_source_temp; // Temperature of the ion source.
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run (heap array of num_phases entries).
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+
+  // Geometry parameters for Lorentzian mirror
+  double mcB;     // Magnetic field parameter (amplitude multiplying the Lorentzians)
+  double gamma;   // Width parameter for Lorentzian profile
+  double Z_m;     // Mirror throat location (Lorentzians are centered at Z = +/-Z_m)
+  double Z_min;   // Minimum Z coordinate
+  double Z_max;   // Maximum Z coordinate
+  double psi_in;  // Scratch: psi passed to integrand_z_psiZ / root_Z_psiz through the context
+  double z_in;    // Scratch: target z passed to root_Z_psiz through the context
+};
+
+double
+psi_RZ(double RIn, double ZIn, void *ctx)
+{
+  // Magnetic flux function psi(R,Z) = R^2/2 * mcB * [L(Z - Z_m) + L(Z + Z_m)],
+  // where L is a Lorentzian of width gamma.
+  const struct gk_mirror_ctx *geo = ctx;
+  double width = geo->gamma;
+  double throat = geo->Z_m;
+
+  double lor_upper = 1. / (M_PI * width * (1. + pow((ZIn - throat) / width, 2.)));
+  double lor_lower = 1. / (M_PI * width * (1. + pow((ZIn + throat) / width, 2.)));
+  return 0.5 * pow(RIn, 2.) * geo->mcB * (lor_upper + lor_lower);
+}
+
+double
+R_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  // Inverse of psi_RZ in R: the radius at which the flux function equals
+  // psiIn at axial position ZIn.
+  const struct gk_mirror_ctx *geo = ctx;
+  double width = geo->gamma;
+  double throat = geo->Z_m;
+
+  double lor_upper = 1. / (M_PI * width * (1. + pow((ZIn - throat) / width, 2.)));
+  double lor_lower = 1. / (M_PI * width * (1. + pow((ZIn + throat) / width, 2.)));
+  return sqrt(2. * psiIn / (geo->mcB * (lor_upper + lor_lower)));
+}
+
+void
+Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
+{
+  // Radial and axial field components, and the field magnitude, on the flux
+  // surface psiIn at axial position ZIn. Results go to BRad[0], BZ[0], Bmag[0].
+  const struct gk_mirror_ctx *geo = ctx;
+  double amp = geo->mcB, width = geo->gamma, throat = geo->Z_m;
+  double radius = R_psiZ(psiIn, ZIn, ctx);
+
+  // Z-derivative pieces of the two Lorentzians (the second enters with a minus sign).
+  double dlor_upper = -2. * (ZIn - throat) /
+    (M_PI * pow(width, 3.) * (pow(1.0 + pow((ZIn - throat) / width, 2.), 2.)));
+  double dlor_lower = 2. * (ZIn + throat) /
+    (M_PI * pow(width, 3.) * (pow(1.0 + pow((ZIn + throat) / width, 2.), 2.)));
+  double lor_upper = 1. / (M_PI * width * (1. + pow((ZIn - throat) / width, 2.)));
+  double lor_lower = 1. / (M_PI * width * (1. + pow((ZIn + throat) / width, 2.)));
+  BRad[0] = -(1. / 2.) * radius * amp * (dlor_upper - dlor_lower);
+  BZ[0] = amp * (lor_upper + lor_lower);
+  Bmag[0] = sqrt(pow(BRad[0], 2) + pow(BZ[0], 2));
+}
+
+double
+integrand_z_psiZ(double ZIn, void *ctx)
+{
+  // Integrand |B|/B_Z at ZIn on the flux surface stored in the context's psi_in.
+  double b_rad, b_z, b_mag;
+  struct gk_mirror_ctx *geo = ctx;
+  Bfield_psiZ(geo->psi_in, ZIn, ctx, &b_rad, &b_z, &b_mag);
+  return b_mag / b_z;
+}
+
+double
+z_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  // Field-line coordinate z: integral of |B|/B_Z between Z=0 and ZIn on the
+  // flux surface psiIn, with the sign flipped when ZIn lies below 0.
+  struct gk_mirror_ctx *geo = ctx;
+  const double origin = 0.0;
+  geo->psi_in = psiIn; // Read back by integrand_z_psiZ.
+
+  // Always integrate from the smaller to the larger limit; restore the sign afterwards.
+  bool forward = origin <= ZIn;
+  double lower = forward ? origin : ZIn;
+  double upper = forward ? ZIn : origin;
+  struct gkyl_qr_res quad = gkyl_dbl_exp(integrand_z_psiZ, ctx, lower, upper, 7, 1e-14);
+  return forward ? quad.res : -quad.res;
+}
+
+// Residual z_in - z(psi_in, Z); its root in Z inverts z(Z).
+double
+root_Z_psiz(double Z, void *ctx)
+{
+  struct gk_mirror_ctx *geo = ctx;
+  double z_of_Z = z_psiZ(geo->psi_in, Z, ctx);
+  return geo->z_in - z_of_Z;
+}
+
+double
+Z_psiz(double psiIn, double zIn, void *ctx)
+{
+  // Invert z_psiZ: find Z such that z_psiZ(psiIn, Z) == zIn via gkyl_ridders.
+  struct gk_mirror_ctx *geo = ctx;
+  // Bracket padding. Interestingly using a smaller value yields larger errors in some geo quantities.
+  double pad = (geo->Z_max - geo->Z_min) / geo->Nz;
+  geo->psi_in = psiIn; // Inputs consumed by root_Z_psiz.
+  geo->z_in = zIn;
+
+  // Search the padded upper half [-pad, Z_max + pad] when zIn >= 0, otherwise
+  // the padded lower half [Z_min - pad, pad].
+  bool upper_half = 0.0 <= zIn;
+  double lo = upper_half ? -pad : geo->Z_min - pad;
+  double hi = upper_half ? geo->Z_max + pad : pad;
+
+  double f_lo = root_Z_psiz(lo, ctx);
+  double f_hi = root_Z_psiz(hi, ctx);
+  struct gkyl_qr_res root = gkyl_ridders(root_Z_psiz, ctx, lo, hi, f_lo, f_hi, 1000, 1e-14);
+  return root.res;
+}
+
+// Geometry callbacks for the gk app: map computational (psi, theta, z) to Cartesian (x, y, Z).
+void
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
+{
+  const double flux = xc[0];
+  const double angle = xc[1];
+  const double arc = xc[2];
+
+  // Recover cylindrical (R, Z) on the flux surface, then rotate by the angle.
+  const double Z_cyl = Z_psiz(flux, arc, ctx);
+  const double R_cyl = R_psiZ(flux, Z_cyl, ctx);
+  xp[0] = R_cyl * cos(angle);
+  xp[1] = R_cyl * sin(angle);
+  xp[2] = Z_cyl;
+}
+
+void
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // xc are computational coords: xc[1] is the angle, xc[2] the field-line coordinate z.
+  struct gk_mirror_ctx *geo = ctx;
+  const double angle = xc[1];
+  const double arc = xc[2];
+
+  // Magnetic flux function psi of the field line passing through R = RatZeq0 at Z = 0.
+  // NOTE(review): xc[0] is not used here, whereas mapc2p treats it as psi -- confirm intended.
+  double flux = psi_RZ(geo->RatZeq0, 0.0, ctx);
+  double b_rad, b_z, b_mag;
+  Bfield_psiZ(flux, Z_psiz(flux, arc, ctx), ctx, &b_rad, &b_z, &b_mag);
+
+  // Set Cartesian components of magnetic field.
+  fout[0] = b_rad * cos(angle); fout[1] = b_rad * sin(angle); fout[2] = b_z;
+}
+
+// Collision-frequency callback: writes the constant nuIon stored in the context.
+void
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  *fout = params->nuIon;
+}
+
+void
+eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial ion density: Gaussian in z = xn[1] with a hard-coded peak of 1e17 (ctx is not used).
+  double z = xn[1];
+  fout[0] = 1e17 * exp(-2 * pow(fabs(z), 2));
+}
+
+void
+eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial ion parallel flow is zero everywhere (ctx is not used).
+  fout[0] = 0.0;
+}
+
+void
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial ion temperature: uniform, equal to Ti0 from the context.
+  *fout = ((const struct gk_mirror_ctx *)ctx)->Ti0;
+}
+
+void
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  // Ion source density as a function of z = xn[1]: a profile proportional to
+  // ion_source_amplitude for |z| <= 0.98 and a tiny constant (1e-16) outside.
+  // NOTE(review): 0.98 equals Z_m set in create_ctx -- presumably the mirror throat; confirm.
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[1];
+  double src_amp = app->ion_source_amplitude;
+  if (fabs(z) <= 0.98) {
+    fout[0] = src_amp * (1 - pow(fabs(z), 6) / 0.98);
+  }
+  else {
+    fout[0] = 1e-16;
+  }
+}
+
+void
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  *fout = 0.0; // The ion source carries no parallel flow.
+}
+
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  // Ion source temperature as a function of z = xn[1]: ion_source_temp for
+  // |z| <= 0.98, and 1% of it outside that region.
+  const struct gk_mirror_ctx *params = ctx;
+  const double z = xn[1];
+  const double temp_core = params->ion_source_temp;
+  const double temp_floor = temp_core * 1e-2;
+  const bool in_core = fabs(z) <= 0.98;
+
+  // Select the core value or the floor.
+  fout[0] = in_core ? temp_core : temp_floor;
+}
+
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
+{
+  // Nonuniform velocity map: computational (vc[0], vc[1]) -> physical (vpar, mu).
+  const struct gk_mirror_ctx *params = ctx;
+  const double stretch = 1.4; // Argument scaling of the tangent map for vpar.
+  const double comp_vpar = vc[0];
+  const double comp_mu = vc[1];
+
+  vp[0] = params->vpar_max_ion * tan(comp_vpar * stretch) / tan(stretch);
+  vp[1] = params->mu_max_ion * pow(comp_mu, 3); // Cubic map for mu.
+}
+
+// Build the simulation context (by value); the caller frees poa_phases via release_ctx.
+struct gk_mirror_ctx
+create_ctx(void)
+{
+  int cdim = 2, vdim = 2; // Dimensionality.
+  int poly_order = 1;
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0;
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS;
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+
+  // Ion-ion collision freq.
+  double nuFrac = 1.0;
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+
+  // Grid parameters
+  double vpar_max_ion = 16 * vti;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 64;
+  int Npsi = 4;
+  int Nvpar = 32; // 96 uniform
+  int Nmu = 16;  // 192 uniform
+
+  // Source parameters
+  double ion_source_amplitude = 1.e20;
+  double ion_source_sigma = 0.5;
+  double ion_source_temp = 5000. * eV;
+
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  double Z_min = -2.5, Z_max = 2.5;
+  double mcB = 3.691260;
+  double gamma = 0.226381;
+  double Z_m = 0.98;
+
+  // POA parameters
+  double alpha_oap = 5e-6;  // Factor multiplying collisionless terms.
+  double alpha_fdp = 1.0;
+  double tau_oap = 0.001;  // Duration of each phase.
+  double tau_fdp = 7e-9;
+  double tau_fdp_extra = 2e-9;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 1;        // Frames per OAP phase
+  int num_frames_fdp = 1;        // Frames per FDP phase
+  int num_frames_fdp_extra = 2;  // Frames for the extra FDP phase
+
+  // Whether to evolve the field.
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+
+  // Whether positivity is enabled.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = false;
+
+  // Type of df/dt multiplier.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Calculate phase structure: num_cycles (OAP, FDP) pairs followed by one extra FDP.
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair.
+  double t_end = tau_pair * num_cycles + tau_fdp_extra;
+  int num_phases = 2 * num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < num_cycles; i++) {
+    // OAPs.
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs.
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // The final stage is an extra, longer FDP.
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 1; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .nuFrac = nuFrac,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
+    .RatZeq0 = RatZeq0,
+    .vpar_max_ion = vpar_max_ion,
+    .mu_max_ion = mu_max_ion,
+    .Npsi = Npsi,
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = { Npsi, Nz, Nvpar, Nmu },
+    .poly_order = poly_order,
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+
+    .ion_source_amplitude = ion_source_amplitude,
+    .ion_source_sigma = ion_source_sigma,
+    .ion_source_temp = ion_source_temp,
+
+    .mcB = mcB,
+    .gamma = gamma,
+    .Z_m = Z_m,
+    .Z_min = Z_min,
+    .Z_max = Z_max,
+  };
+
+  // Populate a couple more values in the context (these need the geometry fields set above).
+  ctx.psi_max = psi_RZ(ctx.RatZeq0, 0., &ctx);
+  ctx.psi_min = psi_RZ(ctx.RatZeq0 / 10, 0., &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_max, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_max, ctx.Z_max, &ctx);
+
+  return ctx;
+}
+
+// Free the heap-allocated phase table held by the context.
+void release_ctx(struct gk_mirror_ctx *ctx)
+{
+  gkyl_free(ctx->poa_phases);
+}
+
+void
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
+  double t_curr, bool force_calc, double dt)
+{
+  // Compute integrated diagnostics when the trigger fires or when forced.
+  bool trig_now = gkyl_tm_trigger_check_and_bump(iot, t_curr);
+  if (!trig_now && !force_calc) return;
+
+  gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+  gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+  if (!(dt < 0.0)) gkyl_gyrokinetic_app_save_dt(app, t_curr, dt); // Skip negative dt.
+}
+
+void
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
+{
+  // Configuration-space output (plus field energy, integrated moments and dt
+  // history) whenever the conf trigger fires or a write is forced.
+  bool conf_due = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  if (conf_due || force_write) {
+    int conf_frame = (conf_due || !force_write) ? iot_conf->curr - 1 : iot_conf->curr;
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, conf_frame);
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  // Phase-space output; note the frame index comes from the conf trigger (iot_conf), not iot_phase.
+  bool phase_due = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (!phase_due && !force_write) return;
+  int phase_frame = (conf_due || !force_write) ? iot_conf->curr - 1 : iot_conf->curr;
+  gkyl_gyrokinetic_app_write_phase(app, t_curr, phase_frame);
+}
+
+struct time_frame_state { // Time/frame bookkeeping carried from one phase to the next.
+  double t_curr; // Current simulation time; run_phase stores the reached time here on exit.
+  double t_end; // End time of current phase (t_curr + phase duration on entry to run_phase).
+  int frame_curr; // Current frame; run_phase advances it by the phase's num_frames on exit.
+  int num_frames; // Number of frames at the end of current phase.
+};
+
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  // Re-arm the I/O triggers: spread the frames left in this phase over the time left in it.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  int frame_curr = tfs->frame_curr;
+  int num_frames = tfs->num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
+
+  // Clamp to at least one frame to prevent division by zero when frame_curr equals num_frames.
+  int frames_remaining = GKYL_MAX2(1, num_frames - frame_curr);
+  double time_remaining = t_end - t_curr;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  int diags_per_frame = num_frames > 0 ? num_int_diag_calc / num_frames : 1; // Avoid int /0.
+  int diag_frames = GKYL_MAX2(frames_remaining, diags_per_frame * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  tfs->t_end = tfs->t_curr + pparams->duration; // This phase ends `duration` after the current time.
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames; // Frame count to reach by the end of it.
+
+  // Run one phase (an OAP or FDP) described by pparams, starting from the state in tfs.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+
+  // Reset I/O triggers so they cover only this phase's time window and frames.
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Build this phase's inputs; they are handed to the app through the reset_* calls below.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .type = pparams->fdot_mult_type,
+    .cellwise_const = true,
+    .write_diagnostics = true,
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx->me,
+    .electron_charge = ctx->qe,
+    .electron_temp = ctx->Te0,
+    .polarization_bmag = ctx->B_p,
+    .is_static = pparams->is_static_field,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Initial time-step request: the whole remaining phase; dt_actual reports what was taken.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check (dt_init < 0 means "not yet recorded").
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps)) { // num_steps is a double; step is a long.
+    gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
+    dt = t_end - t_curr; // Ensure we don't step beyond t_end (request at most the time left).
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+    gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success) {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested; // NOTE(review): overwritten at the top of the loop, so unused -- confirm.
+
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end);
+
+    if (dt_init < 0.0) {
+      dt_init = status.dt_actual; // First step of the phase sets the reference time-step.
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1; // Counts consecutive small steps; reset in the else branch below.
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true); // Forced final output.
+        break;
+      }
+    }
+    else {
+      num_failures = 0;
+    }
+
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr; // Hand the reached time back to the caller for the next phase.
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames; // Advanced even if the loop broke early.
+}
+
+int main(int argc, char **argv) // Set up the GK app, run (or restart) the POA phases, report stats.
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  // Cell counts: command-line values take precedence over the context defaults.
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d = 0; d < ctx.cdim; d++) {
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,
+    .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
+    .cells = { cells_v[0], cells_v[1] },
+    .polarization_density = ctx.n0,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .density = eval_density_ion,
+      .ctx_density = &ctx,
+      .upar = eval_upar_ion,
+      .ctx_upar = &ctx,
+      .temp = eval_temp_ion,
+      .ctx_temp = &ctx,
+    },
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+      .write_diagnostics = true,
+    },
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE,
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .write_diagnostics = true,
+    },
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_ion_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_ion_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_ion_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 6,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+      },
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_ZERO_FLUX },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_ABSORB },
+      { .dir = 1, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH },
+      { .dir = 1, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH },
+    },
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    },
+  };
+  struct gkyl_gyrokinetic_field field = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx.me,
+    .electron_charge = ctx.qe,
+    .electron_temp = ctx.Te0,
+    .is_static = false,
+  };
+
+  struct gkyl_gk app_inp = {  // GK app
+    .name = "gk_mirror_boltz_elc_poa_2x2v_p1",
+    .cdim = ctx.cdim,
+    .upper = { ctx.psi_max, ctx.Z_max },
+    .lower = { ctx.psi_min, ctx.Z_min },
+    .cells = { cells_x[0], cells_x[1] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+
+    .geometry = {
+      .geometry_id = GKYL_GEOMETRY_MAPC2P,
+      .world = { 0.0 },
+      .mapc2p = mapc2p, // Mapping of computational to physical space.
+      .c2p_ctx = &ctx,
+      .bfield_func = bfield_func, // Magnetic field.
+      .bfield_ctx = &ctx,
+    },
+
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+
+    .num_species = 1,
+    .species = { ion },
+
+    .field = field,
+
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0], app_args.cuts[1] },
+      .comm = comm,
+    },
+  };
+
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in: the first one whose cumulative end time and
+    // cumulative frame count are both at or beyond the restart point.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    }
+    // pit_curr stays 0 if no phase contains the restart point.
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  if (app_args.num_steps != INT_MAX) // Step-limited run: only run the first pending phase.
+    phase_idx_end = phase_idx_init + 1; // Relative to the start so a restart in phase >= 1 still runs.
+
+  // Loop over the phases that remain to be run.
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+freeresources:
+  // Simulation complete (or restart read failed): free app, communicator and context.
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
new file mode 100644
index 000000000..0ce0adf5b
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
@@ -0,0 +1,1265 @@
+#include <math.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State (phase type) of the pseudo orbit-averaged (POA) integrator.
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Not started yet.
+  GK_POA_OAP, // Orbit-averaged phase (OAP).
+  GK_POA_FDP, // Full-dynamics phase (FDP).
+  GK_POA_COMPLETED, // Simulation finished.
+};
+
+struct gk_poa_phase_params { // Settings describing one phase of the POA sequence.
+  enum gk_poa_state phase; // Type of phase.
+  int num_frames; // Number of output frames in this phase.
+  double duration; // Duration of this phase.
+  double alpha; // Factor multiplying collisionless terms.
+  bool is_static_field; // Whether the field is held static (i.e. not evolved) in this phase.
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multiplier.
+};
+
+// Context of the simulation: holds what would otherwise be global parameters.
+struct gk_mirror_ctx {
+  int cdim, vdim; // Dimensionality.
+
+  // Plasma parameters
+  double mi;
+  double qi;
+  double me;
+  double qe;
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau;
+  double Ti0;
+  double kperpRhos;
+  // Parameters controlling initial conditions.
+  double alim;
+  double alphaIC0;
+  double alphaIC1;
+  double nuFrac;
+  // Electron-electron collision freq.
+  double logLambdaElc;
+  double nuElc;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  // Thermal speeds.
+  double vti;
+  double vte;
+  double c_s;
+  // Gyrofrequencies and gyroradii.
+  double omega_ci;
+  double rho_s;
+  double kperp; // Perpendicular wavenumber in SI units.
+  double RatZeq0; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Ensure that Z=0 is not on a cell boundary.
+  double Z_min;
+  double Z_max;
+  double z_min; // z_psiZ(psi_eval, Z_min), filled in by create_ctx.
+  double z_max; // z_psiZ(psi_eval, Z_max), filled in by create_ctx.
+  double psi_eval; // psi_RZ(RatZeq0, 0), filled in by create_ctx.
+  double psi_in; // Scratch: psi passed to integrand_z_psiZ / root_Z_psiz through the context.
+  double z_in; // Scratch: target z passed to root_Z_psiz through the context.
+  // Magnetic equilibrium model.
+  double mcB;
+  double gamma;
+  double Z_m;
+  // Banana tip info. Hardcoded to avoid dependency on ctx.
+  double B_bt;
+  double R_bt;
+  double Z_bt;
+  double z_bt;
+  double R_m;
+  double B_m;
+  double z_m;
+  // Physics parameters at mirror throat
+  double n_m;
+  double Te_m;
+  double Ti_m;
+  double cs_m;
+  // Source parameters
+  double NSrcIon;
+  double lineLengthSrcIon;
+  double sigSrcIon;
+  double NSrcFloorIon;
+  double TSrc0Ion;
+  double TSrcFloorIon;
+  double NSrcElc;
+  double lineLengthSrcElc;
+  double sigSrcElc;
+  double NSrcFloorElc;
+  double TSrc0Elc;
+  double TSrcFloorElc;
+  double alpha; // Multirate factor.
+  // Grid parameters
+  double vpar_max_ion;
+  double vpar_max_elc;
+  double mu_max_ion;
+  double mu_max_elc;
+  int Nz;
+  int Nvpar;
+  int Nmu;
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order;
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run (heap array freed by release_ctx).
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+};
+
+double
+psi_RZ(double RIn, double ZIn, void *ctx) // Flux function psi at cylindrical coordinates (RIn, ZIn).
+{
+  struct gk_mirror_ctx *app = ctx;
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+  double psi = 0.5 * pow(RIn, 2.) * mcB * // 0.5*R^2*mcB times the sum of two Lorentzians in Z,
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) + // one centered at Z = +Z_m
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.)))); // and one at Z = -Z_m (width gamma).
+  return psi;
+}
+
+double
+R_psiZ(double psiIn, double ZIn, void *ctx) // Radius R at which psi_RZ(R, ZIn) equals psiIn.
+{
+  struct gk_mirror_ctx *app = ctx;
+  double Rout = sqrt(2.0 * psiIn / (app->mcB * // Solve psi = 0.5*R^2*mcB*(sum of Lorentzians) for R.
+    (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) +
+    1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))))));
+  return Rout;
+}
+
+void
+Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
+{ // Writes the radial and axial field components and the field magnitude at (psiIn, ZIn).
+  struct gk_mirror_ctx *app = ctx;
+  double Rcoord = R_psiZ(psiIn, ZIn, ctx); // Radius of the psiIn surface at this Z.
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+  *BRad = -(1.0 / 2.0) * Rcoord * mcB * // Equals -(R/2) times the Z-derivative of the BZ expression below.
+    (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
+    2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
+  *BZ = mcB * // mcB times the same two Lorentzians that appear in psi_RZ.
+    (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
+    1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))));
+  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2)); // |B| from the two components.
+}
+
+double
+integrand_z_psiZ(double ZIn, void *ctx) // Integrand Bmag/BZ at ZIn, used by z_psiZ.
+{
+  struct gk_mirror_ctx *app = ctx;
+  double psi = app->psi_in; // psi is passed in through the context (set by the caller).
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, ZIn, ctx, &BRad, &BZ, &Bmag);
+  return Bmag / BZ;
+}
+
+double
+z_psiZ(double psiIn, double ZIn, void *ctx) // Signed integral of Bmag/BZ from Z = 0 to ZIn at psiIn.
+{
+  struct gk_mirror_ctx *app = ctx;
+  app->psi_in = psiIn; // Side effect: stores psiIn in the context for the integrand.
+  double eps = 0.0; // Reference point of the integral.
+  struct gkyl_qr_res integral;
+  if (eps <= ZIn) {
+    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14); // NOTE(review): 7 and 1e-14 are presumably levels and tolerance; confirm.
+  }
+  else {
+    integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14); // Integrate with ordered limits ...
+    integral.res = -integral.res; // ... and flip the sign for ZIn < 0.
+  }
+  return integral.res;
+}
+
+// Residual z_in - z_psiZ(psi_in, Z) whose root in Z inverts z(Z); inputs come from the context.
+double
+root_Z_psiz(double Z, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  return app->z_in - z_psiZ(app->psi_in, Z, ctx);
+}
+
+double
+Z_psiz(double psiIn, double zIn, void *ctx) // Inverse of z_psiZ: the Z with z_psiZ(psiIn, Z) = zIn.
+{
+  struct gk_mirror_ctx *app = ctx;
+  double maxL = app->Z_max - app->Z_min;
+  double eps = maxL / app->Nz;   // Interestingly using a smaller eps yields larger errors in some geo quantities.
+  app->psi_in = psiIn; // Side effect: root_Z_psiz reads psi_in and z_in from the context.
+  app->z_in = zIn;
+  struct gkyl_qr_res Zout;
+  if (zIn >= 0.0) { // Search the bracket [-eps, Z_max + eps].
+    double fl = root_Z_psiz(-eps, ctx);
+    double fr = root_Z_psiz(app->Z_max + eps, ctx);
+    Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14); // NOTE(review): 1000 and 1e-14 are presumably max iterations and tolerance; confirm.
+  }
+  else { // Search the bracket [Z_min - eps, eps].
+    double fl = root_Z_psiz(app->Z_min - eps, ctx);
+    double fr = root_Z_psiz(eps, ctx);
+    Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14);
+  }
+  return Zout.res;
+}
+
+// -- Source functions.
+void
+eval_density_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{ // Electron source density: floored Gaussian in z where |Z| <= Z_m, 1e-16 elsewhere.
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double NSrc = app->NSrcElc;
+  double zSrc = app->lineLengthSrcElc; // Center of the Gaussian in z.
+  double sigSrc = app->sigSrcElc; // Width of the Gaussian.
+  double NSrcFloor = app->NSrcFloorElc;
+  if (fabs(Z) <= app->Z_m) {
+    fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2.))) *
+      exp(-1 * pow((z - zSrc), 2) / (2.0 * pow(sigSrc, 2.))));
+  }
+  else {
+    fout[0] = 1e-16;
+  }
+}
+
+void
+eval_upar_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  *fout = 0.0; // Electron source parallel flow is identically zero.
+}
+
+void
+eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  // Electron source temperature: TSrc0Elc for |z| <= 2*sigSrcElc, TSrcFloorElc outside.
+  const struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  double sigSrc = app->sigSrcElc;
+  double TSrc0 = app->TSrc0Elc;
+  double Tfloor = app->TSrcFloorElc;
+  if (fabs(z) <= 2.0 * sigSrc) {
+    fout[0] = TSrc0;
+  }
+  else {
+    fout[0] = Tfloor;
+  }
+}
+
+void
+eval_density_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{ // Ion source density: floored Gaussian in z where |Z| <= Z_m, 1e-16 elsewhere.
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double NSrc = app->NSrcIon;
+  double zSrc = app->lineLengthSrcIon; // Center of the Gaussian in z.
+  double sigSrc = app->sigSrcIon; // Width of the Gaussian.
+  double NSrcFloor = app->NSrcFloorIon;
+  if (fabs(Z) <= app->Z_m) {
+    fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2))) *
+      exp(-1 * pow((z - zSrc), 2) / (2.0 * pow(sigSrc, 2))));
+  }
+  else {
+    fout[0] = 1e-16;
+  }
+}
+
+void
+eval_upar_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  *fout = 0.0; // Source parallel flow is identically zero.
+}
+
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  // Ion source temperature: TSrc0Ion for |z| <= 2*sigSrcIon, TSrcFloorIon outside.
+  const struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  double sigSrc = app->sigSrcIon;
+  double TSrc0 = app->TSrc0Ion;
+  double Tfloor = app->TSrcFloorIon;
+  if (fabs(z) <= 2.0 * sigSrc) {
+    fout[0] = TSrc0;
+  }
+  else {
+    fout[0] = Tfloor;
+  }
+}
+
+// Electrons initial conditions
+void
+eval_density_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{ // Initial electron density as a piecewise profile in |Z| (three regions split at Z_bt and Z_m).
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  if (fabs(Z) <= app->Z_bt) { // Inside the banana tips: exponent alphaIC0/2.
+    fout[0] = app->n0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC0 / 2.);
+  }
+  else if (fabs(Z) <= app->Z_m) { // Between banana tip and mirror throat: exponent alphaIC1/2.
+    fout[0] = app->n0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC1 / 2.);
+  }
+  else { // Beyond the throat: throat density scaled by sqrt(Bmag/B_m).
+    fout[0] = app->n_m * sqrt(Bmag / app->B_m);
+  }
+}
+
+void
+eval_upar_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial electron parallel flow: zero for |z| <= z_m, then linear in the distance past z_m.
+  const struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  if (fabs(z) <= app->z_m) {
+    fout[0] = 0.0;
+  }
+  else if (z > app->z_m) {
+    fout[0] = app->cs_m * (z - app->z_m);
+  }
+  else {
+    fout[0] = app->cs_m * (z + app->z_m);
+  }
+}
+
+void
+eval_temp_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{ // Initial electron temperature as a piecewise profile in |Z| (three regions split at Z_bt and Z_m).
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  if (fabs(Z) <= app->Z_bt) { // Inside the banana tips: exponent alphaIC0/2.
+    fout[0] = app->Te0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC0 / 2.);
+  }
+  else if (fabs(Z) <= app->Z_m) { // Between banana tip and mirror throat: exponent alphaIC1/2.
+    fout[0] = app->Te0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC1 / 2.);
+  }
+  else { // Beyond the throat: throat temperature scaled by sqrt(Bmag/B_m).
+    fout[0] = app->Te_m * sqrt(Bmag / app->B_m);
+  }
+}
+
+// Ion initial conditions
+void
+eval_density(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{ // Initial ion density as a piecewise profile in |Z| (three regions split at Z_bt and Z_m).
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  if (fabs(Z) <= app->Z_bt) { // Inside the banana tips: exponent alphaIC0/2.
+    fout[0] = app->n0 * pow(1.0 - pow((R - app->R_bt) / app->alim, 2), app->alphaIC0 / 2);
+  }
+  else if (fabs(Z) <= app->Z_m) { // Between banana tip and mirror throat: exponent alphaIC1/2.
+    fout[0] = app->n0 * pow(1.0 - pow((R - app->R_bt) / app->alim, 2), app->alphaIC1 / 2);
+  }
+  else { // Beyond the throat: throat density scaled by sqrt(Bmag/B_m).
+    fout[0] = app->n_m * sqrt(Bmag / app->B_m);
+  }
+}
+
+void
+eval_upar(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial ion parallel flow: zero for |z| <= z_m, then linear in the distance past z_m.
+  const struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  if (fabs(z) <= app->z_m) {
+    fout[0] = 0.0;
+  }
+  else if (z > app->z_m) {
+    fout[0] = app->cs_m * (z - app->z_m);
+  }
+  else {
+    fout[0] = app->cs_m * (z + app->z_m);
+  }
+}
+
+void
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{ // Initial ion temperature as a piecewise profile in |Z| (three regions split at Z_bt and Z_m).
+  struct gk_mirror_ctx *app = ctx;
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  if (fabs(Z) <= app->Z_bt) { // Inside the banana tips: exponent alphaIC0/2.
+    fout[0] = app->Ti0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2)), app->alphaIC0 / 2);
+  }
+  else if (fabs(Z) <= app->Z_m) { // Between banana tip and mirror throat: exponent alphaIC1/2.
+    fout[0] = app->Ti0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2)), app->alphaIC1 / 2);
+  }
+  else { // Beyond the throat: throat temperature scaled by sqrt(Bmag/B_m).
+    fout[0] = app->Ti_m * sqrt(Bmag / app->B_m);
+  }
+}
+
+// Potential initial condition: flat near z = 0, then linear in |z|, reaching zero at |z| = z_max.
+void
+eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  double z_m = 0.98; // NOTE(review): hard-coded local, not read from the context; confirm intent.
+  double z_max = app->z_max;
+  double sigma = 0.2 * z_m; // Half-width of the flat central region.
+  double center_potential = 8.0 * app->Te0 / app->qi;
+  if (fabs(z) <= sigma) {
+    fout[0] = center_potential;
+  }
+  else {
+    fout[0] = center_potential * (1 - (fabs(z) - sigma) / (z_max - sigma));
+  }
+}
+
+// Electron collision frequency: the constant nuElc stored in the context.
+void
+evalNuElc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  *fout = params->nuElc;
+}
+
+void
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx; // Ion collision frequency is the constant nuIon.
+  *fout = params->nuIon;
+}
+
+// Geometry evaluation functions for the gk app.
+// mapc2p must assume a 3d input xc = (psi, theta, z) and writes Cartesian (x, y, Z) into xp.
+void
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
+{
+  double psi = xc[0];
+  double theta = xc[1];
+  double z = xc[2];
+
+  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate for this (psi, z).
+  double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate.
+
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  double x = R * cos(theta);
+  double y = R * sin(theta);
+  xp[0] = x;
+  xp[1] = y;
+  xp[2] = Z;
+}
+
+// bmag_func must assume a 3d input xc; writes the field magnitude into fout[0].
+void
+bmag_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double z = xc[2];
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line (xc[0] is not used).
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+  fout[0] = Bmag;
+}
+
+// bfield_func must assume a 3d input xc; writes the Cartesian field components into fout[0..2].
+void
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *app = ctx;
+  double z = xc[2];
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line (xc[0] is not used).
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+
+  double phi = xc[1];
+  // xc are computational coords; xc[1] is used as the azimuthal angle.
+  // Set Cartesian components of magnetic field.
+  fout[0] = BRad * cos(phi);
+  fout[1] = BRad * sin(phi);
+  fout[2] = BZ;
+}
+
+struct gk_mirror_ctx
+create_ctx(void) // Build the simulation context; the caller must free it with release_ctx.
+{
+  int cdim = 1, vdim = 2; // Dimensionality.
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0; // Used in the beta relation that defines tau below.
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS; // ion mass
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+  double kperpRhos = 0.1;
+
+  // Parameters controlling initial conditions.
+  double alim = 0.125;
+  double alphaIC0 = 2;
+  double alphaIC1 = 10;
+
+  double nuFrac = 1.0;
+  // Electron-electron collision freq.
+  double logLambdaElc = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Te0 / eV);
+  double nuElc = nuFrac * logLambdaElc * pow(eV, 4.) * n0 /
+    (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.));
+  // Ion-ion collision freq.
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+  double vte = sqrt(Te0 / me);
+  double c_s = sqrt(Te0 / mi);
+
+  // Gyrofrequencies and gyroradii.
+  double omega_ci = eV * B_p / mi;
+  double rho_s = c_s / omega_ci;
+
+  // Perpendicular wavenumber in SI units:
+  double kperp = kperpRhos / rho_s;
+
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Ensure that Z=0 is not on
+  // the boundary of a cell (due to AD errors).
+  double Z_min = -2.5;
+  double Z_max = 2.5;
+
+  // Parameters controlling the magnetic equilibrium model.
+  double mcB = 6.51292;
+  double gamma = 0.124904;
+  double Z_m = 0.98;
+
+  // Source parameters
+  double NSrcIon = 3.1715e23 / 8.0;
+  double lineLengthSrcIon = 0.0;
+  double sigSrcIon = Z_m / 4.0;
+  double NSrcFloorIon = 0.05 * NSrcIon;
+  double TSrc0Ion = Ti0 * 1.25;
+  double TSrcFloorIon = TSrc0Ion / 8.0;
+  double NSrcElc = NSrcIon;
+  double lineLengthSrcElc = lineLengthSrcIon;
+  double sigSrcElc = sigSrcIon;
+  double NSrcFloorElc = NSrcFloorIon;
+  double TSrc0Elc = TSrc0Ion / tau;
+  double TSrcFloorElc = TSrcFloorIon / tau;
+
+  // Banana tip info. Hardcoded to avoid dependency on ctx.
+  double B_bt = 1.058278;
+  double R_bt = 0.071022;
+  double Z_bt = 0.467101;
+  double z_bt = 0.468243;
+  double R_m = 0.017845;
+  double B_m = 16.662396;
+  double z_m = 0.982544;
+
+  // Physics parameters at mirror throat
+  double n_m = 1.105617e19;
+  double Te_m = 346.426583 * eV;
+  double Ti_m = 3081.437703 * eV;
+  double cs_m = 4.037740e5;
+
+  double alpha = 0.01; // Multirate factor.
+
+  // Grid parameters
+  double vpar_max_elc = 20 * vte;
+  double mu_max_elc = me * pow(3. * vte, 2.) / (2. * B_p);
+  double vpar_max_ion = 20 * vti;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 32;
+  int Nvpar = 32; // Number of cells in the parallel velocity direction 96
+  int Nmu = 16;  // Number of cells in the mu direction 192
+  int poly_order = 1;
+
+  // Factor multiplying collisionless terms.
+  double alpha_oap = 0.01;
+  double alpha_fdp = 1.0;
+  // Duration of each phase.
+  double tau_oap = 1e-7;
+  double tau_fdp = 3e-10;
+  double tau_fdp_extra = 2 * tau_fdp;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 1; // Frames per OAP phase
+  int num_frames_fdp = 1; // Frames per FDP phase
+  int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
+
+  // Whether the field is held static (true) or evolved (false).
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+  // Whether to enable positivity.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = true;
+  // Type of df/dt multiplier.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Calculate phase structure
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  // Layout: num_cycles pairs of (OAP, FDP) followed by one extra, longer FDP.
+  int num_phases = 2 * num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof *poa_phases);
+  for (int i = 0; i < num_cycles; i++) {
+    // OAPs.
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs.
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // Add an extra, longer FDP.
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .kperpRhos = kperpRhos,
+    .alim = alim,
+    .alphaIC0 = alphaIC0,
+    .alphaIC1 = alphaIC1,
+    .nuFrac = nuFrac,
+    .logLambdaElc = logLambdaElc,
+    .nuElc = nuElc,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
+    .vte = vte,
+    .c_s = c_s,
+    .omega_ci = omega_ci,
+    .rho_s = rho_s,
+    .kperp = kperp,
+    .RatZeq0 = RatZeq0,
+    .Z_min = Z_min,
+    .Z_max = Z_max,
+    .mcB = mcB,
+    .gamma = gamma,
+    .Z_m = Z_m,
+    .B_bt = B_bt,
+    .R_bt = R_bt,
+    .Z_bt = Z_bt,
+    .z_bt = z_bt,
+    .R_m = R_m,
+    .B_m = B_m,
+    .z_m = z_m,
+    .n_m = n_m,
+    .Te_m = Te_m,
+    .Ti_m = Ti_m,
+    .cs_m = cs_m,
+    .NSrcIon = NSrcIon,
+    .lineLengthSrcIon = lineLengthSrcIon,
+    .sigSrcIon = sigSrcIon,
+    .NSrcFloorIon = NSrcFloorIon,
+    .TSrc0Ion = TSrc0Ion,
+    .TSrcFloorIon = TSrcFloorIon,
+    .NSrcElc = NSrcElc,
+    .lineLengthSrcElc = lineLengthSrcElc,
+    .sigSrcElc = sigSrcElc,
+    .NSrcFloorElc = NSrcFloorElc,
+    .TSrc0Elc = TSrc0Elc,
+    .TSrcFloorElc = TSrcFloorElc,
+    .alpha = alpha, // Previously computed but never stored, leaving ctx.alpha at zero.
+    .vpar_max_ion = vpar_max_ion,
+    .vpar_max_elc = vpar_max_elc,
+    .mu_max_ion = mu_max_ion,
+    .mu_max_elc = mu_max_elc,
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = { Nz, Nvpar, Nmu },
+    .poly_order = poly_order,
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+  };
+
+  // Populate the derived values, which need the context itself: psi of the
+  // field line and the z extents corresponding to Z_min and Z_max.
+  ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+
+  return ctx;
+}
+
+// Release the phase table referenced by the simulation context.
+// Only ctx->poa_phases is freed; the context object itself is left alone.
+void
+release_ctx(struct gk_mirror_ctx *ctx)
+{
+  struct gk_poa_phase_params *phase_table = ctx->poa_phases;
+  gkyl_free(phase_table);
+}
+
+// Compute the integrated diagnostics (field energy and integrated moments)
+// when the trigger fires or when force_calc is set. The trigger is always
+// checked (and possibly bumped) first. The time-step is recorded as well
+// unless dt is negative.
+void
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
+  double t_curr, bool force_calc, double dt)
+{
+  bool is_due = gkyl_tm_trigger_check_and_bump(iot, t_curr);
+  if (!is_due && !force_calc)
+    return;
+
+  gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+  gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+
+  // A negative dt means "no time-step to record".
+  bool skip_dt = dt < 0.0;
+  if (!skip_dt)
+    gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
+}
+
+// Write configuration-space and phase-space output when the respective
+// trigger fires or when force_write is set.
+//
+// Both outputs are labelled with a frame number derived from the
+// configuration-space trigger: its counter minus one if that trigger just
+// fired (it has already been bumped), or the counter itself for a purely
+// forced write.
+void
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
+{
+  const bool conf_due = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  if (conf_due || force_write) {
+    int conf_frame;
+    if (conf_due)
+      conf_frame = iot_conf->curr - 1;
+    else
+      conf_frame = iot_conf->curr; // Forced write without a trigger hit.
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, conf_frame);
+
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  const bool phase_due = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (phase_due || force_write) {
+    // The phase-space frame index follows the configuration-space counter.
+    int phase_frame;
+    if (force_write && !conf_due)
+      phase_frame = iot_conf->curr;
+    else
+      phase_frame = iot_conf->curr - 1;
+
+    gkyl_gyrokinetic_app_write_phase(app, t_curr, phase_frame);
+  }
+}
+
+// Running time/frame bookkeeping shared between main and run_phase.
+// run_phase sets t_end and num_frames at the start of each phase from the
+// phase's duration and frame count, and updates t_curr and frame_curr when
+// the phase finishes.
+struct time_frame_state {
+  double t_curr; // Current simulation time.
+  double t_end; // End time of current phase.
+  int frame_curr; // Current frame.
+  int num_frames; // Number of frames at the end of current phase.
+};
+
+// Re-arm the three output triggers so that they cover the remainder of the
+// current phase, i.e. the interval [tfs->t_curr, tfs->t_end] and the frames
+// between tfs->frame_curr and tfs->num_frames.
+//
+// ctx: simulation context; supplies write_phase_freq and int_diag_calc_freq.
+// tfs: current time/frame state (read only).
+// trig_write_conf, trig_write_phase, trig_calc_intdiag: triggers whose dt,
+//   tcurr and curr members are overwritten.
+//
+// The number of remaining frames is clamped to at least one so that a phase
+// with no frames left (frame_curr == num_frames) yields finite trigger
+// intervals instead of inf/NaN.
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  int frame_curr = tfs->frame_curr;
+  int num_frames = tfs->num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
+
+  // Prevent division by zero when frame_curr equals num_frames.
+  int frames_remaining = GKYL_MAX2(1, num_frames - frame_curr);
+  double time_remaining = t_end - t_curr;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  // Integer number of diagnostic calculations per frame. The ratio is an
+  // integer division, so guard against num_frames == 0.
+  int diag_per_frame = num_frames > 0 ? num_int_diag_calc / num_frames : 1;
+  // Never compute diagnostics less often than frames are written.
+  int diag_frames = GKYL_MAX2(frames_remaining, diag_per_frame * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+// Run one phase (OAP or FDP) of the pseudo orbit-averaged integrator.
+//
+// Sets tfs->t_end and tfs->num_frames from the phase's duration and frame
+// count, re-arms the I/O triggers, resets the collisionless, df/dt-multiplier,
+// positivity and field inputs of the app from pparams, and then time-steps
+// until t_end is reached, num_steps steps have been taken, an update fails,
+// or num_failures_max consecutive small time-steps have occurred.
+//
+// On return tfs->t_curr holds the time reached and tfs->frame_curr has been
+// advanced by pparams->num_frames.
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  // End time and final frame of this phase, measured from the current state.
+  tfs->t_end = tfs->t_curr + pparams->duration;
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames;
+
+  // Run an OAP or FDP.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+
+  // Reset I/O triggers:
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Reset simulation parameters and function pointers.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .type = pparams->fdot_mult_type,
+    .cellwise_const = true,
+    .write_diagnostics = true,
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .polarization_bmag = ctx->B_p,
+    .kperpSq = pow(ctx->kperp, 2.),
+    .is_static = pparams->is_static_field,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  // Apply the same per-phase settings to both species, then to the field.
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "elc", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "elc", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "elc", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Compute initial guess of maximum stable time-step.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check.
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps)) {
+    // step % 1 is always 0, so the progress message is printed on every step.
+    if (step == 1 || step % 1 == 0)
+      gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
+
+    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    if (step == 1 || step % 1 == 0)
+      gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success) {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested;
+
+    // Output is forced if the accumulated time has gone past t_end.
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end);
+
+    // First successful step of the phase: record it as the reference time-step.
+    if (dt_init < 0.0) {
+      dt_init = status.dt_actual;
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
+        // Force a final round of diagnostics and output before giving up.
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break;
+      }
+    }
+    else {
+      // A healthy step resets the count, so only consecutive failures abort.
+      num_failures = 0;
+    }
+
+    step += 1;
+  }
+
+  // NOTE: frame_curr advances by the full phase frame count even if the loop
+  // above exited early (failed update, small-dt abort, or step limit).
+  tfs->t_curr = t_curr;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
+}
+
+// Program entry point: builds the simulation context, the electron and ion
+// species, the field and the gyrokinetic app; then either restarts from a
+// frame or applies and writes the initial conditions; runs the sequence of
+// phases stored in ctx.poa_phases; and finally prints statistics and
+// releases all resources. Always returns 0.
+int main(int argc, char **argv)
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  // Cell counts: command-line values take precedence over the context
+  // defaults via APP_ARGS_CHOOSE.
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d = 0; d < ctx.cdim; d++) {
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  // Electron species.
+  struct gkyl_gyrokinetic_species elc = {
+    .name = "elc",
+    .charge = ctx.qe,
+    .mass = ctx.me,
+    .vdim = ctx.vdim,
+    .lower = { -ctx.vpar_max_elc, 0.0 },
+    .upper = { ctx.vpar_max_elc, ctx.mu_max_elc },
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .ctx_density = &ctx,
+      .density = eval_density_elc,
+      .ctx_upar = &ctx,
+      .upar = eval_upar_elc,
+      .ctx_temp = &ctx,
+      .temp = eval_temp_elc,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .num_cross_collisions = 1,
+      .collide_with = { "ion" },
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_elc_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_elc_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_elc_source,
+      },
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                      GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP,
+                      GKYL_F_MOMENT_BIMAXWELLIAN },
+  };
+
+  // Ion species. Same layout as the electrons, plus scale_with_polarization.
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,
+    .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { -ctx.vpar_max_ion, 0.0 },
+    .upper = { ctx.vpar_max_ion, ctx.mu_max_ion },
+    .cells = { cells_v[0], cells_v[1] },
+    .scale_with_polarization = true,
+
+    .polarization_density = ctx.n0,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .ctx_density = &ctx,
+      .density = eval_density,
+      .ctx_upar = &ctx,
+      .upar = eval_upar,
+      .ctx_temp = &ctx,
+      .temp = eval_temp_ion,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Ti0,
+      .num_cross_collisions = 1,
+      .collide_with = { "elc" },
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_ion_source,
+      },
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                      GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP,
+                      GKYL_F_MOMENT_BIMAXWELLIAN },
+  };
+
+  struct gkyl_gyrokinetic_field field = {
+    .polarization_bmag = ctx.B_p, // Issue here. B0 from soloviev, so not sure what to do. Ours is not constant
+    .kperpSq = pow(ctx.kperp, 2.),
+    .is_static = false, // Will be replaced below.
+    .polarization_potential = eval_potential,
+    .polarization_potential_ctx = &ctx,
+  };
+
+  // GK app
+  struct gkyl_gk app_inp = {
+    .name = "gk_mirror_kinetic_elc_poa_1x2v_p1",
+    .cdim = ctx.cdim,
+    .lower = { ctx.z_min },
+    .upper = { ctx.z_max },
+    .cells = { cells_x[0] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+
+    .geometry = {
+      .geometry_id = GKYL_GEOMETRY_MAPC2P,
+      .world = { ctx.psi_eval, 0.0 },
+      .mapc2p = mapc2p, // Mapping of computational to physical space.
+      .c2p_ctx = &ctx,
+      .bfield_func = bfield_func, // Magnetic field.
+      .bfield_ctx = &ctx
+    },
+
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+    .num_species = 2,
+    .species = { elc, ion },
+    .field = field,
+
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0] },
+      .comm = comm,
+    },
+  };
+
+  // Set app output name from the executable name (argv[0]).
+  // This overwrites the literal name given in the initializer above.
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+  
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  // They are filled in by reset_io_triggers (directly below for a fresh run,
+  // and at the start of every run_phase call).
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in.
+    // time_count and frame_count accumulate the end time and end frame of
+    // each phase; the first phase whose end is not before the restart point
+    // is selected.
+    // NOTE(review): if no phase matches, pit_curr stays 0 while time_count and
+    // frame_count hold the totals over all phases -- confirm this is intended.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    }
+    ;
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    // Both can come out as zero when the restart lands exactly on the end of
+    // the selected phase.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    // dt = -1.0 tells calc_integrated_diagnostics not to record a time-step.
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  // If a step count was given on the command line, only run phases with
+  // index below 1.
+  if (app_args.num_steps != INT_MAX)
+    phase_idx_end = 1;
+
+  // Loop over number of number of phases;
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+  // Common exit path; also reached directly when reading the restart frame fails.
+freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
new file mode 100644
index 000000000..7663554d9
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
@@ -0,0 +1,941 @@
+#include <math.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State of the pseudo orbit-averaged (POA) integrator. Also used to tag the
+// type of each entry in the phase table (struct gk_poa_phase_params).
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Haven't started.
+  GK_POA_OAP, // Orbit averaged phase.
+  GK_POA_FDP, // Full dynamics phase.
+  GK_POA_COMPLETED, // Finished simulation.
+};
+
+// Parameters describing a single phase (OAP or FDP) of the run.
+struct gk_poa_phase_params {
+  enum gk_poa_state phase; // Type of phase.
+  int num_frames; // Number of frames.
+  double duration; // Duration.
+  double alpha; // Factor multiplying collisionless terms.
+  bool is_static_field; // True if the field is static (i.e. not evolved).
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multiplier.
+};
+
+// Define the context of the simulation. This is basically all the globals.
+// A pointer to this struct is passed as the void *ctx argument of the
+// geometry, field and initial-condition callbacks in this file.
+struct gk_mirror_ctx {
+  int cdim, vdim; // Dimensionality.
+
+  // Plasma parameters
+  double mi; // Ion mass.
+  double me; // Electron mass.
+  double qi; // Ion charge.
+  double qe; // Electron charge.
+  double Te0; // Electron temperature.
+  double Ti0; // Ion temperature.
+  double n0; // Density.
+  double B_p; // Plasma magnetic field (mirror center).
+  double beta; // Plasma beta in the center.
+  double tau; // Temperature ratio.
+
+  double Ti_perp0; // Reference ion perp temperature.
+  double Ti_par0; // Reference ion par temperature.
+  double cs_m; // Ion sound speed at the throat.
+
+  double nuFrac; // Fraction multiplying collision frequency.
+  double logLambdaIon; // Ion Coulomb logarithm.
+  double nuIon; // Ion-ion collision freq.
+
+  double vti; // Ion thermal speed.
+  double vte; // Electron thermal speed.
+  double c_s; // Ion sound speed.
+  double omega_ci; // Ion gyrofrequency.
+  double rho_s; // Ion sound gyroradius.
+
+  double RatZeq0; // Radius of the field line at Z=0.
+  double Z_min; // Minimum axial coordinate Z.
+  double Z_max; // Maximum axial coordinate Z.
+  double z_min; // Minimum value of the position along the field line.
+  double z_max; // Maximum value of the position along the field line.
+  double psi_eval; // Psi (poloidal flux) of the field line.
+  // Scratch psi and z handed to the quadrature/root-finding callbacks
+  // (psi_in is set by z_psiZ and Z_psiz, z_in by Z_psiz).
+  double psi_in, z_in; // Auxiliary psi and z.
+
+  // Magnetic equilibrium model.
+  double mcB; // Amplitude factor of the Lorentzian terms in psi_RZ.
+  double gamma; // Width of the Lorentzian terms in psi_RZ.
+  double Z_m; // Axial coordinate at mirror throat.
+  double z_m; // Computational coordinate at mirror throat.
+
+  // Source parameters
+  double NSrcIon; // Ion source density (value returned by eval_density_ion_source).
+  double TSrc0Ion; // Ion source temperature (value returned by eval_temp_ion_source).
+
+  // Physical velocity space limits.
+  double vpar_min_ion, vpar_max_ion;
+  double mu_max_ion;
+  // Computational velocity space limits.
+  double vpar_min_ion_c, vpar_max_ion_c;
+  double mu_min_ion_c, mu_max_ion_c;
+
+  // Grid DOF.
+  int Nz;
+  int Nvpar;
+  int Nmu;
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order;
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run.
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+};
+
+// Flux function psi at cylindrical position (RIn, ZIn).
+// The axial profile is a sum of four Lorentzian terms of width gamma centred
+// at Z = +Z_m, -Z_m, +2*Z_m and -2*Z_m; the outer pair carries twice the
+// weight of the inner pair. ctx must point to a struct gk_mirror_ctx.
+double
+psi_RZ(double RIn, double ZIn, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double amp = params->mcB;
+  const double width = params->gamma;
+  const double Zt = params->Z_m;
+
+  // Individual Lorentzian contributions.
+  const double inner_pos = 1. / (M_PI * width * (1. + pow((ZIn - Zt) / width, 2.)));
+  const double inner_neg = 1. / (M_PI * width * (1. + pow((ZIn + Zt) / width, 2.)));
+  const double outer_pos = 2. / (M_PI * width * (1. + pow((ZIn - 2 * Zt) / width, 2.)));
+  const double outer_neg = 2. / (M_PI * width * (1. + pow((ZIn + 2 * Zt) / width, 2.)));
+
+  const double profile = inner_pos + inner_neg + outer_pos + outer_neg;
+  return 0.5 * pow(RIn, 2.) * amp * profile;
+}
+
+// Radius R at which the flux function equals psiIn at axial position ZIn,
+// i.e. the inverse of psi_RZ with respect to R. ctx must point to a
+// struct gk_mirror_ctx.
+double
+R_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double width = params->gamma;
+  const double Zt = params->Z_m;
+
+  // Same four Lorentzian terms that appear in psi_RZ.
+  const double inner_pos = 1.0 / (M_PI * width * (1.0 + pow((ZIn - Zt) / width, 2.)));
+  const double inner_neg = 1.0 / (M_PI * width * (1.0 + pow((ZIn + Zt) / width, 2.)));
+  const double outer_pos = 2.0 / (M_PI * width * (1.0 + pow((ZIn - 2 * Zt) / width, 2.)));
+  const double outer_neg = 2.0 / (M_PI * width * (1.0 + pow((ZIn + 2 * Zt) / width, 2.)));
+
+  const double denom = params->mcB * (inner_pos + inner_neg + outer_pos + outer_neg);
+  return sqrt(2.0 * psiIn / denom);
+}
+
+// Magnetic field on the flux surface psiIn at axial position ZIn.
+// Writes the radial component to *BRad, the axial component to *BZ and the
+// magnitude sqrt(BRad^2 + BZ^2) to *Bmag. ctx must point to a
+// struct gk_mirror_ctx.
+void
+Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double radius = R_psiZ(psiIn, ZIn, ctx);
+  const double amp = params->mcB;
+  const double width = params->gamma;
+  const double Zt = params->Z_m;
+
+  // Z-derivatives of the four Lorentzian terms.
+  const double d_inner_pos = -2.0 * (ZIn - Zt) /
+    (M_PI * pow(width, 3.) * (pow(1.0 + pow((ZIn - Zt) / width, 2.), 2.)));
+  const double d_inner_neg = -2.0 * (ZIn + Zt) /
+    (M_PI * pow(width, 3.) * (pow(1.0 + pow((ZIn + Zt) / width, 2.), 2.)));
+  const double d_outer_pos = -4.0 * (ZIn - 2 * Zt) /
+    (M_PI * pow(width, 3.) * (pow(1.0 + pow((ZIn - 2 * Zt) / width, 2.), 2.)));
+  const double d_outer_neg = -4.0 * (ZIn + 2 * Zt) /
+    (M_PI * pow(width, 3.) * (pow(1.0 + pow((ZIn + 2 * Zt) / width, 2.), 2.)));
+
+  // The Lorentzian terms themselves.
+  const double inner_pos = 1.0 / (M_PI * width * (1.0 + pow((ZIn - Zt) / width, 2.)));
+  const double inner_neg = 1.0 / (M_PI * width * (1.0 + pow((ZIn + Zt) / width, 2.)));
+  const double outer_pos = 2.0 / (M_PI * width * (1.0 + pow((ZIn - 2 * Zt) / width, 2.)));
+  const double outer_neg = 2.0 / (M_PI * width * (1.0 + pow((ZIn + 2 * Zt) / width, 2.)));
+
+  *BRad = -(1.0 / 2.0) * radius * amp *
+    (d_inner_pos + d_inner_neg + d_outer_pos + d_outer_neg);
+  *BZ = amp * (inner_pos + inner_neg + outer_pos + outer_neg);
+  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+}
+
+// Integrand used by z_psiZ: the ratio Bmag/BZ at axial position ZIn on the
+// flux surface stored in the context's psi_in member.
+double
+integrand_z_psiZ(double ZIn, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  double b_rad, b_z, b_mag;
+  Bfield_psiZ(params->psi_in, ZIn, ctx, &b_rad, &b_z, &b_mag);
+  return b_mag / b_z;
+}
+
+// Coordinate z along the field line of flux psiIn at axial position ZIn,
+// obtained by integrating integrand_z_psiZ from Z = 0 to ZIn. The result is
+// negative when ZIn lies below 0.
+// Side effect: stores psiIn in the context's psi_in member, which the
+// integrand reads back.
+double
+z_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  params->psi_in = psiIn;
+
+  const double Z_origin = 0.0;
+  if (Z_origin <= ZIn) {
+    struct gkyl_qr_res fwd = gkyl_dbl_exp(integrand_z_psiZ, ctx, Z_origin, ZIn, 7, 1e-14);
+    return fwd.res;
+  }
+
+  // Integrate over the ascending interval and flip the sign.
+  struct gkyl_qr_res bwd = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, Z_origin, 7, 1e-14);
+  return -bwd.res;
+}
+
+// Residual function used to invert z(Z) by root-finding: the difference
+// between the target z stored in the context (z_in) and z_psiZ evaluated at
+// the trial axial position Z on the flux surface psi_in.
+double
+root_Z_psiz(double Z, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  const double z_target = params->z_in;
+  const double z_trial = z_psiZ(params->psi_in, Z, ctx);
+  return z_target - z_trial;
+}
+
+// Axial position Z corresponding to the field-line coordinate zIn on the flux
+// surface psiIn, found with a Ridders root search on root_Z_psiz.
+// The search bracket is [-eps, Z_max + eps] for zIn >= 0 and
+// [Z_min - eps, eps] otherwise, with eps = (Z_max - Z_min) / Nz.
+// Side effect: stores psiIn and zIn in the context (psi_in, z_in).
+double
+Z_psiz(double psiIn, double zIn, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  double span = params->Z_max - params->Z_min;
+  double pad = span / params->Nz;   // Interestingly using a smaller eps yields larger errors in some geo quantities.
+  params->psi_in = psiIn;
+  params->z_in = zIn;
+
+  // Pick the half of the domain that contains the root.
+  double Z_lo, Z_hi;
+  if (zIn >= 0.0) {
+    Z_lo = -pad;
+    Z_hi = params->Z_max + pad;
+  }
+  else {
+    Z_lo = params->Z_min - pad;
+    Z_hi = pad;
+  }
+
+  double f_lo = root_Z_psiz(Z_lo, ctx);
+  double f_hi = root_Z_psiz(Z_hi, ctx);
+  struct gkyl_qr_res root = gkyl_ridders(root_Z_psiz, ctx, Z_lo, Z_hi, f_lo, f_hi, 1000, 1e-14);
+  return root.res;
+}
+
+// Ion source density: uniform, equal to the context's NSrcIon.
+void
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  *fout = params->NSrcIon;
+}
+
+// Ion source parallel flow: identically zero.
+void
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  *fout = 0.0;
+}
+
+// Ion source temperature: uniform, equal to the context's TSrc0Ion.
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  *fout = params->TSrc0Ion;
+}
+
+// Ion initial conditions
+// Initial ion density: uniform, equal to the context's n0.
+void
+eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  *fout = params->n0;
+}
+
+// Initial ion parallel flow: identically zero.
+void
+eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  *fout = 0.0;
+}
+
+// Initial ion parallel temperature: uniform, equal to the context's Ti_par0.
+void
+eval_temp_par_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  *fout = params->Ti_par0;
+}
+
+// Initial ion perpendicular temperature: uniform, equal to the context's Ti_perp0.
+void
+eval_temp_perp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  *fout = params->Ti_perp0;
+}
+
+// Ion collision frequency: uniform, equal to the context's nuIon.
+void
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  *fout = params->nuIon;
+}
+
+// Geometry evaluation functions for the gk app
+// mapc2p must assume a 3d input xc
+// Map computational coordinates xc = (psi, theta, z) to Cartesian physical
+// coordinates xp = (x, y, Z): z is inverted to the axial position Z, the
+// radius R follows from psi and Z, and (R, theta) is converted to (x, y).
+void
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
+{
+  const double flux = xc[0];
+  const double angle = xc[1];
+  const double z_line = xc[2];
+
+  const double Z_axial = Z_psiz(flux, z_line, ctx);
+  const double radius = R_psiZ(flux, Z_axial, ctx);
+
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  xp[0] = radius * cos(angle);
+  xp[1] = radius * sin(angle);
+  xp[2] = Z_axial;
+}
+
+// Cartesian components of the magnetic field at computational position xc.
+// bfield_func must assume a 3d input xc: xc[1] is the angle and xc[2] the
+// position along the field line. xc[0] is not used; the flux of the
+// simulated field line is recomputed from the context's RatZeq0.
+void
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  const double angle = xc[1];
+  const double z_line = xc[2];
+
+  // Magnetic flux function psi of field line.
+  const double flux = psi_RZ(params->RatZeq0, 0.0, ctx);
+  const double Z_axial = Z_psiz(flux, z_line, ctx);
+
+  double b_rad, b_z, b_mag;
+  Bfield_psiZ(flux, Z_axial, ctx, &b_rad, &b_z, &b_mag);
+
+  // Set Cartesian components of magnetic field.
+  fout[0] = b_rad * cos(angle);
+  fout[1] = b_rad * sin(angle);
+  fout[2] = b_z;
+}
+
+// Map computational ion velocity coordinates vc = (vpar_c, mu_c) to physical
+// ones vp = (vpar, mu): a tangent stretch in vpar, normalised so that
+// vpar_c = 1 maps to vpar_max_ion, and a cubic map in mu scaled by mu_max_ion.
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double vpar_c = vc[0];
+  const double mu_c = vc[1];
+
+  const double stretch = 1.4; // Argument scale of the tangent map.
+  vp[0] = params->vpar_max_ion * tan(vpar_c * stretch) / tan(stretch);
+  // Cubic map in mu.
+  vp[1] = params->mu_max_ion * pow(mu_c, 3);
+}
+
+struct gk_mirror_ctx
+create_ctx(void)
+{
+  int cdim = 1, vdim = 2; // Dimensionality.
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0;
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS;
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+
+  double nuFrac = 1.0;
+  // Ion-ion collision freq.
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+  double vte = sqrt(Te0 / me);
+  double c_s = sqrt(Te0 / mi);
+
+  // Gyrofrequencies and gyroradii.
+  double omega_ci = eV * B_p / mi;
+  double rho_s = c_s / omega_ci;
+
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Endure that Z=0 is not on
+  // the boundary of a cell (due to AD errors).
+  double Z_min = -3.0;
+  double Z_max = 3.0;
+
+  // Parameters controlling the magnetic equilibrium model.
+  double mcB = 1;
+  double gamma = 0.124904;
+  double Z_m = 1.0;
+
+  // Source parameters
+  double NSrcIon = 3.1715e23 / 8.0 / 40.0 / 2.0 * 1.25;
+  double TSrc0Ion = Ti0 * 1.25;
+
+  // Grid parameters
+  double vpar_max_ion = 16 * vti;
+  double vpar_min_ion = -vpar_max_ion;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+
+  // Computational velocity space limits.
+  double vpar_min_ion_c = -1.0;
+  double vpar_max_ion_c = 1.0;
+  double mu_min_ion_c = 0.;
+  double mu_max_ion_c = 1.;
+
+  // Grid DOF:
+  int Nz = 200; // Number of cells in z direction.
+  int Nvpar = 48; // Number of cells in parallel velocity direction.
+  int Nmu = 16;  // Number of cells in mu direction.
+  int poly_order = 1;
+
+  // Initial conditions parameter.s
+  double Ti_perp0 = 10000 * eV;
+  double Ti_par0 = 7500 * eV;
+
+  // Factor multiplying collisionless terms.
+  double alpha_oap = 0.01;
+  double alpha_fdp = 1.0;
+  // Duration of each phase.
+  double tau_oap = 5e-7;
+  double tau_fdp = 3e-9;
+  double tau_fdp_extra = 2 * tau_fdp;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 1; // Frames per OAP phase
+  int num_frames_fdp = 1; // Frames per FDP phase
+  int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
+
+  // Whether to evolve the field.
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+  // Whether to enable positivity.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = true;
+  // Type of df/dt multipler.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Calculate phase structure
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2 * num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
+    // OAPs.
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs.
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // Add an extra, longer FDP.
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim, .vdim = vdim,
+    .mi = mi, .qi = qi,
+    .me = me, .qe = qe,
+    .Te0 = Te0, .Ti0 = Ti0, .n0 = n0,
+    .B_p = B_p, .beta = beta, .tau = tau,
+    .nuFrac = nuFrac, .logLambdaIon = logLambdaIon, .nuIon = nuIon,
+    .vti = vti, .vte = vte, .c_s = c_s,
+    .omega_ci = omega_ci, .rho_s = rho_s,
+    .RatZeq0 = RatZeq0,
+    .Z_min = Z_min, .Z_max = Z_max,
+    // Parameters controlling the magnetic equilibrium model.
+    .mcB = mcB, .gamma = gamma,
+    .Z_m = Z_m,
+    // Initial condition parameters.
+    .Ti_perp0 = Ti_perp0, .Ti_par0 = Ti_par0,
+    // Source parameters
+    .NSrcIon = NSrcIon,
+    .TSrc0Ion = TSrc0Ion,
+    // Physical velocity space limits.
+    .vpar_min_ion = vpar_min_ion,
+    .vpar_max_ion = vpar_max_ion,
+    .mu_max_ion = mu_max_ion,
+    // Computational velocity space limits.
+    .vpar_min_ion_c = vpar_min_ion_c,
+    .vpar_max_ion_c = vpar_max_ion_c,
+    .mu_min_ion_c = mu_min_ion_c,
+    .mu_max_ion_c = mu_max_ion_c,
+    // Grid DOF.
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = { Nz, Nvpar, Nmu },
+    .poly_order = poly_order,
+    // Time integration and I/O parameters.
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+  };
+
+  // Populate a couple more values in the context.
+  ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+
+  return ctx;
+}
+
+void release_ctx(struct gk_mirror_ctx *ctx)
+{
+  // Only the phase table is owned by the context; it was obtained with gkyl_malloc.
+  gkyl_free(ctx->poa_phases);
+}
+
+// Compute integrated diagnostics when the trigger fires or force_calc is set.
+// A negative dt means there is no time-step to record, so saving dt is skipped.
+void calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
+  double t_curr, bool force_calc, double dt)
+{
+  bool triggered = gkyl_tm_trigger_check_and_bump(iot, t_curr); // Evaluated unconditionally.
+  if (!triggered && !force_calc) return;
+
+  gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+  gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+  if (!(dt < 0.0)) gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
+}
+
+// Write conf-space output (with integrated diagnostics) and phase-space output when triggered or forced.
+void write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
+{
+  bool conf_fired = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  if (conf_fired || force_write) {
+    // A forced write without a fired trigger uses iot_conf->curr; otherwise curr - 1.
+    int frame_conf = (conf_fired || !force_write) ? iot_conf->curr - 1 : iot_conf->curr;
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, frame_conf);
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  bool phase_fired = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (phase_fired || force_write) {
+    // Phase-space frames are numbered from the conf-space trigger's counter.
+    int frame_phase = (conf_fired || !force_write) ? iot_conf->curr - 1 : iot_conf->curr;
+    gkyl_gyrokinetic_app_write_phase(app, t_curr, frame_phase);
+  }
+}
+
+struct time_frame_state { // Time/frame bookkeeping carried from one phase to the next.
+  double t_curr; // Current simulation time.
+  double t_end; // Time at which the current phase ends.
+  int frame_curr; // Index of the current output frame.
+  int num_frames; // Cumulative frame index to be reached at the end of the current phase.
+};
+
+// Re-arm the three I/O triggers for the span [tfs->t_curr, tfs->t_end] so that
+// the frames remaining in the current phase are spread evenly over that span.
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  double t_curr = tfs->t_curr;
+  int frame_curr = tfs->frame_curr;
+  // Clamp to at least one frame so none of the divisions below is by zero
+  // (e.g. a restart that lands exactly on a phase boundary leaves no frames).
+  int num_frames = GKYL_MAX2(1, tfs->num_frames);
+  int frames_remaining = GKYL_MAX2(1, tfs->num_frames - frame_curr);
+  double time_remaining = tfs->t_end - t_curr;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  // Integrated diagnostics are scheduled at least as often as output frames.
+  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc / num_frames) * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  tfs->t_end = tfs->t_curr + pparams->duration; // This phase ends `duration` after the current time.
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames; // Cumulative frame target for this phase.
+
+  // Run a single phase (OAP or FDP) described by pparams, taking at most num_steps steps.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+
+  // Reset I/O triggers so the frames of this phase are spread over [t_curr, t_end].
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Build the per-phase inputs that are pushed into the app below.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .type = pparams->fdot_mult_type,
+    .cellwise_const = true,
+    .write_diagnostics = true,
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx->me,
+    .electron_charge = ctx->qe,
+    .electron_temp = ctx->Te0,
+    .polarization_bmag = ctx->B_p,
+    .is_static = pparams->is_static_field,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Initial time-step guess: the whole phase; later steps use the size suggested by the update.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check.
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps)) {
+    if (step == 1 || step % 1 == 0) // step % 1 == 0 always holds, so this prints every step.
+      gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
+
+    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    if (step == 1 || step % 1 == 0)
+      gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success) {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break; // Leaves only this phase's loop; this function reports no status to the caller.
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested;
+
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end);
+
+    if (dt_init < 0.0) {
+      dt_init = status.dt_actual; // The first step taken sets the reference size.
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break;
+      }
+    }
+    else {
+      num_failures = 0; // Only consecutive small steps count.
+    }
+
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames; // Advanced by the full phase even after an early exit.
+}
+
+// Driver: build the simulation input, then run the configured sequence of OAP/FDP phases.
+int main(int argc, char **argv)
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  // Command-line cell counts, when given, override the defaults stored in the context.
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d = 0; d < ctx.cdim; d++) {
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi, .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c },
+    .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c },
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
+    },
+
+    .projection = {
+      .proj_id = GKYL_PROJ_BIMAXWELLIAN,
+      .density = eval_density_ion,
+      .upar = eval_upar_ion,
+      .temppar = eval_temp_par_ion,
+      .tempperp = eval_temp_perp_ion,
+      .ctx_density = &ctx,
+      .ctx_upar = &ctx,
+      .ctx_temppar = &ctx,
+      .ctx_tempperp = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .self_nu = evalNuIon,
+      .self_nu_ctx = &ctx,
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .density = eval_density_ion_source,
+        .upar = eval_upar_ion_source,
+        .temp = eval_temp_ion_source,
+        .ctx_density = &ctx,
+        .ctx_upar = &ctx,
+        .ctx_temp = &ctx,
+      },
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .num_diag_moments = 4,
+    .diag_moments = { GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_BIMAXWELLIAN },
+  };
+
+  struct gkyl_gyrokinetic_field field = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx.me,
+    .electron_charge = ctx.qe,
+    .electron_temp = ctx.Te0,
+    .is_static = false, // So solvers are allocated.
+  };
+
+  // GK app
+  struct gkyl_gk app_inp = {
+    .name = "gk_mirror_tandem_boltz_elc_poa_1x2v",
+    .cdim = ctx.cdim,
+    .lower = { ctx.z_min },
+    .upper = { ctx.z_max },
+    .cells = { cells_x[0] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+
+    .geometry = {
+      .geometry_id = GKYL_GEOMETRY_MAPC2P,
+      .world = { ctx.psi_eval, 0.0 },
+      .mapc2p = mapc2p, // Mapping of computational to physical space.
+      .c2p_ctx = &ctx,
+      .bfield_func = bfield_func, // Magnetic field.
+      .bfield_ctx = &ctx
+    },
+
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+
+    .num_species = 1,
+    .species = { ion },
+
+    .field = field,
+
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0] },
+      .comm = comm,
+    },
+  };
+
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    }
+    // If no phase matched, pit_curr stays 0 and the counters hold the totals over all phases.
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  // With a step limit, run only the first pending phase (phase_idx_init, which is nonzero after a restart).
+  if (app_args.num_steps != INT_MAX) phase_idx_end = phase_idx_init + 1;
+
+  // Loop over the phases still to be run.
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+freeresources:
+  // Reached on normal completion and on a failed restart read: free app, communicator, context.
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Finalize();
+#endif
+  return 0;
+}
\ No newline at end of file
diff --git a/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c
index 3e9808b9d..3da11c59b 100644
--- a/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c
@@ -685,9 +685,10 @@ int main(int argc, char **argv)
     },
   };
 
-  // Create app object.
   // Set app output name from the executable name (argv[0]).
   snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+  
+  // Create app object.
   gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
 
   // Triggers for IO.
diff --git a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
new file mode 100644
index 000000000..5ba8cae6d
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
@@ -0,0 +1,966 @@
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_parproj.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State of the pseudo orbit-averaged (POA) integrator; also used to tag the type of each phase.
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Not started yet.
+  GK_POA_OAP, // Orbit-averaged phase (OAP).
+  GK_POA_FDP, // Full-dynamics phase (FDP).
+  GK_POA_COMPLETED, // Finished simulation.
+};
+
+struct gk_poa_phase_params { // Settings of a single OAP or FDP phase.
+  enum gk_poa_state phase; // Type of phase.
+  int num_frames; // Number of output frames written during this phase.
+  double duration; // Duration of this phase.
+  double alpha; // Factor multiplying collisionless terms.
+  bool is_static_field; // Whether the field is kept static (presumably not evolved) in this phase.
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multiplier.
+};
+
+// Context of the simulation: all the parameters shared by the callbacks and the driver.
+struct gk_mirror_ctx {
+  int cdim, vdim; // Dimensionality.
+  // Plasma parameters
+  double mi;
+  double qi;
+  double me;
+  double qe;
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau;
+  double Ti0;
+  double kperpRhos;
+  // Parameters controlling initial conditions.
+  double alim;
+  double nuFrac;
+  // Electron-electron collision freq.
+  double logLambdaElc;
+  double nuElc;
+  double elc_nuFrac;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  // Thermal speeds.
+  double vti;
+  double vte;
+  double c_s;
+  // Gyrofrequencies and gyroradii.
+  double omega_ci;
+  double rho_s;
+  double kperp; // Perpendicular wavenumber in SI units.
+  double RatZeq0; // Radius of the field line at Z=0.
+  // Extents of the axial coordinate z, followed by psi values.
+  double z_min;
+  double z_max;
+  double psi_min;
+  double psi_eval;
+  double psi_max;
+  // Physical velocity-space extents used by the velocity maps.
+  double vpar_max_ion;
+  double vpar_max_elc;
+  double mu_max_ion;
+  double mu_max_elc;
+  int Nz;
+  int Nvpar;
+  int Nmu;
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order;
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run.
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+
+  // Source parameters
+  double source_amplitude;
+  double source_sigma;
+  double ion_source_temp;
+  double elc_source_temp;
+};
+
+// Density callback: a uniform 1e17, independent of position and time.
+void eval_density(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const double dens_uniform = 1e17; // The context is not consulted here.
+  fout[0] = dens_uniform;
+}
+
+// upar callback: identically zero, independent of position and time.
+void eval_upar(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const double upar_uniform = 0.0; // The context is not consulted here.
+  fout[0] = upar_uniform;
+}
+
+// Ion temperature callback: uniform, taken from the context's Ti0.
+void eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  fout[0] = params->Ti0;
+}
+
+// Electron temperature callback: uniform, taken from the context's Te0.
+void eval_temp_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  fout[0] = params->Te0;
+}
+
+// Source density callback: source_amplitude * (1 - |z|^6) for |z| <= 1, and a
+// small constant (1e-16) everywhere else.
+void eval_density_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double abs_z = fabs(xn[0]);
+
+  if (!(abs_z <= 1.0)) {
+    // Outside the source region (this branch also takes a NaN coordinate).
+    fout[0] = 1e-16;
+    return;
+  }
+
+  // Inside the source region.
+  fout[0] = params->source_amplitude * (1 - pow(abs_z, 6));
+}
+
+void eval_upar_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Source upar is identically zero.
+  *fout = 0.0;
+}
+
+// Ion source temperature callback: the context's ion_source_temp for
+// |z| <= 1 and one hundredth of it elsewhere.
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double temp_core = params->ion_source_temp;
+  const double temp_floor = temp_core * 1e-2;
+
+  // Positive test on |z| <= 1, so a NaN coordinate yields the floor value.
+  bool in_core = fabs(xn[0]) <= 1.0;
+
+  fout[0] = in_core ? temp_core : temp_floor;
+}
+
+// Electron source temperature callback: the context's elc_source_temp for
+// |z| <= 1 and one hundredth of it elsewhere.
+void
+eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double temp_core = params->elc_source_temp;
+  const double temp_floor = temp_core * 1e-2;
+
+  // Positive test on |z| <= 1, so a NaN coordinate yields the floor value.
+  bool in_core = fabs(xn[0]) <= 1.0;
+
+  fout[0] = in_core ? temp_core : temp_floor;
+}
+
+// Potential initial condition: flat at 8*Te0/qi for |z| <= 0.2*0.98, then
+// decreasing linearly in |z| so that it reaches zero at |z| = z_max.
+void eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double abs_z = fabs(xn[0]);
+  const double z_m = 0.98;
+  const double half_width = 0.2 * z_m; // Half-width of the flat region.
+  const double phi_center = 8.0 * params->Te0 / params->qi;
+
+  double shape = 1.0; // Profile normalized to the central value.
+  if (!(abs_z <= half_width)) {
+    shape = 1 - (abs_z - half_width) / (params->z_max - half_width);
+  }
+
+  fout[0] = phi_center * shape;
+}
+
+// Ion velocity map from computational (vc) to physical (vp) coordinates:
+// vpar is linear near zero and tan-stretched beyond; mu is a cubic power law.
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double comp_vpar = vc[0], comp_mu = vc[1];
+
+  const double b = 1.45; // Stretching parameter of the tan map.
+  const double linear_velocity_threshold = 1. / 6.;
+  // Computational |vpar| below which the map is linear.
+  const double frac_linear = 1 / b * atan(linear_velocity_threshold * tan(b));
+  if (!(fabs(comp_vpar) < frac_linear)) {
+    vp[0] = params->vpar_max_ion * tan(comp_vpar * b) / tan(b);
+  }
+  else {
+    const double func_frac = tan(frac_linear * b) / tan(b);
+    vp[0] = params->vpar_max_ion * func_frac * comp_vpar / frac_linear;
+  }
+  vp[1] = params->mu_max_ion * pow(comp_mu, 3); // Cubic in the computational mu.
+}
+
+// Electron velocity map from computational (vc) to physical (vp) coordinates:
+// vpar is linear near zero and tan-stretched beyond; mu follows a 3/2 power law.
+void mapc2p_vel_elc(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double comp_vpar = vc[0], comp_mu = vc[1];
+
+  const double b = 1.45; // Stretching parameter of the tan map.
+  const double linear_velocity_threshold = 1. / 6.;
+  // Computational |vpar| below which the map is linear.
+  const double frac_linear = 1 / b * atan(linear_velocity_threshold * tan(b));
+  if (!(fabs(comp_vpar) < frac_linear)) {
+    vp[0] = params->vpar_max_elc * tan(comp_vpar * b) / tan(b);
+  }
+  else {
+    const double func_frac = tan(frac_linear * b) / tan(b);
+    vp[0] = params->vpar_max_elc * func_frac * comp_vpar / frac_linear;
+  }
+  vp[1] = params->mu_max_elc * pow(comp_mu, 3.0 / 2.0); // Power 3/2 in the computational mu.
+}
+
+// Build the simulation context (constants, plasma/grid parameters, phase table); free poa_phases with release_ctx().
+struct gk_mirror_ctx
+create_ctx(void)
+{
+  int cdim = 1, vdim = 2; // Dimensionality.
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0; // Not sure if this is right
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS; // ion mass
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+  double kperpRhos = 0.1;
+
+  // Parameters controlling initial conditions.
+  double alim = 0.125;
+
+  double nuFrac = 1.0;
+  double elc_nuFrac = 1 / 5.489216862238348;
+  // Electron-electron collision freq.
+  double logLambdaElc = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Te0 / eV);
+  double nuElc = elc_nuFrac * nuFrac * logLambdaElc * pow(eV, 4.) * n0 /
+    (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.));
+  // Ion-ion collision freq.
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+  double vte = sqrt(Te0 / me);
+  double c_s = sqrt(Te0 / mi);
+
+  // Gyrofrequencies and gyroradii.
+  double omega_ci = eV * B_p / mi;
+  double rho_s = c_s / omega_ci;
+
+  // Perpendicular wavenumber in SI units:
+  double kperp = kperpRhos / rho_s;
+
+  // Geometry parameters.
+  double z_min = -2.0;
+  double z_max = 2.0;
+  double psi_eval = 1e-3;
+
+  // Grid parameters
+  double vpar_max_elc = 30 * vte;
+  double mu_max_elc = me * pow(3. * vte, 2.) / (2. * B_p);
+  double vpar_max_ion = 30 * vti;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 32;
+  int Nvpar = 32; // Number of cells in the parallel velocity direction 96
+  int Nmu = 16;  // Number of cells in the mu direction 192
+  int poly_order = 1;
+
+  // Factor multiplying collisionless terms.
+  double alpha_oap = 0.01;
+  double alpha_fdp = 1.0;
+  // Duration of each phase.
+  double tau_oap = 1.5e-8;
+  double tau_fdp = 1.5e-10;
+  double tau_fdp_extra = 2 * tau_fdp;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 1; // Frames per OAP phase
+  int num_frames_fdp = 1; // Frames per FDP phase
+  int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
+
+  // Whether to evolve the field.
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+  // Whether to enable positivity.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = true;
+  // Type of df/dt multiplier.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Phase structure: num_cycles OAP+FDP pairs followed by one extra, longer FDP.
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  int num_phases = 2 * num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
+    // OAPs.
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs.
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // Add an extra, longer FDP.
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  // Source parameters
+  double source_amplitude = 1.e20;
+  double source_sigma = 0.5;
+  double ion_source_temp = 5000. * eV;
+  double elc_source_temp = 5000. * eV; // Using same temp as ion source for simplicity
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .kperpRhos = kperpRhos,
+    .alim = alim,
+    .nuFrac = nuFrac,
+    .logLambdaElc = logLambdaElc,
+    .nuElc = nuElc,
+    .elc_nuFrac = elc_nuFrac,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
+    .vte = vte,
+    .c_s = c_s,
+    .omega_ci = omega_ci,
+    .rho_s = rho_s,
+    .kperp = kperp,
+    .z_min = z_min,
+    .z_max = z_max,
+    .psi_eval = psi_eval,
+    .vpar_max_ion = vpar_max_ion,
+    .vpar_max_elc = vpar_max_elc,
+    .mu_max_ion = mu_max_ion,
+    .mu_max_elc = mu_max_elc,
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = { Nz, Nvpar, Nmu },
+    .poly_order = poly_order,
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+    .source_amplitude = source_amplitude, .source_sigma = source_sigma, // Read by the source callbacks.
+    .ion_source_temp = ion_source_temp, .elc_source_temp = elc_source_temp,
+  };
+
+  return ctx;
+}
+
+// Release the heap-allocated members of the simulation context.
+//
+// @param ctx Context whose poa_phases array is freed with gkyl_free. The
+//   struct itself is not freed.
+void
+release_ctx(struct gk_mirror_ctx *ctx)
+{
+  gkyl_free(ctx->poa_phases);
+  // Do not leave a stale pointer to freed memory in the caller's context.
+  ctx->poa_phases = NULL;
+}
+
+// Compute the integrated diagnostics when the trigger fires or when forced.
+//
+// @param iot Trigger controlling how often the diagnostics are computed.
+// @param app Gyrokinetic app to operate on.
+// @param t_curr Current simulation time.
+// @param force_calc If true, compute even when the trigger did not fire.
+// @param dt Size of the time step just taken; it is recorded unless negative
+//   (main passes -1.0 for the initial condition).
+void
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
+  double t_curr, bool force_calc, double dt)
+{
+  // The trigger is always polled (and possibly bumped) first, even when forced.
+  bool is_due = gkyl_tm_trigger_check_and_bump(iot, t_curr);
+  if (!is_due && !force_calc) {
+    return;
+  }
+
+  gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+  gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+
+  bool is_dt_negative = dt < 0.0;
+  if (!is_dt_negative) {
+    gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
+  }
+}
+
+// Frame number used to label output files. It is always derived from the
+// configuration-space trigger: if that trigger fired it has already been bumped,
+// so the frame just reached is curr-1; on a forced write without a trigger
+// firing it is curr.
+static int
+write_data_frame_index(const struct gkyl_tm_trigger *iot_conf, bool trig_now_conf,
+  bool force_write)
+{
+  if (!trig_now_conf && force_write) {
+    return iot_conf->curr;
+  }
+  return iot_conf->curr - 1;
+}
+
+// Write configuration-space and phase-space output when their triggers fire,
+// or unconditionally when force_write is set.
+//
+// @param iot_conf Trigger for configuration-space output.
+// @param iot_phase Trigger for phase-space output.
+// @param app Gyrokinetic app to operate on.
+// @param t_curr Current simulation time.
+// @param force_write If true, write both kinds of output regardless of triggers.
+void
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
+{
+  bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  bool do_write_conf = trig_now_conf || force_write;
+  if (do_write_conf) {
+    int conf_frame = write_data_frame_index(iot_conf, trig_now_conf, force_write);
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, conf_frame);
+
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  bool do_write_phase = trig_now_phase || force_write;
+  if (do_write_phase) {
+    // Phase-space files are numbered with the configuration-space frame index.
+    int phase_frame = write_data_frame_index(iot_conf, trig_now_conf, force_write);
+    gkyl_gyrokinetic_app_write_phase(app, t_curr, phase_frame);
+  }
+}
+
+// Running time/frame bookkeeping shared across phases. run_phase sets t_end and
+// num_frames when a phase starts, and updates t_curr and frame_curr before it
+// returns.
+struct time_frame_state {
+  double t_curr; // Current simulation time.
+  double t_end; // End time of current phase.
+  int frame_curr; // Current frame.
+  int num_frames; // Number of frames at the end of current phase.
+};
+
+// Re-arm the three I/O triggers for the interval [tfs->t_curr, tfs->t_end].
+//
+// Each trigger gets its period (dt) recomputed from the time and frames left in
+// the current phase, and its tcurr/curr reset to the current time and frame.
+//
+// @param ctx Simulation context; supplies write_phase_freq and int_diag_calc_freq.
+// @param tfs Current time/frame state (read only).
+// @param trig_write_conf Trigger for configuration-space output.
+// @param trig_write_phase Trigger for phase-space output.
+// @param trig_calc_intdiag Trigger for integrated-diagnostic calculation.
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  int frame_curr = tfs->frame_curr;
+  int num_frames = tfs->num_frames;
+
+  // Prevent division by zero when frame_curr equals (or exceeds) num_frames,
+  // e.g. after a restart exactly at a phase boundary: treat it as one frame left.
+  int frames_remaining = GKYL_MAX2(1, num_frames - frame_curr);
+  double time_remaining = t_end - t_curr;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  // Number of integrated-diagnostic calculations per frame (integer part of
+  // int_diag_calc_freq). The integer division is skipped when num_frames is
+  // zero, where it would be undefined.
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
+  int diag_per_frame = num_frames > 0 ? num_int_diag_calc / num_frames : 1;
+
+  // Never compute integrated diagnostics less often than frames are written.
+  int diag_frames = GKYL_MAX2(frames_remaining, diag_per_frame * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+// Run a single phase (an OAP or an FDP) of the simulation.
+//
+// The phase's end time and final frame are derived from pparams and stored in
+// tfs, the I/O triggers are re-armed, the app's collisionless, fdot-multiplier,
+// positivity and field settings are reset to this phase's values, and the app is
+// then stepped until the phase end time or the step limit is reached.
+//
+// @param app Gyrokinetic app to advance.
+// @param ctx Simulation context.
+// @param num_steps Maximum number of time steps to take in this phase.
+// @param trig_write_conf Trigger for configuration-space output.
+// @param trig_write_phase Trigger for phase-space output.
+// @param trig_calc_intdiag Trigger for integrated-diagnostic calculation.
+// @param tfs Time/frame state; t_end and num_frames are set on entry, t_curr and
+//   frame_curr are updated on exit.
+// @param pparams Parameters of the phase to run.
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  tfs->t_end = tfs->t_curr + pparams->duration;
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames;
+
+  // Run an OAP or FDP.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+
+  // Reset I/O triggers:
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Reset simulation parameters and function pointers.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .type = pparams->fdot_mult_type,
+    .cellwise_const = true,
+    .write_diagnostics = true,
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .polarization_bmag = ctx->B_p,
+    .kperpSq = pow(ctx->kperp, 2.),
+    .is_static = pparams->is_static_field,
+    .time_rate_diagnostics = true,
+    .polarization_potential = eval_potential,
+    // ctx is already a pointer to the context; pass it as is (not its address)
+    // so the callback receives the same type main() hands it.
+    .polarization_potential_ctx = ctx,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "elc", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "elc", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "elc", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Compute initial guess of maximum stable time-step.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check.
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps)) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
+
+    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success) {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested;
+
+    // Force diagnostics and output on the step that reaches the end of the phase.
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end);
+
+    if (dt_init < 0.0) {
+      // First successful step of this phase: remember its size as the reference.
+      dt_init = status.dt_actual;
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
+        // Dump the current state before giving up.
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break;
+      }
+    }
+    else {
+      // Only consecutive small steps count towards the failure limit.
+      num_failures = 0;
+    }
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
+}
+
+// Driver: builds the species, field, geometry and app inputs, optionally
+// restarts from a saved frame, runs the configured sequence of phases, prints
+// run statistics and frees all resources.
+//
+// Returns 0 in all cases, including when reading the restart file fails.
+int main(int argc, char **argv)
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  // Command-line cell counts override the defaults stored in the context.
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d = 0; d < ctx.cdim; d++) {
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species elc = {
+    .name = "elc",
+    .charge = ctx.qe,
+    .mass = ctx.me,
+    .vdim = ctx.vdim,
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_elc,
+      .ctx = &ctx,
+    },
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .density = eval_density,
+      .ctx_density = &ctx,
+      .upar = eval_upar,
+      .ctx_upar = &ctx,
+      .temp = eval_temp_elc,
+      .ctx_temp = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .num_cross_collisions = 1,
+      .collide_with = { "ion" },
+      .write_diagnostics = true,
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_elc_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 5,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+      }
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+    },
+  };
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,
+    .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
+    .cells = { cells_v[0], cells_v[1] },
+    .polarization_density = ctx.n0,
+    .scale_with_polarization = true,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .density = eval_density,
+      .ctx_density = &ctx,
+      .upar = eval_upar,
+      .ctx_upar = &ctx,
+      .temp = eval_temp_ion,
+      .ctx_temp = &ctx,
+    },
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Ti0,
+      .num_cross_collisions = 1,
+      .collide_with = { "elc" },
+      .write_diagnostics = true,
+    },
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_ion_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 6,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_HAMILTONIAN },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+      },
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    },
+  };
+  struct gkyl_gyrokinetic_field field = {
+    .polarization_bmag = ctx.B_p,
+    .kperpSq = pow(ctx.kperp, 2.),
+    .time_rate_diagnostics = true,
+    .is_static = false,
+    .polarization_potential = eval_potential,
+    .polarization_potential_ctx = &ctx,
+  };
+
+  struct gkyl_mirror_geo_grid_inp grid_inp = {
+    .filename_psi = "gyrokinetic/data/unit/wham_hires.geqdsk_psi.gkyl", // psi file to use
+    .rclose = 0.2, // closest R to region of interest
+    .zmin = -2.0,  // Z of lower boundary
+    .zmax = 2.0,   // Z of upper boundary
+    .include_axis = false, // Include R=0 axis in grid
+    .fl_coord = GKYL_GEOMETRY_MIRROR_GRID_GEN_PSI_CART_Z, // coordinate system for psi grid
+  };
+
+  struct gkyl_gk app_inp = {  // GK app
+    .name = "gk_wham_kinetic_poa_1x2v_p1",
+    .cdim = ctx.cdim,
+    .lower = { ctx.z_min },
+    .upper = { ctx.z_max },
+    .cells = { cells_x[0] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+    .geometry = {
+      .geometry_id = GKYL_GEOMETRY_MIRROR,
+      .world = { ctx.psi_eval, 0.0 },
+      .mirror_grid_info = grid_inp,
+    },
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+    .num_species = 2,
+    .species = { elc, ion },
+    .field = field,
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0] },
+      .comm = comm,
+    },
+  };
+
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in. Default to the last phase, so a restart at
+    // (or marginally past) the end of the schedule does not fall back to phase 0
+    // and run the whole schedule again. In that case time_count and frame_count
+    // hold the schedule totals, which are the end of the last phase.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = ctx.num_phases - 1;
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    }
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  // When the user limits the number of steps, run a single phase only: the one
+  // we start from, which is not phase 0 after a restart.
+  if (app_args.num_steps != INT_MAX) {
+    phase_idx_end = phase_idx_init + 1;
+  }
+
+  // Loop over the phases.
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c
index ae0e96a7b..07c910376 100644
--- a/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c
@@ -812,7 +812,7 @@ int main(int argc, char **argv)
         .map_strength = 0.5,
         .maximum_slope_at_min_B = 2,
         .gaussian_std = 0.2,
-        .gaussian_max_integration_width = 1.0,
+        .gaussian_max_integration_width = 0.5,
       },
     },
 
diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index 4d590b952..f7eaab745 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -1,5 +1,7 @@
 #include <acutest.h>
 
+#define _USE_MATH_DEFINES
+#include <math.h>
 #include <gkyl_util.h>
 #include <gkyl_array.h>
 #include <gkyl_array_ops.h>
@@ -14,8 +16,13 @@
 #include <gkyl_eval_on_nodes.h>
 #include <gkyl_proj_on_basis.h>
 #include <gkyl_loss_cone_mask_gyrokinetic.h>
+#include <gkyl_array_dg_find_peaks.h>
 #include <gkyl_const.h>
 
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
 struct loss_cone_mask_test_ctx {
   int cdim; // Configuration space dimensionality.
   double eV; // Elementary charge.
@@ -25,30 +32,34 @@ struct loss_cone_mask_test_ctx {
   double mass, charge; // Species mass and charge.
   double n0, T0, B0; // Reference parameters.
   double phi_fac; // phi(z=0) = phi_fac*T0/e;
+  double psi_max; // For 2x: upper limit of psi (radial coordinate).
   double z_max, vpar_max, mu_max; // Upper grid extents.
-  int Nz, Nvpar, Nmu; // Number of cells in each direction.
+  int Npsi, Nz, Nvpar, Nmu; // Number of cells in each direction.
   enum gkyl_quad_type quad_type; // Type of quadrature/nodes.
   int num_quad; // Number of quadrature points to use in projection, 1 or p+1.
   bool cellwise_trap_loss; // Whether a whole cell is either trapped or lost.
+  bool is_tandem; // Whether this is a tandem mirror configuration.
+  double B_tandem; // Field at tandem mirror (for tandem case).
+  double z_tandem; // z-coordinate of tandem mirror (for tandem case).
 };
 
 // allocate array (filled with zeros)
 static struct gkyl_array*
 mkarr(bool use_gpu, long nc, long size)
 {
-  struct gkyl_array* a = use_gpu? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size)
-	                        : gkyl_array_new(GKYL_DOUBLE, nc, size);
+  struct gkyl_array *a = use_gpu? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size)
+                          : gkyl_array_new(GKYL_DOUBLE, nc, size);
   return a;
 }
 
 void
-mapc2p_3x(double t, const double *xc, double* GKYL_RESTRICT xp, void *ctx)
+mapc2p_3x(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
 {
   xp[0] = xc[0]; xp[1] = xc[1]; xp[2] = xc[2];
 }
 
 void
-bfield_func_3x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+bfield_func_3x(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
   double x = xc[0], y = xc[1], z = xc[2];
 
@@ -58,12 +69,26 @@ bfield_func_3x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx
 
   fout[0] = 0.0;
   fout[1] = 0.0;
-  fout[2] = B_m * (1.0 - ((R_m-1.0)/R_m)*pow(cos(z), 2.0));
-//  fout[0] = (B_m/R_m) * (1.0 + (R_m-1.0)*pow(sin(z), 2.0));
+  fout[2] = B_m * (1.0 - ((R_m - 1.0) / R_m) * pow(cos(z), 2.0));
+// fout[0] = (B_m/R_m) * (1.0 + (R_m-1.0)*pow(sin(z), 2.0));
+}
+
+void
+phi_func_1x(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xc[0];
+
+  struct loss_cone_mask_test_ctx *params = ctx;
+  double phi_fac = params->phi_fac;
+  double T0 = params->T0;
+  double eV = params->eV;
+
+  fout[0] = 0.0; // 0.5 * phi_fac*T0/eV * (1.0 + cos(z));
 }
 
+// Non-zero electrostatic potential: peaked at center, zero at wall.
 void
-phi_func_1x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+phi_func_1x_nonzero(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
   double z = xc[0];
 
@@ -71,12 +96,51 @@ phi_func_1x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
   double phi_fac = params->phi_fac;
   double T0 = params->T0;
   double eV = params->eV;
+  double z_max = params->z_max;
+
+  // Parabolic potential profile: phi(z) = phi_fac*T0/eV * (1 - (z/z_max)^2)
+  // This gives phi=phi_fac*T0/eV at z=0, and phi=0 at z=+/-z_max.
+  fout[0] = phi_fac * T0 / eV * (1.0 - pow(z / z_max, 2.0));
+}
+
+// Reference mask for nonzero phi case.
+void
+mask_ref_1x2v_nonzero_phi(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xc[0], vpar = xc[1], mu = xc[2];
+  struct loss_cone_mask_test_ctx *params = ctx;
+
+  double z_m = params->z_m;
+  double mass = params->mass;
+  double charge = params->charge;
+
+  double phi, phi_m;
+  phi_func_1x_nonzero(t, xc, &phi, ctx);
+  phi_func_1x_nonzero(t, &z_m, &phi_m, ctx);
+
+  double bfield[3], bmag;
+  double zinfl[3] = { 0.0 }, z_minfl[3] = { 0.0 };
+  zinfl[2] = z, z_minfl[2] = z_m;
+  bfield_func_3x(t, zinfl, bfield, ctx);
+  bmag = bfield[2];
+
+  double bfield_m[3], bmag_m;
+  bfield_func_3x(t, z_minfl, bfield_m, ctx);
+  bmag_m = bfield_m[2];
 
-  fout[0] = 0.0; //0.5 * phi_fac*T0/eV * (1.0 + cos(z));
+  // mu_bound = (0.5*m*vpar^2+q*(phi-phi_m))/(B*(B_max/B-1))
+  double mu_bound = (0.5 * mass * pow(vpar,
+    2) + charge * (phi - phi_m)) / (bmag * (bmag_m / bmag - 1));
+  if (mu_bound < mu && fabs(z) < z_m) {
+    fout[0] = 1.0;
+  }
+  else {
+    fout[0] = 0.0;
+  }
 }
 
 void
-mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+mask_ref_1x2v(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
   double z = xc[0], vpar = xc[1], mu = xc[2];
   struct loss_cone_mask_test_ctx *params = ctx;
@@ -90,7 +154,7 @@ mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
   phi_func_1x(t, &z_m, &phi_m, ctx);
 
   double bfield[3], bmag;
-  double zinfl[3] = {0.0}, z_minfl[3] = {0.0};
+  double zinfl[3] = { 0.0 }, z_minfl[3] = { 0.0 };
   zinfl[2] = z, z_minfl[2] = z_m;
   bfield_func_3x(t, zinfl, bfield, ctx);
   bmag = bfield[2];
@@ -100,7 +164,8 @@ mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
   bmag_m = bfield_m[2];
 
   // mu_bound = (0.5*m*vpar^2+q*(phi-phi_m))/(B*(B_max/B-1))
-  double mu_bound = (0.5*mass*pow(vpar,2)+charge*(phi-phi_m))/(bmag*(bmag_m/bmag-1));
+  double mu_bound = (0.5 * mass * pow(vpar,
+    2) + charge * (phi - phi_m)) / (bmag * (bmag_m / bmag - 1));
   if (mu_bound < mu && fabs(z) < z_m)
     fout[0] = 1.0;
   else
@@ -120,11 +185,11 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .eV = eV,
     .R_m = 8.0,
     .B_m = 4.0,
-    .z_m = M_PI/2.0,
-    .mass = 2.014*mass_proton,
+    .z_m = M_PI / 2.0,
+    .mass = 2.014 * mass_proton,
     .charge = eV,
     .n0 = 1e18,
-    .T0 = 100*eV,
+    .T0 = 100 * eV,
     .phi_fac = 3.0,
     .z_max = M_PI,
     .Nz = 8,
@@ -134,31 +199,32 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .num_quad = 2,
     .cellwise_trap_loss = true,
   };
-  ctx.B0 = ctx.B_m/2.0;
-  ctx.vpar_max = 6.0*sqrt(ctx.T0/ctx.mass);
-  ctx.mu_max = 0.5*ctx.mass*pow(ctx.vpar_max,2)/ctx.B0;
+  ctx.B0 = ctx.B_m / 2.0;
+  ctx.vpar_max = 6.0 * sqrt(ctx.T0 / ctx.mass);
+  ctx.mu_max = 0.5 * ctx.mass * pow(ctx.vpar_max, 2) / ctx.B0;
 
   double mass = ctx.mass;
-  double lower[] = {-ctx.z_max, -ctx.vpar_max, 0.0}, upper[] = {ctx.z_max, ctx.vpar_max, ctx.mu_max};
-  int cells[] = {ctx.Nz, ctx.Nvpar, ctx.Nmu};
-  const int ndim = sizeof(cells)/sizeof(cells[0]);
+  double lower[] = { -ctx.z_max, -ctx.vpar_max, 0.0 },
+    upper[] = { ctx.z_max, ctx.vpar_max, ctx.mu_max };
+  int cells[] = { ctx.Nz, ctx.Nvpar, ctx.Nmu };
+  const int ndim = sizeof(cells) / sizeof(cells[0]);
   const int cdim = ctx.cdim;
-  const int vdim = ndim-ctx.cdim;
+  const int vdim = ndim - ctx.cdim;
 
   // Grids.
   double lower_conf[cdim], upper_conf[cdim];
   int cells_conf[cdim];
-  for (int d=0; d<cdim; d++) {
+  for (int d = 0; d < cdim; d++) {
     lower_conf[d] = lower[d];
     upper_conf[d] = upper[d];
     cells_conf[d] = cells[d];
   }
   double lower_vel[vdim], upper_vel[vdim];
   int cells_vel[vdim];
-  for (int d=0; d<vdim; d++) {
-    lower_vel[d] = lower[cdim+d];
-    upper_vel[d] = upper[cdim+d];
-    cells_vel[d] = cells[cdim+d];
+  for (int d = 0; d < vdim; d++) {
+    lower_vel[d] = lower[cdim + d];
+    upper_vel[d] = upper[cdim + d];
+    cells_vel[d] = cells[cdim + d];
   }
   struct gkyl_rect_grid grid;
   gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
@@ -169,7 +235,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 
   // Basis functions.
   struct gkyl_basis basis, basis_conf;
-  if (poly_order == 1) 
+  if (poly_order == 1)
     gkyl_cart_modal_gkhybrid(&basis, cdim, vdim);
   else
     gkyl_cart_modal_serendip(&basis, ndim, poly_order);
@@ -180,14 +246,14 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 #ifdef GKYL_HAVE_CUDA
     basis_on_dev = gkyl_cu_malloc(sizeof(struct gkyl_basis));
     basis_on_dev_conf = gkyl_cu_malloc(sizeof(struct gkyl_basis));
-    if (poly_order == 1) 
+    if (poly_order == 1)
       gkyl_cart_modal_gkhybrid_cu_dev(basis_on_dev, cdim, vdim);
     else
       gkyl_cart_modal_serendip_cu_dev(basis_on_dev, ndim, poly_order);
     gkyl_cart_modal_serendip_cu_dev(basis_on_dev_conf, cdim, poly_order);
 #endif
   }
-  else { 
+  else {
     basis_on_dev = &basis;
     basis_on_dev_conf = &basis_conf;
   }
@@ -202,7 +268,9 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   gkyl_create_grid_ranges(&grid_vel, ghost_vel, &local_ext_vel, &local_vel);
 
   int ghost[GKYL_MAX_DIM] = { 0 };
-  for (int d=0; d<cdim; d++) ghost[d] = ghost_conf[d];
+  for (int d = 0; d < cdim; d++) {
+    ghost[d] = ghost_conf[d];
+  }
   struct gkyl_range local, local_ext; // local, local-ext phase-space ranges
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
@@ -211,7 +279,7 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   // Initialize geometry
   struct gkyl_gk_geometry_inp geometry_input = {
     .geometry_id = GKYL_GEOMETRY_MAPC2P,
-    .world = {0.0, 0.0},
+    .world = { 0.0, 0.0 },
     .mapc2p = mapc2p_3x, // mapping of computational to physical space
     .c2p_ctx = 0,
     .bfield_func = bfield_func_3x, // magnetic field magnitude
@@ -225,16 +293,55 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .basis = basis_conf,
   };
   geometry_input.geo_grid = gkyl_gk_geometry_augment_grid(grid_conf, geometry_input);
-  gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext, &geometry_input.geo_local);
+  gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext,
+    &geometry_input.geo_local);
   gkyl_cart_modal_serendip(&geometry_input.geo_basis, 3, poly_order);
-  struct gk_geometry* gk_geom_3d;
+  struct gk_geometry *gk_geom_3d;
   gk_geom_3d = gkyl_gk_geometry_mapc2p_new(&geometry_input);
   // Deflate geometry if necessary.
   struct gk_geometry *gk_geom = gkyl_gk_geometry_deflate(gk_geom_3d, &geometry_input);
   gkyl_gk_geometry_release(gk_geom_3d);
+
+  // Use array_dg_find_peaks to find bmag_max along the z direction.
+  // Search along the parallel (z) direction, which is the last configuration space dimension.
+  int search_dir = cdim - 1;
+  struct gkyl_array_dg_find_peaks_inp peak_inp = {
+    .basis = &basis_conf,
+    .grid = &grid_conf,
+    .range = &local_conf,
+    .range_ext = &local_ext_conf,
+    .search_dir = search_dir,
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *bmag_peak_finder =
+    gkyl_array_dg_find_peaks_new(&peak_inp, gk_geom->geo_int.bmag);
+  gkyl_array_dg_find_peaks_advance(bmag_peak_finder, gk_geom->geo_int.bmag);
+
+  // Get the LOCAL_MAX peak (bmag maximum along z direction).
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(bmag_peak_finder);
+  int bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less
+  const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder,
+    bmag_max_peak_idx);
+  const struct gkyl_array *bmag_max_z_coord =
+    gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_array *bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder,
+    num_peaks - 1); // Last peak (index num_peaks-1) is the wall (edge).
+  const struct gkyl_array *bmag_wall_z_coord =
+    gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, num_peaks - 1);
+  const struct gkyl_basis *bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(bmag_peak_finder);
+  const struct gkyl_range *bmag_max_range = gkyl_array_dg_find_peaks_get_range(bmag_peak_finder);
+  const struct gkyl_range *bmag_max_range_ext =
+    gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder);
+
+  // Allocate arrays for phi evaluated at all peak locations.
+  struct gkyl_array **phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array *));
+  for (int p = 0; p < num_peaks; p++) {
+    phi_at_peaks[p] = mkarr(use_gpu, bmag_max_basis->num_basis, bmag_max_range_ext->volume);
+  }
+
   // If we are on the gpu, copy from host
   if (use_gpu) {
-    struct gk_geometry* gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
+    struct gk_geometry *gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
     gkyl_gk_geometry_release(gk_geom);
     gk_geom = gkyl_gk_geometry_acquire(gk_geom_dev);
     gkyl_gk_geometry_release(gk_geom_dev);
@@ -248,82 +355,55 @@ test_1x2v_gk(int poly_order, bool use_gpu)
   // Project the electostatic potential.
   struct gkyl_array *phi = mkarr(use_gpu, basis_conf.num_basis, local_ext_conf.volume);
   struct gkyl_array *phi_ho = use_gpu? mkarr(false, phi->ncomp, phi->size)
-	                             : gkyl_array_acquire(phi);
+                               : gkyl_array_acquire(phi);
 
   gkyl_eval_on_nodes *evphi = gkyl_eval_on_nodes_new(&grid_conf, &basis_conf, 1, phi_func_1x, &ctx);
   gkyl_eval_on_nodes_advance(evphi, 0.0, &local_conf, phi_ho);
   gkyl_eval_on_nodes_release(evphi);
   gkyl_array_copy(phi, phi_ho);
 
-  // Location of the mirror throat.
-  double bmag_max_loc_ho[] = {ctx.z_m};
-  double *bmag_max_loc;
-  if (use_gpu) {
-    bmag_max_loc = gkyl_cu_malloc(sizeof(double));
-    gkyl_cu_memcpy(bmag_max_loc, bmag_max_loc_ho, sizeof(double), GKYL_CU_MEMCPY_H2D);
-  }
-  else {
-    bmag_max_loc = gkyl_malloc(sizeof(double));
-    memcpy(bmag_max_loc, bmag_max_loc_ho, sizeof(double));
-  }
-
-  // Get the magnetic field at the mirror throat.
-  double bfield_max_ho[3], bmag_max_ho[1];
-  double xc_infl[] = {0.0,0.0,ctx.z_m};
-  bfield_func_3x(0.0, xc_infl, bfield_max_ho, &ctx);
-  bmag_max_ho[0] = bfield_max_ho[2];
-  double *bmag_max;
-  if (use_gpu) {
-    bmag_max = gkyl_cu_malloc(sizeof(double));
-    gkyl_cu_memcpy(bmag_max, bmag_max_ho, sizeof(double), GKYL_CU_MEMCPY_H2D);
-  }
-  else {
-    bmag_max = gkyl_malloc(sizeof(double));
-    memcpy(bmag_max, bmag_max_ho, sizeof(double));
-  }
+  // Project phi onto peak locations to get phi_m at the mirror throat.
+  gkyl_array_dg_find_peaks_project_on_peaks(bmag_peak_finder, phi, phi_at_peaks);
 
-  // Get the potential at the mirror throat (z=pi/2).
-  double phi_m_ho[1];
-  double xc[] = {ctx.z_m};
-  phi_func_1x(0.0, xc, phi_m_ho, &ctx);
-  double *phi_m;
-  if (use_gpu) {
-    phi_m = gkyl_cu_malloc(sizeof(double));
-    gkyl_cu_memcpy(phi_m, phi_m_ho, sizeof(double), GKYL_CU_MEMCPY_H2D);
-  }
-  else {
-    phi_m = gkyl_malloc(sizeof(double));
-    memcpy(phi_m, phi_m_ho, sizeof(double));
-  }
+  // Get phi at the mirror throat (bmag_max peak location).
+  const struct gkyl_array *phi_m = phi_at_peaks[bmag_max_peak_idx];
 
   // Basis used to project the mask.
   struct gkyl_basis basis_mask;
-  if (ctx.num_quad == 1 || ctx.cellwise_trap_loss)
+  if (ctx.num_quad == 1 || ctx.cellwise_trap_loss) {
     gkyl_cart_modal_serendip(&basis_mask, ndim, 0);
+  }
   else {
-    if (poly_order == 1) 
+    if (poly_order == 1) {
       gkyl_cart_modal_gkhybrid(&basis_mask, cdim, vdim);
-    else
+    }
+    else {
       gkyl_cart_modal_serendip(&basis_mask, ndim, poly_order);
+    }
   }
 
   // Create mask array.
   struct gkyl_array *mask = mkarr(use_gpu, basis_mask.num_basis, local_ext.volume);
   struct gkyl_array *mask_ho = use_gpu? mkarr(false, mask->ncomp, mask->size)
-	                              : gkyl_array_acquire(mask);
+                                : gkyl_array_acquire(mask);
 
   // Project the loss cone mask.
+  // Use bmag_max and bmag_max_z_coord arrays from find_peaks.
   struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
     .phase_grid = &grid,
     .conf_basis = &basis_conf,
     .phase_basis = &basis,
-    .conf_range =  &local_conf,
+    .conf_range = &local_conf,
     .conf_range_ext = &local_ext_conf,
-    .vel_range = &local_vel, 
+    .vel_range = &local_vel,
     .vel_map = gvm,
     .bmag = gk_geom->geo_int.bmag,
+    .bmag_max_z_coord = bmag_max_z_coord,
     .bmag_max = bmag_max,
-    .bmag_max_loc = bmag_max_loc,
+    .bmag_wall = bmag_wall,
+    .bmag_wall_z_coord = bmag_wall_z_coord,
+    .bmag_max_basis = bmag_max_basis,
+    .bmag_max_range = bmag_max_range,
     .mass = ctx.mass,
     .charge = ctx.charge,
     .qtype = ctx.quad_type,
@@ -331,93 +411,463 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .cellwise_trap_loss = ctx.cellwise_trap_loss,
     .use_gpu = use_gpu,
   };
-  struct gkyl_loss_cone_mask_gyrokinetic *proj_mask = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
+  struct gkyl_loss_cone_mask_gyrokinetic *proj_mask =
+    gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
 
-  gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, mask);
+  gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, phi_m, mask);
 
   gkyl_array_copy(mask_ho, mask);
 
   // Project expected mask.
   struct gkyl_array *mask_ref_ho = mkarr(false, basis_mask.num_basis, local_ext.volume);
-  gkyl_proj_on_basis *evmask_ref = gkyl_proj_on_basis_new(&grid, &basis_mask, basis_mask.poly_order+1, 1, mask_ref_1x2v, &ctx);
+  gkyl_proj_on_basis *evmask_ref = gkyl_proj_on_basis_new(&grid, &basis_mask,
+    basis_mask.poly_order + 1, 1, mask_ref_1x2v, &ctx);
   gkyl_proj_on_basis_advance(evmask_ref, 0.0, &local, mask_ref_ho);
   gkyl_proj_on_basis_release(evmask_ref);
-  if (ctx.num_quad == 1) {
-    // Rescale to deal with normalization.
-    gkyl_array_scale(mask_ref_ho, 1.0/pow(sqrt(2.0),cdim+vdim));
-  }
-
-//  // values to compare  at index (1, 9, 9) [remember, lower-left index is (1,1,1)]
-//  double p1_vals[] = {  
-//     7.2307139183122714e-03, 0.0000000000000000e+00, 1.9198293226362615e-04, -7.7970439910196674e-04, 0.0000000000000000e+00, 0.0000000000000000e+00,
-//    -2.0701958137127286e-05, 0.0000000000000000e+00, -1.4953406100022537e-04, 0.0000000000000000e+00, 1.6124599381836546e-05, 0.0000000000000000e+00,
-//    -8.2719200283232917e-19, 0.0000000000000000e+00, -3.4806248503322844e-20, 0.0000000000000000e+00, };
-//  double p2_vals[] = { 
-//    7.2307468609012666e-03, 0.0000000000000000e+00, 1.9198380692343289e-04, -7.8092230706225602e-04, 0.0000000000000000e+00, 0.0000000000000000e+00,
-//    -2.0734294852987710e-05, 3.6591823321385775e-18, -1.4953474226616330e-04, 3.7739922227981074e-05, 0.0000000000000000e+00, 7.0473141211557788e-19,
-//    0.0000000000000000e+00, -4.8789097761847700e-19, 1.6149786206441256e-05, 0.0000000000000000e+00, 1.0020339643610290e-06, 5.4210108624275222e-20,
-//    0.0000000000000000e+00, 0.0000000000000000e+00 };
+
+//// values to compare  at index (1, 9, 9) [remember, lower-left index is (1,1,1)]
+// double p1_vals[] = {
+// 7.2307139183122714e-03, 0.0000000000000000e+00, 1.9198293226362615e-04, -7.7970439910196674e-04, 0.0000000000000000e+00, 0.0000000000000000e+00,
+// -2.0701958137127286e-05, 0.0000000000000000e+00, -1.4953406100022537e-04, 0.0000000000000000e+00, 1.6124599381836546e-05, 0.0000000000000000e+00,
+// -8.2719200283232917e-19, 0.0000000000000000e+00, -3.4806248503322844e-20, 0.0000000000000000e+00, };
+// double p2_vals[] = {
+// 7.2307468609012666e-03, 0.0000000000000000e+00, 1.9198380692343289e-04, -7.8092230706225602e-04, 0.0000000000000000e+00, 0.0000000000000000e+00,
+// -2.0734294852987710e-05, 3.6591823321385775e-18, -1.4953474226616330e-04, 3.7739922227981074e-05, 0.0000000000000000e+00, 7.0473141211557788e-19,
+// 0.0000000000000000e+00, -4.8789097761847700e-19, 1.6149786206441256e-05, 0.0000000000000000e+00, 1.0020339643610290e-06, 5.4210108624275222e-20,
+// 0.0000000000000000e+00, 0.0000000000000000e+00 };
 //
-//  const double *fv = gkyl_array_cfetch(distf, gkyl_range_idx(&local_ext, (int[3]) { 1, 9, 9 }));
-//  if (poly_order == 1) {
-//    for (int i=0; i<basis.num_basis; ++i) {
-//      TEST_CHECK( gkyl_compare_double(p1_vals[i], fv[i], 1e-2) );
-//    }
-//  }
+// const double *fv = gkyl_array_cfetch(distf, gkyl_range_idx(&local_ext, (int[3]) { 1, 9, 9 }));
+// if (poly_order == 1) {
+// for (int i=0; i<basis.num_basis; ++i) {
+// TEST_CHECK( gkyl_compare_double(p1_vals[i], fv[i], 1e-2) );
+// }
+// }
 //
-//  if (poly_order == 2) {
-//    for (int i=0; i<basis.num_basis; ++i)
-//      TEST_CHECK( gkyl_compare_double(p2_vals[i], fv[i], 1e-2) );
-//  }
+// if (poly_order == 2) {
+// for (int i=0; i<basis.num_basis; ++i)
+// TEST_CHECK( gkyl_compare_double(p2_vals[i], fv[i], 1e-2) );
+// }
 
   // Write mask to file.
   char fname[1024];
-  if (use_gpu)
+  if (use_gpu) {
     sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_dev.gkyl", poly_order);
-  else
+  }
+  else {
     sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_ho.gkyl", poly_order);
+  }
   gkyl_grid_sub_array_write(&grid, &local, 0, mask_ho, fname);
 
   sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_ref.gkyl", poly_order);
   gkyl_grid_sub_array_write(&grid, &local, 0, mask_ref_ho, fname);
 
+  // Free the phi_at_peaks arrays (phi_m aliases one of them, so it needs no separate release).
+  for (int p = 0; p < num_peaks; p++) {
+    gkyl_array_release(phi_at_peaks[p]);
+  }
+  gkyl_free(phi_at_peaks);
+  gkyl_array_release(phi);
+  gkyl_array_release(phi_ho);
+  gkyl_array_release(mask);
+  gkyl_array_release(mask_ho);
+  gkyl_array_release(mask_ref_ho);
+  gkyl_loss_cone_mask_gyrokinetic_release(proj_mask);
+  gkyl_velocity_map_release(gvm);
+  // Release acquired peak arrays.
+  gkyl_array_release(bmag_max);
+  gkyl_array_release(bmag_max_z_coord);
+  gkyl_array_release(bmag_wall);
+  gkyl_array_release(bmag_wall_z_coord);
+  gkyl_array_dg_find_peaks_release(bmag_peak_finder);
+  // Release the geometry, then pmap; pmap must be released only once.
+  gkyl_gk_geometry_release(gk_geom);
+  gkyl_position_map_release(pmap);
+
+#ifdef GKYL_HAVE_CUDA
+  if (use_gpu) {
+    gkyl_cu_free(basis_on_dev);
+    gkyl_cu_free(basis_on_dev_conf);
+  }
+#endif
+}
+
+// Test with non-zero electrostatic potential.
+// This tests that the trapped-passing boundary correctly accounts for
+// the q*(phi-phi_m) term in the mu_bound calculation.
+void
+test_1x2v_nonzero_phi_gk(int poly_order, bool use_gpu)
+{
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mass_proton = GKYL_PROTON_MASS;
+
+  // Set reference parameters.
+  struct loss_cone_mask_test_ctx ctx = {
+    .cdim = 1,
+    .eV = eV,
+    .R_m = 8.0,
+    .B_m = 4.0,
+    .z_m = M_PI / 2.0,
+    .mass = 2.014 * mass_proton,
+    .charge = eV,  // Positive ions.
+    .n0 = 1e18,
+    .T0 = 100 * eV,
+    .phi_fac = 3.0,  // phi(z=0) = 3*T0/e = 300 V.
+    .z_max = M_PI,
+    .Nz = 8,
+    .Nvpar = 8,
+    .Nmu = 4,
+    .quad_type = GKYL_GAUSS_LOBATTO_QUAD,
+    .num_quad = 2,
+    .cellwise_trap_loss = true,
+  };
+  ctx.B0 = ctx.B_m / 2.0;
+  ctx.vpar_max = 6.0 * sqrt(ctx.T0 / ctx.mass);
+  ctx.mu_max = 0.5 * ctx.mass * pow(ctx.vpar_max, 2) / ctx.B0;
+
+  double lower[] = { -ctx.z_max, -ctx.vpar_max, 0.0 };
+  double upper[] = { ctx.z_max, ctx.vpar_max, ctx.mu_max };
+  int cells[] = { ctx.Nz, ctx.Nvpar, ctx.Nmu };
+  const int ndim = sizeof(cells) / sizeof(cells[0]);
+  const int cdim = ctx.cdim;
+  const int vdim = ndim - ctx.cdim;
+
+  // Grids.
+  double lower_conf[cdim], upper_conf[cdim];
+  int cells_conf[cdim];
+  for (int d = 0; d < cdim; d++) {
+    lower_conf[d] = lower[d];
+    upper_conf[d] = upper[d];
+    cells_conf[d] = cells[d];
+  }
+  double lower_vel[vdim], upper_vel[vdim];
+  int cells_vel[vdim];
+  for (int d = 0; d < vdim; d++) {
+    lower_vel[d] = lower[cdim + d];
+    upper_vel[d] = upper[cdim + d];
+    cells_vel[d] = cells[cdim + d];
+  }
+  struct gkyl_rect_grid grid;
+  gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
+  struct gkyl_rect_grid grid_conf;
+  gkyl_rect_grid_init(&grid_conf, cdim, lower_conf, upper_conf, cells_conf);
+  struct gkyl_rect_grid grid_vel;
+  gkyl_rect_grid_init(&grid_vel, vdim, lower_vel, upper_vel, cells_vel);
+
+  // Basis functions.
+  struct gkyl_basis basis, basis_conf;
+  if (poly_order == 1) {
+    gkyl_cart_modal_gkhybrid(&basis, cdim, vdim);
+  }
+  else {
+    gkyl_cart_modal_serendip(&basis, ndim, poly_order);
+  }
+  gkyl_cart_modal_serendip(&basis_conf, cdim, poly_order);
+
+  struct gkyl_basis *basis_on_dev, *basis_on_dev_conf;
+  if (use_gpu) {
+#ifdef GKYL_HAVE_CUDA
+    basis_on_dev = gkyl_cu_malloc(sizeof(struct gkyl_basis));
+    basis_on_dev_conf = gkyl_cu_malloc(sizeof(struct gkyl_basis));
+    if (poly_order == 1) {
+      gkyl_cart_modal_gkhybrid_cu_dev(basis_on_dev, cdim, vdim);
+    }
+    else {
+      gkyl_cart_modal_serendip_cu_dev(basis_on_dev, ndim, poly_order);
+    }
+    gkyl_cart_modal_serendip_cu_dev(basis_on_dev_conf, cdim, poly_order);
+#endif
+  }
+  else {
+    basis_on_dev = &basis;
+    basis_on_dev_conf = &basis_conf;
+  }
+
+  // Ranges.
+  int ghost_conf[] = { 1, 1, 1 };
+  struct gkyl_range local_conf, local_ext_conf;
+  gkyl_create_grid_ranges(&grid_conf, ghost_conf, &local_ext_conf, &local_conf);
+
+  int ghost_vel[] = { 0, 0 };
+  struct gkyl_range local_vel, local_ext_vel;
+  gkyl_create_grid_ranges(&grid_vel, ghost_vel, &local_ext_vel, &local_vel);
+
+  int ghost[GKYL_MAX_DIM] = { 0 };
+  for (int d = 0; d < cdim; d++) {
+    ghost[d] = ghost_conf[d];
+  }
+  struct gkyl_range local, local_ext;
+  gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
+
+  struct gkyl_position_map *pmap = gkyl_position_map_null_new();
+
+  // Initialize geometry.
+  struct gkyl_gk_geometry_inp geometry_input = {
+    .geometry_id = GKYL_GEOMETRY_MAPC2P,
+    .world = { 0.0, 0.0 },
+    .mapc2p = mapc2p_3x,
+    .c2p_ctx = 0,
+    .bfield_func = bfield_func_3x,
+    .bfield_ctx = &ctx,
+    .grid = grid_conf,
+    .local = local_conf,
+    .local_ext = local_ext_conf,
+    .global = local_conf,
+    .global_ext = local_ext_conf,
+    .basis = basis_conf,
+    .position_map = pmap,
+  };
+  geometry_input.geo_grid = gkyl_gk_geometry_augment_grid(grid_conf, geometry_input);
+  gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext,
+    &geometry_input.geo_local);
+  gkyl_cart_modal_serendip(&geometry_input.geo_basis, 3, poly_order);
+  struct gk_geometry *gk_geom_3d = gkyl_gk_geometry_mapc2p_new(&geometry_input);
+  struct gk_geometry *gk_geom = gkyl_gk_geometry_deflate(gk_geom_3d, &geometry_input);
+  gkyl_gk_geometry_release(gk_geom_3d);
+
+  // Use array_dg_find_peaks to find bmag_max.
+  int search_dir = cdim - 1;
+  struct gkyl_array_dg_find_peaks_inp peak_inp = {
+    .basis = &basis_conf,
+    .grid = &grid_conf,
+    .range = &local_conf,
+    .range_ext = &local_ext_conf,
+    .search_dir = search_dir,
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_array_dg_find_peaks *bmag_peak_finder =
+    gkyl_array_dg_find_peaks_new(&peak_inp, gk_geom->geo_int.bmag);
+  gkyl_array_dg_find_peaks_advance(bmag_peak_finder, gk_geom->geo_int.bmag);
+
+  int num_peaks = gkyl_array_dg_find_peaks_num_peaks(bmag_peak_finder);
+  int bmag_max_peak_idx = num_peaks - 2;
+  const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder,
+    bmag_max_peak_idx);
+  const struct gkyl_array *bmag_max_z_coord =
+    gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, bmag_max_peak_idx);
+  const struct gkyl_array *bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder,
+    num_peaks - 1);
+  const struct gkyl_array *bmag_wall_z_coord =
+    gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, num_peaks - 1);
+  const struct gkyl_basis *bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(bmag_peak_finder);
+  const struct gkyl_range *bmag_max_range = gkyl_array_dg_find_peaks_get_range(bmag_peak_finder);
+  const struct gkyl_range *bmag_max_range_ext =
+    gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder);
+
+  // Allocate arrays for phi evaluated at peak locations.
+  struct gkyl_array **phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array *));
+  for (int p = 0; p < num_peaks; p++) {
+    phi_at_peaks[p] = mkarr(use_gpu, bmag_max_basis->num_basis, bmag_max_range_ext->volume);
+  }
+
+  if (use_gpu) {
+    struct gk_geometry *gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
+    gkyl_gk_geometry_release(gk_geom);
+    gk_geom = gkyl_gk_geometry_acquire(gk_geom_dev);
+    gkyl_gk_geometry_release(gk_geom_dev);
+  }
+
+  // Velocity space mapping.
+  struct gkyl_mapc2p_inp c2p_in = { };
+  struct gkyl_velocity_map *gvm = gkyl_velocity_map_new(c2p_in, grid, grid_vel,
+    local, local_ext, local_vel, local_ext_vel, use_gpu);
+
+  // Project the electrostatic potential with NON-ZERO phi.
+  struct gkyl_array *phi = mkarr(use_gpu, basis_conf.num_basis, local_ext_conf.volume);
+  struct gkyl_array *phi_ho = use_gpu ? mkarr(false, phi->ncomp, phi->size)
+                                      : gkyl_array_acquire(phi);
+
+  gkyl_eval_on_nodes *evphi = gkyl_eval_on_nodes_new(&grid_conf, &basis_conf, 1,
+    phi_func_1x_nonzero, &ctx);
+  gkyl_eval_on_nodes_advance(evphi, 0.0, &local_conf, phi_ho);
+  gkyl_eval_on_nodes_release(evphi);
+  gkyl_array_copy(phi, phi_ho);
+
+  // Project phi onto peak locations to get phi_m.
+  gkyl_array_dg_find_peaks_project_on_peaks(bmag_peak_finder, phi, phi_at_peaks);
+  const struct gkyl_array *phi_m = phi_at_peaks[bmag_max_peak_idx];
+
+  // Basis used to project the mask.
+  struct gkyl_basis basis_mask;
+  if (ctx.num_quad == 1 || ctx.cellwise_trap_loss) {
+    gkyl_cart_modal_serendip(&basis_mask, ndim, 0);
+  }
+  else {
+    if (poly_order == 1) {
+      gkyl_cart_modal_gkhybrid(&basis_mask, cdim, vdim);
+    }
+    else {
+      gkyl_cart_modal_serendip(&basis_mask, ndim, poly_order);
+    }
+  }
+
+  // Create mask array.
+  struct gkyl_array *mask = mkarr(use_gpu, basis_mask.num_basis, local_ext.volume);
+  struct gkyl_array *mask_ho = use_gpu ? mkarr(false, mask->ncomp, mask->size)
+                                       : gkyl_array_acquire(mask);
+
+  // Project the loss cone mask.
+  struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
+    .phase_grid = &grid,
+    .conf_basis = &basis_conf,
+    .phase_basis = &basis,
+    .conf_range = &local_conf,
+    .conf_range_ext = &local_ext_conf,
+    .vel_range = &local_vel,
+    .vel_map = gvm,
+    .bmag = gk_geom->geo_int.bmag,
+    .bmag_max_z_coord = bmag_max_z_coord,
+    .bmag_max = bmag_max,
+    .bmag_wall = bmag_wall,
+    .bmag_wall_z_coord = bmag_wall_z_coord,
+    .bmag_max_basis = bmag_max_basis,
+    .bmag_max_range = bmag_max_range,
+    .mass = ctx.mass,
+    .charge = ctx.charge,
+    .qtype = ctx.quad_type,
+    .num_quad = ctx.num_quad,
+    .cellwise_trap_loss = ctx.cellwise_trap_loss,
+    .use_gpu = use_gpu,
+  };
+  struct gkyl_loss_cone_mask_gyrokinetic *proj_mask =
+    gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
+
+  gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, phi_m, mask);
+
+  gkyl_array_copy(mask_ho, mask);
+
+  // Verify physical properties of the mask:
+  // 1. At the center (z≈0), high-mu particles should be trapped (mask=1)
+  // 2. At the wall (|z| ≈ z_max), particles should not be in the trapped region
+  // 3. Low-mu particles near center should be passing (mask=0)
+
+  // Check specific cells to verify correct behavior.
+  // Cell indices: [iz, ivpar, imu] where each starts at 1 in local range.
+  // Grid: z in [-pi, pi], vpar in [-vpar_max, vpar_max], mu in [0, mu_max]
+  // Central z cells are around iz=4,5 (8 cells, symmetric)
+  // High mu cells are imu=3,4 (4 cells)
+  // Low mu cells are imu=1
+
+  int num_trapped_high_mu_center = 0;
+  int num_passing_low_mu_center = 0;
+  int total_high_mu_center = 0;
+  int total_low_mu_center = 0;
+
+  struct gkyl_range_iter iter;
+  gkyl_range_iter_init(&iter, &local);
+  while (gkyl_range_iter_next(&iter)) {
+    int iz = iter.idx[0];
+    int imu = iter.idx[2];
+
+    // Determine if we're at center (iz = 4 or 5 for 8 cells in [-pi, pi])
+    // and if we're at high mu (imu = 3 or 4) or low mu (imu = 1)
+    bool is_center = (iz == 4 || iz == 5);
+    bool is_high_mu = (imu == 3 || imu == 4);
+    bool is_low_mu = (imu == 1);
+
+    long linidx = gkyl_range_idx(&local, iter.idx);
+    const double *mask_val = gkyl_array_cfetch(mask_ho, linidx);
+
+    if (is_center && is_high_mu) {
+      total_high_mu_center++;
+      if (mask_val[0] > 0.5) {
+        num_trapped_high_mu_center++;
+      }
+    }
+    if (is_center && is_low_mu) {
+      total_low_mu_center++;
+      if (mask_val[0] < 0.5) {
+        num_passing_low_mu_center++;
+      }
+    }
+  }
+
+  // High mu particles at center should mostly be trapped.
+  double trapped_frac = (double)num_trapped_high_mu_center / (double)total_high_mu_center;
+  // printf("Trapped fraction for high-mu center particles: %g (%d / %d)\n",
+  //   trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
+  TEST_CHECK(trapped_frac >= 0.5);
+  if (trapped_frac < 0.5) {
+    printf("High-mu center trapped fraction: %g (%d / %d)\n",
+      trapped_frac, num_trapped_high_mu_center, total_high_mu_center);
+  }
+
+  // Low mu particles at center should mostly be passing.
+  double passing_frac = (double)num_passing_low_mu_center / (double)total_low_mu_center;
+  // printf("Passing fraction for low-mu center particles: %g (%d / %d)\n",
+  //   passing_frac, num_passing_low_mu_center, total_low_mu_center);
+  TEST_CHECK(passing_frac >= 0.5);
+  if (passing_frac < 0.5) {
+    printf("Low-mu center passing fraction: %g (%d / %d)\n",
+      passing_frac, num_passing_low_mu_center, total_low_mu_center);
+  }
+
+  // Write output for debugging.
+  char fname[1024];
   if (use_gpu) {
-    gkyl_cu_free(bmag_max);
-    gkyl_cu_free(phi_m);
+    sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_nonzero_phi_p%d_dev.gkyl", poly_order);
   }
   else {
-    gkyl_free(bmag_max);
-    gkyl_free(phi_m);
+    sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_nonzero_phi_p%d_ho.gkyl", poly_order);
+  }
+  gkyl_grid_sub_array_write(&grid, &local, 0, mask_ho, fname);
+
+  // Cleanup.
+  for (int p = 0; p < num_peaks; p++) {
+    gkyl_array_release(phi_at_peaks[p]);
   }
-  gkyl_array_release(phi); 
-  gkyl_array_release(phi_ho); 
-  gkyl_array_release(mask); 
+  gkyl_free(phi_at_peaks);
+  gkyl_array_release(phi);
+  gkyl_array_release(phi_ho);
+  gkyl_array_release(mask);
   gkyl_array_release(mask_ho);
-  gkyl_array_release(mask_ref_ho);
   gkyl_loss_cone_mask_gyrokinetic_release(proj_mask);
   gkyl_velocity_map_release(gvm);
-  gkyl_gk_geometry_release(gk_geom);
+  gkyl_array_release(bmag_max);
+  gkyl_array_release(bmag_max_z_coord);
+  gkyl_array_release(bmag_wall);
+  gkyl_array_release(bmag_wall_z_coord);
+  gkyl_array_dg_find_peaks_release(bmag_peak_finder);
   gkyl_position_map_release(pmap);
+  gkyl_gk_geometry_release(gk_geom);
 
 #ifdef GKYL_HAVE_CUDA
   if (use_gpu) {
     gkyl_cu_free(basis_on_dev);
     gkyl_cu_free(basis_on_dev_conf);
   }
-#endif  
+#endif
+}
+
+void test_1x2v_p1_gk_ho()
+{
+  test_1x2v_gk(1, false);
 }
 
-void test_1x2v_p1_gk_ho() { test_1x2v_gk(1, false); }
+void test_1x2v_p1_nonzero_phi_gk_ho()
+{
+  test_1x2v_nonzero_phi_gk(1, false);
+}
 
 #ifdef GKYL_HAVE_CUDA
-void test_1x2v_p1_gk_dev() { test_1x2v_gk(1, true); }
+void test_1x2v_p1_gk_dev()
+{
+  test_1x2v_gk(1, true);
+}
+
+void test_1x2v_p1_nonzero_phi_gk_dev()
+{
+  test_1x2v_nonzero_phi_gk(1, true);
+}
+
 #endif
 
 TEST_LIST = {
   { "test_1x2v_p1_gk_ho", test_1x2v_p1_gk_ho },
+  { "test_1x2v_p1_nonzero_phi_gk_ho", test_1x2v_p1_nonzero_phi_gk_ho },
 
 #ifdef GKYL_HAVE_CUDA
   { "test_1x2v_p1_gk_dev", test_1x2v_p1_gk_dev },
+  { "test_1x2v_p1_nonzero_phi_gk_dev", test_1x2v_p1_nonzero_phi_gk_dev },
 #endif
   { NULL, NULL },
 };
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
index 36e12300c..7bb388349 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
@@ -10,13 +10,10 @@
 // Object type.
 typedef struct gkyl_loss_cone_mask_gyrokinetic gkyl_loss_cone_mask_gyrokinetic;
 
-// Type of function expected for the ctp_pos_func input.
-typedef void (*loss_cone_mask_gyrokinetic_c2p_t)(const double *xcomp, double *xphys, void *ctx);
-
 // Available options:
-//   A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
-//   B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=true. Output: ncomp=1 array.
-//   C) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=false. Output: ncomp=phase_basis.ncomp array.
+// A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
+// B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=true. Output: ncomp=1 array.
+// C) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=false. Output: ncomp=phase_basis.ncomp array.
 
 // Inputs packaged as a struct.
 struct gkyl_loss_cone_mask_gyrokinetic_inp {
@@ -27,18 +24,22 @@ struct gkyl_loss_cone_mask_gyrokinetic_inp {
   const struct gkyl_range *conf_range_ext; // Extended configuration-space range (for internal memory allocations).
   const struct gkyl_range *vel_range; // Velocity space range.
   const struct gkyl_velocity_map *vel_map; // Velocity space mapping object.
-  const struct gkyl_array *bmag; // Magnetic field magnitude.
-  const double *bmag_max; // Maximum bmag (on GPU if use_gpu=true).
-  const double *bmag_max_loc; // Location of maximum bmag (on GPU if use_gpu=true)..
+  const struct gkyl_array *bmag; // Magnetic field magnitude (cdim DG expansion).
+  const struct gkyl_array *bmag_max; // Maximum bmag per field line (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_wall; // Magnetic field magnitude at the wall (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag at the wall (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case).
+  const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate of bmag_tandem per field line.
+  const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays (1D for 2x, 0D for 1x).
+  const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
+  bool is_tandem; // =True presumably selects the tandem-mirror treatment that uses bmag_tandem (TODO confirm).
   double mass; // Species mass.
   double charge; // Species charge.
   enum gkyl_quad_type qtype; // Quadrature rule/nodes.
   int num_quad; // Number of quad points in each direction to use (default: poly_order+1).
   bool cellwise_trap_loss; // =True takes a whole cell to be either trapped or passing,
                            // so not high-order distinction within the cell is made.
-  loss_cone_mask_gyrokinetic_c2p_t c2p_pos_func; // Function that transforms a set of cdim
-                                    // position-space computational coordinates to physical ones.
-  void *c2p_pos_func_ctx; // Context for c2p_pos_func.
   bool use_gpu; // Whether to run on GPU.
 };
 
@@ -54,7 +55,7 @@ struct gkyl_loss_cone_mask_gyrokinetic_inp {
  * @param inp Input parameters defined in gkyl_loss_cone_mask_gyrokinetic_inp struct.
  * @return New updater pointer.
  */
-struct gkyl_loss_cone_mask_gyrokinetic* 
+struct gkyl_loss_cone_mask_gyrokinetic*
 gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokinetic_inp *inp);
 
 /**
@@ -64,16 +65,18 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
  * @param phase_rng Phase-space range.
  * @param conf_rng Configuration-space range.
  * @param phi Electrostatic potential.
- * @param phi_m Electrostatic potential at the mirror throat (on GPU if use_gpu=true).
+ * @param phi_m Electrostatic potential at the mirror throat (DG array on reduced grid).
+ * @param phi_tandem Electrostatic potential at the tandem mirror throat (DG array on reduced grid).
  * @param mask_out Output masking function.
  */
 void gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out);
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem,
+  struct gkyl_array *mask_out);
 
 /**
  * Delete updater.
  *
  * @param up Updater to delete.
  */
-void gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up);
+void gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic *up);
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
index 03e57dec9..68cc2579f 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
@@ -8,26 +8,30 @@
 #include <gkyl_mat.h>
 #include <gkyl_mat_priv.h>
 #include <gkyl_range.h>
-#include <gkyl_rect_grid.h> 
+#include <gkyl_rect_grid.h>
 #include <gkyl_util.h>
 #include <assert.h>
 
 GKYL_CU_DH
 static inline void
 log_to_comp(int ndim, const double *eta,
-  const double * GKYL_RESTRICT dx, const double * GKYL_RESTRICT xc,
-  double* GKYL_RESTRICT xout)
+  const double *GKYL_RESTRICT dx, const double *GKYL_RESTRICT xc,
+  double *GKYL_RESTRICT xout)
 {
-  for (int d=0; d<ndim; ++d) xout[d] = 0.5*dx[d]*eta[d]+xc[d];
+  for (int d = 0; d < ndim; ++d) {
+    xout[d] = 0.5 * dx[d] * eta[d] + xc[d];
+  }
 }
 
 static inline void
 copy_idx_arrays(int cdim, int pdim, const int *cidx, const int *vidx, int *out)
 {
-  for (int i=0; i<cdim; ++i)
+  for (int i = 0; i < cdim; ++i) {
     out[i] = cidx[i];
-  for (int i=cdim; i<pdim; ++i)
-    out[i] = vidx[i-cdim];
+  }
+  for (int i = cdim; i < pdim; ++i) {
+    out[i] = vidx[i - cdim];
+  }
 }
 
 struct gkyl_loss_cone_mask_gyrokinetic {
@@ -44,12 +48,25 @@ struct gkyl_loss_cone_mask_gyrokinetic {
 
   double mass; // Species mass.
   double charge; // Species charge.
-  double *bmag_max; // Maximum magnetic field amplitude.
-  double *bmag_max_loc; // Location of bmag_max.
-  bool use_gpu; // Boolean if we are performing projection on device.
 
-  loss_cone_mask_gyrokinetic_c2p_t c2p_pos; // Function transforming position comp to phys coords.
-  void *c2p_pos_ctx; // Context for the c2p_pos mapping.
+  // Per-field-line bmag_max arrays (1D for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line.
+  const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line.
+  const struct gkyl_array *bmag_wall; // Magnetic field magnitude at the wall (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag at the wall (1D DG expansion for 2x, scalar for 1x).
+  const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case).
+  const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate of bmag_tandem (for 7-extrema case).
+  const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays.
+  struct gkyl_basis *bmag_max_basis_on_dev; // Device-resident basis with device-callable function pointers.
+  const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays.
+
+  // GPU helper: scalar bmag_max_z value for simple 1x cases.
+  // TODO: For 2x GPU support, need to pass full arrays and do per-cell lookup.
+  double *bmag_max_z_scalar_gpu; // Single z-coordinate for GPU (1x case only).
+  double *bmag_wall_z_scalar_gpu; // Single z-coordinate for GPU (1x case only).
+
+  bool is_tandem; // Whether we are dealing with a tandem mirror case.
+  bool use_gpu; // Boolean if we are performing projection on device.
 
   bool cellwise_trap_loss; // Whether a whole cell is trapped/lost, or whether
                            // high-order distinction within a cell is allowed.
@@ -74,25 +91,34 @@ struct gkyl_loss_cone_mask_gyrokinetic {
   struct gkyl_array *mask_out_quad; // Array keeping f_lte at phase-space quadrature nodes.
   struct gkyl_array *qDphiDbmag_quad; // Array keeping q*(phi-phi_m)/(B_max-B)
                                       // at configuration-space quadrature nodes.
+  struct gkyl_array *qDphiDbmag_quad_wall; // Array keeping q*phi/(B_wall-B)
+  // at configuration-space quadrature nodes.
+  struct gkyl_array *qDphiDbmag_quad_tandem; // Array keeping q*(phi-phi_tandem)/(B_tandem-B)
+  // at configuration-space quadrature nodes.
   struct gkyl_array *Dbmag_quad; // B_max-B at configuration-space quadrature nodes.
+  struct gkyl_array *Dbmag_quad_wall; // B-B_wall at configuration-space quadrature nodes.
+  struct gkyl_array *Dbmag_quad_tandem; // B_tandem-B at configuration-space quadrature nodes.
 
-  struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem; // Structure of data which converts  
+  struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem; // Structure of data which converts
                                                           // stores the info to convert phase
                                                           // space nodal to modal gkyl arrays.
 };
 
 #ifdef GKYL_HAVE_CUDA
+
 /**
- * Obtain bmag_max-bmag at conf-space quadrature nodes and store it in up->Dbmag_quad.
+ * Obtain bmag_peak-bmag at conf-space quadrature nodes and store it in Dbmag_quad.
  *
- * @param up Project on basis updater to run.
- * @param conf_rng Configuration-space range.
+ * @param up Loss cone mask updater.
+ * @param conf_range Configuration-space range.
  * @param bmag Magnetic field magnitude.
- * @param bmag_max Maximum bmag.
+ * @param Dbmag_quad Output array (bmag_peak - bmag) at quadrature nodes.
+ * @param bmag_peak Peak bmag value (per-field-line array for 2x, scalar for 1x).
  */
-void 
+void
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max);
+  const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
+  struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_peak);
 
 /**
  * Compute projection of the loss cone masking function on the phase-space basis
@@ -102,11 +128,13 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *u
  * @param phase_rng Phase-space range.
  * @param conf_rng Configuration-space range.
  * @param phi Electrostatic potential.
- * @param phi_m Electrostatic potential at the mirror throat (on GPU).
+ * @param phi_m Electrostatic potential at the mirror throat (DG array on reduced grid).
+ * @param phi_tandem Electrostatic potential at the tandem mirror throat (DG array on reduced grid).
  * @param mask_out Output masking function.
  */
 void
 gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out);
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem,
+  struct gkyl_array *mask_out);
 #endif
diff --git a/gyrokinetic/zero/gkyl_position_map_priv.h b/gyrokinetic/zero/gkyl_position_map_priv.h
index ec730c26e..0822b014f 100644
--- a/gyrokinetic/zero/gkyl_position_map_priv.h
+++ b/gyrokinetic/zero/gkyl_position_map_priv.h
@@ -276,13 +276,13 @@ calc_bmag_global_derivative(double theta, void *ctx)
   double fout[3];
   xh[0] = gpm->constB_ctx->psi;
   xh[1] = gpm->constB_ctx->alpha;
-  xh[2] = theta - h;
+  xh[2] = theta + h;
   gkyl_calc_bmag_global(0.0, xh, fout, bmag_ctx);
   double Bmag_plus = fout[0];
-  xh[2] = theta - 2*h;
+  xh[2] = theta - h;
   gkyl_calc_bmag_global(0.0, xh, fout, bmag_ctx);
   double Bmag_minus = fout[0];
-  return (Bmag_plus - Bmag_minus) / (h);
+  return (Bmag_plus - Bmag_minus) / (2*h);
 }
 
 /**
@@ -314,15 +314,24 @@ find_B_field_extrema(struct gkyl_position_map *gpm)
   double *theta_extrema = gkyl_malloc(sizeof(double) * (npts + 1));
   double *bmag_extrema = gkyl_malloc(sizeof(double) * (npts + 1));
 
-  for (int i = 0; i <= npts; i++){
+  for (int i = 1; i < npts; i++){
     double theta = theta_lo + i * theta_dxi;
     xp[Z_IDX] = theta;
     gkyl_calc_bmag_global(0.0, xp, &bmag_vals[i], bmag_ctx);
     dbmag_vals[i] = calc_bmag_global_derivative(theta, gpm);
-    if (i==0) continue;
 
-    // Minima
-    if (dbmag_vals[i] > 0 && dbmag_vals[i-1] < 0){
+    // Near-zero derivative: B is locally flat here; record an extremum (min/max is decided later).
+    // Use continue so this is mutually exclusive with the sign-change checks below.
+    if (fabs(dbmag_vals[i]) < 1e-10) {
+      theta_extrema[extrema] = theta;
+      bmag_extrema[extrema] = bmag_vals[i];
+      extrema++;
+      continue;
+    }
+
+    // Minima via sign change. Guard on |dbmag[i-1]| to avoid a double-record if the
+    // previous point was already captured by the near-zero branch above.
+    if (dbmag_vals[i] > 0 && dbmag_vals[i-1] < 0 && fabs(dbmag_vals[i-1]) >= 1e-10){
       if (bmag_vals[i] < bmag_vals[i-1])
       {
         theta_extrema[extrema] = theta;
@@ -337,8 +346,8 @@ find_B_field_extrema(struct gkyl_position_map *gpm)
       }
     }
 
-    // Maxima
-    if (dbmag_vals[i] < 0 && dbmag_vals[i-1] > 0){
+    // Maxima via sign change. Guard on |dbmag[i-1]| for the same reason.
+    if (dbmag_vals[i] < 0 && dbmag_vals[i-1] > 0 && fabs(dbmag_vals[i-1]) >= 1e-10){
       if (bmag_vals[i] > bmag_vals[i-1])
       {
         theta_extrema[extrema] = theta;
@@ -375,30 +384,53 @@ find_B_field_extrema(struct gkyl_position_map *gpm)
 
   // Left edge
   if (bmag_extrema[0] > bmag_extrema[1])
-  {    gpm->constB_ctx->min_or_max[0] = 1;  } // Maximum
+  {
+    gpm->constB_ctx->min_or_max[0] = 1;  // Maximum
+  }
   else if (bmag_extrema[0] < bmag_extrema[1])
-  {    gpm->constB_ctx->min_or_max[0] = 0;  } // Minimum
+  {
+    gpm->constB_ctx->min_or_max[0] = 0;  // Minimum
+  }
   else
-  {    printf("Error: Extrema is not an extrema. Position_map optimization failed\n");  }
+  {
+    printf("Error: Extrema[0] is not an extrema (bmag[0]=%.6g == bmag[1]=%.6g). "
+      "Position_map optimization failed\n", bmag_extrema[0], bmag_extrema[1]);
+  }
 
   // Middle points
   for (int i = 1; i < extrema - 1; i++)
   {
     if (bmag_extrema[i] > bmag_extrema[i-1] && bmag_extrema[i] > bmag_extrema[i+1])
-    {      gpm->constB_ctx->min_or_max[i] = 1;    } // Maximum
+    {
+      gpm->constB_ctx->min_or_max[i] = 1;  // Maximum
+    }
     else if (bmag_extrema[i] < bmag_extrema[i-1] && bmag_extrema[i] < bmag_extrema[i+1])
-    {      gpm->constB_ctx->min_or_max[i] = 0;    } // Minimum
+    {
+      gpm->constB_ctx->min_or_max[i] = 0;  // Minimum
+    }
     else
-    {      printf("Error: Extrema is not an extrema. Position_map optimization failed\n");  }
+    {
+      printf("Error: Extrema[%d] is not an extrema (bmag[%d-1]=%.6g, bmag[%d]=%.6g, bmag[%d+1]=%.6g). "
+        "Position_map optimization failed\n",
+        i, i, bmag_extrema[i-1], i, bmag_extrema[i], i, bmag_extrema[i+1]);
+    }
   }
 
   // Right edge
   if (bmag_extrema[extrema-1] > bmag_extrema[extrema-2])
-  {    gpm->constB_ctx->min_or_max[extrema-1] = 1; } // Maximum
+  {
+    gpm->constB_ctx->min_or_max[extrema-1] = 1; // Maximum
+  }
   else if (bmag_extrema[extrema-1] < bmag_extrema[extrema-2])
-  {    gpm->constB_ctx->min_or_max[extrema-1] = 0; } // Minimum
-  else  
-  {    printf("Error: Extrema is not an extrema. Position_map optimization failed\n");  }
+  {
+    gpm->constB_ctx->min_or_max[extrema-1] = 0; // Minimum
+  }
+  else
+  {
+    printf("Error: Extrema[%d] (right edge) is not an extrema (bmag[%d-1]=%.6g, bmag[%d]=%.6g). "
+      "Position_map optimization failed\n",
+      extrema-1, extrema-1, bmag_extrema[extrema-2], extrema-1, bmag_extrema[extrema-1]);
+  }
 
   // Free mallocs
   gkyl_free(bmag_vals);
@@ -454,7 +486,7 @@ refine_B_field_extrema(struct gkyl_position_map *gpm)
     else if (bmag_cent < bmag_left && bmag_cent < bmag_right)
     { is_maximum = false; } // Local minima
     else
-    { printf("Error: Extrema is not an extrema. Position_map optimization failed\n");
+    { // printf("Error: Extrema is not an extrema. Position_map optimization failed\n");
       break;
     }
 
@@ -655,7 +687,7 @@ position_map_constB_z_numeric(double t, const double *xn, double *fout, void *ct
         return;
       }
       else {
-        fprintf(stderr, "Warning: Unexpected interval evaluation state in position_map_constB_z_numeric. Using theta directly.\n");
+        // fprintf(stderr, "Warning: Unexpected interval evaluation state in position_map_constB_z_numeric. Using theta directly.\n");
         fout[0] = theta;
         return;
       }
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index e7e553570..072c142aa 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -14,17 +14,16 @@
 
 //
 // mu_bound = (0.5*mass*pow(vpar,2)+charge*Delta_phi)/(bmag[0]*(Rm-1));
-//          = 0.5*mass*pow(vpar,2)/(bmag[0]*(Rm-1)) + charge*Delta_phi/(bmag[0]*(Rm-1));
-//          = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]) + charge*(phi-phi_m)/(bmag_max-bmag[0]);
+// = 0.5*mass*pow(vpar,2)/(bmag[0]*(Rm-1)) + charge*Delta_phi/(bmag[0]*(Rm-1));
+// = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]) + charge*(phi-phi_m)/(bmag_max-bmag[0]);
 //
 
-// Identity comp to phys coord mapping, for when user doesn't provide a map.
-static inline void
-c2p_pos_identity(const double *xcomp, double *xphys, void *ctx)
+// Allocate a GKYL_DOUBLE array on the host, or on the device if use_gpu (callers clear it explicitly).
+static struct gkyl_array*
+mkarr(long nc, long size, bool use_gpu)
 {
-  struct gkyl_loss_cone_mask_gyrokinetic *up = ctx;
-  int cdim = up->cdim;
-  for (int d=0; d<cdim; d++) xphys[d] = xcomp[d];
+  return use_gpu? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size)
+    : gkyl_array_new(GKYL_DOUBLE, nc, size);
 }
 
 // create range to loop over quadrature points.
@@ -32,8 +31,12 @@ static inline struct gkyl_range
 get_qrange(int cdim, int dim, int num_quad, int num_quad_v, bool *is_vdim_p2)
 {
   int qshape[GKYL_MAX_DIM];
-  for (int i=0; i<cdim; ++i) qshape[i] = num_quad;
-  for (int i=cdim; i<dim; ++i) qshape[i] = is_vdim_p2[i-cdim] ? num_quad_v : num_quad;
+  for (int i = 0; i < cdim; ++i) {
+    qshape[i] = num_quad;
+  }
+  for (int i = cdim; i < dim; ++i) {
+    qshape[i] = is_vdim_p2[i - cdim] ? num_quad_v : num_quad;
+  }
   struct gkyl_range qrange;
   gkyl_range_init_from_shape(&qrange, dim, qshape);
   return qrange;
@@ -49,9 +52,9 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
   int ndim = basis->ndim;
   int num_quad_v = num_quad;
   // Hybrid basis have p=2 in velocity space.
-  bool is_vdim_p2[2] = {false};  // 2 is the max vdim for GK.
+  bool is_vdim_p2[2] = { false };  // 2 is the max vdim for GK.
   if (num_quad > 1 && basis->b_type == GKYL_BASIS_MODAL_GKHYBRID) {
-    num_quad_v = num_quad+1;
+    num_quad_v = num_quad + 1;
     is_vdim_p2[0] = true;  // only vpar is quadratic in GK hybrid.
   }
 
@@ -100,7 +103,7 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
   if (use_gpu) {
     *ordinates = gkyl_array_cu_dev_new(GKYL_DOUBLE, ndim, tot_quad);
     *weights = gkyl_array_cu_dev_new(GKYL_DOUBLE, 1, tot_quad);
-  } 
+  }
   else {
     *ordinates = gkyl_array_new(GKYL_DOUBLE, ndim, tot_quad);
     *weights = gkyl_array_new(GKYL_DOUBLE, 1, tot_quad);
@@ -111,25 +114,29 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
 
   while (gkyl_range_iter_next(&iter)) {
     int node = gkyl_range_idx(&qrange, iter.idx);
-    
+
     // set ordinates
     double *ord = gkyl_array_fetch(ordinates_ho, node);
-    for (int i=0; i<cdim; ++i)
-      ord[i] = ordinates1[iter.idx[i]-qrange.lower[i]];
+    for (int i = 0; i < cdim; ++i) {
+      ord[i] = ordinates1[iter.idx[i] - qrange.lower[i]];
+    }
+
+    for (int i = cdim; i < ndim; ++i) {
+      ord[i] = is_vdim_p2[i - cdim] ?
+        ordinates1_v[iter.idx[i] - qrange.lower[i]] : ordinates1[iter.idx[i] - qrange.lower[i]];
+    }
 
-    for (int i=cdim; i<ndim; ++i)
-      ord[i] = is_vdim_p2[i-cdim] ? 
-        ordinates1_v[iter.idx[i]-qrange.lower[i]] : ordinates1[iter.idx[i]-qrange.lower[i]];
-    
     // set weights
     double *wgt = gkyl_array_fetch(weights_ho, node);
     wgt[0] = 1.0;
-    for (int i=0; i<cdim; ++i)
-      wgt[0] *= weights1[iter.idx[i]-qrange.lower[i]];
+    for (int i = 0; i < cdim; ++i) {
+      wgt[0] *= weights1[iter.idx[i] - qrange.lower[i]];
+    }
 
-    for (int i=cdim; i<ndim; ++i)
-      wgt[0] *= is_vdim_p2[i-cdim] ? 
-        weights1_v[iter.idx[i]-qrange.lower[i]] : weights1[iter.idx[i]-qrange.lower[i]];
+    for (int i = cdim; i < ndim; ++i) {
+      wgt[0] *= is_vdim_p2[i - cdim] ?
+        weights1_v[iter.idx[i] - qrange.lower[i]] : weights1[iter.idx[i] - qrange.lower[i]];
+    }
   }
 
   // Pre-compute basis functions at ordinates.
@@ -139,8 +146,9 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
   else
     *basis_at_ords = gkyl_array_new(GKYL_DOUBLE, basis->num_basis, tot_quad);
 
-  for (int n=0; n<tot_quad; ++n)
+  for (int n = 0; n < tot_quad; ++n) {
     basis->eval(gkyl_array_fetch(ordinates_ho, n), gkyl_array_fetch(basis_at_ords_ho, n));
+  }
 
   // Copy host array to device array.
   gkyl_array_copy(*ordinates, ordinates_ho);
@@ -155,13 +163,16 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q
 }
 
 static void
-gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up, 
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max)
+gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up,
+  const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
+  struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_max)
 {
   // Get bmag_max-bmag at quadrature nodes.
+  // bmag_max is now a per-field-line array (1D for 2x, scalar for 1x).
 #ifdef GKYL_HAVE_CUDA
   if (up->use_gpu)
-    return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(up, conf_range, bmag, bmag_max);
+    return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(up, conf_range, bmag,
+      Dbmag_quad, bmag_max);
 #endif
 
   int cdim = up->cdim, pdim = up->pdim;
@@ -175,20 +186,40 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up,
     long linidx = gkyl_range_idx(conf_range, conf_iter.idx);
 
     const double *bmag_d = gkyl_array_cfetch(bmag, linidx);
-    double *Dbmag_quad = gkyl_array_fetch(up->Dbmag_quad, linidx);
+    double *Dbmag_quad_d = gkyl_array_fetch(Dbmag_quad, linidx);
+
+    // Get bmag_max for this field line (psi value).
+    // For 1x: bmag_max is a single value (index 0).
+    // For 2x: bmag_max varies with psi (x-direction), so use conf_iter.idx[0].
+    double bmag_max_val;
+    if (cdim == 1) {
+      // 1x case: single value.
+      const double *bmag_max_d = gkyl_array_cfetch(bmag_max, 0);
+      bmag_max_val = bmag_max_d[0]; // Just the constant coefficient.
+    }
+    else {
+      // 2x case: evaluate bmag_max at this psi cell.
+      // The bmag_max array is 1D in psi, so we need the psi index.
+      int psi_idx[1] = { conf_iter.idx[0] };
+      long psi_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
+      const double *bmag_max_d = gkyl_array_cfetch(bmag_max, psi_linidx);
+      // For simplicity, evaluate at cell center (logical coord 0).
+      double xc[1] = { 0.0 };
+      bmag_max_val = up->bmag_max_basis->eval_expand(xc, bmag_max_d);
+    }
 
-    // Sum over basis 
-    for (int n=0; n<tot_quad_conf; ++n) {
+    // Sum over basis
+    for (int n = 0; n < tot_quad_conf; ++n) {
       const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
-      for (int k=0; k<num_basis_conf; ++k)
-        Dbmag_quad[n] += bmag_d[k]*b_ord[k];
-
-      Dbmag_quad[n] = bmag_max[0] - Dbmag_quad[n];
+      for (int k = 0; k < num_basis_conf; ++k) {
+        Dbmag_quad_d[n] += bmag_d[k] * b_ord[k];
+      }
+      Dbmag_quad_d[n] = bmag_max_val - Dbmag_quad_d[n];
     }
   }
 }
 
-struct gkyl_loss_cone_mask_gyrokinetic* 
+struct gkyl_loss_cone_mask_gyrokinetic*
 gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokinetic_inp *inp)
 {
   gkyl_loss_cone_mask_gyrokinetic *up = gkyl_malloc(sizeof(*up));
@@ -197,15 +228,16 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   up->vel_map = gkyl_velocity_map_acquire(inp->vel_map);
   up->mass = inp->mass;
   up->charge = inp->charge;
+  up->is_tandem = inp->is_tandem;
 
   up->cdim = inp->conf_basis->ndim;
   up->pdim = inp->phase_basis->ndim;
 
   up->cellwise_trap_loss = inp->cellwise_trap_loss;
-  int num_quad = inp->num_quad? inp->num_quad : inp->phase_basis->poly_order+1;
+  int num_quad = inp->num_quad? inp->num_quad : inp->phase_basis->poly_order + 1;
   up->norm_fac = 1;
   if (!up->cellwise_trap_loss)
-    up->norm_fac = num_quad == 1? 1.0/pow(sqrt(2.0),up->pdim) : 1.0;
+    up->norm_fac = num_quad == 1? 1.0 / pow(sqrt(2.0), up->pdim) : 1.0;
 
   if (num_quad == 1) {
     up->num_basis_conf = 1;
@@ -216,15 +248,8 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
     up->num_basis_phase = inp->phase_basis->num_basis;
   }
   up->use_gpu = inp->use_gpu;
-
-  if (inp->c2p_pos_func == 0) {
-    up->c2p_pos = c2p_pos_identity;
-    up->c2p_pos_ctx = up;
-  }
-  else {
-    up->c2p_pos = inp->c2p_pos_func;
-    up->c2p_pos_ctx = inp->c2p_pos_func_ctx;
-  }
+  up->bmag_max_z_scalar_gpu = NULL; // Will be set for GPU case.
+  up->bmag_max_basis_on_dev = NULL; // Will be set for GPU case.
 
   // Initialize data needed for conf-space quadrature.
   up->tot_quad_conf = init_quad_values(up->cdim, inp->conf_basis, inp->qtype, num_quad,
@@ -240,9 +265,9 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
   // create a map between phase-space and conf-space ordinates.
   int num_quad_v = num_quad;  // Hybrid basis have p=2 in velocity space.
   // hybrid basis have p=2 in velocity space.
-  bool is_vdim_p2[2] = {false};  // 2 is the max vdim for GK.
+  bool is_vdim_p2[2] = { false };  // 2 is the max vdim for GK.
   if (num_quad > 1 && inp->phase_basis->b_type == GKYL_BASIS_MODAL_GKHYBRID) {
-    num_quad_v = num_quad+1;
+    num_quad_v = num_quad + 1;
     is_vdim_p2[0] = true;  // only vpar is quadratic in GK hybrid.
   }
   up->conf_qrange = get_qrange(up->cdim, up->cdim, num_quad, num_quad_v, is_vdim_p2);
@@ -256,30 +281,39 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
     // Allocate device copies of arrays needed for quadrature.
 
     int p2c_qidx_ho[up->phase_qrange.volume];
-    up->p2c_qidx = (int*) gkyl_cu_malloc(sizeof(int)*up->phase_qrange.volume);
+    up->p2c_qidx = (int *)gkyl_cu_malloc(sizeof(int) * up->phase_qrange.volume);
 
     // Allocate mask_quad at phase-space quadrature points.
     // Dbmag_quad at configuration-space quadrature points.
     // qDphiDbmag_quad, the term proportional to (phi-phi_m)/(bmag_max-bmag), at quadrature points.
     up->mask_out_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_phase,
-      inp->conf_range_ext->volume*inp->vel_range->volume);
-    up->qDphiDbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+      inp->conf_range_ext->volume * inp->vel_range->volume);
+    up->qDphiDbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf,
+      inp->conf_range_ext->volume);
+    up->qDphiDbmag_quad_wall = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf,
+      inp->conf_range_ext->volume);
+    up->qDphiDbmag_quad_tandem = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf,
+      inp->conf_range_ext->volume);
 
     // Allocate the memory for computing the specific phase nodal to modal calculation
     struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem_ho;
-    phase_nodal_to_modal_mem_ho = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase, 1.0, 0.0, 
+    phase_nodal_to_modal_mem_ho = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase,
+      1.0, 0.0,
       GKYL_NO_TRANS, GKYL_NO_TRANS, false);
 
     // Compute the matrix A for the phase nodal to modal memory
-    const double *phase_w = (const double*) up->weights_phase->data;
-    const double *phaseb_o = (const double*) up->basis_at_ords_phase->data;
-    for (int n=0; n<up->tot_quad_phase; ++n) {
-      for (int k=0; k<up->num_basis_phase; ++k)
-        gkyl_mat_set(phase_nodal_to_modal_mem_ho->A, k, n, phase_w[n]*phaseb_o[k+up->num_basis_phase*n]);
+    const double *phase_w = (const double *)up->weights_phase->data;
+    const double *phaseb_o = (const double *)up->basis_at_ords_phase->data;
+    for (int n = 0; n < up->tot_quad_phase; ++n) {
+      for (int k = 0; k < up->num_basis_phase; ++k) {
+        gkyl_mat_set(phase_nodal_to_modal_mem_ho->A, k, n,
+          phase_w[n] * phaseb_o[k + up->num_basis_phase * n]);
+      }
     }
-    
+
     // Copy to device
-    up->phase_nodal_to_modal_mem = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase, 1.0, 0.0, 
+    up->phase_nodal_to_modal_mem = gkyl_mat_mm_array_mem_new(up->num_basis_phase,
+      up->tot_quad_phase, 1.0, 0.0,
       GKYL_NO_TRANS, GKYL_NO_TRANS, up->use_gpu);
     gkyl_mat_copy(up->phase_nodal_to_modal_mem->A, phase_nodal_to_modal_mem_ho->A);
     gkyl_mat_mm_array_mem_release(phase_nodal_to_modal_mem_ho);
@@ -293,70 +327,119 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti
       &up->ordinates_phase, &up->weights_phase, &up->basis_at_ords_phase, up->use_gpu);
 
     int pidx[GKYL_MAX_DIM];
-    for (int n=0; n<up->tot_quad_phase; ++n) {
+    for (int n = 0; n < up->tot_quad_phase; ++n) {
       gkyl_range_inv_idx(&up->phase_qrange, n, pidx);
       int cqidx = gkyl_range_idx(&up->conf_qrange, pidx);
       p2c_qidx_ho[n] = cqidx;
     }
-    gkyl_cu_memcpy(up->p2c_qidx, p2c_qidx_ho, sizeof(int)*up->phase_qrange.volume, GKYL_CU_MEMCPY_H2D);
+    gkyl_cu_memcpy(up->p2c_qidx, p2c_qidx_ho, sizeof(int) * up->phase_qrange.volume,
+      GKYL_CU_MEMCPY_H2D);
+
+    // Allocate and set scalar bmag_max_z for GPU kernels.
+    // TODO: For 2x GPU support, need to pass full arrays and do per-cell lookup.
+    // inp->bmag_max_z_coord is a GPU array, so copy to host before reading.
+    struct gkyl_array *bmag_max_z_coord_ho = gkyl_array_new(GKYL_DOUBLE,
+      inp->bmag_max_z_coord->ncomp, inp->bmag_max_z_coord->size);
+    gkyl_array_copy(bmag_max_z_coord_ho, inp->bmag_max_z_coord);
+
+    double bmag_max_z_val;
+    if (up->cdim == 1) {
+      // 1x case: single value.
+      const double *bmag_max_z_d = gkyl_array_cfetch(bmag_max_z_coord_ho, 0);
+      bmag_max_z_val = bmag_max_z_d[0];
+    }
+    else {
+      // 2x case: use the first field line's value (simplified approach).
+      int psi_idx[1] = { inp->bmag_max_range->lower[0] };
+      long bmag_max_z_linidx = gkyl_range_idx(inp->bmag_max_range, psi_idx);
+      const double *bmag_max_z_d = gkyl_array_cfetch(bmag_max_z_coord_ho, bmag_max_z_linidx);
+      double xc[1] = { 0.0 };
+      bmag_max_z_val = inp->bmag_max_basis->eval_expand(xc, bmag_max_z_d);
+    }
+    gkyl_array_release(bmag_max_z_coord_ho);
+    up->bmag_max_z_scalar_gpu = gkyl_cu_malloc(sizeof(double));
+    gkyl_cu_memcpy(up->bmag_max_z_scalar_gpu, &bmag_max_z_val, sizeof(double), GKYL_CU_MEMCPY_H2D);
+
+    // Create a device-resident basis with device-callable function pointers
+    // for use in GPU kernels that call eval_expand.
+    up->bmag_max_basis_on_dev = gkyl_cart_modal_serendip_cu_dev_new(
+      inp->bmag_max_basis->ndim, inp->bmag_max_basis->poly_order);
   }
 #endif
 
-  // Allocate and obtain bmag_max-bmag at quadrature points.
-  if (up->use_gpu) 
-    up->Dbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-  else
-    up->Dbmag_quad = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+  // Acquire references to the bmag peak arrays (no data copy); when !is_tandem the tandem
+  // slots alias the bmag_max arrays. Must precede gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad.
+  up->bmag_max = gkyl_array_acquire(inp->bmag_max);
+  up->bmag_max_z_coord = gkyl_array_acquire(inp->bmag_max_z_coord);
+  up->bmag_wall = gkyl_array_acquire(inp->bmag_wall);
+  up->bmag_wall_z_coord = gkyl_array_acquire(inp->bmag_wall_z_coord);
+  up->bmag_tandem =
+    up->is_tandem ? gkyl_array_acquire(inp->bmag_tandem) : gkyl_array_acquire(inp->bmag_max);
+  up->bmag_tandem_z_coord =
+    up->is_tandem ? gkyl_array_acquire(inp->bmag_tandem_z_coord) :
+    gkyl_array_acquire(inp->bmag_max_z_coord);
+  up->bmag_max_basis = inp->bmag_max_basis;
+  up->bmag_max_range = inp->bmag_max_range;
 
-  gkyl_array_clear(up->Dbmag_quad, 0.0); 
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, inp->bmag_max);
+  // Allocate and obtain the bmag differences (max, wall, tandem) at quadrature points.
+  up->Dbmag_quad = mkarr(up->tot_quad_conf, inp->conf_range_ext->volume, up->use_gpu);
+  up->Dbmag_quad_wall = mkarr(up->tot_quad_conf, inp->conf_range_ext->volume, up->use_gpu);
+  up->Dbmag_quad_tandem = mkarr(up->tot_quad_conf, inp->conf_range_ext->volume, up->use_gpu);
+
+  gkyl_array_clear(up->Dbmag_quad, 0.0);
+  gkyl_array_clear(up->Dbmag_quad_wall, 0.0);
+  gkyl_array_clear(up->Dbmag_quad_tandem, 0.0);
+
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad,
+    up->bmag_max);                                                                                          // bmag_max - bmag
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_wall,
+    up->bmag_wall);                                                                                               // bmag_wall - bmag
+  gkyl_array_scale(up->Dbmag_quad_wall, -1.0); // bmag - bmag_wall
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_tandem,
+    up->bmag_tandem);                                                                                                 // bmag_tandem - bmag
 
-  // Save the location of bmag_max in this updater.
-  if (up->use_gpu) {
-    up->bmag_max_loc = gkyl_cu_malloc(sizeof(double)*up->cdim);
-    gkyl_cu_memcpy(up->bmag_max_loc, inp->bmag_max_loc, sizeof(double)*up->cdim, GKYL_CU_MEMCPY_D2D);
-  }
-  else {
-    up->bmag_max_loc = gkyl_malloc(sizeof(double)*up->cdim);
-    memcpy(up->bmag_max_loc, inp->bmag_max_loc, sizeof(double)*up->cdim);
-  }
-    
   return up;
 }
 
 static void
-proj_on_basis(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords, double* f)
+proj_on_basis(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords,
+  double *f)
 {
   int num_basis = up->num_basis_phase;
   int tot_quad = up->tot_quad_phase;
 
-  const double* GKYL_RESTRICT weights = up->weights_phase->data;
-  const double* GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
-  const double* GKYL_RESTRICT func_at_ords = fun_at_ords->data;
+  const double *GKYL_RESTRICT weights = up->weights_phase->data;
+  const double *GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
+  const double *GKYL_RESTRICT func_at_ords = fun_at_ords->data;
 
-  for (int k=0; k<num_basis; ++k) f[k] = 0.0;
-  
-  for (int imu=0; imu<tot_quad; ++imu) {
-    double tmp = weights[imu]*func_at_ords[imu];
-    for (int k=0; k<num_basis; ++k)
-      f[k] += tmp*basis_at_ords[k+num_basis*imu];
+  for (int k = 0; k < num_basis; ++k) {
+    f[k] = 0.0;
+  }
+  for (int imu = 0; imu < tot_quad; ++imu) {
+    double tmp = weights[imu] * func_at_ords[imu];
+    for (int k = 0; k < num_basis; ++k) {
+      f[k] += tmp * basis_at_ords[k + num_basis * imu];
+    }
   }
 }
 
 static void
-nod_to_mod_reduce(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords, double* f)
+nod_to_mod_reduce(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords,
+  double *f)
 {
   int num_basis = up->num_basis_phase;
   int tot_quad = up->tot_quad_phase;
 
-  const double* GKYL_RESTRICT weights = up->weights_phase->data;
-  const double* GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
-  const double* GKYL_RESTRICT func_at_ords = fun_at_ords->data;
+  const double *GKYL_RESTRICT weights = up->weights_phase->data;
+  const double *GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
+  const double *GKYL_RESTRICT func_at_ords = fun_at_ords->data;
 
-  for (int k=0; k<num_basis; ++k) f[k] = 0.0;
+  for (int k = 0; k < num_basis; ++k) {
+    f[k] = 0.0;
+  }
   f[0] = 1.0;
-  
-  for (int imu=0; imu<tot_quad; ++imu) {
+
+  for (int imu = 0; imu < tot_quad; ++imu) {
     if (func_at_ords[imu] < 1e-14) {
       f[0] = 0.0;
       break;
@@ -367,30 +450,37 @@ nod_to_mod_reduce(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_a
 void
 gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out)
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m,
+  const struct gkyl_array *phi_tandem, struct gkyl_array *mask_out)
 {
 
 #ifdef GKYL_HAVE_CUDA
   if (up->use_gpu)
-    return gkyl_loss_cone_mask_gyrokinetic_advance_cu(up, phase_range, conf_range, 
-      phi, phi_m, mask_out);
+    return gkyl_loss_cone_mask_gyrokinetic_advance_cu(up, phase_range, conf_range,
+      phi, phi_m, phi_tandem, mask_out);
 #endif
 
   int cdim = up->cdim, pdim = up->pdim;
-  int vdim = pdim-cdim;
+  int vdim = pdim - cdim;
 
   int tot_quad_conf = up->tot_quad_conf;
   int num_basis_conf = up->num_basis_conf;
 
+  bool is_tandem = up->is_tandem;
+
   struct gkyl_range vel_rng;
   struct gkyl_range_iter conf_iter, vel_iter;
 
   int pidx[GKYL_MAX_DIM], rem_dir[GKYL_MAX_DIM] = { 0 };
-  for (int d=0; d<conf_range->ndim; ++d) rem_dir[d] = 1;
+  for (int d = 0; d < conf_range->ndim; ++d) {
+    rem_dir[d] = 1;
+  }
 
-  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
+  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = { 0.0 };
   double phi_quad[tot_quad_conf];
   double qDphiDbmag_quad[tot_quad_conf]; // charge*(phi-phi_m)/(bmag_max-bmag[0]).
+  double qDphiDbmag_quad_wall[tot_quad_conf]; // charge*phi/(bmag[0]-bmag_wall); no phi_m offset.
+  double qDphiDbmag_quad_tandem[tot_quad_conf]; // charge*(phi-phi_tandem_m)/(bmag_tandem-bmag[0]).
 
   // Outer loop over configuration space cells; for each
   // config-space cell inner loop walks over velocity space.
@@ -400,27 +490,74 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
 
     const double *phi_d = gkyl_array_cfetch(phi, linidx_conf);
     const double *Dbmag_quad = gkyl_array_cfetch(up->Dbmag_quad, linidx_conf);
+    const double *Dbmag_quad_wall = gkyl_array_cfetch(up->Dbmag_quad_wall, linidx_conf);
+    const double *Dbmag_quad_tandem = is_tandem ?
+      gkyl_array_cfetch(up->Dbmag_quad_tandem, linidx_conf) : gkyl_array_cfetch(up->Dbmag_quad,
+      linidx_conf);
+
+    // Get phi_m value for this field line.
+    // For 1x: single value (phi_m is a scalar stored as p=0 DG expansion).
+    // For 2x: varies with psi, evaluate at this psi cell.
+    double phi_m_val, phi_tandem_m_val;
+    if (cdim == 1) {
+      // 1x case: single scalar value stored as p=0 DG expansion.
+      const double *phi_m_d = gkyl_array_cfetch(phi_m, 0);
+      const double *phi_tandem_m_d = gkyl_array_cfetch(phi_tandem, 0);
+      phi_m_val = phi_m_d[0];
+      phi_tandem_m_val = phi_tandem_m_d[0];
+    }
+    else {
+      // 2x case: evaluate phi_m at this psi cell center.
+      int psi_idx[1] = { conf_iter.idx[0] };
+      long phi_m_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
+      const double *phi_m_d = gkyl_array_cfetch(phi_m, phi_m_linidx);
+      const double *phi_tandem_m_d = gkyl_array_cfetch(phi_tandem, phi_m_linidx);
+      // Evaluate at cell center (logical coord 0).
+      double xc_log[1] = { 0.0 };
+      phi_m_val = up->bmag_max_basis->eval_expand(xc_log, phi_m_d);
+      phi_tandem_m_val = up->bmag_max_basis->eval_expand(xc_log, phi_tandem_m_d);
+    }
 
     // Sum over basis for given potential phi.
-    for (int n=0; n<tot_quad_conf; ++n) {
+    for (int n = 0; n < tot_quad_conf; ++n) {
       const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
 
       // Compute the configuration-space quadrature
       phi_quad[n] = 0.0;
-      for (int k=0; k<num_basis_conf; ++k)
-        phi_quad[n] += phi_d[k]*b_ord[k];
+      for (int k = 0; k < num_basis_conf; ++k) {
+        phi_quad[n] += phi_d[k] * b_ord[k];
+      }
 
-      if (Dbmag_quad[n] > 0.0)
-        qDphiDbmag_quad[n] = up->charge*(phi_quad[n]-phi_m[0])/Dbmag_quad[n];
-      else
+      if (Dbmag_quad[n] > 0.0) {
+        qDphiDbmag_quad[n] = up->charge * (phi_quad[n] - phi_m_val) / Dbmag_quad[n];
+      }
+      else {
         qDphiDbmag_quad[n] = 0.0;
+      }
+
+      if (Dbmag_quad_wall[n] > 0.0) {
+        qDphiDbmag_quad_wall[n] = up->charge * phi_quad[n] / Dbmag_quad_wall[n];
+      }
+      else {
+        qDphiDbmag_quad_wall[n] = 0.0;
+      }
+
+      if (is_tandem) {
+        if (Dbmag_quad_tandem[n] > 0.0) {
+          qDphiDbmag_quad_tandem[n] = up->charge * (phi_quad[n] - phi_tandem_m_val) /
+            Dbmag_quad_tandem[n];
+        }
+        else {
+          qDphiDbmag_quad_tandem[n] = 0.0;
+        }
+      }
     }
 
     // Inner loop over velocity space.
     gkyl_range_deflate(&vel_rng, phase_range, rem_dir, conf_iter.idx);
     gkyl_range_iter_no_split_init(&vel_iter, &vel_rng);
     while (gkyl_range_iter_next(&vel_iter)) {
-      
+
       copy_idx_arrays(conf_range->ndim, phase_range->ndim, conf_iter.idx, vel_iter.idx, pidx);
       long linidx_phase = gkyl_range_idx(&vel_rng, vel_iter.idx);
 
@@ -437,7 +574,6 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         // Convert comp position coordinate to phys pos coord.
         gkyl_rect_grid_cell_center(up->grid_phase, pidx, xc);
         log_to_comp(up->cdim, xcomp_d, up->grid_phase->dx, xc, xmu);
-        up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);
 
         // Convert comp velocity coordinate to phys velocity coord.
         const struct gkyl_velocity_map *gvm = up->vel_map;
@@ -445,24 +581,123 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
         const double *vmap_d = gkyl_array_cfetch(gvm->vmap, linidx_vel);
         double xcomp[1];
         for (int vd = 0; vd < vdim; vd++) {
-          xcomp[0] = xcomp_d[cdim+vd];
-          xmu[cdim+vd] = gvm->vmap_basis->eval_expand(xcomp, vmap_d+vd*gvm->vmap_basis->num_basis);
+          xcomp[0] = xcomp_d[cdim + vd];
+          xmu[cdim + vd] = gvm->vmap_basis->eval_expand(xcomp,
+            vmap_d + vd * gvm->vmap_basis->num_basis);
         }
 
         // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
+        // KEparDbmag_wall = 0.5*mass*pow(vpar,2)/(bmag[0]-bmag_wall).
+        // KEparDbmag_tandem = 0.5*mass*pow(vpar,2)/(bmag_tandem-bmag[0]).
         double KEparDbmag = 0.0;
-        if (Dbmag_quad[cqidx] > 0.0)
-          KEparDbmag = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad[cqidx];
-        else
+        double KEparDbmag_wall = 0.0;
+        double KEparDbmag_tandem = 0.0;
+
+        if (Dbmag_quad[cqidx] > 0.0) {
+          KEparDbmag = 0.5 * up->mass * pow(xmu[cdim], 2.0) / Dbmag_quad[cqidx];
+        }
+        else {
           KEparDbmag = 0.0;
+        }
 
-	double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad[cqidx]);
+        if (Dbmag_quad_wall[cqidx] > 0.0) {
+          KEparDbmag_wall = 0.5 * up->mass * pow(xmu[cdim], 2.0) / Dbmag_quad_wall[cqidx];
+        }
+        else {
+          KEparDbmag_wall = 0.0;
+        }
+
+        if (Dbmag_quad_tandem[cqidx] > 0.0) {
+          KEparDbmag_tandem = 0.5 * up->mass * pow(xmu[cdim], 2.0) / Dbmag_quad_tandem[cqidx];
+        }
+        else {
+          KEparDbmag_tandem = 0.0;
+        }
+
+        double mu_bound = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad[cqidx]);
+        double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall + qDphiDbmag_quad_wall[cqidx]));
+        double mu_bound_tandem = GKYL_MAX2(0.0, KEparDbmag_tandem + qDphiDbmag_quad_tandem[cqidx]);
+
+        // Get the z-coordinate of bmag_max for this field line.
+        // For 1x: single value (index 0).
+        // For 2x: varies with psi, so use conf_iter.idx[0].
+        double bmag_max_z_val, bmag_tandem_z_val;
+        if (cdim == 1) {
+          // 1x case: single value.
+          const double *bmag_max_z_d = gkyl_array_cfetch(up->bmag_max_z_coord, 0);
+          bmag_max_z_val = bmag_max_z_d[0];
+          if (is_tandem) {
+            const double *bmag_tandem_z_d = gkyl_array_cfetch(up->bmag_tandem_z_coord, 0);
+            bmag_tandem_z_val = bmag_tandem_z_d[0];
+          }
+        }
+        else {
+          // 2x case: evaluate bmag_max_z at this psi cell.
+          int psi_idx[1] = { conf_iter.idx[0] };
+          long bmag_max_z_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx);
+          const double *bmag_max_z_d = gkyl_array_cfetch(up->bmag_max_z_coord, bmag_max_z_linidx);
+          // For simplicity, evaluate at cell center (logical coord 0).
+          double xc[1] = { 0.0 };
+          bmag_max_z_val = up->bmag_max_basis->eval_expand(xc, bmag_max_z_d);
+          if (is_tandem) {
+            const double *bmag_tandem_z_d = gkyl_array_cfetch(up->bmag_tandem_z_coord,
+              bmag_max_z_linidx);
+            bmag_tandem_z_val = up->bmag_max_basis->eval_expand(xc, bmag_tandem_z_d);
+          }
+        }
 
         double *fq = gkyl_array_fetch(up->fun_at_ords, pqidx);
-	if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(up->bmag_max_loc[cdim-1])) 
-          fq[0] = 1.0 * up->norm_fac;
-        else
-          fq[0] = 0.0;
+        // xmu[cdim-1] is the z-coordinate (last config space coordinate).
+
+        if (is_tandem) {
+          // Tandem mirror trapping condition:
+          // Determine which region we're in based on position.
+          bool in_outer_cell = fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val) &&
+            fabs(xmu[cdim - 1]) > fabs(bmag_tandem_z_val);
+          bool in_central_cell = fabs(xmu[cdim - 1]) <= fabs(bmag_tandem_z_val);
+
+          if (in_outer_cell) {
+            // Between tandem and outer mirror - check outer barrier
+            if (mu_bound < xmu[cdim + 1]) {
+              fq[0] = 1.0 * up->norm_fac;
+            }
+            else {
+              fq[0] = 0.0;
+            }
+          }
+          else if (in_central_cell) {
+            // In central cell - must overcome the minimum of both barriers to escape.
+            // A particle is trapped if mu > min(mu_bound, mu_bound_tandem).
+            double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem);
+            if (mu_bound_min < xmu[cdim + 1]) {
+              fq[0] = 1.0 * up->norm_fac;
+            }
+            else {
+              fq[0] = 0.0;
+            }
+          }
+          else {
+            // In the outer wall region beyond outer mirror
+            if (mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val)) {
+              fq[0] = 1.0 * up->norm_fac;
+            }
+            else {
+              fq[0] = 0.0;
+            }
+          }
+        }
+        else {
+          // Single mirror case (original logic)
+          if (mu_bound < xmu[cdim + 1] && fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val)) {
+            fq[0] = 1.0 * up->norm_fac;
+          }
+          else if (mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val)) {
+            fq[0] = 1.0 * up->norm_fac;
+          }
+          else {
+            fq[0] = 0.0;
+          }
+        }
       }
       // Compute DG expansion coefficients of the mask.
       if (up->cellwise_trap_loss)
@@ -474,7 +709,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
 }
 
 void
-gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up)
+gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic *up)
 {
   gkyl_velocity_map_release(up->vel_map);
 
@@ -488,16 +723,26 @@ gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up)
 
   gkyl_array_release(up->fun_at_ords);
   gkyl_array_release(up->Dbmag_quad);
+  gkyl_array_release(up->Dbmag_quad_wall);
+  gkyl_array_release(up->Dbmag_quad_tandem);
+
+  gkyl_array_release(up->bmag_max);
+  gkyl_array_release(up->bmag_max_z_coord);
+  gkyl_array_release(up->bmag_wall);
+  gkyl_array_release(up->bmag_wall_z_coord);
+  gkyl_array_release(up->bmag_tandem);
+  gkyl_array_release(up->bmag_tandem_z_coord);
 
   if (up->use_gpu) {
     gkyl_cu_free(up->p2c_qidx);
     gkyl_array_release(up->mask_out_quad);
     gkyl_array_release(up->qDphiDbmag_quad);
+    gkyl_array_release(up->qDphiDbmag_quad_wall);
+    gkyl_array_release(up->qDphiDbmag_quad_tandem);
+
     gkyl_mat_mm_array_mem_release(up->phase_nodal_to_modal_mem);
-    gkyl_cu_free(up->bmag_max_loc);
-  }
-  else {
-    gkyl_free(up->bmag_max_loc);
+    gkyl_cu_free(up->bmag_max_z_scalar_gpu);
+    gkyl_cu_free(up->bmag_max_basis_on_dev);
   }
 
   gkyl_free(up);
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
index ee35da47e..693b5ff4f 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
@@ -16,61 +16,97 @@ extern "C" {
 #include <gkyl_mat_priv.h>
 }
 
+// Kernel: writes Dbmag_quad_out[n] = bmag_peak - bmag(node n) for every conf-space cell.
+// bmag_peak is a per-field-line array (1D for 2x, scalar for 1x).
+// For 1x (cdim == 1): coefficient 0 of the bmag_peak entry at linear index 0 is used.
+// For 2x: bmag_peak varies with psi (x-direction); entry cidx[0] is evaluated at coord 0.
 __global__ static void
-gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(struct gkyl_range conf_range,
-  const struct gkyl_array* basis_at_ords_conf, const struct gkyl_array* bmag, const double *bmag_max,
-  struct gkyl_array* Dbmag_quad_d)
-{    
+gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(int cdim, struct gkyl_range conf_range,
+  struct gkyl_range bmag_peak_range, const struct gkyl_array *basis_at_ords_conf,
+  const struct gkyl_array *bmag, const struct gkyl_array *bmag_peak,
+  const struct gkyl_basis *bmag_peak_basis, struct gkyl_array *Dbmag_quad_out)
+{
   int num_basis_conf = basis_at_ords_conf->ncomp;
   int tot_quad_conf = basis_at_ords_conf->size;
 
   int cidx[GKYL_MAX_CDIM];
 
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < conf_range.volume; tid += blockDim.x*gridDim.x) {
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < conf_range.volume; tid += blockDim.x * gridDim.x) { // Stride: all x-threads in the grid.
 
     gkyl_sub_range_inv_idx(&conf_range, tid, cidx);
     long linidx = gkyl_range_idx(&conf_range, cidx);
 
-    const double *bmag_d = (const double*) gkyl_array_cfetch(bmag, linidx);
-
-    double *bmag_quad = (double*) gkyl_array_fetch(Dbmag_quad_d, linidx);
-
-    for (int n=0; n<tot_quad_conf; ++n) {
-      const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, n);
+    const double *bmag_d = (const double *)gkyl_array_cfetch(bmag, linidx);
+    double *Dbmag_quad_d = (double *)gkyl_array_fetch(Dbmag_quad_out, linidx);
+
+    // Get bmag_peak for this field line.
+    // For 1x: single value (index 0).
+    // For 2x: varies with psi, so use cidx[0].
+    double bmag_peak_val;
+    if (cdim == 1) {
+      // 1x case: single value.
+      const double *bmag_peak_d = (const double *)gkyl_array_cfetch(bmag_peak, 0);
+      bmag_peak_val = bmag_peak_d[0]; // Just the constant coefficient.
+    }
+    else {
+      // 2x case: evaluate bmag_peak at this psi cell.
+      int psi_idx[1] = { cidx[0] };
+      long psi_linidx = gkyl_range_idx(&bmag_peak_range, psi_idx);
+      const double *bmag_peak_d = (const double *)gkyl_array_cfetch(bmag_peak, psi_linidx);
+      // Evaluate at cell center (logical coord 0).
+      double xc[1] = { 0.0 };
+      bmag_peak_val = bmag_peak_basis->eval_expand(xc, bmag_peak_d);
+    }
 
-      for (int k=0; k<num_basis_conf; ++k)
-        bmag_quad[n] += bmag_d[k]*b_ord[k];
+    // Sum over basis to get bmag at quadrature points, then compute difference.
+    for (int n = 0; n < tot_quad_conf; ++n) {
+      const double *b_ord = (const double *)gkyl_array_cfetch(basis_at_ords_conf, n);
 
-      bmag_quad[n] = bmag_max[0] - bmag_quad[n];
+      double bmag_quad = 0.0; // Local accumulator for bmag at node n.
+      for (int k = 0; k < num_basis_conf; ++k) {
+        bmag_quad += bmag_d[k] * b_ord[k];
+      }
+      Dbmag_quad_d[n] = bmag_peak_val - bmag_quad; // Assigned, not accumulated into the output.
     }
   }
 }
 
-void 
+void
 gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max)
+  const struct gkyl_range *conf_range, const struct gkyl_array *bmag,
+  struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_peak) // Dbmag_quad is the output.
 {
   int nblocks = conf_range->nblocks, nthreads = conf_range->nthreads;
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<<nblocks, nthreads>>>(*conf_range, 
-    up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_max, up->Dbmag_quad->on_dev);
+  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<<nblocks,
+    nthreads>>>(up->cdim, *conf_range, // Launch configuration comes from conf_range.
+  *up->bmag_max_range, up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_peak->on_dev,
+  up->bmag_max_basis_on_dev, Dbmag_quad->on_dev); // bmag_max range/basis used for any bmag_peak.
 }
 
 static void
-gkyl_parallelize_components_kernel_launch_dims(dim3* dimGrid, dim3* dimBlock, gkyl_range range, int ncomp)
+gkyl_parallelize_components_kernel_launch_dims(dim3 *dimGrid, dim3 *dimBlock, gkyl_range range,
+  int ncomp)
 {
-  // Create a 2D thread grid so we launch ncomp*range.volume number of threads 
+  // Create a 2D thread grid (x: cells, y: components), i.e. ncomp*range.volume threads,
   // so we can parallelize over components too
   dimBlock->y = ncomp; // ncomp *must* be less than 256
   dimGrid->y = 1;
-  dimBlock->x = GKYL_DEFAULT_NUM_THREADS/ncomp;
+  dimBlock->x = GKYL_DEFAULT_NUM_THREADS / ncomp; // Remaining threads per block go to cells.
   dimGrid->x = gkyl_int_div_up(range.volume, dimBlock->x);
 }
 
+// Kernel to compute qDphiDbmag_quad = charge*(phi-phi_m)/(bmag_max-bmag) at quadrature nodes.
+// Also fills the wall and (if is_tandem) tandem arrays. Per-field-line phi_m lookup for 2x.
 __global__ static void
-gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range, 
-  const struct gkyl_array* basis_at_ords_conf, double charge, const struct gkyl_array* phi,
-  const double *phi_m, const struct gkyl_array* Dbmag_quad, struct gkyl_array* qDphiDbmag_quad)
+gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(int cdim, struct gkyl_range conf_range,
+  struct gkyl_range phi_m_range, const struct gkyl_array *basis_at_ords_conf,
+  const struct gkyl_basis *phi_m_basis, double charge, bool is_tandem,
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem,
+  const struct gkyl_array *Dbmag_quad, const struct gkyl_array *Dbmag_quad_wall,
+  const struct gkyl_array *Dbmag_quad_tandem,
+  struct gkyl_array *qDphiDbmag_quad, struct gkyl_array *qDphiDbmag_quad_wall,
+  struct gkyl_array *qDphiDbmag_quad_tandem)
 {
   int num_basis_conf = basis_at_ords_conf->ncomp;
 
@@ -78,93 +114,214 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range
 
   // 2D thread grid
   // linc2 goes from 0 to tot_quad_conf= basis_at_ords_conf->size.
-  long linc2 = threadIdx.y + blockIdx.y*blockDim.y;
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < conf_range.volume; tid += blockDim.x*gridDim.x) {
+  long linc2 = threadIdx.y + blockIdx.y * blockDim.y;
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < conf_range.volume; tid += blockDim.x * gridDim.x) {
     gkyl_sub_range_inv_idx(&conf_range, tid, cidx);
 
     long linidx = gkyl_range_idx(&conf_range, cidx);
 
-    const double *phi_d = (const double*) gkyl_array_cfetch(phi, linidx);
-    const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx);
+    const double *phi_d = (const double *)gkyl_array_cfetch(phi, linidx);
+    const double *Dbmag_quad_d = (const double *)gkyl_array_cfetch(Dbmag_quad, linidx);
+    const double *Dbmag_quad_wall_d = (const double *)gkyl_array_cfetch(Dbmag_quad_wall, linidx);
+    const double *Dbmag_quad_tandem_d = is_tandem ?
+      (const double *)gkyl_array_cfetch(Dbmag_quad_tandem, linidx) : Dbmag_quad_d;
+
+    // Get phi_m value for this field line.
+    // For 1x: single value (phi_m is a scalar stored as p=0 DG expansion).
+    // For 2x: varies with psi, evaluate at this psi cell.
+    double phi_m_val, phi_tandem_m_val;
+    if (cdim == 1) {
+      // 1x case: single scalar value stored as p=0 DG expansion.
+      const double *phi_m_d = (const double *)gkyl_array_cfetch(phi_m, 0);
+      phi_m_val = phi_m_d[0];
+      if (is_tandem) {
+        const double *phi_tandem_m_d = (const double *)gkyl_array_cfetch(phi_tandem, 0);
+        phi_tandem_m_val = phi_tandem_m_d[0];
+      }
+    }
+    else {
+      // 2x case: evaluate phi_m at this psi cell center.
+      int psi_idx[1] = { cidx[0] };
+      long phi_m_linidx = gkyl_range_idx(&phi_m_range, psi_idx);
+      const double *phi_m_d = (const double *)gkyl_array_cfetch(phi_m, phi_m_linidx);
+      // Evaluate at cell center (logical coord 0).
+      double xc[1] = { 0.0 };
+      phi_m_val = phi_m_basis->eval_expand(xc, phi_m_d);
+      if (is_tandem) {
+        const double *phi_tandem_m_d = (const double *)gkyl_array_cfetch(phi_tandem, phi_m_linidx);
+        phi_tandem_m_val = phi_m_basis->eval_expand(xc, phi_tandem_m_d);
+      }
+    }
 
-    // Sum over basis at configuration-space quadrature points. 
-    const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, linc2);
+    // Sum over basis at configuration-space quadrature points.
+    const double *b_ord = (const double *)gkyl_array_cfetch(basis_at_ords_conf, linc2);
     double phi_quad = 0;
-    for (int k=0; k<num_basis_conf; ++k)
-      phi_quad += phi_d[k]*b_ord[k];
+    for (int k = 0; k < num_basis_conf; ++k) {
+      phi_quad += phi_d[k] * b_ord[k];
+    }
 
     // Potential energy term at each quadrature point.
-    double *qDphiDbmag_quad_d = (double*) gkyl_array_fetch(qDphiDbmag_quad, linidx);
+    double *qDphiDbmag_quad_d = (double *)gkyl_array_fetch(qDphiDbmag_quad, linidx);
+    double *qDphiDbmag_quad_wall_d = (double *)gkyl_array_fetch(qDphiDbmag_quad_wall, linidx);
+
     if (Dbmag_quad_d[linc2] > 0.0)
-      qDphiDbmag_quad_d[linc2] = charge*(phi_quad-phi_m[0])/Dbmag_quad_d[linc2];
+      qDphiDbmag_quad_d[linc2] = charge * (phi_quad - phi_m_val) / Dbmag_quad_d[linc2];
     else
       qDphiDbmag_quad_d[linc2] = 0.0;
+
+    if (Dbmag_quad_wall_d[linc2] > 0.0)
+      qDphiDbmag_quad_wall_d[linc2] = charge * phi_quad / Dbmag_quad_wall_d[linc2];
+    else
+      qDphiDbmag_quad_wall_d[linc2] = 0.0;
+
+    if (is_tandem) {
+      double *qDphiDbmag_quad_tandem_d = (double *)gkyl_array_fetch(qDphiDbmag_quad_tandem, linidx);
+      if (Dbmag_quad_tandem_d[linc2] > 0.0)
+        qDphiDbmag_quad_tandem_d[linc2] = charge * (phi_quad - phi_tandem_m_val) /
+          Dbmag_quad_tandem_d[linc2];
+      else
+        qDphiDbmag_quad_tandem_d[linc2] = 0.0;
+    }
   }
 }
 
+// Cellwise kernel: mask is 1 only if every phase quadrature node of the cell is trapped, else 0.
+// Supports tandem mirrors and per-field-line z-coordinate lookup.
 __global__ static void
-gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
+gkyl_loss_cone_mask_gyrokinetic_ker(int cdim, struct gkyl_rect_grid grid_phase,
   struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
-  double mass, const struct gkyl_array* phase_ordinates, 
-  const double *bmag_max_loc, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad,
-  const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out)
+  struct gkyl_range bmag_max_range, const struct gkyl_basis *bmag_max_basis, bool is_tandem,
+  double mass, const struct gkyl_array *phase_ordinates,
+  const struct gkyl_array *bmag_max_z_coord, const struct gkyl_array *bmag_tandem_z_coord,
+  const struct gkyl_array *qDphiDbmag_quad, const struct gkyl_array *qDphiDbmag_quad_wall,
+  const struct gkyl_array *qDphiDbmag_quad_tandem,
+  const struct gkyl_array *Dbmag_quad, const struct gkyl_array *Dbmag_quad_wall,
+  const struct gkyl_array *Dbmag_quad_tandem,
+  const int *p2c_qidx, struct gkyl_array *vmap, struct gkyl_basis *vmap_basis,
+  struct gkyl_array *mask_out)
 {
-  int pdim = phase_range.ndim, cdim = conf_range.ndim;
-  int vdim = pdim-cdim;
+  int pdim = phase_range.ndim;
+  int vdim = pdim - cdim;
 
-  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
+  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = { 0.0 };
   int pidx[GKYL_MAX_DIM], cidx[GKYL_MAX_CDIM], vidx[2];
 
   int tot_phase_quad = phase_ordinates->size;
 
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < phase_range.volume; tid += blockDim.x*gridDim.x) {
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < phase_range.volume; tid += blockDim.x * gridDim.x) {
     gkyl_sub_range_inv_idx(&phase_range, tid, pidx);
 
     // Get configuration-space linear index.
-    for (unsigned int k = 0; k < cdim; k++) cidx[k] = pidx[k];
+    for (unsigned int k = 0; k < cdim; k++) {
+      cidx[k] = pidx[k];
+    }
     long linidx_conf = gkyl_range_idx(&conf_range, cidx);
 
-    const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf);
-    const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf);
+    const double *Dbmag_quad_d = (const double *)gkyl_array_cfetch(Dbmag_quad, linidx_conf);
+    const double *Dbmag_quad_wall_d = (const double *)gkyl_array_cfetch(Dbmag_quad_wall,
+      linidx_conf);
+    const double *Dbmag_quad_tandem_d = is_tandem ?
+      (const double *)gkyl_array_cfetch(Dbmag_quad_tandem, linidx_conf) : Dbmag_quad_d;
+    const double *qDphiDbmag_quad_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad,
+      linidx_conf);
+    const double *qDphiDbmag_quad_wall_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad_wall,
+      linidx_conf);
+    const double *qDphiDbmag_quad_tandem_d = is_tandem ?
+      (const double *)gkyl_array_cfetch(qDphiDbmag_quad_tandem, linidx_conf) : qDphiDbmag_quad_d;
+
+    // Get z-coordinates for field-line specific values.
+    double bmag_max_z_val, bmag_tandem_z_val;
+    if (cdim == 1) {
+      const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, 0);
+      bmag_max_z_val = bmag_max_z_d[0];
+      if (is_tandem) {
+        const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord, 0);
+        bmag_tandem_z_val = bmag_tandem_z_d[0];
+      }
+    }
+    else {
+      int psi_idx[1] = { cidx[0] };
+      long psi_linidx = gkyl_range_idx(&bmag_max_range, psi_idx);
+      const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, psi_linidx);
+      double xc_log[1] = { 0.0 };
+      bmag_max_z_val = bmag_max_basis->eval_expand(xc_log, bmag_max_z_d);
+      if (is_tandem) {
+        const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord,
+          psi_linidx);
+        bmag_tandem_z_val = bmag_max_basis->eval_expand(xc_log, bmag_tandem_z_d);
+      }
+    }
 
     gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
     long linidx_phase = gkyl_range_idx(&phase_range, pidx);
-    double *mask_d = (double*) gkyl_array_fetch(mask_out, linidx_phase);
+    double *mask_d = (double *)gkyl_array_fetch(mask_out, linidx_phase);
 
-    for (int d = cdim; d < pdim; d++) vidx[d-cdim] = pidx[d];
+    for (int d = cdim; d < pdim; d++) {
+      vidx[d - cdim] = pidx[d];
+    }
     long linidx_vel = gkyl_range_idx(&vel_range, vidx);
-    const double *vmap_d = (const double*) gkyl_array_cfetch(vmap, linidx_vel);
+    const double *vmap_d = (const double *)gkyl_array_cfetch(vmap, linidx_vel);
 
     mask_d[0] = 1.0; // In this case the mask has ncomp=1.
 
-    for (int n=0; n<tot_phase_quad; ++n) {
+    for (int n = 0; n < tot_phase_quad; ++n) {
       int cqidx = p2c_qidx[n];
 
-      const double *xcomp_d = (const double*) gkyl_array_cfetch(phase_ordinates, n);
+      const double *xcomp_d = (const double *)gkyl_array_cfetch(phase_ordinates, n);
 
       // Convert comp position coordinate to phys pos coord.
       log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
-//      up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);
-  
+
       // Convert comp velocity coordinate to phys velocity coord.
       double xcomp[1];
       for (int vd = 0; vd < vdim; vd++) {
-        xcomp[0] = xcomp_d[cdim+vd];
-        xmu[cdim+vd] = vmap_basis->eval_expand(xcomp, vmap_d+vd*vmap_basis->num_basis);
+        xcomp[0] = xcomp_d[cdim + vd];
+        xmu[cdim + vd] = vmap_basis->eval_expand(xcomp, vmap_d + vd * vmap_basis->num_basis);
       }
-  
-      // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
-      double KEparDbmag = 0.0;
+
+      // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_peak-bmag).
+      double KEparDbmag = 0.0, KEparDbmag_wall = 0.0, KEparDbmag_tandem = 0.0;
       if (Dbmag_quad_d[cqidx] > 0.0)
-        KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx];
-      else
-        KEparDbmag = 0.0;
-  
-      double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
-  
-      if ( !(mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_loc[cdim-1])) ) {
+        KEparDbmag = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_d[cqidx];
+
+      if (Dbmag_quad_wall_d[cqidx] > 0.0)
+        KEparDbmag_wall = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_wall_d[cqidx];
+
+      if (is_tandem && Dbmag_quad_tandem_d[cqidx] > 0.0)
+        KEparDbmag_tandem = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_tandem_d[cqidx];
+
+      double mu_bound = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad_d[cqidx]);
+      double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall + qDphiDbmag_quad_wall_d[cqidx]));
+      double mu_bound_tandem = is_tandem ? GKYL_MAX2(0.0,
+        KEparDbmag_tandem + qDphiDbmag_quad_tandem_d[cqidx]) : 0.0;
+
+      bool is_trapped;
+      if (is_tandem) {
+        // Tandem mirror trapping condition.
+        bool in_outer_cell = fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val) &&
+          fabs(xmu[cdim - 1]) > fabs(bmag_tandem_z_val);
+        bool in_central_cell = fabs(xmu[cdim - 1]) <= fabs(bmag_tandem_z_val);
+
+        if (in_outer_cell) {
+          is_trapped = mu_bound < xmu[cdim + 1];
+        }
+        else if (in_central_cell) {
+          double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem);
+          is_trapped = mu_bound_min < xmu[cdim + 1];
+        }
+        else {
+          is_trapped = mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val);
+        }
+      }
+      else {
+        // Single mirror case.
+        is_trapped = (mu_bound < xmu[cdim + 1] && fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val)) ||
+          (mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val));
+      }
+
+      if (!is_trapped) {
         mask_d[0] = 0.0;
         break;
       }
@@ -172,108 +329,199 @@ gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
   }
 }
 
+// Quadrature kernel: for each phase-space cell (x thread index, looped with stride blockDim.x*gridDim.x) and quadrature node linc2 (y thread index), stores norm_fac or 0.0 in mask_out_quad.
+// Supports tandem mirrors (is_tandem) and, for cdim != 1, looks up the peak z-coordinates per cidx[0] through bmag_max_range and bmag_max_basis.
 __global__ static void
-gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
+gkyl_loss_cone_mask_gyrokinetic_quad_ker(int cdim, struct gkyl_rect_grid grid_phase,
   struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
-  double mass, double norm_fac, const struct gkyl_array* phase_ordinates, 
-  const double *bmag_max_loc, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad,
-  const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out_quad)
+  struct gkyl_range bmag_max_range, const struct gkyl_basis *bmag_max_basis, bool is_tandem,
+  double mass, double norm_fac, const struct gkyl_array *phase_ordinates,
+  const struct gkyl_array *bmag_max_z_coord, const struct gkyl_array *bmag_tandem_z_coord,
+  const struct gkyl_array *qDphiDbmag_quad, const struct gkyl_array *qDphiDbmag_quad_wall,
+  const struct gkyl_array *qDphiDbmag_quad_tandem,
+  const struct gkyl_array *Dbmag_quad, const struct gkyl_array *Dbmag_quad_wall,
+  const struct gkyl_array *Dbmag_quad_tandem,
+  const int *p2c_qidx, struct gkyl_array *vmap, struct gkyl_basis *vmap_basis,
+  struct gkyl_array *mask_out_quad)
 {
-  int pdim = phase_range.ndim, cdim = conf_range.ndim;
-  int vdim = pdim-cdim;
+  int pdim = phase_range.ndim;
+  int vdim = pdim - cdim; // cdim is now a kernel argument instead of being read from conf_range.ndim.
 
-  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
+  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = { 0.0 };
   int pidx[GKYL_MAX_DIM], cidx[GKYL_MAX_CDIM], vidx[2];
 
   // 2D thread grid
   // linc2 goes from 0 to tot_quad_phase
-  long linc2 = threadIdx.y + blockIdx.y*blockDim.y;
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < phase_range.volume; tid += blockDim.x*gridDim.x) {
+  long linc2 = threadIdx.y + blockIdx.y * blockDim.y; // NOTE(review): linc2 is not bounds-checked here; presumably the launch dims cover exactly tot_quad_phase — confirm.
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
+    tid < phase_range.volume; tid += blockDim.x * gridDim.x) {
     gkyl_sub_range_inv_idx(&phase_range, tid, pidx);
 
     // Get configuration-space linear index.
-    for (unsigned int k = 0; k < cdim; k++) cidx[k] = pidx[k];
+    for (unsigned int k = 0; k < cdim; k++) {
+      cidx[k] = pidx[k];
+    }
 
     long linidx_conf = gkyl_range_idx(&conf_range, cidx);
 
-    const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf);
-    const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf);
+    const double *Dbmag_quad_d = (const double *)gkyl_array_cfetch(Dbmag_quad, linidx_conf);
+    const double *Dbmag_quad_wall_d = (const double *)gkyl_array_cfetch(Dbmag_quad_wall,
+      linidx_conf);
+    const double *Dbmag_quad_tandem_d = is_tandem ?
+      (const double *)gkyl_array_cfetch(Dbmag_quad_tandem, linidx_conf) : Dbmag_quad_d; // Aliases Dbmag_quad_d when !is_tandem, so the tandem array is not fetched.
+    const double *qDphiDbmag_quad_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad,
+      linidx_conf);
+    const double *qDphiDbmag_quad_wall_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad_wall,
+      linidx_conf);
+    const double *qDphiDbmag_quad_tandem_d = is_tandem ?
+      (const double *)gkyl_array_cfetch(qDphiDbmag_quad_tandem, linidx_conf) : qDphiDbmag_quad_d; // Aliases qDphiDbmag_quad_d when !is_tandem.
+
+    // Get z-coordinates of the bmag peak(s): a single stored value when cdim == 1, otherwise an expansion selected by cidx[0].
+    double bmag_max_z_val, bmag_tandem_z_val; // bmag_tandem_z_val is assigned, and later read, only when is_tandem.
+    if (cdim == 1) {
+      const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, 0); // Element 0, component 0 is used directly.
+      bmag_max_z_val = bmag_max_z_d[0];
+      if (is_tandem) {
+        const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord, 0);
+        bmag_tandem_z_val = bmag_tandem_z_d[0];
+      }
+    }
+    else {
+      int psi_idx[1] = { cidx[0] }; // Only the first configuration index selects the entry.
+      long psi_linidx = gkyl_range_idx(&bmag_max_range, psi_idx);
+      const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, psi_linidx);
+      double xc_log[1] = { 0.0 }; // Expansion is evaluated at logical coordinate 0 (presumably the cell center — confirm).
+      bmag_max_z_val = bmag_max_basis->eval_expand(xc_log, bmag_max_z_d);
+      if (is_tandem) {
+        const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord,
+          psi_linidx);
+        bmag_tandem_z_val = bmag_max_basis->eval_expand(xc_log, bmag_tandem_z_d); // Same basis as for bmag_max_z_coord.
+      }
+    }
 
     gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
     long linidx_phase = gkyl_range_idx(&phase_range, pidx);
 
     int cqidx = p2c_qidx[linc2];
-    for (int d = cdim; d < pdim; d++) vidx[d-cdim] = pidx[d];
+    for (int d = cdim; d < pdim; d++) {
+      vidx[d - cdim] = pidx[d];
+    }
 
     long linidx_vel = gkyl_range_idx(&vel_range, vidx);
-    const double *vmap_d = (const double*) gkyl_array_cfetch(vmap, linidx_vel);
-    const double *xcomp_d = (const double*) gkyl_array_cfetch(phase_ordinates, linc2);
+    const double *vmap_d = (const double *)gkyl_array_cfetch(vmap, linidx_vel);
+    const double *xcomp_d = (const double *)gkyl_array_cfetch(phase_ordinates, linc2);
 
     // Convert comp position coordinate to phys pos coord.
-    gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
     log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
-//    up->c2p_pos(xmu, xmu, up->c2p_pos_ctx);
 
     // Convert comp velocity coordinate to phys velocity coord.
     double xcomp[1];
     for (int vd = 0; vd < vdim; vd++) {
-      xcomp[0] = xcomp_d[cdim+vd];
-      xmu[cdim+vd] = vmap_basis->eval_expand(xcomp, vmap_d+vd*vmap_basis->num_basis);
+      xcomp[0] = xcomp_d[cdim + vd];
+      xmu[cdim + vd] = vmap_basis->eval_expand(xcomp, vmap_d + vd * vmap_basis->num_basis);
     }
 
-    // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
-    double KEparDbmag = 0.0;
+    // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_peak-bmag); each variant stays 0.0 unless its Dbmag entry is > 0.
+    double KEparDbmag = 0.0, KEparDbmag_wall = 0.0, KEparDbmag_tandem = 0.0;
     if (Dbmag_quad_d[cqidx] > 0.0)
-      KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx];
-    else
-      KEparDbmag = 0.0;
+      KEparDbmag = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_d[cqidx];
 
-    double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
+    if (Dbmag_quad_wall_d[cqidx] > 0.0)
+      KEparDbmag_wall = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_wall_d[cqidx];
 
-    double *fq = (double*) gkyl_array_fetch(mask_out_quad, linidx_phase);
-    if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_loc[cdim-1])) 
-      fq[linc2] = norm_fac;
-    else
-      fq[linc2] = 0.0;
+    if (is_tandem && Dbmag_quad_tandem_d[cqidx] > 0.0)
+      KEparDbmag_tandem = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_tandem_d[cqidx];
+
+    double mu_bound = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad_d[cqidx]);
+    double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall + qDphiDbmag_quad_wall_d[cqidx])); // Note the sign flip relative to mu_bound.
+    double mu_bound_tandem = is_tandem ? GKYL_MAX2(0.0,
+      KEparDbmag_tandem + qDphiDbmag_quad_tandem_d[cqidx]) : 0.0; // Set to 0.0 and never read when !is_tandem.
+
+    double *fq = (double *)gkyl_array_fetch(mask_out_quad, linidx_phase);
+
+    if (is_tandem) {
+      // Tandem mirror trapping condition: |z| is classified against |bmag_tandem_z_val| and |bmag_max_z_val|.
+      bool in_outer_cell = fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val) &&
+        fabs(xmu[cdim - 1]) > fabs(bmag_tandem_z_val);
+      bool in_central_cell = fabs(xmu[cdim - 1]) <= fabs(bmag_tandem_z_val);
+
+      if (in_outer_cell) {
+        fq[linc2] = (mu_bound < xmu[cdim + 1]) ? norm_fac : 0.0;
+      }
+      else if (in_central_cell) {
+        double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem); // The smaller of the two bounds is compared against mu.
+        fq[linc2] = (mu_bound_min < xmu[cdim + 1]) ? norm_fac : 0.0;
+      }
+      else {
+        fq[linc2] = (mu_bound_wall > xmu[cdim + 1] &&
+          fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val)) ? norm_fac : 0.0; // NOTE(review): the >= test looks implied by reaching this branch for non-NaN inputs — confirm.
+      }
+    }
+    else {
+      // Single mirror case: mu_bound applies for |z| < |bmag_max_z_val|, mu_bound_wall applies otherwise.
+      if (mu_bound < xmu[cdim + 1] && fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val))
+        fq[linc2] = norm_fac;
+      else if (mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val))
+        fq[linc2] = norm_fac;
+      else
+        fq[linc2] = 0.0;
+    }
   }
 }
 
 void
 gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out)
+  const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem,
+  struct gkyl_array *mask_out) // Host launcher: fills the qDphiDbmag arrays, then runs either the cellwise or the quadrature mask kernel.
 {
   dim3 dimGrid_conf, dimBlock_conf;
   int tot_quad_conf = up->basis_at_ords_conf->size;
-  gkyl_parallelize_components_kernel_launch_dims(&dimGrid_conf, &dimBlock_conf, *conf_range, tot_quad_conf);
-
-  gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker<<<dimGrid_conf, dimBlock_conf>>>(*conf_range, 
-    up->basis_at_ords_conf->on_dev, up->charge, phi->on_dev, phi_m, up->Dbmag_quad->on_dev,
-    up->qDphiDbmag_quad->on_dev);
+  gkyl_parallelize_components_kernel_launch_dims(&dimGrid_conf, &dimBlock_conf, *conf_range,
+    tot_quad_conf);
+
+  // Compute qDphiDbmag at quadrature points; the main, wall and tandem arrays are all handed to the kernel.
+  gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker<<<dimGrid_conf, dimBlock_conf>>>(
+    up->cdim, *conf_range, *up->bmag_max_range,
+    up->basis_at_ords_conf->on_dev, up->bmag_max_basis_on_dev, up->charge, up->is_tandem,
+    phi->on_dev, phi_m->on_dev, phi_tandem->on_dev, // NOTE(review): phi_tandem is dereferenced on the host even when !up->is_tandem — confirm callers never pass NULL.
+    up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev, // NOTE(review): the up->*_tandem arrays are likewise dereferenced unconditionally — confirm they are always allocated.
+    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
+    up->qDphiDbmag_quad_tandem->on_dev);
 
   const struct gkyl_velocity_map *gvm = up->vel_map;
 
   if (up->cellwise_trap_loss) {
     // Don't do quadrature.
     int nblocks = phase_range->nblocks, nthreads = phase_range->nthreads;
-    gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks, nthreads>>>(*up->grid_phase, *phase_range, *conf_range,
-      gvm->local_ext_vel, up->mass, up->ordinates_phase->on_dev,
-      up->bmag_max_loc, up->qDphiDbmag_quad->on_dev, up->Dbmag_quad->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
-      gvm->vmap_basis, mask_out->on_dev);
+    gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks,
+      nthreads>>>(up->cdim, *up->grid_phase, *phase_range, *conf_range,
+    gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis_on_dev, up->is_tandem,
+    up->mass, up->ordinates_phase->on_dev,
+    up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev, // NOTE(review): up->bmag_tandem_z_coord is dereferenced regardless of up->is_tandem — confirm it is never NULL.
+    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
+    up->qDphiDbmag_quad_tandem->on_dev,
+    up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
+    up->p2c_qidx, gvm->vmap->on_dev, gvm->vmap_basis, mask_out->on_dev); // Cellwise path passes mask_out itself as the output array.
   }
   else {
     // Use quadrature.
     dim3 dimGrid, dimBlock;
     int tot_quad_phase = up->basis_at_ords_phase->size;
-    gkyl_parallelize_components_kernel_launch_dims(&dimGrid, &dimBlock, *phase_range, tot_quad_phase);
-
-    gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid, dimBlock>>>(*up->grid_phase, *phase_range, *conf_range,
-      gvm->local_ext_vel, up->mass, up->norm_fac, up->ordinates_phase->on_dev,
-      up->bmag_max_loc, up->qDphiDbmag_quad->on_dev, up->Dbmag_quad->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
-      gvm->vmap_basis, up->mask_out_quad->on_dev);
-
-    // Call cublas to do the matrix multiplication nodal to modal conversion
+    gkyl_parallelize_components_kernel_launch_dims(&dimGrid, &dimBlock, *phase_range,
+      tot_quad_phase);
+
+    gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid,
+      dimBlock>>>(up->cdim, *up->grid_phase, *phase_range, *conf_range,
+    gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis_on_dev, up->is_tandem,
+    up->mass, up->norm_fac, up->ordinates_phase->on_dev,
+    up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev,
+    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
+    up->qDphiDbmag_quad_tandem->on_dev,
+    up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
+    up->p2c_qidx, gvm->vmap->on_dev, gvm->vmap_basis, up->mask_out_quad->on_dev); // Quadrature path passes up->mask_out_quad as the output array.
+
+    // Call cublas to do the matrix multiplication nodal to modal conversion (up->mask_out_quad and mask_out are the array arguments).
     gkyl_mat_mm_array(up->phase_nodal_to_modal_mem, up->mask_out_quad, mask_out);
   }
 }