diff --git a/core/unit/ctest_array_dg_find_peaks.c b/core/unit/ctest_array_dg_find_peaks.c new file mode 100644 index 000000000..98861f457 --- /dev/null +++ b/core/unit/ctest_array_dg_find_peaks.c @@ -0,0 +1,1192 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +// Helper function to create test arrays on CPU or GPU. +static struct gkyl_array* +mkarr(bool use_gpu, long nc, long size) +{ + struct gkyl_array *a = use_gpu ? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size) + : gkyl_array_new(GKYL_DOUBLE, nc, size); + return a; +} + +// 1D test function with multiple peaks: f(z) = cos(2*pi*z/L) +// Has maxima at z=0, z=L and minimum at z=L/2. +static void +test_func_1d_cos(double t, const double *xn, double *fout, void *ctx) +{ + double z = xn[0]; + double L = 2.0; // Period. + fout[0] = cos(2.0 * M_PI * z / L); +} + +// 1D test function that looks like a mirror bmag profile: +// f(z) = B0 * (1 + (R-1)*sin^2(pi*z/L)) +// Has minimum at z=0, maxima at z=-L/2 and z=L/2 (mirror throats). +static void +test_func_1d_mirror(double t, const double *xn, double *fout, void *ctx) +{ + double z = xn[0]; + double L = 2.0; // Half-length. + double B0 = 1.0; // Minimum B. + double R = 4.0; // Mirror ratio. + double sinval = sin(M_PI * z / L); + fout[0] = B0 * (1.0 + (R - 1.0) * sinval * sinval); +} + +// 2D test function: f(psi, z) = (1 + 0.1*psi) * cos(2*pi*z/L) +// The peaks vary slightly with psi. +static void +test_func_2d_cos(double t, const double *xn, double *fout, void *ctx) +{ + double psi = xn[0], z = xn[1]; + double L = 2.0; + fout[0] = (1.0 + 0.1 * psi) * cos(2.0 * M_PI * z / L); +} + +// 2D mirror-like function: peaks at z = +/- z_m(psi). +static void +test_func_2d_mirror(double t, const double *xn, double *fout, void *ctx) +{ + double psi = xn[0], z = xn[1]; + double L = 2.0; + double B0 = 1.0 + 0.1 * psi; // Varies with psi. 
+ double R = 4.0; + double sinval = sin(M_PI * z / L); + fout[0] = B0 * (1.0 + (R - 1.0) * sinval * sinval); +} + +// Complex test function: f(z) = (sin^2(z) + 0.1) * exp(-z^2/100) +// Has 9 peaks on [-5, 5]: edges at z=±5, local extrema at z≈±3π/2, ±π, ±π/2, 0. +static void +test_func_1d_complex(double t, const double *xn, double *fout, void *ctx) +{ + double z = xn[0]; + double sinz = sin(z); + fout[0] = (sinz * sinz + 0.1) * exp(-z * z / 100.0); +} + +// 2D complex test function: f(psi, z) = (sin^2(z) + 0.1) * exp(-z^2/100) * psi +// Peaks scale linearly with psi. +static void +test_func_2d_complex(double t, const double *xn, double *fout, void *ctx) +{ + double psi = xn[0], z = xn[1]; + double sinz = sin(z); + fout[0] = (sinz * sinz + 0.1) * exp(-z * z / 100.0) * psi; +} + +// Test function to project onto peaks: g(psi, z) = z^2 * psi^2 +static void +test_func_quadratic_2d(double t, const double *xn, double *fout, void *ctx) +{ + double psi = xn[0], z = xn[1]; + fout[0] = z * z * psi * psi; +} + +// 1D version: g(z) = z^2 +static void +test_func_quadratic_1d(double t, const double *xn, double *fout, void *ctx) +{ + double z = xn[0]; + fout[0] = z * z; +} + +// Test 1D peak finding with cos function. +void +test_1d_find_peaks_cos(int poly_order, bool use_gpu) +{ + // Grid: z in [-1, 1] (one period of cos(2*pi*z/2)). + double lower[] = { -1.0 }; + double upper[] = { 1.0 }; + int cells[] = { 16 }; + struct gkyl_rect_grid grid; + gkyl_rect_grid_init(&grid, 1, lower, upper, cells); + + // Basis. + struct gkyl_basis basis; + gkyl_cart_modal_serendip(&basis, 1, poly_order); + + // Ranges. + int ghost[] = { 1 }; + struct gkyl_range local, local_ext; + gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local); + + // Project test function onto basis (always on host first). 
+ struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_cos, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho); + gkyl_eval_on_nodes_release(ev); + + // Create device copy if needed. + struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume); + gkyl_array_copy(f, f_ho); + + // Create peak finder. + struct gkyl_array_dg_find_peaks_inp inp = { + .basis = &basis, + .grid = &grid, + .range = &local, + .range_ext = &local_ext, + .search_dir = 0, + .use_gpu = use_gpu, + }; + struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f); + + // Compute peaks. + gkyl_array_dg_find_peaks_advance(peaks, f); + + // Check results: cos(pi*z) on [-1,1] has EDGE_LO at z=-1, LOCAL_MAX at z=0, EDGE_HI at z=1. + int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks); + TEST_CHECK(num_peaks == 3); + + struct { + enum gkyl_peak_type type; + double z_expected; + } expected_peaks[] = { + { GKYL_PEAK_EDGE_LO, -1.0, }, + { GKYL_PEAK_LOCAL_MAX, 0.0, }, + { GKYL_PEAK_EDGE_HI, 1.0, }, + }; + + for (int p = 0; p < 3 && p < num_peaks; p++) { + enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p); + const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p); + const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p); + + // Copy back to host for verification. 
+ struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size); + struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size); + gkyl_array_copy(vals, vals_d); + gkyl_array_copy(coords, coords_d); + + const double *val = gkyl_array_cfetch(vals, 0); + const double *coord = gkyl_array_cfetch(coords, 0); + + double z = coord[0]; + double expected_val[1]; + test_func_1d_cos(0.0, &z, expected_val, NULL); + + TEST_CHECK(ptype == expected_peaks[p].type); + TEST_CHECK(fabs(coord[0] - expected_peaks[p].z_expected) < 0.1); + TEST_CHECK(gkyl_compare_double(val[0], expected_val[0], 0.2)); + + gkyl_array_release(coords); + gkyl_array_release(vals); + gkyl_array_release(coords_d); + gkyl_array_release(vals_d); + } + + gkyl_array_release(f_ho); + gkyl_array_release(f); + gkyl_array_dg_find_peaks_release(peaks); +} + +// Test 1D peak finding with mirror-like function. +void +test_1d_find_peaks_mirror(int poly_order, bool use_gpu) +{ + // Grid: z in [-1, 1]. + double lower[] = { -1.0 }; + double upper[] = { 1.0 }; + int cells[] = { 16 }; + struct gkyl_rect_grid grid; + gkyl_rect_grid_init(&grid, 1, lower, upper, cells); + + // Basis. + struct gkyl_basis basis; + gkyl_cart_modal_serendip(&basis, 1, poly_order); + + // Ranges. + int ghost[] = { 1 }; + struct gkyl_range local, local_ext; + gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local); + + // Project test function onto basis (always on host first). + struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_mirror, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho); + gkyl_eval_on_nodes_release(ev); + + // Create device copy if needed. + struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume); + gkyl_array_copy(f, f_ho); + + // Create peak finder. 
+ struct gkyl_array_dg_find_peaks_inp inp = { + .basis = &basis, + .grid = &grid, + .range = &local, + .range_ext = &local_ext, + .search_dir = 0, + .use_gpu = use_gpu, + }; + struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f); + + // Compute peaks. + gkyl_array_dg_find_peaks_advance(peaks, f); + + int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks); + TEST_CHECK(num_peaks == 3); + + for (int p = 0; p < num_peaks; p++) { + enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p); + const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p); + const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p); + + // Copy back to host for verification. + struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size); + struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size); + gkyl_array_copy(vals, vals_d); + gkyl_array_copy(coords, coords_d); + + const double *val = gkyl_array_cfetch(vals, 0); + const double *coord = gkyl_array_cfetch(coords, 0); + + // Check specific peaks. + if (ptype == GKYL_PEAK_EDGE_LO) { + TEST_CHECK(gkyl_compare_double(val[0], 4.0, 1e-15)); + TEST_CHECK(fabs(coord[0] - (-1.0)) < 1e-15); + } + else if (ptype == GKYL_PEAK_LOCAL_MIN) { + TEST_CHECK(gkyl_compare_double(val[0], 1.0, 1e-15)); + TEST_CHECK(fabs(coord[0]) < 1e-15); + } + else if (ptype == GKYL_PEAK_EDGE_HI) { + TEST_CHECK(gkyl_compare_double(val[0], 4.0, 1e-15)); + TEST_CHECK(fabs(coord[0] - 1.0) < 1e-15); + } + gkyl_array_release(vals); + gkyl_array_release(coords); + gkyl_array_release(vals_d); + gkyl_array_release(coords_d); + } + + gkyl_array_release(f_ho); + gkyl_array_release(f); + gkyl_array_dg_find_peaks_release(peaks); +} + +// Test 2D peak finding. 
+void +test_2d_find_peaks(int poly_order, bool use_gpu) +{ + double lower[] = { 0.0, -1.0 }; + double upper[] = { 1.0, 1.0 }; + int cells[] = { 4, 16 }; + struct gkyl_rect_grid grid; + gkyl_rect_grid_init(&grid, 2, lower, upper, cells); + + struct gkyl_basis basis; + gkyl_cart_modal_serendip(&basis, 2, poly_order); + + int ghost[] = { 1, 1 }; + struct gkyl_range local, local_ext; + gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local); + + // Project test function onto basis (always on host first). + struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_mirror, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho); + gkyl_eval_on_nodes_release(ev); + + // Create device copy if needed. + struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume); + gkyl_array_copy(f, f_ho); + + // Create peak finder (search along z, which is direction 1). + struct gkyl_array_dg_find_peaks_inp inp = { + .basis = &basis, + .grid = &grid, + .range = &local, + .range_ext = &local_ext, + .search_dir = 1, // Search along z. + .use_gpu = use_gpu, + }; + struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f); + + // Compute peaks. + gkyl_array_dg_find_peaks_advance(peaks, f); + + // Check results: Mirror function should have 3 peaks along z. + int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks); + TEST_CHECK(num_peaks == 3); + + const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks); + const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks); + const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks); + + // Check that values and coordinates are reasonable for each peak. 
+ for (int p = 0; p < num_peaks; p++) { + enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p); + const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p); + const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p); + + // Copy back to host for verification. + struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size); + struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size); + gkyl_array_copy(vals, vals_d); + gkyl_array_copy(coords, coords_d); + + double xc_log[1] = { 0.0 }; + + // Check first and last psi cells. + for (int cell_idx = out_range->lower[0]; cell_idx <= out_range->upper[0]; + cell_idx += (out_range->upper[0] - out_range->lower[0])) { + long linidx = gkyl_range_idx(out_range, (int[]){ cell_idx }); + const double *val_d = gkyl_array_cfetch(vals, linidx); + const double *coord_d = gkyl_array_cfetch(coords, linidx); + + double val_at_center = out_basis->eval_expand(xc_log, val_d); + double coord_at_center = out_basis->eval_expand(xc_log, coord_d); + double psi_phys = out_grid->lower[0] + (cell_idx - 0.5) * out_grid->dx[0]; + + // Compute expected value at detected coordinate. + double xn[2] = { psi_phys, coord_at_center }; + double expected_val[1]; + test_func_2d_mirror(0.0, xn, expected_val, NULL); + + // Check value matches analytical function. + TEST_CHECK(gkyl_compare_double(val_at_center, expected_val[0], 1e-15)); + + // Check that coordinate matches expected peak location. 
+ if (ptype == GKYL_PEAK_EDGE_LO) { + TEST_CHECK(fabs(coord_at_center - (-1.0)) < 1e-15); + } + else if (ptype == GKYL_PEAK_LOCAL_MIN) { + TEST_CHECK(fabs(coord_at_center) < 1e-15); + } + else if (ptype == GKYL_PEAK_EDGE_HI) { + TEST_CHECK(fabs(coord_at_center - 1.0) < 1e-15); + } + } + gkyl_array_release(vals); + gkyl_array_release(coords); + gkyl_array_release(vals_d); + gkyl_array_release(coords_d); + } + + gkyl_array_release(f_ho); + gkyl_array_release(f); + gkyl_array_dg_find_peaks_release(peaks); +} + +// Test 1D peak finding with complex oscillatory function. +void +test_1d_find_peaks_complex(int poly_order, bool use_gpu) +{ + double lower[] = { -2.0 * M_PI }; + double upper[] = { 2.0 * M_PI }; + int cells[] = { 64 }; // Need fine resolution to capture oscillations. + struct gkyl_rect_grid grid; + gkyl_rect_grid_init(&grid, 1, lower, upper, cells); + + struct gkyl_basis basis; + gkyl_cart_modal_serendip(&basis, 1, poly_order); + + int ghost[] = { 1 }; + struct gkyl_range local, local_ext; + gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local); + + // Project test function onto basis (always on host first). + struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho); + gkyl_eval_on_nodes_release(ev); + + // Create device copy if needed. + struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume); + gkyl_array_copy(f, f_ho); + + // Create peak finder. + struct gkyl_array_dg_find_peaks_inp inp = { + .basis = &basis, + .grid = &grid, + .range = &local, + .range_ext = &local_ext, + .search_dir = 0, + .use_gpu = use_gpu, + }; + struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f); + + // Compute peaks. + gkyl_array_dg_find_peaks_advance(peaks, f); + + // Check results. 
+ int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks); + + TEST_CHECK(num_peaks == 9); + + // Define expected peak locations and types. + struct { + enum gkyl_peak_type type; + double z_expected; + } expected_peaks[] = { + { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, }, + { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, -M_PI, }, + { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, 0.0, }, + { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, M_PI, }, + { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, }, + { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, }, + }; + + for (int p = 0; p < num_peaks; p++) { + enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p); + const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p); + const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p); + + // Copy back to host for verification. + struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size); + struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size); + gkyl_array_copy(vals, vals_d); + gkyl_array_copy(coords, coords_d); + + const double *val = gkyl_array_cfetch(vals, 0); + const double *coord = gkyl_array_cfetch(coords, 0); + + double z = coord[0]; + double expected_val[1]; + test_func_1d_complex(0.0, &z, expected_val, NULL); + + TEST_CHECK(ptype == expected_peaks[p].type); + TEST_CHECK(fabs(coord[0] - expected_peaks[p].z_expected) < 1e-15); + double rel_error = fabs(val[0] - expected_val[0]) / fabs(expected_val[0]); + TEST_CHECK(rel_error < 1e-15); + + gkyl_array_release(coords); + gkyl_array_release(vals); + gkyl_array_release(coords_d); + gkyl_array_release(vals_d); + } + + gkyl_array_release(f_ho); + gkyl_array_release(f); + gkyl_array_dg_find_peaks_release(peaks); +} + +// Test 2D peak finding with complex oscillatory function. 
+void +test_2d_find_peaks_complex(int poly_order, bool use_gpu) +{ + // Grid: psi in [0.5, 2.0], z in [-5, 5]. + double lower[] = { 0.5, -2.0 * M_PI }; + double upper[] = { 2.0, 2.0 * M_PI }; + int cells[] = { 16, 64 }; + int ndim = 2; + struct gkyl_rect_grid grid; + gkyl_rect_grid_init(&grid, ndim, lower, upper, cells); + + // Basis. + struct gkyl_basis basis; + gkyl_cart_modal_serendip(&basis, ndim, poly_order); + + // Ranges. + int ghost[] = { 1, 1 }; + struct gkyl_range local, local_ext; + gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local); + + // Project test function onto basis (always on host first). + struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho); + gkyl_eval_on_nodes_release(ev); + + // Create device copy if needed. + struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume); + gkyl_array_copy(f, f_ho); + + // Create peak finder (search along z, which is direction 1). + struct gkyl_array_dg_find_peaks_inp inp = { + .basis = &basis, + .grid = &grid, + .range = &local, + .range_ext = &local_ext, + .search_dir = 1, // Search along z. + .use_gpu = use_gpu, + }; + struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f); + + // Compute peaks. + gkyl_array_dg_find_peaks_advance(peaks, f); + + // Check results. + int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks); + TEST_CHECK(num_peaks == 9); + + const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks); + const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks); + const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks); + + // Define expected peak locations and types (same as 1D). 
+ struct { + enum gkyl_peak_type type; + double z_expected; + } expected_peaks[] = { + { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, }, + { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, -M_PI, }, + { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, 0.0, }, + { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, M_PI, }, + { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, }, + { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, }, + }; + + // Get node locations for output basis. + struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, out_basis->ndim, out_basis->num_basis); + out_basis->node_list(gkyl_array_fetch(nodes, 0)); + + // Check peak types and validate values at each psi cell. + for (int p = 0; p < num_peaks; p++) { + enum gkyl_peak_type ptype = gkyl_array_dg_find_peaks_get_type(peaks, p); + TEST_CHECK(ptype == expected_peaks[p].type); + + const struct gkyl_array *vals_d = gkyl_array_dg_find_peaks_acquire_vals(peaks, p); + const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p); + + // Copy back to host for verification. + struct gkyl_array *vals = gkyl_array_new(GKYL_DOUBLE, vals_d->ncomp, vals_d->size); + struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size); + gkyl_array_copy(vals, vals_d); + gkyl_array_copy(coords, coords_d); + + // Check each psi cell. + struct gkyl_range_iter iter; + gkyl_range_iter_init(&iter, out_range); + while (gkyl_range_iter_next(&iter)) { + long linidx = gkyl_range_idx(out_range, iter.idx); + + const double *val_d = gkyl_array_cfetch(vals, linidx); + const double *coord_d = gkyl_array_cfetch(coords, linidx); + + // Get cell center for physical psi coordinate. + double xc_out[1]; + gkyl_rect_grid_cell_center(out_grid, (int[]){ iter.idx[0] }, xc_out); + double psi_phys = xc_out[0]; + + // Evaluate at each nodal point in this cell. 
+ for (int n = 0; n < out_basis->num_basis; n++) { + const double *nod_log = gkyl_array_cfetch(nodes, n); + double val_at_node = out_basis->eval_expand(nod_log, val_d); + double z_at_node = out_basis->eval_expand(nod_log, coord_d); + + // Compute physical psi coordinate at this node. + // dx/2 away from the center is the nodal location. + double nod_phys[1]; + nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0] / 2.0; + + // Compute expected value at detected coordinates. + double xn[2] = { nod_phys[0], z_at_node }; + double expected_val[1]; + test_func_2d_complex(0.0, xn, expected_val, NULL); + + TEST_CHECK(fabs(z_at_node - expected_peaks[p].z_expected) < 1e-15); + double rel_error = fabs(val_at_node - expected_val[0]) / fabs(expected_val[0]); + TEST_CHECK(rel_error < 1e-15); + } + } + gkyl_array_release(vals); + gkyl_array_release(coords); + gkyl_array_release(vals_d); + gkyl_array_release(coords_d); + } + + gkyl_array_release(nodes); + gkyl_array_release(f_ho); + gkyl_array_release(f); + gkyl_array_dg_find_peaks_release(peaks); +} + +// Test 1D project_on_peaks with complex function. +void +test_1d_project_on_peaks(int poly_order, bool use_gpu) +{ + double lower[] = { -2.0 * M_PI }; + double upper[] = { 2.0 * M_PI }; + int cells[] = { 64 }; + struct gkyl_rect_grid grid; + gkyl_rect_grid_init(&grid, 1, lower, upper, cells); + + struct gkyl_basis basis; + gkyl_cart_modal_serendip(&basis, 1, poly_order); + + int ghost[] = { 1 }; + struct gkyl_range local, local_ext; + gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local); + + // Project test function for peak finding (always on host first). + struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho); + gkyl_eval_on_nodes_release(ev); + + // Project quadratic function to evaluate at peaks. 
+ struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_1d, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho); + gkyl_eval_on_nodes_release(ev); + + // Create device copies if needed. + struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume); + struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume); + gkyl_array_copy(f, f_ho); + gkyl_array_copy(g, g_ho); + + // Create peak finder. + struct gkyl_array_dg_find_peaks_inp inp = { + .basis = &basis, + .grid = &grid, + .range = &local, + .range_ext = &local_ext, + .search_dir = 0, + .use_gpu = use_gpu, + }; + struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f); + gkyl_array_dg_find_peaks_advance(peaks, f); + + int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks); + TEST_CHECK(num_peaks == 9); + + // Allocate output arrays for projected values. + const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks); + const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks); + struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX]; + for (int p = 0; p < num_peaks; p++) { + g_at_peaks[p] = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume); + } + gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks); + + // Verify that g evaluated at each peak matches analytical values. + struct { + enum gkyl_peak_type type; + double z_expected; + } expected_peaks[] = { + { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, }, + { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, -M_PI, }, + { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, 0.0, }, + { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, M_PI, }, + { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, }, + { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, }, + }; + for (int p = 0; p < num_peaks; p++) { + // Copy back to host for verification. 
+ struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp, + g_at_peaks[p]->size); + gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]); + + const double *g_val = gkyl_array_cfetch(g_at_peaks_ho, 0); + double z = expected_peaks[p].z_expected; + double expected = z * z; + TEST_CHECK(gkyl_compare_double(g_val[0], expected, 1e-12)); + TEST_MSG("Peak %d: z=%.5f, g_at_peak=%.5f, expected=%.5f", p, z, g_val[0], expected); + + gkyl_array_release(g_at_peaks_ho); + } + + for (int p = 0; p < num_peaks; p++) { + gkyl_array_release(g_at_peaks[p]); + } + gkyl_array_release(f_ho); + gkyl_array_release(g_ho); + gkyl_array_release(f); + gkyl_array_release(g); + gkyl_array_dg_find_peaks_release(peaks); +} + +// Test 2D project_on_peaks with complex function. +void +test_2d_project_on_peaks(int poly_order, bool use_gpu) +{ + double lower[] = { 0.5, -2.0 * M_PI }; + double upper[] = { 2.0, 2.0 * M_PI }; + int cells[] = { 16, 64 }; + int ndim = 2; + struct gkyl_rect_grid grid; + gkyl_rect_grid_init(&grid, ndim, lower, upper, cells); + + struct gkyl_basis basis; + gkyl_cart_modal_serendip(&basis, ndim, poly_order); + + int ghost[] = { 1, 1 }; + struct gkyl_range local, local_ext; + gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local); + + // Project test function for peak finding (always on host first). + struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho); + gkyl_eval_on_nodes_release(ev); + + // Project quadratic function to evaluate at peaks: g(psi, z) = z^2 * psi^2 + struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_2d, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho); + gkyl_eval_on_nodes_release(ev); + + // Create device copies if needed. 
+ struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume); + struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume); + gkyl_array_copy(f, f_ho); + gkyl_array_copy(g, g_ho); + + // Create peak finder (search along z, which is direction 1). + struct gkyl_array_dg_find_peaks_inp inp = { + .basis = &basis, + .grid = &grid, + .range = &local, + .range_ext = &local_ext, + .search_dir = 1, // Search along z. + .use_gpu = use_gpu, + }; + struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f); + gkyl_array_dg_find_peaks_advance(peaks, f); + + // Check results. + int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks); + TEST_CHECK(num_peaks == 9); + + const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks); + const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks); + const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks); + const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks); + + // Allocate output arrays for projected values. + struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX]; + for (int p = 0; p < num_peaks; p++) { + g_at_peaks[p] = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume); + } + gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks); + + // Define expected peak locations (same as before). + double expected_z_peaks[] = { + -2.0 * M_PI, -3.0 * M_PI / 2.0, -M_PI, -M_PI / 2.0, 0.0, + M_PI / 2.0, M_PI, 3.0 * M_PI / 2.0, 2.0 * M_PI + }; + + // Get node locations for output basis. + struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, out_basis->ndim, out_basis->num_basis); + out_basis->node_list(gkyl_array_fetch(nodes, 0)); + + // Verify that g evaluated at each peak matches analytical values. + for (int p = 0; p < num_peaks; p++) { + const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p); + + // Copy back to host for verification. 
+ struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size); + gkyl_array_copy(coords, coords_d); + struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp, + g_at_peaks[p]->size); + gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]); + + // Check each psi cell. + struct gkyl_range_iter iter; + gkyl_range_iter_init(&iter, out_range); + while (gkyl_range_iter_next(&iter)) { + long linidx = gkyl_range_idx(out_range, iter.idx); + + const double *g_val_d = gkyl_array_cfetch(g_at_peaks_ho, linidx); + const double *coord_d = gkyl_array_cfetch(coords, linidx); + + // Get cell center for physical psi coordinate. + double xc_out[1]; + gkyl_rect_grid_cell_center(out_grid, (int[]){ iter.idx[0] }, xc_out); + + // Evaluate at each nodal point in this cell. + for (int n = 0; n < out_basis->num_basis; n++) { + const double *nod_log = gkyl_array_cfetch(nodes, n); + double g_at_node = out_basis->eval_expand(nod_log, g_val_d); + double z_at_node = out_basis->eval_expand(nod_log, coord_d); + + // Compute physical psi coordinate at this node. + double nod_phys[1]; + nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0] / 2.0; + double psi = nod_phys[0]; + + // Analytical value: g(psi, z) = z^2 * psi^2 + double expected = z_at_node * z_at_node * psi * psi; + TEST_CHECK(fabs(z_at_node - expected_z_peaks[p]) < 1e-15); + TEST_CHECK(gkyl_compare_double(g_at_node, expected, 1e-15)); + } + } + gkyl_array_release(coords); + gkyl_array_release(coords_d); + gkyl_array_release(g_at_peaks_ho); + } + + // Clean up. + gkyl_array_release(nodes); + for (int p = 0; p < num_peaks; p++) { + gkyl_array_release(g_at_peaks[p]); + } + gkyl_array_release(f_ho); + gkyl_array_release(g_ho); + gkyl_array_release(f); + gkyl_array_release(g); + gkyl_array_dg_find_peaks_release(peaks); +} + +// Test 1D project_on_peak_idx with complex function. 
+void +test_1d_project_on_peak_idx(int poly_order, bool use_gpu) +{ + double lower[] = { -2.0 * M_PI }; + double upper[] = { 2.0 * M_PI }; + int cells[] = { 64 }; + struct gkyl_rect_grid grid; + gkyl_rect_grid_init(&grid, 1, lower, upper, cells); + + struct gkyl_basis basis; + gkyl_cart_modal_serendip(&basis, 1, poly_order); + + int ghost[] = { 1 }; + struct gkyl_range local, local_ext; + gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local); + + // Project test function for peak finding (always on host first). + struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_1d_complex, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho); + gkyl_eval_on_nodes_release(ev); + + // Project quadratic function to evaluate at peaks. + struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_1d, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho); + gkyl_eval_on_nodes_release(ev); + + // Create device copies if needed. + struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume); + struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume); + gkyl_array_copy(f, f_ho); + gkyl_array_copy(g, g_ho); + + // Create peak finder. + struct gkyl_array_dg_find_peaks_inp inp = { + .basis = &basis, + .grid = &grid, + .range = &local, + .range_ext = &local_ext, + .search_dir = 0, + .use_gpu = use_gpu, + }; + struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f); + gkyl_array_dg_find_peaks_advance(peaks, f); + + int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks); + TEST_CHECK(num_peaks == 9); + + // Allocate output arrays for projected values. 
+ const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks); + const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks); + struct gkyl_array *g_at_peaks = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume); + + int chosen_idx = 1; + gkyl_array_dg_find_peaks_project_on_peak_idx(peaks, g, chosen_idx, g_at_peaks); + + struct { + enum gkyl_peak_type type; + double z_expected; + } expected_peaks[] = { + { GKYL_PEAK_EDGE_LO, -2.0 * M_PI, }, + { GKYL_PEAK_LOCAL_MAX, -3.0 * M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, -M_PI, }, + { GKYL_PEAK_LOCAL_MAX, -M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, 0.0, }, + { GKYL_PEAK_LOCAL_MAX, M_PI / 2.0, }, + { GKYL_PEAK_LOCAL_MIN, M_PI, }, + { GKYL_PEAK_LOCAL_MAX, 3.0 * M_PI / 2.0, }, + { GKYL_PEAK_EDGE_HI, 2.0 * M_PI, }, + }; + + // Copy back to host for verification. + struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks->ncomp, + g_at_peaks->size); + gkyl_array_copy(g_at_peaks_ho, g_at_peaks); + + const double *g_val = gkyl_array_cfetch(g_at_peaks_ho, 0); + double z = expected_peaks[chosen_idx].z_expected; + double expected = z * z; + TEST_CHECK(gkyl_compare_double(g_val[0], expected, 1e-12)); + + gkyl_array_release(g_at_peaks_ho); + gkyl_array_release(g_at_peaks); + gkyl_array_release(f_ho); + gkyl_array_release(g_ho); + gkyl_array_release(f); + gkyl_array_release(g); + gkyl_array_dg_find_peaks_release(peaks); +} + +// Test 2D project_on_peak_idx with complex function. 
+void +test_2d_project_on_peak_idx(int poly_order, bool use_gpu) +{ + double lower[] = { 0.5, -2.0 * M_PI }; + double upper[] = { 2.0, 2.0 * M_PI }; + int cells[] = { 16, 64 }; + int ndim = 2; + struct gkyl_rect_grid grid; + gkyl_rect_grid_init(&grid, ndim, lower, upper, cells); + + struct gkyl_basis basis; + gkyl_cart_modal_serendip(&basis, ndim, poly_order); + + int ghost[] = { 1, 1 }; + struct gkyl_range local, local_ext; + gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local); + + // Project test function for peak finding (always on host first). + struct gkyl_array *f_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_2d_complex, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, f_ho); + gkyl_eval_on_nodes_release(ev); + + // Project quadratic function to evaluate at peaks: g(psi, z) = z^2 * psi^2 + struct gkyl_array *g_ho = gkyl_array_new(GKYL_DOUBLE, basis.num_basis, local_ext.volume); + ev = gkyl_eval_on_nodes_new(&grid, &basis, 1, test_func_quadratic_2d, NULL); + gkyl_eval_on_nodes_advance(ev, 0.0, &local, g_ho); + gkyl_eval_on_nodes_release(ev); + + // Create device copies if needed. + struct gkyl_array *f = mkarr(use_gpu, basis.num_basis, local_ext.volume); + struct gkyl_array *g = mkarr(use_gpu, basis.num_basis, local_ext.volume); + gkyl_array_copy(f, f_ho); + gkyl_array_copy(g, g_ho); + + // Create peak finder (search along z, which is direction 1). + struct gkyl_array_dg_find_peaks_inp inp = { + .basis = &basis, + .grid = &grid, + .range = &local, + .range_ext = &local_ext, + .search_dir = 1, // Search along z. + .use_gpu = use_gpu, + }; + struct gkyl_array_dg_find_peaks *peaks = gkyl_array_dg_find_peaks_new(&inp, f); + gkyl_array_dg_find_peaks_advance(peaks, f); + + // Check results. 
+ int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peaks); + TEST_CHECK(num_peaks == 9); + + const struct gkyl_basis *out_basis = gkyl_array_dg_find_peaks_get_basis(peaks); + const struct gkyl_range *out_range = gkyl_array_dg_find_peaks_get_range(peaks); + const struct gkyl_range *out_range_ext = gkyl_array_dg_find_peaks_get_range_ext(peaks); + const struct gkyl_rect_grid *out_grid = gkyl_array_dg_find_peaks_get_grid(peaks); + + // Allocate output arrays for projected values. + struct gkyl_array *g_at_peaks[GKYL_DG_FIND_PEAKS_MAX]; + for (int p = 0; p < num_peaks; p++) { + g_at_peaks[p] = mkarr(use_gpu, out_basis->num_basis, out_range_ext->volume); + } + gkyl_array_dg_find_peaks_project_on_peaks(peaks, g, g_at_peaks); + + // Define expected peak locations (same as before). + double expected_z_peaks[] = { + -2.0 * M_PI, -3.0 * M_PI / 2.0, -M_PI, -M_PI / 2.0, 0.0, + M_PI / 2.0, M_PI, 3.0 * M_PI / 2.0, 2.0 * M_PI + }; + + // Get node locations for output basis. + struct gkyl_array *nodes = gkyl_array_new(GKYL_DOUBLE, out_basis->ndim, out_basis->num_basis); + out_basis->node_list(gkyl_array_fetch(nodes, 0)); + + // Verify that g evaluated at each peak matches analytical values. + for (int p = 0; p < num_peaks; p++) { + const struct gkyl_array *coords_d = gkyl_array_dg_find_peaks_acquire_coords(peaks, p); + + // Copy back to host for verification. + struct gkyl_array *coords = gkyl_array_new(GKYL_DOUBLE, coords_d->ncomp, coords_d->size); + gkyl_array_copy(coords, coords_d); + struct gkyl_array *g_at_peaks_ho = gkyl_array_new(GKYL_DOUBLE, g_at_peaks[p]->ncomp, + g_at_peaks[p]->size); + gkyl_array_copy(g_at_peaks_ho, g_at_peaks[p]); + + // Check each psi cell. 
+ struct gkyl_range_iter iter; + gkyl_range_iter_init(&iter, out_range); + while (gkyl_range_iter_next(&iter)) { + long linidx = gkyl_range_idx(out_range, iter.idx); + + const double *g_val_d = gkyl_array_cfetch(g_at_peaks_ho, linidx); + const double *coord_d = gkyl_array_cfetch(coords, linidx); + + // Get cell center for physical psi coordinate. + double xc_out[1]; + gkyl_rect_grid_cell_center(out_grid, (int[]){ iter.idx[0] }, xc_out); + + // Evaluate at each nodal point in this cell. + for (int n = 0; n < out_basis->num_basis; n++) { + const double *nod_log = gkyl_array_cfetch(nodes, n); + double g_at_node = out_basis->eval_expand(nod_log, g_val_d); + double z_at_node = out_basis->eval_expand(nod_log, coord_d); + + // Compute physical psi coordinate at this node. + double nod_phys[1]; + nod_phys[0] = xc_out[0] + nod_log[0] * out_grid->dx[0] / 2.0; + double psi = nod_phys[0]; + + // Analytical value: g(psi, z) = z^2 * psi^2 + double expected = z_at_node * z_at_node * psi * psi; + TEST_CHECK(fabs(z_at_node - expected_z_peaks[p]) < 1e-15); + TEST_CHECK(gkyl_compare_double(g_at_node, expected, 1e-15)); + } + } + gkyl_array_release(coords); + gkyl_array_release(coords_d); + gkyl_array_release(g_at_peaks_ho); + } + + // Clean up. 
+ gkyl_array_release(nodes); + for (int p = 0; p < num_peaks; p++) { + gkyl_array_release(g_at_peaks[p]); + } + gkyl_array_release(f_ho); + gkyl_array_release(g_ho); + gkyl_array_release(f); + gkyl_array_release(g); + gkyl_array_dg_find_peaks_release(peaks); +} + +// CPU test wrappers +void test_1d_cos_p1_ho() +{ + test_1d_find_peaks_cos(1, false); +} + +void test_1d_mirror_p1_ho() +{ + test_1d_find_peaks_mirror(1, false); +} + +void test_1d_complex_p1_ho() +{ + test_1d_find_peaks_complex(1, false); +} + +void test_2d_p1_ho() +{ + test_2d_find_peaks(1, false); +} + +void test_2d_complex_p1_ho() +{ + test_2d_find_peaks_complex(1, false); +} + +void test_1d_project_p1_ho() +{ + test_1d_project_on_peaks(1, false); +} + +void test_2d_project_p1_ho() +{ + test_2d_project_on_peaks(1, false); +} + +void test_1d_project_idx_p1_ho() +{ + test_1d_project_on_peak_idx(1, false); +} + +void test_2d_project_idx_p1_ho() +{ + test_2d_project_on_peak_idx(1, false); +} + +#ifdef GKYL_HAVE_CUDA + +// GPU test wrappers +void test_1d_cos_p1_dev() +{ + test_1d_find_peaks_cos(1, true); +} + +void test_1d_mirror_p1_dev() +{ + test_1d_find_peaks_mirror(1, true); +} + +void test_1d_complex_p1_dev() +{ + test_1d_find_peaks_complex(1, true); +} + +void test_2d_p1_dev() +{ + test_2d_find_peaks(1, true); +} + +void test_2d_complex_p1_dev() +{ + test_2d_find_peaks_complex(1, true); +} + +void test_1d_project_p1_dev() +{ + test_1d_project_on_peaks(1, true); +} + +void test_2d_project_p1_dev() +{ + test_2d_project_on_peaks(1, true); +} + +void test_1d_project_idx_p1_dev() +{ + test_1d_project_on_peak_idx(1, true); +} + +void test_2d_project_idx_p1_dev() +{ + test_2d_project_on_peak_idx(1, true); +} + +#endif + +TEST_LIST = { + { "test_1d_cos_p1", test_1d_cos_p1_ho }, + { "test_1d_mirror_p1", test_1d_mirror_p1_ho }, + { "test_1d_complex_p1", test_1d_complex_p1_ho }, + { "test_2d_p1", test_2d_p1_ho }, + { "test_2d_complex_p1", test_2d_complex_p1_ho }, + { "test_1d_project_p1", test_1d_project_p1_ho 
}, + { "test_2d_project_p1", test_2d_project_p1_ho }, + { "test_1d_project_idx_p1", test_1d_project_idx_p1_ho }, + { "test_2d_project_idx_p1", test_2d_project_idx_p1_ho }, +#ifdef GKYL_HAVE_CUDA + { "test_1d_cos_p1_gpu", test_1d_cos_p1_dev }, + { "test_1d_mirror_p1_gpu", test_1d_mirror_p1_dev }, + { "test_1d_complex_p1_gpu", test_1d_complex_p1_dev }, + { "test_2d_p1_gpu", test_2d_p1_dev }, + { "test_2d_complex_p1_gpu", test_2d_complex_p1_dev }, + { "test_1d_project_p1_gpu", test_1d_project_p1_dev }, + { "test_2d_project_p1_gpu", test_2d_project_p1_dev }, + { "test_1d_project_idx_p1_gpu", test_1d_project_idx_p1_dev }, + { "test_2d_project_idx_p1_gpu", test_2d_project_idx_p1_dev }, +#endif + { NULL, NULL }, +}; diff --git a/core/zero/array_dg_find_peaks.c b/core/zero/array_dg_find_peaks.c new file mode 100644 index 000000000..d55087e18 --- /dev/null +++ b/core/zero/array_dg_find_peaks.c @@ -0,0 +1,739 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/** + * Scan along the search direction at a fixed preserved-direction coordinate + * to count the number of peaks and determine their types. + */ +static void +count_peaks_along_dir(const struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in_ho, + int preserved_idx, int *num_peaks_out, enum gkyl_peak_type *peak_types_out) +{ + int ndim = up->grid.ndim; + int search_dir = up->search_dir; + + int total_nodes_search = up->total_nodes_search; + + // Use pre-allocated search buffers from the struct. + double *vals = up->search_vals; + double *coords = up->search_coords; + for (int i = 0; i < total_nodes_search; i++) { + vals[i] = 0.0; + coords[i] = 0.0; + } + + // Iterate along cells in search direction and collect nodal values. + for (int cell_idx = up->range.lower[search_dir]; + cell_idx <= up->range.upper[search_dir]; + cell_idx++) { + // Build index array for this cell. 
+ int idx[GKYL_MAX_DIM]; + if (ndim == 1) { + idx[0] = cell_idx; + } + else { + int preserved_dir = (search_dir == 0) ? 1 : 0; + idx[preserved_dir] = preserved_idx; + idx[search_dir] = cell_idx; + } + + long linidx = gkyl_range_idx(&up->range, idx); + const double *f_d = gkyl_array_cfetch(in_ho, linidx); + + double xc[GKYL_MAX_DIM]; + gkyl_rect_grid_cell_center(&up->grid, idx, xc); + + // Evaluate at each node in this cell. + for (int n = 0; n < up->basis.num_basis; n++) { + const double *nod_log = gkyl_array_cfetch(up->nodes, n); + + // Determine node offset in search direction. + int node_offset = (nod_log[search_dir] < 0) ? 0 : 1; + + int cell_local = cell_idx - up->range.lower[search_dir]; + + int search_node_idx = cell_local + node_offset; + + double val = up->basis.eval_expand(nod_log, f_d); + double nod_phys[GKYL_MAX_DIM]; + dg_find_peaks_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys); + + // Only store if this is the first time we see this search node + // (avoid duplicates at cell boundaries). + if (vals[search_node_idx] == 0.0 && coords[search_node_idx] == 0.0) { + vals[search_node_idx] = val; + coords[search_node_idx] = nod_phys[search_dir]; + } + } + } + + // Now scan the values to find peaks. + // A peak is: EDGE_LO at index 0, EDGE_HI at last index, LOCAL_MAX/MIN in between. + int num_peaks = 0; + + // Always add lower edge. + peak_types_out[num_peaks++] = GKYL_PEAK_EDGE_LO; + + // Scan for local maxima and minima (indices 1 to total_nodes_search-2). + for (int i = 1; i < total_nodes_search - 1; i++) { + double prev = vals[i - 1]; + double curr = vals[i]; + double next = vals[i + 1]; + + if (curr > prev && curr > next) { + // Local maximum. + assert(num_peaks < GKYL_DG_FIND_PEAKS_MAX); + peak_types_out[num_peaks++] = GKYL_PEAK_LOCAL_MAX; + } + else if (curr < prev && curr < next) { + // Local minimum. + assert(num_peaks < GKYL_DG_FIND_PEAKS_MAX); + peak_types_out[num_peaks++] = GKYL_PEAK_LOCAL_MIN; + } + } + + // Always add upper edge. 
+ assert(num_peaks < GKYL_DG_FIND_PEAKS_MAX); + peak_types_out[num_peaks++] = GKYL_PEAK_EDGE_HI; + + *num_peaks_out = num_peaks; +} + +/** + * Find all peaks along the search direction for a given preserved-direction + * node index, storing results in the nodal arrays. + */ +static void +find_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in_ho, + int preserved_node_idx) +{ + int ndim = up->grid.ndim; + int search_dir = up->search_dir; + + int total_nodes_search = up->total_nodes_search; + + // Use pre-allocated search buffers from the struct. + double *vals = up->search_vals; + double *coords = up->search_coords; + bool *visited = up->search_visited; + for (int i = 0; i < total_nodes_search; i++) { + vals[i] = 0.0; + coords[i] = 0.0; + visited[i] = false; + } + + // For 2D, determine the preserved direction cell index from the node index. + int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0); + + // Iterate along cells in search direction and collect nodal values. + for (int cell_idx = up->range.lower[search_dir]; + cell_idx <= up->range.upper[search_dir]; + cell_idx++) { + // For 2D, we need to iterate over cells in the preserved direction that + // contribute to this preserved node index. + int pres_cell_start, pres_cell_end; + if (ndim == 1) { + pres_cell_start = 0; + pres_cell_end = 0; + } + else { + // Determine which cells contribute to this preserved node. + // Node i is shared by cells i and i+1 (0-indexed from lower). + // preserved_node_idx 0 is only in cell lower[preserved_dir]. + // preserved_node_idx N is only in cell upper[preserved_dir]. 
+ if (preserved_node_idx == 0) { + pres_cell_start = up->range.lower[preserved_dir]; + pres_cell_end = up->range.lower[preserved_dir]; + } + else if (preserved_node_idx == up->out_nrange.upper[0]) { + pres_cell_start = up->range.upper[preserved_dir]; + pres_cell_end = up->range.upper[preserved_dir]; + } + else { + pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1; + pres_cell_end = pres_cell_start + 1; + if (pres_cell_end > up->range.upper[preserved_dir]) + pres_cell_end = up->range.upper[preserved_dir]; + } + } + + for (int pres_cell = pres_cell_start; pres_cell <= pres_cell_end; pres_cell++) { + // Build index array for this cell. + int idx[GKYL_MAX_DIM]; + if (ndim == 1) { + idx[0] = cell_idx; + } + else { + idx[preserved_dir] = pres_cell; + idx[search_dir] = cell_idx; + } + + long linidx = gkyl_range_idx(&up->range, idx); + const double *f_d = gkyl_array_cfetch(in_ho, linidx); + + double xc[GKYL_MAX_DIM]; + gkyl_rect_grid_cell_center(&up->grid, idx, xc); + + // Evaluate at each node in this cell. + for (int n = 0; n < up->basis.num_basis; n++) { + const double *nod_log = gkyl_array_cfetch(up->nodes, n); + + // Check if this node corresponds to our preserved node index. + if (ndim > 1) { + int pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1; + int pres_cell_local = pres_cell - up->range.lower[preserved_dir]; + int this_pres_node = pres_cell_local + pres_node_offset; + if (this_pres_node != preserved_node_idx) + continue; + } + + // Determine node offset in search direction. + int search_node_offset = (nod_log[search_dir] < 0) ? 
0 : 1; + + int cell_local = cell_idx - up->range.lower[search_dir]; + int search_node_idx = cell_local + search_node_offset; + + if (!visited[search_node_idx]) { + double val = up->basis.eval_expand(nod_log, f_d); // GPU error here + double nod_phys[GKYL_MAX_DIM]; + dg_find_peaks_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys); + + vals[search_node_idx] = val; + coords[search_node_idx] = nod_phys[search_dir]; + visited[search_node_idx] = true; + } + } + } + } + + // Now extract peaks based on peak_types. + int peak_idx = 0; + + // EDGE_LO is always first peak at index 0. + if (up->peak_types[peak_idx] == GKYL_PEAK_EDGE_LO) { + double *val_n = gkyl_array_fetch(up->out_vals_nodal[peak_idx], preserved_node_idx); + double *coord_n = gkyl_array_fetch(up->out_coords_nodal[peak_idx], preserved_node_idx); + val_n[0] = vals[0]; + coord_n[0] = coords[0]; + peak_idx++; + } + + // Find local maxima and minima. + for (int i = 1; i < total_nodes_search - 1 && peak_idx < up->num_peaks - 1; i++) { + double prev = vals[i - 1]; + double curr = vals[i]; + double next = vals[i + 1]; + + bool is_max = (curr > prev && curr > next); + bool is_min = (curr < prev && curr < next); + + if ((is_max && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MAX) || + (is_min && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MIN)) { + double *val_n = gkyl_array_fetch(up->out_vals_nodal[peak_idx], preserved_node_idx); + double *coord_n = gkyl_array_fetch(up->out_coords_nodal[peak_idx], preserved_node_idx); + val_n[0] = curr; + coord_n[0] = coords[i]; + peak_idx++; + } + } + + // EDGE_HI is always last peak. 
+ if (peak_idx < up->num_peaks && up->peak_types[peak_idx] == GKYL_PEAK_EDGE_HI) { + double *val_n = gkyl_array_fetch(up->out_vals_nodal[peak_idx], preserved_node_idx); + double *coord_n = gkyl_array_fetch(up->out_coords_nodal[peak_idx], preserved_node_idx); + val_n[0] = vals[total_nodes_search - 1]; + coord_n[0] = coords[total_nodes_search - 1]; + } +} + +/** + * Evaluate an input array at peak locations for a given preserved-direction + * node index, storing results in the nodal output arrays. + */ +static void +eval_array_at_peaks_for_preserved_node(struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in_ho, int preserved_node_idx, struct gkyl_array **out_vals_nodal, + int peak_idx) +{ + int ndim = up->grid.ndim; + int search_dir = up->search_dir; + int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0); + + // Get the peak coordinate that was found during find_peaks. + const double *peak_coord_n = gkyl_array_cfetch(up->out_coords_nodal[peak_idx], + preserved_node_idx); + double peak_coord_search = peak_coord_n[0]; + + // Find the cell containing this coordinate in the search direction. + // We need to build a point coordinate to pass to find_cell. + double point[GKYL_MAX_DIM]; + int known_idx[GKYL_MAX_DIM]; + int cell_idx[GKYL_MAX_DIM]; + + for (int d = 0; d < ndim; d++) { + if (d == search_dir) { + point[d] = peak_coord_search; + known_idx[d] = -1; // Not known + } + else { + // Use dummy value - we'll specify known_idx. + point[d] = 0.0; + known_idx[d] = -1; + } + } + + // If 2D, we need to determine preserved direction cell from preserved_node_idx. + // For p=1 with N cells (1-based indexing), nodal points map as: + // Node 0 -> cell 1, logical coord -1 (left edge of first cell) + // Node k (1 <= k <= N) -> cell k, logical coord +1 (right edge of cell k) + // This ensures proper continuity at shared cell boundaries. + if (ndim > 1) { + int pres_cell; + if (preserved_node_idx == 0) { + // First node: evaluate at left edge of first cell. 
+ pres_cell = up->range.lower[preserved_dir]; + } + else { + // All other nodes (1 to N): evaluate at right edge of cell with index = node_idx. + // Clamp to upper bound for safety. + pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1; + if (pres_cell > up->range.upper[preserved_dir]) { + pres_cell = up->range.upper[preserved_dir]; + } + } + known_idx[preserved_dir] = pres_cell; + + // Set the coordinate in preserved direction to the cell center. + int pres_cell_idx[GKYL_MAX_DIM]; + for (int d = 0; d < ndim; d++) { + pres_cell_idx[d] = (d == preserved_dir) ? pres_cell : 1; + } + double xc_pres[GKYL_MAX_DIM]; + gkyl_rect_grid_cell_center(&up->grid, pres_cell_idx, xc_pres); + point[preserved_dir] = xc_pres[preserved_dir]; + } + + gkyl_rect_grid_find_cell(&up->grid, point, true, known_idx, cell_idx); + + // Clamp cell_idx to interior range (avoid ghost cells). + for (int d = 0; d < up->grid.ndim; d++) { + if (cell_idx[d] < up->range.lower[d]) { + cell_idx[d] = up->range.lower[d]; + } + if (cell_idx[d] > up->range.upper[d]) { + cell_idx[d] = up->range.upper[d]; + } + } + + // Get the DG coefficients at this cell. + long linidx = gkyl_range_idx(&up->range, cell_idx); + const double *f_d = gkyl_array_cfetch(in_ho, linidx); + + // Get cell center. + double xc[GKYL_MAX_DIM]; + gkyl_rect_grid_cell_center(&up->grid, cell_idx, xc); + + // Convert peak coordinate to logical space. + double nod_log[GKYL_MAX_DIM]; + for (int d = 0; d < ndim; d++) { + if (d == search_dir) { + // Convert physical coordinate to logical [-1, 1]. + nod_log[d] = 2.0 * (peak_coord_search - xc[d]) / up->grid.dx[d]; + } + else if (ndim > 1) { + // In preserved direction, use the node position in the cell. + // Node 0 is at left edge (-1), all others at right edge (+1). + nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0; + } + } + + // Evaluate the DG expansion at this logical coordinate. + double val = up->basis.eval_expand(nod_log, f_d); + + // Store the result. 
+ double *val_n = gkyl_array_fetch(out_vals_nodal[peak_idx], preserved_node_idx); + val_n[0] = val; +} + +struct gkyl_array_dg_find_peaks* +gkyl_array_dg_find_peaks_new(const struct gkyl_array_dg_find_peaks_inp *find_peaks_inp, + const struct gkyl_array *in) +{ + struct gkyl_array_dg_find_peaks *up = gkyl_malloc(sizeof(*up)); + + // Copy input parameters. + up->grid = *find_peaks_inp->grid; + up->basis = *find_peaks_inp->basis; + up->range = *find_peaks_inp->range; + up->range_ext = *find_peaks_inp->range_ext; + up->search_dir = find_peaks_inp->search_dir; + up->use_gpu = find_peaks_inp->use_gpu; + + int ndim = find_peaks_inp->grid->ndim; + int poly_order = find_peaks_inp->basis->poly_order; + int out_dim = ndim - 1; + + assert(find_peaks_inp->search_dir >= 0 && find_peaks_inp->search_dir < ndim); + assert(poly_order == 1); // gkyl_array_dg_find_peaks: only p=1 is supported + + // Set up output grid/basis/range. + if (out_dim == 0) { + // 1D -> 0D case. + int cells_1d[1] = { 1 }; + double lower_1d[1] = { 0.0 }; + double upper_1d[1] = { 1.0 }; + gkyl_rect_grid_init(&up->out_grid, 1, lower_1d, upper_1d, cells_1d); + gkyl_range_init(&up->out_range, 1, (int[]){ 1 }, (int[]){ 1 }); + gkyl_range_init(&up->out_range_ext, 1, (int[]){ 0 }, (int[]){ 2 }); + gkyl_cart_modal_serendip(&up->out_basis, 1, 0); + + int nodes_shape[1] = { 1 }; + gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape); + } + else if (out_dim == 1) { + // 2D -> 1D case. + int preserved_dir = (find_peaks_inp->search_dir == 0) ? 
1 : 0; + + int cells_out = find_peaks_inp->grid->cells[preserved_dir]; + double lower_out = find_peaks_inp->grid->lower[preserved_dir]; + double upper_out = find_peaks_inp->grid->upper[preserved_dir]; + + gkyl_rect_grid_init(&up->out_grid, 1, &lower_out, &upper_out, &cells_out); + + int lower_idx[1] = { find_peaks_inp->range->lower[preserved_dir] }; + int upper_idx[1] = { find_peaks_inp->range->upper[preserved_dir] }; + gkyl_range_init(&up->out_range, 1, lower_idx, upper_idx); + + int lower_ext_idx[1] = { find_peaks_inp->range_ext->lower[preserved_dir] }; + int upper_ext_idx[1] = { find_peaks_inp->range_ext->upper[preserved_dir] }; + gkyl_range_init(&up->out_range_ext, 1, lower_ext_idx, upper_ext_idx); + + gkyl_cart_modal_serendip(&up->out_basis, 1, poly_order); + + int num_nodes = gkyl_range_shape(&up->out_range, 0) + 1; + int nodes_shape[1] = {num_nodes}; + gkyl_range_init_from_shape(&up->out_nrange, 1, nodes_shape); + } + else { + assert(false); // dg_find_peaks: only 1D->0D and 2D->1D supported + } + + // Store node locations for input basis. + up->nodes = gkyl_array_new(GKYL_DOUBLE, ndim, find_peaks_inp->basis->num_basis); + find_peaks_inp->basis->node_list(gkyl_array_fetch(up->nodes, 0)); + + // Create nodal-to-modal converter. + up->n2m = gkyl_nodal_ops_new(&up->out_basis, &up->out_grid, false); + + // No device basis on CPU. + up->out_basis_on_dev = NULL; + + // Compute total_nodes_search for the struct. + int num_cells_search = find_peaks_inp->range->upper[find_peaks_inp->search_dir] + - find_peaks_inp->range->lower[find_peaks_inp->search_dir] + 1; + up->total_nodes_search = num_cells_search + 1; + + // Pre-allocate search-direction working buffers (reused by advance). + up->search_vals = gkyl_malloc(sizeof(double) * up->total_nodes_search); + up->search_coords = gkyl_malloc(sizeof(double) * up->total_nodes_search); + up->search_visited = gkyl_malloc(sizeof(bool) * up->total_nodes_search); + + // Count peaks at middle preserved coordinate. 
+ int mid_preserved_idx = 0; + if (out_dim == 1) { + int preserved_dir = (find_peaks_inp->search_dir == 0) ? 1 : 0; + mid_preserved_idx = (find_peaks_inp->range->lower[preserved_dir] + + find_peaks_inp->range->upper[preserved_dir]) / 2; + } + + // Copy input to host if needed. + if (up->use_gpu) { + struct gkyl_array *field_ho = gkyl_array_new(GKYL_DOUBLE, in->ncomp, in->size); + gkyl_array_copy(field_ho, in); + count_peaks_along_dir(up, field_ho, mid_preserved_idx, &up->num_peaks, up->peak_types); + gkyl_array_release(field_ho); + } + else { + count_peaks_along_dir(up, in, mid_preserved_idx, &up->num_peaks, up->peak_types); + } + + // Allocate output arrays for each peak. + for (int p = 0; p < up->num_peaks; p++) { + up->out_vals[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, + up->out_range_ext.volume); + up->out_coords[p] = gkyl_array_new(GKYL_DOUBLE, up->out_basis.num_basis, + up->out_range_ext.volume); + up->out_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume); + up->out_coords_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume); + up->out_eval_at_peaks_vals_nodal[p] = gkyl_array_new(GKYL_DOUBLE, 1, up->out_nrange.volume); + } + + // Initialize unused peak arrays to NULL. + for (int p = up->num_peaks; p < GKYL_DG_FIND_PEAKS_MAX; p++) { + up->out_vals[p] = NULL; + up->out_coords[p] = NULL; + up->out_vals_nodal[p] = NULL; + up->out_coords_nodal[p] = NULL; + up->out_eval_at_peaks_vals_nodal[p] = NULL; + } + + up->flags = 0; + GKYL_CLEAR_CU_ALLOC(up->flags); + up->ref_count = gkyl_ref_count_init(gkyl_array_dg_find_peaks_free); + up->on_dev = up; // CPU object points to itself. 
+ + struct gkyl_array_dg_find_peaks *up_out = up; +#ifdef GKYL_HAVE_CUDA + if (up->use_gpu) { + up_out = gkyl_array_dg_find_peaks_new_cu(up); + gkyl_array_dg_find_peaks_release(up); + } +#endif + + return up_out; +} + +void +gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, const struct gkyl_array *in) +{ +#ifdef GKYL_HAVE_CUDA + if (up->use_gpu) { + gkyl_array_dg_find_peaks_advance_cu(up, in); + return; + } +#endif + + int ndim = up->grid.ndim; + int out_dim = ndim - 1; + + // Find peaks for each preserved-direction node. + int num_nodes_out = up->out_nrange.volume; + for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) { + find_peaks_for_preserved_node(up, in, pres_node); + } + + // Transform nodal to modal for each peak. + if (out_dim == 0) { + // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function). + for (int p = 0; p < up->num_peaks; p++) { + double *val_m = gkyl_array_fetch(up->out_vals[p], 0); + double *coord_m = gkyl_array_fetch(up->out_coords[p], 0); + const double *val_n = gkyl_array_cfetch(up->out_vals_nodal[p], 0); + const double *coord_n = gkyl_array_cfetch(up->out_coords_nodal[p], 0); + val_m[0] = val_n[0]; + coord_m[0] = coord_n[0]; + } + } + else { + // 2D -> 1D case: use nodal-to-modal transform. 
+ for (int p = 0; p < up->num_peaks; p++) { + gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid, + &up->out_nrange, &up->out_range, 1, up->out_vals_nodal[p], up->out_vals[p], false); + gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid, + &up->out_nrange, &up->out_range, 1, up->out_coords_nodal[p], up->out_coords[p], false); + } + } +} + +int +gkyl_array_dg_find_peaks_num_peaks(const struct gkyl_array_dg_find_peaks *up) +{ + return up->num_peaks; +} + +enum gkyl_peak_type +gkyl_array_dg_find_peaks_get_type(const struct gkyl_array_dg_find_peaks *up, int peak_idx) +{ + assert(peak_idx >= 0 && peak_idx < up->num_peaks); + return up->peak_types[peak_idx]; +} + +const struct gkyl_basis* +gkyl_array_dg_find_peaks_get_basis(const struct gkyl_array_dg_find_peaks *up) +{ + return &up->out_basis; +} + +const struct gkyl_rect_grid* +gkyl_array_dg_find_peaks_get_grid(const struct gkyl_array_dg_find_peaks *up) +{ + return &up->out_grid; +} + +const struct gkyl_range* +gkyl_array_dg_find_peaks_get_range(const struct gkyl_array_dg_find_peaks *up) +{ + return &up->out_range; +} + +const struct gkyl_range* +gkyl_array_dg_find_peaks_get_range_ext(const struct gkyl_array_dg_find_peaks *up) +{ + return &up->out_range_ext; +} + +const struct gkyl_range* +gkyl_array_dg_find_peaks_get_nodal_range(const struct gkyl_array_dg_find_peaks *up) +{ + return &up->out_nrange; +} + +const struct gkyl_array* +gkyl_array_dg_find_peaks_acquire_vals(const struct gkyl_array_dg_find_peaks *up, int peak_idx) +{ + assert(peak_idx >= 0 && peak_idx < up->num_peaks); + return gkyl_array_acquire(up->out_vals[peak_idx]); +} + +const struct gkyl_array* +gkyl_array_dg_find_peaks_acquire_vals_nodal(const struct gkyl_array_dg_find_peaks *up, int peak_idx) +{ + assert(peak_idx >= 0 && peak_idx < up->num_peaks); + return gkyl_array_acquire(up->out_vals_nodal[peak_idx]); +} + +const struct gkyl_array* +gkyl_array_dg_find_peaks_acquire_coords(const struct gkyl_array_dg_find_peaks *up, int peak_idx) +{ + 
assert(peak_idx >= 0 && peak_idx < up->num_peaks); + return gkyl_array_acquire(up->out_coords[peak_idx]); +} + +const struct gkyl_array* +gkyl_array_dg_find_peaks_acquire_coords_nodal(const struct gkyl_array_dg_find_peaks *up, + int peak_idx) +{ + assert(peak_idx >= 0 && peak_idx < up->num_peaks); + return gkyl_array_acquire(up->out_coords_nodal[peak_idx]); +} + +void +gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in_array, struct gkyl_array **out_vals) +{ +#ifdef GKYL_HAVE_CUDA + if (up->use_gpu) { + gkyl_array_dg_find_peaks_project_on_peaks_cu(up, in_array, out_vals); + return; + } +#endif + + int ndim = up->grid.ndim; + int out_dim = ndim - 1; + + // Evaluate the input array at peak locations for each preserved-direction node. + int num_nodes_out = up->out_nrange.volume; + for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) { + for (int p = 0; p < up->num_peaks; p++) { + eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, + up->out_eval_at_peaks_vals_nodal, p); + } + } + // Transform nodal to modal for each peak. + if (out_dim == 0) { + // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function). + for (int p = 0; p < up->num_peaks; p++) { + double *val_m = gkyl_array_fetch(out_vals[p], 0); + const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[p], 0); + val_m[0] = val_n[0]; + } + } + else { + // 2D -> 1D case: use nodal-to-modal transform. 
+ for (int p = 0; p < up->num_peaks; p++) { + gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid, + &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[p], out_vals[p], + false); + } + } +} + +void +gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val) +{ +#ifdef GKYL_HAVE_CUDA + if (up->use_gpu) { + gkyl_array_dg_find_peaks_project_on_peak_idx_cu(up, in_array, peak_idx, out_val); + return; + } +#endif + + int ndim = up->grid.ndim; + int out_dim = ndim - 1; + + // Evaluate the input array at peak locations for each preserved-direction node. + int num_nodes_out = up->out_nrange.volume; + + for (int pres_node = 0; pres_node < num_nodes_out; pres_node++) { + eval_array_at_peaks_for_preserved_node(up, in_array, pres_node, + up->out_eval_at_peaks_vals_nodal, peak_idx); + } + + // Transform nodal to modal for each peak. + if (out_dim == 0) { + // 1D -> 0D case: modal = nodal (p=0 has no nodal_to_modal function). + double *val_m = gkyl_array_fetch(out_val, 0); + const double *val_n = gkyl_array_cfetch(up->out_eval_at_peaks_vals_nodal[peak_idx], 0); + val_m[0] = val_n[0]; + } + else { + // 2D -> 1D case: use nodal-to-modal transform. 
+ gkyl_nodal_ops_n2m(up->n2m, &up->out_basis, &up->out_grid, + &up->out_nrange, &up->out_range, 1, up->out_eval_at_peaks_vals_nodal[peak_idx], out_val, + false); + } +} + +struct gkyl_array_dg_find_peaks* +gkyl_array_dg_find_peaks_acquire(const struct gkyl_array_dg_find_peaks *up) +{ + gkyl_ref_count_inc(&up->ref_count); + return (struct gkyl_array_dg_find_peaks *)up; +} + +void +gkyl_array_dg_find_peaks_free(const struct gkyl_ref_count *ref) +{ + struct gkyl_array_dg_find_peaks *up = + container_of(ref, struct gkyl_array_dg_find_peaks, ref_count); + + for (int p = 0; p < up->num_peaks; p++) { + gkyl_array_release(up->out_vals[p]); + gkyl_array_release(up->out_coords[p]); + gkyl_array_release(up->out_vals_nodal[p]); + gkyl_array_release(up->out_coords_nodal[p]); + gkyl_array_release(up->out_eval_at_peaks_vals_nodal[p]); + } + gkyl_array_release(up->nodes); + gkyl_nodal_ops_release(up->n2m); + + if (GKYL_IS_CU_ALLOC(up->flags)) { + gkyl_cart_modal_basis_release_cu(up->out_basis_on_dev); + gkyl_cu_free(up->search_vals); + gkyl_cu_free(up->search_coords); + gkyl_cu_free(up->search_visited); + gkyl_cu_free(up->on_dev); + } + else { + gkyl_free(up->search_vals); + gkyl_free(up->search_coords); + gkyl_free(up->search_visited); + } + + gkyl_free(up); +} + +void +gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up) +{ + gkyl_ref_count_dec(&up->ref_count); +} diff --git a/core/zero/array_dg_find_peaks_cu.cu b/core/zero/array_dg_find_peaks_cu.cu new file mode 100644 index 000000000..9581f9a37 --- /dev/null +++ b/core/zero/array_dg_find_peaks_cu.cu @@ -0,0 +1,540 @@ +/* -*- c++ -*- */ +extern "C" { +#include +#include +#include +#include +#include +#include +#include +} + +/** + * CUDA kernel: find peaks along the search direction for each preserved-direction + * node index. One thread per preserved_node_idx. + * + * Each thread: + * 1. Scans all cells along the search direction, collecting nodal values/coords + * into thread-local arrays. + * 2. 
Extracts peaks (EDGE_LO, LOCAL_MAX, LOCAL_MIN, EDGE_HI) and writes results + * into the nodal output arrays. + */ +__global__ void +gkyl_find_peaks_kernel(const struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in, int num_nodes_out) +{ + for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x; + tid < num_nodes_out; tid += blockDim.x * gridDim.x) { + int preserved_node_idx = (int)tid; + + int ndim = up->grid.ndim; + int search_dir = up->search_dir; + int num_basis = up->basis.num_basis; + + // Number of cells and nodes along the search direction. + int num_cells_search = up->range.upper[search_dir] - up->range.lower[search_dir] + 1; + + int total_nodes_search = num_cells_search + 1; + + // Each thread gets its own contiguous slice of the pre-allocated + // search buffers. Offset = preserved_node_idx * total_nodes_search. + long buf_off = (long)preserved_node_idx * total_nodes_search; + double *vals = up->search_vals + buf_off; + double *coords = up->search_coords + buf_off; + bool *visited = up->search_visited + buf_off; + for (int i = 0; i < total_nodes_search; i++) { + vals[i] = 0.0; + coords[i] = 0.0; + visited[i] = false; + } + + // Preserved direction (only used for 2D). + int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0); + + // Iterate along cells in the search direction. + for (int cell_idx = up->range.lower[search_dir]; + cell_idx <= up->range.upper[search_dir]; cell_idx++) { + // For 2D, determine which cells in the preserved direction + // contribute to this preserved_node_idx. + int pres_cell_start, pres_cell_end; + if (ndim == 1) { + pres_cell_start = 0; + pres_cell_end = 0; + } + else { + // Node i is shared by cells i and i+1 (0-indexed from lower). + // preserved_node_idx 0 is only in cell lower[preserved_dir]. + // preserved_node_idx N is only in cell upper[preserved_dir]. 
+ if (preserved_node_idx == 0) { + pres_cell_start = up->range.lower[preserved_dir]; + pres_cell_end = up->range.lower[preserved_dir]; + } + else if (preserved_node_idx == up->out_nrange.upper[0]) { + pres_cell_start = up->range.upper[preserved_dir]; + pres_cell_end = up->range.upper[preserved_dir]; + } + else { + pres_cell_start = up->range.lower[preserved_dir] + preserved_node_idx - 1; + pres_cell_end = pres_cell_start + 1; + if (pres_cell_end > up->range.upper[preserved_dir]) + pres_cell_end = up->range.upper[preserved_dir]; + } + } + + for (int pres_cell = pres_cell_start; pres_cell <= pres_cell_end; pres_cell++) { + // Build cell index. + int idx[GKYL_MAX_DIM]; + if (ndim == 1) { + idx[0] = cell_idx; + } + else { + idx[preserved_dir] = pres_cell; + idx[search_dir] = cell_idx; + } + + long linidx = gkyl_range_idx(&up->range, idx); + const double *f_d = (const double *)gkyl_array_cfetch(in, linidx); + + double xc[GKYL_MAX_DIM]; + gkyl_rect_grid_cell_center(&up->grid, idx, xc); + + // Evaluate at each node in this cell. + for (int n = 0; n < num_basis; n++) { + const double *nod_log = (const double *)gkyl_array_cfetch(up->nodes, n); + + // Check if this node belongs to our preserved_node_idx (2D only). + if (ndim > 1) { + int pres_node_offset = (nod_log[preserved_dir] < 0) ? 0 : 1; + int pres_cell_local = pres_cell - up->range.lower[preserved_dir]; + + int this_pres_node = pres_cell_local + pres_node_offset; + + if (this_pres_node != preserved_node_idx) + continue; + } + + // Determine node offset in the search direction. + int search_node_offset = (nod_log[search_dir] < 0) ? 
0 : 1; + + int cell_local = cell_idx - up->range.lower[search_dir]; + + int search_node_idx = cell_local + search_node_offset; + + if (!visited[search_node_idx]) { + double val = up->basis.eval_expand(nod_log, f_d); + double nod_phys[GKYL_MAX_DIM]; + dg_find_peaks_log_to_comp(ndim, nod_log, up->grid.dx, xc, nod_phys); + + vals[search_node_idx] = val; + coords[search_node_idx] = nod_phys[search_dir]; + visited[search_node_idx] = true; + } + } + } + } + + // Extract peaks based on peak_types and write to nodal output arrays. + int peak_idx = 0; + + // EDGE_LO is always the first peak at index 0. + if (up->peak_types[peak_idx] == GKYL_PEAK_EDGE_LO) { + double *val_n = (double *)gkyl_array_fetch(up->out_vals_nodal[peak_idx], + preserved_node_idx); + double *coord_n = (double *)gkyl_array_fetch(up->out_coords_nodal[peak_idx], + preserved_node_idx); + val_n[0] = vals[0]; + coord_n[0] = coords[0]; + peak_idx++; + } + + // Find local maxima and minima. + for (int i = 1; i < total_nodes_search - 1 && peak_idx < up->num_peaks - 1; i++) { + double prev = vals[i - 1]; + double curr = vals[i]; + double next = vals[i + 1]; + + bool is_max = (curr > prev && curr > next); + bool is_min = (curr < prev && curr < next); + + if ((is_max && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MAX) || + (is_min && up->peak_types[peak_idx] == GKYL_PEAK_LOCAL_MIN)) { + double *val_n = (double *)gkyl_array_fetch(up->out_vals_nodal[peak_idx], + preserved_node_idx); + double *coord_n = (double *)gkyl_array_fetch(up->out_coords_nodal[peak_idx], + preserved_node_idx); + val_n[0] = curr; + coord_n[0] = coords[i]; + peak_idx++; + } + } + + // EDGE_HI is always the last peak. 
+ if (peak_idx < up->num_peaks && up->peak_types[peak_idx] == GKYL_PEAK_EDGE_HI) { + double *val_n = (double *)gkyl_array_fetch(up->out_vals_nodal[peak_idx], + preserved_node_idx); + double *coord_n = (double *)gkyl_array_fetch(up->out_coords_nodal[peak_idx], + preserved_node_idx); + val_n[0] = vals[total_nodes_search - 1]; + coord_n[0] = coords[total_nodes_search - 1]; + } + } +} + +/** + * CUDA kernel: evaluate an input array at peak locations for given peak indices. + * Writes results into out_eval_at_peaks_vals_nodal arrays on device. + * + * Thread mapping: one thread per (preserved_node_idx, peak_offset) pair. + * total_threads = num_nodes_out * num_peaks_to_eval. + * + * @param up Device-side updater struct + * @param in Device-side input array (DG field to evaluate) + * @param num_nodes_out Number of preserved-direction nodes + * @param peak_start First peak index to evaluate + * @param num_peaks_to_eval Number of peaks to evaluate (starting from peak_start) + */ +__global__ void +gkyl_eval_at_peaks_kernel(const struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in, int num_nodes_out, + int peak_start, int num_peaks_to_eval) +{ + unsigned long total_threads = (unsigned long)num_nodes_out * num_peaks_to_eval; + + for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x; + tid < total_threads; tid += blockDim.x * gridDim.x) { + int preserved_node_idx = (int)(tid / num_peaks_to_eval); + int peak_offset = (int)(tid % num_peaks_to_eval); + int peak_idx = peak_start + peak_offset; + + int ndim = up->grid.ndim; + int search_dir = up->search_dir; + int preserved_dir = (ndim == 1) ? -1 : ((search_dir == 0) ? 1 : 0); + + // Get the peak coordinate found during advance. + const double *peak_coord_n = (const double *)gkyl_array_cfetch( + up->out_coords_nodal[peak_idx], preserved_node_idx); + double peak_coord_search = peak_coord_n[0]; + + // Determine cell index containing the peak. 
+    // We compute the search-direction cell directly from the uniform grid
+    // geometry (avoids calling gkyl_rect_grid_find_cell which is not available
+    // as a device symbol).
+    int cell_idx[GKYL_MAX_DIM];
+
+    // Search direction: compute cell from coordinate on uniform grid.
+    // cell = ceil((x - lower) / dx) (1-based indexing).
+    // Use pick_lower semantics: if exactly on a boundary, pick the lower cell.
+    {
+      double rel = (peak_coord_search - up->grid.lower[search_dir]) / up->grid.dx[search_dir];
+      int c = (int)ceil(rel - 1.0e-10); // 1-based
+      // pick_lower: on the face shared by cells c and c+1, rel equals c, so ceil() picks c not c+1.
+      // The 1e-10 bias absorbs round-off in rel so a face coordinate cannot spill into cell c+1.
+      // Clamp to valid range (this also maps the lower domain edge, rel = 0, onto the first cell).
+      if (c < up->range.lower[search_dir])
+        c = up->range.lower[search_dir];
+      if (c > up->range.upper[search_dir])
+        c = up->range.upper[search_dir];
+      cell_idx[search_dir] = c;
+    }
+
+    // For 2D: determine preserved-direction cell from preserved_node_idx.
+    if (ndim > 1) {
+      int pres_cell;
+      if (preserved_node_idx == 0) {
+        pres_cell = up->range.lower[preserved_dir];
+      }
+      else {
+        pres_cell = up->range.lower[preserved_dir] + preserved_node_idx - 1;
+        if (pres_cell > up->range.upper[preserved_dir])
+          pres_cell = up->range.upper[preserved_dir];
+      }
+      cell_idx[preserved_dir] = pres_cell;
+    }
+
+    // Fetch DG coefficients at this cell.
+    long linidx = gkyl_range_idx(&up->range, cell_idx);
+    const double *f_d = (const double *)gkyl_array_cfetch(in, linidx);
+
+    // Get cell center for logical coordinate conversion.
+    double xc[GKYL_MAX_DIM];
+    gkyl_rect_grid_cell_center(&up->grid, cell_idx, xc);
+
+    // Convert peak coordinate to logical space [-1, 1].
+    double nod_log[GKYL_MAX_DIM];
+    for (int d = 0; d < ndim; d++) {
+      if (d == search_dir) {
+        nod_log[d] = 2.0 * (peak_coord_search - xc[d]) / up->grid.dx[d];
+      }
+      else if (ndim > 1) {
+        // Node 0 is at left edge (-1), all others at right edge (+1).
+        nod_log[d] = (preserved_node_idx == 0) ? -1.0 : 1.0;
+      }
+    }
+
+    // Evaluate the DG expansion and store result.
+    double val = up->basis.eval_expand(nod_log, f_d);
+    double *val_n = (double *)gkyl_array_fetch(
+      up->out_eval_at_peaks_vals_nodal[peak_idx], preserved_node_idx);
+    val_n[0] = val;
+  }
+}
+
+// Host function to launch the project_on_peaks kernel and run nodal-to-modal transforms.
+void
+gkyl_array_dg_find_peaks_project_on_peaks_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, struct gkyl_array **out_vals)
+{
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+  int num_nodes_out = up->out_nrange.volume;
+  int num_peaks = up->num_peaks;
+
+  // Launch kernel: one thread per (preserved_node, peak) pair, nblocks blocks of nthreads threads.
+  long total_threads = (long)num_nodes_out * num_peaks;
+  int nthreads = 256;
+  int nblocks = (total_threads + nthreads - 1) / nthreads;
+
+  gkyl_eval_at_peaks_kernel<<<nblocks, nthreads>>>(
+    up->on_dev, in_array->on_dev, num_nodes_out, 0, num_peaks);
+
+  // Transform nodal to modal for each peak.
+  if (out_dim == 0) {
+    for (int p = 0; p < num_peaks; p++) {
+      gkyl_array_copy(out_vals[p], up->out_eval_at_peaks_vals_nodal[p]);
+    }
+  }
+  else {
+    for (int p = 0; p < num_peaks; p++) {
+      gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid,
+        &up->out_nrange, &up->out_range, 1,
+        up->out_eval_at_peaks_vals_nodal[p], out_vals[p]);
+    }
+  }
+}
+
+// Host function to launch the project_on_peak_idx kernel and run nodal-to-modal transform.
+void
+gkyl_array_dg_find_peaks_project_on_peak_idx_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val)
+{
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+  int num_nodes_out = up->out_nrange.volume;
+
+  // Launch kernel: one thread per preserved_node, single peak.
+  int nthreads = 256;
+  int nblocks = (num_nodes_out + nthreads - 1) / nthreads;
+
+  gkyl_eval_at_peaks_kernel<<<nblocks, nthreads>>>(
+    up->on_dev, in_array->on_dev, num_nodes_out, peak_idx, 1);
+
+  // Transform nodal to modal.
+  if (out_dim == 0) {
+    gkyl_array_copy(out_val, up->out_eval_at_peaks_vals_nodal[peak_idx]);
+  }
+  else {
+    gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid,
+      &up->out_nrange, &up->out_range, 1,
+      up->out_eval_at_peaks_vals_nodal[peak_idx], out_val);
+  }
+}
+
+// Host function to launch the advance kernel and run nodal-to-modal transforms.
+void
+gkyl_array_dg_find_peaks_advance_cu(struct gkyl_array_dg_find_peaks *up,
+  const struct gkyl_array *in)
+{
+  int ndim = up->grid.ndim;
+  int out_dim = ndim - 1;
+  int num_nodes_out = up->out_nrange.volume;
+
+  // Launch the kernel: one thread per preserved node, nblocks blocks of nthreads threads.
+  int nthreads = 256;
+  int nblocks = (num_nodes_out + nthreads - 1) / nthreads;
+
+  gkyl_find_peaks_kernel<<<nblocks, nthreads>>>(
+    up->on_dev, in->on_dev, num_nodes_out);
+
+  // Transform nodal to modal for each peak.
+  if (out_dim == 0) {
+    // 1D -> 0D case: modal = nodal (p=0, single value).
+    // Copy from nodal to modal arrays on device.
+    for (int p = 0; p < up->num_peaks; p++) {
+      gkyl_array_copy(up->out_vals[p], up->out_vals_nodal[p]);
+      gkyl_array_copy(up->out_coords[p], up->out_coords_nodal[p]);
+    }
+  }
+  else {
+    // 2D -> 1D case: use nodal-to-modal transform on GPU.
+ for (int p = 0; p < up->num_peaks; p++) { + gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid, + &up->out_nrange, &up->out_range, 1, + up->out_vals_nodal[p], up->out_vals[p]); + gkyl_nodal_ops_n2m_cu(up->n2m, up->out_basis_on_dev, &up->out_grid, + &up->out_nrange, &up->out_range, 1, + up->out_coords_nodal[p], up->out_coords[p]); + } + } +} + +struct gkyl_array_dg_find_peaks* +gkyl_array_dg_find_peaks_new_cu(struct gkyl_array_dg_find_peaks *up_ho) +{ + struct gkyl_array_dg_find_peaks *up = + (struct gkyl_array_dg_find_peaks *)gkyl_malloc(sizeof(*up)); + + // Copy all scalar/struct fields from host object. + up->grid = up_ho->grid; + up->basis = up_ho->basis; + up->range = up_ho->range; + up->range_ext = up_ho->range_ext; + up->search_dir = up_ho->search_dir; + up->use_gpu = true; + + up->out_grid = up_ho->out_grid; + up->out_basis = up_ho->out_basis; + up->out_range = up_ho->out_range; + up->out_range_ext = up_ho->out_range_ext; + up->out_nrange = up_ho->out_nrange; + + up->num_peaks = up_ho->num_peaks; + for (int p = 0; p < GKYL_DG_FIND_PEAKS_MAX; p++) { + up->peak_types[p] = up_ho->peak_types[p]; + } + + int ndim = up_ho->basis.ndim; + int poly_order = up_ho->basis.poly_order; + int out_dim = ndim - 1; + + // Create a GPU copy of the nodes array so the kernel can access it. + up->nodes = gkyl_array_cu_dev_new(GKYL_DOUBLE, + up_ho->nodes->ncomp, up_ho->nodes->size); + gkyl_array_copy(up->nodes, up_ho->nodes); + + // Create GPU-enabled nodal-to-modal converter. + // Use the host basis (up->out_basis) here because gkyl_nodal_ops_new + // calls cbasis->node_list on the host. + up->n2m = gkyl_nodal_ops_new(&up->out_basis, &up->out_grid, true); + + // Create a device-resident basis with device-callable function pointers. + // This is needed by gkyl_nodal_ops_n2m_cu which passes the basis pointer + // directly to a CUDA kernel that dereferences cbasis->nodal_to_modal(). 
+ if (out_dim > 0) + up->out_basis_on_dev = gkyl_cart_modal_serendip_cu_dev_new(1, poly_order); + else + up->out_basis_on_dev = NULL; + + // Pre-allocate search-direction working arrays on device. + // Each thread (one per preserved node) gets its own contiguous slice + // of total_nodes_search elements, so total size = num_nodes_out * total_nodes_search. + up->total_nodes_search = up_ho->total_nodes_search; + int num_nodes_out = up->out_nrange.volume; + long search_buf_len = (long)num_nodes_out * up->total_nodes_search; + up->search_vals = (double *)gkyl_cu_malloc(sizeof(double) * search_buf_len); + up->search_coords = (double *)gkyl_cu_malloc(sizeof(double) * search_buf_len); + up->search_visited = (bool *)gkyl_cu_malloc(sizeof(bool) * search_buf_len); + + up->flags = 0; + GKYL_SET_CU_ALLOC(up->flags); + up->ref_count = gkyl_ref_count_init(gkyl_array_dg_find_peaks_free); + + // Allocate GPU output arrays for each peak and copy data from host arrays. + for (int p = 0; p < up->num_peaks; p++) { + up->out_vals[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE, + up_ho->out_vals[p]->ncomp, up_ho->out_vals[p]->size); + gkyl_array_copy(up->out_vals[p], up_ho->out_vals[p]); + + up->out_coords[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE, + up_ho->out_coords[p]->ncomp, up_ho->out_coords[p]->size); + gkyl_array_copy(up->out_coords[p], up_ho->out_coords[p]); + + up->out_vals_nodal[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE, + up_ho->out_vals_nodal[p]->ncomp, up_ho->out_vals_nodal[p]->size); + gkyl_array_copy(up->out_vals_nodal[p], up_ho->out_vals_nodal[p]); + + up->out_coords_nodal[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE, + up_ho->out_coords_nodal[p]->ncomp, up_ho->out_coords_nodal[p]->size); + gkyl_array_copy(up->out_coords_nodal[p], up_ho->out_coords_nodal[p]); + + up->out_eval_at_peaks_vals_nodal[p] = gkyl_array_cu_dev_new(GKYL_DOUBLE, + up_ho->out_eval_at_peaks_vals_nodal[p]->ncomp, + up_ho->out_eval_at_peaks_vals_nodal[p]->size); + gkyl_array_copy(up->out_eval_at_peaks_vals_nodal[p], + 
up_ho->out_eval_at_peaks_vals_nodal[p]); + } + + // Initialize unused peak arrays to NULL. + for (int p = up->num_peaks; p < GKYL_DG_FIND_PEAKS_MAX; p++) { + up->out_vals[p] = NULL; + up->out_coords[p] = NULL; + up->out_vals_nodal[p] = NULL; + up->out_coords_nodal[p] = NULL; + up->out_eval_at_peaks_vals_nodal[p] = NULL; + } + + // Copy struct to device, with on_dev array pointers and device-callable + // basis function pointers swapped in. + // Save host-side array pointers and basis structs. + struct gkyl_array *ho_nodes = up->nodes; + struct gkyl_basis ho_basis = up->basis; + struct gkyl_basis ho_out_basis = up->out_basis; + struct gkyl_array *ho_out_vals[GKYL_DG_FIND_PEAKS_MAX]; + struct gkyl_array *ho_out_coords[GKYL_DG_FIND_PEAKS_MAX]; + struct gkyl_array *ho_out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX]; + struct gkyl_array *ho_out_coords_nodal[GKYL_DG_FIND_PEAKS_MAX]; + struct gkyl_array *ho_out_eval_at_peaks_vals_nodal[GKYL_DG_FIND_PEAKS_MAX]; + + // Populate device-callable basis function pointers for the H2D copy. + // We allocate temporary device basis structs, initialize them with device + // kernels, then copy back to the host struct fields so that when the + // whole struct is memcpy'd H2D, it contains device-callable pointers. + struct gkyl_basis *tmp_basis_dev = gkyl_cart_modal_serendip_cu_dev_new(ndim, poly_order); + gkyl_cu_memcpy(&up->basis, tmp_basis_dev, sizeof(struct gkyl_basis), GKYL_CU_MEMCPY_D2H); + gkyl_cu_free(tmp_basis_dev); + + int out_basis_dim = (out_dim == 0) ? 1 : 1; + int out_basis_po = (out_dim == 0) ? 0 : poly_order; + struct gkyl_basis *tmp_out_basis_dev = gkyl_cart_modal_serendip_cu_dev_new(out_basis_dim, + out_basis_po); + gkyl_cu_memcpy(&up->out_basis, tmp_out_basis_dev, sizeof(struct gkyl_basis), GKYL_CU_MEMCPY_D2H); + gkyl_cu_free(tmp_out_basis_dev); + + // Swap nodes to its device pointer. 
+ up->nodes = up->nodes->on_dev; + + for (int p = 0; p < up->num_peaks; p++) { + ho_out_vals[p] = up->out_vals[p]; + ho_out_coords[p] = up->out_coords[p]; + ho_out_vals_nodal[p] = up->out_vals_nodal[p]; + ho_out_coords_nodal[p] = up->out_coords_nodal[p]; + ho_out_eval_at_peaks_vals_nodal[p] = up->out_eval_at_peaks_vals_nodal[p]; + + // Swap in device pointers for the H2D copy. + up->out_vals[p] = up->out_vals[p]->on_dev; + up->out_coords[p] = up->out_coords[p]->on_dev; + up->out_vals_nodal[p] = up->out_vals_nodal[p]->on_dev; + up->out_coords_nodal[p] = up->out_coords_nodal[p]->on_dev; + up->out_eval_at_peaks_vals_nodal[p] = up->out_eval_at_peaks_vals_nodal[p]->on_dev; + } + + // Allocate device struct and copy host struct (with device pointers) to device. + struct gkyl_array_dg_find_peaks *up_cu = + (struct gkyl_array_dg_find_peaks *)gkyl_cu_malloc(sizeof(*up_cu)); + gkyl_cu_memcpy(up_cu, up, sizeof(struct gkyl_array_dg_find_peaks), GKYL_CU_MEMCPY_H2D); + up->on_dev = up_cu; + + // Restore host-side array pointers and basis so the returned object + // has usable host handles and host-callable function pointers. + up->nodes = ho_nodes; + up->basis = ho_basis; + up->out_basis = ho_out_basis; + for (int p = 0; p < up->num_peaks; p++) { + up->out_vals[p] = ho_out_vals[p]; + up->out_coords[p] = ho_out_coords[p]; + up->out_vals_nodal[p] = ho_out_vals_nodal[p]; + up->out_coords_nodal[p] = ho_out_coords_nodal[p]; + up->out_eval_at_peaks_vals_nodal[p] = ho_out_eval_at_peaks_vals_nodal[p]; + } + + return up; +} diff --git a/core/zero/gkyl_array_dg_find_peaks.h b/core/zero/gkyl_array_dg_find_peaks.h new file mode 100644 index 000000000..ec866a713 --- /dev/null +++ b/core/zero/gkyl_array_dg_find_peaks.h @@ -0,0 +1,314 @@ +#pragma once + +#include +#include +#include +#include + +/** + * Find all peaks (local maxima, local minima, and boundary values) of a DG + * field along one direction. 
+ * + * For a 2D input array f(psi, z), finding peaks along z (dir=1) gives arrays: + * out_val[k](psi) = value of k-th peak along z for each psi + * out_coord[k](psi) = z-coordinate of k-th peak for each psi + * + * For a 1D input array f(z), finding peaks along z (dir=0) gives scalars: + * out_val[k] = value of k-th peak + * out_coord[k] = z-coordinate of k-th peak + * + * Peaks are detected by sampling the field at nodal points along the search + * direction and identifying: + * - EDGE_LO: Value at the lower boundary of the domain + * - LOCAL_MAX: Points where f increases then decreases + * - LOCAL_MIN: Points where f decreases then increases + * - EDGE_HI: Value at the upper boundary of the domain + * + * The number of peaks is determined by scanning along the search direction + * at a middle preserved-direction coordinate. + */ +typedef struct gkyl_array_dg_find_peaks gkyl_array_dg_find_peaks; + +/** Types of peaks that can be found. */ +enum gkyl_peak_type { + GKYL_PEAK_EDGE_LO, // Value at lower boundary + GKYL_PEAK_LOCAL_MAX, // Local maximum + GKYL_PEAK_LOCAL_MIN, // Local minimum + GKYL_PEAK_EDGE_HI, // Value at upper boundary +}; + +/** Input parameters for dg_find_peaks updater. */ +struct gkyl_array_dg_find_peaks_inp { + const struct gkyl_basis *basis; // Input basis (N-dimensional) + const struct gkyl_rect_grid *grid; // Input grid + const struct gkyl_range *range; // Input range (local) + const struct gkyl_range *range_ext; // Input extended range + int search_dir; // Direction to search for peaks (0-indexed) + bool use_gpu; // Whether to run on GPU +}; + +/** + * Create a new peak finder updater. The number of peaks is determined by + * scanning the input field along the search direction at a middle coordinate. + * This must be called AFTER the input field is initialized, as it scans the + * field to determine the number of peaks. 
+ * + * @param inp Input parameters + * @param field Input field to scan for peak count determination + * @return New updater pointer + */ +struct gkyl_array_dg_find_peaks* gkyl_array_dg_find_peaks_new( + const struct gkyl_array_dg_find_peaks_inp *inp, const struct gkyl_array *field); + +/** + * Compute the peaks. For each point along the preserved dimensions, + * find all peaks along the search direction. + * + * @param up Updater object + * @param in Input array (N-dimensional DG field) + */ +void gkyl_array_dg_find_peaks_advance(struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in); + +/** + * Get the number of peaks found. + * + * @param up Updater object + * @return Number of peaks + */ +int gkyl_array_dg_find_peaks_num_peaks(const struct gkyl_array_dg_find_peaks *up); + +/** + * Get the type of a specific peak (EDGE_LO, LOCAL_MAX, LOCAL_MIN, EDGE_HI). + * + * @param up Updater object + * @param peak_idx Index of the peak (0 to num_peaks-1) + * @return Type of the peak + */ +enum gkyl_peak_type gkyl_array_dg_find_peaks_get_type(const struct gkyl_array_dg_find_peaks *up, + int peak_idx); + +/** + * Get the output basis ((N-1)-dimensional, or p=0 1D for 1D->0D). + * + * @param up Updater object + * @return Pointer to output basis + */ +const struct gkyl_basis* gkyl_array_dg_find_peaks_get_basis( + const struct gkyl_array_dg_find_peaks *up); + +/** + * Get the output grid. + * + * @param up Updater object + * @return Pointer to output grid + */ +const struct gkyl_rect_grid* gkyl_array_dg_find_peaks_get_grid( + const struct gkyl_array_dg_find_peaks *up); + +/** + * Get the output range. + * + * @param up Updater object + * @return Pointer to output range + */ +const struct gkyl_range* gkyl_array_dg_find_peaks_get_range( + const struct gkyl_array_dg_find_peaks *up); + +/** + * Get the output extended range. 
+ * + * @param up Updater object + * @return Pointer to output extended range + */ +const struct gkyl_range* gkyl_array_dg_find_peaks_get_range_ext( + const struct gkyl_array_dg_find_peaks *up); + +/** + * Get the output nodal range. + * + * @param up Updater object + * @return Pointer to output nodal range + */ +const struct gkyl_range* +gkyl_array_dg_find_peaks_get_nodal_range(const struct gkyl_array_dg_find_peaks *up); + +/** + * Get the output array containing peak values for a specific peak. + * + * @param up Updater object + * @param peak_idx Index of the peak (0 to num_peaks-1) + * @return Pointer to output values array (modal DG expansion) + */ +const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_vals( + const struct gkyl_array_dg_find_peaks *up, int peak_idx); + +/** + * Get the output array containing peak values in nodal basis for a specific peak. + * + * @param up Updater object + * @param peak_idx Index of the peak (0 to num_peaks-1) + * @return Pointer to output values array (nodal DG expansion) + */ +const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_vals_nodal( + const struct gkyl_array_dg_find_peaks *up, int peak_idx); + +/** + * Get the output array containing coordinates of a specific peak. + * + * @param up Updater object + * @param peak_idx Index of the peak (0 to num_peaks-1) + * @return Pointer to output coordinates array (modal DG expansion) + */ +const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords( + const struct gkyl_array_dg_find_peaks *up, int peak_idx); + +/** + * Get the output array containing coordinates in nodal basis of a specific peak. 
+ * + * @param up Updater object + * @param peak_idx Index of the peak (0 to num_peaks-1) + * @return Pointer to output coordinates array (nodal DG expansion) + */ +const struct gkyl_array* gkyl_array_dg_find_peaks_acquire_coords_nodal( + const struct gkyl_array_dg_find_peaks *up, int peak_idx); + +/** + * Project (evaluate) an arbitrary array onto the peak locations previously + * found by gkyl_array_dg_find_peaks_advance. + * + * For a 1D case with 5 peaks, this evaluates the input array at those 5 peak + * locations and returns the values. + * + * For a 2D case with peaks along lines (e.g., psi vs z with peaks in z), + * this evaluates the input array along the contours defined by the peak + * locations for each psi. + * + * The peak locations must have been previously computed via + * gkyl_array_dg_find_peaks_advance. This method evaluates the provided array + * at those same locations. + * + * Example usage: + * @code + * // 1. Find peaks in bmag along z direction + * struct gkyl_array_dg_find_peaks *peak_finder = gkyl_array_dg_find_peaks_new(&inp, bmag); + * gkyl_array_dg_find_peaks_advance(peak_finder, bmag); + * + * // 2. Get bmag_max (LOCAL_MAX peak) location and value + * int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peak_finder); + * int bmag_max_idx = -1; + * for (int p = 0; p < num_peaks; p++) { + * if (gkyl_array_dg_find_peaks_get_type(peak_finder, p) == GKYL_PEAK_LOCAL_MAX) { + * bmag_max_idx = p; + * break; + * } + * } + * const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(peak_finder, bmag_max_idx); + * const struct gkyl_array *z_max = gkyl_array_dg_find_peaks_acquire_coords(peak_finder, bmag_max_idx); + * + * // 3. 
Evaluate phi at the same locations where bmag has peaks + * struct gkyl_array *phi_at_peaks[num_peaks]; + * for (int p = 0; p < num_peaks; p++) { + * phi_at_peaks[p] = gkyl_array_new(GKYL_DOUBLE, out_basis.num_basis, out_range_ext.volume); + * } + * gkyl_array_dg_find_peaks_project_on_peaks(peak_finder, phi, phi_at_peaks); + * + * // 4. Now phi_at_peaks[bmag_max_idx] contains phi evaluated at the mirror throat + * @endcode + * + * @param up Updater object (must have run advance first) + * @param in_array Array to evaluate at peak locations (same grid/basis as original field) + * @param out_vals Output: array of evaluated values for each peak + * (must be pre-allocated with num_peaks elements, each matching out_range_ext) + */ +void gkyl_array_dg_find_peaks_project_on_peaks(struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in_array, struct gkyl_array **out_vals); + +/** + * Project (evaluate) an arbitrary array onto a single peak location previously + * found by gkyl_array_dg_find_peaks_advance. + * + * This is a more efficient version of gkyl_array_dg_find_peaks_project_on_peaks + * when you only need the evaluation at one specific peak (e.g., only at the + * mirror throat LOCAL_MAX peak). + * + * Example usage: + * @code + * // 1. Find peaks in bmag along z direction + * struct gkyl_array_dg_find_peaks *peak_finder = gkyl_array_dg_find_peaks_new(&inp, bmag); + * gkyl_array_dg_find_peaks_advance(peak_finder, bmag); + * + * // 2. Find the LOCAL_MAX peak index + * int num_peaks = gkyl_array_dg_find_peaks_num_peaks(peak_finder); + * int bmag_max_idx = num_peaks - 2; // Assuming standard ordering + * + * // 3. Evaluate phi only at the mirror throat (bmag_max location) + * struct gkyl_array *phi_m = gkyl_array_new(GKYL_DOUBLE, out_basis.num_basis, out_range_ext.volume); + * gkyl_array_dg_find_peaks_project_on_peak_idx(peak_finder, phi, bmag_max_idx, phi_m); + * + * // 4. 
Now phi_m contains phi evaluated at the mirror throat + * @endcode + * + * @param up Updater object (must have run advance first) + * @param in_array Array to evaluate at peak location (same grid/basis as original field) + * @param peak_idx Index of the peak to evaluate at (0 to num_peaks-1) + * @param out_val Output: evaluated values at the specified peak + * (must be pre-allocated to match out_range_ext) + */ +void gkyl_array_dg_find_peaks_project_on_peak_idx(struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val); + +/** + * Release the updater and all internal arrays. + * + * @param up Updater to delete + */ +void gkyl_array_dg_find_peaks_release(struct gkyl_array_dg_find_peaks *up); + +/** + * Create a new GPU peak finder updater from an already-initialized host object. + * Allocates GPU arrays, copies the struct to device, and returns a host-side + * struct with array pointers referencing device memory. Called internally by + * gkyl_array_dg_find_peaks_new when use_gpu is true. + * + * @param up_ho Host-side updater object (fully initialized) + * @return New updater pointer with GPU arrays + */ +struct gkyl_array_dg_find_peaks* gkyl_array_dg_find_peaks_new_cu( + struct gkyl_array_dg_find_peaks *up_ho); + +/** + * GPU implementation of the advance method. Launches a CUDA kernel to find + * peaks for each preserved-direction node, then runs nodal-to-modal transforms + * on device. + * + * @param up Updater object (with GPU arrays) + * @param in Input array (device-side DG field) + */ +void gkyl_array_dg_find_peaks_advance_cu(struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in); + +/** + * GPU implementation of project_on_peaks. Launches a CUDA kernel to evaluate + * an input array at all peak locations, then runs nodal-to-modal transforms + * on device. 
+ * + * @param up Updater object (with GPU arrays) + * @param in_array Input array (device-side DG field) + * @param out_vals Output: array of evaluated values for each peak (device-side) + */ +void gkyl_array_dg_find_peaks_project_on_peaks_cu(struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in_array, struct gkyl_array **out_vals); + +/** + * GPU implementation of project_on_peak_idx. Launches a CUDA kernel to evaluate + * an input array at a single peak location, then runs a nodal-to-modal transform + * on device. + * + * @param up Updater object (with GPU arrays) + * @param in_array Input array (device-side DG field) + * @param peak_idx Index of the peak to evaluate at (0 to num_peaks-1) + * @param out_val Output: evaluated values at the specified peak (device-side) + */ +void gkyl_array_dg_find_peaks_project_on_peak_idx_cu(struct gkyl_array_dg_find_peaks *up, + const struct gkyl_array *in_array, int peak_idx, struct gkyl_array *out_val); diff --git a/core/zero/gkyl_array_dg_find_peaks_priv.h b/core/zero/gkyl_array_dg_find_peaks_priv.h new file mode 100644 index 000000000..f8695a569 --- /dev/null +++ b/core/zero/gkyl_array_dg_find_peaks_priv.h @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +// Maximum number of peaks we can handle. +#define GKYL_DG_FIND_PEAKS_MAX 16 + +/** + * Convert logical (reference) coordinates to computational (physical) coordinates. + * xout[d] = xc[d] + 0.5*dx[d]*eta[d] + */ +GKYL_CU_DH +static inline void +dg_find_peaks_log_to_comp(int ndim, const double *eta, + const double *GKYL_RESTRICT dx, const double *GKYL_RESTRICT xc, + double *GKYL_RESTRICT xout) +{ + for (int d = 0; d < ndim; ++d) { + xout[d] = 0.5 * dx[d] * eta[d] + xc[d]; + } +} + +/** Internal struct for dg_find_peaks updater. */ +struct gkyl_array_dg_find_peaks { + // Input parameters (copies). 
+ struct gkyl_rect_grid grid; // Input grid (copy) + struct gkyl_basis basis; // Input basis (copy) + struct gkyl_range range; // Input local range (copy) + struct gkyl_range range_ext; // Input extended range (copy) + int search_dir; // Direction to search for peaks + bool use_gpu; + + // Output grid/basis/range (owned). + struct gkyl_rect_grid out_grid; // Output grid (N-1 dim, or 1D 1-cell for 1D->0D) + struct gkyl_basis out_basis; // Output basis (N-1 dim, or p=0 1D for 1D->0D) + struct gkyl_range out_range; // Output range + struct gkyl_range out_range_ext; // Output extended range + struct gkyl_range out_nrange; // Nodal range for output + + // Peak information. + int num_peaks; // Number of peaks detected + enum gkyl_peak_type peak_types[GKYL_DG_FIND_PEAKS_MAX]; // Type of each peak + + // Output arrays (owned) - one per peak. + struct gkyl_array *out_vals[GKYL_DG_FIND_PEAKS_MAX]; // Peak values (modal DG) + struct gkyl_array *out_coords[GKYL_DG_FIND_PEAKS_MAX]; // Peak coordinates (modal DG) + struct gkyl_array *out_vals_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Nodal peak values + struct gkyl_array *out_coords_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Nodal peak coordinates + struct gkyl_array *out_eval_at_peaks_vals_nodal[GKYL_DG_FIND_PEAKS_MAX]; // Values evaluated at peaks (nodal) + + // Internal working arrays. + struct gkyl_array *nodes; // Node locations in logical coords + + // Working arrays for the find-peaks scan along the search direction. + // On CPU these are malloc'd per call; on GPU they are pre-allocated + // with size (num_nodes_out * total_nodes_search) so each thread + // can index its own contiguous slice. + double *search_vals; // Nodal values along search dir + double *search_coords; // Physical coordinates along search dir + bool *search_visited; // Visited flags along search dir + int total_nodes_search; // Number of nodes along search dir + + // Nodal-to-modal converter. 
+ struct gkyl_nodal_ops *n2m; + + // Device-resident basis for passing to GPU API functions (e.g. gkyl_nodal_ops_n2m_cu). + // Allocated via gkyl_cart_modal_serendip_cu_dev_new; NULL on CPU. + struct gkyl_basis *out_basis_on_dev; + + uint32_t flags; + struct gkyl_array_dg_find_peaks *on_dev; // Pointer to device object (if GPU). + struct gkyl_ref_count ref_count; // Reference counter. +}; + +/** + * Function that actually frees memory associated with this + * object when the number of references has decreased to zero. + * + * @param ref Reference counter for this object. + */ +void gkyl_array_dg_find_peaks_free(const struct gkyl_ref_count *ref); diff --git a/gyrokinetic/apps/gk_species.c b/gyrokinetic/apps/gk_species.c index 3345748d9..ad19b0b6d 100644 --- a/gyrokinetic/apps/gk_species.c +++ b/gyrokinetic/apps/gk_species.c @@ -136,6 +136,8 @@ gk_species_rhs_dynamic(gkyl_gyrokinetic_app *app, struct gk_species *species, // Enforce the omega_H constraint on dt. double dt_omegaH = gk_species_omegaH_dt(app, species, fin); + + gk_species_fdot_multiplier_advance_times_omegaH(app, species, &species->fdot_mult, &dt_omegaH); dt_out = fmin(dt_out, dt_omegaH); app->stat.species_omega_cfl_tm += gkyl_time_diff_now_sec(tm); diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c index cbc907ddf..a8de75530 100644 --- a/gyrokinetic/apps/gk_species_damping.c +++ b/gyrokinetic/apps/gk_species_damping.c @@ -1,16 +1,26 @@ #include -#include -#include #include +#include #include +#include +#include + +static void +proj_on_basis_c2p_position_func(const double *xcomp, double *xphys, void *ctx) +{ + struct gk_proj_on_basis_c2p_func_ctx *c2p_ctx = ctx; + gkyl_position_map_eval_mc2nu(c2p_ctx->pos_map, xcomp, xphys); +} void -gk_species_damping_write_disabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame) +gk_species_damping_write_disabled(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm, + int frame) { } void 
-gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame) +gk_species_damping_write_enabled(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm, + int frame) { struct timespec wst = gkyl_wall_clock(); // DG metadata for damping rate. @@ -18,19 +28,21 @@ gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *g { .key = "poly_order", .elem_type = GKYL_MP_UNSIGNED_INT, .uval = 0 }, { .key = "basis_type", .elem_type = GKYL_MP_STRING, .cval = "serendipity" }, }; - int mpe_drate_len = sizeof(mpe_drate)/sizeof(mpe_drate[0]); + int mpe_drate_len = sizeof(mpe_drate) / sizeof(mpe_drate[0]); // Update app basic metada with time/frame. gkyl_msgpack_map_elem_set_double(app->io_meta_basic_len, app->io_meta_basic, "time", tm); gkyl_msgpack_map_elem_set_uint(app->io_meta_basic_len, app->io_meta_basic, "frame", frame); // Package metadata. - int io_meta_len[] = {app->io_meta_basic_len, mpe_drate_len, app->gk_geom->io_meta_len}; - const struct gkyl_msgpack_map_elem* io_meta[] = {app->io_meta_basic, mpe_drate, app->gk_geom->io_meta}; - struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len)/sizeof(int), io_meta_len, io_meta); + int io_meta_len[] = { app->io_meta_basic_len, mpe_drate_len, app->gk_geom->io_meta_len }; + const struct gkyl_msgpack_map_elem *io_meta[] = { app->io_meta_basic, mpe_drate, + app->gk_geom->io_meta }; + struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len) / sizeof(int), + io_meta_len, io_meta); // Write out the damping rate. const char *fmt = "%s-%s_damping_rate_%d.gkyl"; int sz = gkyl_calc_strlen(fmt, app->name, gks->info.name, frame); - char fileNm[sz+1]; // ensures no buffer overflow + char fileNm[sz + 1]; // ensures no buffer overflow snprintf(fileNm, sizeof fileNm, fmt, app->name, gks->info.name, frame); // Copy data from device to host before writing it out. 
@@ -40,12 +52,13 @@ gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *g gkyl_comm_array_write(gks->comm, &gks->grid, &gks->local, mt, gks->damping.rate_host, fileNm); app->stat.n_io += 1; - gkyl_msgpack_data_release(mt); + gkyl_msgpack_data_release(mt); app->stat.species_diag_io_tm += gkyl_time_diff_now_sec(wst); } void -gk_species_damping_write_init_only(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame) +gk_species_damping_write_init_only(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm, + int frame) { gk_species_damping_write_enabled(app, gks, tm, frame); gks->damping.write_func = gk_species_damping_write_disabled; @@ -65,6 +78,7 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks { damp->type = gks->info.damping.type; damp->evolve = false; // Whether the rate is time dependent. + damp->is_tandem = false; // Default to single mirror. int num_quad = gks->info.damping.num_quad? gks->info.damping.num_quad : 1; // Default is a p=0 mask. assert(num_quad == 1); // MF 2025/06/11: Limited to this for now. @@ -72,125 +86,184 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks // Default function pointers. damp->write_func = gk_species_damping_write_disabled; + damp->proj_on_basis_c2p_ctx.cdim = app->cdim; + damp->proj_on_basis_c2p_ctx.vdim = gks->local_vel.ndim; + damp->proj_on_basis_c2p_ctx.vel_map = gks->vel_map; + damp->proj_on_basis_c2p_ctx.pos_map = app->position_map; + if (damp->type) { // Allocate rate array. - damp->rate = mkarr(app->use_gpu, num_quad==1? 1 : gks->basis.num_basis, gks->local_ext.volume); + damp->rate = mkarr(app->use_gpu, num_quad == 1? 
1 : gks->basis.num_basis, + gks->local_ext.volume); damp->rate_host = damp->rate; if (app->use_gpu) - damp->rate_host = mkarr(false, damp->rate->ncomp, damp->rate->size); + damp->rate_host = mkarr(false, damp->rate->ncomp, damp->rate->size); if (damp->type == GKYL_GK_DAMPING_USER_INPUT) { struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context. proj_on_basis_c2p_ctx.cdim = app->cdim; proj_on_basis_c2p_ctx.vdim = gks->local_vel.ndim; proj_on_basis_c2p_ctx.vel_map = gks->vel_map; - gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew( &(struct gkyl_proj_on_basis_inp) { - .grid = &gks->grid, - .basis = &gks->basis, - .num_quad = num_quad, - .num_ret_vals = 1, - .eval = gks->info.damping.rate_profile, - .ctx = gks->info.damping.rate_profile_ctx, - .c2p_func = proj_on_basis_c2p_phase_func, - .c2p_func_ctx = &proj_on_basis_c2p_ctx, - } - ); + gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew(&(struct gkyl_proj_on_basis_inp) { + .grid = &gks->grid, + .basis = &gks->basis, + .num_quad = num_quad, + .num_ret_vals = 1, + .eval = gks->info.damping.rate_profile, + .ctx = gks->info.damping.rate_profile_ctx, + .c2p_func = proj_on_basis_c2p_phase_func, + .c2p_func_ctx = &proj_on_basis_c2p_ctx, + }); gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, damp->rate_host); gkyl_proj_on_basis_release(projup); gkyl_array_copy(damp->rate, damp->rate_host); if (num_quad == 1) - gkyl_array_scale_range(damp->rate, 1.0/pow(sqrt(2.0),gks->grid.ndim), &gks->local); + gkyl_array_scale_range(damp->rate, 1.0 / pow(sqrt(2.0), gks->grid.ndim), &gks->local); } else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) { damp->evolve = true; // Since the loss cone boundary is proportional to phi(t). - // Maximum bmag and its location. - // NOTE: if the same max bmag occurs at multiple locations, - // bmag_max_coord may have different values on different MPI processes. 
- double bmag_max_coord_ho[GKYL_MAX_CDIM]; - double bmag_max_ho = gkyl_gk_geometry_reduce_arg_bmag(app->gk_geom, GKYL_MAX, bmag_max_coord_ho); - double bmag_max_local = bmag_max_ho; - double bmag_max_global; - gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, &bmag_max_local, &bmag_max_global); - double bmag_max_coord_local[app->cdim], bmag_max_coord_global[app->cdim]; - if (fabs(bmag_max_ho - bmag_max_global) < 1e-16) { - for (int d=0; d<app->cdim; d++) - bmag_max_coord_local[d] = bmag_max_coord_ho[d]; + // Create peak finder for bmag to find the mirror throat. + // Search along the parallel (z) direction, which is the last configuration space dimension. + int search_dir = app->cdim - 1; + struct gkyl_array_dg_find_peaks_inp peak_inp = { + .basis = &app->basis, + .grid = &app->grid, + .range = &app->global, + .range_ext = &app->global_ext, + .search_dir = search_dir, + .use_gpu = app->use_gpu, + }; + // Gather bmag onto the global range: the peak finder is built on (and scans) app->global. + struct gkyl_array *bmag_int_global = mkarr(app->use_gpu, + app->gk_geom->geo_int.bmag->ncomp, app->global_ext.volume); + damp->phi_smooth_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume); + + gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag, + bmag_int_global); + damp->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global); + gkyl_array_dg_find_peaks_advance(damp->bmag_peak_finder, bmag_int_global); + gkyl_array_release(bmag_int_global); + + // Get the LOCAL_MAX peak (bmag maximum along z direction). 
+ int num_peaks = gkyl_array_dg_find_peaks_num_peaks(damp->bmag_peak_finder); + damp->bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less + damp->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder, + damp->bmag_max_peak_idx); + damp->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder, + damp->bmag_max_peak_idx); + damp->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder, + num_peaks - 1); + damp->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder, + num_peaks - 1); + damp->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(damp->bmag_peak_finder); + damp->bmag_max_range = gkyl_array_dg_find_peaks_get_range(damp->bmag_peak_finder); + damp->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(damp->bmag_peak_finder); + + damp->phi_at_bmag_max = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis, + damp->bmag_max_range_ext->volume); + damp->phi_at_bmag_tandem = mkarr(app->use_gpu, damp->bmag_max_basis->num_basis, + damp->bmag_max_range_ext->volume); + // phi is defined as 0 at the wall + + bool is_symmetric = false; // Initialized so it is never read indeterminate if the assert below is compiled out (NDEBUG). + int cdim = app->cdim; + if (gkyl_compare_double(-app->grid.lower[cdim - 1], app->grid.upper[cdim - 1], 1e-12)) { + is_symmetric = true; + } + else if (gkyl_compare_double(app->grid.lower[cdim - 1], 0.0, 1e-12)) { + is_symmetric = false; } else { - for (int d=0; d<app->cdim; d++) - bmag_max_coord_local[d] = -DBL_MAX; + assert(false); // Needs either the lower bound at 0 or symmetric grid } - gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, app->cdim, bmag_max_coord_local, bmag_max_coord_global); - if (app->use_gpu) { - damp->bmag_max = gkyl_cu_malloc(sizeof(double)); - damp->bmag_max_coord = gkyl_cu_malloc(app->cdim*sizeof(double)); - gkyl_cu_memcpy(damp->bmag_max, &bmag_max_global, sizeof(double), GKYL_CU_MEMCPY_H2D); - gkyl_cu_memcpy(damp->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double), 
GKYL_CU_MEMCPY_H2D); + if ( (is_symmetric && num_peaks == 5) || (!is_symmetric && num_peaks == 3) ) { + damp->is_tandem = false; + } + else if ((is_symmetric && num_peaks == 9) || (!is_symmetric && num_peaks == 5)) { + damp->is_tandem = true; } else { - damp->bmag_max = gkyl_malloc(sizeof(double)); - damp->bmag_max_coord = gkyl_malloc(app->cdim*sizeof(double)); - memcpy(damp->bmag_max, &bmag_max_global, sizeof(double)); - memcpy(damp->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double)); + assert(false); // Unsupported number of extrema for loss-cone damping } - // Electrostatic potential at bmag_max_coord. - if (app->use_gpu) { - damp->phi_m = gkyl_cu_malloc(sizeof(double)); - damp->phi_m_global = gkyl_cu_malloc(sizeof(double)); + if (damp->is_tandem) { + damp->bmag_tandem_peak_idx = num_peaks - 4; } else { - damp->phi_m = gkyl_malloc(sizeof(double)); - damp->phi_m_global = gkyl_malloc(sizeof(double)); + damp->bmag_tandem_peak_idx = num_peaks - 2; } + damp->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(damp->bmag_peak_finder, + damp->bmag_tandem_peak_idx); + damp->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(damp->bmag_peak_finder, + damp->bmag_tandem_peak_idx); // Operator that projects the loss cone mask. 
struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = { .phase_grid = &gks->grid, .conf_basis = &app->basis, .phase_basis = &gks->basis, - .conf_range = &app->local, + .conf_range = &app->local, .conf_range_ext = &app->local_ext, - .vel_range = &gks->local_vel, + .vel_range = &gks->local_vel, .vel_map = gks->vel_map, .bmag = app->gk_geom->geo_int.bmag, .bmag_max = damp->bmag_max, - .bmag_max_loc = damp->bmag_max_coord, + .bmag_max_z_coord = damp->bmag_max_z_coord, + .bmag_wall = damp->bmag_wall, + .bmag_wall_z_coord = damp->bmag_wall_z_coord, + .bmag_tandem = damp->bmag_tandem, + .bmag_tandem_z_coord = damp->bmag_tandem_z_coord, + .is_tandem = damp->is_tandem, + .bmag_max_basis = damp->bmag_max_basis, + .bmag_max_range = damp->bmag_max_range, .mass = gks->info.mass, .charge = gks->info.charge, .num_quad = num_quad, .use_gpu = app->use_gpu, }; - damp->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj ); + damp->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj); // Project the conf-space rate profile provided. - struct gkyl_array *scale_prof_high_order = mkarr(app->use_gpu, gks->basis.num_basis, gks->local_ext.volume); - struct gkyl_array *scale_prof_high_order_ho = app->use_gpu? mkarr(false, scale_prof_high_order->ncomp, scale_prof_high_order->size) + struct gkyl_array *scale_prof_high_order = mkarr(app->use_gpu, gks->basis.num_basis, + gks->local_ext.volume); + struct gkyl_array *scale_prof_high_order_ho = app->use_gpu? 
mkarr(false, + scale_prof_high_order->ncomp, scale_prof_high_order->size) : gkyl_array_acquire(scale_prof_high_order); - - gkyl_proj_on_basis *projup = gkyl_proj_on_basis_new(&gks->grid, &gks->basis, num_quad, 1, + + gkyl_proj_on_basis *projup = gkyl_proj_on_basis_new(&gks->grid, &gks->basis, num_quad, 1, gks->info.damping.rate_profile, gks->info.damping.rate_profile_ctx); gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, scale_prof_high_order_ho); gkyl_proj_on_basis_release(projup); gkyl_array_copy(scale_prof_high_order, scale_prof_high_order_ho); - damp->scale_prof = mkarr(app->use_gpu, num_quad == 1? 1 : gks->basis.num_basis, gks->local_ext.volume); - gkyl_array_set_offset(damp->scale_prof, pow(sqrt(2.0),gks->grid.ndim), scale_prof_high_order, 0); + damp->scale_prof = mkarr(app->use_gpu, num_quad == 1? 1 : gks->basis.num_basis, + gks->local_ext.volume); + gkyl_array_set_offset(damp->scale_prof, pow(sqrt(2.0), gks->grid.ndim), scale_prof_high_order, + 0); gkyl_array_release(scale_prof_high_order_ho); gkyl_array_release(scale_prof_high_order); // Compute the initial damping rate (assuming phi=0 because phi hasn't been computed). // Find the potential at the mirror throat. - gkyl_dg_basis_ops_eval_array_at_coord_comp(app->field->phi_smooth, damp->bmag_max_coord, - app->basis_on_dev, &app->grid, &app->local, damp->phi_m); - gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, damp->phi_m, damp->phi_m_global); - // Project the loss cone mask. - gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local, - app->field->phi_smooth, damp->phi_m_global, damp->rate); + gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, app->field->phi_smooth, + damp->bmag_max_peak_idx, damp->phi_at_bmag_max); + + if (damp->is_tandem) { + gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, app->field->phi_smooth, + damp->bmag_tandem_peak_idx, damp->phi_at_bmag_tandem); + // Project the loss cone mask. 
+ gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local, + app->field->phi_smooth, damp->phi_at_bmag_max, damp->phi_at_bmag_tandem, damp->rate); + } + else { + // Project the loss cone mask using the phi_m array. + gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local, + app->field->phi_smooth, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate); + } // Multiply by the user's scaling profile. gkyl_array_scale_by_cell(damp->rate, damp->scale_prof); } @@ -206,7 +279,8 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks } void -gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *gks, struct gk_damping *damp, +gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *gks, + struct gk_damping *damp, const struct gkyl_array *phi, const struct gkyl_array *fin, struct gkyl_array *f_buffer, struct gkyl_array *rhs, struct gkyl_array *cflrate) { @@ -218,21 +292,28 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g gkyl_array_accumulate(rhs, -1.0, f_buffer); } else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) { - // Find the potential at the mirror throat. - gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, damp->bmag_max_coord, - app->basis_on_dev, &app->grid, &app->local, damp->phi_m); - gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, damp->phi_m, damp->phi_m_global); - - // Project the loss cone mask. 
- gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local, - phi, damp->phi_m_global, damp->rate); + gkyl_comm_array_allgather(app->comm, &app->local, &app->global, phi, damp->phi_smooth_global); + // Find the potential at bmag_max. The peak finder was built on the global range, so it reads the gathered phi; the loss-cone mask below is indexed with app->local and therefore reads the local phi. + gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, damp->phi_smooth_global, + damp->bmag_max_peak_idx, damp->phi_at_bmag_max); + + if (damp->is_tandem) { + gkyl_array_dg_find_peaks_project_on_peak_idx(damp->bmag_peak_finder, + damp->phi_smooth_global, + damp->bmag_tandem_peak_idx, damp->phi_at_bmag_tandem); + gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local, + phi, damp->phi_at_bmag_max, damp->phi_at_bmag_tandem, damp->rate); + } + else { + gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local, + phi, damp->phi_at_bmag_max, damp->phi_at_bmag_max, damp->rate); + } // Assemble the damping term -scale_prof * mask * f. gkyl_array_set(f_buffer, 1.0, fin); gkyl_array_scale_by_cell(damp->rate, damp->scale_prof); gkyl_array_scale_by_cell(f_buffer, damp->rate); gkyl_array_accumulate(rhs, -1.0, f_buffer); - } // Add the frequency to the CFL frequency. @@ -243,7 +324,7 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g } void -gk_species_damping_write(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame) +gk_species_damping_write(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm, int frame) { gks->damping.write_func(app, gks, tm, frame); } @@ -253,25 +334,26 @@ gk_species_damping_release(const struct gkyl_gyrokinetic_app *app, const struct { if (damp->type) { gkyl_array_release(damp->rate); - if (app->use_gpu) + if (app->use_gpu) { gkyl_array_release(damp->rate_host); + } if (damp->type == GKYL_GK_DAMPING_USER_INPUT) { // Nothing to release. 
} else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) { - if (app->use_gpu) { - gkyl_cu_free(damp->bmag_max); - gkyl_cu_free(damp->bmag_max_coord); - gkyl_cu_free(damp->phi_m); - gkyl_cu_free(damp->phi_m_global); - } - else { - gkyl_free(damp->bmag_max); - gkyl_free(damp->bmag_max_coord); - gkyl_free(damp->phi_m); - gkyl_free(damp->phi_m_global); - } + gkyl_array_release(damp->bmag_max); + gkyl_array_release(damp->bmag_max_z_coord); + gkyl_array_release(damp->bmag_wall); + gkyl_array_release(damp->bmag_wall_z_coord); + gkyl_array_release(damp->bmag_tandem); + gkyl_array_release(damp->bmag_tandem_z_coord); + + gkyl_array_release(damp->phi_at_bmag_max); + gkyl_array_release(damp->phi_at_bmag_tandem); + + gkyl_array_release(damp->phi_smooth_global); + gkyl_array_dg_find_peaks_release(damp->bmag_peak_finder); gkyl_loss_cone_mask_gyrokinetic_release(damp->lcm_proj_op); gkyl_array_release(damp->scale_prof); } diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c index 0a983aeaa..0fbd4002a 100644 --- a/gyrokinetic/apps/gk_species_fdot_multiplier.c +++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c @@ -1,16 +1,19 @@ #include -#include -#include #include +#include #include +#include +#include void -gk_species_fdot_multiplier_write_disabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame) +gk_species_fdot_multiplier_write_disabled(gkyl_gyrokinetic_app *app, struct gk_species *gks, + double tm, int frame) { } void -gk_species_fdot_multiplier_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame) +gk_species_fdot_multiplier_write_enabled(gkyl_gyrokinetic_app *app, struct gk_species *gks, + double tm, int frame) { struct timespec wst = gkyl_wall_clock(); // DG metadata for multiplier. 
@@ -18,34 +21,38 @@ gk_species_fdot_multiplier_write_enabled(gkyl_gyrokinetic_app* app, struct gk_sp { .key = "poly_order", .elem_type = GKYL_MP_UNSIGNED_INT, .uval = 0 }, { .key = "basis_type", .elem_type = GKYL_MP_STRING, .cval = "serendipity" }, }; - int mpe_mult_len = sizeof(mpe_mult)/sizeof(mpe_mult[0]); + int mpe_mult_len = sizeof(mpe_mult) / sizeof(mpe_mult[0]); // Update app basic metada with time/frame. gkyl_msgpack_map_elem_set_double(app->io_meta_basic_len, app->io_meta_basic, "time", tm); gkyl_msgpack_map_elem_set_uint(app->io_meta_basic_len, app->io_meta_basic, "frame", frame); // Package metadata. - int io_meta_len[] = {app->io_meta_basic_len, mpe_mult_len, app->gk_geom->io_meta_len}; - const struct gkyl_msgpack_map_elem* io_meta[] = {app->io_meta_basic, mpe_mult, app->gk_geom->io_meta}; - struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len)/sizeof(int), io_meta_len, io_meta); + int io_meta_len[] = { app->io_meta_basic_len, mpe_mult_len, app->gk_geom->io_meta_len }; + const struct gkyl_msgpack_map_elem *io_meta[] = { app->io_meta_basic, mpe_mult, + app->gk_geom->io_meta }; + struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len) / sizeof(int), + io_meta_len, io_meta); // Write out the multiplicative function. const char *fmt = "%s-%s_fdot_multiplier_%d.gkyl"; int sz = gkyl_calc_strlen(fmt, app->name, gks->info.name, frame); - char fileNm[sz+1]; // ensures no buffer overflow + char fileNm[sz + 1]; // ensures no buffer overflow snprintf(fileNm, sizeof fileNm, fmt, app->name, gks->info.name, frame); // Copy data from device to host before writing it out. 
if (app->use_gpu) gkyl_array_copy(gks->fdot_mult.multiplier_host, gks->fdot_mult.multiplier); - gkyl_comm_array_write(gks->comm, &gks->grid, &gks->local, mt, gks->fdot_mult.multiplier_host, fileNm); + gkyl_comm_array_write(gks->comm, &gks->grid, &gks->local, mt, gks->fdot_mult.multiplier_host, + fileNm); app->stat.n_io += 1; - gkyl_msgpack_data_release(mt); + gkyl_msgpack_data_release(mt); app->stat.species_diag_io_tm += gkyl_time_diff_now_sec(wst); } void -gk_species_fdot_multiplier_write_init_only(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame) +gk_species_fdot_multiplier_write_init_only(gkyl_gyrokinetic_app *app, struct gk_species *gks, + double tm, int frame) { gk_species_fdot_multiplier_write_enabled(app, gks, tm, frame); gks->fdot_mult.write_func = gk_species_fdot_multiplier_write_disabled; @@ -60,19 +67,44 @@ gk_species_fdot_multiplier_advance_mult(gkyl_gyrokinetic_app *app, const struct } void -gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, const struct gk_species *gks, - struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out) +gk_species_fdot_multiplier_advance_omegaH_mult(gkyl_gyrokinetic_app *app, + const struct gk_species *gks, + struct gk_fdot_multiplier *fdmul, double *out) { - // Find the potential at the mirror throat. - gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, fdmul->bmag_max_coord, - app->basis_on_dev, &app->grid, &app->local, fdmul->phi_m); - gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, fdmul->phi_m, fdmul->phi_m_global); + // Divide out[0] in place by the collisionless scale factor (gks->collisionless.scale_fac). + out[0] = out[0] / gks->collisionless.scale_fac; +} - // Project the loss cone mask. 
- gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local, - phi, fdmul->phi_m_global, fdmul->multiplier); +void +gk_species_fdot_multiplier_advance_omegaH_disabled(gkyl_gyrokinetic_app *app, + const struct gk_species *gks, + struct gk_fdot_multiplier *fdmul, double *out) +{ +} - // Multiply out by the multplier. +void +gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app, + const struct gk_species *gks, + struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out) +{ + gkyl_comm_array_allgather(app->comm, &app->local, &app->global, phi, fdmul->phi_smooth_global); + // Find the potential at bmag_max + gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, fdmul->phi_smooth_global, + fdmul->bmag_max_peak_idx, fdmul->phi_at_bmag_max); + + if (fdmul->is_tandem) { + gkyl_array_dg_find_peaks_project_on_peak_idx(fdmul->bmag_peak_finder, fdmul->phi_smooth_global, + fdmul->bmag_tandem_peak_idx, fdmul->phi_at_bmag_tandem); + gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local, + phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_tandem, + fdmul->multiplier); + } + else { + gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local, + phi, fdmul->phi_at_bmag_max, fdmul->phi_at_bmag_max, fdmul->multiplier); + } + + // Multiply out by the multiplier. gkyl_array_scale_by_cell(out, fdmul->multiplier); } @@ -108,6 +140,7 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec // Default function pointers. 
fdmul->write_func = gk_species_fdot_multiplier_write_disabled; fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_disabled; + fdmul->advance_times_omegaH_func = gk_species_fdot_multiplier_advance_omegaH_disabled; fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_disabled; if (fdmul->type) { @@ -123,7 +156,8 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec // Allocate multiplier array. fdmul->multiplier = mkarr(app->use_gpu, basis_mult.num_basis, gks->local_ext.volume); - fdmul->multiplier_host = app->use_gpu? mkarr(false, fdmul->multiplier->ncomp, fdmul->multiplier->size) + fdmul->multiplier_host = app->use_gpu? mkarr(false, fdmul->multiplier->ncomp, + fdmul->multiplier->size) : gkyl_array_acquire(fdmul->multiplier); // Context for c2p function passed to proj_on_basis. @@ -134,114 +168,157 @@ gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk_spec if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_USER_INPUT) { - gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew( &(struct gkyl_proj_on_basis_inp) { - .grid = &gks->grid, - .basis = &basis_mult, - .num_quad = basis_mult.poly_order+1, - .num_ret_vals = 1, - .eval = gks->info.time_rate_multiplier.profile, - .ctx = gks->info.time_rate_multiplier.profile_ctx, - .c2p_func = proj_on_basis_c2p_phase_func, - .c2p_func_ctx = &fdmul->proj_on_basis_c2p_ctx, - } - ); + gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew(&(struct gkyl_proj_on_basis_inp) { + .grid = &gks->grid, + .basis = &basis_mult, + .num_quad = basis_mult.poly_order + 1, + .num_ret_vals = 1, + .eval = gks->info.time_rate_multiplier.profile, + .ctx = gks->info.time_rate_multiplier.profile_ctx, + .c2p_func = proj_on_basis_c2p_phase_func, + .c2p_func_ctx = &fdmul->proj_on_basis_c2p_ctx, + }); gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, fdmul->multiplier_host); gkyl_proj_on_basis_release(projup); gkyl_array_copy(fdmul->multiplier, fdmul->multiplier_host); 
fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_mult; + fdmul->advance_times_omegaH_func = gk_species_fdot_multiplier_advance_omegaH_mult; fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_mult; if (fdmul->write_diagnostics) fdmul->write_func = gk_species_fdot_multiplier_write_init_only; else gkyl_array_release(fdmul->multiplier_host); - } else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE) { // Available options: - // A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array. - // B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_const=true. Output: ncomp=1 array. + // A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array. + // B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_const=true. Output: ncomp=1 array. enum gkyl_quad_type qtype = GKYL_GAUSS_LOBATTO_QUAD; - int num_quad = gks->basis.poly_order+1; // This can be p+1 or 1. Must be - // at leat p+1 for Gauss-Lobatto. - - // Maximum bmag and its location. - // NOTE: if the same max bmag occurs at multiple locations, - // bmag_max_coord may have different values on different MPI processes. - double bmag_max_coord_ho[GKYL_MAX_CDIM]; - double bmag_max_ho = gkyl_gk_geometry_reduce_arg_bmag(app->gk_geom, GKYL_MAX, bmag_max_coord_ho); - double bmag_max_local = bmag_max_ho; - double bmag_max_global; - gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, &bmag_max_local, &bmag_max_global); - double bmag_max_coord_local[app->cdim], bmag_max_coord_global[app->cdim]; - if (fabs(bmag_max_ho - bmag_max_global) < 1e-16) { - for (int d=0; dcdim; d++) - bmag_max_coord_local[d] = bmag_max_coord_ho[d]; + int num_quad = gks->basis.poly_order + 1; // This can be p+1 or 1. Must be + // at least p+1 for Gauss-Lobatto. + + // Create peak finder for bmag to find the mirror throat. + // Search along the parallel (z) direction, which is the last configuration space dimension. 
+ int search_dir = app->cdim - 1; + struct gkyl_array_dg_find_peaks_inp peak_inp = { + .basis = &app->basis, + .grid = &app->grid, + .range = &app->global, + .range_ext = &app->global_ext, + .search_dir = search_dir, + .use_gpu = app->use_gpu, + }; + // Pass a global bmag_int into the peak finder + struct gkyl_array *bmag_int_global = mkarr(app->use_gpu, + app->gk_geom->geo_int.bmag->ncomp, app->global_ext.volume); + fdmul->phi_smooth_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume); + + gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_int.bmag, + bmag_int_global); + + fdmul->bmag_peak_finder = gkyl_array_dg_find_peaks_new(&peak_inp, bmag_int_global); + gkyl_array_dg_find_peaks_advance(fdmul->bmag_peak_finder, bmag_int_global); + gkyl_array_release(bmag_int_global); + + // Get the LOCAL_MAX peak (bmag maximum along z direction). + int num_peaks = gkyl_array_dg_find_peaks_num_peaks(fdmul->bmag_peak_finder); + fdmul->bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less + fdmul->bmag_max = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, + fdmul->bmag_max_peak_idx); + fdmul->bmag_max_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, + fdmul->bmag_max_peak_idx); + fdmul->bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, + num_peaks - 1); + fdmul->bmag_wall_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, + num_peaks - 1); + fdmul->bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(fdmul->bmag_peak_finder); + fdmul->bmag_max_range = gkyl_array_dg_find_peaks_get_range(fdmul->bmag_peak_finder); + fdmul->bmag_max_range_ext = gkyl_array_dg_find_peaks_get_range_ext(fdmul->bmag_peak_finder); + + fdmul->phi_at_bmag_max = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis, + fdmul->bmag_max_range_ext->volume); + fdmul->phi_at_bmag_tandem = mkarr(app->use_gpu, fdmul->bmag_max_basis->num_basis, + 
fdmul->bmag_max_range_ext->volume); + // phi is defined as 0 at the wall + + bool is_symmetric, is_tandem; + int cdim = app->cdim; + if (gkyl_compare_double(-app->grid.lower[cdim - 1], app->grid.upper[cdim - 1], 1e-12)) { + is_symmetric = true; + } + else if (gkyl_compare_double(app->grid.lower[cdim - 1], 0.0, 1e-12)) { + is_symmetric = false; } else { - for (int d=0; dcdim; d++) - bmag_max_coord_local[d] = -DBL_MAX; + assert(false); // Needs either the lower bound at 0 or symmetric grid } - gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, app->cdim, bmag_max_coord_local, bmag_max_coord_global); - if (app->use_gpu) { - fdmul->bmag_max = gkyl_cu_malloc(sizeof(double)); - fdmul->bmag_max_coord = gkyl_cu_malloc(app->cdim*sizeof(double)); - gkyl_cu_memcpy(fdmul->bmag_max, &bmag_max_global, sizeof(double), GKYL_CU_MEMCPY_H2D); - gkyl_cu_memcpy(fdmul->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double), GKYL_CU_MEMCPY_H2D); + if ( (is_symmetric && num_peaks == 5) || (!is_symmetric && num_peaks == 3) ) { + is_tandem = false; + } + else if ((is_symmetric && num_peaks == 9) || (!is_symmetric && num_peaks == 5)) { + is_tandem = true; } else { - fdmul->bmag_max = gkyl_malloc(sizeof(double)); - fdmul->bmag_max_coord = gkyl_malloc(app->cdim*sizeof(double)); - memcpy(fdmul->bmag_max, &bmag_max_global, sizeof(double)); - memcpy(fdmul->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double)); + assert(false); // Unsupported number of extrema for loss-cone multiplier } - // Electrostatic potential at bmag_max_coord. 
- if (app->use_gpu) { - fdmul->phi_m = gkyl_cu_malloc(sizeof(double)); - fdmul->phi_m_global = gkyl_cu_malloc(sizeof(double)); + if (is_tandem) { + fdmul->bmag_tandem_peak_idx = num_peaks - 4; } else { - fdmul->phi_m = gkyl_malloc(sizeof(double)); - fdmul->phi_m_global = gkyl_malloc(sizeof(double)); + fdmul->bmag_tandem_peak_idx = num_peaks - 2; } + fdmul->bmag_tandem = gkyl_array_dg_find_peaks_acquire_vals(fdmul->bmag_peak_finder, + fdmul->bmag_tandem_peak_idx); + fdmul->bmag_tandem_z_coord = gkyl_array_dg_find_peaks_acquire_coords(fdmul->bmag_peak_finder, + fdmul->bmag_tandem_peak_idx); // Operator that projects the loss cone mask. struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = { .phase_grid = &gks->grid, .conf_basis = &app->basis, .phase_basis = &gks->basis, - .conf_range = &app->local, + .conf_range = &app->local, .conf_range_ext = &app->local_ext, - .vel_range = &gks->local_vel, + .vel_range = &gks->local_vel, .vel_map = gks->vel_map, .bmag = app->gk_geom->geo_int.bmag, .bmag_max = fdmul->bmag_max, - .bmag_max_loc = fdmul->bmag_max_coord, + .bmag_max_z_coord = fdmul->bmag_max_z_coord, + .bmag_wall = fdmul->bmag_wall, + .bmag_wall_z_coord = fdmul->bmag_wall_z_coord, + .bmag_tandem = fdmul->bmag_tandem, + .bmag_tandem_z_coord = fdmul->bmag_tandem_z_coord, + .is_tandem = is_tandem, + .bmag_max_basis = fdmul->bmag_max_basis, + .bmag_max_range = fdmul->bmag_max_range, .mass = gks->info.mass, .charge = gks->info.charge, .qtype = qtype, .num_quad = num_quad, .cellwise_trap_loss = cellwise_const, - .c2p_pos_func = proj_on_basis_c2p_position_func, - .c2p_pos_func_ctx = &fdmul->proj_on_basis_c2p_ctx, .use_gpu = app->use_gpu, }; - fdmul->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj ); + fdmul->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj); fdmul->advance_times_cfl_func = gk_species_fdot_multiplier_advance_loss_cone_mult; + fdmul->advance_times_omegaH_func = gk_species_fdot_multiplier_advance_omegaH_mult; 
fdmul->advance_times_rate_func = gk_species_fdot_multiplier_advance_mult; - if (fdmul->write_diagnostics) + if (fdmul->write_diagnostics) { fdmul->write_func = gk_species_fdot_multiplier_write_enabled; - else + } + else { gkyl_array_release(fdmul->multiplier_host); + } } } } void -gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, const struct gk_species *gks, +gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, + const struct gk_species *gks, struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out) { struct timespec wst = gkyl_wall_clock(); @@ -250,9 +327,22 @@ gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, const st app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst); } - + void -gk_species_fdot_multiplier_advance_times_rate(gkyl_gyrokinetic_app *app, const struct gk_species *gks, +gk_species_fdot_multiplier_advance_times_omegaH(gkyl_gyrokinetic_app *app, + const struct gk_species *gks, + struct gk_fdot_multiplier *fdmul, double *out) +{ + struct timespec wst = gkyl_wall_clock(); + + fdmul->advance_times_omegaH_func(app, gks, fdmul, out); + + app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst); +} + +void +gk_species_fdot_multiplier_advance_times_rate(gkyl_gyrokinetic_app *app, + const struct gk_species *gks, struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out) { struct timespec wst = gkyl_wall_clock(); @@ -260,46 +350,48 @@ gk_species_fdot_multiplier_advance_times_rate(gkyl_gyrokinetic_app *app, const s fdmul->advance_times_rate_func(app, gks, fdmul, phi, out); app->stat.species_fdot_mult_tm += gkyl_time_diff_now_sec(wst); - } void -gk_species_fdot_multiplier_write(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame) +gk_species_fdot_multiplier_write(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm, + int frame) { gks->fdot_mult.write_func(app, gks, tm, frame); } void 
-gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, const struct gk_fdot_multiplier *fdmul) +gk_species_fdot_multiplier_release(const struct gkyl_gyrokinetic_app *app, + const struct gk_fdot_multiplier *fdmul) { if (fdmul->type) { gkyl_array_release(fdmul->multiplier); - if (fdmul->write_diagnostics) + if (fdmul->write_diagnostics) { gkyl_array_release(fdmul->multiplier_host); + } - if (fdmul->type == GKYL_GK_DAMPING_USER_INPUT) { + if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_USER_INPUT) { // Nothing to release. } - else if (fdmul->type == GKYL_GK_DAMPING_LOSS_CONE) { - if (app->use_gpu) { - gkyl_cu_free(fdmul->bmag_max); - gkyl_cu_free(fdmul->bmag_max_coord); - gkyl_cu_free(fdmul->phi_m); - gkyl_cu_free(fdmul->phi_m_global); - } - else { - gkyl_free(fdmul->bmag_max); - gkyl_free(fdmul->bmag_max_coord); - gkyl_free(fdmul->phi_m); - gkyl_free(fdmul->phi_m_global); - } + else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE) { + gkyl_array_release(fdmul->bmag_max); + gkyl_array_release(fdmul->bmag_max_z_coord); + gkyl_array_release(fdmul->bmag_wall); + gkyl_array_release(fdmul->bmag_wall_z_coord); + gkyl_array_release(fdmul->bmag_tandem); + gkyl_array_release(fdmul->bmag_tandem_z_coord); + + gkyl_array_release(fdmul->phi_at_bmag_max); + gkyl_array_release(fdmul->phi_at_bmag_tandem); + + gkyl_array_release(fdmul->phi_smooth_global); + gkyl_array_dg_find_peaks_release(fdmul->bmag_peak_finder); gkyl_loss_cone_mask_gyrokinetic_release(fdmul->lcm_proj_op); } } } void -gk_species_fdot_multiplier_reset(gkyl_gyrokinetic_app* app, double tm, struct gk_species *gks, +gk_species_fdot_multiplier_reset(gkyl_gyrokinetic_app *app, double tm, struct gk_species *gks, struct gk_fdot_multiplier *fdmul, struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp) { gk_species_fdot_multiplier_release(app, fdmul); diff --git a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h index ae971282b..c49da3a1a 100644 --- 
a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h +++ b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h @@ -801,12 +801,27 @@ struct gk_source { struct gk_damping { enum gkyl_gyrokinetic_damping_type type; // Type of damping term. bool evolve; // Whether the source is time dependent. + bool is_tandem; // Whether we are doing a tandem mirror. struct gkyl_array *rate; // Damping rate. struct gkyl_array *rate_host; // Host copy for use in IO and projecting. + struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context. struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask. - double *bmag_max; // Maximum magnetic field amplitude. - double *bmag_max_coord; // Location of bmag_max. - double *phi_m, *phi_m_global; // Electrostatic potential at bmag_max. + struct gkyl_array_dg_find_peaks *bmag_peak_finder; // Finds peaks in bmag along parallel direction. + struct gkyl_array *phi_smooth_global; // Smoothed electrostatic potential on the global grid. + // Per-field-line bmag_max arrays (pointers to arrays owned by bmag_peak_finder). + const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line. + const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line. + const struct gkyl_array *bmag_wall; // Magnetic field amplitude at the wall per field line. + const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag_wall per field line. + const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case). + const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate of bmag_tandem per field line. + const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays. + const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays. + const struct gkyl_range *bmag_max_range_ext; // Extended range for bmag_max arrays. + int bmag_max_peak_idx; // Index of the LOCAL_MAX peak in the peak finder. 
+ int bmag_tandem_peak_idx; // Index of the TANDEM_MIRROR peak in the peak finder. + struct gkyl_array *phi_at_bmag_max; // Phi evaluated at all peak locations. + struct gkyl_array *phi_at_bmag_tandem; // Phi evaluated at tandem mirror locations. struct gkyl_array *scale_prof; // Conf-space scaling factor profile. // Functions chosen at runtime. void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame); @@ -816,19 +831,36 @@ struct gk_fdot_multiplier { enum gkyl_gyrokinetic_fdot_multiplier_type type; // Type of multiplicative function term. bool write_diagnostics; // Whether to write diagnostics out. bool evolve; // Whether the multiplicative function is time dependent. + bool is_tandem; // Whether we are doing a tandem mirror struct gkyl_array *multiplier; // Damping rate. struct gkyl_array *multiplier_host; // Host copy for use in IO and projecting. struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context. struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask. - double *bmag_max; // Maximum magnetic field amplitude. - double *bmag_max_coord; // Location of bmag_max. - double *phi_m, *phi_m_global; // Electrostatic potential at bmag_max. + // Updater to find bmag peaks (mirror throat location). + struct gkyl_array_dg_find_peaks *bmag_peak_finder; // Finds peaks in bmag along parallel direction. + struct gkyl_array *phi_smooth_global; // Smoothed electrostatic potential on the global grid. + // Per-field-line bmag_max arrays (pointers to arrays owned by bmag_peak_finder). + const struct gkyl_array *bmag_max; // Maximum magnetic field amplitude per field line. + const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line. + const struct gkyl_array *bmag_wall; // Magnetic field amplitude at the wall per field line. + const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag_wall per field line. 
+ const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case). + const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate of bmag_tandem per field line. + const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays. + const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays. + const struct gkyl_range *bmag_max_range_ext; // Extended range for bmag_max arrays. + int bmag_max_peak_idx; // Index of the LOCAL_MAX peak in the peak finder. + int bmag_tandem_peak_idx; // Index of the TANDEM_MIRROR peak in the peak finder. + struct gkyl_array *phi_at_bmag_max; // Phi evaluated at all peak locations. + struct gkyl_array *phi_at_bmag_tandem; // Phi evaluated at tandem mirror locations. // Functions chosen at runtime. void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame); void (*advance_times_rate_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks, struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out); void (*advance_times_cfl_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks, struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out); + void (*advance_times_omegaH_func)(gkyl_gyrokinetic_app *app, const struct gk_species *gks, + struct gk_fdot_multiplier *fdmul, double *out); }; struct gk_heating { @@ -2830,6 +2862,17 @@ void gk_species_fdot_multiplier_init(struct gkyl_gyrokinetic_app *app, struct gk void gk_species_fdot_multiplier_advance_times_cfl(gkyl_gyrokinetic_app *app, const struct gk_species *gks, struct gk_fdot_multiplier *fdmul, const struct gkyl_array *phi, struct gkyl_array *out); +/** + * Multiply the omegaH rate. + * + * @param app gyrokinetic app object. + * @param gks Species object. + * @param fdmul Species df/dt multiplier object. + * @param out omegaH rate to multiply. 
+ */ +void gk_species_fdot_multiplier_advance_times_omegaH(gkyl_gyrokinetic_app *app, const struct gk_species *gks, + struct gk_fdot_multiplier *fdmul, double *out); + /** * Multiply df/dt. * diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c index 87c13efe3..e8e12e017 100644 --- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c +++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c @@ -30,80 +30,44 @@ struct gk_poa_phase_params { }; // Define the context of the simulation. This is basically all the globals -struct gk_mirror_ctx -{ +struct gk_mirror_ctx { int cdim, vdim; // Dimensionality. - // Plasma parameters - double mi; // Ion mass. - double me; // Electron mass. - double qi; // Ion charge. - double qe; // Electron charge. - double Te0; // Electron temperature. - double Ti0; // Ion temperature. - double n0; // Density. - double B_p; // Plasma magnetic field (mirror center). - double beta; // Plasma beta in the center. - double tau; // Temperature ratio. - - // Parameters controlling initial conditions. - double alim; - double alphaIC0; - double alphaIC1; - double Ti_perp0; // Reference ion perp temperature. - double Ti_par0; // Reference ion par temperature. - double Ti_perp_m; // Ion perp temperature at the throat. - double Ti_par_m; // Ion par temperature at the throat. - double cs_m; // Ion sound speed at the throat. - - double nuFrac; // Fraction multiplying collision frequency. - double logLambdaIon; // Ion Coulomb logarithm. - double nuIon; // Ion-ion collision freq. - - double vti; // Ion thermal speed. - double vte; // Electron thermal speed. - double c_s; // Ion sound speed. - double omega_ci; // Ion gyrofrequency. - double rho_s; // Ion sound gyroradius. - + double mi; + double qi; + double me; + double qe; + double Te0; + double n0; + double B_p; + double beta; + double tau; + double Ti0; + double nuFrac; + // Ion-ion collision freq. 
+ double logLambdaIon; + double nuIon; + double vti; double RatZeq0; // Radius of the field line at Z=0. - double Z_min; // Minimum axial coordinate Z. - double Z_max; // Maximum axial coordinate Z. - double z_min; // Minimum value of the position along the field line. - double z_max; // Maximum value of the position along the field line. - double psi_eval; // Psi (poloidal flux) of the field line. - double psi_in, z_in; // Auxiliary psi and z. - - // Magnetic equilibrium model. - double mcB; - double gamma; - double Z_m; // Axial coordinate at mirror throat. - double z_m; // Computational coordinate at mirror throat. - - // Source parameters - double NSrcIon; - double lineLengthSrcIon; - double sigSrcIon; - double NSrcFloorIon; - double TSrc0Ion; - double TSrcFloorIon; - - // Physical velocity space limits. - double vpar_min_ion, vpar_max_ion; + // Axial coordinate Z extents. Endure that Z=0 is not on + double z_min; + double z_max; + double psi_eval; + // Physics parameters at mirror throat + double vpar_max_ion; double mu_max_ion; - // Computational velocity space limits. - double vpar_lin_fac_inv, mu_lin_fac_inv; // Inverse factor of where linear mapping ends. - double vpar_pow, mu_pow; // Power of the velocity grid. - double vpar_min_ion_c, vpar_max_ion_c; - double mu_min_ion_c, mu_max_ion_c; - - // Grid DOF. + int Npsi; int Nz; int Nvpar; int Nmu; int cells[GKYL_MAX_DIM]; // Number of cells in all directions. int poly_order; + // Source parameters + double ion_source_amplitude; + double ion_source_sigma; + double ion_source_temp; + double t_end; // End time. int num_frames; // Number of output frames. int num_phases; // Number of phases. @@ -112,6 +76,15 @@ struct gk_mirror_ctx double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames). double dt_failure_tol; // Minimum allowable fraction of initial time-step. int num_failures_max; // Maximum allowable number of consecutive small time-steps. 
+ + // Geometry parameters for Lorentzian mirror + double mcB; // Magnetic field parameter + double gamma; // Width parameter for Lorentzian profile + double Z_m; // Mirror throat location + double Z_min; // Minimum Z coordinate + double Z_max; // Maximum Z coordinate + double psi_in; // Working variable for psi integration + double z_in; // Working variable for z integration }; double @@ -121,9 +94,10 @@ psi_RZ(double RIn, double ZIn, void *ctx) double mcB = app->mcB; double gamma = app->gamma; double Z_m = app->Z_m; + double psi = 0.5 * pow(RIn, 2.) * mcB * - (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) + - 1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.)))); + (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) + + 1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.)))); return psi; } @@ -131,9 +105,13 @@ double R_psiZ(double psiIn, double ZIn, void *ctx) { struct gk_mirror_ctx *app = ctx; - double Rout = sqrt(2.0 * psiIn / (app->mcB * - (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) + - 1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.)))))); + double mcB = app->mcB; + double gamma = app->gamma; + double Z_m = app->Z_m; + + double Rout = sqrt(2. * psiIn / (mcB * + (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) + + 1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.)))))); return Rout; } @@ -141,17 +119,21 @@ void Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag) { struct gk_mirror_ctx *app = ctx; - double Rcoord = R_psiZ(psiIn, ZIn, ctx); double mcB = app->mcB; double gamma = app->gamma; double Z_m = app->Z_m; - *BRad = -(1.0 / 2.0) * Rcoord * mcB * - (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) - - 2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) 
* (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.)))); - *BZ = mcB * - (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) + - 1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.)))); - *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2)); + + double Rcoord = R_psiZ(psiIn, ZIn, ctx); + + BRad[0] = -(1. / 2.) * Rcoord * mcB * + (-2. * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) - + 2. * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.)))); + + BZ[0] = mcB * + (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) + + 1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) ); + + Bmag[0] = sqrt(pow(BRad[0], 2) + pow(BZ[0], 2)); } double @@ -168,15 +150,13 @@ double z_psiZ(double psiIn, double ZIn, void *ctx) { struct gk_mirror_ctx *app = ctx; - app->psi_in = psiIn; double eps = 0.0; + app->psi_in = psiIn; struct gkyl_qr_res integral; - if (eps <= ZIn) - { + if (eps <= ZIn) { integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14); } - else - { + else { integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14); integral.res = -integral.res; } @@ -200,14 +180,12 @@ Z_psiz(double psiIn, double zIn, void *ctx) app->psi_in = psiIn; app->z_in = zIn; struct gkyl_qr_res Zout; - if (zIn >= 0.0) - { + if (0.0 <= zIn) { double fl = root_Z_psiz(-eps, ctx); double fr = root_Z_psiz(app->Z_max + eps, ctx); Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14); } - else - { + else { double fl = root_Z_psiz(app->Z_min - eps, ctx); double fr = root_Z_psiz(eps, ctx); Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14); @@ -215,235 +193,128 @@ Z_psiz(double psiIn, double zIn, void *ctx) return Zout.res; } +// Geometry evaluation functions for the gk app void -eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void 
*ctx) { - double z = xn[0]; + double psi = xc[0], theta = xc[1], z = xc[2]; - struct gk_mirror_ctx *app = ctx; - double NSrc = app->NSrcIon; - double zSrc = app->lineLengthSrcIon; - double sigSrc = app->sigSrcIon; - double NSrcFloor = app->NSrcFloorIon; + double Z = Z_psiz(psi, z, ctx); + double R = R_psiZ(psi, Z, ctx); - double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. - double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate. + // Cartesian coordinates on plane perpendicular to Z axis. + double x = R * cos(theta); + double y = R * sin(theta); - if (fabs(Z) <= app->Z_m) - { - fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2))) * - exp(-pow(z - zSrc, 2) / (2.0 * pow(sigSrc, 2)))); - } - else - { - fout[0] = 1e-16; - } + xp[0] = x; xp[1] = y; xp[2] = Z; } void -eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) { - fout[0] = 0.0; + struct gk_mirror_ctx *app = ctx; + double z = xc[2]; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double Z = Z_psiz(psi, z, ctx); + double BRad, BZ, Bmag; + Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); + + double phi = xc[1]; + // zc are computational coords. + // Set Cartesian components of magnetic field. 
+ fout[0] = BRad * cos(phi); + fout[1] = BRad * sin(phi); + fout[2] = BZ; } +// Evaluate collision frequencies void -eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) { - double z = xn[0]; - struct gk_mirror_ctx *app = ctx; - double sigSrc = app->sigSrcIon; - double TSrc0 = app->TSrc0Ion; - double Tfloor = app->TSrcFloorIon; - - if (fabs(z) <= 2.0 * sigSrc) - { - fout[0] = TSrc0; - } - else - { - fout[0] = Tfloor; - } + fout[0] = app->nuIon; } -// Ion initial conditions void eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) { - double z = xn[0]; - struct gk_mirror_ctx *app = ctx; - double z_m = app->z_m; - double sigma = 0.9*z_m; - if (fabs(z) <= sigma) - { - fout[0] = 0.5*app->n0*(1. + tanh(10. * sigma * fabs(sigma - fabs(z)))); - } - else - { - fout[0] = 0.5*app->n0*exp(-5 * (fabs(sigma - fabs(z)))); - } + double z = xn[0]; + fout[0] = 1e17 * exp(-2 * pow(fabs(z), 2)); } void eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) { - double z = xn[0]; - struct gk_mirror_ctx *app = ctx; - double cs_m = app->cs_m; - double z_m = app->z_m; - double z_max = app->z_max; - if (fabs(z) <= z_m) - { - fout[0] = 0.0; - } - else - { - fout[0] = (fabs(z) / z) * cs_m * tanh(3 * (z_max - z_m) * fabs(fabs(z) - z_m)); - } + fout[0] = 0.0; } void -eval_temp_par_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) { - double z = xn[0]; - struct gk_mirror_ctx *app = ctx; - double z_m = app->z_m; - double Ti_par0 = app->Ti_par0; - double Ti_par_m = app->Ti_par_m; - if (fabs(z) <= z_m) - { - fout[0] = Ti_par_m+(Ti_par0-Ti_par_m)*tanh(4 * fabs(z_m - fabs(z))); - } - else - { - fout[0] = Ti_par_m; - } + fout[0] = app->Ti0; } void 
-eval_temp_perp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) { - double z = xn[0]; - struct gk_mirror_ctx *app = ctx; - double z_m = app->z_m; - double Ti_perp0 = app->Ti_perp0; - double Ti_perp_m = app->Ti_perp_m; - if (fabs(z) <= z_m) - { - fout[0] = Ti_perp_m - Ti_perp0*tanh(3.*fabs(z_m-fabs(z))); + double z = xn[0]; + double src_amp = app->ion_source_amplitude; + double z_src = 0.0; + double src_sigma = app->ion_source_sigma; + double src_amp_floor = src_amp * 1e-2; + if (fabs(z) <= 0.98) { + fout[0] = src_amp * (1 - pow(fabs(z), 6) / 0.98); } - else - { - fout[0] = Ti_perp_m * GKYL_MAX2(1.e-3, exp(-5. * (fabs(z_m - fabs(z))))); + else { + fout[0] = 1e-16; } } void -evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) -{ - struct gk_mirror_ctx *app = ctx; - fout[0] = app->nuIon; -} - -// Geometry evaluation functions for the gk app -// mapc2p must assume a 3d input xc -void -mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx) -{ - double psi = xc[0]; - double theta = xc[1]; - double z = xc[2]; - - double Z = Z_psiz(psi, z, ctx); - double R = R_psiZ(psi, Z, ctx); - - // Cartesian coordinates on plane perpendicular to Z axis. - double x = R * cos(theta); - double y = R * sin(theta); - xp[0] = x; - xp[1] = y; - xp[2] = Z; -} - -// bmag_func must assume a 3d input xc -void -bmag_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) +eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) { - double z = xc[2]; - - struct gk_mirror_ctx *app = ctx; - double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. 
- double Z = Z_psiz(psi, z, ctx); - double BRad, BZ, Bmag; - Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); - fout[0] = Bmag; + fout[0] = 0.0; } -// bfield_func must assume a 3d input xc void -bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) +eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) { - double z = xc[2]; - struct gk_mirror_ctx *app = ctx; - double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. - double Z = Z_psiz(psi, z, ctx); - double BRad, BZ, Bmag; - Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); - - double phi = xc[1]; - // zc are computational coords. - // Set Cartesian components of magnetic field. - fout[0] = BRad*cos(phi); - fout[1] = BRad*sin(phi); - fout[2] = BZ; + double z = xn[0]; + double TSrc0 = app->ion_source_temp; + double Tfloor = TSrc0 * 1e-2; + if (fabs(z) <= 0.98) { + fout[0] = TSrc0; + } + else { + fout[0] = Tfloor; + } } -void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx) +void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx) { struct gk_mirror_ctx *app = ctx; double vpar_max_ion = app->vpar_max_ion; double mu_max_ion = app->mu_max_ion; double cvpar = vc[0], cmu = vc[1]; - // Linear map up to vpar_max/lin_frac_inv, then a power grid. - double vpar_lin_fac_inv = app->vpar_lin_fac_inv; - double vpar_pow = app->vpar_pow; - if (fabs(cvpar) <= 1.0/vpar_lin_fac_inv) - vp[0] = vpar_max_ion*cvpar; - else if (cvpar < -1.0/vpar_lin_fac_inv) - vp[0] = -vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow); - else - vp[0] = vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow); - -// // Quadratic mu. -// vp[1] = mu_max_ion*pow(cmu,2.0); - // Linear map up to mu_max/lin_frac_inv, then a power grid. 
- double mu_lin_fac_inv = app->mu_lin_fac_inv; - double mu_pow = app->mu_pow; -// if (cmu <= 1.0/mu_lin_fac_inv) -// vp[0] = mu_max_ion*cmu; -// else -// vp[0] = mu_max_ion*pow(mu_lin_fac_inv,mu_pow-1)*pow(cmu,mu_pow); - double w = 0.3; - double f = 0.012; - double a = mu_max_ion*(f-1.0)/(w*w-1.0); - double b = mu_max_ion*(w*w-f)/(w*w-1.0); - if (cmu <= w) - vp[1] = (f*mu_max_ion/w)*cmu; - else - vp[1] = a*pow(cmu,2)+b; - + double b = 1.4; + vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b); + vp[1] = mu_max_ion * pow(cmu, 3); } struct gk_mirror_ctx create_ctx(void) { int cdim = 1, vdim = 2; // Dimensionality. + int poly_order = 1; // Universal constant parameters. double eps0 = GKYL_EPSILON0; @@ -463,202 +334,150 @@ create_ctx(void) double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.; double Ti0 = tau * Te0; - // Parameters controlling initial conditions. - double alim = 0.125; - double alphaIC0 = 2; - double alphaIC1 = 10; - - double nuFrac = 1.0; // Ion-ion collision freq. + double nuFrac = 1.0; double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV); double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 / - (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.)); + (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.)); // Thermal speeds. double vti = sqrt(Ti0 / mi); - double vte = sqrt(Te0 / me); - double c_s = sqrt(Te0 / mi); - - // Gyrofrequencies and gyroradii. - double omega_ci = eV * B_p / mi; - double rho_s = c_s / omega_ci; - - // Geometry parameters. - double RatZeq0 = 0.10; // Radius of the field line at Z=0. - // Axial coordinate Z extents. Endure that Z=0 is not on - // the boundary of a cell (due to AD errors). - double Z_min = -2.5; - double Z_max = 2.5; - - // Parameters controlling the magnetic equilibrium model. 
- double mcB = 6.51292; - double gamma = 0.124904; - double Z_m = 0.98; - - // Source parameters - double NSrcIon = 3.1715e23 / 8.0 / 40.0 / 2.0 * 1.25; - double lineLengthSrcIon = 0.0; - double sigSrcIon = Z_m / 4.0; - double NSrcFloorIon = 0.05 * NSrcIon; - double TSrc0Ion = Ti0 * 1.25; - double TSrcFloorIon = TSrc0Ion / 8.0; // Grid parameters double vpar_max_ion = 16 * vti; - double vpar_min_ion = -vpar_max_ion; double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p); + int Nz = 64; + int Nvpar = 32; // 96 uniform + int Nmu = 16; // 192 uniform - // Computational velocity space limits. - double vpar_lin_fac_inv = 4; - double vpar_pow = 3; - double vpar_min_ion_c = -1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow); - double vpar_max_ion_c = 1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow); - double mu_min_ion_c = 0.; - double mu_max_ion_c = 1.; - double mu_lin_fac_inv = 1.0/0.012; - double mu_pow = 2; -// double mu_min_ion_c = 0.0; -// double mu_max_ion_c = 1.0/pow(mu_lin_fac_inv,(mu_pow-1)/mu_pow); - - // Grid DOF: - int Nz = 64; // Number of cells in z direction. - int Nvpar = 32; // Number of cells in parallel velocity direction. - int Nmu = 16; // Number of cells in mu direction. - int poly_order = 1; - - // Initial conditions parameter.s - double Ti_perp0 = 10000 * eV; - double Ti_par0 = 7500 * eV; + // Source parameters + double ion_source_amplitude = 1.e20; + double ion_source_sigma = 0.5; + double ion_source_temp = 5000. * eV; - // Parameters at mirror throat - double Ti_perp_m = 15000 * eV; - double Ti_par_m = 1000 * eV; - double z_m = 0.982544; - double cs_m = sqrt((Te0+3.0*Ti_par_m)/mi); + // Geometry parameters. + double RatZeq0 = 0.10; // Radius of the field line at Z=0. + double Z_min = -2.5; + double Z_max = 2.5; + double mcB = 3.691260; + double gamma = 0.226381; + double Z_m = 0.98; - // Factor multiplying collisionless terms. - double alpha_oap = 0.01; + // POA parameters + double alpha_oap = 5e-6; // Factor multiplying collisionless terms. 
double alpha_fdp = 1.0; - // Duration of each phase. - double tau_oap = 2400.0e-9; - double tau_fdp = 24.0e-9; - double tau_fdp_extra = 2*tau_fdp; + double tau_oap = 0.001; // Duration of each phase. + double tau_fdp = 7e-9; + double tau_fdp_extra = 2e-9; int num_cycles = 2; // Number of OAP+FDP cycles to run. // Frame counts for each phase type (specified independently) - int num_frames_oap = 1; // Frames per OAP phase - int num_frames_fdp = 1; // Frames per FDP phase - int num_frames_fdp_extra = 2*num_frames_fdp; // Frames for the extra FDP phase + int num_frames_oap = 1; // Frames per OAP phase + int num_frames_fdp = 1; // Frames per FDP phase + int num_frames_fdp_extra = 2; // Frames for the extra FDP phase // Whether to evolve the field. bool is_static_field_oap = true; bool is_static_field_fdp = false; - // Whether to enable positivity. + + // Whether positivity is enabled. bool is_positivity_enabled_oap = false; bool is_positivity_enabled_fdp = true; + // Type of df/dt multipler. enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE; enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE; // Calculate phase structure - double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra; - double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair. - int num_phases = 2*num_cycles + 1; + double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra; + double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair. + int num_phases = 2 * num_cycles + 1; int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra; - struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params)); - for (int i=0; i<(num_phases-1)/2; i++) { + struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * + sizeof(struct gk_poa_phase_params)); + for (int i = 0; i < (num_phases - 1) / 2; i++) { // OAPs. 
- poa_phases[2*i].phase = GK_POA_OAP; - poa_phases[2*i].num_frames = num_frames_oap; - poa_phases[2*i].duration = tau_oap; - poa_phases[2*i].alpha = alpha_oap; - poa_phases[2*i].is_static_field = is_static_field_oap; - poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap; - poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap; + poa_phases[2 * i].phase = GK_POA_OAP; + poa_phases[2 * i].num_frames = num_frames_oap; + poa_phases[2 * i].duration = tau_oap; + poa_phases[2 * i].alpha = alpha_oap; + poa_phases[2 * i].is_static_field = is_static_field_oap; + poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap; + poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap; // FDPs. - poa_phases[2*i+1].phase = GK_POA_FDP; - poa_phases[2*i+1].num_frames = num_frames_fdp; - poa_phases[2*i+1].duration = tau_fdp; - poa_phases[2*i+1].alpha = alpha_fdp; - poa_phases[2*i+1].is_static_field = is_static_field_fdp; - poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp; - poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp; + poa_phases[2 * i + 1].phase = GK_POA_FDP; + poa_phases[2 * i + 1].num_frames = num_frames_fdp; + poa_phases[2 * i + 1].duration = tau_fdp; + poa_phases[2 * i + 1].alpha = alpha_fdp; + poa_phases[2 * i + 1].is_static_field = is_static_field_fdp; + poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp; + poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp; } - // Add an extra, longer FDP. 
- poa_phases[num_phases-1].phase = GK_POA_FDP; - poa_phases[num_phases-1].num_frames = num_frames_fdp_extra; - poa_phases[num_phases-1].duration = tau_fdp_extra; - poa_phases[num_phases-1].alpha = alpha_fdp; - poa_phases[num_phases-1].is_static_field = is_static_field_fdp; - poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp; - poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp; - - double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames). + // The final stage is an extra FDP with its own duration (tau_fdp_extra) and frame count (num_frames_fdp_extra). + poa_phases[num_phases - 1].phase = GK_POA_FDP; + poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra; + poa_phases[num_phases - 1].duration = tau_fdp_extra; + poa_phases[num_phases - 1].alpha = alpha_fdp; + poa_phases[num_phases - 1].is_static_field = is_static_field_fdp; + poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp; + poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp; + + double write_phase_freq = 1; // Frequency of writing phase-space diagnostics (as a fraction of num_frames). double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames). double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step. int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps. 
struct gk_mirror_ctx ctx = { - .cdim = cdim, .vdim = vdim, - .mi = mi, .qi = qi, - .me = me, .qe = qe, - .Te0 = Te0, .Ti0 = Ti0, .n0 = n0, - .B_p = B_p, .beta = beta, .tau = tau, - .alim = alim, - .alphaIC0 = alphaIC0, - .alphaIC1 = alphaIC1, - .nuFrac = nuFrac, .logLambdaIon = logLambdaIon, .nuIon = nuIon, - .vti = vti, .vte = vte, .c_s = c_s, - .omega_ci = omega_ci, .rho_s = rho_s, + .cdim = cdim, + .vdim = vdim, + .mi = mi, + .qi = qi, + .me = me, + .qe = qe, + .Te0 = Te0, + .n0 = n0, + .B_p = B_p, + .beta = beta, + .tau = tau, + .Ti0 = Ti0, + .nuFrac = nuFrac, + .logLambdaIon = logLambdaIon, + .nuIon = nuIon, + .vti = vti, .RatZeq0 = RatZeq0, - .Z_min = Z_min, .Z_max = Z_max, - // Parameters controlling the magnetic equilibrium model. - .mcB = mcB, .gamma = gamma, - .Z_m = Z_m, - .z_m = z_m, - // Initial condition parameters. - .Ti_perp0 = Ti_perp0, .Ti_par0 = Ti_par0, - .Ti_perp_m = Ti_perp_m, .Ti_par_m = Ti_par_m, .cs_m = cs_m, - // Source parameters - .NSrcIon = NSrcIon, .NSrcFloorIon = NSrcFloorIon, - .TSrc0Ion = TSrc0Ion, .TSrcFloorIon = TSrcFloorIon, - .lineLengthSrcIon = lineLengthSrcIon, .sigSrcIon = sigSrcIon, - // Physical velocity space limits. - .vpar_min_ion = vpar_min_ion, .vpar_max_ion = vpar_max_ion, .mu_max_ion = mu_max_ion, - // Computational velocity space limits. - .vpar_lin_fac_inv = vpar_lin_fac_inv, - .vpar_pow = vpar_pow, - .vpar_min_ion_c = vpar_min_ion_c, - .vpar_max_ion_c = vpar_max_ion_c, - .mu_lin_fac_inv = mu_lin_fac_inv, - .mu_pow = mu_pow, - .mu_min_ion_c = mu_min_ion_c, - .mu_max_ion_c = mu_max_ion_c, - // Grid DOF. .Nz = Nz, .Nvpar = Nvpar, .Nmu = Nmu, - .cells = {Nz, Nvpar, Nmu}, + .cells = { Nz, Nvpar, Nmu }, .poly_order = poly_order, - // Time integration and I/O parameters. 
.t_end = t_end, .num_frames = num_frames, .num_phases = num_phases, .poa_phases = poa_phases, - .write_phase_freq = write_phase_freq , - .int_diag_calc_freq = int_diag_calc_freq , - .dt_failure_tol = dt_failure_tol , - .num_failures_max = num_failures_max , + .write_phase_freq = write_phase_freq, + .int_diag_calc_freq = int_diag_calc_freq, + .dt_failure_tol = dt_failure_tol, + .num_failures_max = num_failures_max, + + .ion_source_amplitude = ion_source_amplitude, + .ion_source_sigma = ion_source_sigma, + .ion_source_temp = ion_source_temp, + + .mcB = mcB, + .gamma = gamma, + .Z_m = Z_m, + .Z_min = Z_min, + .Z_max = Z_max, }; // Populate a couple more values in the context. ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx); - ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx); - ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx); + ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx); + ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx); return ctx; } @@ -670,25 +489,25 @@ release_ctx(struct gk_mirror_ctx *ctx) } void -calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app, +calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app, double t_curr, bool force_calc, double dt) { if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) { gkyl_gyrokinetic_app_calc_field_energy(app, t_curr); gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr); - if ( !(dt < 0.0) ) + if (!(dt < 0.0) ) gkyl_gyrokinetic_app_save_dt(app, t_curr, dt); } } void -write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase, - gkyl_gyrokinetic_app* app, double t_curr, bool force_write) +write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase, + gkyl_gyrokinetic_app *app, double t_curr, bool force_write) { bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr); if (trig_now_conf || force_write) { - int frame = (!trig_now_conf) && force_write? 
iot_conf->curr : iot_conf->curr-1; + int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1; gkyl_gyrokinetic_app_write_conf(app, t_curr, frame); gkyl_gyrokinetic_app_write_field_energy(app); @@ -698,7 +517,7 @@ write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase, bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr); if (trig_now_phase || force_write) { - int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1; + int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1; gkyl_gyrokinetic_app_write_phase(app, t_curr, frame); } @@ -720,7 +539,7 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs, double t_end = tfs->t_end; int frame_curr = tfs->frame_curr; int num_frames = tfs->num_frames; - int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames; + int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames; // Prevent division by zero when frame_curr equals num_frames int frames_remaining = num_frames - frame_curr; @@ -734,15 +553,16 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs, trig_write_phase->tcurr = t_curr; trig_write_phase->curr = frame_curr; - int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining); + int diag_frames = GKYL_MAX2(frames_remaining, + (num_int_diag_calc / num_frames) * frames_remaining); trig_calc_intdiag->dt = time_remaining / diag_frames; trig_calc_intdiag->tcurr = t_curr; trig_calc_intdiag->curr = frame_curr; } -void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps, +void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps, struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase, - struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs, + struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs, 
struct gk_poa_phase_params *pparams) { tfs->t_end = tfs->t_curr + pparams->duration; @@ -751,7 +571,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_ // Run an OAP or FDP. double t_curr = tfs->t_curr; double t_end = tfs->t_end; - + // Reset I/O triggers: reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag); @@ -791,27 +611,21 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_ int num_failures = 0, num_failures_max = ctx->num_failures_max; long step = 1; - while ((t_curr < t_end) && (step <= num_steps)) - { - if (step == 1 || step % 20 == 0) - gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr); - - dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end. + while ((t_curr < t_end) && (step <= num_steps)) { + gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr); + dt = t_end - t_curr; // Ensure we don't step beyond t_end. struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt); + gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual); - if (step == 1 || step % 20 == 0) - gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual); - - if (!status.success) - { + if (!status.success) { gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! 
Aborting simulation ....\n"); break; } t_curr += status.dt_actual; dt = status.dt_suggested; - calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual); - write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end); + calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual); + write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end); if (dt_init < 0.0) { dt_init = status.dt_actual; @@ -823,8 +637,10 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_ gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol); gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures); if (num_failures >= num_failures_max) { - gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol); - gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max); + gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", + dt_failure_tol); + gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", + num_failures_max); calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual); write_data(trig_write_conf, trig_write_phase, app, t_curr, true); break; @@ -838,7 +654,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_ } tfs->t_curr = t_curr; - tfs->frame_curr = tfs->frame_curr+pparams->num_frames; + tfs->frame_curr = tfs->frame_curr + pparams->num_frames; } int main(int argc, char **argv) @@ -857,96 +673,106 @@ int main(int argc, char **argv) struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions. 
int cells_x[ctx.cdim], cells_v[ctx.vdim]; - for (int d=0; d 0) - { - gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]); - gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]); + if (stat.nstage_2_fail > 0) { + gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", + stat.stage_2_dt_diff[1]); + gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", + stat.stage_2_dt_diff[0]); } gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail); gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io); gkyl_gyrokinetic_app_print_timings(app, stdout); - freeresources: +freeresources: // simulation complete, free app gkyl_gyrokinetic_app_release(app); gkyl_gyrokinetic_comms_release(comm); release_ctx(&ctx); - + #ifdef GKYL_HAVE_MPI if (app_args.use_mpi) MPI_Finalize(); diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c new file mode 100644 index 000000000..cd44c494f --- /dev/null +++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c @@ -0,0 +1,917 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +// State of the pseudo orbit-averaged integrator. +enum gk_poa_state { + GK_POA_NONE = 0, // Haven't started. + GK_POA_OAP, // Orbit averaged phase. + GK_POA_FDP, // Full dynamics phase. + GK_POA_COMPLETED, // Finished simulation. +}; + +struct gk_poa_phase_params { + enum gk_poa_state phase; // Type of phase. + int num_frames; // Number of frames. + double duration; // Duration. + double alpha; // Factor multiplying collisionless terms. + bool is_static_field; // Whether to evolve the field. + bool is_positivity_enabled; // Whether positivity is enabled. 
+ enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multiplier. +}; + +// Define the context of the simulation. This is basically all the globals. +struct gk_mirror_ctx { + int cdim, vdim; // Dimensionality. + // Plasma parameters + double mi; + double qi; + double me; + double qe; + double Te0; + double n0; + double B_p; + double beta; + double tau; + double Ti0; + double nuFrac; + // Ion-ion collision freq. + double logLambdaIon; + double nuIon; + double vti; + double RatZeq0; // Radius of the field line at Z=0. + // Extents of the field-line coordinate z and of psi; create_ctx fills these in via z_psiZ and psi_RZ. + double z_min; + double z_max; + double psi_max; + double psi_min; + // Velocity-space extents (read by mapc2p_vel_ion) and cell counts. + double vpar_max_ion; + double mu_max_ion; + int Npsi; + int Nz; + int Nvpar; + int Nmu; + int cells[GKYL_MAX_DIM]; // Number of cells in all directions. + int poly_order; + + // Source parameters + double ion_source_amplitude; + double ion_source_sigma; + double ion_source_temp; + + double t_end; // End time. + int num_frames; // Number of output frames. + int num_phases; // Number of phases. + struct gk_poa_phase_params *poa_phases; // Phases to run. + double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames). + double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames). + double dt_failure_tol; // Minimum allowable fraction of initial time-step. + int num_failures_max; // Maximum allowable number of consecutive small time-steps. 
+ + // Geometry parameters for Lorentzian mirror + double mcB; // Magnetic field parameter (overall factor multiplying both Lorentzians) + double gamma; // Width parameter for Lorentzian profile + double Z_m; // Mirror throat location; the two Lorentzians are centered at Z = Z_m and Z = -Z_m + double Z_min; // Minimum Z coordinate + double Z_max; // Maximum Z coordinate + double psi_in; // Working variable: psi read by integrand_z_psiZ and root_Z_psiz (set by z_psiZ and Z_psiz) + double z_in; // Working variable: target z read by root_Z_psiz (set by Z_psiz) +}; + +double +psi_RZ(double RIn, double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double mcB = app->mcB; + double gamma = app->gamma; + double Z_m = app->Z_m; + + double psi = 0.5 * pow(RIn, 2.) * mcB * + (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) + + 1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.)))); + return psi; +} + +double +R_psiZ(double psiIn, double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double mcB = app->mcB; + double gamma = app->gamma; + double Z_m = app->Z_m; + + double Rout = sqrt(2. * psiIn / (mcB * + (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) + + 1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.)))))); + return Rout; +} + +void +Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag) +{ + struct gk_mirror_ctx *app = ctx; + double mcB = app->mcB; + double gamma = app->gamma; + double Z_m = app->Z_m; + + double Rcoord = R_psiZ(psiIn, ZIn, ctx); + + BRad[0] = -(1. / 2.) * Rcoord * mcB * + (-2. * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) - + 2. * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.)))); + + BZ[0] = mcB * + (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) + + 1. / (M_PI * gamma * (1. 
+ pow((ZIn + Z_m) / gamma, 2.))) ); + + Bmag[0] = sqrt(pow(BRad[0], 2) + pow(BZ[0], 2)); +} + +double +integrand_z_psiZ(double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = app->psi_in; + double BRad, BZ, Bmag; + Bfield_psiZ(psi, ZIn, ctx, &BRad, &BZ, &Bmag); + return Bmag / BZ; +} + +double +z_psiZ(double psiIn, double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double eps = 0.0; + app->psi_in = psiIn; + struct gkyl_qr_res integral; + if (eps <= ZIn) { + integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14); + } + else { + integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14); + integral.res = -integral.res; + } + return integral.res; +} + +// Invert z(Z) via root-finding. +double +root_Z_psiz(double Z, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + return app->z_in - z_psiZ(app->psi_in, Z, ctx); +} + +double +Z_psiz(double psiIn, double zIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double maxL = app->Z_max - app->Z_min; + double eps = maxL / app->Nz; // Interestingly using a smaller eps yields larger errors in some geo quantities. + app->psi_in = psiIn; + app->z_in = zIn; + struct gkyl_qr_res Zout; + if (0.0 <= zIn) { + double fl = root_Z_psiz(-eps, ctx); + double fr = root_Z_psiz(app->Z_max + eps, ctx); + Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14); + } + else { + double fl = root_Z_psiz(app->Z_min - eps, ctx); + double fr = root_Z_psiz(eps, ctx); + Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14); + } + return Zout.res; +} + +// Geometry evaluation functions for the gk app +void +mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx) +{ + double psi = xc[0], theta = xc[1], z = xc[2]; + + double Z = Z_psiz(psi, z, ctx); + double R = R_psiZ(psi, Z, ctx); + + // Cartesian coordinates on plane perpendicular to Z axis. 
+ double x = R * cos(theta); + double y = R * sin(theta); + + xp[0] = x; xp[1] = y; xp[2] = Z; +} + +void +bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xc[2]; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Flux psi of the field line through R=RatZeq0 at Z=0. NOTE(review): xc[0] is not read here; confirm that is intended. + double Z = Z_psiz(psi, z, ctx); + double BRad, BZ, Bmag; + Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); + + double phi = xc[1]; + // xc are computational coords. + // Set Cartesian components of magnetic field. + fout[0] = BRad * cos(phi); + fout[1] = BRad * sin(phi); + fout[2] = BZ; +} + +// Evaluate the ion collision frequency (the constant nuIon stored in the context). +void +evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->nuIon; +} + +void +eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xn[1]; + fout[0] = 1e17 * exp(-2 * pow(fabs(z), 2)); +} + +void +eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = 0.0; +} + +void +eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->Ti0; +} + +void +eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xn[1]; + double src_amp = app->ion_source_amplitude; + double z_src = 0.0; + double src_sigma = app->ion_source_sigma; + double src_amp_floor = src_amp * 1e-2; + if (fabs(z) <= 0.98) { + fout[0] = src_amp * (1 - pow(fabs(z), 6) / 0.98); + } + else { + fout[0] = 1e-16; + } +} + +void +eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + fout[0] = 0.0; +} + +void +eval_temp_ion_source(double t, const 
double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xn[1]; + double TSrc0 = app->ion_source_temp; + double Tfloor = TSrc0 * 1e-2; + if (fabs(z) <= 0.98) { + fout[0] = TSrc0; + } + else { + fout[0] = Tfloor; + } +} + +void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double vpar_max_ion = app->vpar_max_ion; + double mu_max_ion = app->mu_max_ion; + + double cvpar = vc[0], cmu = vc[1]; + double b = 1.4; + vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b); + vp[1] = mu_max_ion * pow(cmu, 3); +} + +struct gk_mirror_ctx +create_ctx(void) +{ + int cdim = 2, vdim = 2; // Dimensionality. + int poly_order = 1; + + // Universal constant parameters. + double eps0 = GKYL_EPSILON0; + double mu0 = GKYL_MU0; + double eV = GKYL_ELEMENTARY_CHARGE; + double mp = GKYL_PROTON_MASS; + double me = GKYL_ELECTRON_MASS; + double qi = eV; // ion charge + double qe = -eV; // electron charge + + // Plasma parameters. + double mi = 2.014 * mp; + double Te0 = 940 * eV; + double n0 = 3e19; + double B_p = 0.53; + double beta = 0.4; + double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.; + double Ti0 = tau * Te0; + + // Ion-ion collision freq. + double nuFrac = 1.0; + double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV); + double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 / + (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.)); + + // Thermal speeds. + double vti = sqrt(Ti0 / mi); + + // Grid parameters + double vpar_max_ion = 16 * vti; + double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p); + int Nz = 64; + int Npsi = 4; + int Nvpar = 32; // 96 uniform + int Nmu = 16; // 192 uniform + + // Source parameters + double ion_source_amplitude = 1.e20; + double ion_source_sigma = 0.5; + double ion_source_temp = 5000. * eV; + + // Geometry parameters. + double RatZeq0 = 0.10; // Radius of the field line at Z=0. 
+ double Z_min = -2.5; + double Z_max = 2.5; + double mcB = 3.691260; + double gamma = 0.226381; + double Z_m = 0.98; + + // POA parameters + double alpha_oap = 5e-6; // Factor multiplying collisionless terms. + double alpha_fdp = 1.0; + double tau_oap = 0.001; // Duration of each phase type (OAP, FDP, extra FDP). + double tau_fdp = 7e-9; + double tau_fdp_extra = 2e-9; + int num_cycles = 2; // Number of OAP+FDP cycles to run. + + // Frame counts for each phase type (specified independently) + int num_frames_oap = 1; // Frames per OAP phase + int num_frames_fdp = 1; // Frames per FDP phase + int num_frames_fdp_extra = 2; // Frames for the extra FDP phase + + // Whether the field is static in each phase type (forwarded to the field input's is_static in run_phase). + bool is_static_field_oap = true; + bool is_static_field_fdp = false; + + // Whether positivity is enabled. + bool is_positivity_enabled_oap = false; + bool is_positivity_enabled_fdp = false; + + // Type of df/dt multiplier. + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE; + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE; + + // Calculate phase structure + double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra; + double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair (not referenced again in create_ctx). + int num_phases = 2 * num_cycles + 1; + int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra; + + struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * + sizeof(struct gk_poa_phase_params)); + for (int i = 0; i < (num_phases - 1) / 2; i++) { + // OAPs. + poa_phases[2 * i].phase = GK_POA_OAP; + poa_phases[2 * i].num_frames = num_frames_oap; + poa_phases[2 * i].duration = tau_oap; + poa_phases[2 * i].alpha = alpha_oap; + poa_phases[2 * i].is_static_field = is_static_field_oap; + poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap; + poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap; + + // FDPs. 
+ poa_phases[2 * i + 1].phase = GK_POA_FDP; + poa_phases[2 * i + 1].num_frames = num_frames_fdp; + poa_phases[2 * i + 1].duration = tau_fdp; + poa_phases[2 * i + 1].alpha = alpha_fdp; + poa_phases[2 * i + 1].is_static_field = is_static_field_fdp; + poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp; + poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp; + } + // The final stage is an extra FDP with its own duration (tau_fdp_extra) and frame count (num_frames_fdp_extra). + poa_phases[num_phases - 1].phase = GK_POA_FDP; + poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra; + poa_phases[num_phases - 1].duration = tau_fdp_extra; + poa_phases[num_phases - 1].alpha = alpha_fdp; + poa_phases[num_phases - 1].is_static_field = is_static_field_fdp; + poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp; + poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp; + + double write_phase_freq = 1; // Frequency of writing phase-space diagnostics (as a fraction of num_frames). + double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames). + double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step. + int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps. 
+ + struct gk_mirror_ctx ctx = { + .cdim = cdim, + .vdim = vdim, + .mi = mi, + .qi = qi, + .me = me, + .qe = qe, + .Te0 = Te0, + .n0 = n0, + .B_p = B_p, + .beta = beta, + .tau = tau, + .Ti0 = Ti0, + .nuFrac = nuFrac, + .logLambdaIon = logLambdaIon, + .nuIon = nuIon, + .vti = vti, + .RatZeq0 = RatZeq0, + .vpar_max_ion = vpar_max_ion, + .mu_max_ion = mu_max_ion, + .Npsi = Npsi, + .Nz = Nz, + .Nvpar = Nvpar, + .Nmu = Nmu, + .cells = { Npsi, Nz, Nvpar, Nmu }, + .poly_order = poly_order, + .t_end = t_end, + .num_frames = num_frames, + .num_phases = num_phases, + .poa_phases = poa_phases, + .write_phase_freq = write_phase_freq, + .int_diag_calc_freq = int_diag_calc_freq, + .dt_failure_tol = dt_failure_tol, + .num_failures_max = num_failures_max, + + .ion_source_amplitude = ion_source_amplitude, + .ion_source_sigma = ion_source_sigma, + .ion_source_temp = ion_source_temp, + + .mcB = mcB, + .gamma = gamma, + .Z_m = Z_m, + .Z_min = Z_min, + .Z_max = Z_max, + }; + + // Populate a couple more values in the context. 
+ ctx.psi_max = psi_RZ(ctx.RatZeq0, 0., &ctx); + ctx.psi_min = psi_RZ(ctx.RatZeq0 / 10, 0., &ctx); + ctx.z_min = z_psiZ(ctx.psi_max, ctx.Z_min, &ctx); + ctx.z_max = z_psiZ(ctx.psi_max, ctx.Z_max, &ctx); + + return ctx; +} + +void +release_ctx(struct gk_mirror_ctx *ctx) +{ + gkyl_free(ctx->poa_phases); +} + +void +calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app, + double t_curr, bool force_calc, double dt) +{ + if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) { + gkyl_gyrokinetic_app_calc_field_energy(app, t_curr); + gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr); + + if (!(dt < 0.0) ) + gkyl_gyrokinetic_app_save_dt(app, t_curr, dt); + } +} + +void +write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase, + gkyl_gyrokinetic_app *app, double t_curr, bool force_write) +{ + bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr); + if (trig_now_conf || force_write) { + int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1; + gkyl_gyrokinetic_app_write_conf(app, t_curr, frame); + + gkyl_gyrokinetic_app_write_field_energy(app); + gkyl_gyrokinetic_app_write_integrated_mom(app); + gkyl_gyrokinetic_app_write_dt(app); + } + + bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr); + if (trig_now_phase || force_write) { + int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1; + + gkyl_gyrokinetic_app_write_phase(app, t_curr, frame); + } +} + +struct time_frame_state { + double t_curr; // Current simulation time. + double t_end; // End time of current phase. + int frame_curr; // Current frame. + int num_frames; // Number of frames at the end of current phase. 
+}; + +void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs, + struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase, + struct gkyl_tm_trigger *trig_calc_intdiag) +{ + // Unpack the time/frame state used to reset the I/O triggers: + double t_curr = tfs->t_curr; + double t_end = tfs->t_end; + int frame_curr = tfs->frame_curr; + int num_frames = tfs->num_frames; + int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames; + + // NOTE: nothing here guards against frame_curr == num_frames; frames_remaining must be nonzero for the divisions below. + int frames_remaining = num_frames - frame_curr; + double time_remaining = t_end - t_curr; + + trig_write_conf->dt = time_remaining / frames_remaining; + trig_write_conf->tcurr = t_curr; + trig_write_conf->curr = frame_curr; + + trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining); + trig_write_phase->tcurr = t_curr; + trig_write_phase->curr = frame_curr; + + int diag_frames = GKYL_MAX2(frames_remaining, + (num_int_diag_calc / num_frames) * frames_remaining); + trig_calc_intdiag->dt = time_remaining / diag_frames; + trig_calc_intdiag->tcurr = t_curr; + trig_calc_intdiag->curr = frame_curr; +} + +void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps, + struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase, + struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs, + struct gk_poa_phase_params *pparams) +{ + tfs->t_end = tfs->t_curr + pparams->duration; + tfs->num_frames = tfs->frame_curr + pparams->num_frames; + + // Run an OAP or FDP. + double t_curr = tfs->t_curr; + double t_end = tfs->t_end; + + // Reset I/O triggers: + reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag); + + // Reset simulation parameters and function pointers. 
+ struct gkyl_gyrokinetic_collisionless collisionless_inp = { + .type = GKYL_GK_COLLISIONLESS_ES, + .scale_factor = pparams->alpha, + }; + struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = { + .type = pparams->fdot_mult_type, + .cellwise_const = true, + .write_diagnostics = true, + }; + struct gkyl_gyrokinetic_field field_inp = { + .gkfield_id = GKYL_GK_FIELD_BOLTZMANN, + .electron_mass = ctx->me, + .electron_charge = ctx->qe, + .electron_temp = ctx->Te0, + .polarization_bmag = ctx->B_p, + .is_static = pparams->is_static_field, + }; + struct gkyl_gyrokinetic_positivity positivity_inp = { + .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE, + .write_diagnostics = pparams->is_positivity_enabled, + }; + + gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp); + gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp); + gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp); + gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp); + + // Compute initial guess of maximum stable time-step. + double dt = t_end - t_curr; + + // Initialize small time-step check. + double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol; + int num_failures = 0, num_failures_max = ctx->num_failures_max; + + long step = 1; + while ((t_curr < t_end) && (step <= num_steps)) { + gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr); + dt = t_end - t_curr; // Ensure we don't step beyond t_end. + struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt); + gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual); + + if (!status.success) { + gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! 
Aborting simulation ....\n"); + break; + } + t_curr += status.dt_actual; + dt = status.dt_suggested; + + calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual); + write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end); + + if (dt_init < 0.0) { + dt_init = status.dt_actual; + } + else if (status.dt_actual < dt_failure_tol * dt_init) { + num_failures += 1; + + gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual); + gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol); + gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures); + if (num_failures >= num_failures_max) { + gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", + dt_failure_tol); + gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", + num_failures_max); + calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual); + write_data(trig_write_conf, trig_write_phase, app, t_curr, true); + break; + } + } + else { + num_failures = 0; + } + + step += 1; + } + + tfs->t_curr = t_curr; + tfs->frame_curr = tfs->frame_curr + pparams->num_frames; +} + +int main(int argc, char **argv) +{ + struct gkyl_app_args app_args = parse_app_args(argc, argv); + +#ifdef GKYL_HAVE_MPI + if (app_args.use_mpi) MPI_Init(&argc, &argv); +#endif + + if (app_args.trace_mem) { + gkyl_cu_dev_mem_debug_set(true); + gkyl_mem_debug_set(true); + } + + struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions. + + int cells_x[ctx.cdim], cells_v[ctx.vdim]; + for (int d = 0; d < ctx.cdim; d++) { + cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]); + } + for (int d = 0; d < ctx.vdim; d++) { + cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]); + } + + // Construct communicator for use in app. 
+ struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr); + + struct gkyl_gyrokinetic_species ion = { + .name = "ion", + .charge = ctx.qi, + .mass = ctx.mi, + .vdim = ctx.vdim, + .lower = { -1.0, 0.0 }, + .upper = { 1.0, 1.0 }, + .cells = { cells_v[0], cells_v[1] }, + .polarization_density = ctx.n0, + + .projection = { + .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, + .density = eval_density_ion, + .ctx_density = &ctx, + .upar = eval_upar_ion, + .ctx_upar = &ctx, + .temp = eval_temp_ion, + .ctx_temp = &ctx, + }, + + .mapc2p = { + .mapping = mapc2p_vel_ion, + .ctx = &ctx, + }, + + .collisionless = { + .type = GKYL_GK_COLLISIONLESS_ES, + .scale_factor = 1.0, // Will be replaced below. + .write_diagnostics = true, + }, + .time_rate_multiplier = { + .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, + .cellwise_const = true, + .write_diagnostics = true, + }, + + .collisions = { + .collision_id = GKYL_LBO_COLLISIONS, + .den_ref = ctx.n0, + .temp_ref = ctx.Te0, + .write_diagnostics = true, + }, + .source = { + .source_id = GKYL_PROJ_SOURCE, + .num_sources = 1, + .projection[0] = { + .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, + .ctx_density = &ctx, + .density = eval_density_ion_source, + .ctx_upar = &ctx, + .upar = eval_upar_ion_source, + .ctx_temp = &ctx, + .temp = eval_temp_ion_source, + }, + .diagnostics = { + .num_diag_moments = 6, + .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, + GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN }, + .num_integrated_diag_moments = 1, + .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP }, + }, + }, + + .bcs = { + { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_ZERO_FLUX }, + { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_ABSORB }, + { .dir = 1, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH }, + { .dir = 1, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH }, + }, + .write_omega_cfl = true, + 
.num_diag_moments = 8, + .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, + GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, + GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP }, + .num_integrated_diag_moments = 1, + .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP }, + .time_rate_diagnostics = true, + + .boundary_flux_diagnostics = { + .num_integrated_diag_moments = 1, + .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP }, + }, + }; + struct gkyl_gyrokinetic_field field = { + .gkfield_id = GKYL_GK_FIELD_BOLTZMANN, + .electron_mass = ctx.me, + .electron_charge = ctx.qe, + .electron_temp = ctx.Te0, + .is_static = false, + }; + + struct gkyl_gk app_inp = { // GK app + .name = "gk_mirror_boltz_elc_poa_2x2v_p1", + .cdim = ctx.cdim, + .upper = { ctx.psi_max, ctx.Z_max }, + .lower = { ctx.psi_min, ctx.Z_min }, + .cells = { cells_x[0], cells_x[1] }, + .poly_order = ctx.poly_order, + .basis_type = app_args.basis_type, + + .geometry = { + .geometry_id = GKYL_GEOMETRY_MAPC2P, + .world = { 0.0 }, + .mapc2p = mapc2p, // Mapping of computational to physical space. + .c2p_ctx = &ctx, + .bfield_func = bfield_func, // Magnetic field. + .bfield_ctx = &ctx, + }, + + .num_periodic_dir = 0, + .periodic_dirs = {}, + + .num_species = 1, + .species = { ion }, + + .field = field, + + .parallelism = { + .use_gpu = app_args.use_gpu, + .cuts = { app_args.cuts[0], app_args.cuts[1] }, + .comm = comm, + }, + }; + + // Set app output name from the executable name (argv[0]). + snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name); + + // Create app object. + gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp); + + // Triggers for IO. + struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag; + + struct time_frame_state tfs = { + .t_curr = 0.0, // Initial simulation time. + .frame_curr = 0, // Initial frame. + .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase. 
+ .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase. + }; + + int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index. + if (app_args.is_restart) { + struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, + app_args.restart_frame); + + if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) { + gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", + gkyl_array_rio_status_msg(status.io_status)); + goto freeresources; + } + + tfs.frame_curr = status.frame; + tfs.t_curr = status.stime; + + // Find out what phase we are in. + double time_count = 0.0; + int frame_count = 0; + int pit_curr = 0; + for (int pit = 0; pit < ctx.num_phases; pit++) { + time_count += ctx.poa_phases[pit].duration; + frame_count += ctx.poa_phases[pit].num_frames; + if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) { + pit_curr = pit; + break; + } + } + ; + phase_idx_init = pit_curr; + + // Change the duration and number frames so this phase reaches the expected + // time and number of frames and not beyond. + struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init]; + pparams->num_frames = frame_count - tfs.frame_curr; + pparams->duration = time_count - tfs.t_curr; + + gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr); + gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr); + } + else { + gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr); + + // Write out ICs. 
+ reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag); + + calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0); + write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true); + } + + if (app_args.num_steps != INT_MAX) + phase_idx_end = 1; + + // Loop over number of number of phases; + for (int pit = phase_idx_init; pit < phase_idx_end; pit++) { + gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr); + struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit]; + run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, + &trig_calc_intdiag, &tfs, phase_params); + } + + gkyl_gyrokinetic_app_stat_write(app); + + struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics + gkyl_gyrokinetic_app_cout(app, stdout, "\n"); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail); + if (stat.nstage_2_fail > 0) { + gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", + stat.stage_2_dt_diff[1]); + gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", + stat.stage_2_dt_diff[0]); + } + gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io); + gkyl_gyrokinetic_app_print_timings(app, stdout); + +freeresources: + // simulation complete, free app + gkyl_gyrokinetic_app_release(app); + gkyl_gyrokinetic_comms_release(comm); + release_ctx(&ctx); + +#ifdef GKYL_HAVE_MPI + if (app_args.use_mpi) + MPI_Finalize(); +#endif + return 0; +} diff --git a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c 
b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c new file mode 100644 index 000000000..0ce0adf5b --- /dev/null +++ b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c @@ -0,0 +1,1265 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +// State of the pseudo orbit-averaged integrator. +enum gk_poa_state { + GK_POA_NONE = 0, // Haven't started. + GK_POA_OAP, // Orbit averaged phase. + GK_POA_FDP, // Full dynamics phase. + GK_POA_COMPLETED, // Finished simulation. +}; + +struct gk_poa_phase_params { + enum gk_poa_state phase; // Type of phase. + int num_frames; // Number of frames. + double duration; // Duration. + double alpha; // Factor multiplying collisionless terms. + bool is_static_field; // Whether to evolve the field. + bool is_positivity_enabled; // Whether positivity is enabled. + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multipler. +}; + +// Define the context of the simulation. This is basically all the globals +struct gk_mirror_ctx { + int cdim, vdim; // Dimensionality. + + // Plasma parameters + double mi; + double qi; + double me; + double qe; + double Te0; + double n0; + double B_p; + double beta; + double tau; + double Ti0; + double kperpRhos; + // Parameters controlling initial conditions. + double alim; + double alphaIC0; + double alphaIC1; + double nuFrac; + // Electron-electron collision freq. + double logLambdaElc; + double nuElc; + // Ion-ion collision freq. + double logLambdaIon; + double nuIon; + // Thermal speeds. + double vti; + double vte; + double c_s; + // Gyrofrequencies and gyroradii. + double omega_ci; + double rho_s; + double kperp; // Perpendicular wavenumber in SI units. + double RatZeq0; // Radius of the field line at Z=0. + // Axial coordinate Z extents. Endure that Z=0 is not on + double Z_min; + double Z_max; + double z_min; + double z_max; + double psi_eval; + double psi_in; + double z_in; + // Magnetic equilibrium model. 
+ double mcB; + double gamma; + double Z_m; + // Bananna tip info. Hardcoad to avoid dependency on ctx + double B_bt; + double R_bt; + double Z_bt; + double z_bt; + double R_m; + double B_m; + double z_m; + // Physics parameters at mirror throat + double n_m; + double Te_m; + double Ti_m; + double cs_m; + // Source parameters + double NSrcIon; + double lineLengthSrcIon; + double sigSrcIon; + double NSrcFloorIon; + double TSrc0Ion; + double TSrcFloorIon; + double NSrcElc; + double lineLengthSrcElc; + double sigSrcElc; + double NSrcFloorElc; + double TSrc0Elc; + double TSrcFloorElc; + double alpha; // Multirate factor. + // Grid parameters + double vpar_max_ion; + double vpar_max_elc; + double mu_max_ion; + double mu_max_elc; + int Nz; + int Nvpar; + int Nmu; + int cells[GKYL_MAX_DIM]; // Number of cells in all directions. + int poly_order; + + double t_end; // End time. + int num_frames; // Number of output frames. + int num_phases; // Number of phases. + struct gk_poa_phase_params *poa_phases; // Phases to run. + double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames). + double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames). + double dt_failure_tol; // Minimum allowable fraction of initial time-step. + int num_failures_max; // Maximum allowable number of consecutive small time-steps. +}; + +double +psi_RZ(double RIn, double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double mcB = app->mcB; + double gamma = app->gamma; + double Z_m = app->Z_m; + double psi = 0.5 * pow(RIn, 2.) * mcB * + (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) + + 1. / (M_PI * gamma * (1. 
+ pow((ZIn + Z_m) / gamma, 2.)))); + return psi; +} + +double +R_psiZ(double psiIn, double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double Rout = sqrt(2.0 * psiIn / (app->mcB * + (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) + + 1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.)))))); + return Rout; +} + +void +Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag) +{ + struct gk_mirror_ctx *app = ctx; + double Rcoord = R_psiZ(psiIn, ZIn, ctx); + double mcB = app->mcB; + double gamma = app->gamma; + double Z_m = app->Z_m; + *BRad = -(1.0 / 2.0) * Rcoord * mcB * + (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) - + 2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.)))); + *BZ = mcB * + (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) + + 1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.)))); + *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2)); +} + +double +integrand_z_psiZ(double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = app->psi_in; + double BRad, BZ, Bmag; + Bfield_psiZ(psi, ZIn, ctx, &BRad, &BZ, &Bmag); + return Bmag / BZ; +} + +double +z_psiZ(double psiIn, double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + app->psi_in = psiIn; + double eps = 0.0; + struct gkyl_qr_res integral; + if (eps <= ZIn) { + integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14); + } + else { + integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14); + integral.res = -integral.res; + } + return integral.res; +} + +// Invert z(Z) via root-finding. 
+double +root_Z_psiz(double Z, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + return app->z_in - z_psiZ(app->psi_in, Z, ctx); +} + +double +Z_psiz(double psiIn, double zIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double maxL = app->Z_max - app->Z_min; + double eps = maxL / app->Nz; // Interestingly using a smaller eps yields larger errors in some geo quantities. + app->psi_in = psiIn; + app->z_in = zIn; + struct gkyl_qr_res Zout; + if (zIn >= 0.0) { + double fl = root_Z_psiz(-eps, ctx); + double fr = root_Z_psiz(app->Z_max + eps, ctx); + Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14); + } + else { + double fl = root_Z_psiz(app->Z_min - eps, ctx); + double fr = root_Z_psiz(eps, ctx); + Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14); + } + return Zout.res; +} + +// -- Source functions. +void +eval_density_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double z = xn[0]; + double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate. + double NSrc = app->NSrcElc; + double zSrc = app->lineLengthSrcElc; + double sigSrc = app->sigSrcElc; + double NSrcFloor = app->NSrcFloorElc; + if (fabs(Z) <= app->Z_m) { + fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2.))) * + exp(-1 * pow((z - zSrc), 2) / (2.0 * pow(sigSrc, 2.)))); + } + else { + fout[0] = 1e-16; + } +} + +void +eval_upar_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + fout[0] = 0.0; +} + +void +eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. 
+ double z = xn[0]; + double sigSrc = app->sigSrcElc; + double TSrc0 = app->TSrc0Elc; + double Tfloor = app->TSrcFloorElc; + if (fabs(z) <= 2.0 * sigSrc) { + fout[0] = TSrc0; + } + else { + fout[0] = Tfloor; + } +} + +void +eval_density_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double z = xn[0]; + double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate. + double NSrc = app->NSrcIon; + double zSrc = app->lineLengthSrcIon; + double sigSrc = app->sigSrcIon; + double NSrcFloor = app->NSrcFloorIon; + if (fabs(Z) <= app->Z_m) { + fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2))) * + exp(-1 * pow((z - zSrc), 2) / (2.0 * pow(sigSrc, 2)))); + } + else { + fout[0] = 1e-16; + } +} + +void +eval_upar_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + fout[0] = 0.0; +} + +void +eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double z = xn[0]; + double sigSrc = app->sigSrcIon; + double TSrc0 = app->TSrc0Ion; + double Tfloor = app->TSrcFloorIon; + if (fabs(z) <= 2.0 * sigSrc) { + fout[0] = TSrc0; + } + else { + fout[0] = Tfloor; + } +} + +// Electrons initial conditions +void +eval_density_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double z = xn[0]; + double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate. + double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate. 
+ double BRad, BZ, Bmag; + Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); + if (fabs(Z) <= app->Z_bt) { + fout[0] = app->n0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC0 / 2.); + } + else if (fabs(Z) <= app->Z_m) { + fout[0] = app->n0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC1 / 2.); + } + else { + fout[0] = app->n_m * sqrt(Bmag / app->B_m); + } +} + +void +eval_upar_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double z = xn[0]; + if (fabs(z) <= app->z_m) { + fout[0] = 0.0; + } + else if (z > app->z_m) { + fout[0] = app->cs_m * (z - app->z_m); + } + else { + fout[0] = app->cs_m * (z + app->z_m); + } +} + +void +eval_temp_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double z = xn[0]; + double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate. + double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate. + double BRad, BZ, Bmag; + Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); + if (fabs(Z) <= app->Z_bt) { + fout[0] = app->Te0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC0 / 2.); + } + else if (fabs(Z) <= app->Z_m) { + fout[0] = app->Te0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2.)), app->alphaIC1 / 2.); + } + else { + fout[0] = app->Te_m * sqrt(Bmag / app->B_m); + } +} + +// Ion initial conditions +void +eval_density(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double z = xn[0]; + double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate. 
+ double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate. + double BRad, BZ, Bmag; + Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); + if (fabs(Z) <= app->Z_bt) { + fout[0] = app->n0 * pow(1.0 - pow((R - app->R_bt) / app->alim, 2), app->alphaIC0 / 2); + } + else if (fabs(Z) <= app->Z_m) { + fout[0] = app->n0 * pow(1.0 - pow((R - app->R_bt) / app->alim, 2), app->alphaIC1 / 2); + } + else { + fout[0] = app->n_m * sqrt(Bmag / app->B_m); + } +} + +void +eval_upar(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double z = xn[0]; + if (fabs(z) <= app->z_m) { + fout[0] = 0.0; + } + else if (z > app->z_m) { + fout[0] = app->cs_m * (z - app->z_m); // * (z - / app->z_m); + } + else { + fout[0] = app->cs_m * (z + app->z_m); // * (z + app->z_m) / app->z_m; + } +} + +void +eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double z = xn[0]; + double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate. + double R = R_psiZ(psi, Z, ctx); // Cylindrical radial coordinate. 
+ double BRad, BZ, Bmag; + Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); + if (fabs(Z) <= app->Z_bt) { + fout[0] = app->Ti0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2)), app->alphaIC0 / 2); + } + else if (fabs(Z) <= app->Z_m) { + fout[0] = app->Ti0 * pow((1.0 - pow((R - app->R_bt) / app->alim, 2)), app->alphaIC1 / 2); + } + else { + fout[0] = app->Ti_m * sqrt(Bmag / app->B_m); + } +} + +// Potential initial condition +void +eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xn[0]; + double z_m = 0.98; + double z_max = app->z_max; + double sigma = 0.2 * z_m; + double center_potential = 8.0 * app->Te0 / app->qi; + if (fabs(z) <= sigma) { + fout[0] = center_potential; + } + else { + fout[0] = center_potential * (1 - (fabs(z) - sigma) / (z_max - sigma)); + } +} + +// Evaluate collision frequencies +void +evalNuElc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->nuElc; +} + +void +evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->nuIon; +} + +// Geometry evaluation functions for the gk app +// mapc2p must assume a 3d input xc +void +mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx) +{ + double psi = xc[0]; + double theta = xc[1]; + double z = xc[2]; + + double Z = Z_psiz(psi, z, ctx); + double R = R_psiZ(psi, Z, ctx); + + // Cartesian coordinates on plane perpendicular to Z axis. + double x = R * cos(theta); + double y = R * sin(theta); + xp[0] = x; + xp[1] = y; + xp[2] = Z; +} + +// bmag_func must assume a 3d input xc +void +bmag_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xc[2]; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. 
+ double Z = Z_psiz(psi, z, ctx); + double BRad, BZ, Bmag; + Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); + fout[0] = Bmag; +} + +// bfield_func must assume a 3d input xc +void +bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xc[2]; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double Z = Z_psiz(psi, z, ctx); + double BRad, BZ, Bmag; + Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); + + double phi = xc[1]; + // zc are computational coords. + // Set Cartesian components of magnetic field. + fout[0] = BRad * cos(phi); + fout[1] = BRad * sin(phi); + fout[2] = BZ; +} + +struct gk_mirror_ctx +create_ctx(void) +{ + int cdim = 1, vdim = 2; // Dimensionality. + + // Universal constant parameters. + double eps0 = GKYL_EPSILON0; + double mu0 = GKYL_MU0; // Not sure if this is right + double eV = GKYL_ELEMENTARY_CHARGE; + double mp = GKYL_PROTON_MASS; // ion mass + double me = GKYL_ELECTRON_MASS; + double qi = eV; // ion charge + double qe = -eV; // electron charge + + // Plasma parameters. + double mi = 2.014 * mp; + double Te0 = 940 * eV; + double n0 = 3e19; + double B_p = 0.53; + double beta = 0.4; + double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.; + double Ti0 = tau * Te0; + double kperpRhos = 0.1; + + // Parameters controlling initial conditions. + double alim = 0.125; + double alphaIC0 = 2; + double alphaIC1 = 10; + + double nuFrac = 1.0; + // Electron-electron collision freq. + double logLambdaElc = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Te0 / eV); + double nuElc = nuFrac * logLambdaElc * pow(eV, 4.) * n0 / + (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.)); + // Ion-ion collision freq. + double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV); + double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 / + (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. 
/ 2.)); + + // Thermal speeds. + double vti = sqrt(Ti0 / mi); + double vte = sqrt(Te0 / me); + double c_s = sqrt(Te0 / mi); + + // Gyrofrequencies and gyroradii. + double omega_ci = eV * B_p / mi; + double rho_s = c_s / omega_ci; + + // Perpendicular wavenumber in SI units: + double kperp = kperpRhos / rho_s; + + // Geometry parameters. + double RatZeq0 = 0.10; // Radius of the field line at Z=0. + // Axial coordinate Z extents. Endure that Z=0 is not on + // the boundary of a cell (due to AD errors). + double Z_min = -2.5; + double Z_max = 2.5; + + // Parameters controlling the magnetic equilibrium model. + double mcB = 6.51292; + double gamma = 0.124904; + double Z_m = 0.98; + + // Source parameters + double NSrcIon = 3.1715e23 / 8.0; + double lineLengthSrcIon = 0.0; + double sigSrcIon = Z_m / 4.0; + double NSrcFloorIon = 0.05 * NSrcIon; + double TSrc0Ion = Ti0 * 1.25; + double TSrcFloorIon = TSrc0Ion / 8.0; + double NSrcElc = NSrcIon; + double lineLengthSrcElc = lineLengthSrcIon; + double sigSrcElc = sigSrcIon; + double NSrcFloorElc = NSrcFloorIon; + double TSrc0Elc = TSrc0Ion / tau; + double TSrcFloorElc = TSrcFloorIon / tau; + + // Bananna tip info. Hardcoad to avoid dependency on ctx + double B_bt = 1.058278; + double R_bt = 0.071022; + double Z_bt = 0.467101; + double z_bt = 0.468243; + double R_m = 0.017845; + double B_m = 16.662396; + double z_m = 0.982544; + + // Physics parameters at mirror throat + double n_m = 1.105617e19; + double Te_m = 346.426583 * eV; + double Ti_m = 3081.437703 * eV; + double cs_m = 4.037740e5; + + double alpha = 0.01; // Multirate factor. + + // Grid parameters + double vpar_max_elc = 20 * vte; + double mu_max_elc = me * pow(3. * vte, 2.) / (2. * B_p); + double vpar_max_ion = 20 * vti; + double mu_max_ion = mi * pow(3. * vti, 2.) / (2. 
* B_p); + int Nz = 32; + int Nvpar = 32; // Number of cells in the paralell velocity direction 96 + int Nmu = 16; // Number of cells in the mu direction 192 + int poly_order = 1; + + // Factor multiplying collisionless terms. + double alpha_oap = 0.01; + double alpha_fdp = 1.0; + // Duration of each phase. + double tau_oap = 1e-7; + double tau_fdp = 3e-10; + double tau_fdp_extra = 2 * tau_fdp; + int num_cycles = 2; // Number of OAP+FDP cycles to run. + + // Frame counts for each phase type (specified independently) + int num_frames_oap = 1; // Frames per OAP phase + int num_frames_fdp = 1; // Frames per FDP phase + int num_frames_fdp_extra = 2 * num_frames_fdp; // Frames for the extra FDP phase + + // Whether to evolve the field. + bool is_static_field_oap = true; + bool is_static_field_fdp = false; + // Whether to enable positivity. + bool is_positivity_enabled_oap = false; + bool is_positivity_enabled_fdp = true; + // Type of df/dt multipler. + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE; + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE; + + // Calculate phase structure + double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra; + double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair. + int num_phases = 2 * num_cycles + 1; + int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra; + + struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * + sizeof(struct gk_poa_phase_params)); + for (int i = 0; i < (num_phases - 1) / 2; i++) { + // OAPs. + poa_phases[2 * i].phase = GK_POA_OAP; + poa_phases[2 * i].num_frames = num_frames_oap; + poa_phases[2 * i].duration = tau_oap; + poa_phases[2 * i].alpha = alpha_oap; + poa_phases[2 * i].is_static_field = is_static_field_oap; + poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap; + poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap; + + // FDPs. 
+ poa_phases[2 * i + 1].phase = GK_POA_FDP; + poa_phases[2 * i + 1].num_frames = num_frames_fdp; + poa_phases[2 * i + 1].duration = tau_fdp; + poa_phases[2 * i + 1].alpha = alpha_fdp; + poa_phases[2 * i + 1].is_static_field = is_static_field_fdp; + poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp; + poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp; + } + // Add an extra, longer FDP. + poa_phases[num_phases - 1].phase = GK_POA_FDP; + poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra; + poa_phases[num_phases - 1].duration = tau_fdp_extra; + poa_phases[num_phases - 1].alpha = alpha_fdp; + poa_phases[num_phases - 1].is_static_field = is_static_field_fdp; + poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp; + poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp; + + double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames). + double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames). + double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step. + int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps. 
+ + struct gk_mirror_ctx ctx = { + .cdim = cdim, + .vdim = vdim, + .mi = mi, + .qi = qi, + .me = me, + .qe = qe, + .Te0 = Te0, + .n0 = n0, + .B_p = B_p, + .beta = beta, + .tau = tau, + .Ti0 = Ti0, + .kperpRhos = kperpRhos, + .alim = alim, + .alphaIC0 = alphaIC0, + .alphaIC1 = alphaIC1, + .nuFrac = nuFrac, + .logLambdaElc = logLambdaElc, + .nuElc = nuElc, + .logLambdaIon = logLambdaIon, + .nuIon = nuIon, + .vti = vti, + .vte = vte, + .c_s = c_s, + .omega_ci = omega_ci, + .rho_s = rho_s, + .kperp = kperp, + .RatZeq0 = RatZeq0, + .Z_min = Z_min, + .Z_max = Z_max, + .mcB = mcB, + .gamma = gamma, + .Z_m = Z_m, + .B_bt = B_bt, + .R_bt = R_bt, + .Z_bt = Z_bt, + .z_bt = z_bt, + .R_m = R_m, + .B_m = B_m, + .z_m = z_m, + .n_m = n_m, + .Te_m = Te_m, + .Ti_m = Ti_m, + .cs_m = cs_m, + .NSrcIon = NSrcIon, + .lineLengthSrcIon = lineLengthSrcIon, + .sigSrcIon = sigSrcIon, + .NSrcFloorIon = NSrcFloorIon, + .TSrc0Ion = TSrc0Ion, + .TSrcFloorIon = TSrcFloorIon, + .NSrcElc = NSrcElc, + .lineLengthSrcElc = lineLengthSrcElc, + .sigSrcElc = sigSrcElc, + .NSrcFloorElc = NSrcFloorElc, + .TSrc0Elc = TSrc0Elc, + .TSrcFloorElc = TSrcFloorElc, + .vpar_max_ion = vpar_max_ion, + .vpar_max_elc = vpar_max_elc, + .mu_max_ion = mu_max_ion, + .mu_max_elc = mu_max_elc, + .Nz = Nz, + .Nvpar = Nvpar, + .Nmu = Nmu, + .cells = { Nz, Nvpar, Nmu }, + .poly_order = poly_order, + .t_end = t_end, + .num_frames = num_frames, + .num_phases = num_phases, + .poa_phases = poa_phases, + .write_phase_freq = write_phase_freq, + .int_diag_calc_freq = int_diag_calc_freq, + .dt_failure_tol = dt_failure_tol, + .num_failures_max = num_failures_max, + }; + + // Populate a couple more values in the context. 
+ ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx); + ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx); + ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx); + + return ctx; +} + +void +release_ctx(struct gk_mirror_ctx *ctx) +{ + gkyl_free(ctx->poa_phases); +} + +void +calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app, + double t_curr, bool force_calc, double dt) +{ + if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) { + gkyl_gyrokinetic_app_calc_field_energy(app, t_curr); + gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr); + + if (!(dt < 0.0) ) + gkyl_gyrokinetic_app_save_dt(app, t_curr, dt); + } +} + +void +write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase, + gkyl_gyrokinetic_app *app, double t_curr, bool force_write) +{ + bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr); + if (trig_now_conf || force_write) { + int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1; + gkyl_gyrokinetic_app_write_conf(app, t_curr, frame); + + gkyl_gyrokinetic_app_write_field_energy(app); + gkyl_gyrokinetic_app_write_integrated_mom(app); + gkyl_gyrokinetic_app_write_dt(app); + } + + bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr); + if (trig_now_phase || force_write) { + int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1; + + gkyl_gyrokinetic_app_write_phase(app, t_curr, frame); + } +} + +struct time_frame_state { + double t_curr; // Current simulation time. + double t_end; // End time of current phase. + int frame_curr; // Current frame. + int num_frames; // Number of frames at the end of current phase. 
+}; + +void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs, + struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase, + struct gkyl_tm_trigger *trig_calc_intdiag) +{ + // Reset I/O triggers: + double t_curr = tfs->t_curr; + double t_end = tfs->t_end; + int frame_curr = tfs->frame_curr; + int num_frames = tfs->num_frames; + int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames; + + // Prevent division by zero when frame_curr equals num_frames + int frames_remaining = num_frames - frame_curr; + double time_remaining = t_end - t_curr; + + trig_write_conf->dt = time_remaining / frames_remaining; + trig_write_conf->tcurr = t_curr; + trig_write_conf->curr = frame_curr; + + trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining); + trig_write_phase->tcurr = t_curr; + trig_write_phase->curr = frame_curr; + + int diag_frames = GKYL_MAX2(frames_remaining, + (num_int_diag_calc / num_frames) * frames_remaining); + trig_calc_intdiag->dt = time_remaining / diag_frames; + trig_calc_intdiag->tcurr = t_curr; + trig_calc_intdiag->curr = frame_curr; +} + +void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps, + struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase, + struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs, + struct gk_poa_phase_params *pparams) +{ + tfs->t_end = tfs->t_curr + pparams->duration; + tfs->num_frames = tfs->frame_curr + pparams->num_frames; + + // Run an OAP or FDP. + double t_curr = tfs->t_curr; + double t_end = tfs->t_end; + + // Reset I/O triggers: + reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag); + + // Reset simulation parameters and function pointers. 
+ struct gkyl_gyrokinetic_collisionless collisionless_inp = { + .type = GKYL_GK_COLLISIONLESS_ES, + .scale_factor = pparams->alpha, + }; + struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = { + .type = pparams->fdot_mult_type, + .cellwise_const = true, + .write_diagnostics = true, + }; + struct gkyl_gyrokinetic_field field_inp = { + .polarization_bmag = ctx->B_p, + .kperpSq = pow(ctx->kperp, 2.), + .is_static = pparams->is_static_field, + }; + struct gkyl_gyrokinetic_positivity positivity_inp = { + .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE, + .write_diagnostics = pparams->is_positivity_enabled, + }; + + gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp); + gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "elc", collisionless_inp); + gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp); + gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "elc", fdot_mult_inp); + gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp); + gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "elc", positivity_inp); + gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp); + + // Compute initial guess of maximum stable time-step. + double dt = t_end - t_curr; + + // Initialize small time-step check. + double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol; + int num_failures = 0, num_failures_max = ctx->num_failures_max; + + long step = 1; + while ((t_curr < t_end) && (step <= num_steps)) { + if (step == 1 || step % 1 == 0) + gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr); + + dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end. 
+ struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt); + + if (step == 1 || step % 1 == 0) + gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual); + + if (!status.success) { + gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n"); + break; + } + t_curr += status.dt_actual; + dt = status.dt_suggested; + + calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual); + write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end); + + if (dt_init < 0.0) { + dt_init = status.dt_actual; + } + else if (status.dt_actual < dt_failure_tol * dt_init) { + num_failures += 1; + + gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual); + gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol); + gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures); + if (num_failures >= num_failures_max) { + gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", + dt_failure_tol); + gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", + num_failures_max); + calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual); + write_data(trig_write_conf, trig_write_phase, app, t_curr, true); + break; + } + } + else { + num_failures = 0; + } + + step += 1; + } + + tfs->t_curr = t_curr; + tfs->frame_curr = tfs->frame_curr + pparams->num_frames; +} + +int main(int argc, char **argv) +{ + struct gkyl_app_args app_args = parse_app_args(argc, argv); + +#ifdef GKYL_HAVE_MPI + if (app_args.use_mpi) MPI_Init(&argc, &argv); +#endif + + if (app_args.trace_mem) { + gkyl_cu_dev_mem_debug_set(true); + gkyl_mem_debug_set(true); + } + + struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions. 
+ + int cells_x[ctx.cdim], cells_v[ctx.vdim]; + for (int d = 0; d < ctx.cdim; d++) { + cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]); + } + for (int d = 0; d < ctx.vdim; d++) { + cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]); + } + + // Construct communicator for use in app. + struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr); + + struct gkyl_gyrokinetic_species elc = { + .name = "elc", + .charge = ctx.qe, + .mass = ctx.me, + .vdim = ctx.vdim, + .lower = { -ctx.vpar_max_elc, 0.0 }, + .upper = { ctx.vpar_max_elc, ctx.mu_max_elc }, + .cells = { cells_v[0], cells_v[1] }, + + .polarization_density = ctx.n0, + + .projection = { + .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, + .ctx_density = &ctx, + .density = eval_density_elc, + .ctx_upar = &ctx, + .upar = eval_upar_elc, + .ctx_temp = &ctx, + .temp = eval_temp_elc, + }, + + .collisionless = { + .type = GKYL_GK_COLLISIONLESS_ES, + .scale_factor = 1.0, // Will be replaced below. + }, + + .collisions = { + .collision_id = GKYL_LBO_COLLISIONS, + .den_ref = ctx.n0, + .temp_ref = ctx.Te0, + .num_cross_collisions = 1, + .collide_with = { "ion" }, + }, + + .source = { + .source_id = GKYL_PROJ_SOURCE, + .num_sources = 1, + .projection[0] = { + .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, + .ctx_density = &ctx, + .density = eval_density_elc_source, + .ctx_upar = &ctx, + .upar = eval_upar_elc_source, + .ctx_temp = &ctx, + .temp = eval_temp_elc_source, + }, + }, + + .time_rate_multiplier = { + .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated. 
+ .cellwise_const = true, + .write_diagnostics = true, + }, + + .positivity = { + .type = GKYL_GK_POSITIVITY_SHIFT, + .write_diagnostics = true, + }, + + .bcs = { + { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, }, + { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, }, + }, + + .write_omega_cfl = true, + .num_diag_moments = 8, + .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, + GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP, + GKYL_F_MOMENT_BIMAXWELLIAN }, + }; + + struct gkyl_gyrokinetic_species ion = { + .name = "ion", + .charge = ctx.qi, + .mass = ctx.mi, + .vdim = ctx.vdim, + .lower = { -ctx.vpar_max_ion, 0.0 }, + .upper = { ctx.vpar_max_ion, ctx.mu_max_ion }, + .cells = { cells_v[0], cells_v[1] }, + .scale_with_polarization = true, + + .polarization_density = ctx.n0, + + .projection = { + .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, + .ctx_density = &ctx, + .density = eval_density, + .ctx_upar = &ctx, + .upar = eval_upar, + .ctx_temp = &ctx, + .temp = eval_temp_ion, + }, + + .collisionless = { + .type = GKYL_GK_COLLISIONLESS_ES, + .scale_factor = 1.0, // Will be replaced below. + }, + + .collisions = { + .collision_id = GKYL_LBO_COLLISIONS, + .den_ref = ctx.n0, + .temp_ref = ctx.Ti0, + .num_cross_collisions = 1, + .collide_with = { "elc" }, + }, + + .source = { + .source_id = GKYL_PROJ_SOURCE, + .num_sources = 1, + .projection[0] = { + .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, + .ctx_density = &ctx, + .density = eval_density_source, + .ctx_upar = &ctx, + .upar = eval_upar_source, + .ctx_temp = &ctx, + .temp = eval_temp_ion_source, + }, + }, + + .time_rate_multiplier = { + .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated. 
+ .cellwise_const = true, + .write_diagnostics = true, + }, + + .positivity = { + .type = GKYL_GK_POSITIVITY_SHIFT, + .write_diagnostics = true, + }, + + .bcs = { + { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, }, + { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, }, + }, + + .write_omega_cfl = true, + .num_diag_moments = 8, + .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, + GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP, + GKYL_F_MOMENT_BIMAXWELLIAN }, + }; + + struct gkyl_gyrokinetic_field field = { + .polarization_bmag = ctx.B_p, // Issue here. B0 from soloviev, so not sure what to do. Ours is not constant + .kperpSq = pow(ctx.kperp, 2.), + .is_static = false, // Will be replaced below. + .polarization_potential = eval_potential, + .polarization_potential_ctx = &ctx, + }; + + // GK app + struct gkyl_gk app_inp = { + .name = "gk_mirror_kinetic_elc_poa_1x2v_p1", + .cdim = ctx.cdim, + .lower = { ctx.z_min }, + .upper = { ctx.z_max }, + .cells = { cells_x[0] }, + .poly_order = ctx.poly_order, + .basis_type = app_args.basis_type, + + .geometry = { + .geometry_id = GKYL_GEOMETRY_MAPC2P, + .world = { ctx.psi_eval, 0.0 }, + .mapc2p = mapc2p, // Mapping of computational to physical space. + .c2p_ctx = &ctx, + .bfield_func = bfield_func, // Magnetic field. + .bfield_ctx = &ctx + }, + + .num_periodic_dir = 0, + .periodic_dirs = {}, + .num_species = 2, + .species = { elc, ion }, + .field = field, + + .parallelism = { + .use_gpu = app_args.use_gpu, + .cuts = { app_args.cuts[0] }, + .comm = comm, + }, + }; + + // Set app output name from the executable name (argv[0]). + snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name); + + // Create app object. + gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp); + + // Triggers for IO. 
+ struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag; + + struct time_frame_state tfs = { + .t_curr = 0.0, // Initial simulation time. + .frame_curr = 0, // Initial frame. + .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase. + .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase. + }; + + int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index. + if (app_args.is_restart) { + struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, + app_args.restart_frame); + + if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) { + gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", + gkyl_array_rio_status_msg(status.io_status)); + goto freeresources; + } + + tfs.frame_curr = status.frame; + tfs.t_curr = status.stime; + + // Find out what phase we are in. + double time_count = 0.0; + int frame_count = 0; + int pit_curr = 0; + for (int pit = 0; pit < ctx.num_phases; pit++) { + time_count += ctx.poa_phases[pit].duration; + frame_count += ctx.poa_phases[pit].num_frames; + if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) { + pit_curr = pit; + break; + } + } + ; + phase_idx_init = pit_curr; + + // Change the duration and number frames so this phase reaches the expected + // time and number of frames and not beyond. + struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init]; + pparams->num_frames = frame_count - tfs.frame_curr; + pparams->duration = time_count - tfs.t_curr; + + gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr); + gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr); + } + else { + gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr); + + // Write out ICs. 
+ reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag); + + calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0); + write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true); + } + + if (app_args.num_steps != INT_MAX) + phase_idx_end = 1; + + // Loop over number of number of phases; + for (int pit = phase_idx_init; pit < phase_idx_end; pit++) { + gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr); + struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit]; + run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, + &trig_calc_intdiag, &tfs, phase_params); + } + + gkyl_gyrokinetic_app_stat_write(app); + + struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics + gkyl_gyrokinetic_app_cout(app, stdout, "\n"); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail); + if (stat.nstage_2_fail > 0) { + gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", + stat.stage_2_dt_diff[1]); + gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", + stat.stage_2_dt_diff[0]); + } + gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io); + gkyl_gyrokinetic_app_print_timings(app, stdout); + +freeresources: + // simulation complete, free app + gkyl_gyrokinetic_app_release(app); + gkyl_gyrokinetic_comms_release(comm); + release_ctx(&ctx); + +#ifdef GKYL_HAVE_MPI + if (app_args.use_mpi) + MPI_Finalize(); +#endif + return 0; +} diff --git 
a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c new file mode 100644 index 000000000..7663554d9 --- /dev/null +++ b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c @@ -0,0 +1,941 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +// State of the pseudo orbit-averaged integrator. +enum gk_poa_state { + GK_POA_NONE = 0, // Haven't started. + GK_POA_OAP, // Orbit averaged phase. + GK_POA_FDP, // Full dynamics phase. + GK_POA_COMPLETED, // Finished simulation. +}; + +struct gk_poa_phase_params { + enum gk_poa_state phase; // Type of phase. + int num_frames; // Number of frames. + double duration; // Duration. + double alpha; // Factor multiplying collisionless terms. + bool is_static_field; // Whether to evolve the field. + bool is_positivity_enabled; // Whether positivity is enabled. + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multipler. +}; + +// Define the context of the simulation. This is basically all the globals +struct gk_mirror_ctx { + int cdim, vdim; // Dimensionality. + + // Plasma parameters + double mi; // Ion mass. + double me; // Electron mass. + double qi; // Ion charge. + double qe; // Electron charge. + double Te0; // Electron temperature. + double Ti0; // Ion temperature. + double n0; // Density. + double B_p; // Plasma magnetic field (mirror center). + double beta; // Plasma beta in the center. + double tau; // Temperature ratio. + + double Ti_perp0; // Reference ion perp temperature. + double Ti_par0; // Reference ion par temperature. + double cs_m; // Ion sound speed at the throat. + + double nuFrac; // Fraction multiplying collision frequency. + double logLambdaIon; // Ion Coulomb logarithm. + double nuIon; // Ion-ion collision freq. + + double vti; // Ion thermal speed. + double vte; // Electron thermal speed. + double c_s; // Ion sound speed. 
+ double omega_ci; // Ion gyrofrequency. + double rho_s; // Ion sound gyroradius. + + double RatZeq0; // Radius of the field line at Z=0. + double Z_min; // Minimum axial coordinate Z. + double Z_max; // Maximum axial coordinate Z. + double z_min; // Minimum value of the position along the field line. + double z_max; // Maximum value of the position along the field line. + double psi_eval; // Psi (poloidal flux) of the field line. + double psi_in, z_in; // Auxiliary psi and z. + + // Magnetic equilibrium model. + double mcB; + double gamma; + double Z_m; // Axial coordinate at mirror throat. + double z_m; // Computational coordinate at mirror throat. + + // Source parameters + double NSrcIon; + double TSrc0Ion; + + // Physical velocity space limits. + double vpar_min_ion, vpar_max_ion; + double mu_max_ion; + // Computational velocity space limits. + double vpar_min_ion_c, vpar_max_ion_c; + double mu_min_ion_c, mu_max_ion_c; + + // Grid DOF. + int Nz; + int Nvpar; + int Nmu; + int cells[GKYL_MAX_DIM]; // Number of cells in all directions. + int poly_order; + + double t_end; // End time. + int num_frames; // Number of output frames. + int num_phases; // Number of phases. + struct gk_poa_phase_params *poa_phases; // Phases to run. + double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames). + double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames). + double dt_failure_tol; // Minimum allowable fraction of initial time-step. + int num_failures_max; // Maximum allowable number of consecutive small time-steps. +}; + +double +psi_RZ(double RIn, double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double mcB = app->mcB; + double gamma = app->gamma; + double Z_m = app->Z_m; + double psi = 0.5 * pow(RIn, 2.) * mcB * + (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) + + 1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) + + 2. / (M_PI * gamma * (1. 
+ pow((ZIn - 2 * Z_m) / gamma, 2.))) + + 2. / (M_PI * gamma * (1. + pow((ZIn + 2 * Z_m) / gamma, 2.)))); + return psi; +} + +double +R_psiZ(double psiIn, double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double Rout = sqrt(2.0 * psiIn / (app->mcB * + (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) + + 1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))) + + 2.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - 2 * app->Z_m) / app->gamma, 2.))) + + 2.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + 2 * app->Z_m) / app->gamma, 2.))) + ))); + return Rout; +} + +void +Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag) +{ + struct gk_mirror_ctx *app = ctx; + double Rcoord = R_psiZ(psiIn, ZIn, ctx); + double mcB = app->mcB; + double gamma = app->gamma; + double Z_m = app->Z_m; + *BRad = -(1.0 / 2.0) * Rcoord * mcB * + (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) + + -2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))) + + -4.0 * (ZIn - 2 * Z_m) / (M_PI * pow(gamma, + 3.) * (pow(1.0 + pow((ZIn - 2 * Z_m) / gamma, 2.), 2.))) + + -4.0 * (ZIn + 2 * Z_m) / (M_PI * pow(gamma, + 3.) 
* (pow(1.0 + pow((ZIn + 2 * Z_m) / gamma, 2.), 2.))) + ); + *BZ = mcB * + (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) + + 1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))) + + 2.0 / (M_PI * gamma * (1.0 + pow((ZIn - 2 * Z_m) / gamma, 2.))) + + 2.0 / (M_PI * gamma * (1.0 + pow((ZIn + 2 * Z_m) / gamma, 2.))) + ); + *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2)); +} + +double +integrand_z_psiZ(double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double psi = app->psi_in; + double BRad, BZ, Bmag; + Bfield_psiZ(psi, ZIn, ctx, &BRad, &BZ, &Bmag); + return Bmag / BZ; +} + +double +z_psiZ(double psiIn, double ZIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + app->psi_in = psiIn; + double eps = 0.0; + struct gkyl_qr_res integral; + if (eps <= ZIn) { + integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14); + } + else { + integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14); + integral.res = -integral.res; + } + return integral.res; +} + +// Invert z(Z) via root-finding. +double +root_Z_psiz(double Z, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + return app->z_in - z_psiZ(app->psi_in, Z, ctx); +} + +double +Z_psiz(double psiIn, double zIn, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double maxL = app->Z_max - app->Z_min; + double eps = maxL / app->Nz; // Interestingly using a smaller eps yields larger errors in some geo quantities. 
+ app->psi_in = psiIn; + app->z_in = zIn; + struct gkyl_qr_res Zout; + if (zIn >= 0.0) { + double fl = root_Z_psiz(-eps, ctx); + double fr = root_Z_psiz(app->Z_max + eps, ctx); + Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14); + } + else { + double fl = root_Z_psiz(app->Z_min - eps, ctx); + double fr = root_Z_psiz(eps, ctx); + Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14); + } + return Zout.res; +} + +void +eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->NSrcIon; +} + +void +eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + fout[0] = 0.0; +} + +void +eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->TSrc0Ion; +} + +// Ion initial conditions +void +eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->n0; +} + +void +eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + fout[0] = 0.0; +} + +void +eval_temp_par_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->Ti_par0; +} + +void +eval_temp_perp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->Ti_perp0; +} + +void +evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->nuIon; +} + +// Geometry evaluation functions for the gk app +// mapc2p must assume a 3d input xc +void +mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx) +{ + double psi = 
xc[0]; + double theta = xc[1]; + double z = xc[2]; + + double Z = Z_psiz(psi, z, ctx); + double R = R_psiZ(psi, Z, ctx); + + // Cartesian coordinates on plane perpendicular to Z axis. + double x = R * cos(theta); + double y = R * sin(theta); + xp[0] = x; + xp[1] = y; + xp[2] = Z; +} + +// bfield_func must assume a 3d input xc +void +bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) +{ + double z = xc[2]; + + struct gk_mirror_ctx *app = ctx; + double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line. + double Z = Z_psiz(psi, z, ctx); + double BRad, BZ, Bmag; + Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag); + + double phi = xc[1]; + // zc are computational coords. + // Set Cartesian components of magnetic field. + fout[0] = BRad * cos(phi); + fout[1] = BRad * sin(phi); + fout[2] = BZ; +} + +void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double vpar_max_ion = app->vpar_max_ion; + double mu_max_ion = app->mu_max_ion; + + double cvpar = vc[0], cmu = vc[1]; + double b = 1.4; + vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b); + // Cubic map in mu. + vp[1] = mu_max_ion * pow(cmu, 3); +} + +struct gk_mirror_ctx +create_ctx(void) +{ + int cdim = 1, vdim = 2; // Dimensionality. + + // Universal constant parameters. + double eps0 = GKYL_EPSILON0; + double mu0 = GKYL_MU0; + double eV = GKYL_ELEMENTARY_CHARGE; + double mp = GKYL_PROTON_MASS; + double me = GKYL_ELECTRON_MASS; + double qi = eV; // ion charge + double qe = -eV; // electron charge + + // Plasma parameters. + double mi = 2.014 * mp; + double Te0 = 940 * eV; + double n0 = 3e19; + double B_p = 0.53; + double beta = 0.4; + double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.; + double Ti0 = tau * Te0; + + double nuFrac = 1.0; + // Ion-ion collision freq. 
+ double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV); + double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 / + (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.)); + + // Thermal speeds. + double vti = sqrt(Ti0 / mi); + double vte = sqrt(Te0 / me); + double c_s = sqrt(Te0 / mi); + + // Gyrofrequencies and gyroradii. + double omega_ci = eV * B_p / mi; + double rho_s = c_s / omega_ci; + + // Geometry parameters. + double RatZeq0 = 0.10; // Radius of the field line at Z=0. + // Axial coordinate Z extents. Endure that Z=0 is not on + // the boundary of a cell (due to AD errors). + double Z_min = -3.0; + double Z_max = 3.0; + + // Parameters controlling the magnetic equilibrium model. + double mcB = 1; + double gamma = 0.124904; + double Z_m = 1.0; + + // Source parameters + double NSrcIon = 3.1715e23 / 8.0 / 40.0 / 2.0 * 1.25; + double TSrc0Ion = Ti0 * 1.25; + + // Grid parameters + double vpar_max_ion = 16 * vti; + double vpar_min_ion = -vpar_max_ion; + double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p); + + // Computational velocity space limits. + double vpar_min_ion_c = -1.0; + double vpar_max_ion_c = 1.0; + double mu_min_ion_c = 0.; + double mu_max_ion_c = 1.; + + // Grid DOF: + int Nz = 200; // Number of cells in z direction. + int Nvpar = 48; // Number of cells in parallel velocity direction. + int Nmu = 16; // Number of cells in mu direction. + int poly_order = 1; + + // Initial conditions parameter.s + double Ti_perp0 = 10000 * eV; + double Ti_par0 = 7500 * eV; + + // Factor multiplying collisionless terms. + double alpha_oap = 0.01; + double alpha_fdp = 1.0; + // Duration of each phase. + double tau_oap = 5e-7; + double tau_fdp = 3e-9; + double tau_fdp_extra = 2 * tau_fdp; + int num_cycles = 2; // Number of OAP+FDP cycles to run. 
+ + // Frame counts for each phase type (specified independently) + int num_frames_oap = 1; // Frames per OAP phase + int num_frames_fdp = 1; // Frames per FDP phase + int num_frames_fdp_extra = 2 * num_frames_fdp; // Frames for the extra FDP phase + + // Whether to evolve the field. + bool is_static_field_oap = true; + bool is_static_field_fdp = false; + // Whether to enable positivity. + bool is_positivity_enabled_oap = false; + bool is_positivity_enabled_fdp = true; + // Type of df/dt multipler. + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE; + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE; + + // Calculate phase structure + double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra; + double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair. + int num_phases = 2 * num_cycles + 1; + int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra; + + struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * + sizeof(struct gk_poa_phase_params)); + for (int i = 0; i < (num_phases - 1) / 2; i++) { + // OAPs. + poa_phases[2 * i].phase = GK_POA_OAP; + poa_phases[2 * i].num_frames = num_frames_oap; + poa_phases[2 * i].duration = tau_oap; + poa_phases[2 * i].alpha = alpha_oap; + poa_phases[2 * i].is_static_field = is_static_field_oap; + poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap; + poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap; + + // FDPs. + poa_phases[2 * i + 1].phase = GK_POA_FDP; + poa_phases[2 * i + 1].num_frames = num_frames_fdp; + poa_phases[2 * i + 1].duration = tau_fdp; + poa_phases[2 * i + 1].alpha = alpha_fdp; + poa_phases[2 * i + 1].is_static_field = is_static_field_fdp; + poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp; + poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp; + } + // Add an extra, longer FDP. 
+ poa_phases[num_phases - 1].phase = GK_POA_FDP; + poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra; + poa_phases[num_phases - 1].duration = tau_fdp_extra; + poa_phases[num_phases - 1].alpha = alpha_fdp; + poa_phases[num_phases - 1].is_static_field = is_static_field_fdp; + poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp; + poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp; + + double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames). + double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames). + double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step. + int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps. + + struct gk_mirror_ctx ctx = { + .cdim = cdim, .vdim = vdim, + .mi = mi, .qi = qi, + .me = me, .qe = qe, + .Te0 = Te0, .Ti0 = Ti0, .n0 = n0, + .B_p = B_p, .beta = beta, .tau = tau, + .nuFrac = nuFrac, .logLambdaIon = logLambdaIon, .nuIon = nuIon, + .vti = vti, .vte = vte, .c_s = c_s, + .omega_ci = omega_ci, .rho_s = rho_s, + .RatZeq0 = RatZeq0, + .Z_min = Z_min, .Z_max = Z_max, + // Parameters controlling the magnetic equilibrium model. + .mcB = mcB, .gamma = gamma, + .Z_m = Z_m, + // Initial condition parameters. + .Ti_perp0 = Ti_perp0, .Ti_par0 = Ti_par0, + // Source parameters + .NSrcIon = NSrcIon, + .TSrc0Ion = TSrc0Ion, + // Physical velocity space limits. + .vpar_min_ion = vpar_min_ion, + .vpar_max_ion = vpar_max_ion, + .mu_max_ion = mu_max_ion, + // Computational velocity space limits. + .vpar_min_ion_c = vpar_min_ion_c, + .vpar_max_ion_c = vpar_max_ion_c, + .mu_min_ion_c = mu_min_ion_c, + .mu_max_ion_c = mu_max_ion_c, + // Grid DOF. + .Nz = Nz, + .Nvpar = Nvpar, + .Nmu = Nmu, + .cells = { Nz, Nvpar, Nmu }, + .poly_order = poly_order, + // Time integration and I/O parameters. 
+ .t_end = t_end, + .num_frames = num_frames, + .num_phases = num_phases, + .poa_phases = poa_phases, + .write_phase_freq = write_phase_freq, + .int_diag_calc_freq = int_diag_calc_freq, + .dt_failure_tol = dt_failure_tol, + .num_failures_max = num_failures_max, + }; + + // Populate a couple more values in the context. + ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx); + ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx); + ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx); + + return ctx; +} + +void +release_ctx(struct gk_mirror_ctx *ctx) +{ + gkyl_free(ctx->poa_phases); +} + +void +calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app, + double t_curr, bool force_calc, double dt) +{ + if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) { + gkyl_gyrokinetic_app_calc_field_energy(app, t_curr); + gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr); + + if (!(dt < 0.0) ) + gkyl_gyrokinetic_app_save_dt(app, t_curr, dt); + } +} + +void +write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase, + gkyl_gyrokinetic_app *app, double t_curr, bool force_write) +{ + bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr); + if (trig_now_conf || force_write) { + int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1; + gkyl_gyrokinetic_app_write_conf(app, t_curr, frame); + + gkyl_gyrokinetic_app_write_field_energy(app); + gkyl_gyrokinetic_app_write_integrated_mom(app); + gkyl_gyrokinetic_app_write_dt(app); + } + + bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr); + if (trig_now_phase || force_write) { + int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1; + + gkyl_gyrokinetic_app_write_phase(app, t_curr, frame); + } +} + +struct time_frame_state { + double t_curr; // Current simulation time. + double t_end; // End time of current phase. + int frame_curr; // Current frame. 
+ int num_frames; // Number of frames at the end of current phase. +}; + +void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs, + struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase, + struct gkyl_tm_trigger *trig_calc_intdiag) +{ + // Reset I/O triggers: + double t_curr = tfs->t_curr; + double t_end = tfs->t_end; + int frame_curr = tfs->frame_curr; + int num_frames = tfs->num_frames; + int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames; + + // NOTE(review): no guard is applied here; if frame_curr equals num_frames then frames_remaining is 0 and the trigger dt divisions below divide by zero. + int frames_remaining = num_frames - frame_curr; + double time_remaining = t_end - t_curr; + + trig_write_conf->dt = time_remaining / frames_remaining; + trig_write_conf->tcurr = t_curr; + trig_write_conf->curr = frame_curr; + + trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining); + trig_write_phase->tcurr = t_curr; + trig_write_phase->curr = frame_curr; + + int diag_frames = GKYL_MAX2(frames_remaining, + (num_int_diag_calc / num_frames) * frames_remaining); + trig_calc_intdiag->dt = time_remaining / diag_frames; + trig_calc_intdiag->tcurr = t_curr; + trig_calc_intdiag->curr = frame_curr; +} + +void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps, + struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase, + struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs, + struct gk_poa_phase_params *pparams) +{ + tfs->t_end = tfs->t_curr + pparams->duration; + tfs->num_frames = tfs->frame_curr + pparams->num_frames; + + // Run an OAP or FDP. + double t_curr = tfs->t_curr; + double t_end = tfs->t_end; + + // Reset I/O triggers: + reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag); + + // Reset simulation parameters and function pointers.
+ struct gkyl_gyrokinetic_collisionless collisionless_inp = { + .type = GKYL_GK_COLLISIONLESS_ES, + .scale_factor = pparams->alpha, + }; + struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = { + .type = pparams->fdot_mult_type, + .cellwise_const = true, + .write_diagnostics = true, + }; + struct gkyl_gyrokinetic_field field_inp = { + .gkfield_id = GKYL_GK_FIELD_BOLTZMANN, + .electron_mass = ctx->me, + .electron_charge = ctx->qe, + .electron_temp = ctx->Te0, + .polarization_bmag = ctx->B_p, + .is_static = pparams->is_static_field, + }; + struct gkyl_gyrokinetic_positivity positivity_inp = { + .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE, + .write_diagnostics = pparams->is_positivity_enabled, + }; + + gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp); + gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp); + gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp); + gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp); + + // Compute initial guess of maximum stable time-step. + double dt = t_end - t_curr; + + // Initialize small time-step check. + double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol; + int num_failures = 0, num_failures_max = ctx->num_failures_max; + + long step = 1; + while ((t_curr < t_end) && (step <= num_steps)) { + if (step == 1 || step % 1 == 0) + gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr); + + dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end. + struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt); + + if (step == 1 || step % 1 == 0) + gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual); + + if (!status.success) { + gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! 
Aborting simulation ....\n"); + break; + } + t_curr += status.dt_actual; + dt = status.dt_suggested; + + calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual); + write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end); + + if (dt_init < 0.0) { + dt_init = status.dt_actual; + } + else if (status.dt_actual < dt_failure_tol * dt_init) { + num_failures += 1; + + gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual); + gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol); + gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures); + if (num_failures >= num_failures_max) { + gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", + dt_failure_tol); + gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", + num_failures_max); + calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual); + write_data(trig_write_conf, trig_write_phase, app, t_curr, true); + break; + } + } + else { + num_failures = 0; + } + + step += 1; + } + + tfs->t_curr = t_curr; + tfs->frame_curr = tfs->frame_curr + pparams->num_frames; +} + +int main(int argc, char **argv) +{ + struct gkyl_app_args app_args = parse_app_args(argc, argv); + +#ifdef GKYL_HAVE_MPI + if (app_args.use_mpi) MPI_Init(&argc, &argv); +#endif + + if (app_args.trace_mem) { + gkyl_cu_dev_mem_debug_set(true); + gkyl_mem_debug_set(true); + } + + struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions. + + int cells_x[ctx.cdim], cells_v[ctx.vdim]; + for (int d = 0; d < ctx.cdim; d++) { + cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]); + } + for (int d = 0; d < ctx.vdim; d++) { + cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]); + } + + // Construct communicator for use in app. 
+ struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr); + + struct gkyl_gyrokinetic_species ion = { + .name = "ion", + .charge = ctx.qi, .mass = ctx.mi, + .vdim = ctx.vdim, + .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c }, + .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c }, + .cells = { cells_v[0], cells_v[1] }, + + .polarization_density = ctx.n0, + + .mapc2p = { + .mapping = mapc2p_vel_ion, + .ctx = &ctx, + }, + + .projection = { + .proj_id = GKYL_PROJ_BIMAXWELLIAN, + .density = eval_density_ion, + .upar = eval_upar_ion, + .temppar = eval_temp_par_ion, + .tempperp = eval_temp_perp_ion, + .ctx_density = &ctx, + .ctx_upar = &ctx, + .ctx_temppar = &ctx, + .ctx_tempperp = &ctx, + }, + + .collisionless = { + .type = GKYL_GK_COLLISIONLESS_ES, + .scale_factor = 1.0, // Will be replaced below. + }, + + .collisions = { + .collision_id = GKYL_LBO_COLLISIONS, + .self_nu = evalNuIon, + .self_nu_ctx = &ctx, + }, + + .source = { + .source_id = GKYL_PROJ_SOURCE, + .num_sources = 1, + .projection[0] = { + .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, + .density = eval_density_ion_source, + .upar = eval_upar_ion_source, + .temp = eval_temp_ion_source, + .ctx_density = &ctx, + .ctx_upar = &ctx, + .ctx_temp = &ctx, + }, + }, + + .time_rate_multiplier = { + .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated. 
+ .cellwise_const = true, + .write_diagnostics = true, + }, + + .positivity = { + .type = GKYL_GK_POSITIVITY_SHIFT, + .write_diagnostics = true, + }, + + .bcs = { + { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, }, + { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, }, + }, + + .num_diag_moments = 4, + .diag_moments = { GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, + GKYL_F_MOMENT_BIMAXWELLIAN }, + }; + + struct gkyl_gyrokinetic_field field = { + .gkfield_id = GKYL_GK_FIELD_BOLTZMANN, + .electron_mass = ctx.me, + .electron_charge = ctx.qe, + .electron_temp = ctx.Te0, + .is_static = false, // So solvers are allocated. + }; + + // GK app + struct gkyl_gk app_inp = { + .name = "gk_mirror_tandem_boltz_elc_poa_1x2v", + .cdim = ctx.cdim, + .lower = { ctx.z_min }, + .upper = { ctx.z_max }, + .cells = { cells_x[0] }, + .poly_order = ctx.poly_order, + .basis_type = app_args.basis_type, + + .geometry = { + .geometry_id = GKYL_GEOMETRY_MAPC2P, + .world = { ctx.psi_eval, 0.0 }, + .mapc2p = mapc2p, // Mapping of computational to physical space. + .c2p_ctx = &ctx, + .bfield_func = bfield_func, // Magnetic field. + .bfield_ctx = &ctx + }, + + .num_periodic_dir = 0, + .periodic_dirs = {}, + + .num_species = 1, + .species = { ion }, + + .field = field, + + .parallelism = { + .use_gpu = app_args.use_gpu, + .cuts = { app_args.cuts[0] }, + .comm = comm, + }, + }; + + // Set app output name from the executable name (argv[0]). + snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name); + + // Create app object. + gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp); + + // Triggers for IO. + struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag; + + struct time_frame_state tfs = { + .t_curr = 0.0, // Initial simulation time. + .frame_curr = 0, // Initial frame. + .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase. 
+ .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase. + }; + + int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index. + if (app_args.is_restart) { + struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, + app_args.restart_frame); + + if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) { + gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", + gkyl_array_rio_status_msg(status.io_status)); + goto freeresources; + } + + tfs.frame_curr = status.frame; + tfs.t_curr = status.stime; + + // Find out what phase we are in. + double time_count = 0.0; + int frame_count = 0; + int pit_curr = 0; + for (int pit = 0; pit < ctx.num_phases; pit++) { + time_count += ctx.poa_phases[pit].duration; + frame_count += ctx.poa_phases[pit].num_frames; + if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) { + pit_curr = pit; + break; + } + } + ; + phase_idx_init = pit_curr; + + // Change the duration and number frames so this phase reaches the expected + // time and number of frames and not beyond. + struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init]; + pparams->num_frames = frame_count - tfs.frame_curr; + pparams->duration = time_count - tfs.t_curr; + + gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr); + gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr); + } + else { + gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr); + + // Write out ICs. 
+ reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag); + + calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0); + write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true); + } + + if (app_args.num_steps != INT_MAX) + phase_idx_end = phase_idx_init + 1; // Step-limited runs execute only the phase they start in (phase 0, or the restart phase). + + // Loop over the phases to run. + for (int pit = phase_idx_init; pit < phase_idx_end; pit++) { + gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr); + struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit]; + run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, + &trig_calc_intdiag, &tfs, phase_params); + } + + gkyl_gyrokinetic_app_stat_write(app); + + struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics + gkyl_gyrokinetic_app_cout(app, stdout, "\n"); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail); + if (stat.nstage_2_fail > 0) { + gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", + stat.stage_2_dt_diff[1]); + gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", + stat.stage_2_dt_diff[0]); + } + gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail); + gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io); + gkyl_gyrokinetic_app_print_timings(app, stdout); + +freeresources: + // simulation complete, free app + gkyl_gyrokinetic_app_release(app); + gkyl_gyrokinetic_comms_release(comm); + release_ctx(&ctx); + +#ifdef GKYL_HAVE_MPI + if (app_args.use_mpi) + MPI_Finalize(); +#endif + return 0; +} \ No newline at end of file diff --git
a/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c index 3e9808b9d..3da11c59b 100644 --- a/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c +++ b/gyrokinetic/creg/rt_gk_wham_boltz_elc_poa_1x2v_p1.c @@ -685,9 +685,10 @@ int main(int argc, char **argv) }, }; - // Create app object. // Set app output name from the executable name (argv[0]). snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name); + + // Create app object. gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp); // Triggers for IO. diff --git a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c new file mode 100644 index 000000000..5ba8cae6d --- /dev/null +++ b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c @@ -0,0 +1,966 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +// State of the pseudo orbit-averaged integrator. +enum gk_poa_state { + GK_POA_NONE = 0, // Haven't started. + GK_POA_OAP, // Orbit averaged phase. + GK_POA_FDP, // Full dynamics phase. + GK_POA_COMPLETED, // Finished simulation. +}; + +struct gk_poa_phase_params { + enum gk_poa_state phase; // Type of phase. + int num_frames; // Number of frames. + double duration; // Duration. + double alpha; // Factor multiplying collisionless terms. + bool is_static_field; // Whether to evolve the field. + bool is_positivity_enabled; // Whether positivity is enabled. + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multipler. +}; + +// Define the context of the simulation. This is basically all the globals +struct gk_mirror_ctx { + int cdim, vdim; // Dimensionality. + // Plasma parameters + double mi; + double qi; + double me; + double qe; + double Te0; + double n0; + double B_p; + double beta; + double tau; + double Ti0; + double kperpRhos; + // Parameters controlling initial conditions. 
+ double alim; + double nuFrac; + // Electron-electron collision freq. + double logLambdaElc; + double nuElc; + double elc_nuFrac; + // Ion-ion collision freq. + double logLambdaIon; + double nuIon; + // Thermal speeds. + double vti; + double vte; + double c_s; + // Gyrofrequencies and gyroradii. + double omega_ci; + double rho_s; + double kperp; // Perpendicular wavenumber in SI units. + double RatZeq0; // Radius of the field line at Z=0. + // Axial coordinate z extents. + double z_min; + double z_max; + double psi_min; + double psi_eval; + double psi_max; + // Maximum vpar and mu of each species' velocity grid (set in create_ctx, read by the mapc2p_vel_* maps). + double vpar_max_ion; + double vpar_max_elc; + double mu_max_ion; + double mu_max_elc; + int Nz; + int Nvpar; + int Nmu; + int cells[GKYL_MAX_DIM]; // Number of cells in all directions. + int poly_order; + + double t_end; // End time. + int num_frames; // Number of output frames. + int num_phases; // Number of phases. + struct gk_poa_phase_params *poa_phases; // Phases to run (allocated in create_ctx, freed by release_ctx). + double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames). + double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames). + double dt_failure_tol; // Minimum allowable fraction of initial time-step. + int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+ + // Source parameters + double source_amplitude; + double source_sigma; + double ion_source_temp; + double elc_source_temp; +}; + +void +eval_density(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = 1e17; +} + +void +eval_upar(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = 0.0; +} + +void +eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->Ti0; +} + +void +eval_temp_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + fout[0] = app->Te0; +} + +void +eval_density_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xn[0]; + double src_amp = app->source_amplitude; + double z_src = 0.0; + double src_sigma = app->source_sigma; + double src_amp_floor = src_amp * 1e-2; + if (fabs(z) <= 1.0) { + fout[0] = src_amp * (1 - pow(fabs(z), 6)); + } + else { + fout[0] = 1e-16; + } +} + +void +eval_upar_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + fout[0] = 0.0; +} + +void +eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xn[0]; + double TSrc0 = app->ion_source_temp; + double Tfloor = TSrc0 * 1e-2; + if (fabs(z) <= 1.0) { + fout[0] = TSrc0; + } + else { + fout[0] = Tfloor; + } +} + +void +eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, + void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xn[0]; + double TSrc0 = app->elc_source_temp; // Using same temp as ion source for simplicity + double Tfloor = TSrc0 * 1e-2; + if (fabs(z) <= 1.0) { + fout[0] = TSrc0; + } + 
else { + fout[0] = Tfloor; + } +} + +// Potential initial condition +void +eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double z = xn[0]; + double z_m = 0.98; + double z_max = app->z_max; + double sigma = 0.2 * z_m; + double center_potential = 8.0 * app->Te0 / app->qi; + if (fabs(z) <= sigma) { + fout[0] = center_potential; + } + else { + fout[0] = center_potential * (1 - (fabs(z) - sigma) / (z_max - sigma)); + } +} + +void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double vpar_max_ion = app->vpar_max_ion; + double mu_max_ion = app->mu_max_ion; + + double cvpar = vc[0], cmu = vc[1]; + double b = 1.45; + double linear_velocity_threshold = 1. / 6.; + double frac_linear = 1 / b * atan(linear_velocity_threshold * tan(b)); + if (fabs(cvpar) < frac_linear) { + double func_frac = tan(frac_linear * b) / tan(b); + vp[0] = vpar_max_ion * func_frac * cvpar / frac_linear; + } + else { + vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b); + } + // Cubic map in mu (exponent 3). NOTE(review): the electron map uses exponent 3/2 -- confirm which is intended. + vp[1] = mu_max_ion * pow(cmu, 3); +} + +void mapc2p_vel_elc(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx) +{ + struct gk_mirror_ctx *app = ctx; + double vpar_max_elc = app->vpar_max_elc; + double mu_max_elc = app->mu_max_elc; + + double cvpar = vc[0], cmu = vc[1]; + double b = 1.45; + double linear_velocity_threshold = 1. / 6.; + double frac_linear = 1 / b * atan(linear_velocity_threshold * tan(b)); + if (fabs(cvpar) < frac_linear) { + double func_frac = tan(frac_linear * b) / tan(b); + vp[0] = vpar_max_elc * func_frac * cvpar / frac_linear; + } + else { + vp[0] = vpar_max_elc * tan(cvpar * b) / tan(b); + } + // Power-law map in mu with exponent 3/2 (not quadratic). NOTE(review): the ion map uses exponent 3 -- confirm which is intended. + vp[1] = mu_max_elc * pow(cmu, 3.0 / 2.0); +} + +struct gk_mirror_ctx +create_ctx(void) +{ + int cdim = 1, vdim = 2; // Dimensionality. + + // Universal constant parameters.
+ double eps0 = GKYL_EPSILON0; + double mu0 = GKYL_MU0; // Not sure if this is right + double eV = GKYL_ELEMENTARY_CHARGE; + double mp = GKYL_PROTON_MASS; // ion mass + double me = GKYL_ELECTRON_MASS; + double qi = eV; // ion charge + double qe = -eV; // electron charge + + // Plasma parameters. + double mi = 2.014 * mp; + double Te0 = 940 * eV; + double n0 = 3e19; + double B_p = 0.53; + double beta = 0.4; + double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.; + double Ti0 = tau * Te0; + double kperpRhos = 0.1; + + // Parameters controlling initial conditions. + double alim = 0.125; + double alphaIC0 = 2; + double alphaIC1 = 10; + + double nuFrac = 1.0; + double elc_nuFrac = 1 / 5.489216862238348; + // Electron-electron collision freq. + double logLambdaElc = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Te0 / eV); + double nuElc = elc_nuFrac * nuFrac * logLambdaElc * pow(eV, 4.) * n0 / + (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.)); + // Ion-ion collision freq. + double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV); + double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 / + (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.)); + + // Thermal speeds. + double vti = sqrt(Ti0 / mi); + double vte = sqrt(Te0 / me); + double c_s = sqrt(Te0 / mi); + + // Gyrofrequencies and gyroradii. + double omega_ci = eV * B_p / mi; + double rho_s = c_s / omega_ci; + + // Perpendicular wavenumber in SI units: + double kperp = kperpRhos / rho_s; + + // Geometry parameters. + double z_min = -2.0; + double z_max = 2.0; + double psi_eval = 1e-3; + + // Grid parameters + double vpar_max_elc = 30 * vte; + double mu_max_elc = me * pow(3. * vte, 2.) / (2. * B_p); + double vpar_max_ion = 30 * vti; + double mu_max_ion = mi * pow(3. * vti, 2.) / (2. 
* B_p); + int Nz = 32; + int Nvpar = 32; // Number of cells in the paralell velocity direction 96 + int Nmu = 16; // Number of cells in the mu direction 192 + int poly_order = 1; + + // Factor multiplying collisionless terms. + double alpha_oap = 0.01; + double alpha_fdp = 1.0; + // Duration of each phase. + double tau_oap = 1.5e-8; + double tau_fdp = 1.5e-10; + double tau_fdp_extra = 2 * tau_fdp; + int num_cycles = 2; // Number of OAP+FDP cycles to run. + + // Frame counts for each phase type (specified independently) + int num_frames_oap = 1; // Frames per OAP phase + int num_frames_fdp = 1; // Frames per FDP phase + int num_frames_fdp_extra = 2 * num_frames_fdp; // Frames for the extra FDP phase + + // Whether to evolve the field. + bool is_static_field_oap = true; + bool is_static_field_fdp = false; + // Whether to enable positivity. + bool is_positivity_enabled_oap = false; + bool is_positivity_enabled_fdp = true; + // Type of df/dt multipler. + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE; + enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE; + + // Calculate phase structure + double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra; + double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair. + int num_phases = 2 * num_cycles + 1; + int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra; + + struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * + sizeof(struct gk_poa_phase_params)); + for (int i = 0; i < (num_phases - 1) / 2; i++) { + // OAPs. + poa_phases[2 * i].phase = GK_POA_OAP; + poa_phases[2 * i].num_frames = num_frames_oap; + poa_phases[2 * i].duration = tau_oap; + poa_phases[2 * i].alpha = alpha_oap; + poa_phases[2 * i].is_static_field = is_static_field_oap; + poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap; + poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap; + + // FDPs. 
+ poa_phases[2 * i + 1].phase = GK_POA_FDP; + poa_phases[2 * i + 1].num_frames = num_frames_fdp; + poa_phases[2 * i + 1].duration = tau_fdp; + poa_phases[2 * i + 1].alpha = alpha_fdp; + poa_phases[2 * i + 1].is_static_field = is_static_field_fdp; + poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp; + poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp; + } + // Add an extra, longer FDP. + poa_phases[num_phases - 1].phase = GK_POA_FDP; + poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra; + poa_phases[num_phases - 1].duration = tau_fdp_extra; + poa_phases[num_phases - 1].alpha = alpha_fdp; + poa_phases[num_phases - 1].is_static_field = is_static_field_fdp; + poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp; + poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp; + + double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames). + double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames). + double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step. + int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps. + + // Source parameters + double source_amplitude = 1.e20; + double source_sigma = 0.5; + double ion_source_temp = 5000. * eV; + double elc_source_temp = 5000. 
* eV; // Using same temp as ion source for simplicity + + struct gk_mirror_ctx ctx = { + .cdim = cdim, + .vdim = vdim, + .mi = mi, + .qi = qi, + .me = me, + .qe = qe, + .Te0 = Te0, + .n0 = n0, + .B_p = B_p, + .beta = beta, + .tau = tau, + .Ti0 = Ti0, + .kperpRhos = kperpRhos, + .alim = alim, + .nuFrac = nuFrac, + .logLambdaElc = logLambdaElc, + .nuElc = nuElc, + .elc_nuFrac = elc_nuFrac, + .logLambdaIon = logLambdaIon, + .nuIon = nuIon, + .vti = vti, + .vte = vte, + .c_s = c_s, + .omega_ci = omega_ci, + .rho_s = rho_s, + .kperp = kperp, + .z_min = z_min, + .z_max = z_max, + .psi_eval = psi_eval, + .vpar_max_ion = vpar_max_ion, + .vpar_max_elc = vpar_max_elc, + .mu_max_ion = mu_max_ion, + .mu_max_elc = mu_max_elc, + .Nz = Nz, + .Nvpar = Nvpar, + .Nmu = Nmu, + .cells = { Nz, Nvpar, Nmu }, + .poly_order = poly_order, + .t_end = t_end, + .num_frames = num_frames, + .num_phases = num_phases, + .poa_phases = poa_phases, + .write_phase_freq = write_phase_freq, + .int_diag_calc_freq = int_diag_calc_freq, + .dt_failure_tol = dt_failure_tol, + .num_failures_max = num_failures_max, + }; + + return ctx; +} + +void +release_ctx(struct gk_mirror_ctx *ctx) +{ + gkyl_free(ctx->poa_phases); +} + +void +calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app, + double t_curr, bool force_calc, double dt) +{ + if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) { + gkyl_gyrokinetic_app_calc_field_energy(app, t_curr); + gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr); + + if (!(dt < 0.0) ) + gkyl_gyrokinetic_app_save_dt(app, t_curr, dt); + } +} + +void +write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase, + gkyl_gyrokinetic_app *app, double t_curr, bool force_write) +{ + bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr); + if (trig_now_conf || force_write) { + int frame = (!trig_now_conf) && force_write? 
iot_conf->curr : iot_conf->curr - 1; + gkyl_gyrokinetic_app_write_conf(app, t_curr, frame); + + gkyl_gyrokinetic_app_write_field_energy(app); + gkyl_gyrokinetic_app_write_integrated_mom(app); + gkyl_gyrokinetic_app_write_dt(app); + } + + bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr); + if (trig_now_phase || force_write) { + int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1; + + gkyl_gyrokinetic_app_write_phase(app, t_curr, frame); + } +} + +struct time_frame_state { + double t_curr; // Current simulation time. + double t_end; // End time of current phase. + int frame_curr; // Current frame. + int num_frames; // Number of frames at the end of current phase. +}; + +void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs, + struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase, + struct gkyl_tm_trigger *trig_calc_intdiag) +{ + // Reset I/O triggers: + double t_curr = tfs->t_curr; + double t_end = tfs->t_end; + int frame_curr = tfs->frame_curr; + int num_frames = tfs->num_frames; + int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames; + + // NOTE(review): no guard is applied here; if frame_curr equals num_frames then frames_remaining is 0 and the trigger dt divisions below divide by zero. + int frames_remaining = num_frames - frame_curr; + double time_remaining = t_end - t_curr; + + trig_write_conf->dt = time_remaining / frames_remaining; + trig_write_conf->tcurr = t_curr; + trig_write_conf->curr = frame_curr; + + trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining); + trig_write_phase->tcurr = t_curr; + trig_write_phase->curr = frame_curr; + + int diag_frames = GKYL_MAX2(frames_remaining, + (num_int_diag_calc / num_frames) * frames_remaining); + trig_calc_intdiag->dt = time_remaining / diag_frames; + trig_calc_intdiag->tcurr = t_curr; + trig_calc_intdiag->curr = frame_curr; +} + +void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps, + struct gkyl_tm_trigger *trig_write_conf, struct
gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  // Run a single phase described by pparams:
+  //  1. extend tfs by the phase duration and number of frames,
+  //  2. re-arm the I/O triggers for the new end time / frame count,
+  //  3. reset the collisionless, fdot-multiplier, positivity and field inputs
+  //     of the app for both species ("ion" and "elc"),
+  //  4. time-step until t_end is reached or num_steps steps were taken,
+  //  5. store the reached time and frame back into tfs.
+  tfs->t_end = tfs->t_curr + pparams->duration;
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames;
+
+  // Run an OAP or FDP.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+
+  // Reset I/O triggers:
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Reset simulation parameters and function pointers.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .type = pparams->fdot_mult_type,
+    .cellwise_const = true,
+    .write_diagnostics = true,
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .polarization_bmag = ctx->B_p,
+    .kperpSq = pow(ctx->kperp, 2.),
+    .is_static = pparams->is_static_field,
+    .time_rate_diagnostics = true,
+    .polarization_potential = eval_potential,
+    // ctx is already a pointer to the context. Passing &ctx would hand the
+    // callback the address of this function's parameter (a pointer to a
+    // pointer), which also stops being valid once run_phase returns.
+    .polarization_potential_ctx = ctx,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "elc", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "elc", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "elc", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Compute initial guess of maximum stable time-step.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check.
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps)) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
+
+    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success) {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested;
+
+    // Force output on the step that reaches the end of the phase.
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end);
+
+    // The first accepted step defines the reference dt_init; later steps that
+    // fall below dt_failure_tol*dt_init count as failures, and any step that
+    // does not fall below it resets the counter.
+    if (dt_init < 0.0) {
+      dt_init = status.dt_actual;
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
+        // Write the current state before giving up.
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break;
+      }
+    }
+    else {
+      num_failures = 0;
+    }
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
+}
+
+int main(int argc, char **argv)
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d = 0; d < ctx.cdim; d++) {
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species elc = {
+    .name = "elc",
+    .charge = ctx.qe,
+    .mass = ctx.me,
+    .vdim = ctx.vdim,
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_elc,
+      .ctx = &ctx,
+    },
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .density = eval_density,
+      .ctx_density = &ctx,
+      .upar = eval_upar,
+      .ctx_upar = &ctx,
+      .temp = eval_temp_elc,
+      .ctx_temp = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .num_cross_collisions = 1,
+      .collide_with = { "ion" },
+      .write_diagnostics = true,
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_elc_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 5,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+          GKYL_F_MOMENT_M2PERP },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+      }
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+    },
+  };
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,
+    .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
+    .cells = { cells_v[0], cells_v[1] },
+    .polarization_density = ctx.n0,
+    .scale_with_polarization = true,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .density = eval_density,
+      .ctx_density = &ctx,
+      .upar = eval_upar,
+      .ctx_upar = &ctx,
+      .temp = eval_temp_ion,
+      .ctx_temp = &ctx,
+    },
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .time_rate_multiplier = {
+      .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // So solvers are allocated.
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Ti0,
+      .num_cross_collisions = 1,
+      .collide_with = { "elc" },
+      .write_diagnostics = true,
+    },
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_ion_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 6,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+          GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_HAMILTONIAN },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+      },
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    },
+  };
+  struct gkyl_gyrokinetic_field field = {
+    .polarization_bmag = ctx.B_p,
+    .kperpSq = pow(ctx.kperp, 2.),
+    .time_rate_diagnostics = true,
+    .is_static = false,
+    .polarization_potential = eval_potential,
+    .polarization_potential_ctx = &ctx,
+  };
+
+  struct gkyl_mirror_geo_grid_inp grid_inp = {
+    .filename_psi = "gyrokinetic/data/unit/wham_hires.geqdsk_psi.gkyl", // psi file to use
+    .rclose = 0.2, // closest R to region of interest
+    .zmin = -2.0, // Z of lower boundary
+    .zmax = 2.0, // Z of upper boundary
+    .include_axis = false, // Include R=0 axis in grid
+    .fl_coord = GKYL_GEOMETRY_MIRROR_GRID_GEN_PSI_CART_Z, // coordinate system for psi grid
+  };
+
+  struct gkyl_gk app_inp = { // GK app
+    .name = "gk_wham_kinetic_poa_1x2v_p1",
+    .cdim = ctx.cdim,
+    .lower = { ctx.z_min },
+    .upper = { ctx.z_max },
+    .cells = { cells_x[0] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+    .geometry = {
+      .geometry_id = GKYL_GEOMETRY_MIRROR,
+      .world = { ctx.psi_eval, 0.0 },
+      .mirror_grid_info = grid_inp,
+    },
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+    .num_species = 2,
+    .species = { elc, ion },
+    .field = field,
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0] },
+      .comm = comm,
+    },
+  };
+
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in: the first phase whose cumulative end
+    // time and cumulative frame count have not been passed yet.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    }
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number of frames so this phase reaches the
+    // expected time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  // With a step limit only one phase is run. Count it from phase_idx_init so a
+  // restart that lands in a later phase still runs that phase; a fixed end
+  // index of 1 would leave the loop below empty in that case.
+  if (app_args.num_steps != INT_MAX) {
+    phase_idx_end = phase_idx_init + 1;
+  }
+
+  // Loop over the phases.
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c
index ae0e96a7b..07c910376 100644
---
a/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c +++ b/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c @@ -812,7 +812,7 @@ int main(int argc, char **argv) .map_strength = 0.5, .maximum_slope_at_min_B = 2, .gaussian_std = 0.2, - .gaussian_max_integration_width = 1.0, + .gaussian_max_integration_width = 0.5, }, }, diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c index 4d590b952..f7eaab745 100644 --- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c +++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c @@ -1,5 +1,7 @@ #include +#define _USE_MATH_DEFINES +#include #include #include #include @@ -14,8 +16,13 @@ #include #include #include +#include #include +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + struct loss_cone_mask_test_ctx { int cdim; // Configuration space dimensionality. double eV; // Elementary charge. @@ -25,30 +32,34 @@ struct loss_cone_mask_test_ctx { double mass, charge; // Species mass and charge. double n0, T0, B0; // Reference parameters. double phi_fac; // phi(z=0) = phi_fac*T0/e; + double psi_max; // For 2x: upper limit of psi (radial coordinate). double z_max, vpar_max, mu_max; // Upper grid extents. - int Nz, Nvpar, Nmu; // Number of cells in each direction. + int Npsi, Nz, Nvpar, Nmu; // Number of cells in each direction. enum gkyl_quad_type quad_type; // Type of quadrature/nodes. int num_quad; // Number of quadrature points to use in projection, 1 or p+1. bool cellwise_trap_loss; // Whether a whole cell is either trapped or lost. + bool is_tandem; // Whether this is a tandem mirror configuration. + double B_tandem; // Field at tandem mirror (for tandem case). + double z_tandem; // z-coordinate of tandem mirror (for tandem case). }; // allocate array (filled with zeros) static struct gkyl_array* mkarr(bool use_gpu, long nc, long size) { - struct gkyl_array* a = use_gpu? 
gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size) - : gkyl_array_new(GKYL_DOUBLE, nc, size); + struct gkyl_array *a = use_gpu? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size) + : gkyl_array_new(GKYL_DOUBLE, nc, size); return a; } void -mapc2p_3x(double t, const double *xc, double* GKYL_RESTRICT xp, void *ctx) +mapc2p_3x(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx) { xp[0] = xc[0]; xp[1] = xc[1]; xp[2] = xc[2]; } void -bfield_func_3x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx) +bfield_func_3x(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) { double x = xc[0], y = xc[1], z = xc[2]; @@ -58,12 +69,26 @@ bfield_func_3x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx fout[0] = 0.0; fout[1] = 0.0; - fout[2] = B_m * (1.0 - ((R_m-1.0)/R_m)*pow(cos(z), 2.0)); -// fout[0] = (B_m/R_m) * (1.0 + (R_m-1.0)*pow(sin(z), 2.0)); + fout[2] = B_m * (1.0 - ((R_m - 1.0) / R_m) * pow(cos(z), 2.0)); +// fout[0] = (B_m/R_m) * (1.0 + (R_m-1.0)*pow(sin(z), 2.0)); +} + +void +phi_func_1x(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) +{ + double z = xc[0]; + + struct loss_cone_mask_test_ctx *params = ctx; + double phi_fac = params->phi_fac; + double T0 = params->T0; + double eV = params->eV; + + fout[0] = 0.0; // 0.5 * phi_fac*T0/eV * (1.0 + cos(z)); } +// Non-zero electrostatic potential: peaked at center, zero at wall. void -phi_func_1x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx) +phi_func_1x_nonzero(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) { double z = xc[0]; @@ -71,12 +96,51 @@ phi_func_1x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx) double phi_fac = params->phi_fac; double T0 = params->T0; double eV = params->eV; + double z_max = params->z_max; + + // Parabolic potential profile: phi(z) = phi_fac*T0/eV * (1 - (z/z_max)^2) + // This gives phi=phi_fac*T0/eV at z=0, and phi=0 at z=+/-z_max. 
+ fout[0] = phi_fac * T0 / eV * (1.0 - pow(z / z_max, 2.0)); +} + +// Reference mask for nonzero phi case. +void +mask_ref_1x2v_nonzero_phi(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) +{ + double z = xc[0], vpar = xc[1], mu = xc[2]; + struct loss_cone_mask_test_ctx *params = ctx; + + double z_m = params->z_m; + double mass = params->mass; + double charge = params->charge; + + double phi, phi_m; + phi_func_1x_nonzero(t, xc, &phi, ctx); + phi_func_1x_nonzero(t, &z_m, &phi_m, ctx); + + double bfield[3], bmag; + double zinfl[3] = { 0.0 }, z_minfl[3] = { 0.0 }; + zinfl[2] = z, z_minfl[2] = z_m; + bfield_func_3x(t, zinfl, bfield, ctx); + bmag = bfield[2]; + + double bfield_m[3], bmag_m; + bfield_func_3x(t, z_minfl, bfield_m, ctx); + bmag_m = bfield_m[2]; - fout[0] = 0.0; //0.5 * phi_fac*T0/eV * (1.0 + cos(z)); + // mu_bound = (0.5*m*vpar^2+q*(phi-phi_m))/(B*(B_max/B-1)) + double mu_bound = (0.5 * mass * pow(vpar, + 2) + charge * (phi - phi_m)) / (bmag * (bmag_m / bmag - 1)); + if (mu_bound < mu && fabs(z) < z_m) { + fout[0] = 1.0; + } + else { + fout[0] = 0.0; + } } void -mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx) +mask_ref_1x2v(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx) { double z = xc[0], vpar = xc[1], mu = xc[2]; struct loss_cone_mask_test_ctx *params = ctx; @@ -90,7 +154,7 @@ mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx) phi_func_1x(t, &z_m, &phi_m, ctx); double bfield[3], bmag; - double zinfl[3] = {0.0}, z_minfl[3] = {0.0}; + double zinfl[3] = { 0.0 }, z_minfl[3] = { 0.0 }; zinfl[2] = z, z_minfl[2] = z_m; bfield_func_3x(t, zinfl, bfield, ctx); bmag = bfield[2]; @@ -100,7 +164,8 @@ mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx) bmag_m = bfield_m[2]; // mu_bound = (0.5*m*vpar^2+q*(phi-phi_m))/(B*(B_max/B-1)) - double mu_bound = (0.5*mass*pow(vpar,2)+charge*(phi-phi_m))/(bmag*(bmag_m/bmag-1)); + double mu_bound = (0.5 
* mass * pow(vpar, + 2) + charge * (phi - phi_m)) / (bmag * (bmag_m / bmag - 1)); if (mu_bound < mu && fabs(z) < z_m) fout[0] = 1.0; else @@ -120,11 +185,11 @@ test_1x2v_gk(int poly_order, bool use_gpu) .eV = eV, .R_m = 8.0, .B_m = 4.0, - .z_m = M_PI/2.0, - .mass = 2.014*mass_proton, + .z_m = M_PI / 2.0, + .mass = 2.014 * mass_proton, .charge = eV, .n0 = 1e18, - .T0 = 100*eV, + .T0 = 100 * eV, .phi_fac = 3.0, .z_max = M_PI, .Nz = 8, @@ -134,31 +199,32 @@ test_1x2v_gk(int poly_order, bool use_gpu) .num_quad = 2, .cellwise_trap_loss = true, }; - ctx.B0 = ctx.B_m/2.0; - ctx.vpar_max = 6.0*sqrt(ctx.T0/ctx.mass); - ctx.mu_max = 0.5*ctx.mass*pow(ctx.vpar_max,2)/ctx.B0; + ctx.B0 = ctx.B_m / 2.0; + ctx.vpar_max = 6.0 * sqrt(ctx.T0 / ctx.mass); + ctx.mu_max = 0.5 * ctx.mass * pow(ctx.vpar_max, 2) / ctx.B0; double mass = ctx.mass; - double lower[] = {-ctx.z_max, -ctx.vpar_max, 0.0}, upper[] = {ctx.z_max, ctx.vpar_max, ctx.mu_max}; - int cells[] = {ctx.Nz, ctx.Nvpar, ctx.Nmu}; - const int ndim = sizeof(cells)/sizeof(cells[0]); + double lower[] = { -ctx.z_max, -ctx.vpar_max, 0.0 }, + upper[] = { ctx.z_max, ctx.vpar_max, ctx.mu_max }; + int cells[] = { ctx.Nz, ctx.Nvpar, ctx.Nmu }; + const int ndim = sizeof(cells) / sizeof(cells[0]); const int cdim = ctx.cdim; - const int vdim = ndim-ctx.cdim; + const int vdim = ndim - ctx.cdim; // Grids. double lower_conf[cdim], upper_conf[cdim]; int cells_conf[cdim]; - for (int d=0; dgeo_int.bmag); + gkyl_array_dg_find_peaks_advance(bmag_peak_finder, gk_geom->geo_int.bmag); + + // Get the LOCAL_MAX peak (bmag maximum along z direction). 
+ int num_peaks = gkyl_array_dg_find_peaks_num_peaks(bmag_peak_finder); + int bmag_max_peak_idx = num_peaks - 2; // Edge is num_peaks-1, so maximum is one less + const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, + bmag_max_peak_idx); + const struct gkyl_array *bmag_max_z_coord = + gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, bmag_max_peak_idx); + const struct gkyl_array *bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, + num_peaks - 1); // First peak is wall + const struct gkyl_array *bmag_wall_z_coord = + gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, num_peaks - 1); + const struct gkyl_basis *bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(bmag_peak_finder); + const struct gkyl_range *bmag_max_range = gkyl_array_dg_find_peaks_get_range(bmag_peak_finder); + const struct gkyl_range *bmag_max_range_ext = + gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder); + + // Allocate arrays for phi evaluated at all peak locations. + struct gkyl_array **phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array *)); + for (int p = 0; p < num_peaks; p++) { + phi_at_peaks[p] = mkarr(use_gpu, bmag_max_basis->num_basis, bmag_max_range_ext->volume); + } + // If we are on the gpu, copy from host if (use_gpu) { - struct gk_geometry* gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu); + struct gk_geometry *gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu); gkyl_gk_geometry_release(gk_geom); gk_geom = gkyl_gk_geometry_acquire(gk_geom_dev); gkyl_gk_geometry_release(gk_geom_dev); @@ -248,82 +355,55 @@ test_1x2v_gk(int poly_order, bool use_gpu) // Project the electostatic potential. struct gkyl_array *phi = mkarr(use_gpu, basis_conf.num_basis, local_ext_conf.volume); struct gkyl_array *phi_ho = use_gpu? 
mkarr(false, phi->ncomp, phi->size) - : gkyl_array_acquire(phi); + : gkyl_array_acquire(phi); gkyl_eval_on_nodes *evphi = gkyl_eval_on_nodes_new(&grid_conf, &basis_conf, 1, phi_func_1x, &ctx); gkyl_eval_on_nodes_advance(evphi, 0.0, &local_conf, phi_ho); gkyl_eval_on_nodes_release(evphi); gkyl_array_copy(phi, phi_ho); - // Location of the mirror throat. - double bmag_max_loc_ho[] = {ctx.z_m}; - double *bmag_max_loc; - if (use_gpu) { - bmag_max_loc = gkyl_cu_malloc(sizeof(double)); - gkyl_cu_memcpy(bmag_max_loc, bmag_max_loc_ho, sizeof(double), GKYL_CU_MEMCPY_H2D); - } - else { - bmag_max_loc = gkyl_malloc(sizeof(double)); - memcpy(bmag_max_loc, bmag_max_loc_ho, sizeof(double)); - } - - // Get the magnetic field at the mirror throat. - double bfield_max_ho[3], bmag_max_ho[1]; - double xc_infl[] = {0.0,0.0,ctx.z_m}; - bfield_func_3x(0.0, xc_infl, bfield_max_ho, &ctx); - bmag_max_ho[0] = bfield_max_ho[2]; - double *bmag_max; - if (use_gpu) { - bmag_max = gkyl_cu_malloc(sizeof(double)); - gkyl_cu_memcpy(bmag_max, bmag_max_ho, sizeof(double), GKYL_CU_MEMCPY_H2D); - } - else { - bmag_max = gkyl_malloc(sizeof(double)); - memcpy(bmag_max, bmag_max_ho, sizeof(double)); - } + // Project phi onto peak locations to get phi_m at the mirror throat. + gkyl_array_dg_find_peaks_project_on_peaks(bmag_peak_finder, phi, phi_at_peaks); - // Get the potential at the mirror throat (z=pi/2). - double phi_m_ho[1]; - double xc[] = {ctx.z_m}; - phi_func_1x(0.0, xc, phi_m_ho, &ctx); - double *phi_m; - if (use_gpu) { - phi_m = gkyl_cu_malloc(sizeof(double)); - gkyl_cu_memcpy(phi_m, phi_m_ho, sizeof(double), GKYL_CU_MEMCPY_H2D); - } - else { - phi_m = gkyl_malloc(sizeof(double)); - memcpy(phi_m, phi_m_ho, sizeof(double)); - } + // Get phi at the mirror throat (bmag_max peak location). + const struct gkyl_array *phi_m = phi_at_peaks[bmag_max_peak_idx]; // Basis used to project the mask. 
struct gkyl_basis basis_mask; - if (ctx.num_quad == 1 || ctx.cellwise_trap_loss) + if (ctx.num_quad == 1 || ctx.cellwise_trap_loss) { gkyl_cart_modal_serendip(&basis_mask, ndim, 0); + } else { - if (poly_order == 1) + if (poly_order == 1) { gkyl_cart_modal_gkhybrid(&basis_mask, cdim, vdim); - else + } + else { gkyl_cart_modal_serendip(&basis_mask, ndim, poly_order); + } } // Create mask array. struct gkyl_array *mask = mkarr(use_gpu, basis_mask.num_basis, local_ext.volume); struct gkyl_array *mask_ho = use_gpu? mkarr(false, mask->ncomp, mask->size) - : gkyl_array_acquire(mask); + : gkyl_array_acquire(mask); // Project the loss cone mask. + // Use bmag_max and bmag_max_z_coord arrays from find_peaks. struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = { .phase_grid = &grid, .conf_basis = &basis_conf, .phase_basis = &basis, - .conf_range = &local_conf, + .conf_range = &local_conf, .conf_range_ext = &local_ext_conf, - .vel_range = &local_vel, + .vel_range = &local_vel, .vel_map = gvm, .bmag = gk_geom->geo_int.bmag, + .bmag_max_z_coord = bmag_max_z_coord, .bmag_max = bmag_max, - .bmag_max_loc = bmag_max_loc, + .bmag_wall = bmag_wall, + .bmag_wall_z_coord = bmag_wall_z_coord, + .bmag_max_basis = bmag_max_basis, + .bmag_max_range = bmag_max_range, .mass = ctx.mass, .charge = ctx.charge, .qtype = ctx.quad_type, @@ -331,93 +411,463 @@ test_1x2v_gk(int poly_order, bool use_gpu) .cellwise_trap_loss = ctx.cellwise_trap_loss, .use_gpu = use_gpu, }; - struct gkyl_loss_cone_mask_gyrokinetic *proj_mask = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj ); + struct gkyl_loss_cone_mask_gyrokinetic *proj_mask = + gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj); - gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, mask); + gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, phi_m, mask); gkyl_array_copy(mask_ho, mask); // Project expected mask. 
struct gkyl_array *mask_ref_ho = mkarr(false, basis_mask.num_basis, local_ext.volume); - gkyl_proj_on_basis *evmask_ref = gkyl_proj_on_basis_new(&grid, &basis_mask, basis_mask.poly_order+1, 1, mask_ref_1x2v, &ctx); + gkyl_proj_on_basis *evmask_ref = gkyl_proj_on_basis_new(&grid, &basis_mask, + basis_mask.poly_order + 1, 1, mask_ref_1x2v, &ctx); gkyl_proj_on_basis_advance(evmask_ref, 0.0, &local, mask_ref_ho); gkyl_proj_on_basis_release(evmask_ref); - if (ctx.num_quad == 1) { - // Rescale to deal with normalization. - gkyl_array_scale(mask_ref_ho, 1.0/pow(sqrt(2.0),cdim+vdim)); - } - -// // values to compare at index (1, 9, 9) [remember, lower-left index is (1,1,1)] -// double p1_vals[] = { -// 7.2307139183122714e-03, 0.0000000000000000e+00, 1.9198293226362615e-04, -7.7970439910196674e-04, 0.0000000000000000e+00, 0.0000000000000000e+00, -// -2.0701958137127286e-05, 0.0000000000000000e+00, -1.4953406100022537e-04, 0.0000000000000000e+00, 1.6124599381836546e-05, 0.0000000000000000e+00, -// -8.2719200283232917e-19, 0.0000000000000000e+00, -3.4806248503322844e-20, 0.0000000000000000e+00, }; -// double p2_vals[] = { -// 7.2307468609012666e-03, 0.0000000000000000e+00, 1.9198380692343289e-04, -7.8092230706225602e-04, 0.0000000000000000e+00, 0.0000000000000000e+00, -// -2.0734294852987710e-05, 3.6591823321385775e-18, -1.4953474226616330e-04, 3.7739922227981074e-05, 0.0000000000000000e+00, 7.0473141211557788e-19, -// 0.0000000000000000e+00, -4.8789097761847700e-19, 1.6149786206441256e-05, 0.0000000000000000e+00, 1.0020339643610290e-06, 5.4210108624275222e-20, -// 0.0000000000000000e+00, 0.0000000000000000e+00 }; + +//// values to compare at index (1, 9, 9) [remember, lower-left index is (1,1,1)] +// double p1_vals[] = { +// 7.2307139183122714e-03, 0.0000000000000000e+00, 1.9198293226362615e-04, -7.7970439910196674e-04, 0.0000000000000000e+00, 0.0000000000000000e+00, +// -2.0701958137127286e-05, 0.0000000000000000e+00, -1.4953406100022537e-04, 0.0000000000000000e+00, 
1.6124599381836546e-05, 0.0000000000000000e+00, +// -8.2719200283232917e-19, 0.0000000000000000e+00, -3.4806248503322844e-20, 0.0000000000000000e+00, }; +// double p2_vals[] = { +// 7.2307468609012666e-03, 0.0000000000000000e+00, 1.9198380692343289e-04, -7.8092230706225602e-04, 0.0000000000000000e+00, 0.0000000000000000e+00, +// -2.0734294852987710e-05, 3.6591823321385775e-18, -1.4953474226616330e-04, 3.7739922227981074e-05, 0.0000000000000000e+00, 7.0473141211557788e-19, +// 0.0000000000000000e+00, -4.8789097761847700e-19, 1.6149786206441256e-05, 0.0000000000000000e+00, 1.0020339643610290e-06, 5.4210108624275222e-20, +// 0.0000000000000000e+00, 0.0000000000000000e+00 }; // -// const double *fv = gkyl_array_cfetch(distf, gkyl_range_idx(&local_ext, (int[3]) { 1, 9, 9 })); -// if (poly_order == 1) { -// for (int i=0; igeo_int.bmag); + gkyl_array_dg_find_peaks_advance(bmag_peak_finder, gk_geom->geo_int.bmag); + + int num_peaks = gkyl_array_dg_find_peaks_num_peaks(bmag_peak_finder); + int bmag_max_peak_idx = num_peaks - 2; + const struct gkyl_array *bmag_max = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, + bmag_max_peak_idx); + const struct gkyl_array *bmag_max_z_coord = + gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, bmag_max_peak_idx); + const struct gkyl_array *bmag_wall = gkyl_array_dg_find_peaks_acquire_vals(bmag_peak_finder, + num_peaks - 1); + const struct gkyl_array *bmag_wall_z_coord = + gkyl_array_dg_find_peaks_acquire_coords(bmag_peak_finder, num_peaks - 1); + const struct gkyl_basis *bmag_max_basis = gkyl_array_dg_find_peaks_get_basis(bmag_peak_finder); + const struct gkyl_range *bmag_max_range = gkyl_array_dg_find_peaks_get_range(bmag_peak_finder); + const struct gkyl_range *bmag_max_range_ext = + gkyl_array_dg_find_peaks_get_range_ext(bmag_peak_finder); + + // Allocate arrays for phi evaluated at peak locations. 
+ struct gkyl_array **phi_at_peaks = gkyl_malloc(num_peaks * sizeof(struct gkyl_array *)); + for (int p = 0; p < num_peaks; p++) { + phi_at_peaks[p] = mkarr(use_gpu, bmag_max_basis->num_basis, bmag_max_range_ext->volume); + } + + if (use_gpu) { + struct gk_geometry *gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu); + gkyl_gk_geometry_release(gk_geom); + gk_geom = gkyl_gk_geometry_acquire(gk_geom_dev); + gkyl_gk_geometry_release(gk_geom_dev); + } + + // Velocity space mapping. + struct gkyl_mapc2p_inp c2p_in = { }; + struct gkyl_velocity_map *gvm = gkyl_velocity_map_new(c2p_in, grid, grid_vel, + local, local_ext, local_vel, local_ext_vel, use_gpu); + + // Project the electrostatic potential with NON-ZERO phi. + struct gkyl_array *phi = mkarr(use_gpu, basis_conf.num_basis, local_ext_conf.volume); + struct gkyl_array *phi_ho = use_gpu ? mkarr(false, phi->ncomp, phi->size) + : gkyl_array_acquire(phi); + + gkyl_eval_on_nodes *evphi = gkyl_eval_on_nodes_new(&grid_conf, &basis_conf, 1, + phi_func_1x_nonzero, &ctx); + gkyl_eval_on_nodes_advance(evphi, 0.0, &local_conf, phi_ho); + gkyl_eval_on_nodes_release(evphi); + gkyl_array_copy(phi, phi_ho); + + // Project phi onto peak locations to get phi_m. + gkyl_array_dg_find_peaks_project_on_peaks(bmag_peak_finder, phi, phi_at_peaks); + const struct gkyl_array *phi_m = phi_at_peaks[bmag_max_peak_idx]; + + // Basis used to project the mask. + struct gkyl_basis basis_mask; + if (ctx.num_quad == 1 || ctx.cellwise_trap_loss) { + gkyl_cart_modal_serendip(&basis_mask, ndim, 0); + } + else { + if (poly_order == 1) { + gkyl_cart_modal_gkhybrid(&basis_mask, cdim, vdim); + } + else { + gkyl_cart_modal_serendip(&basis_mask, ndim, poly_order); + } + } + + // Create mask array. + struct gkyl_array *mask = mkarr(use_gpu, basis_mask.num_basis, local_ext.volume); + struct gkyl_array *mask_ho = use_gpu ? mkarr(false, mask->ncomp, mask->size) + : gkyl_array_acquire(mask); + + // Project the loss cone mask. 
+ struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = { + .phase_grid = &grid, + .conf_basis = &basis_conf, + .phase_basis = &basis, + .conf_range = &local_conf, + .conf_range_ext = &local_ext_conf, + .vel_range = &local_vel, + .vel_map = gvm, + .bmag = gk_geom->geo_int.bmag, + .bmag_max_z_coord = bmag_max_z_coord, + .bmag_max = bmag_max, + .bmag_wall = bmag_wall, + .bmag_wall_z_coord = bmag_wall_z_coord, + .bmag_max_basis = bmag_max_basis, + .bmag_max_range = bmag_max_range, + .mass = ctx.mass, + .charge = ctx.charge, + .qtype = ctx.quad_type, + .num_quad = ctx.num_quad, + .cellwise_trap_loss = ctx.cellwise_trap_loss, + .use_gpu = use_gpu, + }; + struct gkyl_loss_cone_mask_gyrokinetic *proj_mask = + gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj); + + gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, phi_m, mask); + + gkyl_array_copy(mask_ho, mask); + + // Verify physical properties of the mask: + // 1. At the center (z≈0), high-mu particles should be trapped (mask=1) + // 2. At the wall (|z| ≈ z_max), particles should not be in the trapped region + // 3. Low-mu particles near center should be passing (mask=0) + + // Check specific cells to verify correct behavior. + // Cell indices: [iz, ivpar, imu] where each starts at 1 in local range. 
+ // Grid: z in [-pi, pi], vpar in [-vpar_max, vpar_max], mu in [0, mu_max] + // Central z cells are around iz=4,5 (8 cells, symmetric) + // High mu cells are imu=3,4 (4 cells) + // Low mu cells are imu=1 + + int num_trapped_high_mu_center = 0; + int num_passing_low_mu_center = 0; + int total_high_mu_center = 0; + int total_low_mu_center = 0; + + struct gkyl_range_iter iter; + gkyl_range_iter_init(&iter, &local); + while (gkyl_range_iter_next(&iter)) { + int iz = iter.idx[0]; + int imu = iter.idx[2]; + + // Determine if we're at center (iz = 4 or 5 for 8 cells in [-pi, pi]) + // and if we're at high mu (imu = 3 or 4) or low mu (imu = 1) + bool is_center = (iz == 4 || iz == 5); + bool is_high_mu = (imu == 3 || imu == 4); + bool is_low_mu = (imu == 1); + + long linidx = gkyl_range_idx(&local, iter.idx); + const double *mask_val = gkyl_array_cfetch(mask_ho, linidx); + + if (is_center && is_high_mu) { + total_high_mu_center++; + if (mask_val[0] > 0.5) { + num_trapped_high_mu_center++; + } + } + if (is_center && is_low_mu) { + total_low_mu_center++; + if (mask_val[0] < 0.5) { + num_passing_low_mu_center++; + } + } + } + + // High mu particles at center should mostly be trapped. + double trapped_frac = (double)num_trapped_high_mu_center / (double)total_high_mu_center; + // printf("Trapped fraction for high-mu center particles: %g (%d / %d)\n", + // trapped_frac, num_trapped_high_mu_center, total_high_mu_center); + TEST_CHECK(trapped_frac >= 0.5); + if (trapped_frac < 0.5) { + printf("High-mu center trapped fraction: %g (%d / %d)\n", + trapped_frac, num_trapped_high_mu_center, total_high_mu_center); + } + + // Low mu particles at center should mostly be passing. 
+ double passing_frac = (double)num_passing_low_mu_center / (double)total_low_mu_center; + // printf("Passing fraction for low-mu center particles: %g (%d / %d)\n", + // passing_frac, num_passing_low_mu_center, total_low_mu_center); + TEST_CHECK(passing_frac >= 0.5); + if (passing_frac < 0.5) { + printf("Low-mu center passing fraction: %g (%d / %d)\n", + passing_frac, num_passing_low_mu_center, total_low_mu_center); + } + + // Write output for debugging. + char fname[1024]; if (use_gpu) { - gkyl_cu_free(bmag_max); - gkyl_cu_free(phi_m); + sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_nonzero_phi_p%d_dev.gkyl", poly_order); } else { - gkyl_free(bmag_max); - gkyl_free(phi_m); + sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_nonzero_phi_p%d_ho.gkyl", poly_order); + } + gkyl_grid_sub_array_write(&grid, &local, 0, mask_ho, fname); + + // Cleanup. + for (int p = 0; p < num_peaks; p++) { + gkyl_array_release(phi_at_peaks[p]); } - gkyl_array_release(phi); - gkyl_array_release(phi_ho); - gkyl_array_release(mask); + gkyl_free(phi_at_peaks); + gkyl_array_release(phi); + gkyl_array_release(phi_ho); + gkyl_array_release(mask); gkyl_array_release(mask_ho); - gkyl_array_release(mask_ref_ho); gkyl_loss_cone_mask_gyrokinetic_release(proj_mask); gkyl_velocity_map_release(gvm); - gkyl_gk_geometry_release(gk_geom); + gkyl_array_release(bmag_max); + gkyl_array_release(bmag_max_z_coord); + gkyl_array_release(bmag_wall); + gkyl_array_release(bmag_wall_z_coord); + gkyl_array_dg_find_peaks_release(bmag_peak_finder); gkyl_position_map_release(pmap); + gkyl_gk_geometry_release(gk_geom); #ifdef GKYL_HAVE_CUDA if (use_gpu) { gkyl_cu_free(basis_on_dev); gkyl_cu_free(basis_on_dev_conf); } -#endif +#endif +} + +void test_1x2v_p1_gk_ho() +{ + test_1x2v_gk(1, false); } -void test_1x2v_p1_gk_ho() { test_1x2v_gk(1, false); } +void test_1x2v_p1_nonzero_phi_gk_ho() +{ + test_1x2v_nonzero_phi_gk(1, false); +} #ifdef GKYL_HAVE_CUDA -void test_1x2v_p1_gk_dev() { test_1x2v_gk(1, true); } 
+void test_1x2v_p1_gk_dev() +{ + test_1x2v_gk(1, true); +} + +void test_1x2v_p1_nonzero_phi_gk_dev() +{ + test_1x2v_nonzero_phi_gk(1, true); +} + #endif TEST_LIST = { { "test_1x2v_p1_gk_ho", test_1x2v_p1_gk_ho }, + { "test_1x2v_p1_nonzero_phi_gk_ho", test_1x2v_p1_nonzero_phi_gk_ho }, #ifdef GKYL_HAVE_CUDA { "test_1x2v_p1_gk_dev", test_1x2v_p1_gk_dev }, + { "test_1x2v_p1_nonzero_phi_gk_dev", test_1x2v_p1_nonzero_phi_gk_dev }, #endif { NULL, NULL }, }; diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h index 36e12300c..7bb388349 100644 --- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h +++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h @@ -10,13 +10,10 @@ // Object type. typedef struct gkyl_loss_cone_mask_gyrokinetic gkyl_loss_cone_mask_gyrokinetic; -// Type of function expected for the ctp_pos_func input. -typedef void (*loss_cone_mask_gyrokinetic_c2p_t)(const double *xcomp, double *xphys, void *ctx); - // Available options: -// A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array. -// B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=true. Output: ncomp=1 array. -// C) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=false. Output: ncomp=phase_basis.ncomp array. +// A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array. +// B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=true. Output: ncomp=1 array. +// C) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=false. Output: ncomp=phase_basis.ncomp array. // Inputs packaged as a struct. struct gkyl_loss_cone_mask_gyrokinetic_inp { @@ -27,18 +24,22 @@ struct gkyl_loss_cone_mask_gyrokinetic_inp { const struct gkyl_range *conf_range_ext; // Extended configuration-space range (for internal memory allocations). const struct gkyl_range *vel_range; // Velocity space range. 
const struct gkyl_velocity_map *vel_map; // Velocity space mapping object. - const struct gkyl_array *bmag; // Magnetic field magnitude. - const double *bmag_max; // Maximum bmag (on GPU if use_gpu=true). - const double *bmag_max_loc; // Location of maximum bmag (on GPU if use_gpu=true).. + const struct gkyl_array *bmag; // Magnetic field magnitude (cdim DG expansion). + const struct gkyl_array *bmag_max; // Maximum bmag per field line (1D DG expansion for 2x, scalar for 1x). + const struct gkyl_array *bmag_max_z_coord; // z-coordinate of bmag_max per field line (1D DG expansion for 2x, scalar for 1x). + const struct gkyl_array *bmag_wall; // Magnetic field magnitude at the wall (1D DG expansion for 2x, scalar for 1x). + const struct gkyl_array *bmag_wall_z_coord; // z-coordinate of bmag at the wall (1D DG expansion for 2x, scalar for 1x). + const struct gkyl_array *bmag_tandem; // Magnetic field at the tandem mirror (for 7-extrema case). + const struct gkyl_array *bmag_tandem_z_coord; // z-coordinate of bmag_tandem per field line. + const struct gkyl_basis *bmag_max_basis; // Basis for bmag_max arrays (1D for 2x, 0D for 1x). + const struct gkyl_range *bmag_max_range; // Range for bmag_max arrays. + bool is_tandem; // =True double mass; // Species mass. double charge; // Species charge. enum gkyl_quad_type qtype; // Quadrature rule/nodes. int num_quad; // Number of quad points in each direction to use (default: poly_order+1). bool cellwise_trap_loss; // =True takes a whole cell to be either trapped or passing, // so not high-order distinction within the cell is made. - loss_cone_mask_gyrokinetic_c2p_t c2p_pos_func; // Function that transforms a set of cdim - // position-space computational coordinates to physical ones. - void *c2p_pos_func_ctx; // Context for c2p_pos_func. bool use_gpu; // Whether to run on GPU. }; @@ -54,7 +55,7 @@ struct gkyl_loss_cone_mask_gyrokinetic_inp { * @param inp Input parameters defined in gkyl_loss_cone_mask_gyrokinetic_inp struct. 
* @return New updater pointer. */ -struct gkyl_loss_cone_mask_gyrokinetic* +struct gkyl_loss_cone_mask_gyrokinetic* gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokinetic_inp *inp); /** @@ -64,16 +65,18 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti * @param phase_rng Phase-space range. * @param conf_rng Configuration-space range. * @param phi Electrostatic potential. - * @param phi_m Electrostatic potential at the mirror throat (on GPU if use_gpu=true). + * @param phi_m Electrostatic potential at the mirror throat (DG array on reduced grid). + * @param phi_tandem Electrostatic potential at the tandem mirror throat (DG array on reduced grid). * @param mask_out Output masking function. */ void gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_range *phase_range, const struct gkyl_range *conf_range, - const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out); + const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem, + struct gkyl_array *mask_out); /** * Delete updater. * * @param up Updater to delete. 
*/ -void gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up); +void gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic *up); diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h index 03e57dec9..68cc2579f 100644 --- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h +++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h @@ -8,26 +8,30 @@ #include #include #include -#include +#include #include #include GKYL_CU_DH static inline void log_to_comp(int ndim, const double *eta, - const double * GKYL_RESTRICT dx, const double * GKYL_RESTRICT xc, - double* GKYL_RESTRICT xout) + const double *GKYL_RESTRICT dx, const double *GKYL_RESTRICT xc, + double *GKYL_RESTRICT xout) { - for (int d=0; dDbmag_quad. + * Obtain bmag_peak-bmag at conf-space quadrature nodes and store it in Dbmag_quad. * - * @param up Project on basis updater to run. - * @param conf_rng Configuration-space range. + * @param up Loss cone mask updater. + * @param conf_range Configuration-space range. * @param bmag Magnetic field magnitude. - * @param bmag_max Maximum bmag. + * @param Dbmag_quad Output array (bmag_peak - bmag) at quadrature nodes. + * @param bmag_peak Peak bmag value (per-field-line array for 2x, scalar for 1x). */ -void +void gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up, - const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max); + const struct gkyl_range *conf_range, const struct gkyl_array *bmag, + struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_peak); /** * Compute projection of the loss cone masking function on the phase-space basis @@ -102,11 +128,13 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *u * @param phase_rng Phase-space range. * @param conf_rng Configuration-space range. * @param phi Electrostatic potential. 
- * @param phi_m Electrostatic potential at the mirror throat (on GPU). + * @param phi_m Electrostatic potential at the mirror throat (DG array on reduced grid). + * @param phi_tandem Electrostatic potential at the tandem mirror throat (DG array on reduced grid). * @param mask_out Output masking function. */ void gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_range *phase_range, const struct gkyl_range *conf_range, - const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out); + const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem, + struct gkyl_array *mask_out); #endif diff --git a/gyrokinetic/zero/gkyl_position_map_priv.h b/gyrokinetic/zero/gkyl_position_map_priv.h index ec730c26e..0822b014f 100644 --- a/gyrokinetic/zero/gkyl_position_map_priv.h +++ b/gyrokinetic/zero/gkyl_position_map_priv.h @@ -276,13 +276,13 @@ calc_bmag_global_derivative(double theta, void *ctx) double fout[3]; xh[0] = gpm->constB_ctx->psi; xh[1] = gpm->constB_ctx->alpha; - xh[2] = theta - h; + xh[2] = theta + h; gkyl_calc_bmag_global(0.0, xh, fout, bmag_ctx); double Bmag_plus = fout[0]; - xh[2] = theta - 2*h; + xh[2] = theta - h; gkyl_calc_bmag_global(0.0, xh, fout, bmag_ctx); double Bmag_minus = fout[0]; - return (Bmag_plus - Bmag_minus) / (h); + return (Bmag_plus - Bmag_minus) / (2*h); } /** @@ -314,15 +314,24 @@ find_B_field_extrema(struct gkyl_position_map *gpm) double *theta_extrema = gkyl_malloc(sizeof(double) * (npts + 1)); double *bmag_extrema = gkyl_malloc(sizeof(double) * (npts + 1)); - for (int i = 0; i <= npts; i++){ + for (int i = 1; i < npts; i++){ double theta = theta_lo + i * theta_dxi; xp[Z_IDX] = theta; gkyl_calc_bmag_global(0.0, xp, &bmag_vals[i], bmag_ctx); dbmag_vals[i] = calc_bmag_global_derivative(theta, gpm); - if (i==0) continue; - // Minima - if (dbmag_vals[i] > 0 && dbmag_vals[i-1] < 0){ + // Near-zero derivative: B is locally flat here, record as a 
minimum. + // Use continue so this is mutually exclusive with the sign-change checks below. + if (fabs(dbmag_vals[i]) < 1e-10) { + theta_extrema[extrema] = theta; + bmag_extrema[extrema] = bmag_vals[i]; + extrema++; + continue; + } + + // Minima via sign change. Guard on |dbmag[i-1]| to avoid a double-record if the + // previous point was already captured by the near-zero branch above. + if (dbmag_vals[i] > 0 && dbmag_vals[i-1] < 0 && fabs(dbmag_vals[i-1]) >= 1e-10){ if (bmag_vals[i] < bmag_vals[i-1]) { theta_extrema[extrema] = theta; @@ -337,8 +346,8 @@ find_B_field_extrema(struct gkyl_position_map *gpm) } } - // Maxima - if (dbmag_vals[i] < 0 && dbmag_vals[i-1] > 0){ + // Maxima via sign change. Guard on |dbmag[i-1]| for the same reason. + if (dbmag_vals[i] < 0 && dbmag_vals[i-1] > 0 && fabs(dbmag_vals[i-1]) >= 1e-10){ if (bmag_vals[i] > bmag_vals[i-1]) { theta_extrema[extrema] = theta; @@ -375,30 +384,53 @@ find_B_field_extrema(struct gkyl_position_map *gpm) // Left edge if (bmag_extrema[0] > bmag_extrema[1]) - { gpm->constB_ctx->min_or_max[0] = 1; } // Maximum + { + gpm->constB_ctx->min_or_max[0] = 1; // Maximum + } else if (bmag_extrema[0] < bmag_extrema[1]) - { gpm->constB_ctx->min_or_max[0] = 0; } // Minimum + { + gpm->constB_ctx->min_or_max[0] = 0; // Minimum + } else - { printf("Error: Extrema is not an extrema. Position_map optimization failed\n"); } + { + printf("Error: Extrema[0] is not an extrema (bmag[0]=%.6g == bmag[1]=%.6g). 
" + "Position_map optimization failed\n", bmag_extrema[0], bmag_extrema[1]); + } // Middle points for (int i = 1; i < extrema - 1; i++) { if (bmag_extrema[i] > bmag_extrema[i-1] && bmag_extrema[i] > bmag_extrema[i+1]) - { gpm->constB_ctx->min_or_max[i] = 1; } // Maximum + { + gpm->constB_ctx->min_or_max[i] = 1; // Maximum + } else if (bmag_extrema[i] < bmag_extrema[i-1] && bmag_extrema[i] < bmag_extrema[i+1]) - { gpm->constB_ctx->min_or_max[i] = 0; } // Minimum + { + gpm->constB_ctx->min_or_max[i] = 0; // Minimum + } else - { printf("Error: Extrema is not an extrema. Position_map optimization failed\n"); } + { + printf("Error: Extrema[%d] is not an extrema (bmag[%d-1]=%.6g, bmag[%d]=%.6g, bmag[%d+1]=%.6g). " + "Position_map optimization failed\n", + i, i, bmag_extrema[i-1], i, bmag_extrema[i], i, bmag_extrema[i+1]); + } } // Right edge if (bmag_extrema[extrema-1] > bmag_extrema[extrema-2]) - { gpm->constB_ctx->min_or_max[extrema-1] = 1; } // Maximum + { + gpm->constB_ctx->min_or_max[extrema-1] = 1; // Maximum + } else if (bmag_extrema[extrema-1] < bmag_extrema[extrema-2]) - { gpm->constB_ctx->min_or_max[extrema-1] = 0; } // Minimum - else - { printf("Error: Extrema is not an extrema. Position_map optimization failed\n"); } + { + gpm->constB_ctx->min_or_max[extrema-1] = 0; // Minimum + } + else + { + printf("Error: Extrema[%d] (right edge) is not an extrema (bmag[%d-1]=%.6g, bmag[%d]=%.6g). " + "Position_map optimization failed\n", + extrema-1, extrema-1, bmag_extrema[extrema-2], extrema-1, bmag_extrema[extrema-1]); + } // Free mallocs gkyl_free(bmag_vals); @@ -454,7 +486,7 @@ refine_B_field_extrema(struct gkyl_position_map *gpm) else if (bmag_cent < bmag_left && bmag_cent < bmag_right) { is_maximum = false; } // Local minima else - { printf("Error: Extrema is not an extrema. Position_map optimization failed\n"); + { // printf("Error: Extrema is not an extrema. 
Position_map optimization failed\n"); break; } @@ -655,7 +687,7 @@ position_map_constB_z_numeric(double t, const double *xn, double *fout, void *ct return; } else { - fprintf(stderr, "Warning: Unexpected interval evaluation state in position_map_constB_z_numeric. Using theta directly.\n"); + // fprintf(stderr, "Warning: Unexpected interval evaluation state in position_map_constB_z_numeric. Using theta directly.\n"); fout[0] = theta; return; } diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c index e7e553570..072c142aa 100644 --- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c +++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c @@ -14,17 +14,16 @@ // // mu_bound = (0.5*mass*pow(vpar,2)+charge*Delta_phi)/(bmag[0]*(Rm-1)); -// = 0.5*mass*pow(vpar,2)/(bmag[0]*(Rm-1)) + charge*Delta_phi/(bmag[0]*(Rm-1)); -// = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]) + charge*(phi-phi_m)/(bmag_max-bmag[0]); +// = 0.5*mass*pow(vpar,2)/(bmag[0]*(Rm-1)) + charge*Delta_phi/(bmag[0]*(Rm-1)); +// = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]) + charge*(phi-phi_m)/(bmag_max-bmag[0]); // -// Identity comp to phys coord mapping, for when user doesn't provide a map. -static inline void -c2p_pos_identity(const double *xcomp, double *xphys, void *ctx) +// allocate array (filled with zeros) +static struct gkyl_array* +mkarr(long nc, long size, bool use_gpu) { - struct gkyl_loss_cone_mask_gyrokinetic *up = ctx; - int cdim = up->cdim; - for (int d=0; dndim; int num_quad_v = num_quad; // Hybrid basis have p=2 in velocity space. - bool is_vdim_p2[2] = {false}; // 2 is the max vdim for GK. + bool is_vdim_p2[2] = { false }; // 2 is the max vdim for GK. if (num_quad > 1 && basis->b_type == GKYL_BASIS_MODAL_GKHYBRID) { - num_quad_v = num_quad+1; + num_quad_v = num_quad + 1; is_vdim_p2[0] = true; // only vpar is quadratic in GK hybrid. 
} @@ -100,7 +103,7 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q if (use_gpu) { *ordinates = gkyl_array_cu_dev_new(GKYL_DOUBLE, ndim, tot_quad); *weights = gkyl_array_cu_dev_new(GKYL_DOUBLE, 1, tot_quad); - } + } else { *ordinates = gkyl_array_new(GKYL_DOUBLE, ndim, tot_quad); *weights = gkyl_array_new(GKYL_DOUBLE, 1, tot_quad); @@ -111,25 +114,29 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q while (gkyl_range_iter_next(&iter)) { int node = gkyl_range_idx(&qrange, iter.idx); - + // set ordinates double *ord = gkyl_array_fetch(ordinates_ho, node); - for (int i=0; inum_basis, tot_quad); - for (int n=0; neval(gkyl_array_fetch(ordinates_ho, n), gkyl_array_fetch(basis_at_ords_ho, n)); + } // Copy host array to device array. gkyl_array_copy(*ordinates, ordinates_ho); @@ -155,13 +163,16 @@ init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type q } static void -gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up, - const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max) +gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up, + const struct gkyl_range *conf_range, const struct gkyl_array *bmag, + struct gkyl_array *Dbmag_quad, const struct gkyl_array *bmag_max) { // Get bmag_max-bmag at quadrature nodes. + // bmag_max is now a per-field-line array (1D for 2x, scalar for 1x). 
#ifdef GKYL_HAVE_CUDA if (up->use_gpu) - return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(up, conf_range, bmag, bmag_max); + return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(up, conf_range, bmag, + Dbmag_quad, bmag_max); #endif int cdim = up->cdim, pdim = up->pdim; @@ -175,20 +186,40 @@ gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up, long linidx = gkyl_range_idx(conf_range, conf_iter.idx); const double *bmag_d = gkyl_array_cfetch(bmag, linidx); - double *Dbmag_quad = gkyl_array_fetch(up->Dbmag_quad, linidx); + double *Dbmag_quad_d = gkyl_array_fetch(Dbmag_quad, linidx); + + // Get bmag_max for this field line (psi value). + // For 1x: bmag_max is a single value (index 0). + // For 2x: bmag_max varies with psi (x-direction), so use conf_iter.idx[0]. + double bmag_max_val; + if (cdim == 1) { + // 1x case: single value. + const double *bmag_max_d = gkyl_array_cfetch(bmag_max, 0); + bmag_max_val = bmag_max_d[0]; // Just the constant coefficient. + } + else { + // 2x case: evaluate bmag_max at this psi cell. + // The bmag_max array is 1D in psi, so we need the psi index. + int psi_idx[1] = { conf_iter.idx[0] }; + long psi_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx); + const double *bmag_max_d = gkyl_array_cfetch(bmag_max, psi_linidx); + // For simplicity, evaluate at cell center (logical coord 0). + double xc[1] = { 0.0 }; + bmag_max_val = up->bmag_max_basis->eval_expand(xc, bmag_max_d); + } - // Sum over basis - for (int n=0; nbasis_at_ords_conf, n); - for (int k=0; kvel_map = gkyl_velocity_map_acquire(inp->vel_map); up->mass = inp->mass; up->charge = inp->charge; + up->is_tandem = inp->is_tandem; up->cdim = inp->conf_basis->ndim; up->pdim = inp->phase_basis->ndim; up->cellwise_trap_loss = inp->cellwise_trap_loss; - int num_quad = inp->num_quad? inp->num_quad : inp->phase_basis->poly_order+1; + int num_quad = inp->num_quad? 
inp->num_quad : inp->phase_basis->poly_order + 1; up->norm_fac = 1; if (!up->cellwise_trap_loss) - up->norm_fac = num_quad == 1? 1.0/pow(sqrt(2.0),up->pdim) : 1.0; + up->norm_fac = num_quad == 1? 1.0 / pow(sqrt(2.0), up->pdim) : 1.0; if (num_quad == 1) { up->num_basis_conf = 1; @@ -216,15 +248,8 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti up->num_basis_phase = inp->phase_basis->num_basis; } up->use_gpu = inp->use_gpu; - - if (inp->c2p_pos_func == 0) { - up->c2p_pos = c2p_pos_identity; - up->c2p_pos_ctx = up; - } - else { - up->c2p_pos = inp->c2p_pos_func; - up->c2p_pos_ctx = inp->c2p_pos_func_ctx; - } + up->bmag_max_z_scalar_gpu = NULL; // Will be set for GPU case. + up->bmag_max_basis_on_dev = NULL; // Will be set for GPU case. // Initialize data needed for conf-space quadrature. up->tot_quad_conf = init_quad_values(up->cdim, inp->conf_basis, inp->qtype, num_quad, @@ -240,9 +265,9 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti // create a map between phase-space and conf-space ordinates. int num_quad_v = num_quad; // Hybrid basis have p=2 in velocity space. // hybrid basis have p=2 in velocity space. - bool is_vdim_p2[2] = {false}; // 2 is the max vdim for GK. + bool is_vdim_p2[2] = { false }; // 2 is the max vdim for GK. if (num_quad > 1 && inp->phase_basis->b_type == GKYL_BASIS_MODAL_GKHYBRID) { - num_quad_v = num_quad+1; + num_quad_v = num_quad + 1; is_vdim_p2[0] = true; // only vpar is quadratic in GK hybrid. } up->conf_qrange = get_qrange(up->cdim, up->cdim, num_quad, num_quad_v, is_vdim_p2); @@ -256,30 +281,39 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti // Allocate device copies of arrays needed for quadrature. 
int p2c_qidx_ho[up->phase_qrange.volume]; - up->p2c_qidx = (int*) gkyl_cu_malloc(sizeof(int)*up->phase_qrange.volume); + up->p2c_qidx = (int *)gkyl_cu_malloc(sizeof(int) * up->phase_qrange.volume); // Allocate mask_quad at phase-space quadrature points. // Dbmag_quad at configuration-space quadrature points. // qDphiDbmag_quad, the term proportional to (phi-phi_m)/(bmag_max-bmag), at quadrature points. up->mask_out_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_phase, - inp->conf_range_ext->volume*inp->vel_range->volume); - up->qDphiDbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume); + inp->conf_range_ext->volume * inp->vel_range->volume); + up->qDphiDbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, + inp->conf_range_ext->volume); + up->qDphiDbmag_quad_wall = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, + inp->conf_range_ext->volume); + up->qDphiDbmag_quad_tandem = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, + inp->conf_range_ext->volume); // Allocate the memory for computing the specific phase nodal to modal calculation struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem_ho; - phase_nodal_to_modal_mem_ho = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase, 1.0, 0.0, + phase_nodal_to_modal_mem_ho = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase, + 1.0, 0.0, GKYL_NO_TRANS, GKYL_NO_TRANS, false); // Compute the matrix A for the phase nodal to modal memory - const double *phase_w = (const double*) up->weights_phase->data; - const double *phaseb_o = (const double*) up->basis_at_ords_phase->data; - for (int n=0; ntot_quad_phase; ++n) { - for (int k=0; knum_basis_phase; ++k) - gkyl_mat_set(phase_nodal_to_modal_mem_ho->A, k, n, phase_w[n]*phaseb_o[k+up->num_basis_phase*n]); + const double *phase_w = (const double *)up->weights_phase->data; + const double *phaseb_o = (const double *)up->basis_at_ords_phase->data; + for (int n = 0; n < 
up->tot_quad_phase; ++n) { + for (int k = 0; k < up->num_basis_phase; ++k) { + gkyl_mat_set(phase_nodal_to_modal_mem_ho->A, k, n, + phase_w[n] * phaseb_o[k + up->num_basis_phase * n]); + } } - + // Copy to device - up->phase_nodal_to_modal_mem = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase, 1.0, 0.0, + up->phase_nodal_to_modal_mem = gkyl_mat_mm_array_mem_new(up->num_basis_phase, + up->tot_quad_phase, 1.0, 0.0, GKYL_NO_TRANS, GKYL_NO_TRANS, up->use_gpu); gkyl_mat_copy(up->phase_nodal_to_modal_mem->A, phase_nodal_to_modal_mem_ho->A); gkyl_mat_mm_array_mem_release(phase_nodal_to_modal_mem_ho); @@ -293,70 +327,119 @@ gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokineti &up->ordinates_phase, &up->weights_phase, &up->basis_at_ords_phase, up->use_gpu); int pidx[GKYL_MAX_DIM]; - for (int n=0; ntot_quad_phase; ++n) { + for (int n = 0; n < up->tot_quad_phase; ++n) { gkyl_range_inv_idx(&up->phase_qrange, n, pidx); int cqidx = gkyl_range_idx(&up->conf_qrange, pidx); p2c_qidx_ho[n] = cqidx; } - gkyl_cu_memcpy(up->p2c_qidx, p2c_qidx_ho, sizeof(int)*up->phase_qrange.volume, GKYL_CU_MEMCPY_H2D); + gkyl_cu_memcpy(up->p2c_qidx, p2c_qidx_ho, sizeof(int) * up->phase_qrange.volume, + GKYL_CU_MEMCPY_H2D); + + // Allocate and set scalar bmag_max_z for GPU kernels. + // TODO: For 2x GPU support, need to pass full arrays and do per-cell lookup. + // inp->bmag_max_z_coord is a GPU array, so copy to host before reading. + struct gkyl_array *bmag_max_z_coord_ho = gkyl_array_new(GKYL_DOUBLE, + inp->bmag_max_z_coord->ncomp, inp->bmag_max_z_coord->size); + gkyl_array_copy(bmag_max_z_coord_ho, inp->bmag_max_z_coord); + + double bmag_max_z_val; + if (up->cdim == 1) { + // 1x case: single value. + const double *bmag_max_z_d = gkyl_array_cfetch(bmag_max_z_coord_ho, 0); + bmag_max_z_val = bmag_max_z_d[0]; + } + else { + // 2x case: use the first field line's value (simplified approach). 
+ int psi_idx[1] = { inp->bmag_max_range->lower[0] }; + long bmag_max_z_linidx = gkyl_range_idx(inp->bmag_max_range, psi_idx); + const double *bmag_max_z_d = gkyl_array_cfetch(bmag_max_z_coord_ho, bmag_max_z_linidx); + double xc[1] = { 0.0 }; + bmag_max_z_val = inp->bmag_max_basis->eval_expand(xc, bmag_max_z_d); + } + gkyl_array_release(bmag_max_z_coord_ho); + up->bmag_max_z_scalar_gpu = gkyl_cu_malloc(sizeof(double)); + gkyl_cu_memcpy(up->bmag_max_z_scalar_gpu, &bmag_max_z_val, sizeof(double), GKYL_CU_MEMCPY_H2D); + + // Create a device-resident basis with device-callable function pointers + // for use in GPU kernels that call eval_expand. + up->bmag_max_basis_on_dev = gkyl_cart_modal_serendip_cu_dev_new( + inp->bmag_max_basis->ndim, inp->bmag_max_basis->poly_order); } #endif - // Allocate and obtain bmag_max-bmag at quadrature points. - if (up->use_gpu) - up->Dbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume); - else - up->Dbmag_quad = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume); + // Store references to bmag_max arrays (no copy, just store pointers). + // Must be done before calling gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad. + up->bmag_max = gkyl_array_acquire(inp->bmag_max); + up->bmag_max_z_coord = gkyl_array_acquire(inp->bmag_max_z_coord); + up->bmag_wall = gkyl_array_acquire(inp->bmag_wall); + up->bmag_wall_z_coord = gkyl_array_acquire(inp->bmag_wall_z_coord); + up->bmag_tandem = + up->is_tandem ? gkyl_array_acquire(inp->bmag_tandem) : gkyl_array_acquire(inp->bmag_max); + up->bmag_tandem_z_coord = + up->is_tandem ? 
gkyl_array_acquire(inp->bmag_tandem_z_coord) : + gkyl_array_acquire(inp->bmag_max_z_coord); + up->bmag_max_basis = inp->bmag_max_basis; + up->bmag_max_range = inp->bmag_max_range; - gkyl_array_clear(up->Dbmag_quad, 0.0); - gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, inp->bmag_max); + // Allocate and obtain bmag_max-bmag at quadrature points. + up->Dbmag_quad = mkarr(up->tot_quad_conf, inp->conf_range_ext->volume, up->use_gpu); + up->Dbmag_quad_wall = mkarr(up->tot_quad_conf, inp->conf_range_ext->volume, up->use_gpu); + up->Dbmag_quad_tandem = mkarr(up->tot_quad_conf, inp->conf_range_ext->volume, up->use_gpu); + + gkyl_array_clear(up->Dbmag_quad, 0.0); + gkyl_array_clear(up->Dbmag_quad_wall, 0.0); + gkyl_array_clear(up->Dbmag_quad_tandem, 0.0); + + gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad, + up->bmag_max); // bmag_max - bmag + gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_wall, + up->bmag_wall); // bmag_wall - bmag + gkyl_array_scale(up->Dbmag_quad_wall, -1.0); // bmag - bmag_wall + gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, up->Dbmag_quad_tandem, + up->bmag_tandem); // bmag_tandem - bmag - // Save the location of bmag_max in this updater. 
- if (up->use_gpu) { - up->bmag_max_loc = gkyl_cu_malloc(sizeof(double)*up->cdim); - gkyl_cu_memcpy(up->bmag_max_loc, inp->bmag_max_loc, sizeof(double)*up->cdim, GKYL_CU_MEMCPY_D2D); - } - else { - up->bmag_max_loc = gkyl_malloc(sizeof(double)*up->cdim); - memcpy(up->bmag_max_loc, inp->bmag_max_loc, sizeof(double)*up->cdim); - } - return up; } static void -proj_on_basis(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords, double* f) +proj_on_basis(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords, + double *f) { int num_basis = up->num_basis_phase; int tot_quad = up->tot_quad_phase; - const double* GKYL_RESTRICT weights = up->weights_phase->data; - const double* GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data; - const double* GKYL_RESTRICT func_at_ords = fun_at_ords->data; + const double *GKYL_RESTRICT weights = up->weights_phase->data; + const double *GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data; + const double *GKYL_RESTRICT func_at_ords = fun_at_ords->data; - for (int k=0; knum_basis_phase; int tot_quad = up->tot_quad_phase; - const double* GKYL_RESTRICT weights = up->weights_phase->data; - const double* GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data; - const double* GKYL_RESTRICT func_at_ords = fun_at_ords->data; + const double *GKYL_RESTRICT weights = up->weights_phase->data; + const double *GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data; + const double *GKYL_RESTRICT func_at_ords = fun_at_ords->data; - for (int k=0; kuse_gpu) - return gkyl_loss_cone_mask_gyrokinetic_advance_cu(up, phase_range, conf_range, - phi, phi_m, mask_out); + return gkyl_loss_cone_mask_gyrokinetic_advance_cu(up, phase_range, conf_range, + phi, phi_m, phi_tandem, mask_out); #endif int cdim = up->cdim, pdim = up->pdim; - int vdim = pdim-cdim; + int vdim = pdim - cdim; int tot_quad_conf = up->tot_quad_conf; int num_basis_conf = up->num_basis_conf; + bool is_tandem = up->is_tandem; 
+ struct gkyl_range vel_rng; struct gkyl_range_iter conf_iter, vel_iter; int pidx[GKYL_MAX_DIM], rem_dir[GKYL_MAX_DIM] = { 0 }; - for (int d=0; dndim; ++d) rem_dir[d] = 1; + for (int d = 0; d < conf_range->ndim; ++d) { + rem_dir[d] = 1; + } - double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0}; + double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = { 0.0 }; double phi_quad[tot_quad_conf]; double qDphiDbmag_quad[tot_quad_conf]; // charge*(phi-phi_m)/(bmag_max-bmag[0]). + double qDphiDbmag_quad_wall[tot_quad_conf]; // charge*(phi-phi_m)/(bmag[0]-bmag_wall). + double qDphiDbmag_quad_tandem[tot_quad_conf]; // charge*(phi-phi_m)/(bmag_max-bmag_tandem). // Outer loop over configuration space cells; for each // config-space cell inner loop walks over velocity space. @@ -400,27 +490,74 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up, const double *phi_d = gkyl_array_cfetch(phi, linidx_conf); const double *Dbmag_quad = gkyl_array_cfetch(up->Dbmag_quad, linidx_conf); + const double *Dbmag_quad_wall = gkyl_array_cfetch(up->Dbmag_quad_wall, linidx_conf); + const double *Dbmag_quad_tandem = is_tandem ? + gkyl_array_cfetch(up->Dbmag_quad_tandem, linidx_conf) : gkyl_array_cfetch(up->Dbmag_quad, + linidx_conf); + + // Get phi_m value for this field line. + // For 1x: single value (phi_m is a scalar stored as p=0 DG expansion). + // For 2x: varies with psi, evaluate at this psi cell. + double phi_m_val, phi_tandem_m_val; + if (cdim == 1) { + // 1x case: single scalar value stored as p=0 DG expansion. + const double *phi_m_d = gkyl_array_cfetch(phi_m, 0); + const double *phi_tandem_m_d = gkyl_array_cfetch(phi_tandem, 0); + phi_m_val = phi_m_d[0]; + phi_tandem_m_val = phi_tandem_m_d[0]; + } + else { + // 2x case: evaluate phi_m at this psi cell center. 
+ int psi_idx[1] = { conf_iter.idx[0] }; + long phi_m_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx); + const double *phi_m_d = gkyl_array_cfetch(phi_m, phi_m_linidx); + const double *phi_tandem_m_d = gkyl_array_cfetch(phi_tandem, phi_m_linidx); + // Evaluate at cell center (logical coord 0). + double xc_log[1] = { 0.0 }; + phi_m_val = up->bmag_max_basis->eval_expand(xc_log, phi_m_d); + phi_tandem_m_val = up->bmag_max_basis->eval_expand(xc_log, phi_tandem_m_d); + } // Sum over basis for given potential phi. - for (int n=0; nbasis_at_ords_conf, n); // Compute the configuration-space quadrature phi_quad[n] = 0.0; - for (int k=0; k 0.0) - qDphiDbmag_quad[n] = up->charge*(phi_quad[n]-phi_m[0])/Dbmag_quad[n]; - else + if (Dbmag_quad[n] > 0.0) { + qDphiDbmag_quad[n] = up->charge * (phi_quad[n] - phi_m_val) / Dbmag_quad[n]; + } + else { qDphiDbmag_quad[n] = 0.0; + } + + if (Dbmag_quad_wall[n] > 0.0) { + qDphiDbmag_quad_wall[n] = up->charge * phi_quad[n] / Dbmag_quad_wall[n]; + } + else { + qDphiDbmag_quad_wall[n] = 0.0; + } + + if (is_tandem) { + if (Dbmag_quad_tandem[n] > 0.0) { + qDphiDbmag_quad_tandem[n] = up->charge * (phi_quad[n] - phi_tandem_m_val) / + Dbmag_quad_tandem[n]; + } + else { + qDphiDbmag_quad_tandem[n] = 0.0; + } + } } // Inner loop over velocity space. gkyl_range_deflate(&vel_rng, phase_range, rem_dir, conf_iter.idx); gkyl_range_iter_no_split_init(&vel_iter, &vel_rng); while (gkyl_range_iter_next(&vel_iter)) { - + copy_idx_arrays(conf_range->ndim, phase_range->ndim, conf_iter.idx, vel_iter.idx, pidx); long linidx_phase = gkyl_range_idx(&vel_rng, vel_iter.idx); @@ -437,7 +574,6 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up, // Convert comp position coordinate to phys pos coord. gkyl_rect_grid_cell_center(up->grid_phase, pidx, xc); log_to_comp(up->cdim, xcomp_d, up->grid_phase->dx, xc, xmu); - up->c2p_pos(xmu, xmu, up->c2p_pos_ctx); // Convert comp velocity coordinate to phys velocity coord. 
const struct gkyl_velocity_map *gvm = up->vel_map; @@ -445,24 +581,123 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up, const double *vmap_d = gkyl_array_cfetch(gvm->vmap, linidx_vel); double xcomp[1]; for (int vd = 0; vd < vdim; vd++) { - xcomp[0] = xcomp_d[cdim+vd]; - xmu[cdim+vd] = gvm->vmap_basis->eval_expand(xcomp, vmap_d+vd*gvm->vmap_basis->num_basis); + xcomp[0] = xcomp_d[cdim + vd]; + xmu[cdim + vd] = gvm->vmap_basis->eval_expand(xcomp, + vmap_d + vd * gvm->vmap_basis->num_basis); } // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]). + // KEparDbmag_wall = 0.5*mass*pow(vpar,2)/(bmag[0]-bmag_wall). + // KEparDbmag_tandem = 0.5*mass*pow(vpar,2)/(bmag_tandem-bmag[0]). double KEparDbmag = 0.0; - if (Dbmag_quad[cqidx] > 0.0) - KEparDbmag = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad[cqidx]; - else + double KEparDbmag_wall = 0.0; + double KEparDbmag_tandem = 0.0; + + if (Dbmag_quad[cqidx] > 0.0) { + KEparDbmag = 0.5 * up->mass * pow(xmu[cdim], 2.0) / Dbmag_quad[cqidx]; + } + else { KEparDbmag = 0.0; + } - double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad[cqidx]); + if (Dbmag_quad_wall[cqidx] > 0.0) { + KEparDbmag_wall = 0.5 * up->mass * pow(xmu[cdim], 2.0) / Dbmag_quad_wall[cqidx]; + } + else { + KEparDbmag_wall = 0.0; + } + + if (Dbmag_quad_tandem[cqidx] > 0.0) { + KEparDbmag_tandem = 0.5 * up->mass * pow(xmu[cdim], 2.0) / Dbmag_quad_tandem[cqidx]; + } + else { + KEparDbmag_tandem = 0.0; + } + + double mu_bound = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad[cqidx]); + double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall + qDphiDbmag_quad_wall[cqidx])); + double mu_bound_tandem = GKYL_MAX2(0.0, KEparDbmag_tandem + qDphiDbmag_quad_tandem[cqidx]); + + // Get the z-coordinate of bmag_max for this field line. + // For 1x: single value (index 0). + // For 2x: varies with psi, so use conf_iter.idx[0]. + double bmag_max_z_val, bmag_tandem_z_val; + if (cdim == 1) { + // 1x case: single value. 
+ const double *bmag_max_z_d = gkyl_array_cfetch(up->bmag_max_z_coord, 0); + bmag_max_z_val = bmag_max_z_d[0]; + if (is_tandem) { + const double *bmag_tandem_z_d = gkyl_array_cfetch(up->bmag_tandem_z_coord, 0); + bmag_tandem_z_val = bmag_tandem_z_d[0]; + } + } + else { + // 2x case: evaluate bmag_max_z at this psi cell. + int psi_idx[1] = { conf_iter.idx[0] }; + long bmag_max_z_linidx = gkyl_range_idx(up->bmag_max_range, psi_idx); + const double *bmag_max_z_d = gkyl_array_cfetch(up->bmag_max_z_coord, bmag_max_z_linidx); + // For simplicity, evaluate at cell center (logical coord 0). + double xc[1] = { 0.0 }; + bmag_max_z_val = up->bmag_max_basis->eval_expand(xc, bmag_max_z_d); + if (is_tandem) { + const double *bmag_tandem_z_d = gkyl_array_cfetch(up->bmag_tandem_z_coord, + bmag_max_z_linidx); + bmag_tandem_z_val = up->bmag_max_basis->eval_expand(xc, bmag_tandem_z_d); + } + } double *fq = gkyl_array_fetch(up->fun_at_ords, pqidx); - if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(up->bmag_max_loc[cdim-1])) - fq[0] = 1.0 * up->norm_fac; - else - fq[0] = 0.0; + // xmu[cdim-1] is the z-coordinate (last config space coordinate). + + if (is_tandem) { + // Tandem mirror trapping condition: + // Determine which region we're in based on position. + bool in_outer_cell = fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val) && + fabs(xmu[cdim - 1]) > fabs(bmag_tandem_z_val); + bool in_central_cell = fabs(xmu[cdim - 1]) <= fabs(bmag_tandem_z_val); + + if (in_outer_cell) { + // Between tandem and outer mirror - check outer barrier + if (mu_bound < xmu[cdim + 1]) { + fq[0] = 1.0 * up->norm_fac; + } + else { + fq[0] = 0.0; + } + } + else if (in_central_cell) { + // In central cell - must overcome the minimum of both barriers to escape. + // A particle is trapped if mu > min(mu_bound, mu_bound_tandem). 
+ double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem); + if (mu_bound_min < xmu[cdim + 1]) { + fq[0] = 1.0 * up->norm_fac; + } + else { + fq[0] = 0.0; + } + } + else { + // In the outer wall region beyond outer mirror + if (mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val)) { + fq[0] = 1.0 * up->norm_fac; + } + else { + fq[0] = 0.0; + } + } + } + else { + // Single mirror case (original logic) + if (mu_bound < xmu[cdim + 1] && fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val)) { + fq[0] = 1.0 * up->norm_fac; + } + else if (mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val)) { + fq[0] = 1.0 * up->norm_fac; + } + else { + fq[0] = 0.0; + } + } } // Compute DG expansion coefficients of the mask. if (up->cellwise_trap_loss) @@ -474,7 +709,7 @@ gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up, } void -gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up) +gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic *up) { gkyl_velocity_map_release(up->vel_map); @@ -488,16 +723,26 @@ gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up) gkyl_array_release(up->fun_at_ords); gkyl_array_release(up->Dbmag_quad); + gkyl_array_release(up->Dbmag_quad_wall); + gkyl_array_release(up->Dbmag_quad_tandem); + + gkyl_array_release(up->bmag_max); + gkyl_array_release(up->bmag_max_z_coord); + gkyl_array_release(up->bmag_wall); + gkyl_array_release(up->bmag_wall_z_coord); + gkyl_array_release(up->bmag_tandem); + gkyl_array_release(up->bmag_tandem_z_coord); if (up->use_gpu) { gkyl_cu_free(up->p2c_qidx); gkyl_array_release(up->mask_out_quad); gkyl_array_release(up->qDphiDbmag_quad); + gkyl_array_release(up->qDphiDbmag_quad_wall); + gkyl_array_release(up->qDphiDbmag_quad_tandem); + gkyl_mat_mm_array_mem_release(up->phase_nodal_to_modal_mem); - gkyl_cu_free(up->bmag_max_loc); - } - else { - gkyl_free(up->bmag_max_loc); + 
gkyl_cu_free(up->bmag_max_z_scalar_gpu); + gkyl_cu_free(up->bmag_max_basis_on_dev); } gkyl_free(up); diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu index ee35da47e..693b5ff4f 100644 --- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu +++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu @@ -16,61 +16,97 @@ extern "C" { #include } +// Kernel to compute Dbmag_quad = bmag_peak - bmag at quadrature nodes. +// bmag_peak is a per-field-line array (1D for 2x, scalar for 1x). +// For 1x: bmag_peak has a single value at index 0. +// For 2x: bmag_peak varies with psi (x-direction). __global__ static void -gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(struct gkyl_range conf_range, - const struct gkyl_array* basis_at_ords_conf, const struct gkyl_array* bmag, const double *bmag_max, - struct gkyl_array* Dbmag_quad_d) -{ +gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(int cdim, struct gkyl_range conf_range, + struct gkyl_range bmag_peak_range, const struct gkyl_array *basis_at_ords_conf, + const struct gkyl_array *bmag, const struct gkyl_array *bmag_peak, + const struct gkyl_basis *bmag_peak_basis, struct gkyl_array *Dbmag_quad_out) +{ int num_basis_conf = basis_at_ords_conf->ncomp; int tot_quad_conf = basis_at_ords_conf->size; int cidx[GKYL_MAX_CDIM]; - for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x; - tid < conf_range.volume; tid += blockDim.x*gridDim.x) { + for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x; + tid < conf_range.volume; tid += blockDim.x * gridDim.x) { gkyl_sub_range_inv_idx(&conf_range, tid, cidx); long linidx = gkyl_range_idx(&conf_range, cidx); - const double *bmag_d = (const double*) gkyl_array_cfetch(bmag, linidx); - - double *bmag_quad = (double*) gkyl_array_fetch(Dbmag_quad_d, linidx); - - for (int n=0; neval_expand(xc, bmag_peak_d); + } - for (int k=0; knblocks, nthreads = conf_range->nthreads; - 
gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<>>(*conf_range, - up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_max, up->Dbmag_quad->on_dev); + gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<>>(up->cdim, *conf_range, + *up->bmag_max_range, up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_peak->on_dev, + up->bmag_max_basis_on_dev, Dbmag_quad->on_dev); } static void -gkyl_parallelize_components_kernel_launch_dims(dim3* dimGrid, dim3* dimBlock, gkyl_range range, int ncomp) +gkyl_parallelize_components_kernel_launch_dims(dim3 *dimGrid, dim3 *dimBlock, gkyl_range range, + int ncomp) { - // Create a 2D thread grid so we launch ncomp*range.volume number of threads + // Create a 2D thread grid so we launch ncomp*range.volume number of threads // so we can parallelize over components too dimBlock->y = ncomp; // ncomp *must* be less than 256 dimGrid->y = 1; - dimBlock->x = GKYL_DEFAULT_NUM_THREADS/ncomp; + dimBlock->x = GKYL_DEFAULT_NUM_THREADS / ncomp; dimGrid->x = gkyl_int_div_up(range.volume, dimBlock->x); } +// Kernel to compute qDphiDbmag_quad = charge*(phi-phi_m)/(bmag_max-bmag) at quadrature nodes. +// Supports per-field-line phi_m lookup for 2x mirrors. 
__global__ static void -gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range, - const struct gkyl_array* basis_at_ords_conf, double charge, const struct gkyl_array* phi, - const double *phi_m, const struct gkyl_array* Dbmag_quad, struct gkyl_array* qDphiDbmag_quad) +gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(int cdim, struct gkyl_range conf_range, + struct gkyl_range phi_m_range, const struct gkyl_array *basis_at_ords_conf, + const struct gkyl_basis *phi_m_basis, double charge, bool is_tandem, + const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem, + const struct gkyl_array *Dbmag_quad, const struct gkyl_array *Dbmag_quad_wall, + const struct gkyl_array *Dbmag_quad_tandem, + struct gkyl_array *qDphiDbmag_quad, struct gkyl_array *qDphiDbmag_quad_wall, + struct gkyl_array *qDphiDbmag_quad_tandem) { int num_basis_conf = basis_at_ords_conf->ncomp; @@ -78,93 +114,214 @@ gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range // 2D thread grid // linc2 goes from 0 to tot_quad_conf= basis_at_ords_conf->size. 
- long linc2 = threadIdx.y + blockIdx.y*blockDim.y; - for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x; - tid < conf_range.volume; tid += blockDim.x*gridDim.x) { + long linc2 = threadIdx.y + blockIdx.y * blockDim.y; + for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x; + tid < conf_range.volume; tid += blockDim.x * gridDim.x) { gkyl_sub_range_inv_idx(&conf_range, tid, cidx); long linidx = gkyl_range_idx(&conf_range, cidx); - const double *phi_d = (const double*) gkyl_array_cfetch(phi, linidx); - const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx); + const double *phi_d = (const double *)gkyl_array_cfetch(phi, linidx); + const double *Dbmag_quad_d = (const double *)gkyl_array_cfetch(Dbmag_quad, linidx); + const double *Dbmag_quad_wall_d = (const double *)gkyl_array_cfetch(Dbmag_quad_wall, linidx); + const double *Dbmag_quad_tandem_d = is_tandem ? + (const double *)gkyl_array_cfetch(Dbmag_quad_tandem, linidx) : Dbmag_quad_d; + + // Get phi_m value for this field line. + // For 1x: single value (phi_m is a scalar stored as p=0 DG expansion). + // For 2x: varies with psi, evaluate at this psi cell. + double phi_m_val, phi_tandem_m_val; + if (cdim == 1) { + // 1x case: single scalar value stored as p=0 DG expansion. + const double *phi_m_d = (const double *)gkyl_array_cfetch(phi_m, 0); + phi_m_val = phi_m_d[0]; + if (is_tandem) { + const double *phi_tandem_m_d = (const double *)gkyl_array_cfetch(phi_tandem, 0); + phi_tandem_m_val = phi_tandem_m_d[0]; + } + } + else { + // 2x case: evaluate phi_m at this psi cell center. + int psi_idx[1] = { cidx[0] }; + long phi_m_linidx = gkyl_range_idx(&phi_m_range, psi_idx); + const double *phi_m_d = (const double *)gkyl_array_cfetch(phi_m, phi_m_linidx); + // Evaluate at cell center (logical coord 0). 
+ double xc[1] = { 0.0 }; + phi_m_val = phi_m_basis->eval_expand(xc, phi_m_d); + if (is_tandem) { + const double *phi_tandem_m_d = (const double *)gkyl_array_cfetch(phi_tandem, phi_m_linidx); + phi_tandem_m_val = phi_m_basis->eval_expand(xc, phi_tandem_m_d); + } + } - // Sum over basis at configuration-space quadrature points. - const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, linc2); + // Sum over basis at configuration-space quadrature points. + const double *b_ord = (const double *)gkyl_array_cfetch(basis_at_ords_conf, linc2); double phi_quad = 0; - for (int k=0; k 0.0) - qDphiDbmag_quad_d[linc2] = charge*(phi_quad-phi_m[0])/Dbmag_quad_d[linc2]; + qDphiDbmag_quad_d[linc2] = charge * (phi_quad - phi_m_val) / Dbmag_quad_d[linc2]; else qDphiDbmag_quad_d[linc2] = 0.0; + + if (Dbmag_quad_wall_d[linc2] > 0.0) + qDphiDbmag_quad_wall_d[linc2] = charge * phi_quad / Dbmag_quad_wall_d[linc2]; + else + qDphiDbmag_quad_wall_d[linc2] = 0.0; + + if (is_tandem) { + double *qDphiDbmag_quad_tandem_d = (double *)gkyl_array_fetch(qDphiDbmag_quad_tandem, linidx); + if (Dbmag_quad_tandem_d[linc2] > 0.0) + qDphiDbmag_quad_tandem_d[linc2] = charge * (phi_quad - phi_tandem_m_val) / + Dbmag_quad_tandem_d[linc2]; + else + qDphiDbmag_quad_tandem_d[linc2] = 0.0; + } } } +// Cellwise kernel: determines if a cell is trapped or lost without quadrature. +// Supports tandem mirrors and per-field-line z-coordinate lookup. 
__global__ static void -gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase, +gkyl_loss_cone_mask_gyrokinetic_ker(int cdim, struct gkyl_rect_grid grid_phase, struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range, - double mass, const struct gkyl_array* phase_ordinates, - const double *bmag_max_loc, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad, - const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out) + struct gkyl_range bmag_max_range, const struct gkyl_basis *bmag_max_basis, bool is_tandem, + double mass, const struct gkyl_array *phase_ordinates, + const struct gkyl_array *bmag_max_z_coord, const struct gkyl_array *bmag_tandem_z_coord, + const struct gkyl_array *qDphiDbmag_quad, const struct gkyl_array *qDphiDbmag_quad_wall, + const struct gkyl_array *qDphiDbmag_quad_tandem, + const struct gkyl_array *Dbmag_quad, const struct gkyl_array *Dbmag_quad_wall, + const struct gkyl_array *Dbmag_quad_tandem, + const int *p2c_qidx, struct gkyl_array *vmap, struct gkyl_basis *vmap_basis, + struct gkyl_array *mask_out) { - int pdim = phase_range.ndim, cdim = conf_range.ndim; - int vdim = pdim-cdim; + int pdim = phase_range.ndim; + int vdim = pdim - cdim; - double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0}; + double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = { 0.0 }; int pidx[GKYL_MAX_DIM], cidx[GKYL_MAX_CDIM], vidx[2]; int tot_phase_quad = phase_ordinates->size; - for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x; - tid < phase_range.volume; tid += blockDim.x*gridDim.x) { + for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x; + tid < phase_range.volume; tid += blockDim.x * gridDim.x) { gkyl_sub_range_inv_idx(&phase_range, tid, pidx); // Get configuration-space linear index. 
- for (unsigned int k = 0; k < cdim; k++) cidx[k] = pidx[k]; + for (unsigned int k = 0; k < cdim; k++) { + cidx[k] = pidx[k]; + } long linidx_conf = gkyl_range_idx(&conf_range, cidx); - const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf); - const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf); + const double *Dbmag_quad_d = (const double *)gkyl_array_cfetch(Dbmag_quad, linidx_conf); + const double *Dbmag_quad_wall_d = (const double *)gkyl_array_cfetch(Dbmag_quad_wall, + linidx_conf); + const double *Dbmag_quad_tandem_d = is_tandem ? + (const double *)gkyl_array_cfetch(Dbmag_quad_tandem, linidx_conf) : Dbmag_quad_d; + const double *qDphiDbmag_quad_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad, + linidx_conf); + const double *qDphiDbmag_quad_wall_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad_wall, + linidx_conf); + const double *qDphiDbmag_quad_tandem_d = is_tandem ? + (const double *)gkyl_array_cfetch(qDphiDbmag_quad_tandem, linidx_conf) : qDphiDbmag_quad_d; + + // Get z-coordinates for field-line specific values. 
+ double bmag_max_z_val, bmag_tandem_z_val; + if (cdim == 1) { + const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, 0); + bmag_max_z_val = bmag_max_z_d[0]; + if (is_tandem) { + const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord, 0); + bmag_tandem_z_val = bmag_tandem_z_d[0]; + } + } + else { + int psi_idx[1] = { cidx[0] }; + long psi_linidx = gkyl_range_idx(&bmag_max_range, psi_idx); + const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, psi_linidx); + double xc_log[1] = { 0.0 }; + bmag_max_z_val = bmag_max_basis->eval_expand(xc_log, bmag_max_z_d); + if (is_tandem) { + const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord, + psi_linidx); + bmag_tandem_z_val = bmag_max_basis->eval_expand(xc_log, bmag_tandem_z_d); + } + } gkyl_rect_grid_cell_center(&grid_phase, pidx, xc); long linidx_phase = gkyl_range_idx(&phase_range, pidx); - double *mask_d = (double*) gkyl_array_fetch(mask_out, linidx_phase); + double *mask_d = (double *)gkyl_array_fetch(mask_out, linidx_phase); - for (int d = cdim; d < pdim; d++) vidx[d-cdim] = pidx[d]; + for (int d = cdim; d < pdim; d++) { + vidx[d - cdim] = pidx[d]; + } long linidx_vel = gkyl_range_idx(&vel_range, vidx); - const double *vmap_d = (const double*) gkyl_array_cfetch(vmap, linidx_vel); + const double *vmap_d = (const double *)gkyl_array_cfetch(vmap, linidx_vel); mask_d[0] = 1.0; // In this case the mask has ncomp=1. - for (int n=0; nc2p_pos(xmu, xmu, up->c2p_pos_ctx); - + // Convert comp velocity coordinate to phys velocity coord. double xcomp[1]; for (int vd = 0; vd < vdim; vd++) { - xcomp[0] = xcomp_d[cdim+vd]; - xmu[cdim+vd] = vmap_basis->eval_expand(xcomp, vmap_d+vd*vmap_basis->num_basis); + xcomp[0] = xcomp_d[cdim + vd]; + xmu[cdim + vd] = vmap_basis->eval_expand(xcomp, vmap_d + vd * vmap_basis->num_basis); } - - // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]). 
- double KEparDbmag = 0.0; + + // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_peak-bmag). + double KEparDbmag = 0.0, KEparDbmag_wall = 0.0, KEparDbmag_tandem = 0.0; if (Dbmag_quad_d[cqidx] > 0.0) - KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx]; - else - KEparDbmag = 0.0; - - double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]); - - if ( !(mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_loc[cdim-1])) ) { + KEparDbmag = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_d[cqidx]; + + if (Dbmag_quad_wall_d[cqidx] > 0.0) + KEparDbmag_wall = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_wall_d[cqidx]; + + if (is_tandem && Dbmag_quad_tandem_d[cqidx] > 0.0) + KEparDbmag_tandem = 0.5 * mass * pow(xmu[cdim], 2.0) / Dbmag_quad_tandem_d[cqidx]; + + double mu_bound = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad_d[cqidx]); + double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall + qDphiDbmag_quad_wall_d[cqidx])); + double mu_bound_tandem = is_tandem ? GKYL_MAX2(0.0, + KEparDbmag_tandem + qDphiDbmag_quad_tandem_d[cqidx]) : 0.0; + + bool is_trapped; + if (is_tandem) { + // Tandem mirror trapping condition. + bool in_outer_cell = fabs(xmu[cdim - 1]) < fabs(bmag_max_z_val) && + fabs(xmu[cdim - 1]) > fabs(bmag_tandem_z_val); + bool in_central_cell = fabs(xmu[cdim - 1]) <= fabs(bmag_tandem_z_val); + + if (in_outer_cell) { + is_trapped = mu_bound < xmu[cdim + 1]; + } + else if (in_central_cell) { + double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem); + is_trapped = mu_bound_min < xmu[cdim + 1]; + } + else { + is_trapped = mu_bound_wall > xmu[cdim + 1] && fabs(xmu[cdim - 1]) >= fabs(bmag_max_z_val); + } + } + else { + // Single mirror case. 
// Quadrature kernel: computes the mask at phase-space quadrature nodes.
// Supports tandem mirrors and per-field-line z-coordinate lookup.
//
// Each thread owns one phase-space quadrature node (selected by the y thread
// index) and walks over phase-space cells with a grid-stride loop in x. For
// every (cell, node) pair it writes norm_fac into mask_out_quad when the node
// satisfies the trapping condition below, and 0.0 otherwise.
//
// cdim                     Number of configuration-space dimensions.
// grid_phase               Phase-space grid (cell centers and cell sizes).
// phase_range              Phase-space range looped over.
// conf_range               Range used to index the conf-space quadrature arrays.
// vel_range                Range used to index vmap.
// bmag_max_range           Range used to index the z-coordinate arrays when cdim > 1.
// bmag_max_basis           Basis used to evaluate the z-coordinate expansions when cdim > 1.
// is_tandem                Whether the tandem-mirror condition is applied.
// mass                     Species mass.
// norm_fac                 Value stored at nodes that pass the trapping test.
// phase_ordinates          Logical coordinates of the phase-space quadrature nodes.
// bmag_max_z_coord         z-coordinate compared against |z| as the outer boundary.
// bmag_tandem_z_coord      z-coordinate of the inner (tandem) boundary; read only if is_tandem.
// qDphiDbmag_quad*         Potential terms at conf-space quadrature nodes
//                          (plain, wall and tandem variants).
// Dbmag_quad*              Denominators of the parallel-energy term at conf-space
//                          quadrature nodes (plain, wall and tandem variants).
// p2c_qidx                 Map from phase-space node index to conf-space node index.
// vmap, vmap_basis         Velocity map expansion and its basis.
// mask_out_quad            Output: mask values at phase-space quadrature nodes.
__global__ static void
gkyl_loss_cone_mask_gyrokinetic_quad_ker(int cdim, struct gkyl_rect_grid grid_phase,
  struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
  struct gkyl_range bmag_max_range, const struct gkyl_basis *bmag_max_basis, bool is_tandem,
  double mass, double norm_fac, const struct gkyl_array *phase_ordinates,
  const struct gkyl_array *bmag_max_z_coord, const struct gkyl_array *bmag_tandem_z_coord,
  const struct gkyl_array *qDphiDbmag_quad, const struct gkyl_array *qDphiDbmag_quad_wall,
  const struct gkyl_array *qDphiDbmag_quad_tandem,
  const struct gkyl_array *Dbmag_quad, const struct gkyl_array *Dbmag_quad_wall,
  const struct gkyl_array *Dbmag_quad_tandem,
  const int *p2c_qidx, struct gkyl_array *vmap, struct gkyl_basis *vmap_basis,
  struct gkyl_array *mask_out_quad)
{
  int pdim = phase_range.ndim;
  int vdim = pdim - cdim;

  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = { 0.0 };
  int pidx[GKYL_MAX_DIM], cidx[GKYL_MAX_CDIM], vidx[2];

  // 2D thread grid
  // linc2 goes from 0 to tot_quad_phase
  long linc2 = threadIdx.y + blockIdx.y * blockDim.y;
  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x;
       tid < phase_range.volume; tid += blockDim.x * gridDim.x) {
    gkyl_sub_range_inv_idx(&phase_range, tid, pidx);

    // Get configuration-space linear index.
    for (int k = 0; k < cdim; k++) {
      cidx[k] = pidx[k];
    }
    long linidx_conf = gkyl_range_idx(&conf_range, cidx);

    const double *Dbmag_quad_d = (const double *)gkyl_array_cfetch(Dbmag_quad, linidx_conf);
    const double *Dbmag_quad_wall_d = (const double *)gkyl_array_cfetch(Dbmag_quad_wall,
      linidx_conf);
    // The tandem arrays are only fetched for tandem mirrors; otherwise the
    // pointers alias the plain arrays so they are always valid to index.
    const double *Dbmag_quad_tandem_d = is_tandem ?
      (const double *)gkyl_array_cfetch(Dbmag_quad_tandem, linidx_conf) : Dbmag_quad_d;
    const double *qDphiDbmag_quad_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad,
      linidx_conf);
    const double *qDphiDbmag_quad_wall_d = (const double *)gkyl_array_cfetch(qDphiDbmag_quad_wall,
      linidx_conf);
    const double *qDphiDbmag_quad_tandem_d = is_tandem ?
      (const double *)gkyl_array_cfetch(qDphiDbmag_quad_tandem, linidx_conf) : qDphiDbmag_quad_d;

    // Get z-coordinates for field-line specific values.
    // Both are initialized so the tandem value is never indeterminate, even
    // though it is only read when is_tandem is set.
    double bmag_max_z_val = 0.0, bmag_tandem_z_val = 0.0;
    if (cdim == 1) {
      // 1x: a single value stored in the first component of cell 0.
      const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, 0);
      bmag_max_z_val = bmag_max_z_d[0];
      if (is_tandem) {
        const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord, 0);
        bmag_tandem_z_val = bmag_tandem_z_d[0];
      }
    }
    else {
      // Otherwise index by the first conf-space index and evaluate the
      // expansion at the logical cell center.
      int psi_idx[1] = { cidx[0] };
      long psi_linidx = gkyl_range_idx(&bmag_max_range, psi_idx);
      const double *bmag_max_z_d = (const double *)gkyl_array_cfetch(bmag_max_z_coord, psi_linidx);
      double xc_log[1] = { 0.0 };
      bmag_max_z_val = bmag_max_basis->eval_expand(xc_log, bmag_max_z_d);
      if (is_tandem) {
        const double *bmag_tandem_z_d = (const double *)gkyl_array_cfetch(bmag_tandem_z_coord,
          psi_linidx);
        bmag_tandem_z_val = bmag_max_basis->eval_expand(xc_log, bmag_tandem_z_d);
      }
    }

    gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
    long linidx_phase = gkyl_range_idx(&phase_range, pidx);

    // Conf-space quadrature node underlying this phase-space node.
    int cqidx = p2c_qidx[linc2];

    for (int d = cdim; d < pdim; d++) {
      vidx[d - cdim] = pidx[d];
    }
    long linidx_vel = gkyl_range_idx(&vel_range, vidx);
    const double *vmap_d = (const double *)gkyl_array_cfetch(vmap, linidx_vel);
    const double *xcomp_d = (const double *)gkyl_array_cfetch(phase_ordinates, linc2);

    // Convert comp position coordinate to phys pos coord.
    log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);

    // Convert comp velocity coordinate to phys velocity coord.
    double xcomp[1];
    for (int vd = 0; vd < vdim; vd++) {
      xcomp[0] = xcomp_d[cdim + vd];
      xmu[cdim + vd] = vmap_basis->eval_expand(xcomp, vmap_d + vd * vmap_basis->num_basis);
    }

    // Quantities shared by every branch of the trapping test.
    const double mu = xmu[cdim + 1];
    const double abs_z = fabs(xmu[cdim - 1]);
    const double abs_z_max = fabs(bmag_max_z_val);
    const double ke_par = 0.5 * mass * pow(xmu[cdim], 2.0);

    // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_peak-bmag).
    // A non-positive denominator leaves the corresponding term at zero.
    double KEparDbmag = 0.0, KEparDbmag_wall = 0.0, KEparDbmag_tandem = 0.0;
    if (Dbmag_quad_d[cqidx] > 0.0) {
      KEparDbmag = ke_par / Dbmag_quad_d[cqidx];
    }
    if (Dbmag_quad_wall_d[cqidx] > 0.0) {
      KEparDbmag_wall = ke_par / Dbmag_quad_wall_d[cqidx];
    }
    if (is_tandem && Dbmag_quad_tandem_d[cqidx] > 0.0) {
      KEparDbmag_tandem = ke_par / Dbmag_quad_tandem_d[cqidx];
    }

    double mu_bound = GKYL_MAX2(0.0, KEparDbmag + qDphiDbmag_quad_d[cqidx]);
    double mu_bound_wall = GKYL_MAX2(0.0, -(KEparDbmag_wall + qDphiDbmag_quad_wall_d[cqidx]));
    double mu_bound_tandem = is_tandem ? GKYL_MAX2(0.0,
      KEparDbmag_tandem + qDphiDbmag_quad_tandem_d[cqidx]) : 0.0;

    bool is_trapped;
    if (is_tandem) {
      // Tandem mirror trapping condition.
      const double abs_z_tandem = fabs(bmag_tandem_z_val);
      bool in_outer_cell = abs_z < abs_z_max && abs_z > abs_z_tandem;
      bool in_central_cell = abs_z <= abs_z_tandem;

      if (in_outer_cell) {
        is_trapped = mu_bound < mu;
      }
      else if (in_central_cell) {
        // Either of the two bounds is enough to trap in the central cell.
        double mu_bound_min = GKYL_MIN2(mu_bound, mu_bound_tandem);
        is_trapped = mu_bound_min < mu;
      }
      else {
        is_trapped = mu_bound_wall > mu && abs_z >= abs_z_max;
      }
    }
    else {
      // Single mirror case.
      is_trapped = (mu_bound < mu && abs_z < abs_z_max) ||
        (mu_bound_wall > mu && abs_z >= abs_z_max);
    }

    double *fq = (double *)gkyl_array_fetch(mask_out_quad, linidx_phase);
    fq[linc2] = is_trapped ? norm_fac : 0.0;
  }
}
// Host-side driver that fills mask_out by launching the device kernels.
//
// It first launches the qDphiDbmag kernel over conf_range, then either
//   - the cellwise kernel, writing mask_out directly (up->cellwise_trap_loss), or
//   - the quadrature kernel, writing up->mask_out_quad, followed by a
//     nodal-to-modal matrix multiply into mask_out.
//
// up           Updater object holding the device arrays and settings used below.
// phase_range  Phase-space range to compute the mask on.
// conf_range   Configuration-space range.
// phi          Potential; its device copy is passed to the qDphiDbmag kernel.
// phi_m        Second potential array passed to the qDphiDbmag kernel.
// phi_tandem   Third potential array passed to the qDphiDbmag kernel. It is
//              dereferenced unconditionally here, so it must be non-NULL even
//              when up->is_tandem is false.
// mask_out     Output mask array.
void
gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
  const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
  const struct gkyl_array *phi, const struct gkyl_array *phi_m, const struct gkyl_array *phi_tandem,
  struct gkyl_array *mask_out)
{
  dim3 dimGrid_conf, dimBlock_conf;
  int tot_quad_conf = up->basis_at_ords_conf->size;
  gkyl_parallelize_components_kernel_launch_dims(&dimGrid_conf, &dimBlock_conf, *conf_range,
    tot_quad_conf);

  // Compute qDphiDbmag at quadrature points.
  // NOTE(review): the execution configuration is reconstructed from the launch
  // dimensions computed just above, with default shared memory and stream —
  // confirm against upstream.
  gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker<<<dimGrid_conf, dimBlock_conf>>>(
    up->cdim, *conf_range, *up->bmag_max_range,
    up->basis_at_ords_conf->on_dev, up->bmag_max_basis_on_dev, up->charge, up->is_tandem,
    phi->on_dev, phi_m->on_dev, phi_tandem->on_dev,
    up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
    up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
    up->qDphiDbmag_quad_tandem->on_dev);

  const struct gkyl_velocity_map *gvm = up->vel_map;

  if (up->cellwise_trap_loss) {
    // Don't do quadrature.
    // NOTE(review): launch dimensions taken from the range's own
    // nblocks/nthreads, which are read here and used nowhere else — confirm.
    int nblocks = phase_range->nblocks, nthreads = phase_range->nthreads;
    gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks, nthreads>>>(
      up->cdim, *up->grid_phase, *phase_range, *conf_range,
      gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis_on_dev, up->is_tandem,
      up->mass, up->ordinates_phase->on_dev,
      up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev,
      up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
      up->qDphiDbmag_quad_tandem->on_dev,
      up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
      up->p2c_qidx, gvm->vmap->on_dev, gvm->vmap_basis, mask_out->on_dev);
  }
  else {
    // Use quadrature.
    dim3 dimGrid, dimBlock;
    int tot_quad_phase = up->basis_at_ords_phase->size;
    gkyl_parallelize_components_kernel_launch_dims(&dimGrid, &dimBlock, *phase_range,
      tot_quad_phase);

    // NOTE(review): execution configuration reconstructed from dimGrid/dimBlock
    // computed just above — confirm against upstream.
    gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid, dimBlock>>>(
      up->cdim, *up->grid_phase, *phase_range, *conf_range,
      gvm->local_ext_vel, *up->bmag_max_range, up->bmag_max_basis_on_dev, up->is_tandem,
      up->mass, up->norm_fac, up->ordinates_phase->on_dev,
      up->bmag_max_z_coord->on_dev, up->bmag_tandem_z_coord->on_dev,
      up->qDphiDbmag_quad->on_dev, up->qDphiDbmag_quad_wall->on_dev,
      up->qDphiDbmag_quad_tandem->on_dev,
      up->Dbmag_quad->on_dev, up->Dbmag_quad_wall->on_dev, up->Dbmag_quad_tandem->on_dev,
      up->p2c_qidx, gvm->vmap->on_dev, gvm->vmap_basis, up->mask_out_quad->on_dev);

    // Call cublas to do the matrix multiplication nodal to modal conversion.
    gkyl_mat_mm_array(up->phase_nodal_to_modal_mem, up->mask_out_quad, mask_out);
  }
}