diff --git a/CMakeLists.txt b/CMakeLists.txt
index db615c334..785ebcf54 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,6 +23,7 @@ macro(remove_flag_from_target _target _flag)
 endmacro()
 
 
+
 if (NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
 
     set (CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING
@@ -34,7 +35,6 @@ else ()
     message (STATUS "${Green}Build type is: ${CMAKE_BUILD_TYPE}${ColourReset}")
 endif()
 
-
 if (${CMAKE_BUILD_TYPE} STREQUAL "Release")
     add_definitions (-DNOPAssert)
 endif ()
@@ -42,7 +42,23 @@ endif ()
 add_compile_options (-Wall)
 add_compile_options (-Wunused)
 add_compile_options (-Wextra)
-#¼add_compile_options (-Werror)
+
+
+# allow deprecated functions
+add_compile_options (-Wno-deprecated-declarations)
+file(
+    DOWNLOAD
+    https://github.com/nlohmann/json/releases/download/v3.11.3/json.hpp
+    "${CMAKE_BINARY_DIR}/downloaded_headers/json.hpp"
+)
+file(
+    DOWNLOAD
+    https://raw.githubusercontent.com/manuel5975p/stb/master/stb_image_write.h
+    "${CMAKE_BINARY_DIR}/downloaded_headers/stb_image_write.hpp"
+)
+include_directories("${CMAKE_BINARY_DIR}/downloaded_headers/")
+
+#add_compile_options (-Werror)
 
 # allow deprecated functions
 add_compile_options (-Wno-deprecated-declarations)
@@ -118,7 +134,7 @@ set (CMAKE_CXX_STANDARD 20)
 set (CMAKE_CUDA_EXTENSIONS OFF)
 set (CMAKE_CXX_EXTENSIONS OFF)
 set (CMAKE_CXX_STANDARD_REQUIRED ON)
-set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINGO} -O3 -g ")
+set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 -g ")
 set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
 #set (CMAKE_CXX_FLAGS_DEBUG  "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g")
 
@@ -226,6 +242,7 @@ if (ENABLE_ALPINE)
     endif ()
     message (STATUS "Enable Alpine")
     add_subdirectory (alpine)
+    add_subdirectory(fel)
 endif ()
 
 
diff --git a/config.json b/config.json
new file mode 100644
index 000000000..f5cd3b99f
--- /dev/null
+++ b/config.json
@@ -0,0 +1,85 @@
+{
+    "timestep-ratio" : 1.0,
+    "mesh": {
+      "length-scale" : "micrometers",
+      "time-scale"   : "picoseconds",
+      "extents": [3400.0, 3400.0, 280.0],
+      "resolution": [64, 64, 2800],
+      "mesh-center": [0.0, 0.0, 0.0],
+      "total-time": 30000.0,
+      "bunch-time-step": 1.6,
+      "mesh-truncation-order": 2,
+      "space-charge": false,
+      "solver": "NSFD"
+    },
+    "bunch": {
+      "type": "ellipsoid",
+      "distribution": "uniform",
+      "_charge": 1,
+      "_mass": 1,
+      "charge": 1.846e8,
+      "mass": 1.846e8,
+      "number-of-particles": 1000000,
+      "gamma": 100.41,
+      "_gamma": 1.0,
+      "direction (ignored)": [0.0, 0.0, 1.0],
+      "position is 0 because that's the center": "",
+      "position": [0.0, 0.0, 0.0],
+      "_sigma-position (those are commented out)": [260.0, 260.0, 50.25],
+      "_sigma-momentum (those are commented out)": [1.0e-8, 1.0e-8, 100.41e-4],
+      "sigma-position": [260.0, 260.0, 50.25],
+      "sigma-momentum": [1.0e-8, 1.0e-8, 100.41e-4],
+      "particle-gammabetas": [0, 0, 0],
+      "transverse-truncation   (Deprecated! see distribution-truncations)": 1040.0,
+      "longitudinal-truncation (Deprecated! see distribution-truncations)": 90.0,
+      "distribution-truncations" : [1040.0, 1040.0, 90.0],
+      "bunching-factor": 0.01,
+      "update-rule" : {"type" : "lorentz"}
+    },
+    "field": {
+      "update-rule" : "do",
+      "strength" : 100,
+      "field-sampling": {
+        "sample": true,
+        "type": "at-point",
+        "field": ["Ex", "Ey", "Ez"],
+        "directory": "./",
+        "base-name": "field-sampling/field",
+        "rhythm": 3.2,
+        "position": [0.0, 0.0, 110.0]
+      }
+    },
+    "undulator": {
+      "static-undulator": {
+        "undulator-parameter": 1.417,
+        "_undulator-parameter": 0,
+        "period": 30000.0,
+        "length": 5e6,
+        "polarization-angle": 0.0
+      }
+    },
+    "output":{
+      "rhythm" : 30,
+      "count" : 20,
+      "path": "../renderdata",
+      "type" : "E",
+      "track" : {
+        "radiation" : "boundary_radiation.txt",
+        "particle-position" : "p0pos.txt"
+      }
+    },
+    "fel-output": {
+      "radiation-power": {
+        "sample": true,
+        "type": "at-point",
+        "directory": "./",
+        "base-name": "power-sampling/power-NSFD",
+        "plane-position": 110.0,
+        "normalized-frequency": 1.0
+      }
+    },
+    "experimentation" : {
+      "stretch-factor" : 1.0,
+      "resort" : 1.0
+    }
+  }
diff --git a/fel/CMakeLists.txt b/fel/CMakeLists.txt
new file mode 100644
index 000000000..e986ce8b9
--- /dev/null
+++ b/fel/CMakeLists.txt
@@ -0,0 +1,16 @@
+include_directories (
+    ${CMAKE_SOURCE_DIR}/src
+)
+
+link_directories (
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${Kokkos_DIR}/..
+)
+set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES})
+set (COMPILE_FLAGS ${OPAL_CXX_FLAGS})
+
+add_executable (FreeElectronLaser FreeElectronLaser.cpp)
+target_link_libraries (FreeElectronLaser ${IPPL_LIBS})
+
+add_executable (TestFDTDSolverWithParticles TestFDTDSolverWithParticles.cpp)
+target_link_libraries (TestFDTDSolverWithParticles ${IPPL_LIBS})
\ No newline at end of file
diff --git a/fel/FreeElectronLaser.cpp b/fel/FreeElectronLaser.cpp
new file mode 100644
index 000000000..ce05c8c3e
--- /dev/null
+++ b/fel/FreeElectronLaser.cpp
@@ -0,0 +1,1676 @@
+#include "Ippl.h"
+
+#include <chrono>
+#include <cstdio>  // For popen
+
+#include "Types/Vector.h"
+
+#include "Field/Field.h"
+
+#include "NSFDSolverWithParticles.h"
+#define JSON_HAS_RANGES 0  // Merlin compilation
+#include <Kokkos_Random.hpp>
+#include <fstream>
+#include <json.hpp>
+#include <list>
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include <stb_image_write.hpp>
+
+#include "Undulator.h"
+#include "units.h"
+
+constexpr float turbo_cm[256][3] = {
+    {0.18995, 0.07176, 0.23217}, {0.19483, 0.08339, 0.26149}, {0.19956, 0.09498, 0.29024},
+    {0.20415, 0.10652, 0.31844}, {0.20860, 0.11802, 0.34607}, {0.21291, 0.12947, 0.37314},
+    {0.21708, 0.14087, 0.39964}, {0.22111, 0.15223, 0.42558}, {0.22500, 0.16354, 0.45096},
+    {0.22875, 0.17481, 0.47578}, {0.23236, 0.18603, 0.50004}, {0.23582, 0.19720, 0.52373},
+    {0.23915, 0.20833, 0.54686}, {0.24234, 0.21941, 0.56942}, {0.24539, 0.23044, 0.59142},
+    {0.24830, 0.24143, 0.61286}, {0.25107, 0.25237, 0.63374}, {0.25369, 0.26327, 0.65406},
+    {0.25618, 0.27412, 0.67381}, {0.25853, 0.28492, 0.69300}, {0.26074, 0.29568, 0.71162},
+    {0.26280, 0.30639, 0.72968}, {0.26473, 0.31706, 0.74718}, {0.26652, 0.32768, 0.76412},
+    {0.26816, 0.33825, 0.78050}, {0.26967, 0.34878, 0.79631}, {0.27103, 0.35926, 0.81156},
+    {0.27226, 0.36970, 0.82624}, {0.27334, 0.38008, 0.84037}, {0.27429, 0.39043, 0.85393},
+    {0.27509, 0.40072, 0.86692}, {0.27576, 0.41097, 0.87936}, {0.27628, 0.42118, 0.89123},
+    {0.27667, 0.43134, 0.90254}, {0.27691, 0.44145, 0.91328}, {0.27701, 0.45152, 0.92347},
+    {0.27698, 0.46153, 0.93309}, {0.27680, 0.47151, 0.94214}, {0.27648, 0.48144, 0.95064},
+    {0.27603, 0.49132, 0.95857}, {0.27543, 0.50115, 0.96594}, {0.27469, 0.51094, 0.97275},
+    {0.27381, 0.52069, 0.97899}, {0.27273, 0.53040, 0.98461}, {0.27106, 0.54015, 0.98930},
+    {0.26878, 0.54995, 0.99303}, {0.26592, 0.55979, 0.99583}, {0.26252, 0.56967, 0.99773},
+    {0.25862, 0.57958, 0.99876}, {0.25425, 0.58950, 0.99896}, {0.24946, 0.59943, 0.99835},
+    {0.24427, 0.60937, 0.99697}, {0.23874, 0.61931, 0.99485}, {0.23288, 0.62923, 0.99202},
+    {0.22676, 0.63913, 0.98851}, {0.22039, 0.64901, 0.98436}, {0.21382, 0.65886, 0.97959},
+    {0.20708, 0.66866, 0.97423}, {0.20021, 0.67842, 0.96833}, {0.19326, 0.68812, 0.96190},
+    {0.18625, 0.69775, 0.95498}, {0.17923, 0.70732, 0.94761}, {0.17223, 0.71680, 0.93981},
+    {0.16529, 0.72620, 0.93161}, {0.15844, 0.73551, 0.92305}, {0.15173, 0.74472, 0.91416},
+    {0.14519, 0.75381, 0.90496}, {0.13886, 0.76279, 0.89550}, {0.13278, 0.77165, 0.88580},
+    {0.12698, 0.78037, 0.87590}, {0.12151, 0.78896, 0.86581}, {0.11639, 0.79740, 0.85559},
+    {0.11167, 0.80569, 0.84525}, {0.10738, 0.81381, 0.83484}, {0.10357, 0.82177, 0.82437},
+    {0.10026, 0.82955, 0.81389}, {0.09750, 0.83714, 0.80342}, {0.09532, 0.84455, 0.79299},
+    {0.09377, 0.85175, 0.78264}, {0.09287, 0.85875, 0.77240}, {0.09267, 0.86554, 0.76230},
+    {0.09320, 0.87211, 0.75237}, {0.09451, 0.87844, 0.74265}, {0.09662, 0.88454, 0.73316},
+    {0.09958, 0.89040, 0.72393}, {0.10342, 0.89600, 0.71500}, {0.10815, 0.90142, 0.70599},
+    {0.11374, 0.90673, 0.69651}, {0.12014, 0.91193, 0.68660}, {0.12733, 0.91701, 0.67627},
+    {0.13526, 0.92197, 0.66556}, {0.14391, 0.92680, 0.65448}, {0.15323, 0.93151, 0.64308},
+    {0.16319, 0.93609, 0.63137}, {0.17377, 0.94053, 0.61938}, {0.18491, 0.94484, 0.60713},
+    {0.19659, 0.94901, 0.59466}, {0.20877, 0.95304, 0.58199}, {0.22142, 0.95692, 0.56914},
+    {0.23449, 0.96065, 0.55614}, {0.24797, 0.96423, 0.54303}, {0.26180, 0.96765, 0.52981},
+    {0.27597, 0.97092, 0.51653}, {0.29042, 0.97403, 0.50321}, {0.30513, 0.97697, 0.48987},
+    {0.32006, 0.97974, 0.47654}, {0.33517, 0.98234, 0.46325}, {0.35043, 0.98477, 0.45002},
+    {0.36581, 0.98702, 0.43688}, {0.38127, 0.98909, 0.42386}, {0.39678, 0.99098, 0.41098},
+    {0.41229, 0.99268, 0.39826}, {0.42778, 0.99419, 0.38575}, {0.44321, 0.99551, 0.37345},
+    {0.45854, 0.99663, 0.36140}, {0.47375, 0.99755, 0.34963}, {0.48879, 0.99828, 0.33816},
+    {0.50362, 0.99879, 0.32701}, {0.51822, 0.99910, 0.31622}, {0.53255, 0.99919, 0.30581},
+    {0.54658, 0.99907, 0.29581}, {0.56026, 0.99873, 0.28623}, {0.57357, 0.99817, 0.27712},
+    {0.58646, 0.99739, 0.26849}, {0.59891, 0.99638, 0.26038}, {0.61088, 0.99514, 0.25280},
+    {0.62233, 0.99366, 0.24579}, {0.63323, 0.99195, 0.23937}, {0.64362, 0.98999, 0.23356},
+    {0.65394, 0.98775, 0.22835}, {0.66428, 0.98524, 0.22370}, {0.67462, 0.98246, 0.21960},
+    {0.68494, 0.97941, 0.21602}, {0.69525, 0.97610, 0.21294}, {0.70553, 0.97255, 0.21032},
+    {0.71577, 0.96875, 0.20815}, {0.72596, 0.96470, 0.20640}, {0.73610, 0.96043, 0.20504},
+    {0.74617, 0.95593, 0.20406}, {0.75617, 0.95121, 0.20343}, {0.76608, 0.94627, 0.20311},
+    {0.77591, 0.94113, 0.20310}, {0.78563, 0.93579, 0.20336}, {0.79524, 0.93025, 0.20386},
+    {0.80473, 0.92452, 0.20459}, {0.81410, 0.91861, 0.20552}, {0.82333, 0.91253, 0.20663},
+    {0.83241, 0.90627, 0.20788}, {0.84133, 0.89986, 0.20926}, {0.85010, 0.89328, 0.21074},
+    {0.85868, 0.88655, 0.21230}, {0.86709, 0.87968, 0.21391}, {0.87530, 0.87267, 0.21555},
+    {0.88331, 0.86553, 0.21719}, {0.89112, 0.85826, 0.21880}, {0.89870, 0.85087, 0.22038},
+    {0.90605, 0.84337, 0.22188}, {0.91317, 0.83576, 0.22328}, {0.92004, 0.82806, 0.22456},
+    {0.92666, 0.82025, 0.22570}, {0.93301, 0.81236, 0.22667}, {0.93909, 0.80439, 0.22744},
+    {0.94489, 0.79634, 0.22800}, {0.95039, 0.78823, 0.22831}, {0.95560, 0.78005, 0.22836},
+    {0.96049, 0.77181, 0.22811}, {0.96507, 0.76352, 0.22754}, {0.96931, 0.75519, 0.22663},
+    {0.97323, 0.74682, 0.22536}, {0.97679, 0.73842, 0.22369}, {0.98000, 0.73000, 0.22161},
+    {0.98289, 0.72140, 0.21918}, {0.98549, 0.71250, 0.21650}, {0.98781, 0.70330, 0.21358},
+    {0.98986, 0.69382, 0.21043}, {0.99163, 0.68408, 0.20706}, {0.99314, 0.67408, 0.20348},
+    {0.99438, 0.66386, 0.19971}, {0.99535, 0.65341, 0.19577}, {0.99607, 0.64277, 0.19165},
+    {0.99654, 0.63193, 0.18738}, {0.99675, 0.62093, 0.18297}, {0.99672, 0.60977, 0.17842},
+    {0.99644, 0.59846, 0.17376}, {0.99593, 0.58703, 0.16899}, {0.99517, 0.57549, 0.16412},
+    {0.99419, 0.56386, 0.15918}, {0.99297, 0.55214, 0.15417}, {0.99153, 0.54036, 0.14910},
+    {0.98987, 0.52854, 0.14398}, {0.98799, 0.51667, 0.13883}, {0.98590, 0.50479, 0.13367},
+    {0.98360, 0.49291, 0.12849}, {0.98108, 0.48104, 0.12332}, {0.97837, 0.46920, 0.11817},
+    {0.97545, 0.45740, 0.11305}, {0.97234, 0.44565, 0.10797}, {0.96904, 0.43399, 0.10294},
+    {0.96555, 0.42241, 0.09798}, {0.96187, 0.41093, 0.09310}, {0.95801, 0.39958, 0.08831},
+    {0.95398, 0.38836, 0.08362}, {0.94977, 0.37729, 0.07905}, {0.94538, 0.36638, 0.07461},
+    {0.94084, 0.35566, 0.07031}, {0.93612, 0.34513, 0.06616}, {0.93125, 0.33482, 0.06218},
+    {0.92623, 0.32473, 0.05837}, {0.92105, 0.31489, 0.05475}, {0.91572, 0.30530, 0.05134},
+    {0.91024, 0.29599, 0.04814}, {0.90463, 0.28696, 0.04516}, {0.89888, 0.27824, 0.04243},
+    {0.89298, 0.26981, 0.03993}, {0.88691, 0.26152, 0.03753}, {0.88066, 0.25334, 0.03521},
+    {0.87422, 0.24526, 0.03297}, {0.86760, 0.23730, 0.03082}, {0.86079, 0.22945, 0.02875},
+    {0.85380, 0.22170, 0.02677}, {0.84662, 0.21407, 0.02487}, {0.83926, 0.20654, 0.02305},
+    {0.83172, 0.19912, 0.02131}, {0.82399, 0.19182, 0.01966}, {0.81608, 0.18462, 0.01809},
+    {0.80799, 0.17753, 0.01660}, {0.79971, 0.17055, 0.01520}, {0.79125, 0.16368, 0.01387},
+    {0.78260, 0.15693, 0.01264}, {0.77377, 0.15028, 0.01148}, {0.76476, 0.14374, 0.01041},
+    {0.75556, 0.13731, 0.00942}, {0.74617, 0.13098, 0.00851}, {0.73661, 0.12477, 0.00769},
+    {0.72686, 0.11867, 0.00695}, {0.71692, 0.11268, 0.00629}, {0.70680, 0.10680, 0.00571},
+    {0.69650, 0.10102, 0.00522}, {0.68602, 0.09536, 0.00481}, {0.67535, 0.08980, 0.00449},
+    {0.66449, 0.08436, 0.00424}, {0.65345, 0.07902, 0.00408}, {0.64223, 0.07380, 0.00401},
+    {0.63082, 0.06868, 0.00401}, {0.61923, 0.06367, 0.00410}, {0.60746, 0.05878, 0.00427},
+    {0.59550, 0.05399, 0.00453}, {0.58336, 0.04931, 0.00486}, {0.57103, 0.04474, 0.00529},
+    {0.55852, 0.04028, 0.00579}, {0.54583, 0.03593, 0.00638}, {0.53295, 0.03169, 0.00705},
+    {0.51989, 0.02756, 0.00780}, {0.50664, 0.02354, 0.00863}, {0.49321, 0.01963, 0.00955},
+    {0.47960, 0.01583, 0.01055}};
+
+uint64_t nanoTime() {
+    using namespace std;
+    using namespace chrono;
+    return duration_cast<nanoseconds>(high_resolution_clock::now().time_since_epoch()).count();
+}
+// CONFIG
+KOKKOS_INLINE_FUNCTION bool isNaN(float x) {
+#ifdef __CUDA_ARCH__
+    return isnan(x);
+#else
+    return std::isnan(x);
+#endif
+}
+KOKKOS_INLINE_FUNCTION bool isINF(float x) {
+#ifdef __CUDA_ARCH__
+    return isinf(x);
+#else
+    return std::isinf(x);
+#endif
+}
+KOKKOS_INLINE_FUNCTION bool isNaN(double x) {
+#ifdef __CUDA_ARCH__
+    return isnan(x);
+#else
+    return std::isnan(x);
+#endif
+}
+KOKKOS_INLINE_FUNCTION bool isINF(double x) {
+#ifdef __CUDA_ARCH__
+    return isinf(x);
+#else
+    return std::isinf(x);
+#endif
+}
+template <unsigned int Dim, typename callable, typename... Ts>
+KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector<uint32_t, Dim> from,
+                                       ippl::Vector<uint32_t, Dim> to, Ts... args) {
+    if constexpr (sizeof...(Ts) == Dim) {
+        c(args...);
+    } else {
+        for (uint32_t i = from[sizeof...(Ts)]; i < to[sizeof...(Ts)]; i++) {
+            serial_for(c, from, to, args..., i);
+        }
+    }
+}
+
+struct config {
+    using scalar = double;
+
+    // GRID PARAMETERS
+    ippl::Vector<uint32_t, 3> resolution;  // Grid resolution in 3D
+    ippl::Vector<scalar, 3> extents;       // Physical extents of the grid in each dimension
+    scalar total_time;                     // Total simulation time
+    scalar timestep_ratio;                 // Ratio of timestep to some reference value
+
+    scalar length_scale_in_jobfile;    // Length scale defined in the jobfile
+    scalar temporal_scale_in_jobfile;  // Temporal scale defined in the jobfile
+
+    // PARTICLE PARAMETERS
+    scalar charge;           // Particle charge in unit_charge
+    scalar mass;             // Particle mass in unit_mass
+    uint64_t num_particles;  // Number of particles in the simulation
+    bool space_charge;       // Flag for considering space charge effects
+
+    // BUNCH PARAMETERS
+    ippl::Vector<scalar, 3> mean_position;  // Mean initial position of the particle bunch
+    ippl::Vector<scalar, 3>
+        sigma_position;  // Standard deviation of the initial position distribution
+    ippl::Vector<scalar, 3> position_truncations;  // Truncations of the position distribution
+    ippl::Vector<scalar, 3>
+        sigma_momentum;  // Standard deviation of the initial momentum distribution
+    scalar bunch_gamma;  // Relativistic gamma factor of the bunch
+
+    // UNDULATOR PARAMETERS
+    scalar undulator_K;       // Undulator parameter K
+    scalar undulator_period;  // Period of the undulator
+    scalar undulator_length;  // Length of the undulator
+
+    uint32_t output_rhythm;                                      // Frequency of output in timesteps
+    std::string output_path;                                     // Path to output files
+    std::unordered_map<std::string, double> experiment_options;  // Additional experimental options
+};
+
+template <typename scalar, unsigned Dim>
+ippl::Vector<scalar, Dim> getVector(const nlohmann::json& j) {
+    if (j.is_array()) {
+        assert(j.size() == Dim);
+        ippl::Vector<scalar, Dim> ret;
+        for (unsigned i = 0; i < Dim; i++)
+            ret[i] = (scalar)j[i];
+        return ret;
+    } else {
+        std::cerr << "Warning: Obtaining Vector from scalar json\n";
+        ippl::Vector<scalar, Dim> ret;
+        ret.fill((scalar)j);
+        return ret;
+    }
+}
+template <size_t N, typename T>
+struct DefaultedStringLiteral {
+    constexpr DefaultedStringLiteral(const char (&str)[N], const T val)
+        : value(val) {
+        std::copy_n(str, N, key);
+    }
+
+    T value;
+    char key[N];
+};
+template <size_t N>
+struct StringLiteral {
+    constexpr StringLiteral(const char (&str)[N]) { std::copy_n(str, N, value); }
+
+    char value[N];
+    constexpr DefaultedStringLiteral<N, int> operator>>(int t) const noexcept {
+        return DefaultedStringLiteral<N, int>(value, t);
+    }
+    constexpr size_t size() const noexcept { return N - 1; }
+};
+template <StringLiteral lit>
+constexpr size_t chash() {
+    size_t hash = 5381;
+    int c;
+
+    for (size_t i = 0; i < lit.size(); i++) {
+        c    = lit.value[i];
+        hash = ((hash << 5) + hash) + c;  // hash * 33 + c
+    }
+
+    return hash;
+}
+size_t chash(const char* val) {
+    size_t hash = 5381;
+    int c;
+
+    while ((c = *val++)) {
+        hash = ((hash << 5) + hash) + c;  // hash * 33 + c
+    }
+
+    return hash;
+}
+size_t chash(const std::string& _val) {
+    size_t hash     = 5381;
+    const char* val = _val.c_str();
+    int c;
+
+    while ((c = *val++)) {
+        hash = ((hash << 5) + hash) + c;  // hash * 33 + c
+    }
+
+    return hash;
+}
+std::string lowercase_singular(std::string str) {
+    // Convert string to lowercase
+    std::transform(str.begin(), str.end(), str.begin(), ::tolower);
+
+    // Check if the string ends with "s" and remove it if it does
+    if (!str.empty() && str.back() == 's') {
+        str.pop_back();
+    }
+
+    return str;
+}
+double get_time_multiplier(const nlohmann::json& j) {
+    std::string length_scale_string = lowercase_singular((std::string)j["mesh"]["time-scale"]);
+    double time_factor              = 1.0;
+    switch (chash(length_scale_string)) {
+        case chash<"planck-time">():
+        case chash<"plancktime">():
+        case chash<"pt">():
+        case chash<"natural">():
+            time_factor = unit_time_in_seconds;
+            break;
+        case chash<"picosecond">():
+            time_factor = 1e-12;
+            break;
+        case chash<"nanosecond">():
+            time_factor = 1e-9;
+            break;
+        case chash<"microsecond">():
+            time_factor = 1e-6;
+            break;
+        case chash<"millisecond">():
+            time_factor = 1e-3;
+            break;
+        case chash<"second">():
+            time_factor = 1.0;
+            break;
+        default:
+            std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"]
+                      << "\n";
+            break;
+    }
+    return time_factor;
+}
+double get_length_multiplier(const nlohmann::json& options) {
+    std::string length_scale_string =
+        lowercase_singular((std::string)options["mesh"]["length-scale"]);
+    double length_factor = 1.0;
+    switch (chash(length_scale_string)) {
+        case chash<"planck-length">():
+        case chash<"plancklength">():
+        case chash<"pl">():
+        case chash<"natural">():
+            length_factor = unit_length_in_meters;
+            break;
+        case chash<"picometer">():
+            length_factor = 1e-12;
+            break;
+        case chash<"nanometer">():
+            length_factor = 1e-9;
+            break;
+        case chash<"micrometer">():
+            length_factor = 1e-6;
+            break;
+        case chash<"millimeter">():
+            length_factor = 1e-3;
+            break;
+        case chash<"meter">():
+            length_factor = 1.0;
+            break;
+        default:
+            std::cerr << "Unrecognized length scale: "
+                      << (std::string)options["mesh"]["length-scale"] << "\n";
+            break;
+    }
+    return length_factor;
+}
+config read_config(const char* filepath) {
+    std::ifstream cfile(filepath);
+    nlohmann::json j;
+    cfile >> j;
+    config::scalar lmult = get_length_multiplier(j);
+    config::scalar tmult = get_time_multiplier(j);
+    config ret;
+
+    ret.extents[0] = ((config::scalar)j["mesh"]["extents"][0] * lmult) / unit_length_in_meters;
+    ret.extents[1] = ((config::scalar)j["mesh"]["extents"][1] * lmult) / unit_length_in_meters;
+    ret.extents[2] = ((config::scalar)j["mesh"]["extents"][2] * lmult) / unit_length_in_meters;
+    ret.resolution = getVector<uint32_t, 3>(j["mesh"]["resolution"]);
+
+    // std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n";
+    // std::cerr << "Tmult: " << tmult << "\n";
+    if (j.contains("timestep-ratio")) {
+        ret.timestep_ratio = (config::scalar)j["timestep-ratio"];
+    }
+
+    else {
+        ret.timestep_ratio = 1;
+    }
+    ret.total_time   = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds;
+    ret.space_charge = (bool)(j["mesh"]["space-charge"]);
+    ret.bunch_gamma  = (config::scalar)(j["bunch"]["gamma"]);
+    if (ret.bunch_gamma < config::scalar(1)) {
+        std::cerr << "Gamma must be >= 1\n";
+        exit(1);
+    }
+    assert(j.contains("undulator"));
+    assert(j["undulator"].contains("static-undulator"));
+
+    ret.undulator_K      = j["undulator"]["static-undulator"]["undulator-parameter"];
+    ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult)
+                           / unit_length_in_meters;
+    ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult)
+                           / unit_length_in_meters;
+    assert(!std::isnan(ret.undulator_length));
+    assert(!std::isnan(ret.undulator_period));
+    assert(!std::isnan(ret.extents[0]));
+    assert(!std::isnan(ret.extents[1]));
+    assert(!std::isnan(ret.extents[2]));
+    assert(!std::isnan(ret.total_time));
+    ret.length_scale_in_jobfile   = get_length_multiplier(j);
+    ret.temporal_scale_in_jobfile = get_time_multiplier(j);
+    ret.charge        = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges;
+    ret.mass          = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses;
+    ret.num_particles = (uint64_t)j["bunch"]["number-of-particles"];
+    ret.mean_position =
+        getVector<config::scalar, 3>(j["bunch"]["position"]) * lmult / unit_length_in_meters;
+    ret.sigma_position =
+        getVector<config::scalar, 3>(j["bunch"]["sigma-position"]) * lmult / unit_length_in_meters;
+    ret.position_truncations = getVector<config::scalar, 3>(j["bunch"]["distribution-truncations"])
+                               * lmult / unit_length_in_meters;
+    ret.sigma_momentum = getVector<config::scalar, 3>(j["bunch"]["sigma-momentum"]);
+    ret.output_rhythm  = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0;
+    ret.output_path    = "../data/";
+    if (j["output"].contains("path")) {
+        ret.output_path = j["output"]["path"];
+        if (!ret.output_path.ends_with('/')) {
+            ret.output_path.push_back('/');
+        }
+    }
+    if (j.contains("experimentation")) {
+        nlohmann::json je = j["experimentation"];
+        for (auto it = je.begin(); it != je.end(); it++) {
+            ret.experiment_options[it.key()] = double(it.value());
+        }
+    }
+    return ret;
+}
+template <typename scalar>
+using FieldVector = ippl::Vector<scalar, 3>;
+template <typename scalar>
+struct BunchInitialize {
+    /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If
+     * it is manual the charge at points of the position vector will be produced.
+     */
+    // std::string     			bunchType_;
+
+    /* Type of the distributions (transverse or longitudinal) in the bunch.
+     */
+    std::string distribution_;
+
+    /* Type of the generator for creating the bunch distribution.
+     */
+    std::string generator_;
+
+    /* Total number of macroparticles in the bunch. */
+    unsigned int numberOfParticles_;
+
+    /* Total charge of the bunch in pC. */
+    scalar cloudCharge_;
+
+    /* Initial energy of the bunch in MeV. */
+    scalar initialGamma_;
+
+    /* Initial normalized speed of the bunch. */
+    scalar initialBeta_;
+
+    /* Initial movement direction of the bunch, which is a unit vector. */
+    FieldVector<scalar> initialDirection_;
+
+    /* Position of the center of the bunch in the unit of length scale. */
+    // std::vector<FieldVector<scalar> >	position_;
+    FieldVector<scalar> position_;
+
+    /* Number of macroparticles in each direction for 3Dcrystal type. */
+    FieldVector<unsigned int> numbers_;
+
+    /* Lattice constant in x, y, and z directions for 3D crystal type. */
+    FieldVector<scalar> latticeConstants_;
+
+    /* Spread in position for each of the directions in the unit of length scale. For the 3D crystal
+     * type, it will be the spread in position for each micro-bunch of the crystal.
+     */
+    FieldVector<scalar> sigmaPosition_;
+
+    /* Spread in energy in each direction. */
+    FieldVector<scalar> sigmaGammaBeta_;
+
+    /* Store the truncation transverse distance for the electron generation.
+     */
+    scalar tranTrun_;
+
+    /* Store the truncation longitudinal distance for the electron generation.
+     */
+    scalar longTrun_;
+
+    /* Name of the file for reading the electrons distribution from.
+     */
+    std::string fileName_;
+
+    /* The radiation wavelength corresponding to the bunch length outside the undulator
+     */
+    scalar lambda_;
+
+    /* Bunching factor for the initialization of the bunch.
+     */
+    scalar bF_;
+
+    /* Phase of the bunching factor for the initialization of the bunch.
+     */
+    scalar bFP_;
+
+    /* Boolean flag determining the activation of shot-noise.
+     */
+    bool shotNoise_;
+
+    /* Initial beta vector of the bunch, which is obtained as the product of beta and direction.
+     */
+    FieldVector<scalar> betaVector_;
+
+    /* Initialize the parameters for the bunch initialization to some first values. */
+    // BunchInitialize ();
+};
+
+// LORENTZ FRAME AND UNDULATOR
+
+template <typename scalar>
+BunchInitialize<scalar> generate_mithra_config(
+    const config& cfg, const ippl::UniaxialLorentzframe<scalar>& /*frame_boost unused*/) {
+    using vec3         = ippl::Vector<scalar, 3>;
+    scalar frame_gamma = cfg.bunch_gamma / std::sqrt(1 + 0.5 * cfg.undulator_K * cfg.undulator_K);
+    BunchInitialize<scalar> init;
+    init.generator_        = "random";
+    init.distribution_     = "uniform";
+    init.initialDirection_ = vec3{0, 0, 1};
+    init.initialGamma_     = cfg.bunch_gamma;
+    init.initialBeta_      = cfg.bunch_gamma == scalar(1)
+                                 ? 0
+                                 : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma);
+    init.sigmaGammaBeta_   = cfg.sigma_momentum.template cast<scalar>();
+    init.sigmaPosition_    = cfg.sigma_position.template cast<scalar>();
+
+    // TODO: Initial bunching factor huh
+    init.bF_                = 0.01;
+    init.bFP_               = 0;
+    init.shotNoise_         = false;
+    init.cloudCharge_       = cfg.charge;
+    init.lambda_            = cfg.undulator_period / (2 * frame_gamma * frame_gamma);
+    init.longTrun_          = cfg.position_truncations[2];
+    init.tranTrun_          = cfg.position_truncations[0];
+    init.position_          = cfg.mean_position.cast<scalar>();
+    init.betaVector_        = ippl::Vector<scalar, 3>{0, 0, init.initialBeta_};
+    init.numberOfParticles_ = cfg.num_particles;
+
+    init.numbers_          = 0;              // UNUSED
+    init.latticeConstants_ = vec3{0, 0, 0};  // UNUSED
+
+    return init;
+}
+template <typename Double>
+struct Charge {
+    Double q;                     /* Charge of the point in the unit of electron charge.	*/
+    FieldVector<Double> rnp, rnm; /* Position vector of the charge.			*/
+    FieldVector<Double> gb;       /* Normalized velocity vector of the charge.		*/
+
+    /* Double flag determining if the particle is passing the entrance point of the undulator. This
+     * flag can be used for better boosting the bunch to the moving frame. We need to consider it to
+     * be double, because this flag needs to be communicated during bunch update.
+     */
+    Double e;
+
+    // Charge();
+};
+template <typename scalar>
+using ChargeVector = std::list<Charge<scalar>>;
+template <typename Double>
+void initializeBunchEllipsoid(BunchInitialize<Double> bunchInit, ChargeVector<Double>& chargeVector,
+                              int rank, int size, int ia) {
+    /* Correct the number of particles if it is not a multiple of four.
+     */
+    if (bunchInit.numberOfParticles_ % 4 != 0) {
+        unsigned int n = bunchInit.numberOfParticles_ % 4;
+        bunchInit.numberOfParticles_ += 4 - n;
+        // printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of
+        // particles in the bunch is not a multiple of four. ") +
+        //     std::string("It is corrected to ") +  std::to_string(bunchInit.numberOfParticles_) );
+    }
+
+    /* Save the initially given number of particles. */
+    unsigned int Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size();
+
+    /* Declare the required parameters for the initialization of charge vectors. */
+    Charge<Double> charge;
+    charge.q               = bunchInit.cloudCharge_ / Np;
+    FieldVector<Double> gb = bunchInit.initialGamma_ * bunchInit.betaVector_;
+    FieldVector<Double> r(0.0);
+    FieldVector<Double> t(0.0);
+    Double t0;  //, g;
+    Double zmin = 1e100;
+    Double Ne, bF, bFi;
+    unsigned int bmi;
+    std::vector<Double> randomNumbers;
+
+    /* The initialization in group of four particles should only be done if there exists an
+     * undulator in the interaction.
+     */
+    unsigned int ng = (bunchInit.lambda_ == 0.0) ? 1 : 4;
+
+    /* Check the bunching factor. */
+    if (bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0) {
+        // printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be
+        // larger than one or a negative value !!!") ); exit(1);
+    }
+
+    /* If the generator is random we should make sure that different processors do not produce the
+     * same random numbers.
+     */
+    if (bunchInit.generator_ == "random") {
+        /* Initialize the random number generator.
+         */
+        srand(time(NULL));
+        /* Np / ng * 20 is the maximum number of particles.
+         */
+        randomNumbers.resize(Np / ng * 20, 0.0);
+        for (unsigned int ri = 0; ri < Np / ng * 20; ri++)
+            randomNumbers[ri] =
+                (float)std::min(1 - 1e-7, std::max(1e-7, ((double)rand()) / RAND_MAX));
+    }
+
+    /* Declare the generator function depending on the input.
+     */
+    auto generate = [&](unsigned int n, unsigned int m) {
+        // if 	( bunchInit.generator_ == "random" )
+        return (randomNumbers.at(n * 2 * Np / ng + m));
+        // else
+        //   return  ( randomNumbers[ n * 2 * Np/ng + m ] );
+        // TODO: Return halton properly
+        // return 	( halton(n,m) );
+    };
+
+    /* Declare the function for injecting the shot noise.
+     */
+    auto insertCharge = [&](Charge<Double> q) {
+        for (unsigned int ii = 0; ii < ng; ii++) {
+            /* The random modulation is introduced depending on the shot-noise being activated.
+             */
+            if (bunchInit.shotNoise_) {
+                /* Obtain the number of beamlet.
+                 */
+                bmi = int((charge.rnp[2] - zmin) / bunchInit.lambda_);
+
+                /* Obtain the phase and amplitude of the modulation.
+                 */
+                bFi = bF * sqrt(-2.0 * log(generate(8, bmi)));
+
+                q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
+
+                q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi
+                            * sin(2.0 * M_PI / bunchInit.lambda_ * q.rnp[2]
+                                  + 2.0 * M_PI * generate(9, bmi));
+            } else if (bunchInit.lambda_ != 0.0) {
+                q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii;
+
+                q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_
+                            * sin(2.0 * M_PI / bunchInit.lambda_ * q.rnp[2]
+                                  + bunchInit.bFP_ * M_PI / 180.0);
+            }
+
+            /* Set this charge into the charge vector. */
+            chargeVector.push_back(q);
+        }
+    };
+
+    /* If the shot noise is on, we need the minimum value of the bunch z coordinate to be able to
+     * calculate the FEL bucket number. */
+    if (bunchInit.shotNoise_) {
+        for (i = 0; i < Np / ng; i++) {
+            if (bunchInit.distribution_ == "uniform")
+                zmin = std::min(
+                    Double(2.0 * generate(2, i + Np0) - 1.0) * bunchInit.sigmaPosition_[2], zmin);
+            else if (bunchInit.distribution_ == "gaussian")
+                zmin = std::min(
+                    (Double)(bunchInit.sigmaPosition_[2] * sqrt(-2.0 * log(generate(2, i + Np0)))
+                             * sin(2.0 * M_PI * generate(3, i + Np0))),
+                    zmin);
+            else {
+                std::cout << std::string(
+                    "The longitudinal type is not correctly given to the code !!!\n");
+                exit(1);
+            }
+        }
+
+        if (bunchInit.distribution_ == "uniform")
+            for (; i < unsigned(Np / ng
+                                * (1.0
+                                   + 2.0 * bunchInit.lambda_ * sqrt(2.0 * M_PI)
+                                         / (2.0 * bunchInit.sigmaPosition_[2])));
+                 i++) {
+                t0 = 2.0 * bunchInit.lambda_ * sqrt(-2.0 * log(generate(2, i + Np0)))
+                     * sin(2.0 * M_PI * generate(3, i + Np0));
+                t0 += (t0 < 0.0) ? (-bunchInit.sigmaPosition_[2]) : (bunchInit.sigmaPosition_[2]);
+
+                zmin = std::min(t0, zmin);
+            }
+
+        zmin = zmin + bunchInit.position_[2];
+
+        /* Obtain the average number of electrons per FEL beamlet.
+         */
+        Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / (2.0 * bunchInit.sigmaPosition_[2]);
+
+        /* Set the bunching factor level for the shot noise depending on the given values.
+         */
+        bF = (bunchInit.bF_ == 0.0) ? 1.0 / sqrt(Ne) : bunchInit.bF_;
+    }
+
+    /* Determine the properties of each charge point and add them to the charge vector. */
+    for (i = rank; i < Np / ng; i += size) {
+        /* Determine the transverse coordinate. */
+        r[0] = bunchInit.sigmaPosition_[0] * sqrt(-2.0 * log(generate(0, i + Np0)))
+               * cos(2.0 * M_PI * generate(1, i + Np0));
+        r[1] = bunchInit.sigmaPosition_[1] * sqrt(-2.0 * log(generate(0, i + Np0)))
+               * sin(2.0 * M_PI * generate(1, i + Np0));
+
+        /* Determine the longitudinal coordinate. */
+        if (bunchInit.distribution_ == "uniform")
+            r[2] = (2.0 * generate(2, i + Np0) - 1.0) * bunchInit.sigmaPosition_[2];
+        else if (bunchInit.distribution_ == "gaussian")
+            r[2] = bunchInit.sigmaPosition_[2] * sqrt(-2.0 * log(generate(2, i + Np0)))
+                   * sin(2.0 * M_PI * generate(3, i + Np0));
+        else {
+            exit(1);
+        }
+
+        /* Determine the transverse momentum. */
+        t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt(-2.0 * log(generate(4, i + Np0)))
+               * cos(2.0 * M_PI * generate(5, i + Np0));
+        t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt(-2.0 * log(generate(4, i + Np0)))
+               * sin(2.0 * M_PI * generate(5, i + Np0));
+        t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt(-2.0 * log(generate(6, i + Np0)))
+               * cos(2.0 * M_PI * generate(7, i + Np0));
+
+        if (fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_
+            && fabs(r[2]) < bunchInit.longTrun_) {
+            /* Shift the generated charge to the center position and momentum space.
+             */
+            charge.rnp = bunchInit.position_;
+            charge.rnp += r;
+
+            charge.gb = gb;
+            charge.gb += t;
+            if (std::isinf(gb[2])) {
+                std::cerr << "[Warning] Gammabeta obtained an klonked here\n";
+            }
+
+            /* Insert this charge and the mirrored ones into the charge vector.
+             */
+            insertCharge(charge);
+        }
+    }
+
+    /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove
+     * the CSE from the tail of the bunch.
+     */
+    if (bunchInit.distribution_ == "uniform") {
+        for (; i < unsigned(uint32_t(Np / ng)
+                            * (1.0
+                               + 2.0 * bunchInit.lambda_ * sqrt(2.0 * M_PI)
+                                     / (2.0 * bunchInit.sigmaPosition_[2])));
+             i += size) {
+            r[0] = bunchInit.sigmaPosition_[0] * sqrt(-2.0 * log(generate(0, i + Np0)))
+                   * cos(2.0 * M_PI * generate(1, i + Np0));
+            r[1] = bunchInit.sigmaPosition_[1] * sqrt(-2.0 * log(generate(0, i + Np0)))
+                   * sin(2.0 * M_PI * generate(1, i + Np0));
+
+            /* Determine the longitudinal coordinate. */
+            r[2] = 2.0 * bunchInit.lambda_ * sqrt(-2.0 * log(generate(2, i + Np0)))
+                   * sin(2.0 * M_PI * generate(3, i + Np0));
+            r[2] += (r[2] < 0.0) ? (-bunchInit.sigmaPosition_[2]) : (bunchInit.sigmaPosition_[2]);
+
+            /* Determine the transverse momentum.
+             */
+            t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt(-2.0 * log(generate(4, i + Np0)))
+                   * cos(2.0 * M_PI * generate(5, i + Np0));
+            t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt(-2.0 * log(generate(4, i + Np0)))
+                   * sin(2.0 * M_PI * generate(5, i + Np0));
+            t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt(-2.0 * log(generate(6, i + Np0)))
+                   * cos(2.0 * M_PI * generate(7, i + Np0));
+            if (fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_
+                && fabs(r[2]) < bunchInit.longTrun_) {
+                /* Shift the generated charge to the center position and momentum space.
+                 */
+                charge.rnp = bunchInit.position_[ia];
+                charge.rnp += r;
+
+                charge.gb = gb;
+
+                charge.gb += t;
+                /* Insert this charge and the mirrored ones into the charge vector.
+                 */
+                insertCharge(charge);
+            }
+        }
+    }
+
+    /* Reset the value for the number of particle variable according to the installed number of
+     * macro-particles and perform the corresponding changes. */
+    bunchInit.numberOfParticles_ = chargeVector.size();
+}
+
+template <typename Double>
+void boost_bunch(ChargeVector<Double>& chargeVectorn_, Double frame_gamma) {
+    Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma);
+    Double zmaxL      = -1.0e100, zmaxG;
+    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++) {
+        Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        if (std::isinf(g)) {
+            std::cerr << __FILE__ << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g
+                      << "\n";
+            abort();
+        }
+        Double bz = iterQ->gb[2] / g;
+        iterQ->rnp[2] *= frame_gamma;
+
+        iterQ->gb[2] = frame_gamma * g * (bz - frame_beta);
+
+        zmaxL = std::max(zmaxL, iterQ->rnp[2]);
+    }
+    zmaxG = zmaxL;
+    struct {
+        Double zu_;
+        Double beta_;
+    } bunch_;
+    bunch_.zu_   = zmaxG;
+    bunch_.beta_ = frame_beta;
+
+    /****************************************************************************************************/
+
+    for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++) {
+        Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        iterQ->rnp[0] += iterQ->gb[0] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta;
+        iterQ->rnp[1] += iterQ->gb[1] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta;
+        iterQ->rnp[2] += iterQ->gb[2] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta;
+        if (std::isnan(iterQ->rnp[2])) {
+            std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_
+                      << ", " << frame_beta << "\n";
+            std::cerr << __FILE__ << ": " << __LINE__ << " Particle has NaN velocity or position\n";
+            abort();
+        }
+    }
+}
+
+template <typename bunch_type, typename scalar>
+size_t initialize_bunch_mithra(bunch_type& bunch, const BunchInitialize<scalar>& bunchInit,
+                               scalar frame_gamma) {
+    ChargeVector<scalar> temporary_charge_list;
+    initializeBunchEllipsoid(bunchInit, temporary_charge_list, 0, 1, 0);
+    for (auto& c : temporary_charge_list) {
+        if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2]))
+            std::cout << "Pos before boost: " << c.rnp << "\n";
+        if (std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2]))
+            std::cout << "Pos before boost: " << c.rnp << "\n";
+    }
+    boost_bunch(temporary_charge_list, frame_gamma);
+    for (auto& c : temporary_charge_list) {
+        if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) {
+            std::cout << "Pos after boost: " << c.rnp << "\n";
+            break;
+        }
+    }
+    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> positions("", temporary_charge_list.size());
+    Kokkos::View<ippl::Vector<scalar, 3>*, Kokkos::HostSpace> gammabetas("", temporary_charge_list.size());
+    auto iterQ = temporary_charge_list.begin();
+    for (size_t i = 0; i < temporary_charge_list.size(); i++) {
+        assert_isreal(iterQ->gb[0]);
+        assert_isreal(iterQ->gb[1]);
+        assert_isreal(iterQ->gb[2]);
+        assert(iterQ->gb[2] != 0.0f);
+        scalar g = std::sqrt(1.0 + iterQ->gb.squaredNorm());
+        assert_isreal(g);
+        scalar bz = iterQ->gb[2] / g;
+        assert_isreal(bz);
+        (void)bz;
+        positions(i)  = iterQ->rnp;
+        gammabetas(i) = iterQ->gb;
+        ++iterQ;
+    }
+    if (temporary_charge_list.size() > bunch.getLocalNum()) {
+        bunch.create(temporary_charge_list.size() - bunch.getLocalNum());
+    }
+    Kokkos::View<ippl::Vector<scalar, 3>*> dpositions("", temporary_charge_list.size());
+    Kokkos::View<ippl::Vector<scalar, 3>*> dgammabetas("", temporary_charge_list.size());
+
+    Kokkos::deep_copy(dpositions, positions);
+    Kokkos::deep_copy(dgammabetas, gammabetas);
+    Kokkos::deep_copy(bunch.R_nm1.getView(), positions);
+    Kokkos::deep_copy(bunch.gamma_beta.getView(), gammabetas);
+    auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(),
+         gbview = bunch.gamma_beta.getView();
+    ;
+    Kokkos::parallel_for(
+        temporary_charge_list.size(), KOKKOS_LAMBDA(size_t i) {
+            rview(i)   = dpositions(i);
+            rm1view(i) = dpositions(i);
+            gbview(i)  = dgammabetas(i);
+        });
+    Kokkos::fence();
+
+    return temporary_charge_list.size();
+}
+
+// END LORENTZ FRAME AND UNDULATOR AND BUNCH
+
+// PREAMBLE
+
+template <typename scalar1, typename... scalar>
+    requires((std::is_floating_point_v<scalar1>))
+KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x) {
+    uint32_t dim = sizeof...(scalar);
+    ippl::Vector<scalar1, sizeof...(scalar)> vec{scalar1(x - mean)...};
+    for (unsigned d = 0; d < dim; d++) {
+        vec[d] = vec[d] * vec[d];
+    }
+#ifndef __CUDA_ARCH__
+    using std::exp;
+#endif
+    return exp(-(vec.sum()) / (stddev * stddev));
+}
+
+template <typename T>
+KOKKOS_INLINE_FUNCTION ippl::Vector<T, 3> cross_prod(const ippl::Vector<T, 3>& a,
+                                                     const ippl::Vector<T, 3>& b) {
+    ippl::Vector<T, 3> ret{0.0, 0.0, 0.0};
+    ret[0] = a[1] * b[2] - a[2] * b[1];
+    ret[1] = a[2] * b[0] - a[0] * b[2];
+    ret[2] = a[0] * b[1] - a[1] * b[0];
+    return ret;
+}
+template <typename T>
+KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(
+    const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos,
+    int nghost = 1) {
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
+    ippl::Vector<T, 3> relpos  = pos - origin;
+    ippl::Vector<T, 3> gridpos = relpos / hr;
+    ippl::Vector<int, 3> ipos;
+    ipos = gridpos.template cast<int>();
+    ippl::Vector<T, 3> fracpos;
+    for (unsigned k = 0; k < 3; k++) {
+        fracpos[k] = gridpos[k] - (int)ipos[k];
+    }
+    ipos += ippl::Vector<int, 3>(nghost);
+    ret.first  = ipos;
+    ret.second = fracpos;
+    return ret;
+}
+template <typename view_type, typename scalar>
+KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
+                                   ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
+                                   const ippl::Vector<scalar, 3>& pos, const scalar value,
+                                   int nghost = 1) {
+    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost);
+    ipos -= ldom.first();
+    if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
+        || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
+        || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
+        return;
+    }
+    assert(fracpos[0] >= 0.0f);
+    assert(fracpos[0] <= 1.0f);
+    assert(fracpos[1] >= 0.0f);
+    assert(fracpos[1] <= 1.0f);
+    assert(fracpos[2] >= 0.0f);
+    assert(fracpos[2] <= 1.0f);
+    ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+    assert(one_minus_fracpos[0] >= 0.0f);
+    assert(one_minus_fracpos[0] <= 1.0f);
+    assert(one_minus_fracpos[1] >= 0.0f);
+    assert(one_minus_fracpos[1] <= 1.0f);
+    assert(one_minus_fracpos[2] >= 0.0f);
+    assert(one_minus_fracpos[2] <= 1.0f);
+    scalar accum = 0;
+
+    for (unsigned i = 0; i < 8; i++) {
+        scalar weight               = 1;
+        ippl::Vector<int, 3> ipos_l = ipos;
+        for (unsigned d = 0; d < 3; d++) {
+            weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+            ipos_l[d] += !!(i & (1 << d));
+        }
+        assert_isreal(value);
+        assert_isreal(weight);
+        accum += weight;
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight);
+    }
+    assert(abs(accum - 1.0f) < 1e-6f);
+}
+template <typename view_type, typename scalar>
+KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
+                                   ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
+                                   const ippl::Vector<scalar, 3>& pos,
+                                   const ippl::Vector<scalar, 3>& value, int nghost = 1) {
+    auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost);
+    ipos -= ldom.first();
+    if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
+        || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
+        || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
+        
+        return; // Return in case of out of bounds
+    }
+    assert(fracpos[0] >= 0.0f);
+    assert(fracpos[0] <= 1.0f);
+    assert(fracpos[1] >= 0.0f);
+    assert(fracpos[1] <= 1.0f);
+    assert(fracpos[2] >= 0.0f);
+    assert(fracpos[2] <= 1.0f);
+    ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+    assert(one_minus_fracpos[0] >= 0.0f);
+    assert(one_minus_fracpos[0] <= 1.0f);
+    assert(one_minus_fracpos[1] >= 0.0f);
+    assert(one_minus_fracpos[1] <= 1.0f);
+    assert(one_minus_fracpos[2] >= 0.0f);
+    assert(one_minus_fracpos[2] <= 1.0f);
+    scalar accum = 0;
+
+    for (unsigned i = 0; i < 8; i++) {
+        scalar weight               = 1;
+        ippl::Vector<int, 3> ipos_l = ipos;
+        for (unsigned d = 0; d < 3; d++) {
+            weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+            ipos_l[d] += !!(i & (1 << d));
+        }
+        assert_isreal(weight);
+        accum += weight;
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight);
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight);
+        Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight);
+    }
+    assert(abs(accum - 1.0f) < 1e-6f);
+}
+
+template <typename view_type, typename scalar>
+KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview,
+                                              ippl::Vector<scalar, 3> hr,
+                                              ippl::Vector<scalar, 3> origin,
+                                              const ippl::Vector<scalar, 3>& from,
+                                              const ippl::Vector<scalar, 3>& to,
+                                              const scalar factor, int nghost = 1) {
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid =
+        gridCoordinatesOf(hr, origin, from);
+    Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid =
+        gridCoordinatesOf(hr, origin, to);
+
+    if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1]
+        && from_grid.first[2] == to_grid.first[2]) {
+        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + to) * scalar(0.5)),
+                      ippl::Vector<scalar, 3>((to - from) * factor));
+
+        return;
+    }
+    ippl::Vector<scalar, 3> relay;
+    const ippl::Vector<scalar, 3> orig = origin;
+    using Kokkos::max;
+    using Kokkos::min;
+    for (unsigned int i = 0; i < 3; i++) {
+        relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
+                           + scalar(1.0) * hr[i] + orig[i],
+                       max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
+                               + scalar(0.0) * hr[i] + orig[i],
+                           scalar(0.5) * (to[i] + from[i])));
+    }
+    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)),
+                  ippl::Vector<scalar, 3>((relay - from) * factor));
+    scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5)),
+                  ippl::Vector<scalar, 3>((to - relay) * factor));
+}
+
+namespace ippl {
+
+    template <typename scalar>
+    struct nondispersive {
+        scalar a1;
+        scalar a2;
+        scalar a4;
+        scalar a6;
+        scalar a8;
+    };
+    template <typename _scalar, class PLayout>
+    struct Bunch_eb : public ippl::ParticleBase<PLayout> {
+        using scalar = _scalar;
+
+        // Constructor for the Bunch class, taking a PLayout reference
+        Bunch_eb(PLayout& playout)
+            : ippl::ParticleBase<PLayout>(playout) {
+            // Add attributes to the particle bunch
+            this->addAttribute(Q);     // Charge attribute
+            this->addAttribute(mass);  // Mass attribute
+            this->addAttribute(
+                gamma_beta);  // Gamma-beta attribute (product of relativistiv gamma and beta)
+            this->addAttribute(R_np1);      // Position attribute for the next time step
+            this->addAttribute(R_nm1);      // Position attribute for the next time step
+            this->addAttribute(EB_gather);  // Electric field attribute for particle gathering
+        }
+
+        // Destructor for the Bunch class
+        ~Bunch_eb() {}
+
+        // Define container types for various attributes
+        using charge_container_type   = ippl::ParticleAttrib<scalar>;
+        using velocity_container_type = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector4_container_type  = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
+
+        // Declare instances of the attribute containers
+        charge_container_type Q;             // Charge container
+        charge_container_type mass;          // Mass container
+        velocity_container_type gamma_beta;  // Gamma-beta container
+        typename ippl::ParticleBase<PLayout>::particle_position_type
+            R_np1;  // Position container for the next time step
+        typename ippl::ParticleBase<PLayout>::particle_position_type
+            R_nm1;  // Position container for the previous time step
+        ippl::ParticleAttrib<ippl::Vector<ippl::Vector<scalar, 3>, 2>>
+            EB_gather;  // Electric field container for particle gathering
+    };
+
+    /**
+     * @brief Struct representing the state of an FEL (Free Electron Laser) simulation.
+     *
+     * @tparam scalar Type of the scalar values (e.g., float, double).
+     */
+    template <typename scalar>
+    struct FELSimulationState {
+        // clang-format off
+
+        constexpr static unsigned int dim = 3; ///< Dimensionality of the simulation (3D).
+        using Vector_t = ippl::Vector<scalar, dim>; ///< Type alias for a 3D vector.
+        using value_type = ippl::Vector<scalar, 4>; ///< Type alias for a 4D vector.
+        using EB_type = ippl::Vector<ippl::Vector<scalar, 3>, 2>; ///< Type alias for an electric and magnetic field vector pair.
+        using Mesh_t = ippl::UniformCartesian<scalar, dim>; ///< Type alias for a uniform Cartesian mesh.
+
+        bool periodic_bc; ///< Flag indicating if periodic boundary conditions are used.
+
+        /// Type alias for a field with 4D vector values.
+        using FourField = ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+
+        /// Type alias for a field with 3D vector values.
+        using ThreeField = ippl::Field<Vector_t, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+
+        /// Type alias for a field with 4D vector values (repeated for consistency).
+        using VField_t = ippl::Field<value_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+
+        /// Type alias for a field with electric and magnetic field vector pairs.
+        using EBField_t = ippl::Field<EB_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+
+        /// Type alias for the view type of a field with 4D vector values.
+        using view_type = typename VField_t::view_type;
+
+        /// Type alias for the view type of a field with electric and magnetic field vector pairs.
+        using ev_view_type = typename EBField_t::view_type;
+
+        /// Type alias for the view type of a field with 3D vector values (electric field).
+        using e_view_type = typename ThreeField::view_type;
+
+        /// Type alias for the view type of a field with 3D vector values (magnetic field).
+        using b_view_type = typename ThreeField::view_type;
+
+        /// Solver and particle handler.
+        ippl::NSFDSolverWithParticles<scalar, absorbing> fieldsAndParticles;
+
+        Vector_t hr_m; ///< Mesh spacing vector.
+        ippl::Vector<uint32_t, dim> nr_global; ///< Global number of cells in each dimension.
+        ippl::Vector<uint32_t, dim> nr_local; ///< Local number of cells in each dimension.
+        config m_config; ///< Configuration object.
+        UniaxialLorentzframe<scalar, 2> ulb; ///< Uniaxial Lorentz frame transformation along z-axis.
+        undulator_parameters<scalar> uparams; ///< Undulator parameters.
+        Undulator<scalar> undulator; ///< Undulator object.
+
+        /**
+         * @brief Construct a new FELSimulationState object.
+         * 
+         * Initializes the simulation state with the given field layout, mesh, number of particles, and configuration.
+         * 
+         * @details 
+         * - `ulb.gamma_m = cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5)` is the frame's gamma factor.
+         * 
+         * @param layout Field layout.
+         * @param mesch Mesh object.
+         * @param nparticles Number of particles.
+         * @param cfg Configuration object.
+         */
+        FELSimulationState(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles, config cfg) 
+            : fieldsAndParticles(layout, mesch, nparticles), 
+              m_config(cfg), 
+              ulb(UniaxialLorentzframe<scalar, 2>::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), 
+              uparams(cfg.undulator_K, cfg.undulator_period, cfg.undulator_length), 
+              undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m) {
+
+            hr_m = mesch.getMeshSpacing();
+            nr_global = ippl::Vector<uint32_t, dim>{
+                uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
+                uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1),
+                uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)
+            };
+            nr_local = ippl::Vector<uint32_t, dim>{
+                uint32_t(layout.getLocalNDIndex()[0].last() - layout.getLocalNDIndex()[0].first() + 1),
+                uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1),
+                uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1)
+            };
+        }
+
+        /**
+         * @brief Get the time step size.
+         * 
+         * @return scalar Time step size.
+         */
+        scalar dt() const noexcept {
+            return fieldsAndParticles.field_solver.dt;
+        }
+
+        /**
+         * @brief Perform a simulation step.
+         * 
+         * Solves the fields and updates the state using the undulator and Lorentz frame transformations.
+         */
+        void step() {
+            auto und = this->undulator;
+            auto lb = this->ulb;
+            fieldsAndParticles.solve(KOKKOS_LAMBDA(ippl::Vector<scalar, 3> pos, scalar time) {
+                lb.primedToUnprimed(pos, time);
+                auto eb = und(pos);
+                return lb.transform_EB(eb);
+            });
+        }
+    };
+    // clang-format on
+}  // namespace ippl
+bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) {
+    const int channels = 3;  // RGB
+    const int stride   = width * channels;
+    std::vector<unsigned char> flippedData(data, data + stride * height);
+
+    // Use stb_image_write to write the BMP image to the file descriptor
+    if (!stbi_write_bmp_to_func(
+            [](void* context, void* data, int size) {
+                FILE* f = reinterpret_cast<FILE*>(context);
+                fwrite(data, 1, size, f);
+            },
+            fd, width, height, channels, flippedData.data())) {
+        return false;
+    }
+
+    return true;
+}
+/**
+ * @brief Helper function to gather data from a field using interpolation.
+ *
+ * This function computes the value at a given position by interpolating the values in the field.
+ * The position is transformed from physical coordinates to grid coordinates and then used to
+ * gather data from the field. The interpolation weights are computed based on the relative
+ * positions within the grid cells.
+ *
+ * @tparam View Type of the field view.
+ * @tparam T Scalar type for coordinates and grid spacing.
+ * @tparam Dim Dimensionality of the position vector.
+ *
+ * @param v The field view from which data is to be gathered.
+ * @param pos The position vector in physical coordinates where the data is to be gathered.
+ * @param origin The origin of the grid in physical coordinates.
+ * @param hr The grid spacing vector.
+ * @param lDom The local domain indices of the grid.
+ *
+ * @return typename View::value_type The interpolated value at the specified position.
+ */
+template <typename View, typename T, unsigned Dim>
+KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v,
+                                                               const ippl::Vector<T, Dim>& pos,
+                                                               const ippl::Vector<T, 3>& origin,
+                                                               const ippl::Vector<T, 3>& hr,
+                                                               const ippl::NDIndex<3>& lDom,
+                                                               int nghost = 1) {
+    using vector_type = ippl::Vector<T, 3>;
+
+    vector_type l;
+
+    for (unsigned k = 0; k < 3; k++) {
+        l[k] = (pos[k] - origin[k]) / hr[k]
+               + 1.0;  // gather is implemented in a way such that this 1 is necessary here. 
+                       // This is NOT the nghost, this needs to be a hardcoded 1.0
+    }
+
+    ippl::Vector<int, 3> index{int(l[0]), int(l[1]), int(l[2])};
+    ippl::Vector<T, 3> whi = l - index;
+    ippl::Vector<T, 3> wlo(1.0);
+    wlo -= whi;
+
+    ippl::Vector<size_t, 3> args = index - lDom.first() + nghost;
+    for (unsigned k = 0; k < 3; k++) {
+        if (args[k] >= v.extent(k) || args[k] == 0) {
+            return typename View::value_type(0);
+        }
+    }
+    return ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo,
+                                                    whi, args);
+}
+template <typename scalar>
+scalar test_gauss_law(uint32_t n) {
+    ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n);
+    ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
+    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths, meter_in_unit_lengths,
+                                    meter_in_unit_lengths};
+    std::array<bool, 3> isParallel;
+    isParallel.fill(false);
+    isParallel[2] = true;
+
+    // all parallel layout, standard domain, normal axis order
+    ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+    //[-1, 1] box
+    ippl::Vector<scalar, 3> hx     = extents / nr.cast<scalar>();
+    using vector_type              = ippl::Vector<scalar, 3>;
+    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths),
+                                      scalar(-0.5 * meter_in_unit_lengths),
+                                      scalar(-0.5 * meter_in_unit_lengths)};
+    ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+
+    uint32_t pcount = 1 << 20;
+    // config cfg{};
+    // cfg.space_charge = true;
+    ippl::NSFDSolverWithParticles<scalar, ippl::fdtd_bc::absorbing> field_state(layout, mesh,
+                                                                                pcount);
+    field_state.particles.Q    = scalar(coulomb_in_unit_charges) / pcount;
+    field_state.particles.mass = scalar(1.0) / pcount;  // Irrelefant
+    auto pview                 = field_state.particles.R.getView();
+    auto p1view                = field_state.particles.R_nm1.getView();
+
+    Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
+
+    Kokkos::parallel_for(
+        pcount, KOKKOS_LAMBDA(size_t i) {
+            auto state  = random_pool.get_state();
+            pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+            pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+            pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+            p1view(i)   = pview(i);
+            random_pool.free_state(state);
+        });
+    Kokkos::fence();
+    field_state.J = scalar(0.0);
+    field_state.scatterBunch();
+    for (size_t i = 0; i < 8 * n; i++) {
+        field_state.field_solver.solve();
+    }
+
+    Kokkos::fence();
+    auto lDom = field_state.E.getLayout().getLocalNDIndex();
+
+    std::ofstream line("gauss_line.txt");
+    typename ippl::FELSimulationState<scalar>::e_view_type::host_mirror_type view =
+        Kokkos::create_mirror_view(field_state.E.getView());
+    for (unsigned i = 1; i < nr[2]; i++) {
+        vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
+        pos[2] += hx[2] * scalar(i);
+        ippl::Vector<scalar, 3> ebg = gather_helper(view, pos, origin, hx, lDom);
+        line << pos.norm() * unit_length_in_meters << " "
+             << ebg.norm() * unit_electric_fieldstrength_in_voltpermeters << "\n";
+    }
+    return 0.0f;
+}
+template <typename scalar>
+scalar test_amperes_law(uint32_t n) {
+    ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n);
+    ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
+    ippl::Vector<scalar, 3> extents{meter_in_unit_lengths, (scalar)(4 * meter_in_unit_lengths),
+                                    meter_in_unit_lengths};
+    std::array<bool, 3> isParallel;
+    isParallel.fill(false);
+    isParallel[2] = true;
+
+    // all parallel layout, standard domain, normal axis order
+    ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+    //[-1, 1] box
+    ippl::Vector<scalar, 3> hx;
+    for (unsigned d = 0; d < 3; d++) {
+        hx[d] = extents[d] / (scalar)nr[d];
+    }
+    using vector_type              = ippl::Vector<scalar, 3>;
+    ippl::Vector<scalar, 3> origin = {scalar(-0.5 * meter_in_unit_lengths),
+                                      scalar(-2.0 * meter_in_unit_lengths),
+                                      scalar(-0.5 * meter_in_unit_lengths)};
+    ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+
+    uint32_t pcount = 1 << 20;
+    ippl::NSFDSolverWithParticles<scalar, ippl::fdtd_bc::absorbing> field_state(layout, mesh,
+                                                                                pcount);
+    field_state.particles.Q    = scalar(4.0 * coulomb_in_unit_charges) / pcount;
+    field_state.particles.mass = scalar(1.0) / pcount;  // Irrelefant
+    auto pview                 = field_state.particles.R.getView();
+    auto p1view                = field_state.particles.R_nm1.getView();
+    constexpr scalar vy        = meter_in_unit_lengths / second_in_unit_times;
+    scalar timestep            = field_state.field_solver.dt;
+    Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345);
+
+    Kokkos::parallel_for(
+        pcount, KOKKOS_LAMBDA(size_t i) {
+            // bunch.gammaBeta[i].fill(scalar(0));
+            auto state  = random_pool.get_state();
+            pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+            pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths);
+            pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1);
+            p1view(i)   = pview(i);
+            p1view(i)[1] -= vy * timestep;
+            random_pool.free_state(state);
+        });
+    Kokkos::fence();
+    field_state.J = scalar(0.0);
+    field_state.scatterBunch();
+    for (size_t i = 0; i < 8 * n; i++) {
+        field_state.field_solver.solve();
+    }
+    field_state.field_solver.evaluate_EB();
+    Kokkos::fence();
+    auto lDom = field_state.B.getLayout().getLocalNDIndex();
+
+    std::ofstream line("ampere_line.txt");
+
+    typename ippl::FELSimulationState<scalar>::b_view_type::host_mirror_type view =
+        Kokkos::create_mirror_view(field_state.B.getView());
+    // ippl::Vector<ippl::Vector<scalar, 3>, 2> ebg = gather_helper(view, ippl::Vector<scalar,
+    // 3>{0,0,0}, origin, hx, lDom);
+    for (unsigned i = 1; i < nr[2]; i++) {
+        vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]};
+        pos[2] += hx[2] * scalar(i);
+        ippl::Vector<scalar, 3> ebg = gather_helper(view, pos, origin, hx, lDom);
+        // line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) *
+        // unit_electric_fieldstrength_in_voltpermeters << "\n";
+        line << pos.norm() * unit_length_in_meters << " "
+             << ebg[0] * unit_magnetic_fluxdensity_in_tesla << "\n";
+    }
+    return 0.0f;
+}
+int main(int argc, char* argv[]) {
+    using scalar = float;
+    ippl::initialize(argc, argv);
+    {
+        // test_gauss_law<scalar>(64);
+        // test_amperes_law<scalar>(64);
+        // goto exit;
+        config cfg               = read_config("../config.json");
+        const scalar frame_gamma = std::max(
+            decltype(cfg)::scalar(1),
+            cfg.bunch_gamma
+                / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5)));
+        cfg.extents[2] *= frame_gamma;
+        cfg.total_time /= frame_gamma;
+
+        const scalar frame_beta      = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma));
+        const scalar frame_gammabeta = frame_gamma * frame_beta;
+        ippl::UniaxialLorentzframe<scalar, 2> frame_boost(frame_gammabeta);
+        ippl::undulator_parameters<scalar> uparams(cfg.undulator_K, cfg.undulator_period,
+                                                   cfg.undulator_length);
+
+        BunchInitialize<scalar> mithra_config = generate_mithra_config(cfg, frame_boost);
+        ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]);
+
+        std::array<bool, 3> isParallel;
+        isParallel.fill(false);
+        isParallel[2] = true;
+
+        // all parallel layout, standard domain, normal axis order
+        ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+        //[-1, 1] box
+        ippl::Vector<scalar, 3> hx     = {scalar(cfg.extents[0] / cfg.resolution[0]),
+                                          scalar(cfg.extents[1] / cfg.resolution[1]),
+                                          scalar(cfg.extents[2] / cfg.resolution[2])};
+        ippl::Vector<scalar, 3> origin = {scalar(-cfg.extents[0] * 0.5),
+                                          scalar(-cfg.extents[1] * 0.5),
+                                          scalar(-cfg.extents[2] * 0.5)};
+        ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+        std::cout << "hx: " << hx << "\n";
+        std::cout << "origin: " << origin << "\n";
+        std::cout << "extents: " << cfg.extents << std::endl;
+        if (sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1) {
+            std::cerr << "Dispersion relation not satisfiable\n";
+            abort();
+        }
+
+        ippl::FELSimulationState<scalar> fdtd_state(
+            layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg);
+
+        if (ippl::Comm->rank() == 0) {
+            std::cout << "Init particles: " << std::endl;
+            size_t actual_pc = initialize_bunch_mithra(fdtd_state.fieldsAndParticles.particles,
+                                                       mithra_config, frame_gamma);
+            fdtd_state.fieldsAndParticles.particles.Q    = cfg.charge / actual_pc;
+            fdtd_state.fieldsAndParticles.particles.mass = cfg.mass / actual_pc;
+        } else {
+            fdtd_state.fieldsAndParticles.particles.create(0);
+        }
+        {
+            auto rview   = fdtd_state.fieldsAndParticles.particles.R.getView();
+            auto rm1view = fdtd_state.fieldsAndParticles.particles.R_nm1.getView();
+            ippl::Vector<scalar, 3> meanpos =
+                fdtd_state.fieldsAndParticles.particles.R.sum()
+                * (1.0 / fdtd_state.fieldsAndParticles.particles.getTotalNum());
+
+            Kokkos::parallel_for(
+                fdtd_state.fieldsAndParticles.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) {
+                    rview(i) -= meanpos;
+                    rm1view(i) -= meanpos;
+                });
+        }
+        fdtd_state.fieldsAndParticles.particles.setParticleBC(ippl::NO);
+
+        size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt());
+        uint64_t starttime        = nanoTime();
+        std::ofstream rad;
+        FILE* ffmpeg_file = nullptr;
+        if (ippl::Comm->rank() == 0) {
+            rad = std::ofstream("radiation.txt");
+            const char* ffmpegCmd =
+                "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf "
+                "scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p "
+                "-c:v libx264 -movflags +faststart ffmpeg_popen.mkv";
+            if (cfg.output_rhythm != 0) {
+                ffmpeg_file = popen(ffmpegCmd, "w");
+            }
+        }
+
+        for (size_t i = 0; i < timesteps_required; i++) {
+            if (ippl::Comm->rank() == 0) {
+                std::cout << "Doing step: " << i << std::endl;
+            }
+            fdtd_state.step();
+            auto ldom        = layout.getLocalNDIndex();
+            auto nrg         = fdtd_state.nr_global;
+            auto eview       = fdtd_state.fieldsAndParticles.E.getView();
+            auto bview       = fdtd_state.fieldsAndParticles.B.getView();
+            double radiation = 0.0;
+            Kokkos::parallel_reduce(
+                ippl::getRangePolicy(eview, 1),
+                KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref) {
+                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> buncheb{
+                        eview(i, j, k), bview(i, j, k)};
+                    ippl::Vector<scalar, 3> Elab = frame_boost.inverse_transform_EB(buncheb).first;
+                    ippl::Vector<scalar, 3> Blab = frame_boost.inverse_transform_EB(buncheb).second;
+                    uint32_t kg                  = k + ldom.first()[2];
+                    if (kg == nrg[2] - 3) {
+                        ref += Elab.cross(Blab)[2];
+                    }
+                },
+                radiation);
+            double radiation_in_watt_on_this_rank =
+                radiation
+                * double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters
+                         * unit_length_in_meters)
+                * fdtd_state.hr_m[0] * fdtd_state.hr_m[1];
+            double radiation_in_watt_global = 0.0;
+            MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE,
+                       MPI_SUM, 0, ippl::Comm->getCommunicator());
+            if (ippl::Comm->rank() == 0) {
+                ippl::Vector<scalar, 3> pos{0, 0, (float)cfg.extents[2]};
+                frame_boost.primedToUnprimed(pos, fdtd_state.dt() * i);
+                rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n";
+            }
+
+            int rank = ippl::Comm->rank();
+            int size = ippl::Comm->size();
+            if ((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)) {
+                int img_height   = 400;
+                int img_width    = int(400.0 * cfg.extents[2] / cfg.extents[0]);
+                float* imagedata = new float[img_width * img_height * 3];
+                std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f);
+                float* idata_recvbuffer  = new float[img_width * img_height * 3];
+                int floatcount           = img_width * img_height * 3;
+                uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
+                std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float));
+                auto phmirror = fdtd_state.fieldsAndParticles.particles.R.getHostMirror();
+                Kokkos::deep_copy(phmirror, fdtd_state.fieldsAndParticles.particles.R.getView());
+                for (size_t hi = 0; hi < fdtd_state.fieldsAndParticles.particles.getLocalNum();
+                     hi++) {
+                    ippl::Vector<scalar, 3> ppos = phmirror(hi);
+                    ppos -= mesh.getOrigin();
+                    ppos /= cfg.extents.cast<scalar>();
+                    int x_imgcoord = ppos[2] * img_width;
+                    int y_imgcoord = ppos[0] * img_height;
+                    // printf("%d, %d\n", x_imgcoord, y_imgcoord);
+                    if (y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width
+                        && y_imgcoord < img_height) {
+                        const float intensity =
+                            std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles);
+                        // std::cout << intensity << "\n";
+                        imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] =
+                            std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1]
+                                                + intensity);
+                    }
+                };
+
+                auto eh = fdtd_state.fieldsAndParticles.E.getHostMirror();
+                auto bh = fdtd_state.fieldsAndParticles.B.getHostMirror();
+                Kokkos::deep_copy(eh, fdtd_state.fieldsAndParticles.E.getView());
+                Kokkos::deep_copy(bh, fdtd_state.fieldsAndParticles.B.getView());
+
+                {
+                    for (int i = 1; i < img_width; i++) {
+                        for (int j = 1; j < img_height; j++) {
+                            int i_remap =
+                                (double(i) / (img_width - 1)) * (fdtd_state.nr_global[2] - 4) + 2;
+                            int j_remap =
+                                (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2;
+                            if (i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]) {
+                                if (j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]) {
+                                    ippl::Vector<scalar, 3> E = eh(j_remap + 1 - ldom.first()[0],
+                                                                   fdtd_state.nr_global[1] / 2,
+                                                                   i_remap + 1 - ldom.first()[2]);
+                                    ippl::Vector<scalar, 3> B = bh(j_remap + 1 - ldom.first()[0],
+                                                                   fdtd_state.nr_global[1] / 2,
+                                                                   i_remap + 1 - ldom.first()[2]);
+
+                                    ippl::Vector<scalar, 3> poynting = E.cross(B);
+                                    Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+                                        eblab = frame_boost.inverse_transform_EB(
+                                            Kokkos::make_pair<ippl::Vector<scalar, 3>,
+                                                              ippl::Vector<scalar, 3>>(E, B));
+                                    ippl::Vector<scalar, 3> poyntinglab =
+                                        eblab.first.cross(eblab.second);
+                                    poynting = poyntinglab;
+                                    if (poynting.norm() > 0) {
+                                        // std::cout << poynting.norm() << "\n";
+                                    }
+                                    float normalized_colorscale_value =
+                                        std::sqrt(poynting.norm()) * 0.00001f;
+                                    int index = (int)std::max(
+                                        0.0f,
+                                        std::min(normalized_colorscale_value * 255.0f, 255.0f));
+                                    imagedata[(j * img_width + i) * 3 + 0] +=
+                                        turbo_cm[index][0]
+                                        * 255.0f;  // * std::min(normalized_colorscale_value *
+                                                   // 100.0f + 50.0f, 150.0f);//(unsigned
+                                                   // char)(std::min(255u, (unsigned int)(0.5f *
+                                                   // std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                    imagedata[(j * img_width + i) * 3 + 1] +=
+                                        turbo_cm[index][1]
+                                        * 255.0f;  // * std::min(normalized_colorscale_value *
+                                                   // 100.0f + 50.0f, 150.0f);//(unsigned
+                                                   // char)(std::min(255u, (unsigned int)(0.5f *
+                                                   // std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                    imagedata[(j * img_width + i) * 3 + 2] +=
+                                        turbo_cm[index][2]
+                                        * 255.0f;  // * std::min(normalized_colorscale_value *
+                                                   // 100.0f + 50.0f, 150.0f);//(unsigned
+                                                   // char)(std::min(255u, (unsigned int)(0.5f *
+                                                   // std::sqrt(std::sqrt(poynting.squaredNorm())))));
+                                }
+                            }
+                        }
+                    }
+                }
+                /*
+                 * This code should be replaced by the Image Gathering anyway, but that's a
+                 * different PR
+                 */
+                int mask = 1;
+                while (mask < size) {
+                    int partner = rank ^ mask;
+                    {
+                        if ((rank & mask) == 0) {
+                            // Send data to partner
+                            MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0,
+                                     ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE);
+                            // Apply image summation
+                            for (int f = 0; f < floatcount; f++) {
+                                imagedata[f] += idata_recvbuffer[f];
+                            }
+                        } else {
+                            MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0,
+                                     ippl::Comm->getCommunicator());
+                            // Receive data from partner and apply reduction
+                        }
+                    }
+                    mask <<= 1;  // Move to next bit position for pairing
+                }
+                if (rank == 0) {
+                    char output[1024] = {0};
+
+                    snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i);
+                    std::transform(imagedata, imagedata + img_height * img_width * 3,
+                                   imagedata_final, [](float x) {
+                                       return (unsigned char)std::min(255.0f, std::max(0.0f, x));
+                                   });
+                    if (ffmpeg_file != nullptr)
+                        writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final);
+                }
+                delete[] imagedata;
+                delete[] idata_recvbuffer;
+                delete[] imagedata_final;
+            }
+        }
+        uint64_t endtime = nanoTime();
+        if (ippl::Comm->rank() == 0)
+            std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9
+                      << std::endl;
+    }
+    // exit:
+    ippl::finalize();
+}
\ No newline at end of file
diff --git a/fel/LorentzTransform.h b/fel/LorentzTransform.h
new file mode 100644
index 000000000..876bb22a6
--- /dev/null
+++ b/fel/LorentzTransform.h
@@ -0,0 +1,318 @@
+#ifndef LORENTZ_TRANSFORM_H
+#define LORENTZ_TRANSFORM_H
+#include <Kokkos_Core.hpp>
+
+#include "Types/Matrix.h"
+#include "Types/Vector.h"
+namespace ippl {
+    /**
+     * @brief A template struct representing a uniaxial Lorentz frame.
+     *
+     * The UniaxialLorentzframe struct represents a Lorentz transformation along a specific axis
+     * (default is the z-axis, axis 2). It includes methods for transforming vectors and
+     * electromagnetic fields between frames, as well as for computing gamma factors and beta
+     * velocities. The struct uses Kokkos for portability and performance.
+     *
+     * @tparam T The scalar type used for computations (e.g., float, double).
+     * @tparam axis The axis along which the Lorentz transformation is applied (default is 2, the
+     * z-axis).
+     */
+    template <typename T, unsigned axis = 2>
+    struct UniaxialLorentzframe {
+        /// Speed of light, set to 1 for natural units.
+        constexpr static T c = 1.0;
+        /// Alias for the scalar type used in the struct.
+        using scalar = T;
+        /// Alias for a 3-dimensional vector of the scalar type.
+        using Vector3 = ippl::Vector<T, 3>;
+
+        /// Beta velocity component along the specified axis.
+        scalar beta_m;
+        /// Product of gamma factor and beta velocity.
+        scalar gammaBeta_m;
+        /// Gamma factor for the Lorentz transformation.
+        scalar gamma_m;
+
+        /**
+         * @brief Create a UniaxialLorentzframe from a given gamma factor.
+         *
+         * This static member function constructs a UniaxialLorentzframe object using a given
+         * gamma factor. It computes the corresponding beta velocity and gamma*beta product.
+         *
+         * @param gamma The gamma factor for the Lorentz transformation.
+         * @return A UniaxialLorentzframe object with computed beta and gamma*beta.
+         */
+        KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma) {
+            UniaxialLorentzframe ret;
+            ret.gamma_m      = gamma;
+            scalar beta      = Kokkos::sqrt(1 - double(1) / (gamma * gamma));
+            scalar gammabeta = gamma * beta;
+            ret.beta_m       = beta;
+            ret.gammaBeta_m  = gammabeta;
+            return ret;
+        }
+
+        /**
+         * @brief Get a UniaxialLorentzframe with negative beta velocity.
+         *
+         * This member function returns a new UniaxialLorentzframe object with the same gamma
+         * factor but with a negative beta velocity, effectively representing the inverse
+         * Lorentz transformation.
+         *
+         * @return A UniaxialLorentzframe object with negative beta velocity.
+         */
+        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe<T, axis> negative() const noexcept {
+            UniaxialLorentzframe ret;
+            ret.beta_m      = -beta_m;
+            ret.gammaBeta_m = -gammaBeta_m;
+            ret.gamma_m     = gamma_m;
+            return ret;
+        }
+
+        /// Default constructor.
+        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default;
+
+        /**
+         * @brief Construct a UniaxialLorentzframe from a gamma*beta value.
+         *
+         * This constructor initializes a UniaxialLorentzframe object using a given gamma*beta
+         * value. It computes the corresponding beta velocity and gamma factor.
+         *
+         * @param gammaBeta The product of the gamma factor and beta velocity.
+         */
+        KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta) {
+            using Kokkos::sqrt;
+            gammaBeta_m = gammaBeta;
+            beta_m      = gammaBeta / sqrt(1 + gammaBeta * gammaBeta);
+            gamma_m     = sqrt(1 + gammaBeta * gammaBeta);
+        }
+
+        /**
+         * @brief Transform a spatial vector from the primed frame to the unprimed frame.
+         *
+         * This member function transforms a given spatial vector from the primed frame to the
+         * unprimed frame using the Lorentz transformation along the specified axis and the given
+         * time.
+         *
+         * @param arg The spatial vector to be transformed.
+         * @param time The time component for the transformation.
+         */
+        KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time) const noexcept {
+            arg[axis] = gamma_m * (arg[axis] + beta_m * time);
+        }
+
+        /**
+         * @brief Transform electric and magnetic fields from the unprimed to the primed frame.
+         *
+         * This member function transforms a pair of electric and magnetic field vectors from the
+         * unprimed frame to the primed frame using the Lorentz transformation along the specified
+         * axis.
+         *
+         * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the
+         * unprimed frame.
+         * @return A pair of vectors representing the electric and magnetic fields in the primed
+         * frame.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> transform_EB(
+            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& unprimedEB) const noexcept {
+            Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> ret;
+            ippl::Vector<scalar, 3> betavec{0, 0, beta_m};
+            ippl::Vector<scalar, 3> vnorm{axis == 0, axis == 1, axis == 2};
+            ret.first =
+                ippl::Vector<T, 3>(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m
+                - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm)));
+            ret.second =
+                ippl::Vector<T, 3>(unprimedEB.second - betavec.cross(unprimedEB.first)) * gamma_m
+                - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm)));
+            ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis];
+            ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis];
+            return ret;
+        }
+
+        /**
+         * @brief Transform electric and magnetic fields from the primed to the unprimed frame.
+         *
+         * This member function transforms a pair of electric and magnetic field vectors from the
+         * primed frame to the unprimed frame using the inverse Lorentz transformation along the
+         * specified axis.
+         *
+         * @param primedEB A pair of vectors representing the electric and magnetic fields in the
+         * primed frame.
+         * @return A pair of vectors representing the electric and magnetic fields in the unprimed
+         * frame.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>
+        inverse_transform_EB(
+            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB) const noexcept {
+            return negative().transform_EB(primedEB);
+        }
+    };
+    /**
+     * @brief Represents a Lorentz frame with associated transformations and operations.
+     * 
+     * This template struct models a Lorentz frame used in special relativity to handle
+     * transformations between reference frames moving at a constant velocity relative to each other.
+     * The LorentzFrame supports various operations such as converting between frames and transforming
+     * vectors and electromagnetic fields.
+     * 
+     * @tparam T The type for scalar values, typically a floating-point type.
+     */
+    template <typename T>
+    struct LorentzFrame {
+        /// Speed of light in natural units (c = 1)
+        constexpr static T c = 1.0;
+    
+        /// Alias for the scalar type.
+        using scalar = T;
+    
+        /// Alias for a 3D vector of type T.
+        using Vector3 = ippl::Vector<T, 3>;
+    
+        /// Velocity vector (beta) in the frame, normalized by the speed of light.
+        ippl::Vector<T, 3> beta_m;
+    
+        /// Product of gamma and beta vectors.
+        ippl::Vector<T, 3> gammaBeta_m;
+    
+        /// Lorentz factor gamma.
+        T gamma_m;
+    
+        /**
+         * @brief Constructs a LorentzFrame from a given gammaBeta vector.
+         * 
+         * This constructor initializes the Lorentz frame using a vector that represents
+         * the product of the Lorentz factor gamma and the velocity vector beta.
+         * The velocity vector beta is then computed by normalizing this gammaBeta vector,
+         * and the Lorentz factor gamma is computed from the magnitude of gammaBeta.
+         * 
+         * @param gammaBeta The gammaBeta vector.
+         */
+        KOKKOS_INLINE_FUNCTION LorentzFrame(const ippl::Vector<T, 3>& gammaBeta) {
+            beta_m      = gammaBeta / sqrt(1 + gammaBeta.dot(gammaBeta));
+            gamma_m     = sqrt(1 + gammaBeta.dot(gammaBeta));
+            gammaBeta_m = gammaBeta;
+        }
+        /**
+         * @brief Constructs a uniaxial Lorentz frame given a gamma value and an axis.
+         * 
+         * This static member function creates a Lorentz frame that has motion along one of the principal
+         * axes (x, y, or z) based on the provided gamma value. The beta vector is calculated accordingly
+         * and aligned with the specified axis.
+         * 
+         * @tparam axis The axis along which the frame is moving ('x', 'y', or 'z').
+         * @param gamma The Lorentz factor gamma, which must be greater than or equal to 1.
+         * @return LorentzFrame A Lorentz frame with motion along the specified axis.
+         */
+        template <char axis>
+        static LorentzFrame uniaxialGamma(T gamma) {
+            static_assert(axis == 'x' || axis == 'y' || axis == 'z', "Only xyz axis suproted");
+            assert(gamma >= 1.0 && "Gamma must be >= 1");
+            using Kokkos::sqrt;
+
+            T beta = gamma == 1 ? T(0) : sqrt(gamma * gamma - 1) / gamma;
+            Vector3 arg{0, 0, 0};
+            arg[axis - 'x'] = gamma * beta;
+            return LorentzFrame<T>(arg);
+        }
+
+        /**
+         * @brief Constructs the Lorentz transformation matrix for converting from unprimed to primed frame.
+         * 
+         * This function computes the Lorentz transformation matrix that converts vectors
+         * from the unprimed reference frame to the primed reference frame. The transformation
+         * accounts for the relative velocity between the frames using the stored beta and gamma values.
+         * 
+         * If the magnitude of the beta vector is very small (less than 1e-10), the function returns
+         * an identity matrix, indicating no significant transformation.
+         * 
+         * @return matrix<T, 4, 4> The transformation matrix.
+         */
+        KOKKOS_INLINE_FUNCTION matrix<T, 4, 4> unprimedToPrimed() const noexcept {
+            T betaMagsq = beta_m.dot(beta_m);
+            using Kokkos::abs;
+            if (abs(betaMagsq) < 1e-10) {
+                return matrix<T, 4, 4>(T(1));
+            }
+            ippl::Vector<T, 3> betaSquared = beta_m * beta_m;
+
+            matrix<T, 4, 4> ret;
+
+            ret.data[0] = ippl::Vector<T, 4>{gamma_m, -gammaBeta_m[0], -gammaBeta_m[1], -gammaBeta_m[2]};
+            ret.data[1] = ippl::Vector<T, 4>{-gammaBeta_m[0], 1 + (gamma_m - 1) * betaSquared[0] / betaMagsq,
+                                   (gamma_m - 1) * beta_m[0] * beta_m[1] / betaMagsq,
+                                   (gamma_m - 1) * beta_m[0] * beta_m[2] / betaMagsq};
+            ret.data[2] = ippl::Vector<T, 4>{-gammaBeta_m[1],
+                                             (gamma_m - 1) * beta_m[0] * beta_m[1] / betaMagsq,
+                                             1 + (gamma_m - 1) * betaSquared[1] / betaMagsq,
+                                             (gamma_m - 1) * beta_m[1] * beta_m[2] / betaMagsq};
+            ret.data[3] = ippl::Vector<T, 4>{-gammaBeta_m[2],
+                                             (gamma_m - 1) * beta_m[0] * beta_m[2] / betaMagsq,
+                                             (gamma_m - 1) * beta_m[1] * beta_m[2] / betaMagsq,
+                                             1 + (gamma_m - 1) * betaSquared[2] / betaMagsq};
+            return ret;
+        }
+        /**
+         * @brief Constructs the Lorentz transformation matrix for converting from primed to unprimed frame.
+         * 
+         * This function computes the inverse of the Lorentz transformation matrix obtained
+         * from unprimedToPrimed(), allowing conversion of vectors from the primed reference frame
+         * back to the unprimed reference frame. It utilizes the matrix inversion function to achieve this.
+         * 
+         * @return matrix<T, 4, 4> The inverse transformation matrix. This could also be done by taking the negative velocity
+         */
+        KOKKOS_INLINE_FUNCTION matrix<T, 4, 4> primedToUnprimed() const noexcept {
+            return unprimedToPrimed().inverse();
+        }
+
+        KOKKOS_INLINE_FUNCTION Vector3 transformV(const Vector3& unprimedV) const noexcept {
+            T factor    = T(1.0) / (1.0 - unprimedV.dot(beta_m));
+            Vector3 ret = unprimedV * scalar(1.0 / gamma_m);
+            ret -= beta_m;
+            ret += beta_m * (unprimedV.dot(beta_m) * (gamma_m / (gamma_m + 1)));
+            return ret * factor;
+        }
+
+        KOKKOS_INLINE_FUNCTION Vector3 transformGammabeta(const Vector3& gammabeta) const noexcept {
+            using Kokkos::sqrt;
+            T gamma      = sqrt(T(1) + gammabeta.dot(gammabeta));
+            Vector3 beta = gammabeta;
+            beta /= gamma;
+            Vector3 betatrf = transformV(beta);
+            betatrf *= sqrt(1 - betatrf.dot(betatrf));
+            return betatrf;
+        }
+        /**
+         * @brief Transforms electric and magnetic fields from the unprimed to the primed frame.
+         * 
+         * This function applies the Lorentz transformation to a pair of 3D vectors representing
+         * the electric field (E) and magnetic field (B) in the unprimed reference frame,
+         * converting them to the corresponding fields in the primed reference frame. The transformation
+         * accounts for relativistic effects on the fields due to the relative motion of the frames.
+         * 
+         * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the unprimed frame.
+         * @return Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> A pair of transformed vectors representing the electric and magnetic fields in the primed frame.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> transform_EB(
+            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& unprimedEB) const noexcept {
+            using Kokkos::sqrt;
+            Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>> ret;
+            Vector3 vnorm = beta_m * (T(1.0) / sqrt(beta_m.dot(beta_m)));
+
+            ret.first =
+                (ippl::Vector<T, 3>(unprimedEB.first + beta_m.cross(unprimedEB.second)) * gamma_m)
+                - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm)));
+            ret.second =
+                (ippl::Vector<T, 3>(unprimedEB.second - beta_m.cross(unprimedEB.first)) * gamma_m)
+                - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm)));
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>
+        inverse_transform_EB(
+            const Kokkos::pair<ippl::Vector<T, 3>, ippl::Vector<T, 3>>& primedEB) const noexcept {
+            ippl::Vector<T, 3> mgb(gammaBeta_m * scalar(-1.0));
+            return LorentzFrame<T>(mgb).transform_EB(primedEB);
+        }
+    };
+
+}  // namespace ippl
+#endif
diff --git a/fel/NSFDSolverWithParticles.h b/fel/NSFDSolverWithParticles.h
new file mode 100644
index 000000000..28f9668bc
--- /dev/null
+++ b/fel/NSFDSolverWithParticles.h
@@ -0,0 +1,383 @@
+#ifndef NSFDSOLVERWITHPARTICLES_H
+#define NSFDSOLVERWITHPARTICLES_H
+#include "MaxwellSolvers/FDTD.h"
+namespace ippl{
+    template <typename _scalar, class PLayout>
+    struct Bunch : public ippl::ParticleBase<PLayout> {
+        using scalar = _scalar;
+
+        // Constructor for the Bunch class, taking a PLayout reference
+        Bunch(PLayout& playout)
+            : ippl::ParticleBase<PLayout>(playout) {
+            // Add attributes to the particle bunch
+            this->addAttribute(Q);     // Charge attribute
+            this->addAttribute(mass);  // Mass attribute
+            this->addAttribute(
+                gamma_beta);  // Gamma-beta attribute (product of relativistiv gamma and beta)
+            this->addAttribute(R_np1);     // Position attribute for the next time step
+            this->addAttribute(R_nm1);     // Position attribute for the next time step
+            this->addAttribute(E_gather);  // Electric field attribute for particle gathering
+            this->addAttribute(B_gather);  // Magnedit field attribute for particle gathering
+        }
+
+        // Destructor for the Bunch class
+        ~Bunch() {}
+
+        // Define container types for various attributes
+        using charge_container_type   = ippl::ParticleAttrib<scalar>;
+        using velocity_container_type = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector_container_type   = ippl::ParticleAttrib<ippl::Vector<scalar, 3>>;
+        using vector4_container_type  = ippl::ParticleAttrib<ippl::Vector<scalar, 4>>;
+
+        // Declare instances of the attribute containers
+        charge_container_type Q;             // Charge container
+        charge_container_type mass;          // Mass container
+        velocity_container_type gamma_beta;  // Gamma-beta container
+        typename ippl::ParticleBase<PLayout>::particle_position_type
+            R_np1;  // Position container for the next time step
+        typename ippl::ParticleBase<PLayout>::particle_position_type
+            R_nm1;  // Position container for the previous time step
+        ippl::ParticleAttrib<ippl::Vector<scalar, 3>>
+            E_gather;  // Electric field container for particle gathering
+        ippl::ParticleAttrib<ippl::Vector<scalar, 3>>
+            B_gather;  // Magnetic field container for particle gathering
+    };
+    template <typename T>
+    KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> gridCoordinatesOf(
+        const ippl::Vector<T, 3> hr, const ippl::Vector<T, 3> origin, ippl::Vector<T, 3> pos, int nghost = 1) {
+
+        // Declare a pair to hold the resulting grid coordinates (integer part) and fractional part
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<T, 3>> ret;
+
+        // Calculate the relative position of pos with respect to the origin
+        ippl::Vector<T, 3> relpos = pos - origin;
+
+        // Convert the relative position to grid coordinates by dividing by the grid spacing (hr)
+        ippl::Vector<T, 3> gridpos = relpos / hr;
+
+        // Declare an integer vector to hold the integer part of the grid coordinates
+        ippl::Vector<int, 3> ipos;
+
+        // Cast the grid position to an integer vector, which gives us the integer part of the coordinates
+        ipos = gridpos.template cast<int>();
+
+        // Declare a vector to hold the fractional part of the grid coordinates
+        ippl::Vector<T, 3> fracpos;
+
+        // Calculate the fractional part of the grid coordinates
+        for (unsigned k = 0; k < 3; k++) {
+            fracpos[k] = gridpos[k] - static_cast<int>(ipos[k]);
+        }
+
+        // Add the number of ghost cells to the integer part of the coordinates
+        ipos += ippl::Vector<int, 3>(nghost);
+
+        // Set the integer part of the coordinates in the return pair
+        ret.first = ipos;
+
+        // Set the fractional part of the coordinates in the return pair
+        ret.second = fracpos;
+
+        // Return the pair containing both the integer and fractional parts of the grid coordinates
+        return ret;
+    }
+
+    template <typename view_type, typename scalar>
+    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
+                                       ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
+                                       const ippl::Vector<scalar, 3>& pos, const scalar value) {
+        auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+        ipos -= ldom.first();
+        // std::cout << pos << " 's scatter args (will have 1, or nghost in general added): " << ipos << "\n";
+        if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
+            || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
+            || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
+            return;
+        }
+        assert(fracpos[0] >= 0.0f);
+        assert(fracpos[0] <= 1.0f);
+        assert(fracpos[1] >= 0.0f);
+        assert(fracpos[1] <= 1.0f);
+        assert(fracpos[2] >= 0.0f);
+        assert(fracpos[2] <= 1.0f);
+        ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+        assert(one_minus_fracpos[0] >= 0.0f);
+        assert(one_minus_fracpos[0] <= 1.0f);
+        assert(one_minus_fracpos[1] >= 0.0f);
+        assert(one_minus_fracpos[1] <= 1.0f);
+        assert(one_minus_fracpos[2] >= 0.0f);
+        assert(one_minus_fracpos[2] <= 1.0f);
+        scalar accum = 0;
+
+        for (unsigned i = 0; i < 8; i++) {
+            scalar weight               = 1;
+            ippl::Vector<int, 3> ipos_l = ipos;
+            for (unsigned d = 0; d < 3; d++) {
+                weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+                ipos_l[d] += !!(i & (1 << d));
+            }
+            assert_isreal(value);
+            assert_isreal(weight);
+            accum += weight;
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight);
+        }
+        assert(abs(accum - 1.0f) < 1e-6f);
+    }
+    template <typename view_type, typename scalar>
+    KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view,
+                                       ippl::Vector<scalar, 3> hr, ippl::Vector<scalar, 3> orig,
+                                       const ippl::Vector<scalar, 3>& pos,
+                                       const ippl::Vector<scalar, 3>& value) {
+        auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos);
+        ipos -= ldom.first();
+        if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1
+            || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1
+            || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) {
+            // Out of bounds case (you'll do nothing)
+            return;
+        }
+        assert(fracpos[0] >= 0.0f);
+        assert(fracpos[0] <= 1.0f);
+        assert(fracpos[1] >= 0.0f);
+        assert(fracpos[1] <= 1.0f);
+        assert(fracpos[2] >= 0.0f);
+        assert(fracpos[2] <= 1.0f);
+        ippl::Vector<scalar, 3> one_minus_fracpos = ippl::Vector<scalar, 3>(1) - fracpos;
+        assert(one_minus_fracpos[0] >= 0.0f);
+        assert(one_minus_fracpos[0] <= 1.0f);
+        assert(one_minus_fracpos[1] >= 0.0f);
+        assert(one_minus_fracpos[1] <= 1.0f);
+        assert(one_minus_fracpos[2] >= 0.0f);
+        assert(one_minus_fracpos[2] <= 1.0f);
+        scalar accum = 0;
+
+        for (unsigned i = 0; i < 8; i++) {
+            scalar weight               = 1;
+            ippl::Vector<int, 3> ipos_l = ipos;
+            for (unsigned d = 0; d < 3; d++) {
+                weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]);
+                ipos_l[d] += !!(i & (1 << d));
+            }
+            assert_isreal(weight);
+            accum += weight;
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight);
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight);
+            Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight);
+        }
+        assert(abs(accum - 1.0f) < 1e-6f);
+    }
+    template <typename view_type, typename scalar>
+    KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview,
+                                                  ippl::Vector<scalar, 3> hr,
+                                                  ippl::Vector<scalar, 3> origin,
+                                                  const ippl::Vector<scalar, 3>& from,
+                                                  const ippl::Vector<scalar, 3>& to,
+                                                  const scalar factor) {
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> from_grid =
+            gridCoordinatesOf(hr, origin, from);
+        Kokkos::pair<ippl::Vector<int, 3>, ippl::Vector<scalar, 3>> to_grid =
+            gridCoordinatesOf(hr, origin, to);
+
+        if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1]
+            && from_grid.first[2] == to_grid.first[2]) {
+            scatterToGrid(ldom, Jview, hr, origin,
+                          /*Scatter point, geometric average */         ippl::Vector<scalar, 3>((from + to) * scalar(0.5)),
+                          /*Scatter value, factor=charge / timestep */ ippl::Vector<scalar, 3>((to - from) * factor));
+
+            return;
+        }
+        ippl::Vector<scalar, 3> relay;
+        const int nghost                   = 1;
+        const ippl::Vector<scalar, 3> orig = origin;
+        using Kokkos::max;
+        using Kokkos::min;
+        for (unsigned int i = 0; i < 3; i++) {
+            relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
+                               + scalar(1.0) * hr[i] + orig[i],
+                           max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i]
+                                   + scalar(0.0) * hr[i] + orig[i],
+                               scalar(0.5) * (to[i] + from[i])));
+        }
+        scatterToGrid(ldom, Jview, hr, origin,
+                      ippl::Vector<scalar, 3>((from + relay) * scalar(0.5)),
+                      ippl::Vector<scalar, 3>((relay - from) * factor));
+        scatterToGrid(ldom, Jview, hr, origin, ippl::Vector<scalar, 3>((relay + to) * scalar(0.5)),
+                      ippl::Vector<scalar, 3>((to - relay) * factor));
+    }
+    template <typename scalar, fdtd_bc boundary_conditions>
+    class NSFDSolverWithParticles {
+    public:
+        constexpr static unsigned dim = 3;
+        using vector_type             = ippl::Vector<scalar, 3>;
+        using vector4_type            = ippl::Vector<scalar, 4>;
+        using FourField =
+            ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>,
+                        typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using ThreeField =
+            ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>,
+                        typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using playout_type = ParticleSpatialLayout<scalar, 3>;
+        using bunch_type   = Bunch<scalar, ParticleSpatialLayout<scalar, 3>>;
+        using Mesh_t       = ippl::UniformCartesian<scalar, dim>;
+        FieldLayout<dim>* layout_mp;
+        Mesh_t* mesh_mp;
+        playout_type playout;
+        Bunch<scalar, ParticleSpatialLayout<scalar, 3>> particles;
+        FourField J;
+        ThreeField E;
+        ThreeField B;
+        NonStandardFDTDSolver<ThreeField, FourField, absorbing> field_solver;
+
+        ippl::Vector<uint32_t, 3> nr_global;
+        ippl::Vector<scalar, 3> hr_m;
+        size_t steps_taken;
+        NSFDSolverWithParticles(FieldLayout<dim>& layout, Mesh_t& mesch, size_t nparticles)
+            : layout_mp(&layout)
+            , mesh_mp(&mesch)
+            , playout(layout, mesch)
+            , particles(playout)
+            , J(mesch, layout)
+            , E(mesch, layout)
+            , B(mesch, layout)
+            , field_solver(J, E, B) {
+            particles.create(nparticles);
+            nr_global = ippl::Vector<uint32_t, 3>{
+                uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1),
+                uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1),
+                uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)};
+            hr_m        = mesh_mp->getMeshSpacing();
+            steps_taken = 0;
+        }
+        template <bool space_charge = false>
+        void scatterBunch() {
+            auto hr_m           = mesh_mp->getMeshSpacing();
+            const scalar volume = hr_m[0] * hr_m[1] * hr_m[2];
+            assert_isreal(volume);
+            assert_isreal((scalar(1) / volume));
+            J                       = typename decltype(J)::value_type(0);
+            auto Jview              = J.getView();
+            auto qview              = particles.Q.getView();
+            auto rview              = particles.R.getView();
+            auto rm1view            = particles.R_nm1.getView();
+            auto orig               = mesh_mp->getOrigin();
+            auto hr                 = mesh_mp->getMeshSpacing();
+            auto dt                 = field_solver.dt;
+            bool sc                 = space_charge;
+            ippl::NDIndex<dim> lDom = layout_mp->getLocalNDIndex();
+            Kokkos::parallel_for(
+                particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) {
+                    vector_type pos  = rview(i);
+                    vector_type to   = rview(i);
+                    vector_type from = rm1view(i);
+                    if (sc) {
+                        scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume);
+                    }
+                    scatterLineToGrid(lDom, Jview, hr, orig, from, to,
+                                      scalar(qview(i)) / (volume * dt));
+                });
+            Kokkos::fence();
+            J.accumulateHalo();
+        }
+        template <typename callable>
+        void updateBunch(scalar time, callable external_field) {
+            Kokkos::fence();
+            auto gbview     = particles.gamma_beta.getView();
+            auto eview      = particles.E_gather.getView();
+            auto bview      = particles.B_gather.getView();
+            auto qview      = particles.Q.getView();
+            auto mview      = particles.mass.getView();
+            auto rview      = particles.R.getView();
+            auto rm1view    = particles.R_nm1.getView();
+            auto rp1view    = particles.R_np1.getView();
+            scalar bunch_dt = field_solver.dt / 3;
+            Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView());
+            E.fillHalo();
+            B.fillHalo();
+            Kokkos::fence();
+            for (int bts = 0; bts < 3; bts++) {
+                particles.E_gather.gather(E, particles.R, /*offset = */ 0.0);
+                particles.B_gather.gather(B, particles.R, /*offset = */ 0.0);
+                Kokkos::fence();
+                Kokkos::parallel_for(
+                    particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) {
+                        const ippl::Vector<scalar, 3> pgammabeta = gbview(i);
+                        ippl::Vector<scalar, 3> E_grid           = eview(i);
+                        ippl::Vector<scalar, 3> B_grid           = bview(i);
+                        ippl::Vector<scalar, 3> bunchpos = rview(i);
+                        Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>> external_eb =
+                            external_field(bunchpos, time + bunch_dt * bts);
+
+                        ippl::Vector<ippl::Vector<scalar, 3>, 2> EB{
+                            ippl::Vector<scalar, 3>(E_grid + external_eb.first),
+                            ippl::Vector<scalar, 3>(B_grid + external_eb.second)};
+
+                        const scalar charge = qview(i);
+                        const scalar mass   = mview(i);
+                        const ippl::Vector<scalar, 3> t1 =
+                            pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+                        const scalar alpha =
+                            charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1)));
+                        const ippl::Vector<scalar, 3> t2 = t1 + alpha * t1.cross(EB[1]);
+                        const ippl::Vector<scalar, 3> t3 =
+                            t1
+                            + t2.cross(scalar(2) * alpha
+                                       * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1])))));
+                        const ippl::Vector<scalar, 3> ngammabeta =
+                            t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass);
+
+                        rview(i) =
+                            rview(i)
+                            + bunch_dt * ngammabeta
+                                  / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta))));
+                        gbview(i) = ngammabeta;
+                    });
+                Kokkos::fence();
+            }
+            Kokkos::View<bool*> invalid("OOB Particcel", particles.getLocalNum());
+            size_t invalid_count = 0;
+            auto origo           = mesh_mp->getOrigin();
+            ippl::Vector<scalar, 3> extenz;  //
+            extenz[0] = nr_global[0] * hr_m[0];
+            extenz[1] = nr_global[1] * hr_m[1];
+            extenz[2] = nr_global[2] * hr_m[2];
+            Kokkos::parallel_reduce(
+                Kokkos::RangePolicy<
+                    typename playout_type::RegionLayout_t::view_type::execution_space>(
+                    0, particles.getLocalNum()),
+                KOKKOS_LAMBDA(size_t i, size_t & ref) {
+                    bool out_of_bounds             = false;
+                    ippl::Vector<scalar, dim> ppos = rview(i);
+                    for (size_t d = 0; d < dim; d++) {
+                        out_of_bounds |= (ppos[d] <= origo[d]);
+                        out_of_bounds |=
+                            (ppos[d] >= origo[d] + extenz[d]);  // Check against simulation domain
+                    }
+                    invalid(i) = out_of_bounds;
+                    ref += out_of_bounds;
+                },
+                invalid_count);
+            particles.destroy(invalid, invalid_count);
+            Kokkos::fence();
+            playout.update(particles);
+        }
+        void solve() {
+            scatterBunch();
+            field_solver.solve();
+            updateBunch(
+                field_solver.dt * steps_taken,
+                /*no external field*/ [] KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/) {
+                    return Kokkos::pair<vector_type, vector_type>{vector_type(0), vector_type(0)};
+                });
+            ++steps_taken;
+        }
+        template <typename callable>
+        void solve(callable external_field) {
+            scatterBunch();
+            field_solver.solve();
+            // std::cout << field_solver.dt * steps_taken << "\n";
+            updateBunch(field_solver.dt * steps_taken, external_field);
+            ++steps_taken;
+        }
+    };
+}
+#endif
\ No newline at end of file
diff --git a/fel/TestFDTDSolverWithParticles.cpp b/fel/TestFDTDSolverWithParticles.cpp
new file mode 100644
index 000000000..33c516918
--- /dev/null
+++ b/fel/TestFDTDSolverWithParticles.cpp
@@ -0,0 +1,94 @@
+#include <cstddef>
+using std::size_t;
+#include <Kokkos_Core.hpp>
+#include "Ippl.h"
+#include "Types/Vector.h"
+#include "Field/Field.h"
+#include "NSFDSolverWithParticles.h"
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include <stb_image_write.h>
+#include "units.h"
+#include <Kokkos_Random.hpp>
+template<typename scalar1, typename... scalar>
+    requires((std::is_floating_point_v<scalar1>))
+KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){
+    uint32_t dim = sizeof...(scalar);
+    ippl::Vector<scalar1, sizeof...(scalar)> vec{scalar1(x - mean)...};
+    scalar1 vecsum(0);
+    for(unsigned d = 0;d < dim;d++){
+        vecsum += vec[d] * vec[d];
+        
+    }
+    #ifndef __CUDA_ARCH__
+    using std::exp;
+    #endif
+    return exp(-(vecsum) / (stddev * stddev)); 
+}
+
+int main(int argc, char* argv[]){
+    ippl::initialize(argc, argv);
+    {
+        using scalar = float;
+        constexpr size_t n = 100;
+        ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
+        ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]);
+        ippl::Vector<scalar, 3> extents{meter_in_unit_lengths,
+                                        meter_in_unit_lengths,
+                                        meter_in_unit_lengths};
+        std::array<bool, 3> isParallel;
+        isParallel.fill(false);
+        isParallel[2] = true;
+
+        // all parallel layout, standard domain, normal axis order
+        ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+        ippl::Vector<scalar, 3> hx;
+        for(unsigned d = 0;d < 3;d++){
+            hx[d] = extents[d] / (scalar)nr[d];
+        }
+        ippl::Vector<scalar, 3> origin = {0,0,0};
+        ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+        
+        ippl::NSFDSolverWithParticles<scalar, ippl::absorbing> solver(layout, mesh, 1 << 15);
+        
+        auto pview = solver.particles.R.getView();
+        auto p1view = solver.particles.R_nm1.getView();
+        auto gbview = solver.particles.gamma_beta.getView();
+
+        Kokkos::Random_XorShift64_Pool<> random_pool(12345);
+        Kokkos::parallel_for(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){
+            auto state = random_pool.get_state();
+            pview(i)[0] = state.normal(origin[0] + extents[0] * 0.5, 0.04 * extents[0]);
+            pview(i)[1] = state.normal(origin[1] + extents[1] * 0.5, 0.04 * extents[1]);
+            pview(i)[2] = state.normal(origin[2] + extents[2] * 0.5, 0.04 * extents[2]);
+            p1view(i) = pview(i);
+            gbview(i) = 0;
+            random_pool.free_state(state);
+        });
+        {
+            double var = 0;
+            Kokkos::parallel_reduce(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i, double& ref){
+                ippl::Vector<scalar, 3> pd(pview(i) - (origin + extents * 0.5));
+                ref += pd.dot(pd);
+            }, var);
+            std::cout << ippl::Vector<double, 3>(var * (1.0 / solver.particles.getLocalNum())) << "\n";
+        }
+        solver.playout.update(solver.particles);
+        solver.particles.Q = electron_charge_in_unit_charges;
+        solver.particles.mass = electron_mass_in_unit_masses;
+        for(int i = 0;i < 200;i++){
+            solver.solve();
+        }
+        {
+            double var = 0;
+            Kokkos::parallel_reduce(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i, double& ref){
+                ippl::Vector<scalar, 3> pd(pview(i) - (origin + extents * 0.5));
+                ref += pd.dot(pd);
+            }, var);
+            std::cout << ippl::Vector<double, 3>(var * (1.0 / solver.particles.getLocalNum())) << "\n";
+        }
+        
+    }
+    //exit:
+    ippl::finalize();
+}
\ No newline at end of file
diff --git a/fel/Undulator.h b/fel/Undulator.h
new file mode 100644
index 000000000..57cad337c
--- /dev/null
+++ b/fel/Undulator.h
@@ -0,0 +1,97 @@
+#ifndef UNDULATOR_H
+#define UNDULATOR_H
+#include <Kokkos_Core.hpp>
+
+#include <cmath>
+
+#include "Types/Vector.h"
+
+#include "units.h"
+#include "LorentzTransform.h"
+namespace ippl {
+    template <typename scalar>
+    struct undulator_parameters {
+        scalar lambda_u;  // MITHRA: lambda_u
+        scalar K;         // Undulator parameter
+        scalar length;
+        scalar B_magnitude;
+        undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length)
+            : lambda_u(lambda_u)
+            , K(K_undulator_parameter)
+            , length(_length) {
+            B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K)
+                          / (electron_charge_in_unit_charges * lambda_u);
+        }
+    };
+    /**
+     * @brief Struct representing an undulator.
+     *
+     * @tparam scalar Type of the scalar values (e.g., float, double).
+     */
+    template <typename scalar>
+    struct Undulator {
+        undulator_parameters<scalar> uparams;  ///< Parameters of the undulator.
+        scalar distance_to_entry;              ///< Distance to the entry of the undulator.
+        scalar k_u;                            ///< Wavenumber of the undulator.
+
+        /**
+         * @brief Constructor to initialize undulator parameters and calculate k_u.
+         *
+         * @param p Parameters of the undulator.
+         * @param dte Distance to the entry of the undulator.
+         */
+        KOKKOS_FUNCTION Undulator(const undulator_parameters<scalar>& p, scalar dte)
+            : uparams(p)
+            , distance_to_entry(dte)
+            , k_u(2 * M_PI / p.lambda_u) {}
+
+        /**
+         * @brief Overloaded operator() to compute magnetic field components.
+         *
+         * @param position_in_lab_frame Position vector in the lab frame.
+         * @return Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+         *         Pair containing magnetic field and its derivative.
+         */
+        KOKKOS_INLINE_FUNCTION Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+        operator()(const ippl::Vector<scalar, 3>& position_in_lab_frame) const noexcept {
+            using Kokkos::cos;
+            using Kokkos::cosh;
+            using Kokkos::exp;
+            using Kokkos::sin;
+            using Kokkos::sinh;
+
+            Kokkos::pair<ippl::Vector<scalar, 3>, ippl::Vector<scalar, 3>>
+                ret;             // Return pair containing magnetic field and its derivative.
+            ret.first.fill(0);   // Initialize magnetic field vector.
+            ret.second.fill(0);  // Initialize derivative vector.
+
+            // If the position is before the undulator entry.
+            if (position_in_lab_frame[2] < distance_to_entry) {
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator < 0);  // Ensure we are in the correct region.
+                scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator)
+                                    * 0.5));  // Gaussian decay factor.
+
+                ret.second[0] = 0;  // No x-component.
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1])
+                                * z_in_undulator * k_u * scal;  // y-component.
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1])
+                                * scal;  // z-component.
+            }
+            // If the position is within the undulator.
+            else if (position_in_lab_frame[2] > distance_to_entry
+                     && position_in_lab_frame[2] < distance_to_entry + uparams.length) {
+                scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry;
+                assert(z_in_undulator >= 0);  // Ensure we are in the correct region.
+
+                ret.second[0] = 0;  // No x-component.
+                ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1])
+                                * sin(k_u * z_in_undulator);  // y-component.
+                ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1])
+                                * cos(k_u * z_in_undulator);  // z-component.
+            }
+            return ret;
+        }
+    };
+}  // namespace ippl
+#endif  // UNDULATOR_H
\ No newline at end of file
diff --git a/fel/units.h b/fel/units.h
new file mode 100644
index 000000000..318892db8
--- /dev/null
+++ b/fel/units.h
@@ -0,0 +1,37 @@
+#ifndef UNITS_HPP
+#define UNITS_HPP
+constexpr double sqrt_4pi = 3.54490770181103205459;
+constexpr double alpha_scaling_factor = 1e30;
+constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor;
+constexpr double unit_charge_in_electron_charges = 11.70623710394218618969 / sqrt_4pi;
+constexpr double unit_time_in_seconds = 5.391247e-44 * alpha_scaling_factor;
+constexpr double electron_mass_in_kg = 9.1093837015e-31;
+constexpr double unit_mass_in_kg = 2.176434e-8 / alpha_scaling_factor;
+constexpr double unit_energy_in_joule = unit_mass_in_kg * unit_length_in_meters * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds);
+constexpr double kg_in_unit_masses = 1.0 / unit_mass_in_kg;
+constexpr double meter_in_unit_lengths = 1.0 / unit_length_in_meters;
+constexpr double electron_charge_in_coulombs = 1.602176634e-19;
+constexpr double coulomb_in_electron_charges = 1.0 / electron_charge_in_coulombs;
+
+constexpr double electron_charge_in_unit_charges = 1.0 / unit_charge_in_electron_charges;
+constexpr double second_in_unit_times = 1.0 / unit_time_in_seconds;
+constexpr double electron_mass_in_unit_masses = electron_mass_in_kg * kg_in_unit_masses;
+constexpr double unit_force_in_newtons = unit_mass_in_kg * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds);
+
+constexpr double coulomb_in_unit_charges = coulomb_in_electron_charges * electron_charge_in_unit_charges;
+constexpr double unit_voltage_in_volts = unit_energy_in_joule * coulomb_in_unit_charges;
+constexpr double unit_charges_in_coulomb = 1.0 / coulomb_in_unit_charges;
+constexpr double unit_current_in_amperes = unit_charges_in_coulomb / unit_time_in_seconds;
+constexpr double ampere_in_unit_currents = 1.0 / unit_current_in_amperes;
+constexpr double unit_current_length_in_ampere_meters = unit_current_in_amperes * unit_length_in_meters;
+constexpr double unit_magnetic_fluxdensity_in_tesla = unit_voltage_in_volts * unit_time_in_seconds / (unit_length_in_meters * unit_length_in_meters);
+constexpr double unit_electric_fieldstrength_in_voltpermeters = (unit_voltage_in_volts / unit_length_in_meters);
+constexpr double voltpermeter_in_unit_fieldstrengths = 1.0 / unit_electric_fieldstrength_in_voltpermeters;
+constexpr double unit_powerdensity_in_watt_per_square_meter = 1.389e122 / (alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor);
+constexpr double volts_in_unit_voltages = 1.0 / unit_voltage_in_volts;
+constexpr double epsilon0_in_si = unit_current_in_amperes * unit_time_in_seconds / (unit_voltage_in_volts * unit_length_in_meters);
+constexpr double mu0_in_si = unit_force_in_newtons / (unit_current_in_amperes * unit_current_in_amperes);
+constexpr double G = unit_length_in_meters * unit_length_in_meters * unit_length_in_meters / (unit_mass_in_kg * unit_time_in_seconds * unit_time_in_seconds);
+constexpr double verification_gravity = unit_mass_in_kg * unit_mass_in_kg / (unit_length_in_meters * unit_length_in_meters) * G;
+constexpr double verification_coulomb = (unit_charges_in_coulomb * unit_charges_in_coulomb / (unit_length_in_meters * unit_length_in_meters) * (1.0 / (epsilon0_in_si))) / unit_force_in_newtons;
+#endif
\ No newline at end of file
diff --git a/plotscripts/ampereplot.gp b/plotscripts/ampereplot.gp
new file mode 100755
index 000000000..495a05cbd
--- /dev/null
+++ b/plotscripts/ampereplot.gp
@@ -0,0 +1,13 @@
+#! /usr/bin/gnuplot
+set terminal svg size 800,600 font "CMU Serif,22"
+set logscale x 2
+set logscale y 2
+set key box
+set output "ampere.svg"
+set ylabel "Magnetic Flux density[T]"
+set xlabel "Distance to wire [m]"
+#set title "Ampere's Law"
+set grid lw 2
+set ytics format "2^{%L}"
+set xtics format "2^{%L}"
+plot 'ampere_line.txt' u (abs($1)):(abs($2)) w points title "Measurements", 1.256e-6/(2*3.14159)*(1/x) w lines lw 2 title "Radial falloff"
diff --git a/plotscripts/gaussplot.gp b/plotscripts/gaussplot.gp
new file mode 100755
index 000000000..879584114
--- /dev/null
+++ b/plotscripts/gaussplot.gp
@@ -0,0 +1,13 @@
+#! /usr/bin/gnuplot
+set terminal svg size 900,700 font "CMU Serif, 25"
+set logscale x 2
+set logscale y 2
+set key box
+set output "gauss.svg"
+set ylabel "Electric Field [V/m]"
+set xlabel "Distance to charge [m]"
+#set title "Gauss's Law"
+set grid lw 2
+set ytics format "2^{%L}"
+set xtics format "2^{%L}"
+plot 'gauss_line.txt' u (abs($1)):(abs($2)) w points title "Measurements" pt 2, (1 / (4 * 3.1416 * 8.854e-12 * x * x)) w lines lw 3 title "Quadratic falloff"
diff --git a/src/Expression/IpplOperations.h b/src/Expression/IpplOperations.h
index dad442901..3aa3177d2 100644
--- a/src/Expression/IpplOperations.h
+++ b/src/Expression/IpplOperations.h
@@ -276,13 +276,16 @@ namespace ippl {
                 }
                 return res;
             }
+            KOKKOS_INLINE_FUNCTION operator typename E1::value_type() const{
+                return apply();
+            }
 
             /*
              * This is required for BareField::dot
              */
             template <typename... Args>
             KOKKOS_INLINE_FUNCTION auto operator()(Args... args) const {
-                return dot(u_m(args...), v_m(args...)).apply();
+                return dot(u_m(args...), v_m(args...));
             }
 
         private:
diff --git a/src/MaxwellSolvers/AbsorbingBC.h b/src/MaxwellSolvers/AbsorbingBC.h
new file mode 100644
index 000000000..4a87df71c
--- /dev/null
+++ b/src/MaxwellSolvers/AbsorbingBC.h
@@ -0,0 +1,427 @@
+#ifndef ABC_H
+#define ABC_H
+#include <Kokkos_Core.hpp>
+#include "Types/Vector.h"
+#include "Field/Field.h"
+template<unsigned a, unsigned b>
+constexpr KOKKOS_INLINE_FUNCTION auto first(){
+    return a;
+}
+template<unsigned a, unsigned b>
+constexpr KOKKOS_INLINE_FUNCTION auto second(){
+    return b;
+}
+template<typename _scalar, unsigned _main_axis, unsigned... _side_axes>
+struct second_order_abc_face{
+    using scalar = _scalar;
+    scalar Cweights[5];
+    int sign;
+    constexpr static unsigned main_axis = _main_axis;
+    KOKKOS_FUNCTION second_order_abc_face(ippl::Vector<scalar, 3> hr, scalar dt, int _sign) : sign(_sign){
+        constexpr scalar c = 1;
+        constexpr unsigned side_axes[2] = {_side_axes...};
+        static_assert(
+            (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) ||
+            (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) ||
+            (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1)
+        );
+        assert(_main_axis != side_axes[0]);
+        assert(_main_axis != side_axes[1]);
+        assert(side_axes[0] != side_axes[1]);
+        constexpr scalar truncation_order = 2.0;
+        scalar p      = ( 1.0 + 1 * 1 ) / ( 1 + 1 );
+        scalar q      = - 1.0 / ( 1 + 1 );
+
+        scalar d  	 = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt);
+
+        Cweights[0]	= (   1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
+        Cweights[1]	= ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d;
+        assert(abs(Cweights[1] + 1) < 1e-6); //Like literally
+        Cweights[2]  	= (   p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d;
+        Cweights[3]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d;
+        Cweights[4]  	= -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d;
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        uint32_t i = c[0];
+        uint32_t j = c[1];
+        uint32_t k = c[2];
+        using ippl::apply;
+        constexpr unsigned side_axes[2] = {_side_axes...};
+        ippl::Vector<uint32_t, 3> side_axis1_onehot = ippl::Vector<uint32_t, 3>{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2};
+        ippl::Vector<uint32_t, 3> side_axis2_onehot = ippl::Vector<uint32_t, 3>{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2};
+        ippl::Vector<uint32_t, 3> mainaxis_off = ippl::Vector<int32_t, 3>{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast<uint32_t>();
+        return advanceBoundaryS(
+		    A_nm1(i,j,k), A_n(i,j,k),
+		    apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off),
+		    apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off),
+		    apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot),                apply(A_n, c - side_axis1_onehot),
+		    apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot)
+        );
+    }
+    template<typename value_type>
+    KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 ,
+						 const value_type& v3 , const value_type& v4 , const value_type& v5 ,
+						 const value_type& v6 , const value_type& v7 , const value_type& v8 ,
+						 const value_type& v9 , const value_type& v10, const value_type& v11,
+						 const value_type& v12, const value_type& v13)const noexcept
+    {
+        
+      value_type v0 =
+    	 Cweights[0]  * (v1 + v5) +
+    	(Cweights[1]) * v3 +
+    	(Cweights[2]) * ( v2 + v4 ) +
+    	(Cweights[3]) * ( v6 + v7 + v10 + v11 ) +
+    	(Cweights[4]) * ( v8 + v9 + v12 + v13 );
+      return v0;
+    }
+};
+template<typename _scalar, unsigned edge_axis, unsigned normal_axis1, unsigned normal_axis2, bool na1_zero, bool na2_zero>
+struct second_order_abc_edge{
+    using scalar = _scalar;
+    //
+    scalar Eweights[5];
+    
+    KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector<scalar, 3> hr, scalar dt){
+        static_assert(normal_axis1 != normal_axis2);
+        static_assert(edge_axis != normal_axis2);
+        static_assert(edge_axis != normal_axis1);
+        static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0));
+        constexpr scalar c0_ = scalar(1);
+        scalar d    =    ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt );
+        if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x)
+            Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[1] = (   ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[2] = (   ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d;
+            Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d;
+            Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d;
+        }
+        else{
+            assert(false);
+        }
+        
+
+
+        
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        uint32_t i = c[0];
+        uint32_t j = c[1];
+        uint32_t k = c[2];
+        using ippl::apply;
+        //constexpr unsigned nax[2] = {normal_axis1, normal_axis2};
+        ippl::Vector<int32_t, 3> normal_axis1_onehot = ippl::Vector<int32_t, 3>{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1);
+        ippl::Vector<int32_t, 3> normal_axis2_onehot = ippl::Vector<int32_t, 3>{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1);
+        ippl::Vector<uint32_t, 3> acc0 = {i, j, k};
+        ippl::Vector<uint32_t, 3> acc1 = acc0 + normal_axis1_onehot.cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> acc2 = acc0 + normal_axis2_onehot.cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> acc3 = acc0 + normal_axis1_onehot.cast<uint32_t>() + normal_axis2_onehot.cast<uint32_t>();
+        //ippl::Vector<uint32_t, 3> axism = (-ippl::Vector<int, 3>{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast<uint32_t>();
+        ippl::Vector<uint32_t, 3> axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2};
+        //return A_n(i, j, k);
+        return advanceEdgeS(
+                                        A_n(i, j, k),      A_nm1(i, j, k),
+            apply(A_np1, acc1),   apply(A_n, acc1   ), apply(A_nm1, acc1),
+            apply(A_np1, acc2),   apply(A_n, acc2   ), apply(A_nm1, acc2),
+            apply(A_np1, acc3),   apply(A_n, acc3   ), apply(A_nm1, acc3),
+            apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp),
+            apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp)
+        );
+    }
+    template<typename value_type>
+    KOKKOS_INLINE_FUNCTION value_type advanceEdgeS 		
+            (              value_type v1 , value_type v2 ,
+                           value_type v3 , value_type v4 , value_type v5 ,
+                           value_type v6 , value_type v7 , value_type v8 ,
+                           value_type v9 , value_type v10, value_type v11,
+                           value_type v12, value_type v13, value_type v14,
+                           value_type v15, value_type v16, value_type v17,
+                           value_type v18, value_type v19)const noexcept{
+    value_type v0 =
+    Eweights[0] * (v3 + v8) +
+    Eweights[1] * (v5 + v6) +
+    Eweights[2] * (v2 + v9) +
+    Eweights[3] * (v1 + v4 + v7 + v10) +
+    Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11;
+    return v0;
+  }
+};
+template<typename _scalar, bool x0, bool y0, bool z0>
+struct second_order_abc_corner{
+    using scalar = _scalar;
+    scalar Cweights[17];
+    KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector<scalar, 3> hr, scalar dt){
+        constexpr scalar c0_ = scalar(1);
+        Cweights[0]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[1]  =   (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[2]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[3]  =   ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[4]  =   (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[5]  =   (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[6]  =   ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[7]  =   (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[8]  = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[9]  = - (   1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[12] = - (   1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[13] = - (   1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[15] = - (   1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt);
+        Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt);
+    }
+    template<typename view_type, typename Coords>
+    KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{
+        //First implementation: 0,0,0 corner
+        constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1);
+        constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1);
+        constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1);
+        using ippl::apply;
+        const ippl::Vector<uint32_t, 3> offsets[8] = {
+            ippl::Vector<uint32_t, 3>{0,0,0},
+            ippl::Vector<uint32_t, 3>{xoff,0,0},
+            ippl::Vector<uint32_t, 3>{0,yoff,0},
+            ippl::Vector<uint32_t, 3>{0,0,zoff},
+            ippl::Vector<uint32_t, 3>{xoff,yoff,0},
+            ippl::Vector<uint32_t, 3>{xoff,0,zoff},
+            ippl::Vector<uint32_t, 3>{0,yoff,zoff},
+            ippl::Vector<uint32_t, 3>{xoff,yoff,zoff},
+        };
+        return advanceCornerS(
+                                          apply(A_n, c), apply(A_nm1, c),
+            apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]),
+            apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]),
+            apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]),
+            apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]),
+            apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]),
+            apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]),
+            apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7])
+        );
+    }
+    template<typename value_type>
+    KOKKOS_INLINE_FUNCTION value_type advanceCornerS         
+                            (       value_type v1 , value_type v2 ,
+                             value_type v3 , value_type v4 , value_type v5 ,
+                             value_type v6 , value_type v7 , value_type v8 ,
+                             value_type v9 , value_type v10, value_type v11,
+                             value_type v12, value_type v13, value_type v14,
+                             value_type v15, value_type v16, value_type v17,
+                             value_type v18, value_type v19, value_type v20,
+                             value_type v21, value_type v22, value_type v23)const noexcept{
+    return      - ( v1  * (Cweights[16]) + v2  * (Cweights[8]) +
+    v3  * Cweights[1] + v4  * Cweights[16] + v5  * Cweights[9] +
+    v6  * Cweights[2] + v7  * Cweights[16] + v8  * Cweights[10] +
+    v9  * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] +
+    v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] +
+    v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] +
+    v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] +
+    v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0];
+  }
+};
+
+
+
+
+
+
+struct second_order_mur_boundary_conditions{
+    template<typename field_type, typename dt_type>
+    void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector<uint32_t, 3> true_nr, ippl::NDIndex<3> lDom){
+        using scalar = decltype(dt);
+        //TODO: tbh don't know
+        //const unsigned nghost = 1;
+        const ippl::Vector<scalar, 3> hr = FA_n.get_mesh().getMeshSpacing();
+        //assert_isreal((betaMur[0]));
+        //assert_isreal((betaMur[1]));
+        //assert_isreal((betaMur[2]));
+        auto A_n   = FA_n.getView();
+        auto A_np1 = FA_np1.getView();
+        auto A_nm1 = FA_nm1.getView();
+        ippl::Vector<uint32_t, 3> local_nr{
+            uint32_t(A_n.extent(0)),
+            uint32_t(A_n.extent(1)),
+            uint32_t(A_n.extent(2))
+        };
+        constexpr uint32_t min_abc_boundary = 1;
+        constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1;
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+            
+            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            if(Kokkos::popcount(lval) > 1)return;
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
+
+            if(Kokkos::popcount(val) == 1){
+                if(ig == min_abc_boundary){
+                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(jg == min_abc_boundary){
+                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(kg == min_abc_boundary){
+                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, 1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(ig == true_nr[0] - max_abc_boundary_sub){
+                    second_order_abc_face<scalar, 0, 1, 2> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(jg == true_nr[1] - max_abc_boundary_sub){
+                    second_order_abc_face<scalar, 1, 0, 2> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                if(kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_face<scalar, 2, 0, 1> soa(hr, dt, -1);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+            }
+        });
+        Kokkos::fence();
+        //FA_np1.fillHalo();
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+
+            uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+                         + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            if(Kokkos::popcount(lval) > 2)return;
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
+            if(Kokkos::popcount(val) == 2){ //Edge
+                if(ig == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 1, 2, 0, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 2, 0, 1, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 0, 1, 2, true, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 1, 2, 0, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 2, 0, 1, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 0, 1, 2, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 1, 2, 0, true, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 2, 0, 1, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_edge<scalar, 0, 1, 2, false, true> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+
+                else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 1, 2, 0, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 2, 0, 1, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_edge<scalar, 0, 1, 2, false, false> soa(hr, dt);
+                    A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else{
+                    assert(false);
+                }
+            }
+        });
+        Kokkos::fence();
+        //FA_np1.fillHalo();
+        Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){
+            uint32_t ig = i + lDom.first()[0];
+            uint32_t jg = j + lDom.first()[1];
+            uint32_t kg = k + lDom.first()[2];
+
+            //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2)
+            //             + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5);
+
+            //if(Kokkos::popcount(lval) > 1)return;
+            uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2)
+                             + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5);
+            
+            if(Kokkos::popcount(val) == 3){
+                //printf("Corner: %d, %d, %d\n", i, j, k);
+                if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 1, 1, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 0, 1, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 1, 0, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){
+                    second_order_abc_corner<scalar, 0, 0, 1> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 1, 1, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 0, 1, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 1, 0, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){
+                    second_order_abc_corner<scalar, 0, 0, 0> coa(hr, dt);
+                    A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector<uint32_t, 3>{i, j, k});
+                }
+                else{
+                    assert(false);
+                }
+            }
+        });
+        Kokkos::fence();
+    }
+};
+#endif
\ No newline at end of file
diff --git a/src/MaxwellSolvers/CMakeLists.txt b/src/MaxwellSolvers/CMakeLists.txt
index 62d93dd36..d961415ea 100644
--- a/src/MaxwellSolvers/CMakeLists.txt
+++ b/src/MaxwellSolvers/CMakeLists.txt
@@ -3,6 +3,7 @@ set (_SRCS
 
 set (_HDRS
     Maxwell.h
+    FDTD.h
 )
 
 include_DIRECTORIES (
diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h
new file mode 100644
index 000000000..018758db4
--- /dev/null
+++ b/src/MaxwellSolvers/FDTD.h
@@ -0,0 +1,403 @@
+#ifndef IPPL_FDTD_H
+#define IPPL_FDTD_H
+#include <cstddef>
+using std::size_t;
+#include "Types/Vector.h"
+
+#include "FieldLayout/FieldLayout.h"
+#include "MaxwellSolvers/AbsorbingBC.h"
+#include "MaxwellSolvers/Maxwell.h"
+#include "Meshes/UniformCartesian.h"
+#include "Particle/ParticleBase.h"
+
+constexpr double sq(double x) {
+    return x * x;
+}
+constexpr float sq(float x) {
+    return x * x;
+}
+#define assert_isreal(X) assert(!Kokkos::isnan(X) && !Kokkos::isinf(X))
+namespace ippl {
+    enum fdtd_bc {
+        periodic,
+        absorbing
+    };
+
+    template <typename EMField, typename FourField, fdtd_bc boundary_conditions = periodic>
+    class StandardFDTDSolver : Maxwell<EMField, FourField> {
+    public:
+        constexpr static unsigned Dim = EMField::dim;
+        using scalar                  = typename EMField::value_type::value_type;
+        using Vector_t                = Vector<typename EMField::value_type::value_type, Dim>;
+        using FourVector_t            = typename FourField::value_type;
+        StandardFDTDSolver(FourField& source, EMField& E, EMField& B) {
+            Maxwell<EMField, FourField>::setSource(source);
+            Maxwell<EMField, FourField>::setEMFields(E, B);
+            initialize();
+        }
+        virtual void solve() override {
+            step();
+            timeShift();
+            evaluate_EB();
+        }
+        void setPeriodicBoundaryConditions() {
+            periodic_bc = true;
+            typename FourField::BConds_t vector_bcs;
+            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
+                vector_bcs[Idx] = std::make_shared<ippl::PeriodicFace<FourField>>(Idx);
+                return 0;
+            };
+            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
+                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
+                (void)x;
+            };
+            bcsetter(std::make_index_sequence<Dim * 2>{});
+            A_n.setFieldBC(vector_bcs);
+            A_np1.setFieldBC(vector_bcs);
+            A_nm1.setFieldBC(vector_bcs);
+        }
+
+        FourField A_n;
+        FourField A_np1;
+        FourField A_nm1;
+        scalar dt;
+
+    private:
+        void timeShift() {
+            // Look into this, maybe cyclic swap is better
+            Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView());
+            Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView());
+        }
+        void applyBCs() {
+            if constexpr (boundary_conditions == periodic) {
+                A_n.getFieldBC().apply(A_n);
+                A_nm1.getFieldBC().apply(A_nm1);
+                A_np1.getFieldBC().apply(A_np1);
+            } else {
+                Vector<uint32_t, Dim> true_nr = nr_m;
+                true_nr += (A_n.getNghost() * 2);
+                second_order_mur_boundary_conditions bcs{};
+                bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex());
+            }
+        }
+
+    public:
+        void step() {
+            const auto& ldom       = layout_mp->getLocalNDIndex();
+            const int nghost       = A_n.getNghost();
+            const auto aview       = A_n.getView();
+            const auto anp1view    = A_np1.getView();
+            const auto anm1view    = A_nm1.getView();
+            const auto source_view = Maxwell<EMField, FourField>::source_mp->getView();
+
+            const scalar a1 =
+                scalar(2) * (scalar(1) - sq(dt / hr_m[0]) - sq(dt / hr_m[1]) - sq(dt / hr_m[2]));
+            const scalar a2               = sq(dt / hr_m[0]);
+            const scalar a4               = sq(dt / hr_m[1]);
+            const scalar a6               = sq(dt / hr_m[2]);
+            const scalar a8               = sq(dt);
+            Vector<uint32_t, Dim> true_nr = nr_m;
+            true_nr += (nghost * 2);
+            constexpr uint32_t one_if_absorbing_otherwise_0 =
+                boundary_conditions == absorbing ? 1 : 0;
+            Kokkos::parallel_for(
+                "Four potential update", ippl::getRangePolicy(aview, nghost),
+                KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
+                    // global indices
+                    const uint32_t ig = i + ldom.first()[0];
+                    const uint32_t jg = j + ldom.first()[1];
+                    const uint32_t kg = k + ldom.first()[2];
+                    uint32_t val =
+                        uint32_t(ig == one_if_absorbing_otherwise_0)
+                        + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1)
+                        + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2)
+                        + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3)
+                        + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4)
+                        + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5);
+
+                    if (val == 0) {
+                        FourVector_t interior = -anm1view(i, j, k) + a1 * aview(i, j, k)
+                                                + a2 * (aview(i + 1, j, k) + aview(i - 1, j, k))
+                                                + a4 * (aview(i, j + 1, k) + aview(i, j - 1, k))
+                                                + a6 * (aview(i, j, k + 1) + aview(i, j, k - 1))
+                                                + a8 * (-source_view(i, j, k));
+                        anp1view(i, j, k) = interior;
+                    } else {
+                        // std::cout << i << ", " << j << ", " << k << "\n";
+                    }
+                });
+            Kokkos::fence();
+            applyBCs();
+            A_np1.fillHalo();
+        }
+        void evaluate_EB() {
+            ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
+            const scalar idt                          = scalar(1.0) / dt;
+            auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(),
+                 A_nm1  = this->A_nm1.getView();
+            auto source = Maxwell<EMField, FourField>::source_mp->getView();
+            auto Eview  = Maxwell<EMField, FourField>::En_mp->getView();
+            auto Bview  = Maxwell<EMField, FourField>::Bn_mp->getView();
+
+            Kokkos::parallel_for(
+                this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) {
+                    ippl::Vector<scalar, 3> dAdt =
+                        (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
+                    ippl::Vector<scalar, 4> dAdx =
+                        (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
+                    ippl::Vector<scalar, 4> dAdy =
+                        (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
+                    ippl::Vector<scalar, 4> dAdz =
+                        (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
+
+                    ippl::Vector<scalar, 3> grad_phi{dAdx[0], dAdy[0], dAdz[0]};
+                    ippl::Vector<scalar, 3> curlA{
+                        dAdy[3] - dAdz[2],
+                        dAdz[1] - dAdx[3],
+                        dAdx[2] - dAdy[1],
+                    };
+                    Eview(i, j, k) = -dAdt - grad_phi;
+                    Bview(i, j, k) = curlA;
+                });
+            Kokkos::fence();
+        }
+
+        bool periodic_bc;
+
+        typename FourField::Mesh_t* mesh_mp;
+        FieldLayout<Dim>* layout_mp;
+        NDIndex<Dim> domain_m;
+        Vector_t hr_m;
+
+        Vector<int, Dim> nr_m;
+
+        void initialize() {
+            // get layout and mesh
+            layout_mp = &(this->source_mp->getLayout());
+            mesh_mp   = &(this->source_mp->get_mesh());
+
+            // get mesh spacing, domain, and mesh size
+            hr_m = mesh_mp->getMeshSpacing();
+            dt   = hr_m[0] / 2;
+            for (unsigned int i = 0; i < Dim; ++i) {
+                dt = std::min(dt, hr_m[i] / 2);
+            }
+            domain_m = layout_mp->getDomain();
+            for (unsigned int i = 0; i < Dim; ++i)
+                nr_m[i] = domain_m[i].length();
+
+            // initialize fields
+            A_nm1.initialize(*mesh_mp, *layout_mp);
+            A_n.initialize(*mesh_mp, *layout_mp);
+            A_np1.initialize(*mesh_mp, *layout_mp);
+
+            A_nm1 = 0.0;
+            A_n   = 0.0;
+            A_np1 = 0.0;
+        };
+    };
+    /**
+     * @brief Nonstandard Finite-Difference Time-Domain
+     *
+     * @tparam EMField
+     * @tparam FourField
+     */
+    template <typename EMField, typename FourField, fdtd_bc boundary_conditions = periodic>
+    class NonStandardFDTDSolver : Maxwell<EMField, FourField> {
+    public:
+        constexpr static unsigned Dim = EMField::dim;
+        using scalar                  = typename EMField::value_type::value_type;
+        using Vector_t                = Vector<typename EMField::value_type::value_type, Dim>;
+        using FourVector_t            = typename FourField::value_type;
+        NonStandardFDTDSolver(FourField& source, EMField& E, EMField& B) {
+            auto hx = source.get_mesh().getMeshSpacing();
+            if ((hx[2] / hx[0]) * (hx[2] / hx[0]) + (hx[2] / hx[1]) * (hx[2] / hx[1]) >= 1) {
+                std::cerr << "Dispersion-free CFL condition not satisfiable\n";
+                std::abort();
+            }
+            Maxwell<EMField, FourField>::setSource(source);
+            Maxwell<EMField, FourField>::setEMFields(E, B);
+            initialize();
+        }
+        virtual void solve() override {
+            step();
+            timeShift();
+            evaluate_EB();
+        }
+        FourField A_n;
+        FourField A_np1;
+        FourField A_nm1;
+        void setPeriodicBoundaryConditions() {
+            periodic_bc = true;
+            typename FourField::BConds_t vector_bcs;
+            auto bcsetter_single = [&vector_bcs]<size_t Idx>(const std::index_sequence<Idx>&) {
+                vector_bcs[Idx] = std::make_shared<ippl::PeriodicFace<FourField>>(Idx);
+                return 0;
+            };
+            auto bcsetter = [bcsetter_single]<size_t... Idx>(const std::index_sequence<Idx...>&) {
+                int x = (bcsetter_single(std::index_sequence<Idx>{}) ^ ...);
+                (void)x;
+            };
+            bcsetter(std::make_index_sequence<Dim * 2>{});
+            A_n.setFieldBC(vector_bcs);
+            A_np1.setFieldBC(vector_bcs);
+            A_nm1.setFieldBC(vector_bcs);
+        }
+
+    private:
+        void timeShift() {
+            // Look into this, maybe cyclic swap is better
+            Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView());
+            Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView());
+        }
+
+        void applyBCs() {
+            if constexpr (boundary_conditions == periodic) {
+                A_n.getFieldBC().apply(A_n);
+                A_nm1.getFieldBC().apply(A_nm1);
+                A_np1.getFieldBC().apply(A_np1);
+            } else {
+                Vector<uint32_t, Dim> true_nr = nr_m;
+                true_nr += (A_n.getNghost() * 2);
+                second_order_mur_boundary_conditions bcs{};
+                bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex());
+            }
+        }
+
+    public:
+        template <typename scalar>
+        struct nondispersive {
+            scalar a1;
+            scalar a2;
+            scalar a4;
+            scalar a6;
+            scalar a8;
+        };
+        void step() {
+            const auto& ldom       = layout_mp->getLocalNDIndex();
+            const int nghost       = A_n.getNghost();
+            const auto aview       = A_n.getView();
+            const auto anp1view    = A_np1.getView();
+            const auto anm1view    = A_nm1.getView();
+            const auto source_view = Maxwell<EMField, FourField>::source_mp->getView();
+
+            const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1])));
+            nondispersive<scalar> ndisp{
+                .a1 = 2
+                      * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2 * calA) * sq(dt / hr_m[1])
+                         - sq(dt / hr_m[2])),
+                .a2 = sq(dt / hr_m[0]),
+                .a4 = sq(dt / hr_m[1]),
+                .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]),
+                .a8 = sq(dt)};
+            Vector<uint32_t, Dim> true_nr = nr_m;
+            true_nr += (nghost * 2);
+            constexpr uint32_t one_if_absorbing_otherwise_0 =
+                boundary_conditions == absorbing ? 1 : 0;
+            Kokkos::parallel_for(
+                ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) {
+                    uint32_t ig = i + ldom.first()[0];
+                    uint32_t jg = j + ldom.first()[1];
+                    uint32_t kg = k + ldom.first()[2];
+                    uint32_t val =
+                        uint32_t(ig == one_if_absorbing_otherwise_0)
+                        + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1)
+                        + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2)
+                        + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3)
+                        + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4)
+                        + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5);
+
+                    if (!val) {
+                        anp1view(i, j, k) = -anm1view(i, j, k) + ndisp.a1 * aview(i, j, k)
+                                            + ndisp.a2
+                                                  * (calA * aview(i + 1, j, k - 1)
+                                                     + (1 - 2 * calA) * aview(i + 1, j, k)
+                                                     + calA * aview(i + 1, j, k + 1))
+                                            + ndisp.a2
+                                                  * (calA * aview(i - 1, j, k - 1)
+                                                     + (1 - 2 * calA) * aview(i - 1, j, k)
+                                                     + calA * aview(i - 1, j, k + 1))
+                                            + ndisp.a4
+                                                  * (calA * aview(i, j + 1, k - 1)
+                                                     + (1 - 2 * calA) * aview(i, j + 1, k)
+                                                     + calA * aview(i, j + 1, k + 1))
+                                            + ndisp.a4
+                                                  * (calA * aview(i, j - 1, k - 1)
+                                                     + (1 - 2 * calA) * aview(i, j - 1, k)
+                                                     + calA * aview(i, j - 1, k + 1))
+                                            + ndisp.a6 * aview(i, j, k + 1)
+                                            + ndisp.a6 * aview(i, j, k - 1)
+                                            + ndisp.a8 * source_view(i, j, k);
+                    }
+                });
+            Kokkos::fence();
+            A_np1.fillHalo();
+            applyBCs();
+        }
+        void evaluate_EB() {
+            *(Maxwell<EMField, FourField>::En_mp)     = typename EMField::value_type(0);
+            *(Maxwell<EMField, FourField>::Bn_mp)     = typename EMField::value_type(0);
+            ippl::Vector<scalar, 3> inverse_2_spacing = ippl::Vector<scalar, 3>(0.5) / hr_m;
+            const scalar idt                          = scalar(1.0) / dt;
+            auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(),
+                 A_nm1  = this->A_nm1.getView();
+            auto source = Maxwell<EMField, FourField>::source_mp->getView();
+            auto Eview  = Maxwell<EMField, FourField>::En_mp->getView();
+            auto Bview  = Maxwell<EMField, FourField>::Bn_mp->getView();
+
+            Kokkos::parallel_for(
+                this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) {
+                    ippl::Vector<scalar, 3> dAdt =
+                        (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt;
+                    ippl::Vector<scalar, 4> dAdx =
+                        (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0];
+                    ippl::Vector<scalar, 4> dAdy =
+                        (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1];
+                    ippl::Vector<scalar, 4> dAdz =
+                        (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2];
+
+                    ippl::Vector<scalar, 3> grad_phi{dAdx[0], dAdy[0], dAdz[0]};
+                    ippl::Vector<scalar, 3> curlA{
+                        dAdy[3] - dAdz[2],
+                        dAdz[1] - dAdx[3],
+                        dAdx[2] - dAdy[1],
+                    };
+                    Eview(i, j, k) = -dAdt - grad_phi;
+                    Bview(i, j, k) = curlA;
+                });
+            Kokkos::fence();
+        }
+
+        typename FourField::Mesh_t* mesh_mp;
+        FieldLayout<Dim>* layout_mp;
+        NDIndex<Dim> domain_m;
+        Vector_t hr_m;
+        scalar dt;
+        Vector<int, Dim> nr_m;
+        bool periodic_bc;
+
+        void initialize() {
+            // get layout and mesh
+            layout_mp = &(this->source_mp->getLayout());
+            mesh_mp   = &(this->source_mp->get_mesh());
+
+            // get mesh spacing, domain, and mesh size
+            hr_m     = mesh_mp->getMeshSpacing();
+            dt       = hr_m[2];
+            domain_m = layout_mp->getDomain();
+            for (unsigned int i = 0; i < Dim; ++i)
+                nr_m[i] = domain_m[i].length();
+
+            // initialize fields
+            A_nm1.initialize(*mesh_mp, *layout_mp);
+            A_n.initialize(*mesh_mp, *layout_mp);
+            A_np1.initialize(*mesh_mp, *layout_mp);
+
+            A_nm1 = 0.0;
+            A_n   = 0.0;
+            A_np1 = 0.0;
+        };
+    };
+}  // namespace ippl
+
+#endif
\ No newline at end of file
diff --git a/src/MaxwellSolvers/Maxwell.h b/src/MaxwellSolvers/Maxwell.h
index 301bd4914..1fb952791 100644
--- a/src/MaxwellSolvers/Maxwell.h
+++ b/src/MaxwellSolvers/Maxwell.h
@@ -15,7 +15,7 @@
 
 namespace ippl {
 
-    template <typename EMField, typename SourceField>
+    template <typename EMField, typename FourField>
     class Maxwell {
     public:
         constexpr static unsigned Dim = EMField::dim;
diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h
index 70168a79b..0702489f3 100644
--- a/src/Particle/ParticleAttrib.h
+++ b/src/Particle/ParticleAttrib.h
@@ -124,12 +124,23 @@ namespace ippl {
                      const ParticleAttrib<Vector<P2, Field::dim>, Properties...>& pp) const;
 
         template <typename Field, typename P2>
-        void gather(Field& f, const ParticleAttrib<Vector<P2, Field::dim>, Properties...>& pp);
+        void gather(Field& f, const ParticleAttrib<Vector<P2, Field::dim>, Properties...>& pp, const typename Field::Mesh_t::vector_type::value_type offset = 0.5);
 
         T sum();
         T max();
         T min();
         T prod();
+        template<typename returnType, typename reductionLambda>
+            requires (std::is_invocable_v<reductionLambda, T, returnType&>)
+        returnType customReduction(reductionLambda lambda, returnType identity){
+            auto dview = this->dview_m;
+            Kokkos::parallel_reduce(*(this->localNum_mp), KOKKOS_LAMBDA(size_t i, returnType& ref){
+                lambda(dview(i), ref);
+            }, identity);
+            returnType globaltemp;
+            Comm->allreduce(identity, globaltemp, 1, std::plus<returnType>());
+            return globaltemp;
+        }
 
     private:
         view_type dview_m;
diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp
index 3b37f920a..7e0a58fbf 100644
--- a/src/Particle/ParticleAttrib.hpp
+++ b/src/Particle/ParticleAttrib.hpp
@@ -155,7 +155,8 @@ namespace ippl {
     template <typename T, class... Properties>
     template <typename Field, typename P2>
     void ParticleAttrib<T, Properties...>::gather(
-        Field& f, const ParticleAttrib<Vector<P2, Field::dim>, Properties...>& pp) {
+        Field& f, const ParticleAttrib<Vector<P2, Field::dim>, Properties...>& pp,
+        const typename Field::Mesh_t::vector_type::value_type offset) {
         constexpr unsigned Dim = Field::dim;
         using PositionType     = typename Field::Mesh_t::value_type;
 
@@ -186,7 +187,7 @@ namespace ippl {
             "ParticleAttrib::gather", policy_type(0, *(this->localNum_mp)),
             KOKKOS_CLASS_LAMBDA(const size_t idx) {
                 // find nearest grid point
-                vector_type l                        = (pp(idx) - origin) * invdx + 0.5;
+                vector_type l                        = (pp(idx) - origin) * invdx + (1.0 - offset);
                 Vector<int, Field::dim> index        = l;
                 Vector<PositionType, Field::dim> whi = l - index;
                 Vector<PositionType, Field::dim> wlo = 1.0 - whi;
@@ -194,6 +195,7 @@ namespace ippl {
                 Vector<size_t, Field::dim> args = index - lDom.first() + nghost;
 
                 // gather
+                // std::cout << index << std::endl;
                 dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{},
                                                        view, wlo, whi, args);
             });
diff --git a/src/Types/Matrix.h b/src/Types/Matrix.h
new file mode 100644
index 000000000..147286f99
--- /dev/null
+++ b/src/Types/Matrix.h
@@ -0,0 +1,186 @@
+#ifndef MATRIX_H
+#define MATRIX_H
+#include <Kokkos_Core.hpp>
+
+#include "Vector.h"
+namespace ippl {
+    /**
+     * @brief Column major
+     * 
+     * @tparam T Scalar type
+     * @tparam m Number of rows
+     * @tparam n Number of columns
+     */
+    template <typename T, int m, int n>
+    struct matrix {
+        
+        ippl::Vector<ippl::Vector<T, m>, n> data;
+        constexpr static bool squareMatrix = (m == n && m > 0);
+
+        constexpr KOKKOS_INLINE_FUNCTION matrix(T diag)
+            requires(m == n)
+        {
+            for (unsigned i = 0; i < n; i++) {
+                for (unsigned j = 0; j < n; j++) {
+                    data[i][j] = diag * T(i == j);
+                }
+            }
+        }
+        constexpr matrix() = default;
+        KOKKOS_INLINE_FUNCTION constexpr static matrix zero() {
+            matrix<T, m, n> ret;
+            for (unsigned i = 0; i < n; i++) {
+                for (unsigned j = 0; j < n; j++) {
+                    ret.data[i][j] = 0;
+                }
+            }
+            return ret;
+        };
+        KOKKOS_INLINE_FUNCTION void setZero() {
+            for (unsigned i = 0; i < n; i++) {
+                for (unsigned j = 0; j < n; j++) {
+                    data[i][j] = 0;
+                }
+            }
+        }
+
+        KOKKOS_INLINE_FUNCTION T operator()(int i, int j) const noexcept { return data[j][i]; }
+        KOKKOS_INLINE_FUNCTION T& operator()(int i, int j) noexcept { return data[j][i]; }
+        template <typename O>
+        KOKKOS_INLINE_FUNCTION matrix<O, m, n> cast() const noexcept {
+            matrix<O, m, n> ret;
+            for (unsigned i = 0; i < n; i++) {
+                ret.data[i] = data[i].template cast<O>();
+            }
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION matrix<T, m, n> operator+(const matrix<T, m, n>& other) const {
+            matrix<T, m, n> result;
+            for (int i = 0; i < n; ++i) {
+                result.data[i] = data[i] + other.data[i];
+            }
+            return result;
+        }
+
+        // Implement matrix subtraction
+        KOKKOS_INLINE_FUNCTION matrix<T, m, n> operator-(const matrix<T, m, n>& other) const {
+            matrix<T, m, n> result;
+            for (int i = 0; i < n; ++i) {
+                result.data[i] = data[i] - other.data[i];
+            }
+            return result;
+        }
+        KOKKOS_INLINE_FUNCTION matrix<T, m, n> operator*(const T& factor) const {
+            matrix<T, m, n> result;
+            for (int i = 0; i < n; ++i) {
+                result.data[i] = data[i] * factor;
+            }
+            return result;
+        }
+        KOKKOS_INLINE_FUNCTION matrix<T, m, n> operator/(const T& divisor) const {
+            matrix<T, m, n> result;
+            for (int i = 0; i < n; ++i) {
+                result.data[i] = data[i] / divisor;
+            }
+            return result;
+        }
+
+        // Implement matrix-vector multiplication
+        template <unsigned int other_m>
+        KOKKOS_INLINE_FUNCTION ippl::Vector<T, m> operator*(
+            const ippl::Vector<T, other_m>& vec) const {
+            static_assert((int)other_m == n);
+            ippl::Vector<T, m> result(0);
+            for (int i = 0; i < n; ++i) {
+                for(int j = 0;j < m;i++){
+                    //This could be a vector operation, but you never know with ippl::Vector
+                    result[j] += vec[i] * data[i][j];
+                }
+            }
+            return result;
+        }
+        template <int otherm, int othern>
+            requires(n == otherm)
+        KOKKOS_INLINE_FUNCTION matrix<T, m, othern> operator*(
+            const matrix<T, otherm, othern>& otherMat) const noexcept {
+            matrix<T, m, othern> ret(0);
+            for (int i = 0; i < m; i++) {
+                for (int j = 0; j < othern; j++) {
+                    for (int k = 0; k < n; k++) {
+                        ret(i, j) += (*this)(i, k) * otherMat(k, j);
+                    }
+                }
+            }
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION void addCol(int i, int j, T alpha = 1.0) {
+            data[j] += data[i] * alpha;
+        }
+        KOKKOS_INLINE_FUNCTION matrix<T, m, n> inverse() const noexcept
+            requires(squareMatrix)
+        {
+            constexpr int N = m;
+
+            matrix<T, m, n> ret(1.0);
+            matrix<T, m, n> dis(*this);
+
+            for (int i = 0; i < N; i++) {
+                for (int j = i + 1; j < N; j++) {
+                    T alpha = -dis(i, j) / dis(i, i);
+                    dis.addCol(i, j, alpha);
+                    dis(i, j) = 0;
+                    ret.addCol(i, j, alpha);
+                }
+            }
+            for (int i = N - 1; i >= 0; i--) {
+                for (int j = i - 1; j >= 0; j--) {
+                    T alpha = -dis(i, j) / dis(i, i);
+                    dis.addCol(i, j, alpha);
+                    dis(i, j) = 0;
+                    ret.addCol(i, j, alpha);
+                }
+            }
+            for (int i = 0; i < N; i++) {
+                T d     = dis(i, i);
+                T oneod = T(1) / d;
+                dis.data[i] *= oneod;
+                ret.data[i] *= oneod;
+            }
+
+            return ret;
+        }
+
+        template <typename stream_t>
+        friend stream_t& operator<<(stream_t& str, const matrix<T, m, n>& mat) {
+            for (int i = 0; i < m; i++) {
+                for (int j = 0; j < n; j++) {
+                    str << mat.data[j][i] << " ";
+                }
+                str << "\n";
+            }
+            return str;
+        }
+    };
+    /**
+     * @brief Computes the outer product l r^T of two vectors
+     * 
+     * @tparam T Scalar type
+     * @tparam N Output rows
+     * @tparam M Output cols
+     * @param l left operand
+     * @param r right operand 
+     * @return KOKKOS_INLINE_FUNCTION the matrix l r^T 
+     */
+    template <typename T, unsigned N, unsigned M>
+    KOKKOS_INLINE_FUNCTION matrix<T, N, N> outer_product(const ippl::Vector<T, N>& l,
+                                                         const ippl::Vector<T, M>& r) {
+        matrix<T, N, M> ret;
+        for (unsigned i = 0; i < N; i++) {
+            for (unsigned j = 0; j < M; j++) {
+                ret.data[j][i] = l[i] * r[j];
+            }
+        }
+        return ret;
+    }
+}  // namespace ippl
+#endif
\ No newline at end of file
diff --git a/src/Types/Vector.h b/src/Types/Vector.h
index 4380fc8cf..216ddf5b7 100644
--- a/src/Types/Vector.h
+++ b/src/Types/Vector.h
@@ -26,7 +26,7 @@ namespace ippl {
         static constexpr unsigned dim = Dim;
 
         KOKKOS_FUNCTION
-        Vector()
+        constexpr Vector()
             : Vector(value_type(0)) {}
 
         template <typename... Args,
@@ -97,6 +97,42 @@ namespace ippl {
         KOKKOS_INLINE_FUNCTION constexpr const_iterator end() const;
 
         KOKKOS_INLINE_FUNCTION T dot(const Vector<T, Dim>& rhs) const;
+        KOKKOS_INLINE_FUNCTION Vector<T, 3> cross(const Vector<T, 3>& rhs) const;
+        KOKKOS_INLINE_FUNCTION void fill(const T& v){
+            for(unsigned k = 0;k < Dim;k++)(*this)[k] = v;
+        }
+        template<unsigned N>
+        KOKKOS_INLINE_FUNCTION Vector<T, N> tail()const noexcept{
+            Vector<T, N> ret;
+            static_assert(N <= Dim, "N must be smaller than Dim");
+            constexpr unsigned diff = Dim - N;
+            for(unsigned i = 0;i < N;i++){
+                ret[i] = (*this)[i + diff];
+            }
+            return ret;
+        }
+        template<unsigned N>
+        KOKKOS_INLINE_FUNCTION Vector<T, N> head()const noexcept{
+            Vector<T, N> ret;
+            static_assert(N <= Dim, "N must be smaller than Dim");
+            for(unsigned i = 0;i < N;i++){
+                ret[i] = (*this)[i];
+            }
+            return ret;
+        }
+        template<typename OtherType>
+        KOKKOS_INLINE_FUNCTION Vector<OtherType, Dim> cast()const{
+            Vector<OtherType, Dim> ret;
+            for(unsigned k = 0;k < Dim;k++)ret[k] = OtherType((*this)[k]);
+            return ret;
+        }
+        KOKKOS_INLINE_FUNCTION T squaredNorm()const{
+            return this->dot(*this);
+        }
+        KOKKOS_INLINE_FUNCTION T norm()const{
+            using Kokkos::sqrt;
+            return sqrt(squaredNorm());
+        }
 
     //Needs to be public to be a standard-layout type
     //private:
diff --git a/src/Types/Vector.hpp b/src/Types/Vector.hpp
index aa9a6168b..8723ed431 100644
--- a/src/Types/Vector.hpp
+++ b/src/Types/Vector.hpp
@@ -187,6 +187,15 @@ namespace ippl {
         }
         return res;
     }
+    template <typename T, unsigned Dim>
+    KOKKOS_INLINE_FUNCTION Vector<T, 3> Vector<T, Dim>::cross(const Vector<T, 3>& rhs) const {
+        
+        Vector<T, 3> ret;
+        ret[0] = data_m[1] * rhs[2] - data_m[2] * rhs[1];
+        ret[1] = data_m[2] * rhs[0] - data_m[0] * rhs[2];
+        ret[2] = data_m[0] * rhs[1] - data_m[1] * rhs[0];
+        return ret;
+    }
 
     template <typename T, unsigned Dim>
     inline std::ostream& operator<<(std::ostream& out, const Vector<T, Dim>& v) {
diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt
index 358c475cf..dda292fff 100644
--- a/test/solver/CMakeLists.txt
+++ b/test/solver/CMakeLists.txt
@@ -25,6 +25,12 @@ target_link_libraries (
     ${IPPL_LIBS}
     ${MPI_CXX_LIBRARIES}
 )
+add_executable (TestFDTDSolver TestFDTDSolver.cpp)
+target_link_libraries (
+    TestFDTDSolver
+    ${IPPL_LIBS}
+    ${MPI_CXX_LIBRARIES}
+)
 
 if (ENABLE_FFT)
     add_executable (TestGaussian_convergence TestGaussian_convergence.cpp)
diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp
new file mode 100644
index 000000000..62088acc9
--- /dev/null
+++ b/test/solver/TestFDTDSolver.cpp
@@ -0,0 +1,139 @@
+#include <cstddef>
+using std::size_t;
+#include <Kokkos_Core.hpp>
+#include "Ippl.h"
+#include "Types/Vector.h"
+#include "Field/Field.h"
+#include "MaxwellSolvers/FDTD.h"
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include <stb_image_write.h>
+template<typename scalar1, typename... scalar>
+    requires((std::is_floating_point_v<scalar1>))
+KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){
+    uint32_t dim = sizeof...(scalar);
+    ippl::Vector<scalar1, sizeof...(scalar)> vec{scalar1(x - mean)...};
+    scalar1 vecsum(0);
+    for(unsigned d = 0;d < dim;d++){
+        vecsum += vec[d] * vec[d];
+        
+    }
+    #ifndef __CUDA_ARCH__
+    using std::exp;
+    #endif
+    return exp(-(vecsum) / (stddev * stddev)); 
+}
+
+int main(int argc, char* argv[]){
+    ippl::initialize(argc, argv);
+    {
+        using scalar = float;
+        const unsigned dim = 3;
+        using vector_type = ippl::Vector<scalar, 3>;
+        using vector4_type = ippl::Vector<scalar, 4>;
+        using FourField = ippl::Field<vector4_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        using ThreeField = ippl::Field<vector_type, dim, ippl::UniformCartesian<scalar, dim>, typename ippl::UniformCartesian<scalar, dim>::DefaultCentering>;
+        constexpr size_t n = 100;
+        ippl::Vector<uint32_t, 3> nr{n / 2, n / 2, 2 * n};
+        ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]);
+        ippl::Vector<scalar, 3> extents{scalar(1), scalar(1), scalar(1)};
+        std::array<bool, 3> isParallel;
+        isParallel.fill(false);
+        isParallel[2] = true;
+
+        // all parallel layout, standard domain, normal axis order
+        ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel);
+
+        ippl::Vector<scalar, 3> hx;
+        for(unsigned d = 0;d < 3;d++){
+            hx[d] = extents[d] / (scalar)nr[d];
+        }
+        ippl::Vector<scalar, 3> origin = {0,0,0};
+        ippl::UniformCartesian<scalar, 3> mesh(owned, hx, origin);
+        FourField source(mesh, layout);
+        ThreeField E(mesh, layout);
+        ThreeField B(mesh, layout);
+        source = vector4_type(0);
+        ippl::StandardFDTDSolver<ThreeField, FourField, ippl::absorbing> sfdsolver(source, E, B);
+        sfdsolver.setPeriodicBoundaryConditions();
+        auto aview = sfdsolver.A_n.getView();
+        auto am1view = sfdsolver.A_nm1.getView();
+        auto lDom = layout.getLocalNDIndex();
+        size_t nghost = sfdsolver.A_n.getNghost();
+        Kokkos::parallel_for(ippl::getRangePolicy(aview, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){
+            const size_t ig = i + lDom.first()[0] - nghost;
+            const size_t jg = j + lDom.first()[1] - nghost;
+            const size_t kg = k + lDom.first()[2] - nghost;
+            scalar x = scalar(ig) / nr[0];
+            scalar y = scalar(jg) / nr[1];
+            scalar z = scalar(kg) / nr[2];
+            //std::cout << x << ", " << y << ", " << z << "\n";
+            (void)x;
+            (void)y;
+            (void)z;
+            const scalar magnitude = gauss(scalar(0.5), scalar(0.05), z);
+            aview  (i,j,k) = vector4_type{scalar(0), scalar(0), magnitude, scalar(0)};
+            am1view(i,j,k) = vector4_type{scalar(0), scalar(0), magnitude, scalar(0)};
+        });
+        Kokkos::fence();
+        sfdsolver.A_n.getFieldBC().apply(sfdsolver.A_n);
+        //Kokkos::fence();
+        //std::cout << sfdsolver.A_n.getView()(0,0,0) << "\n";
+        //std::cout << sfdsolver.A_n.getView()(0,5,5) << "\n";
+        //goto exit;
+        sfdsolver.A_np1.getFieldBC().apply(sfdsolver.A_np1);
+        sfdsolver.A_nm1.getFieldBC().apply(sfdsolver.A_nm1);
+        sfdsolver.A_n.fillHalo();
+        sfdsolver.A_nm1.fillHalo();
+        int img_width  = 500;
+        int img_height = 500;
+        float* imagedata = new float[img_width * img_height * 3];
+        std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f);
+        uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3];
+        for(size_t s = 0;s < 4 * n;s++){
+            auto ebh = sfdsolver.A_n.getHostMirror();
+            Kokkos::deep_copy(ebh, sfdsolver.A_n.getView());
+            for(int i = 1;i < img_width;i++){
+                for(int j = 1;j < img_height;j++){
+                    int i_remap = (double(i) / (img_width  - 1)) * (nr[2] - 4) + 2;
+                    int j_remap = (double(j) / (img_height - 1)) * (nr[0] - 4) + 2;
+                    if(i_remap >= lDom.first()[2] && i_remap <= lDom.last()[2]){
+                        if(j_remap >= lDom.first()[0] && j_remap <= lDom.last()[0]){
+                            ippl::Vector<scalar, 4> acc = ebh(j_remap + 1 - lDom.first()[0], nr[1] / 2, i_remap + 1 - lDom.first()[2]);
+                            
+                            
+                            float normalized_colorscale_value = acc.norm();
+                            //int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f));
+                            imagedata[(j * img_width + i) * 3 + 0] = normalized_colorscale_value * 255.0f;
+                            imagedata[(j * img_width + i) * 3 + 1] = normalized_colorscale_value * 255.0f;
+                            imagedata[(j * img_width + i) * 3 + 2] = normalized_colorscale_value * 255.0f;
+                        }
+                    }
+                }
+            }
+            std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));});
+            char output[1024] = {0};        
+            snprintf(output, 1023, "%soutimage%.05lu.bmp", "renderdata/", s);
+            if(s % 4 == 0)
+                stbi_write_bmp(output, img_width, img_height, 3, imagedata_final);
+            sfdsolver.solve();
+        }
+        double sum_error = 0;
+        Kokkos::parallel_reduce(ippl::getRangePolicy(aview, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k, double& ref){
+            const size_t ig = i + lDom.first()[0] - nghost;
+            const size_t jg = j + lDom.first()[1] - nghost;
+            const size_t kg = k + lDom.first()[2] - nghost;
+            scalar x = scalar(ig) / nr[0];
+            scalar y = scalar(jg) / nr[1];
+            scalar z = scalar(kg) / nr[2];
+            (void)x;
+            (void)y;
+            (void)z;
+            ref += Kokkos::abs(gauss(scalar(0.5), scalar(0.05), z) - aview(i,j,k)[2]);
+        }, sum_error);
+        std::cout << "Sum: " << sum_error / double(n * n * n) << "\n";
+
+        
+    }
+    //exit:
+    ippl::finalize();
+}
\ No newline at end of file