diff --git a/CMakeLists.txt b/CMakeLists.txt index db615c334..785ebcf54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ macro(remove_flag_from_target _target _flag) endmacro() + if (NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE) set (CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING @@ -34,7 +35,6 @@ else () message (STATUS "${Green}Build type is: ${CMAKE_BUILD_TYPE}${ColourReset}") endif() - if (${CMAKE_BUILD_TYPE} STREQUAL "Release") add_definitions (-DNOPAssert) endif () @@ -42,7 +42,23 @@ endif () add_compile_options (-Wall) add_compile_options (-Wunused) add_compile_options (-Wextra) -#¼add_compile_options (-Werror) + + +# allow deprecated functions +add_compile_options (-Wno-deprecated-declarations) +file( + DOWNLOAD + https://github.com/nlohmann/json/releases/download/v3.11.3/json.hpp + "${CMAKE_BINARY_DIR}/downloaded_headers/json.hpp" +) +file( + DOWNLOAD + https://raw.githubusercontent.com/manuel5975p/stb/master/stb_image_write.h + "${CMAKE_BINARY_DIR}/downloaded_headers/stb_image_write.hpp" +) +include_directories("${CMAKE_BINARY_DIR}/downloaded_headers/") + +#add_compile_options (-Werror) # allow deprecated functions add_compile_options (-Wno-deprecated-declarations) @@ -118,7 +134,7 @@ set (CMAKE_CXX_STANDARD 20) set (CMAKE_CUDA_EXTENSIONS OFF) set (CMAKE_CXX_EXTENSIONS OFF) set (CMAKE_CXX_STANDARD_REQUIRED ON) -set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINGO} -O3 -g ") +set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 -g ") set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") #set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g") @@ -226,6 +242,7 @@ if (ENABLE_ALPINE) endif () message (STATUS "Enable Alpine") add_subdirectory (alpine) + add_subdirectory(fel) endif () diff --git a/config.json b/config.json new file mode 100644 index 000000000..f5cd3b99f --- /dev/null +++ b/config.json @@ -0,0 +1,85 @@ +{ + "timestep-ratio" : 1.0, + "mesh": { + "length-scale" : "micrometers", + "time-scale" : "picoseconds", + "extents": [3400.0, 3400.0, 280.0], + "resolution": [64, 64, 2800], + "mesh-center": [0.0, 0.0, 0.0], + "total-time": 30000.0, + "bunch-time-step": 1.6, + "mesh-truncation-order": 2, + "space-charge": false, + "solver": "NSFD" + }, + "bunch": { + "type": "ellipsoid", + "distribution": "uniform", + "_charge": 1, + "_mass": 1, + "charge": 1.846e8, + "mass": 1.846e8, + "number-of-particles": 1000000, + "gamma": 100.41, + "_gamma": 1.0, + "direction (ignored)": [0.0, 0.0, 1.0], + "position is 0 because that's the center": "", + "position": [0.0, 0.0, 0.0], + "_sigma-position (those are commented out)": [260.0, 260.0, 50.25], + "_sigma-momentum (those are commented out)": [1.0e-8, 1.0e-8, 100.41e-4], + "sigma-position": [260.0, 260.0, 50.25], + "sigma-momentum": [1.0e-8, 1.0e-8, 100.41e-4], + "particle-gammabetas": [0, 0, 0], + "transverse-truncation (Deprecated! see distribution-truncations)": 1040.0, + "longitudinal-truncation (Deprecated! see distribution-truncations)": 90.0, + "distribution-truncations" : [1040.0, 1040.0, 90.0], + "bunching-factor": 0.01, + "update-rule" : {"type" : "lorentz"} + }, + "field": { + "update-rule" : "do", + "strength" : 100, + "field-sampling": { + "sample": true, + "type": "at-point", + "field": ["Ex", "Ey", "Ez"], + "directory": "./", + "base-name": "field-sampling/field", + "rhythm": 3.2, + "position": [0.0, 0.0, 110.0] + } + }, + "undulator": { + "static-undulator": { + "undulator-parameter": 1.417, + "_undulator-parameter": 0, + "period": 30000.0, + "length": 5e6, + "polarization-angle": 0.0 + } + }, + "output":{ + "rhythm" : 30, + "count" : 20, + "path": "../renderdata", + "type" : "E", + "track" : { + "radiation" : "boundary_radiation.txt", + "particle-position" : "p0pos.txt" + } + }, + "fel-output": { + "radiation-power": { + "sample": true, + "type": "at-point", + "directory": "./", + "base-name": "power-sampling/power-NSFD", + "plane-position": 110.0, + "normalized-frequency": 1.0 + } + }, + "experimentation" : { + "stretch-factor" : 1.0, + "resort" : 1.0 + } + } diff --git a/fel/CMakeLists.txt b/fel/CMakeLists.txt new file mode 100644 index 000000000..e986ce8b9 --- /dev/null +++ b/fel/CMakeLists.txt @@ -0,0 +1,16 @@ +include_directories ( + ${CMAKE_SOURCE_DIR}/src +) + +link_directories ( + ${CMAKE_CURRENT_SOURCE_DIR} + ${Kokkos_DIR}/.. +) +set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES}) +set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) + +add_executable (FreeElectronLaser FreeElectronLaser.cpp) +target_link_libraries (FreeElectronLaser ${IPPL_LIBS}) + +add_executable (TestFDTDSolverWithParticles TestFDTDSolverWithParticles.cpp) +target_link_libraries (TestFDTDSolverWithParticles ${IPPL_LIBS}) \ No newline at end of file diff --git a/fel/FreeElectronLaser.cpp b/fel/FreeElectronLaser.cpp new file mode 100644 index 000000000..ce05c8c3e --- /dev/null +++ b/fel/FreeElectronLaser.cpp @@ -0,0 +1,1676 @@ +#include "Ippl.h" + +#include +#include // For popen + +#include "Types/Vector.h" + +#include "Field/Field.h" + +#include "NSFDSolverWithParticles.h" +#define JSON_HAS_RANGES 0 // Merlin compilation +#include +#include +#include +#include +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include + +#include "Undulator.h" +#include "units.h" + +constexpr float turbo_cm[256][3] = { + {0.18995, 0.07176, 0.23217}, {0.19483, 0.08339, 0.26149}, {0.19956, 0.09498, 0.29024}, + {0.20415, 0.10652, 0.31844}, {0.20860, 0.11802, 0.34607}, {0.21291, 0.12947, 0.37314}, + {0.21708, 0.14087, 0.39964}, {0.22111, 0.15223, 0.42558}, {0.22500, 0.16354, 0.45096}, + {0.22875, 0.17481, 0.47578}, {0.23236, 0.18603, 0.50004}, {0.23582, 0.19720, 0.52373}, + {0.23915, 0.20833, 0.54686}, {0.24234, 0.21941, 0.56942}, {0.24539, 0.23044, 0.59142}, + {0.24830, 0.24143, 0.61286}, {0.25107, 0.25237, 0.63374}, {0.25369, 0.26327, 0.65406}, + {0.25618, 0.27412, 0.67381}, {0.25853, 0.28492, 0.69300}, {0.26074, 0.29568, 0.71162}, + {0.26280, 0.30639, 0.72968}, {0.26473, 0.31706, 0.74718}, {0.26652, 0.32768, 0.76412}, + {0.26816, 0.33825, 0.78050}, {0.26967, 0.34878, 0.79631}, {0.27103, 0.35926, 0.81156}, + {0.27226, 0.36970, 0.82624}, {0.27334, 0.38008, 0.84037}, {0.27429, 0.39043, 0.85393}, + {0.27509, 0.40072, 0.86692}, {0.27576, 0.41097, 0.87936}, {0.27628, 0.42118, 0.89123}, + {0.27667, 0.43134, 0.90254}, {0.27691, 0.44145, 0.91328}, {0.27701, 0.45152, 0.92347}, + {0.27698, 0.46153, 0.93309}, {0.27680, 0.47151, 0.94214}, {0.27648, 0.48144, 0.95064}, + {0.27603, 0.49132, 0.95857}, {0.27543, 0.50115, 0.96594}, {0.27469, 0.51094, 0.97275}, + {0.27381, 0.52069, 0.97899}, {0.27273, 0.53040, 0.98461}, {0.27106, 0.54015, 0.98930}, + {0.26878, 0.54995, 0.99303}, {0.26592, 0.55979, 0.99583}, {0.26252, 0.56967, 0.99773}, + {0.25862, 0.57958, 0.99876}, {0.25425, 0.58950, 0.99896}, {0.24946, 0.59943, 0.99835}, + {0.24427, 0.60937, 0.99697}, {0.23874, 0.61931, 0.99485}, {0.23288, 0.62923, 0.99202}, + {0.22676, 0.63913, 0.98851}, {0.22039, 0.64901, 0.98436}, {0.21382, 0.65886, 0.97959}, + {0.20708, 0.66866, 0.97423}, {0.20021, 0.67842, 0.96833}, {0.19326, 0.68812, 0.96190}, + {0.18625, 0.69775, 0.95498}, {0.17923, 0.70732, 0.94761}, {0.17223, 0.71680, 0.93981}, + {0.16529, 0.72620, 0.93161}, {0.15844, 0.73551, 0.92305}, {0.15173, 0.74472, 0.91416}, + {0.14519, 0.75381, 0.90496}, {0.13886, 0.76279, 0.89550}, {0.13278, 0.77165, 0.88580}, + {0.12698, 0.78037, 0.87590}, {0.12151, 0.78896, 0.86581}, {0.11639, 0.79740, 0.85559}, + {0.11167, 0.80569, 0.84525}, {0.10738, 0.81381, 0.83484}, {0.10357, 0.82177, 0.82437}, + {0.10026, 0.82955, 0.81389}, {0.09750, 0.83714, 0.80342}, {0.09532, 0.84455, 0.79299}, + {0.09377, 0.85175, 0.78264}, {0.09287, 0.85875, 0.77240}, {0.09267, 0.86554, 0.76230}, + {0.09320, 0.87211, 0.75237}, {0.09451, 0.87844, 0.74265}, {0.09662, 0.88454, 0.73316}, + {0.09958, 0.89040, 0.72393}, {0.10342, 0.89600, 0.71500}, {0.10815, 0.90142, 0.70599}, + {0.11374, 0.90673, 0.69651}, {0.12014, 0.91193, 0.68660}, {0.12733, 0.91701, 0.67627}, + {0.13526, 0.92197, 0.66556}, {0.14391, 0.92680, 0.65448}, {0.15323, 0.93151, 0.64308}, + {0.16319, 0.93609, 0.63137}, {0.17377, 0.94053, 0.61938}, {0.18491, 0.94484, 0.60713}, + {0.19659, 0.94901, 0.59466}, {0.20877, 0.95304, 0.58199}, {0.22142, 0.95692, 0.56914}, + {0.23449, 0.96065, 0.55614}, {0.24797, 0.96423, 0.54303}, {0.26180, 0.96765, 0.52981}, + {0.27597, 0.97092, 0.51653}, {0.29042, 0.97403, 0.50321}, {0.30513, 0.97697, 0.48987}, + {0.32006, 0.97974, 0.47654}, {0.33517, 0.98234, 0.46325}, {0.35043, 0.98477, 0.45002}, + {0.36581, 0.98702, 0.43688}, {0.38127, 0.98909, 0.42386}, {0.39678, 0.99098, 0.41098}, + {0.41229, 0.99268, 0.39826}, {0.42778, 0.99419, 0.38575}, {0.44321, 0.99551, 0.37345}, + {0.45854, 0.99663, 0.36140}, {0.47375, 0.99755, 0.34963}, {0.48879, 0.99828, 0.33816}, + {0.50362, 0.99879, 0.32701}, {0.51822, 0.99910, 0.31622}, {0.53255, 0.99919, 0.30581}, + {0.54658, 0.99907, 0.29581}, {0.56026, 0.99873, 0.28623}, {0.57357, 0.99817, 0.27712}, + {0.58646, 0.99739, 0.26849}, {0.59891, 0.99638, 0.26038}, {0.61088, 0.99514, 0.25280}, + {0.62233, 0.99366, 0.24579}, {0.63323, 0.99195, 0.23937}, {0.64362, 0.98999, 0.23356}, + {0.65394, 0.98775, 0.22835}, {0.66428, 0.98524, 0.22370}, {0.67462, 0.98246, 0.21960}, + {0.68494, 0.97941, 0.21602}, {0.69525, 0.97610, 0.21294}, {0.70553, 0.97255, 0.21032}, + {0.71577, 0.96875, 0.20815}, {0.72596, 0.96470, 0.20640}, {0.73610, 0.96043, 0.20504}, + {0.74617, 0.95593, 0.20406}, {0.75617, 0.95121, 0.20343}, {0.76608, 0.94627, 0.20311}, + {0.77591, 0.94113, 0.20310}, {0.78563, 0.93579, 0.20336}, {0.79524, 0.93025, 0.20386}, + {0.80473, 0.92452, 0.20459}, {0.81410, 0.91861, 0.20552}, {0.82333, 0.91253, 0.20663}, + {0.83241, 0.90627, 0.20788}, {0.84133, 0.89986, 0.20926}, {0.85010, 0.89328, 0.21074}, + {0.85868, 0.88655, 0.21230}, {0.86709, 0.87968, 0.21391}, {0.87530, 0.87267, 0.21555}, + {0.88331, 0.86553, 0.21719}, {0.89112, 0.85826, 0.21880}, {0.89870, 0.85087, 0.22038}, + {0.90605, 0.84337, 0.22188}, {0.91317, 0.83576, 0.22328}, {0.92004, 0.82806, 0.22456}, + {0.92666, 0.82025, 0.22570}, {0.93301, 0.81236, 0.22667}, {0.93909, 0.80439, 0.22744}, + {0.94489, 0.79634, 0.22800}, {0.95039, 0.78823, 0.22831}, {0.95560, 0.78005, 0.22836}, + {0.96049, 0.77181, 0.22811}, {0.96507, 0.76352, 0.22754}, {0.96931, 0.75519, 0.22663}, + {0.97323, 0.74682, 0.22536}, {0.97679, 0.73842, 0.22369}, {0.98000, 0.73000, 0.22161}, + {0.98289, 0.72140, 0.21918}, {0.98549, 0.71250, 0.21650}, {0.98781, 0.70330, 0.21358}, + {0.98986, 0.69382, 0.21043}, {0.99163, 0.68408, 0.20706}, {0.99314, 0.67408, 0.20348}, + {0.99438, 0.66386, 0.19971}, {0.99535, 0.65341, 0.19577}, {0.99607, 0.64277, 0.19165}, + {0.99654, 0.63193, 0.18738}, {0.99675, 0.62093, 0.18297}, {0.99672, 0.60977, 0.17842}, + {0.99644, 0.59846, 0.17376}, {0.99593, 0.58703, 0.16899}, {0.99517, 0.57549, 0.16412}, + {0.99419, 0.56386, 0.15918}, {0.99297, 0.55214, 0.15417}, {0.99153, 0.54036, 0.14910}, + {0.98987, 0.52854, 0.14398}, {0.98799, 0.51667, 0.13883}, {0.98590, 0.50479, 0.13367}, + {0.98360, 0.49291, 0.12849}, {0.98108, 0.48104, 0.12332}, {0.97837, 0.46920, 0.11817}, + {0.97545, 0.45740, 0.11305}, {0.97234, 0.44565, 0.10797}, {0.96904, 0.43399, 0.10294}, + {0.96555, 0.42241, 0.09798}, {0.96187, 0.41093, 0.09310}, {0.95801, 0.39958, 0.08831}, + {0.95398, 0.38836, 0.08362}, {0.94977, 0.37729, 0.07905}, {0.94538, 0.36638, 0.07461}, + {0.94084, 0.35566, 0.07031}, {0.93612, 0.34513, 0.06616}, {0.93125, 0.33482, 0.06218}, + {0.92623, 0.32473, 0.05837}, {0.92105, 0.31489, 0.05475}, {0.91572, 0.30530, 0.05134}, + {0.91024, 0.29599, 0.04814}, {0.90463, 0.28696, 0.04516}, {0.89888, 0.27824, 0.04243}, + {0.89298, 0.26981, 0.03993}, {0.88691, 0.26152, 0.03753}, {0.88066, 0.25334, 0.03521}, + {0.87422, 0.24526, 0.03297}, {0.86760, 0.23730, 0.03082}, {0.86079, 0.22945, 0.02875}, + {0.85380, 0.22170, 0.02677}, {0.84662, 0.21407, 0.02487}, {0.83926, 0.20654, 0.02305}, + {0.83172, 0.19912, 0.02131}, {0.82399, 0.19182, 0.01966}, {0.81608, 0.18462, 0.01809}, + {0.80799, 0.17753, 0.01660}, {0.79971, 0.17055, 0.01520}, {0.79125, 0.16368, 0.01387}, + {0.78260, 0.15693, 0.01264}, {0.77377, 0.15028, 0.01148}, {0.76476, 0.14374, 0.01041}, + {0.75556, 0.13731, 0.00942}, {0.74617, 0.13098, 0.00851}, {0.73661, 0.12477, 0.00769}, + {0.72686, 0.11867, 0.00695}, {0.71692, 0.11268, 0.00629}, {0.70680, 0.10680, 0.00571}, + {0.69650, 0.10102, 0.00522}, {0.68602, 0.09536, 0.00481}, {0.67535, 0.08980, 0.00449}, + {0.66449, 0.08436, 0.00424}, {0.65345, 0.07902, 0.00408}, {0.64223, 0.07380, 0.00401}, + {0.63082, 0.06868, 0.00401}, {0.61923, 0.06367, 0.00410}, {0.60746, 0.05878, 0.00427}, + {0.59550, 0.05399, 0.00453}, {0.58336, 0.04931, 0.00486}, {0.57103, 0.04474, 0.00529}, + {0.55852, 0.04028, 0.00579}, {0.54583, 0.03593, 0.00638}, {0.53295, 0.03169, 0.00705}, + {0.51989, 0.02756, 0.00780}, {0.50664, 0.02354, 0.00863}, {0.49321, 0.01963, 0.00955}, + {0.47960, 0.01583, 0.01055}}; + +uint64_t nanoTime() { + using namespace std; + using namespace chrono; + return duration_cast(high_resolution_clock::now().time_since_epoch()).count(); +} +// CONFIG +KOKKOS_INLINE_FUNCTION bool isNaN(float x) { +#ifdef __CUDA_ARCH__ + return isnan(x); +#else + return std::isnan(x); +#endif +} +KOKKOS_INLINE_FUNCTION bool isINF(float x) { +#ifdef __CUDA_ARCH__ + return isinf(x); +#else + return std::isinf(x); +#endif +} +KOKKOS_INLINE_FUNCTION bool isNaN(double x) { +#ifdef __CUDA_ARCH__ + return isnan(x); +#else + return std::isnan(x); +#endif +} +KOKKOS_INLINE_FUNCTION bool isINF(double x) { +#ifdef __CUDA_ARCH__ + return isinf(x); +#else + return std::isinf(x); +#endif +} +template +KOKKOS_INLINE_FUNCTION void serial_for(callable c, ippl::Vector from, + ippl::Vector to, Ts... args) { + if constexpr (sizeof...(Ts) == Dim) { + c(args...); + } else { + for (uint32_t i = from[sizeof...(Ts)]; i < to[sizeof...(Ts)]; i++) { + serial_for(c, from, to, args..., i); + } + } +} + +struct config { + using scalar = double; + + // GRID PARAMETERS + ippl::Vector resolution; // Grid resolution in 3D + ippl::Vector extents; // Physical extents of the grid in each dimension + scalar total_time; // Total simulation time + scalar timestep_ratio; // Ratio of timestep to some reference value + + scalar length_scale_in_jobfile; // Length scale defined in the jobfile + scalar temporal_scale_in_jobfile; // Temporal scale defined in the jobfile + + // PARTICLE PARAMETERS + scalar charge; // Particle charge in unit_charge + scalar mass; // Particle mass in unit_mass + uint64_t num_particles; // Number of particles in the simulation + bool space_charge; // Flag for considering space charge effects + + // BUNCH PARAMETERS + ippl::Vector mean_position; // Mean initial position of the particle bunch + ippl::Vector + sigma_position; // Standard deviation of the initial position distribution + ippl::Vector position_truncations; // Truncations of the position distribution + ippl::Vector + sigma_momentum; // Standard deviation of the initial momentum distribution + scalar bunch_gamma; // Relativistic gamma factor of the bunch + + // UNDULATOR PARAMETERS + scalar undulator_K; // Undulator parameter K + scalar undulator_period; // Period of the undulator + scalar undulator_length; // Length of the undulator + + uint32_t output_rhythm; // Frequency of output in timesteps + std::string output_path; // Path to output files + std::unordered_map experiment_options; // Additional experimental options +}; + +template +ippl::Vector getVector(const nlohmann::json& j) { + if (j.is_array()) { + assert(j.size() == Dim); + ippl::Vector ret; + for (unsigned i = 0; i < Dim; i++) + ret[i] = (scalar)j[i]; + return ret; + } else { + std::cerr << "Warning: Obtaining Vector from scalar json\n"; + ippl::Vector ret; + ret.fill((scalar)j); + return ret; + } +} +template +struct DefaultedStringLiteral { + constexpr DefaultedStringLiteral(const char (&str)[N], const T val) + : value(val) { + std::copy_n(str, N, key); + } + + T value; + char key[N]; +}; +template +struct StringLiteral { + constexpr StringLiteral(const char (&str)[N]) { std::copy_n(str, N, value); } + + char value[N]; + constexpr DefaultedStringLiteral operator>>(int t) const noexcept { + return DefaultedStringLiteral(value, t); + } + constexpr size_t size() const noexcept { return N - 1; } +}; +template +constexpr size_t chash() { + size_t hash = 5381; + int c; + + for (size_t i = 0; i < lit.size(); i++) { + c = lit.value[i]; + hash = ((hash << 5) + hash) + c; // hash * 33 + c + } + + return hash; +} +size_t chash(const char* val) { + size_t hash = 5381; + int c; + + while ((c = *val++)) { + hash = ((hash << 5) + hash) + c; // hash * 33 + c + } + + return hash; +} +size_t chash(const std::string& _val) { + size_t hash = 5381; + const char* val = _val.c_str(); + int c; + + while ((c = *val++)) { + hash = ((hash << 5) + hash) + c; // hash * 33 + c + } + + return hash; +} +std::string lowercase_singular(std::string str) { + // Convert string to lowercase + std::transform(str.begin(), str.end(), str.begin(), ::tolower); + + // Check if the string ends with "s" and remove it if it does + if (!str.empty() && str.back() == 's') { + str.pop_back(); + } + + return str; +} +double get_time_multiplier(const nlohmann::json& j) { + std::string length_scale_string = lowercase_singular((std::string)j["mesh"]["time-scale"]); + double time_factor = 1.0; + switch (chash(length_scale_string)) { + case chash<"planck-time">(): + case chash<"plancktime">(): + case chash<"pt">(): + case chash<"natural">(): + time_factor = unit_time_in_seconds; + break; + case chash<"picosecond">(): + time_factor = 1e-12; + break; + case chash<"nanosecond">(): + time_factor = 1e-9; + break; + case chash<"microsecond">(): + time_factor = 1e-6; + break; + case chash<"millisecond">(): + time_factor = 1e-3; + break; + case chash<"second">(): + time_factor = 1.0; + break; + default: + std::cerr << "Unrecognized time scale: " << (std::string)j["mesh"]["time-scale"] + << "\n"; + break; + } + return time_factor; +} +double get_length_multiplier(const nlohmann::json& options) { + std::string length_scale_string = + lowercase_singular((std::string)options["mesh"]["length-scale"]); + double length_factor = 1.0; + switch (chash(length_scale_string)) { + case chash<"planck-length">(): + case chash<"plancklength">(): + case chash<"pl">(): + case chash<"natural">(): + length_factor = unit_length_in_meters; + break; + case chash<"picometer">(): + length_factor = 1e-12; + break; + case chash<"nanometer">(): + length_factor = 1e-9; + break; + case chash<"micrometer">(): + length_factor = 1e-6; + break; + case chash<"millimeter">(): + length_factor = 1e-3; + break; + case chash<"meter">(): + length_factor = 1.0; + break; + default: + std::cerr << "Unrecognized length scale: " + << (std::string)options["mesh"]["length-scale"] << "\n"; + break; + } + return length_factor; +} +config read_config(const char* filepath) { + std::ifstream cfile(filepath); + nlohmann::json j; + cfile >> j; + config::scalar lmult = get_length_multiplier(j); + config::scalar tmult = get_time_multiplier(j); + config ret; + + ret.extents[0] = ((config::scalar)j["mesh"]["extents"][0] * lmult) / unit_length_in_meters; + ret.extents[1] = ((config::scalar)j["mesh"]["extents"][1] * lmult) / unit_length_in_meters; + ret.extents[2] = ((config::scalar)j["mesh"]["extents"][2] * lmult) / unit_length_in_meters; + ret.resolution = getVector(j["mesh"]["resolution"]); + + // std::cerr << (std::string)j["mesh"]["time-scale"] << " " << tmult << " Tumult\n"; + // std::cerr << "Tmult: " << tmult << "\n"; + if (j.contains("timestep-ratio")) { + ret.timestep_ratio = (config::scalar)j["timestep-ratio"]; + } + + else { + ret.timestep_ratio = 1; + } + ret.total_time = ((config::scalar)j["mesh"]["total-time"] * tmult) / unit_time_in_seconds; + ret.space_charge = (bool)(j["mesh"]["space-charge"]); + ret.bunch_gamma = (config::scalar)(j["bunch"]["gamma"]); + if (ret.bunch_gamma < config::scalar(1)) { + std::cerr << "Gamma must be >= 1\n"; + exit(1); + } + assert(j.contains("undulator")); + assert(j["undulator"].contains("static-undulator")); + + ret.undulator_K = j["undulator"]["static-undulator"]["undulator-parameter"]; + ret.undulator_period = ((config::scalar)j["undulator"]["static-undulator"]["period"] * lmult) + / unit_length_in_meters; + ret.undulator_length = ((config::scalar)j["undulator"]["static-undulator"]["length"] * lmult) + / unit_length_in_meters; + assert(!std::isnan(ret.undulator_length)); + assert(!std::isnan(ret.undulator_period)); + assert(!std::isnan(ret.extents[0])); + assert(!std::isnan(ret.extents[1])); + assert(!std::isnan(ret.extents[2])); + assert(!std::isnan(ret.total_time)); + ret.length_scale_in_jobfile = get_length_multiplier(j); + ret.temporal_scale_in_jobfile = get_time_multiplier(j); + ret.charge = (config::scalar)j["bunch"]["charge"] * electron_charge_in_unit_charges; + ret.mass = (config::scalar)j["bunch"]["mass"] * electron_mass_in_unit_masses; + ret.num_particles = (uint64_t)j["bunch"]["number-of-particles"]; + ret.mean_position = + getVector(j["bunch"]["position"]) * lmult / unit_length_in_meters; + ret.sigma_position = + getVector(j["bunch"]["sigma-position"]) * lmult / unit_length_in_meters; + ret.position_truncations = getVector(j["bunch"]["distribution-truncations"]) + * lmult / unit_length_in_meters; + ret.sigma_momentum = getVector(j["bunch"]["sigma-momentum"]); + ret.output_rhythm = j["output"].contains("rhythm") ? uint32_t(j["output"]["rhythm"]) : 0; + ret.output_path = "../data/"; + if (j["output"].contains("path")) { + ret.output_path = j["output"]["path"]; + if (!ret.output_path.ends_with('/')) { + ret.output_path.push_back('/'); + } + } + if (j.contains("experimentation")) { + nlohmann::json je = j["experimentation"]; + for (auto it = je.begin(); it != je.end(); it++) { + ret.experiment_options[it.key()] = double(it.value()); + } + } + return ret; +} +template +using FieldVector = ippl::Vector; +template +struct BunchInitialize { + /* Type of the bunch which is one of the manual, ellipsoid, cylinder, cube, and 3D-crystal. If + * it is manual the charge at points of the position vector will be produced. + */ + // std::string bunchType_; + + /* Type of the distributions (transverse or longitudinal) in the bunch. + */ + std::string distribution_; + + /* Type of the generator for creating the bunch distribution. + */ + std::string generator_; + + /* Total number of macroparticles in the bunch. */ + unsigned int numberOfParticles_; + + /* Total charge of the bunch in pC. */ + scalar cloudCharge_; + + /* Initial energy of the bunch in MeV. */ + scalar initialGamma_; + + /* Initial normalized speed of the bunch. */ + scalar initialBeta_; + + /* Initial movement direction of the bunch, which is a unit vector. */ + FieldVector initialDirection_; + + /* Position of the center of the bunch in the unit of length scale. */ + // std::vector > position_; + FieldVector position_; + + /* Number of macroparticles in each direction for 3Dcrystal type. */ + FieldVector numbers_; + + /* Lattice constant in x, y, and z directions for 3D crystal type. */ + FieldVector latticeConstants_; + + /* Spread in position for each of the directions in the unit of length scale. For the 3D crystal + * type, it will be the spread in position for each micro-bunch of the crystal. + */ + FieldVector sigmaPosition_; + + /* Spread in energy in each direction. */ + FieldVector sigmaGammaBeta_; + + /* Store the truncation transverse distance for the electron generation. + */ + scalar tranTrun_; + + /* Store the truncation longitudinal distance for the electron generation. + */ + scalar longTrun_; + + /* Name of the file for reading the electrons distribution from. + */ + std::string fileName_; + + /* The radiation wavelength corresponding to the bunch length outside the undulator + */ + scalar lambda_; + + /* Bunching factor for the initialization of the bunch. + */ + scalar bF_; + + /* Phase of the bunching factor for the initialization of the bunch. + */ + scalar bFP_; + + /* Boolean flag determining the activation of shot-noise. + */ + bool shotNoise_; + + /* Initial beta vector of the bunch, which is obtained as the product of beta and direction. + */ + FieldVector betaVector_; + + /* Initialize the parameters for the bunch initialization to some first values. */ + // BunchInitialize (); +}; + +// LORENTZ FRAME AND UNDULATOR + +template +BunchInitialize generate_mithra_config( + const config& cfg, const ippl::UniaxialLorentzframe& /*frame_boost unused*/) { + using vec3 = ippl::Vector; + scalar frame_gamma = cfg.bunch_gamma / std::sqrt(1 + 0.5 * cfg.undulator_K * cfg.undulator_K); + BunchInitialize init; + init.generator_ = "random"; + init.distribution_ = "uniform"; + init.initialDirection_ = vec3{0, 0, 1}; + init.initialGamma_ = cfg.bunch_gamma; + init.initialBeta_ = cfg.bunch_gamma == scalar(1) + ? 0 + : (sqrt(cfg.bunch_gamma * cfg.bunch_gamma - 1) / cfg.bunch_gamma); + init.sigmaGammaBeta_ = cfg.sigma_momentum.template cast(); + init.sigmaPosition_ = cfg.sigma_position.template cast(); + + // TODO: Initial bunching factor huh + init.bF_ = 0.01; + init.bFP_ = 0; + init.shotNoise_ = false; + init.cloudCharge_ = cfg.charge; + init.lambda_ = cfg.undulator_period / (2 * frame_gamma * frame_gamma); + init.longTrun_ = cfg.position_truncations[2]; + init.tranTrun_ = cfg.position_truncations[0]; + init.position_ = cfg.mean_position.cast(); + init.betaVector_ = ippl::Vector{0, 0, init.initialBeta_}; + init.numberOfParticles_ = cfg.num_particles; + + init.numbers_ = 0; // UNUSED + init.latticeConstants_ = vec3{0, 0, 0}; // UNUSED + + return init; +} +template +struct Charge { + Double q; /* Charge of the point in the unit of electron charge. */ + FieldVector rnp, rnm; /* Position vector of the charge. */ + FieldVector gb; /* Normalized velocity vector of the charge. */ + + /* Double flag determining if the particle is passing the entrance point of the undulator. This + * flag can be used for better boosting the bunch to the moving frame. We need to consider it to + * be double, because this flag needs to be communicated during bunch update. + */ + Double e; + + // Charge(); +}; +template +using ChargeVector = std::list>; +template +void initializeBunchEllipsoid(BunchInitialize bunchInit, ChargeVector& chargeVector, + int rank, int size, int ia) { + /* Correct the number of particles if it is not a multiple of four. + */ + if (bunchInit.numberOfParticles_ % 4 != 0) { + unsigned int n = bunchInit.numberOfParticles_ % 4; + bunchInit.numberOfParticles_ += 4 - n; + // printmessage(std::string(__FILE__), __LINE__, std::string("Warning: The number of + // particles in the bunch is not a multiple of four. ") + + // std::string("It is corrected to ") + std::to_string(bunchInit.numberOfParticles_) ); + } + + /* Save the initially given number of particles. */ + unsigned int Np = bunchInit.numberOfParticles_, i, Np0 = chargeVector.size(); + + /* Declare the required parameters for the initialization of charge vectors. */ + Charge charge; + charge.q = bunchInit.cloudCharge_ / Np; + FieldVector gb = bunchInit.initialGamma_ * bunchInit.betaVector_; + FieldVector r(0.0); + FieldVector t(0.0); + Double t0; //, g; + Double zmin = 1e100; + Double Ne, bF, bFi; + unsigned int bmi; + std::vector randomNumbers; + + /* The initialization in group of four particles should only be done if there exists an + * undulator in the interaction. + */ + unsigned int ng = (bunchInit.lambda_ == 0.0) ? 1 : 4; + + /* Check the bunching factor. */ + if (bunchInit.bF_ > 2.0 || bunchInit.bF_ < 0.0) { + // printmessage(std::string(__FILE__), __LINE__, std::string("The bunching factor can not be + // larger than one or a negative value !!!") ); exit(1); + } + + /* If the generator is random we should make sure that different processors do not produce the + * same random numbers. + */ + if (bunchInit.generator_ == "random") { + /* Initialize the random number generator. + */ + srand(time(NULL)); + /* Np / ng * 20 is the maximum number of particles. + */ + randomNumbers.resize(Np / ng * 20, 0.0); + for (unsigned int ri = 0; ri < Np / ng * 20; ri++) + randomNumbers[ri] = + (float)std::min(1 - 1e-7, std::max(1e-7, ((double)rand()) / RAND_MAX)); + } + + /* Declare the generator function depending on the input. + */ + auto generate = [&](unsigned int n, unsigned int m) { + // if ( bunchInit.generator_ == "random" ) + return (randomNumbers.at(n * 2 * Np / ng + m)); + // else + // return ( randomNumbers[ n * 2 * Np/ng + m ] ); + // TODO: Return halton properly + // return ( halton(n,m) ); + }; + + /* Declare the function for injecting the shot noise. + */ + auto insertCharge = [&](Charge q) { + for (unsigned int ii = 0; ii < ng; ii++) { + /* The random modulation is introduced depending on the shot-noise being activated. + */ + if (bunchInit.shotNoise_) { + /* Obtain the number of beamlet. + */ + bmi = int((charge.rnp[2] - zmin) / bunchInit.lambda_); + + /* Obtain the phase and amplitude of the modulation. + */ + bFi = bF * sqrt(-2.0 * log(generate(8, bmi))); + + q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; + + q.rnp[2] -= bunchInit.lambda_ / M_PI * bFi + * sin(2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + + 2.0 * M_PI * generate(9, bmi)); + } else if (bunchInit.lambda_ != 0.0) { + q.rnp[2] = charge.rnp[2] - bunchInit.lambda_ / 4 * ii; + + q.rnp[2] -= bunchInit.lambda_ / M_PI * bunchInit.bF_ + * sin(2.0 * M_PI / bunchInit.lambda_ * q.rnp[2] + + bunchInit.bFP_ * M_PI / 180.0); + } + + /* Set this charge into the charge vector. */ + chargeVector.push_back(q); + } + }; + + /* If the shot noise is on, we need the minimum value of the bunch z coordinate to be able to + * calculate the FEL bucket number. */ + if (bunchInit.shotNoise_) { + for (i = 0; i < Np / ng; i++) { + if (bunchInit.distribution_ == "uniform") + zmin = std::min( + Double(2.0 * generate(2, i + Np0) - 1.0) * bunchInit.sigmaPosition_[2], zmin); + else if (bunchInit.distribution_ == "gaussian") + zmin = std::min( + (Double)(bunchInit.sigmaPosition_[2] * sqrt(-2.0 * log(generate(2, i + Np0))) + * sin(2.0 * M_PI * generate(3, i + Np0))), + zmin); + else { + std::cout << std::string( + "The longitudinal type is not correctly given to the code !!!\n"); + exit(1); + } + } + + if (bunchInit.distribution_ == "uniform") + for (; i < unsigned(Np / ng + * (1.0 + + 2.0 * bunchInit.lambda_ * sqrt(2.0 * M_PI) + / (2.0 * bunchInit.sigmaPosition_[2]))); + i++) { + t0 = 2.0 * bunchInit.lambda_ * sqrt(-2.0 * log(generate(2, i + Np0))) + * sin(2.0 * M_PI * generate(3, i + Np0)); + t0 += (t0 < 0.0) ? (-bunchInit.sigmaPosition_[2]) : (bunchInit.sigmaPosition_[2]); + + zmin = std::min(t0, zmin); + } + + zmin = zmin + bunchInit.position_[2]; + + /* Obtain the average number of electrons per FEL beamlet. + */ + Ne = bunchInit.cloudCharge_ * bunchInit.lambda_ / (2.0 * bunchInit.sigmaPosition_[2]); + + /* Set the bunching factor level for the shot noise depending on the given values. + */ + bF = (bunchInit.bF_ == 0.0) ? 1.0 / sqrt(Ne) : bunchInit.bF_; + } + + /* Determine the properties of each charge point and add them to the charge vector. */ + for (i = rank; i < Np / ng; i += size) { + /* Determine the transverse coordinate. */ + r[0] = bunchInit.sigmaPosition_[0] * sqrt(-2.0 * log(generate(0, i + Np0))) + * cos(2.0 * M_PI * generate(1, i + Np0)); + r[1] = bunchInit.sigmaPosition_[1] * sqrt(-2.0 * log(generate(0, i + Np0))) + * sin(2.0 * M_PI * generate(1, i + Np0)); + + /* Determine the longitudinal coordinate. */ + if (bunchInit.distribution_ == "uniform") + r[2] = (2.0 * generate(2, i + Np0) - 1.0) * bunchInit.sigmaPosition_[2]; + else if (bunchInit.distribution_ == "gaussian") + r[2] = bunchInit.sigmaPosition_[2] * sqrt(-2.0 * log(generate(2, i + Np0))) + * sin(2.0 * M_PI * generate(3, i + Np0)); + else { + exit(1); + } + + /* Determine the transverse momentum. */ + t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt(-2.0 * log(generate(4, i + Np0))) + * cos(2.0 * M_PI * generate(5, i + Np0)); + t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt(-2.0 * log(generate(4, i + Np0))) + * sin(2.0 * M_PI * generate(5, i + Np0)); + t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt(-2.0 * log(generate(6, i + Np0))) + * cos(2.0 * M_PI * generate(7, i + Np0)); + + if (fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ + && fabs(r[2]) < bunchInit.longTrun_) { + /* Shift the generated charge to the center position and momentum space. + */ + charge.rnp = bunchInit.position_; + charge.rnp += r; + + charge.gb = gb; + charge.gb += t; + if (std::isinf(gb[2])) { + std::cerr << "[Warning] Gammabeta obtained an klonked here\n"; + } + + /* Insert this charge and the mirrored ones into the charge vector. + */ + insertCharge(charge); + } + } + + /* If the longitudinal type of the bunch is uniform a tapered part needs to be added to remove + * the CSE from the tail of the bunch. + */ + if (bunchInit.distribution_ == "uniform") { + for (; i < unsigned(uint32_t(Np / ng) + * (1.0 + + 2.0 * bunchInit.lambda_ * sqrt(2.0 * M_PI) + / (2.0 * bunchInit.sigmaPosition_[2]))); + i += size) { + r[0] = bunchInit.sigmaPosition_[0] * sqrt(-2.0 * log(generate(0, i + Np0))) + * cos(2.0 * M_PI * generate(1, i + Np0)); + r[1] = bunchInit.sigmaPosition_[1] * sqrt(-2.0 * log(generate(0, i + Np0))) + * sin(2.0 * M_PI * generate(1, i + Np0)); + + /* Determine the longitudinal coordinate. */ + r[2] = 2.0 * bunchInit.lambda_ * sqrt(-2.0 * log(generate(2, i + Np0))) + * sin(2.0 * M_PI * generate(3, i + Np0)); + r[2] += (r[2] < 0.0) ? (-bunchInit.sigmaPosition_[2]) : (bunchInit.sigmaPosition_[2]); + + /* Determine the transverse momentum. + */ + t[0] = bunchInit.sigmaGammaBeta_[0] * sqrt(-2.0 * log(generate(4, i + Np0))) + * cos(2.0 * M_PI * generate(5, i + Np0)); + t[1] = bunchInit.sigmaGammaBeta_[1] * sqrt(-2.0 * log(generate(4, i + Np0))) + * sin(2.0 * M_PI * generate(5, i + Np0)); + t[2] = bunchInit.sigmaGammaBeta_[2] * sqrt(-2.0 * log(generate(6, i + Np0))) + * cos(2.0 * M_PI * generate(7, i + Np0)); + if (fabs(r[0]) < bunchInit.tranTrun_ && fabs(r[1]) < bunchInit.tranTrun_ + && fabs(r[2]) < bunchInit.longTrun_) { + /* Shift the generated charge to the center position and momentum space. + */ + charge.rnp = bunchInit.position_[ia]; + charge.rnp += r; + + charge.gb = gb; + + charge.gb += t; + /* Insert this charge and the mirrored ones into the charge vector. + */ + insertCharge(charge); + } + } + } + + /* Reset the value for the number of particle variable according to the installed number of + * macro-particles and perform the corresponding changes. */ + bunchInit.numberOfParticles_ = chargeVector.size(); +} + +template +void boost_bunch(ChargeVector& chargeVectorn_, Double frame_gamma) { + Double frame_beta = std::sqrt((double)frame_gamma * frame_gamma - 1.0) / double(frame_gamma); + Double zmaxL = -1.0e100, zmaxG; + for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++) { + Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + if (std::isinf(g)) { + std::cerr << __FILE__ << ": " << __LINE__ << " inf gb: " << iterQ->gb << ", g = " << g + << "\n"; + abort(); + } + Double bz = iterQ->gb[2] / g; + iterQ->rnp[2] *= frame_gamma; + + iterQ->gb[2] = frame_gamma * g * (bz - frame_beta); + + zmaxL = std::max(zmaxL, iterQ->rnp[2]); + } + zmaxG = zmaxL; + struct { + Double zu_; + Double beta_; + } bunch_; + bunch_.zu_ = zmaxG; + bunch_.beta_ = frame_beta; + + /****************************************************************************************************/ + + for (auto iterQ = chargeVectorn_.begin(); iterQ != chargeVectorn_.end(); iterQ++) { + Double g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + iterQ->rnp[0] += iterQ->gb[0] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta; + iterQ->rnp[1] += iterQ->gb[1] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta; + iterQ->rnp[2] += iterQ->gb[2] / g * (iterQ->rnp[2] - bunch_.zu_) * frame_beta; + if (std::isnan(iterQ->rnp[2])) { + std::cerr << iterQ->gb[2] << ", " << g << ", " << iterQ->rnp[2] << ", " << bunch_.zu_ + << ", " << frame_beta << "\n"; + std::cerr << __FILE__ << ": " << __LINE__ << " Particle has NaN velocity or position\n"; + abort(); + } + } +} + +template +size_t initialize_bunch_mithra(bunch_type& bunch, const BunchInitialize& bunchInit, + scalar frame_gamma) { + ChargeVector temporary_charge_list; + initializeBunchEllipsoid(bunchInit, temporary_charge_list, 0, 1, 0); + for (auto& c : temporary_charge_list) { + if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) + std::cout << "Pos before boost: " << c.rnp << "\n"; + if (std::isinf(c.rnp[0]) || std::isinf(c.rnp[1]) || std::isinf(c.rnp[2])) + std::cout << "Pos before boost: " << c.rnp << "\n"; + } + boost_bunch(temporary_charge_list, frame_gamma); + for (auto& c : temporary_charge_list) { + if (std::isnan(c.rnp[0]) || std::isnan(c.rnp[1]) || std::isnan(c.rnp[2])) { + std::cout << "Pos after boost: " << c.rnp << "\n"; + break; + } + } + Kokkos::View*, Kokkos::HostSpace> positions("", temporary_charge_list.size()); + Kokkos::View*, Kokkos::HostSpace> gammabetas("", temporary_charge_list.size()); + auto iterQ = temporary_charge_list.begin(); + for (size_t i = 0; i < temporary_charge_list.size(); i++) { + assert_isreal(iterQ->gb[0]); + assert_isreal(iterQ->gb[1]); + assert_isreal(iterQ->gb[2]); + assert(iterQ->gb[2] != 0.0f); + scalar g = std::sqrt(1.0 + iterQ->gb.squaredNorm()); + assert_isreal(g); + scalar bz = iterQ->gb[2] / g; + assert_isreal(bz); + (void)bz; + positions(i) = iterQ->rnp; + gammabetas(i) = iterQ->gb; + ++iterQ; + } + if (temporary_charge_list.size() > bunch.getLocalNum()) { + bunch.create(temporary_charge_list.size() - bunch.getLocalNum()); + } + Kokkos::View*> dpositions("", temporary_charge_list.size()); + Kokkos::View*> dgammabetas("", temporary_charge_list.size()); + + Kokkos::deep_copy(dpositions, positions); + Kokkos::deep_copy(dgammabetas, gammabetas); + Kokkos::deep_copy(bunch.R_nm1.getView(), positions); + Kokkos::deep_copy(bunch.gamma_beta.getView(), gammabetas); + auto rview = bunch.R.getView(), rm1view = bunch.R_nm1.getView(), + gbview = bunch.gamma_beta.getView(); + ; + Kokkos::parallel_for( + temporary_charge_list.size(), KOKKOS_LAMBDA(size_t i) { + rview(i) = dpositions(i); + rm1view(i) = dpositions(i); + gbview(i) = dgammabetas(i); + }); + Kokkos::fence(); + + return temporary_charge_list.size(); +} + +// END LORENTZ FRAME AND UNDULATOR AND BUNCH + +// PREAMBLE + +template + requires((std::is_floating_point_v)) +KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x) { + uint32_t dim = sizeof...(scalar); + ippl::Vector vec{scalar1(x - mean)...}; + for (unsigned d = 0; d < dim; d++) { + vec[d] = vec[d] * vec[d]; + } +#ifndef __CUDA_ARCH__ + using std::exp; +#endif + return exp(-(vec.sum()) / (stddev * stddev)); +} + +template +KOKKOS_INLINE_FUNCTION ippl::Vector cross_prod(const ippl::Vector& a, + const ippl::Vector& b) { + ippl::Vector ret{0.0, 0.0, 0.0}; + ret[0] = a[1] * b[2] - a[2] * b[1]; + ret[1] = a[2] * b[0] - a[0] * b[2]; + ret[2] = a[0] * b[1] - a[1] * b[0]; + return ret; +} +template +KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf( + const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos, + int nghost = 1) { + Kokkos::pair, ippl::Vector> ret; + ippl::Vector relpos = pos - origin; + ippl::Vector gridpos = relpos / hr; + ippl::Vector ipos; + ipos = gridpos.template cast(); + ippl::Vector fracpos; + for (unsigned k = 0; k < 3; k++) { + fracpos[k] = gridpos[k] - (int)ipos[k]; + } + ipos += ippl::Vector(nghost); + ret.first = ipos; + ret.second = fracpos; + return ret; +} +template +KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, + ippl::Vector hr, ippl::Vector orig, + const ippl::Vector& pos, const scalar value, + int nghost = 1) { + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost); + ipos -= ldom.first(); + if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 + || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 + || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for (unsigned i = 0; i < 8; i++) { + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for (unsigned d = 0; d < 3; d++) { + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(value); + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); +} +template +KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, + ippl::Vector hr, ippl::Vector orig, + const ippl::Vector& pos, + const ippl::Vector& value, int nghost = 1) { + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos, nghost); + ipos -= ldom.first(); + if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 + || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 + || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { + + return; // Return in case of out of bounds + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for (unsigned i = 0; i < 8; i++) { + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for (unsigned d = 0; d < 3; d++) { + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); +} + +template +KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, + ippl::Vector hr, + ippl::Vector origin, + const ippl::Vector& from, + const ippl::Vector& to, + const scalar factor, int nghost = 1) { + Kokkos::pair, ippl::Vector> from_grid = + gridCoordinatesOf(hr, origin, from); + Kokkos::pair, ippl::Vector> to_grid = + gridCoordinatesOf(hr, origin, to); + + if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] + && from_grid.first[2] == to_grid.first[2]) { + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + to) * scalar(0.5)), + ippl::Vector((to - from) * factor)); + + return; + } + ippl::Vector relay; + const ippl::Vector orig = origin; + using Kokkos::max; + using Kokkos::min; + for (unsigned int i = 0; i < 3; i++) { + relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + + scalar(1.0) * hr[i] + orig[i], + max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + + scalar(0.0) * hr[i] + orig[i], + scalar(0.5) * (to[i] + from[i]))); + } + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((from + relay) * scalar(0.5)), + ippl::Vector((relay - from) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)), + ippl::Vector((to - relay) * factor)); +} + +namespace ippl { + + template + struct nondispersive { + scalar a1; + scalar a2; + scalar a4; + scalar a6; + scalar a8; + }; + template + struct Bunch_eb : public ippl::ParticleBase { + using scalar = _scalar; + + // Constructor for the Bunch class, taking a PLayout reference + Bunch_eb(PLayout& playout) + : ippl::ParticleBase(playout) { + // Add attributes to the particle bunch + this->addAttribute(Q); // Charge attribute + this->addAttribute(mass); // Mass attribute + this->addAttribute( + gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) + this->addAttribute(R_np1); // Position attribute for the next time step + this->addAttribute(R_nm1); // Position attribute for the next time step + this->addAttribute(EB_gather); // Electric field attribute for particle gathering + } + + // Destructor for the Bunch class + ~Bunch_eb() {} + + // Define container types for various attributes + using charge_container_type = ippl::ParticleAttrib; + using velocity_container_type = ippl::ParticleAttrib>; + using vector_container_type = ippl::ParticleAttrib>; + using vector4_container_type = ippl::ParticleAttrib>; + + // Declare instances of the attribute containers + charge_container_type Q; // Charge container + charge_container_type mass; // Mass container + velocity_container_type gamma_beta; // Gamma-beta container + typename ippl::ParticleBase::particle_position_type + R_np1; // Position container for the next time step + typename ippl::ParticleBase::particle_position_type + R_nm1; // Position container for the previous time step + ippl::ParticleAttrib, 2>> + EB_gather; // Electric field container for particle gathering + }; + + /** + * @brief Struct representing the state of an FEL (Free Electron Laser) simulation. + * + * @tparam scalar Type of the scalar values (e.g., float, double). + */ + template + struct FELSimulationState { + // clang-format off + + constexpr static unsigned int dim = 3; ///< Dimensionality of the simulation (3D). + using Vector_t = ippl::Vector; ///< Type alias for a 3D vector. + using value_type = ippl::Vector; ///< Type alias for a 4D vector. + using EB_type = ippl::Vector, 2>; ///< Type alias for an electric and magnetic field vector pair. + using Mesh_t = ippl::UniformCartesian; ///< Type alias for a uniform Cartesian mesh. + + bool periodic_bc; ///< Flag indicating if periodic boundary conditions are used. + + /// Type alias for a field with 4D vector values. + using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + + /// Type alias for a field with 3D vector values. + using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + + /// Type alias for a field with 4D vector values (repeated for consistency). + using VField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + + /// Type alias for a field with electric and magnetic field vector pairs. + using EBField_t = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + + /// Type alias for the view type of a field with 4D vector values. + using view_type = typename VField_t::view_type; + + /// Type alias for the view type of a field with electric and magnetic field vector pairs. + using ev_view_type = typename EBField_t::view_type; + + /// Type alias for the view type of a field with 3D vector values (electric field). + using e_view_type = typename ThreeField::view_type; + + /// Type alias for the view type of a field with 3D vector values (magnetic field). + using b_view_type = typename ThreeField::view_type; + + /// Solver and particle handler. + ippl::NSFDSolverWithParticles fieldsAndParticles; + + Vector_t hr_m; ///< Mesh spacing vector. + ippl::Vector nr_global; ///< Global number of cells in each dimension. + ippl::Vector nr_local; ///< Local number of cells in each dimension. + config m_config; ///< Configuration object. + UniaxialLorentzframe ulb; ///< Uniaxial Lorentz frame transformation along z-axis. + undulator_parameters uparams; ///< Undulator parameters. + Undulator undulator; ///< Undulator object. + + /** + * @brief Construct a new FELSimulationState object. + * + * Initializes the simulation state with the given field layout, mesh, number of particles, and configuration. + * + * @details + * - `ulb.gamma_m = cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5)` is the frame's gamma factor. + * + * @param layout Field layout. + * @param mesch Mesh object. + * @param nparticles Number of particles. + * @param cfg Configuration object. + */ + FELSimulationState(FieldLayout& layout, Mesh_t& mesch, size_t nparticles, config cfg) + : fieldsAndParticles(layout, mesch, nparticles), + m_config(cfg), + ulb(UniaxialLorentzframe::from_gamma(cfg.bunch_gamma / std::sqrt(1 + cfg.undulator_K * cfg.undulator_K * 0.5))), + uparams(cfg.undulator_K, cfg.undulator_period, cfg.undulator_length), + undulator(uparams, 2.0 * cfg.sigma_position[2] * ulb.gamma_m * ulb.gamma_m) { + + hr_m = mesch.getMeshSpacing(); + nr_global = ippl::Vector{ + uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), + uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1), + uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1) + }; + nr_local = ippl::Vector{ + uint32_t(layout.getLocalNDIndex()[0].last() - layout.getLocalNDIndex()[0].first() + 1), + uint32_t(layout.getLocalNDIndex()[1].last() - layout.getLocalNDIndex()[1].first() + 1), + uint32_t(layout.getLocalNDIndex()[2].last() - layout.getLocalNDIndex()[2].first() + 1) + }; + } + + /** + * @brief Get the time step size. + * + * @return scalar Time step size. + */ + scalar dt() const noexcept { + return fieldsAndParticles.field_solver.dt; + } + + /** + * @brief Perform a simulation step. + * + * Solves the fields and updates the state using the undulator and Lorentz frame transformations. + */ + void step() { + auto und = this->undulator; + auto lb = this->ulb; + fieldsAndParticles.solve(KOKKOS_LAMBDA(ippl::Vector pos, scalar time) { + lb.primedToUnprimed(pos, time); + auto eb = und(pos); + return lb.transform_EB(eb); + }); + } + }; + // clang-format on +} // namespace ippl +bool writeBMPToFD(FILE* fd, int width, int height, const unsigned char* data) { + const int channels = 3; // RGB + const int stride = width * channels; + std::vector flippedData(data, data + stride * height); + + // Use stb_image_write to write the BMP image to the file descriptor + if (!stbi_write_bmp_to_func( + [](void* context, void* data, int size) { + FILE* f = reinterpret_cast(context); + fwrite(data, 1, size, f); + }, + fd, width, height, channels, flippedData.data())) { + return false; + } + + return true; +} +/** + * @brief Helper function to gather data from a field using interpolation. + * + * This function computes the value at a given position by interpolating the values in the field. + * The position is transformed from physical coordinates to grid coordinates and then used to + * gather data from the field. The interpolation weights are computed based on the relative + * positions within the grid cells. + * + * @tparam View Type of the field view. + * @tparam T Scalar type for coordinates and grid spacing. + * @tparam Dim Dimensionality of the position vector. + * + * @param v The field view from which data is to be gathered. + * @param pos The position vector in physical coordinates where the data is to be gathered. + * @param origin The origin of the grid in physical coordinates. + * @param hr The grid spacing vector. + * @param lDom The local domain indices of the grid. + * + * @return typename View::value_type The interpolated value at the specified position. + */ +template +KOKKOS_INLINE_FUNCTION typename View::value_type gather_helper(const View& v, + const ippl::Vector& pos, + const ippl::Vector& origin, + const ippl::Vector& hr, + const ippl::NDIndex<3>& lDom, + int nghost = 1) { + using vector_type = ippl::Vector; + + vector_type l; + + for (unsigned k = 0; k < 3; k++) { + l[k] = (pos[k] - origin[k]) / hr[k] + + 1.0; // gather is implemented in a way such that this 1 is necessary here. + // This is NOT the nghost, this needs to be a hardcoded 1.0 + } + + ippl::Vector index{int(l[0]), int(l[1]), int(l[2])}; + ippl::Vector whi = l - index; + ippl::Vector wlo(1.0); + wlo -= whi; + + ippl::Vector args = index - lDom.first() + nghost; + for (unsigned k = 0; k < 3; k++) { + if (args[k] >= v.extent(k) || args[k] == 0) { + return typename View::value_type(0); + } + } + return ippl::detail::gatherFromField(std::make_index_sequence<(1u << Dim)>{}, v, wlo, + whi, args); +} +template +scalar test_gauss_law(uint32_t n) { + ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n); + ippl::Vector nr{n / 2, n / 2, 2 * n}; + ippl::Vector extents{meter_in_unit_lengths, meter_in_unit_lengths, + meter_in_unit_lengths}; + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + //[-1, 1] box + ippl::Vector hx = extents / nr.cast(); + using vector_type = ippl::Vector; + ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), + scalar(-0.5 * meter_in_unit_lengths), + scalar(-0.5 * meter_in_unit_lengths)}; + ippl::UniformCartesian mesh(owned, hx, origin); + + uint32_t pcount = 1 << 20; + // config cfg{}; + // cfg.space_charge = true; + ippl::NSFDSolverWithParticles field_state(layout, mesh, + pcount); + field_state.particles.Q = scalar(coulomb_in_unit_charges) / pcount; + field_state.particles.mass = scalar(1.0) / pcount; // Irrelefant + auto pview = field_state.particles.R.getView(); + auto p1view = field_state.particles.R_nm1.getView(); + + Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); + + Kokkos::parallel_for( + pcount, KOKKOS_LAMBDA(size_t i) { + auto state = random_pool.get_state(); + pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[1] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + p1view(i) = pview(i); + random_pool.free_state(state); + }); + Kokkos::fence(); + field_state.J = scalar(0.0); + field_state.scatterBunch(); + for (size_t i = 0; i < 8 * n; i++) { + field_state.field_solver.solve(); + } + + Kokkos::fence(); + auto lDom = field_state.E.getLayout().getLocalNDIndex(); + + std::ofstream line("gauss_line.txt"); + typename ippl::FELSimulationState::e_view_type::host_mirror_type view = + Kokkos::create_mirror_view(field_state.E.getView()); + for (unsigned i = 1; i < nr[2]; i++) { + vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; + pos[2] += hx[2] * scalar(i); + ippl::Vector ebg = gather_helper(view, pos, origin, hx, lDom); + line << pos.norm() * unit_length_in_meters << " " + << ebg.norm() * unit_electric_fieldstrength_in_voltpermeters << "\n"; + } + return 0.0f; +} +template +scalar test_amperes_law(uint32_t n) { + ippl::NDIndex<3> owned(n / 2, n / 2, 2 * n); + ippl::Vector nr{n / 2, n / 2, 2 * n}; + ippl::Vector extents{meter_in_unit_lengths, (scalar)(4 * meter_in_unit_lengths), + meter_in_unit_lengths}; + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + //[-1, 1] box + ippl::Vector hx; + for (unsigned d = 0; d < 3; d++) { + hx[d] = extents[d] / (scalar)nr[d]; + } + using vector_type = ippl::Vector; + ippl::Vector origin = {scalar(-0.5 * meter_in_unit_lengths), + scalar(-2.0 * meter_in_unit_lengths), + scalar(-0.5 * meter_in_unit_lengths)}; + ippl::UniformCartesian mesh(owned, hx, origin); + + uint32_t pcount = 1 << 20; + ippl::NSFDSolverWithParticles field_state(layout, mesh, + pcount); + field_state.particles.Q = scalar(4.0 * coulomb_in_unit_charges) / pcount; + field_state.particles.mass = scalar(1.0) / pcount; // Irrelefant + auto pview = field_state.particles.R.getView(); + auto p1view = field_state.particles.R_nm1.getView(); + constexpr scalar vy = meter_in_unit_lengths / second_in_unit_times; + scalar timestep = field_state.field_solver.dt; + Kokkos::Random_XorShift64_Pool<> random_pool(/*seed=*/12345); + + Kokkos::parallel_for( + pcount, KOKKOS_LAMBDA(size_t i) { + // bunch.gammaBeta[i].fill(scalar(0)); + auto state = random_pool.get_state(); + pview(i)[0] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[2] = state.normal(0.0, 0.01 * meter_in_unit_lengths); + pview(i)[1] = origin[1] + 4.0 * meter_in_unit_lengths * scalar(i) / (pcount - 1); + p1view(i) = pview(i); + p1view(i)[1] -= vy * timestep; + random_pool.free_state(state); + }); + Kokkos::fence(); + field_state.J = scalar(0.0); + field_state.scatterBunch(); + for (size_t i = 0; i < 8 * n; i++) { + field_state.field_solver.solve(); + } + field_state.field_solver.evaluate_EB(); + Kokkos::fence(); + auto lDom = field_state.B.getLayout().getLocalNDIndex(); + + std::ofstream line("ampere_line.txt"); + + typename ippl::FELSimulationState::b_view_type::host_mirror_type view = + Kokkos::create_mirror_view(field_state.B.getView()); + // ippl::Vector, 2> ebg = gather_helper(view, ippl::Vector{0,0,0}, origin, hx, lDom); + for (unsigned i = 1; i < nr[2]; i++) { + vector_type pos = {scalar(0), scalar(0), (scalar)origin[2]}; + pos[2] += hx[2] * scalar(i); + ippl::Vector ebg = gather_helper(view, pos, origin, hx, lDom); + // line << pos.norm() * unit_length_in_meters << " " << (view(n / 4, n / 4, i)[0].norm()) * + // unit_electric_fieldstrength_in_voltpermeters << "\n"; + line << pos.norm() * unit_length_in_meters << " " + << ebg[0] * unit_magnetic_fluxdensity_in_tesla << "\n"; + } + return 0.0f; +} +int main(int argc, char* argv[]) { + using scalar = float; + ippl::initialize(argc, argv); + { + // test_gauss_law(64); + // test_amperes_law(64); + // goto exit; + config cfg = read_config("../config.json"); + const scalar frame_gamma = std::max( + decltype(cfg)::scalar(1), + cfg.bunch_gamma + / std::sqrt(1.0 + cfg.undulator_K * cfg.undulator_K * config::scalar(0.5))); + cfg.extents[2] *= frame_gamma; + cfg.total_time /= frame_gamma; + + const scalar frame_beta = std::sqrt(1.0 - 1.0 / double(frame_gamma * frame_gamma)); + const scalar frame_gammabeta = frame_gamma * frame_beta; + ippl::UniaxialLorentzframe frame_boost(frame_gammabeta); + ippl::undulator_parameters uparams(cfg.undulator_K, cfg.undulator_period, + cfg.undulator_length); + + BunchInitialize mithra_config = generate_mithra_config(cfg, frame_boost); + ippl::NDIndex<3> owned(cfg.resolution[0], cfg.resolution[1], cfg.resolution[2]); + + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + //[-1, 1] box + ippl::Vector hx = {scalar(cfg.extents[0] / cfg.resolution[0]), + scalar(cfg.extents[1] / cfg.resolution[1]), + scalar(cfg.extents[2] / cfg.resolution[2])}; + ippl::Vector origin = {scalar(-cfg.extents[0] * 0.5), + scalar(-cfg.extents[1] * 0.5), + scalar(-cfg.extents[2] * 0.5)}; + ippl::UniformCartesian mesh(owned, hx, origin); + std::cout << "hx: " << hx << "\n"; + std::cout << "origin: " << origin << "\n"; + std::cout << "extents: " << cfg.extents << std::endl; + if (sq(hx[2] / hx[0]) + sq(hx[2] / hx[1]) >= 1) { + std::cerr << "Dispersion relation not satisfiable\n"; + abort(); + } + + ippl::FELSimulationState fdtd_state( + layout, mesh, 0 /*no resize function exists wtf cfg.num_particles*/, cfg); + + if (ippl::Comm->rank() == 0) { + std::cout << "Init particles: " << std::endl; + size_t actual_pc = initialize_bunch_mithra(fdtd_state.fieldsAndParticles.particles, + mithra_config, frame_gamma); + fdtd_state.fieldsAndParticles.particles.Q = cfg.charge / actual_pc; + fdtd_state.fieldsAndParticles.particles.mass = cfg.mass / actual_pc; + } else { + fdtd_state.fieldsAndParticles.particles.create(0); + } + { + auto rview = fdtd_state.fieldsAndParticles.particles.R.getView(); + auto rm1view = fdtd_state.fieldsAndParticles.particles.R_nm1.getView(); + ippl::Vector meanpos = + fdtd_state.fieldsAndParticles.particles.R.sum() + * (1.0 / fdtd_state.fieldsAndParticles.particles.getTotalNum()); + + Kokkos::parallel_for( + fdtd_state.fieldsAndParticles.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) { + rview(i) -= meanpos; + rm1view(i) -= meanpos; + }); + } + fdtd_state.fieldsAndParticles.particles.setParticleBC(ippl::NO); + + size_t timesteps_required = std::ceil(cfg.total_time / fdtd_state.dt()); + uint64_t starttime = nanoTime(); + std::ofstream rad; + FILE* ffmpeg_file = nullptr; + if (ippl::Comm->rank() == 0) { + rad = std::ofstream("radiation.txt"); + const char* ffmpegCmd = + "ffmpeg -y -f image2pipe -vcodec bmp -r 30 -i - -vf " + "scale=force_original_aspect_ratio=decrease:force_divisible_by=2,format=yuv420p " + "-c:v libx264 -movflags +faststart ffmpeg_popen.mkv"; + if (cfg.output_rhythm != 0) { + ffmpeg_file = popen(ffmpegCmd, "w"); + } + } + + for (size_t i = 0; i < timesteps_required; i++) { + if (ippl::Comm->rank() == 0) { + std::cout << "Doing step: " << i << std::endl; + } + fdtd_state.step(); + auto ldom = layout.getLocalNDIndex(); + auto nrg = fdtd_state.nr_global; + auto eview = fdtd_state.fieldsAndParticles.E.getView(); + auto bview = fdtd_state.fieldsAndParticles.B.getView(); + double radiation = 0.0; + Kokkos::parallel_reduce( + ippl::getRangePolicy(eview, 1), + KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k, double& ref) { + Kokkos::pair, ippl::Vector> buncheb{ + eview(i, j, k), bview(i, j, k)}; + ippl::Vector Elab = frame_boost.inverse_transform_EB(buncheb).first; + ippl::Vector Blab = frame_boost.inverse_transform_EB(buncheb).second; + uint32_t kg = k + ldom.first()[2]; + if (kg == nrg[2] - 3) { + ref += Elab.cross(Blab)[2]; + } + }, + radiation); + double radiation_in_watt_on_this_rank = + radiation + * double(unit_powerdensity_in_watt_per_square_meter * unit_length_in_meters + * unit_length_in_meters) + * fdtd_state.hr_m[0] * fdtd_state.hr_m[1]; + double radiation_in_watt_global = 0.0; + MPI_Reduce(&radiation_in_watt_on_this_rank, &radiation_in_watt_global, 1, MPI_DOUBLE, + MPI_SUM, 0, ippl::Comm->getCommunicator()); + if (ippl::Comm->rank() == 0) { + ippl::Vector pos{0, 0, (float)cfg.extents[2]}; + frame_boost.primedToUnprimed(pos, fdtd_state.dt() * i); + rad << pos[2] * unit_length_in_meters << " " << radiation_in_watt_global << "\n"; + } + + int rank = ippl::Comm->rank(); + int size = ippl::Comm->size(); + if ((cfg.output_rhythm != 0) && (i % cfg.output_rhythm == 0)) { + int img_height = 400; + int img_width = int(400.0 * cfg.extents[2] / cfg.extents[0]); + float* imagedata = new float[img_width * img_height * 3]; + std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f); + float* idata_recvbuffer = new float[img_width * img_height * 3]; + int floatcount = img_width * img_height * 3; + uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; + std::memset(imagedata, 0, img_width * img_height * 3 * sizeof(float)); + auto phmirror = fdtd_state.fieldsAndParticles.particles.R.getHostMirror(); + Kokkos::deep_copy(phmirror, fdtd_state.fieldsAndParticles.particles.R.getView()); + for (size_t hi = 0; hi < fdtd_state.fieldsAndParticles.particles.getLocalNum(); + hi++) { + ippl::Vector ppos = phmirror(hi); + ppos -= mesh.getOrigin(); + ppos /= cfg.extents.cast(); + int x_imgcoord = ppos[2] * img_width; + int y_imgcoord = ppos[0] * img_height; + // printf("%d, %d\n", x_imgcoord, y_imgcoord); + if (y_imgcoord >= 0 && x_imgcoord >= 0 && x_imgcoord < img_width + && y_imgcoord < img_height) { + const float intensity = + std::min(255.f, (img_width * img_height * 15.f) / cfg.num_particles); + // std::cout << intensity << "\n"; + imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] = + std::min(255.f, imagedata[(y_imgcoord * img_width + x_imgcoord) * 3 + 1] + + intensity); + } + }; + + auto eh = fdtd_state.fieldsAndParticles.E.getHostMirror(); + auto bh = fdtd_state.fieldsAndParticles.B.getHostMirror(); + Kokkos::deep_copy(eh, fdtd_state.fieldsAndParticles.E.getView()); + Kokkos::deep_copy(bh, fdtd_state.fieldsAndParticles.B.getView()); + + { + for (int i = 1; i < img_width; i++) { + for (int j = 1; j < img_height; j++) { + int i_remap = + (double(i) / (img_width - 1)) * (fdtd_state.nr_global[2] - 4) + 2; + int j_remap = + (double(j) / (img_height - 1)) * (fdtd_state.nr_global[0] - 4) + 2; + if (i_remap >= ldom.first()[2] && i_remap <= ldom.last()[2]) { + if (j_remap >= ldom.first()[0] && j_remap <= ldom.last()[0]) { + ippl::Vector E = eh(j_remap + 1 - ldom.first()[0], + fdtd_state.nr_global[1] / 2, + i_remap + 1 - ldom.first()[2]); + ippl::Vector B = bh(j_remap + 1 - ldom.first()[0], + fdtd_state.nr_global[1] / 2, + i_remap + 1 - ldom.first()[2]); + + ippl::Vector poynting = E.cross(B); + Kokkos::pair, ippl::Vector> + eblab = frame_boost.inverse_transform_EB( + Kokkos::make_pair, + ippl::Vector>(E, B)); + ippl::Vector poyntinglab = + eblab.first.cross(eblab.second); + poynting = poyntinglab; + if (poynting.norm() > 0) { + // std::cout << poynting.norm() << "\n"; + } + float normalized_colorscale_value = + std::sqrt(poynting.norm()) * 0.00001f; + int index = (int)std::max( + 0.0f, + std::min(normalized_colorscale_value * 255.0f, 255.0f)); + imagedata[(j * img_width + i) * 3 + 0] += + turbo_cm[index][0] + * 255.0f; // * std::min(normalized_colorscale_value * + // 100.0f + 50.0f, 150.0f);//(unsigned + // char)(std::min(255u, (unsigned int)(0.5f * + // std::sqrt(std::sqrt(poynting.squaredNorm()))))); + imagedata[(j * img_width + i) * 3 + 1] += + turbo_cm[index][1] + * 255.0f; // * std::min(normalized_colorscale_value * + // 100.0f + 50.0f, 150.0f);//(unsigned + // char)(std::min(255u, (unsigned int)(0.5f * + // std::sqrt(std::sqrt(poynting.squaredNorm()))))); + imagedata[(j * img_width + i) * 3 + 2] += + turbo_cm[index][2] + * 255.0f; // * std::min(normalized_colorscale_value * + // 100.0f + 50.0f, 150.0f);//(unsigned + // char)(std::min(255u, (unsigned int)(0.5f * + // std::sqrt(std::sqrt(poynting.squaredNorm()))))); + } + } + } + } + } + /* + * This code should be replaced by the Image Gathering anyway, but that's a + * different PR + */ + int mask = 1; + while (mask < size) { + int partner = rank ^ mask; + { + if ((rank & mask) == 0) { + // Send data to partner + MPI_Recv(idata_recvbuffer, floatcount, MPI_FLOAT, partner, 0, + ippl::Comm->getCommunicator(), MPI_STATUS_IGNORE); + // Apply image summation + for (int f = 0; f < floatcount; f++) { + imagedata[f] += idata_recvbuffer[f]; + } + } else { + MPI_Send(imagedata, floatcount, MPI_FLOAT, partner, 0, + ippl::Comm->getCommunicator()); + // Receive data from partner and apply reduction + } + } + mask <<= 1; // Move to next bit position for pairing + } + if (rank == 0) { + char output[1024] = {0}; + + snprintf(output, 1023, "%soutimage%.05lu.bmp", cfg.output_path.c_str(), i); + std::transform(imagedata, imagedata + img_height * img_width * 3, + imagedata_final, [](float x) { + return (unsigned char)std::min(255.0f, std::max(0.0f, x)); + }); + if (ffmpeg_file != nullptr) + writeBMPToFD(ffmpeg_file, img_width, img_height, imagedata_final); + } + delete[] imagedata; + delete[] idata_recvbuffer; + delete[] imagedata_final; + } + } + uint64_t endtime = nanoTime(); + if (ippl::Comm->rank() == 0) + std::cout << ippl::Comm->size() << " " << double(endtime - starttime) / 1e9 + << std::endl; + } + // exit: + ippl::finalize(); +} \ No newline at end of file diff --git a/fel/LorentzTransform.h b/fel/LorentzTransform.h new file mode 100644 index 000000000..876bb22a6 --- /dev/null +++ b/fel/LorentzTransform.h @@ -0,0 +1,318 @@ +#ifndef LORENTZ_TRANSFORM_H +#define LORENTZ_TRANSFORM_H +#include + +#include "Types/Matrix.h" +#include "Types/Vector.h" +namespace ippl { + /** + * @brief A template struct representing a uniaxial Lorentz frame. + * + * The UniaxialLorentzframe struct represents a Lorentz transformation along a specific axis + * (default is the z-axis, axis 2). It includes methods for transforming vectors and + * electromagnetic fields between frames, as well as for computing gamma factors and beta + * velocities. The struct uses Kokkos for portability and performance. + * + * @tparam T The scalar type used for computations (e.g., float, double). + * @tparam axis The axis along which the Lorentz transformation is applied (default is 2, the + * z-axis). + */ + template + struct UniaxialLorentzframe { + /// Speed of light, set to 1 for natural units. + constexpr static T c = 1.0; + /// Alias for the scalar type used in the struct. + using scalar = T; + /// Alias for a 3-dimensional vector of the scalar type. + using Vector3 = ippl::Vector; + + /// Beta velocity component along the specified axis. + scalar beta_m; + /// Product of gamma factor and beta velocity. + scalar gammaBeta_m; + /// Gamma factor for the Lorentz transformation. + scalar gamma_m; + + /** + * @brief Create a UniaxialLorentzframe from a given gamma factor. + * + * This static member function constructs a UniaxialLorentzframe object using a given + * gamma factor. It computes the corresponding beta velocity and gamma*beta product. + * + * @param gamma The gamma factor for the Lorentz transformation. + * @return A UniaxialLorentzframe object with computed beta and gamma*beta. + */ + KOKKOS_INLINE_FUNCTION static UniaxialLorentzframe from_gamma(const scalar gamma) { + UniaxialLorentzframe ret; + ret.gamma_m = gamma; + scalar beta = Kokkos::sqrt(1 - double(1) / (gamma * gamma)); + scalar gammabeta = gamma * beta; + ret.beta_m = beta; + ret.gammaBeta_m = gammabeta; + return ret; + } + + /** + * @brief Get a UniaxialLorentzframe with negative beta velocity. + * + * This member function returns a new UniaxialLorentzframe object with the same gamma + * factor but with a negative beta velocity, effectively representing the inverse + * Lorentz transformation. + * + * @return A UniaxialLorentzframe object with negative beta velocity. + */ + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe negative() const noexcept { + UniaxialLorentzframe ret; + ret.beta_m = -beta_m; + ret.gammaBeta_m = -gammaBeta_m; + ret.gamma_m = gamma_m; + return ret; + } + + /// Default constructor. + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe() = default; + + /** + * @brief Construct a UniaxialLorentzframe from a gamma*beta value. + * + * This constructor initializes a UniaxialLorentzframe object using a given gamma*beta + * value. It computes the corresponding beta velocity and gamma factor. + * + * @param gammaBeta The product of the gamma factor and beta velocity. + */ + KOKKOS_INLINE_FUNCTION UniaxialLorentzframe(const scalar gammaBeta) { + using Kokkos::sqrt; + gammaBeta_m = gammaBeta; + beta_m = gammaBeta / sqrt(1 + gammaBeta * gammaBeta); + gamma_m = sqrt(1 + gammaBeta * gammaBeta); + } + + /** + * @brief Transform a spatial vector from the primed frame to the unprimed frame. + * + * This member function transforms a given spatial vector from the primed frame to the + * unprimed frame using the Lorentz transformation along the specified axis and the given + * time. + * + * @param arg The spatial vector to be transformed. + * @param time The time component for the transformation. + */ + KOKKOS_INLINE_FUNCTION void primedToUnprimed(Vector3& arg, scalar time) const noexcept { + arg[axis] = gamma_m * (arg[axis] + beta_m * time); + } + + /** + * @brief Transform electric and magnetic fields from the unprimed to the primed frame. + * + * This member function transforms a pair of electric and magnetic field vectors from the + * unprimed frame to the primed frame using the Lorentz transformation along the specified + * axis. + * + * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the + * unprimed frame. + * @return A pair of vectors representing the electric and magnetic fields in the primed + * frame. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> transform_EB( + const Kokkos::pair, ippl::Vector>& unprimedEB) const noexcept { + Kokkos::pair, ippl::Vector> ret; + ippl::Vector betavec{0, 0, beta_m}; + ippl::Vector vnorm{axis == 0, axis == 1, axis == 2}; + ret.first = + ippl::Vector(unprimedEB.first + betavec.cross(unprimedEB.second)) * gamma_m + - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm))); + ret.second = + ippl::Vector(unprimedEB.second - betavec.cross(unprimedEB.first)) * gamma_m + - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm))); + ret.first[axis] -= (gamma_m - 1) * unprimedEB.first[axis]; + ret.second[axis] -= (gamma_m - 1) * unprimedEB.second[axis]; + return ret; + } + + /** + * @brief Transform electric and magnetic fields from the primed to the unprimed frame. + * + * This member function transforms a pair of electric and magnetic field vectors from the + * primed frame to the unprimed frame using the inverse Lorentz transformation along the + * specified axis. + * + * @param primedEB A pair of vectors representing the electric and magnetic fields in the + * primed frame. + * @return A pair of vectors representing the electric and magnetic fields in the unprimed + * frame. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> + inverse_transform_EB( + const Kokkos::pair, ippl::Vector>& primedEB) const noexcept { + return negative().transform_EB(primedEB); + } + }; + /** + * @brief Represents a Lorentz frame with associated transformations and operations. + * + * This template struct models a Lorentz frame used in special relativity to handle + * transformations between reference frames moving at a constant velocity relative to each other. + * The LorentzFrame supports various operations such as converting between frames and transforming + * vectors and electromagnetic fields. + * + * @tparam T The type for scalar values, typically a floating-point type. + */ + template + struct LorentzFrame { + /// Speed of light in natural units (c = 1) + constexpr static T c = 1.0; + + /// Alias for the scalar type. + using scalar = T; + + /// Alias for a 3D vector of type T. + using Vector3 = ippl::Vector; + + /// Velocity vector (beta) in the frame, normalized by the speed of light. + ippl::Vector beta_m; + + /// Product of gamma and beta vectors. + ippl::Vector gammaBeta_m; + + /// Lorentz factor gamma. + T gamma_m; + + /** + * @brief Constructs a LorentzFrame from a given gammaBeta vector. + * + * This constructor initializes the Lorentz frame using a vector that represents + * the product of the Lorentz factor gamma and the velocity vector beta. + * The velocity vector beta is then computed by normalizing this gammaBeta vector, + * and the Lorentz factor gamma is computed from the magnitude of gammaBeta. + * + * @param gammaBeta The gammaBeta vector. + */ + KOKKOS_INLINE_FUNCTION LorentzFrame(const ippl::Vector& gammaBeta) { + beta_m = gammaBeta / sqrt(1 + gammaBeta.dot(gammaBeta)); + gamma_m = sqrt(1 + gammaBeta.dot(gammaBeta)); + gammaBeta_m = gammaBeta; + } + /** + * @brief Constructs a uniaxial Lorentz frame given a gamma value and an axis. + * + * This static member function creates a Lorentz frame that has motion along one of the principal + * axes (x, y, or z) based on the provided gamma value. The beta vector is calculated accordingly + * and aligned with the specified axis. + * + * @tparam axis The axis along which the frame is moving ('x', 'y', or 'z'). + * @param gamma The Lorentz factor gamma, which must be greater than or equal to 1. + * @return LorentzFrame A Lorentz frame with motion along the specified axis. + */ + template + static LorentzFrame uniaxialGamma(T gamma) { + static_assert(axis == 'x' || axis == 'y' || axis == 'z', "Only xyz axis suproted"); + assert(gamma >= 1.0 && "Gamma must be >= 1"); + using Kokkos::sqrt; + + T beta = gamma == 1 ? T(0) : sqrt(gamma * gamma - 1) / gamma; + Vector3 arg{0, 0, 0}; + arg[axis - 'x'] = gamma * beta; + return LorentzFrame(arg); + } + + /** + * @brief Constructs the Lorentz transformation matrix for converting from unprimed to primed frame. + * + * This function computes the Lorentz transformation matrix that converts vectors + * from the unprimed reference frame to the primed reference frame. The transformation + * accounts for the relative velocity between the frames using the stored beta and gamma values. + * + * If the magnitude of the beta vector is very small (less than 1e-10), the function returns + * an identity matrix, indicating no significant transformation. + * + * @return matrix The transformation matrix. + */ + KOKKOS_INLINE_FUNCTION matrix unprimedToPrimed() const noexcept { + T betaMagsq = beta_m.dot(beta_m); + using Kokkos::abs; + if (abs(betaMagsq) < 1e-10) { + return matrix(T(1)); + } + ippl::Vector betaSquared = beta_m * beta_m; + + matrix ret; + + ret.data[0] = ippl::Vector{gamma_m, -gammaBeta_m[0], -gammaBeta_m[1], -gammaBeta_m[2]}; + ret.data[1] = ippl::Vector{-gammaBeta_m[0], 1 + (gamma_m - 1) * betaSquared[0] / betaMagsq, + (gamma_m - 1) * beta_m[0] * beta_m[1] / betaMagsq, + (gamma_m - 1) * beta_m[0] * beta_m[2] / betaMagsq}; + ret.data[2] = ippl::Vector{-gammaBeta_m[1], + (gamma_m - 1) * beta_m[0] * beta_m[1] / betaMagsq, + 1 + (gamma_m - 1) * betaSquared[1] / betaMagsq, + (gamma_m - 1) * beta_m[1] * beta_m[2] / betaMagsq}; + ret.data[3] = ippl::Vector{-gammaBeta_m[2], + (gamma_m - 1) * beta_m[0] * beta_m[2] / betaMagsq, + (gamma_m - 1) * beta_m[1] * beta_m[2] / betaMagsq, + 1 + (gamma_m - 1) * betaSquared[2] / betaMagsq}; + return ret; + } + /** + * @brief Constructs the Lorentz transformation matrix for converting from primed to unprimed frame. + * + * This function computes the inverse of the Lorentz transformation matrix obtained + * from unprimedToPrimed(), allowing conversion of vectors from the primed reference frame + * back to the unprimed reference frame. It utilizes the matrix inversion function to achieve this. + * + * @return matrix The inverse transformation matrix. This could also be done by taking the negative velocity + */ + KOKKOS_INLINE_FUNCTION matrix primedToUnprimed() const noexcept { + return unprimedToPrimed().inverse(); + } + + KOKKOS_INLINE_FUNCTION Vector3 transformV(const Vector3& unprimedV) const noexcept { + T factor = T(1.0) / (1.0 - unprimedV.dot(beta_m)); + Vector3 ret = unprimedV * scalar(1.0 / gamma_m); + ret -= beta_m; + ret += beta_m * (unprimedV.dot(beta_m) * (gamma_m / (gamma_m + 1))); + return ret * factor; + } + + KOKKOS_INLINE_FUNCTION Vector3 transformGammabeta(const Vector3& gammabeta) const noexcept { + using Kokkos::sqrt; + T gamma = sqrt(T(1) + gammabeta.dot(gammabeta)); + Vector3 beta = gammabeta; + beta /= gamma; + Vector3 betatrf = transformV(beta); + betatrf *= sqrt(1 - betatrf.dot(betatrf)); + return betatrf; + } + /** + * @brief Transforms electric and magnetic fields from the unprimed to the primed frame. + * + * This function applies the Lorentz transformation to a pair of 3D vectors representing + * the electric field (E) and magnetic field (B) in the unprimed reference frame, + * converting them to the corresponding fields in the primed reference frame. The transformation + * accounts for relativistic effects on the fields due to the relative motion of the frames. + * + * @param unprimedEB A pair of vectors representing the electric and magnetic fields in the unprimed frame. + * @return Kokkos::pair, ippl::Vector> A pair of transformed vectors representing the electric and magnetic fields in the primed frame. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> transform_EB( + const Kokkos::pair, ippl::Vector>& unprimedEB) const noexcept { + using Kokkos::sqrt; + Kokkos::pair, ippl::Vector> ret; + Vector3 vnorm = beta_m * (T(1.0) / sqrt(beta_m.dot(beta_m))); + + ret.first = + (ippl::Vector(unprimedEB.first + beta_m.cross(unprimedEB.second)) * gamma_m) + - (vnorm * (gamma_m - 1) * (unprimedEB.first.dot(vnorm))); + ret.second = + (ippl::Vector(unprimedEB.second - beta_m.cross(unprimedEB.first)) * gamma_m) + - (vnorm * (gamma_m - 1) * (unprimedEB.second.dot(vnorm))); + return ret; + } + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> + inverse_transform_EB( + const Kokkos::pair, ippl::Vector>& primedEB) const noexcept { + ippl::Vector mgb(gammaBeta_m * scalar(-1.0)); + return LorentzFrame(mgb).transform_EB(primedEB); + } + }; + +} // namespace ippl +#endif diff --git a/fel/NSFDSolverWithParticles.h b/fel/NSFDSolverWithParticles.h new file mode 100644 index 000000000..28f9668bc --- /dev/null +++ b/fel/NSFDSolverWithParticles.h @@ -0,0 +1,383 @@ +#ifndef NSFDSOLVERWITHPARTICLES_H +#define NSFDSOLVERWITHPARTICLES_H +#include "MaxwellSolvers/FDTD.h" +namespace ippl{ + template + struct Bunch : public ippl::ParticleBase { + using scalar = _scalar; + + // Constructor for the Bunch class, taking a PLayout reference + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) { + // Add attributes to the particle bunch + this->addAttribute(Q); // Charge attribute + this->addAttribute(mass); // Mass attribute + this->addAttribute( + gamma_beta); // Gamma-beta attribute (product of relativistiv gamma and beta) + this->addAttribute(R_np1); // Position attribute for the next time step + this->addAttribute(R_nm1); // Position attribute for the next time step + this->addAttribute(E_gather); // Electric field attribute for particle gathering + this->addAttribute(B_gather); // Magnedit field attribute for particle gathering + } + + // Destructor for the Bunch class + ~Bunch() {} + + // Define container types for various attributes + using charge_container_type = ippl::ParticleAttrib; + using velocity_container_type = ippl::ParticleAttrib>; + using vector_container_type = ippl::ParticleAttrib>; + using vector4_container_type = ippl::ParticleAttrib>; + + // Declare instances of the attribute containers + charge_container_type Q; // Charge container + charge_container_type mass; // Mass container + velocity_container_type gamma_beta; // Gamma-beta container + typename ippl::ParticleBase::particle_position_type + R_np1; // Position container for the next time step + typename ippl::ParticleBase::particle_position_type + R_nm1; // Position container for the previous time step + ippl::ParticleAttrib> + E_gather; // Electric field container for particle gathering + ippl::ParticleAttrib> + B_gather; // Magnetic field container for particle gathering + }; + template + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> gridCoordinatesOf( + const ippl::Vector hr, const ippl::Vector origin, ippl::Vector pos, int nghost = 1) { + + // Declare a pair to hold the resulting grid coordinates (integer part) and fractional part + Kokkos::pair, ippl::Vector> ret; + + // Calculate the relative position of pos with respect to the origin + ippl::Vector relpos = pos - origin; + + // Convert the relative position to grid coordinates by dividing by the grid spacing (hr) + ippl::Vector gridpos = relpos / hr; + + // Declare an integer vector to hold the integer part of the grid coordinates + ippl::Vector ipos; + + // Cast the grid position to an integer vector, which gives us the integer part of the coordinates + ipos = gridpos.template cast(); + + // Declare a vector to hold the fractional part of the grid coordinates + ippl::Vector fracpos; + + // Calculate the fractional part of the grid coordinates + for (unsigned k = 0; k < 3; k++) { + fracpos[k] = gridpos[k] - static_cast(ipos[k]); + } + + // Add the number of ghost cells to the integer part of the coordinates + ipos += ippl::Vector(nghost); + + // Set the integer part of the coordinates in the return pair + ret.first = ipos; + + // Set the fractional part of the coordinates in the return pair + ret.second = fracpos; + + // Return the pair containing both the integer and fractional parts of the grid coordinates + return ret; + } + + template + KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, + ippl::Vector hr, ippl::Vector orig, + const ippl::Vector& pos, const scalar value) { + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + ipos -= ldom.first(); + // std::cout << pos << " 's scatter args (will have 1, or nghost in general added): " << ipos << "\n"; + if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 + || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 + || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for (unsigned i = 0; i < 8; i++) { + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for (unsigned d = 0; d < 3; d++) { + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(value); + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[0]), value * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); + } + template + KOKKOS_FUNCTION void scatterToGrid(const ippl::NDIndex<3>& ldom, view_type& view, + ippl::Vector hr, ippl::Vector orig, + const ippl::Vector& pos, + const ippl::Vector& value) { + auto [ipos, fracpos] = gridCoordinatesOf(hr, orig, pos); + ipos -= ldom.first(); + if (ipos[0] < 0 || ipos[1] < 0 || ipos[2] < 0 || size_t(ipos[0]) >= view.extent(0) - 1 + || size_t(ipos[1]) >= view.extent(1) - 1 || size_t(ipos[2]) >= view.extent(2) - 1 + || fracpos[0] < 0 || fracpos[1] < 0 || fracpos[2] < 0) { + // Out of bounds case (you'll do nothing) + return; + } + assert(fracpos[0] >= 0.0f); + assert(fracpos[0] <= 1.0f); + assert(fracpos[1] >= 0.0f); + assert(fracpos[1] <= 1.0f); + assert(fracpos[2] >= 0.0f); + assert(fracpos[2] <= 1.0f); + ippl::Vector one_minus_fracpos = ippl::Vector(1) - fracpos; + assert(one_minus_fracpos[0] >= 0.0f); + assert(one_minus_fracpos[0] <= 1.0f); + assert(one_minus_fracpos[1] >= 0.0f); + assert(one_minus_fracpos[1] <= 1.0f); + assert(one_minus_fracpos[2] >= 0.0f); + assert(one_minus_fracpos[2] <= 1.0f); + scalar accum = 0; + + for (unsigned i = 0; i < 8; i++) { + scalar weight = 1; + ippl::Vector ipos_l = ipos; + for (unsigned d = 0; d < 3; d++) { + weight *= ((i & (1 << d)) ? fracpos[d] : one_minus_fracpos[d]); + ipos_l[d] += !!(i & (1 << d)); + } + assert_isreal(weight); + accum += weight; + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[1]), value[0] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[2]), value[1] * weight); + Kokkos::atomic_add(&(view(ipos_l[0], ipos_l[1], ipos_l[2])[3]), value[2] * weight); + } + assert(abs(accum - 1.0f) < 1e-6f); + } + template + KOKKOS_INLINE_FUNCTION void scatterLineToGrid(const ippl::NDIndex<3>& ldom, view_type& Jview, + ippl::Vector hr, + ippl::Vector origin, + const ippl::Vector& from, + const ippl::Vector& to, + const scalar factor) { + Kokkos::pair, ippl::Vector> from_grid = + gridCoordinatesOf(hr, origin, from); + Kokkos::pair, ippl::Vector> to_grid = + gridCoordinatesOf(hr, origin, to); + + if (from_grid.first[0] == to_grid.first[0] && from_grid.first[1] == to_grid.first[1] + && from_grid.first[2] == to_grid.first[2]) { + scatterToGrid(ldom, Jview, hr, origin, + /*Scatter point, geometric average */ ippl::Vector((from + to) * scalar(0.5)), + /*Scatter value, factor=charge / timestep */ ippl::Vector((to - from) * factor)); + + return; + } + ippl::Vector relay; + const int nghost = 1; + const ippl::Vector orig = origin; + using Kokkos::max; + using Kokkos::min; + for (unsigned int i = 0; i < 3; i++) { + relay[i] = min(min(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + + scalar(1.0) * hr[i] + orig[i], + max(max(from_grid.first[i] - nghost, to_grid.first[i] - nghost) * hr[i] + + scalar(0.0) * hr[i] + orig[i], + scalar(0.5) * (to[i] + from[i]))); + } + scatterToGrid(ldom, Jview, hr, origin, + ippl::Vector((from + relay) * scalar(0.5)), + ippl::Vector((relay - from) * factor)); + scatterToGrid(ldom, Jview, hr, origin, ippl::Vector((relay + to) * scalar(0.5)), + ippl::Vector((to - relay) * factor)); + } + template + class NSFDSolverWithParticles { + public: + constexpr static unsigned dim = 3; + using vector_type = ippl::Vector; + using vector4_type = ippl::Vector; + using FourField = + ippl::Field, + typename ippl::UniformCartesian::DefaultCentering>; + using ThreeField = + ippl::Field, + typename ippl::UniformCartesian::DefaultCentering>; + using playout_type = ParticleSpatialLayout; + using bunch_type = Bunch>; + using Mesh_t = ippl::UniformCartesian; + FieldLayout* layout_mp; + Mesh_t* mesh_mp; + playout_type playout; + Bunch> particles; + FourField J; + ThreeField E; + ThreeField B; + NonStandardFDTDSolver field_solver; + + ippl::Vector nr_global; + ippl::Vector hr_m; + size_t steps_taken; + NSFDSolverWithParticles(FieldLayout& layout, Mesh_t& mesch, size_t nparticles) + : layout_mp(&layout) + , mesh_mp(&mesch) + , playout(layout, mesch) + , particles(playout) + , J(mesch, layout) + , E(mesch, layout) + , B(mesch, layout) + , field_solver(J, E, B) { + particles.create(nparticles); + nr_global = ippl::Vector{ + uint32_t(layout.getDomain()[0].last() - layout.getDomain()[0].first() + 1), + uint32_t(layout.getDomain()[1].last() - layout.getDomain()[1].first() + 1), + uint32_t(layout.getDomain()[2].last() - layout.getDomain()[2].first() + 1)}; + hr_m = mesh_mp->getMeshSpacing(); + steps_taken = 0; + } + template + void scatterBunch() { + auto hr_m = mesh_mp->getMeshSpacing(); + const scalar volume = hr_m[0] * hr_m[1] * hr_m[2]; + assert_isreal(volume); + assert_isreal((scalar(1) / volume)); + J = typename decltype(J)::value_type(0); + auto Jview = J.getView(); + auto qview = particles.Q.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto orig = mesh_mp->getOrigin(); + auto hr = mesh_mp->getMeshSpacing(); + auto dt = field_solver.dt; + bool sc = space_charge; + ippl::NDIndex lDom = layout_mp->getLocalNDIndex(); + Kokkos::parallel_for( + particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) { + vector_type pos = rview(i); + vector_type to = rview(i); + vector_type from = rm1view(i); + if (sc) { + scatterToGrid(lDom, Jview, hr, orig, pos, qview(i) / volume); + } + scatterLineToGrid(lDom, Jview, hr, orig, from, to, + scalar(qview(i)) / (volume * dt)); + }); + Kokkos::fence(); + J.accumulateHalo(); + } + template + void updateBunch(scalar time, callable external_field) { + Kokkos::fence(); + auto gbview = particles.gamma_beta.getView(); + auto eview = particles.E_gather.getView(); + auto bview = particles.B_gather.getView(); + auto qview = particles.Q.getView(); + auto mview = particles.mass.getView(); + auto rview = particles.R.getView(); + auto rm1view = particles.R_nm1.getView(); + auto rp1view = particles.R_np1.getView(); + scalar bunch_dt = field_solver.dt / 3; + Kokkos::deep_copy(particles.R_nm1.getView(), particles.R.getView()); + E.fillHalo(); + B.fillHalo(); + Kokkos::fence(); + for (int bts = 0; bts < 3; bts++) { + particles.E_gather.gather(E, particles.R, /*offset = */ 0.0); + particles.B_gather.gather(B, particles.R, /*offset = */ 0.0); + Kokkos::fence(); + Kokkos::parallel_for( + particles.getLocalNum(), KOKKOS_LAMBDA(size_t i) { + const ippl::Vector pgammabeta = gbview(i); + ippl::Vector E_grid = eview(i); + ippl::Vector B_grid = bview(i); + ippl::Vector bunchpos = rview(i); + Kokkos::pair, ippl::Vector> external_eb = + external_field(bunchpos, time + bunch_dt * bts); + + ippl::Vector, 2> EB{ + ippl::Vector(E_grid + external_eb.first), + ippl::Vector(B_grid + external_eb.second)}; + + const scalar charge = qview(i); + const scalar mass = mview(i); + const ippl::Vector t1 = + pgammabeta + charge * bunch_dt * EB[0] / (scalar(2) * mass); + const scalar alpha = + charge * bunch_dt / (scalar(2) * mass * Kokkos::sqrt(1 + t1.dot(t1))); + const ippl::Vector t2 = t1 + alpha * t1.cross(EB[1]); + const ippl::Vector t3 = + t1 + + t2.cross(scalar(2) * alpha + * (EB[1] / (1.0 + alpha * alpha * (EB[1].dot(EB[1]))))); + const ippl::Vector ngammabeta = + t3 + charge * bunch_dt * EB[0] / (scalar(2) * mass); + + rview(i) = + rview(i) + + bunch_dt * ngammabeta + / (Kokkos::sqrt(scalar(1.0) + (ngammabeta.dot(ngammabeta)))); + gbview(i) = ngammabeta; + }); + Kokkos::fence(); + } + Kokkos::View invalid("OOB Particcel", particles.getLocalNum()); + size_t invalid_count = 0; + auto origo = mesh_mp->getOrigin(); + ippl::Vector extenz; // + extenz[0] = nr_global[0] * hr_m[0]; + extenz[1] = nr_global[1] * hr_m[1]; + extenz[2] = nr_global[2] * hr_m[2]; + Kokkos::parallel_reduce( + Kokkos::RangePolicy< + typename playout_type::RegionLayout_t::view_type::execution_space>( + 0, particles.getLocalNum()), + KOKKOS_LAMBDA(size_t i, size_t & ref) { + bool out_of_bounds = false; + ippl::Vector ppos = rview(i); + for (size_t d = 0; d < dim; d++) { + out_of_bounds |= (ppos[d] <= origo[d]); + out_of_bounds |= + (ppos[d] >= origo[d] + extenz[d]); // Check against simulation domain + } + invalid(i) = out_of_bounds; + ref += out_of_bounds; + }, + invalid_count); + particles.destroy(invalid, invalid_count); + Kokkos::fence(); + playout.update(particles); + } + void solve() { + scatterBunch(); + field_solver.solve(); + updateBunch( + field_solver.dt * steps_taken, + /*no external field*/ [] KOKKOS_FUNCTION(vector_type /*pos*/, scalar /*time*/) { + return Kokkos::pair{vector_type(0), vector_type(0)}; + }); + ++steps_taken; + } + template + void solve(callable external_field) { + scatterBunch(); + field_solver.solve(); + // std::cout << field_solver.dt * steps_taken << "\n"; + updateBunch(field_solver.dt * steps_taken, external_field); + ++steps_taken; + } + }; +} +#endif \ No newline at end of file diff --git a/fel/TestFDTDSolverWithParticles.cpp b/fel/TestFDTDSolverWithParticles.cpp new file mode 100644 index 000000000..33c516918 --- /dev/null +++ b/fel/TestFDTDSolverWithParticles.cpp @@ -0,0 +1,94 @@ +#include +using std::size_t; +#include +#include "Ippl.h" +#include "Types/Vector.h" +#include "Field/Field.h" +#include "NSFDSolverWithParticles.h" +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include +#include "units.h" +#include +template + requires((std::is_floating_point_v)) +KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){ + uint32_t dim = sizeof...(scalar); + ippl::Vector vec{scalar1(x - mean)...}; + scalar1 vecsum(0); + for(unsigned d = 0;d < dim;d++){ + vecsum += vec[d] * vec[d]; + + } + #ifndef __CUDA_ARCH__ + using std::exp; + #endif + return exp(-(vecsum) / (stddev * stddev)); +} + +int main(int argc, char* argv[]){ + ippl::initialize(argc, argv); + { + using scalar = float; + constexpr size_t n = 100; + ippl::Vector nr{n / 2, n / 2, 2 * n}; + ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]); + ippl::Vector extents{meter_in_unit_lengths, + meter_in_unit_lengths, + meter_in_unit_lengths}; + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + ippl::Vector hx; + for(unsigned d = 0;d < 3;d++){ + hx[d] = extents[d] / (scalar)nr[d]; + } + ippl::Vector origin = {0,0,0}; + ippl::UniformCartesian mesh(owned, hx, origin); + + ippl::NSFDSolverWithParticles solver(layout, mesh, 1 << 15); + + auto pview = solver.particles.R.getView(); + auto p1view = solver.particles.R_nm1.getView(); + auto gbview = solver.particles.gamma_beta.getView(); + + Kokkos::Random_XorShift64_Pool<> random_pool(12345); + Kokkos::parallel_for(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i){ + auto state = random_pool.get_state(); + pview(i)[0] = state.normal(origin[0] + extents[0] * 0.5, 0.04 * extents[0]); + pview(i)[1] = state.normal(origin[1] + extents[1] * 0.5, 0.04 * extents[1]); + pview(i)[2] = state.normal(origin[2] + extents[2] * 0.5, 0.04 * extents[2]); + p1view(i) = pview(i); + gbview(i) = 0; + random_pool.free_state(state); + }); + { + double var = 0; + Kokkos::parallel_reduce(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i, double& ref){ + ippl::Vector pd(pview(i) - (origin + extents * 0.5)); + ref += pd.dot(pd); + }, var); + std::cout << ippl::Vector(var * (1.0 / solver.particles.getLocalNum())) << "\n"; + } + solver.playout.update(solver.particles); + solver.particles.Q = electron_charge_in_unit_charges; + solver.particles.mass = electron_mass_in_unit_masses; + for(int i = 0;i < 200;i++){ + solver.solve(); + } + { + double var = 0; + Kokkos::parallel_reduce(solver.particles.getLocalNum(), KOKKOS_LAMBDA(size_t i, double& ref){ + ippl::Vector pd(pview(i) - (origin + extents * 0.5)); + ref += pd.dot(pd); + }, var); + std::cout << ippl::Vector(var * (1.0 / solver.particles.getLocalNum())) << "\n"; + } + + } + //exit: + ippl::finalize(); +} \ No newline at end of file diff --git a/fel/Undulator.h b/fel/Undulator.h new file mode 100644 index 000000000..57cad337c --- /dev/null +++ b/fel/Undulator.h @@ -0,0 +1,97 @@ +#ifndef UNDULATOR_H +#define UNDULATOR_H +#include + +#include + +#include "Types/Vector.h" + +#include "units.h" +#include "LorentzTransform.h" +namespace ippl { + template + struct undulator_parameters { + scalar lambda_u; // MITHRA: lambda_u + scalar K; // Undulator parameter + scalar length; + scalar B_magnitude; + undulator_parameters(scalar K_undulator_parameter, scalar lambda_u, scalar _length) + : lambda_u(lambda_u) + , K(K_undulator_parameter) + , length(_length) { + B_magnitude = (2 * M_PI * electron_mass_in_unit_masses * K) + / (electron_charge_in_unit_charges * lambda_u); + } + }; + /** + * @brief Struct representing an undulator. + * + * @tparam scalar Type of the scalar values (e.g., float, double). + */ + template + struct Undulator { + undulator_parameters uparams; ///< Parameters of the undulator. + scalar distance_to_entry; ///< Distance to the entry of the undulator. + scalar k_u; ///< Wavenumber of the undulator. + + /** + * @brief Constructor to initialize undulator parameters and calculate k_u. + * + * @param p Parameters of the undulator. + * @param dte Distance to the entry of the undulator. + */ + KOKKOS_FUNCTION Undulator(const undulator_parameters& p, scalar dte) + : uparams(p) + , distance_to_entry(dte) + , k_u(2 * M_PI / p.lambda_u) {} + + /** + * @brief Overloaded operator() to compute magnetic field components. + * + * @param position_in_lab_frame Position vector in the lab frame. + * @return Kokkos::pair, ippl::Vector> + * Pair containing magnetic field and its derivative. + */ + KOKKOS_INLINE_FUNCTION Kokkos::pair, ippl::Vector> + operator()(const ippl::Vector& position_in_lab_frame) const noexcept { + using Kokkos::cos; + using Kokkos::cosh; + using Kokkos::exp; + using Kokkos::sin; + using Kokkos::sinh; + + Kokkos::pair, ippl::Vector> + ret; // Return pair containing magnetic field and its derivative. + ret.first.fill(0); // Initialize magnetic field vector. + ret.second.fill(0); // Initialize derivative vector. + + // If the position is before the undulator entry. + if (position_in_lab_frame[2] < distance_to_entry) { + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator < 0); // Ensure we are in the correct region. + scalar scal = exp(-((k_u * z_in_undulator) * (k_u * z_in_undulator) + * 0.5)); // Gaussian decay factor. + + ret.second[0] = 0; // No x-component. + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) + * z_in_undulator * k_u * scal; // y-component. + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) + * scal; // z-component. + } + // If the position is within the undulator. + else if (position_in_lab_frame[2] > distance_to_entry + && position_in_lab_frame[2] < distance_to_entry + uparams.length) { + scalar z_in_undulator = position_in_lab_frame[2] - distance_to_entry; + assert(z_in_undulator >= 0); // Ensure we are in the correct region. + + ret.second[0] = 0; // No x-component. + ret.second[1] = uparams.B_magnitude * cosh(k_u * position_in_lab_frame[1]) + * sin(k_u * z_in_undulator); // y-component. + ret.second[2] = uparams.B_magnitude * sinh(k_u * position_in_lab_frame[1]) + * cos(k_u * z_in_undulator); // z-component. + } + return ret; + } + }; +} // namespace ippl +#endif // UNDULATOR_H \ No newline at end of file diff --git a/fel/units.h b/fel/units.h new file mode 100644 index 000000000..318892db8 --- /dev/null +++ b/fel/units.h @@ -0,0 +1,37 @@ +#ifndef UNITS_HPP +#define UNITS_HPP +constexpr double sqrt_4pi = 3.54490770181103205459; +constexpr double alpha_scaling_factor = 1e30; +constexpr double unit_length_in_meters = 1.616255e-35 * alpha_scaling_factor; +constexpr double unit_charge_in_electron_charges = 11.70623710394218618969 / sqrt_4pi; +constexpr double unit_time_in_seconds = 5.391247e-44 * alpha_scaling_factor; +constexpr double electron_mass_in_kg = 9.1093837015e-31; +constexpr double unit_mass_in_kg = 2.176434e-8 / alpha_scaling_factor; +constexpr double unit_energy_in_joule = unit_mass_in_kg * unit_length_in_meters * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds); +constexpr double kg_in_unit_masses = 1.0 / unit_mass_in_kg; +constexpr double meter_in_unit_lengths = 1.0 / unit_length_in_meters; +constexpr double electron_charge_in_coulombs = 1.602176634e-19; +constexpr double coulomb_in_electron_charges = 1.0 / electron_charge_in_coulombs; + +constexpr double electron_charge_in_unit_charges = 1.0 / unit_charge_in_electron_charges; +constexpr double second_in_unit_times = 1.0 / unit_time_in_seconds; +constexpr double electron_mass_in_unit_masses = electron_mass_in_kg * kg_in_unit_masses; +constexpr double unit_force_in_newtons = unit_mass_in_kg * unit_length_in_meters / (unit_time_in_seconds * unit_time_in_seconds); + +constexpr double coulomb_in_unit_charges = coulomb_in_electron_charges * electron_charge_in_unit_charges; +constexpr double unit_voltage_in_volts = unit_energy_in_joule * coulomb_in_unit_charges; +constexpr double unit_charges_in_coulomb = 1.0 / coulomb_in_unit_charges; +constexpr double unit_current_in_amperes = unit_charges_in_coulomb / unit_time_in_seconds; +constexpr double ampere_in_unit_currents = 1.0 / unit_current_in_amperes; +constexpr double unit_current_length_in_ampere_meters = unit_current_in_amperes * unit_length_in_meters; +constexpr double unit_magnetic_fluxdensity_in_tesla = unit_voltage_in_volts * unit_time_in_seconds / (unit_length_in_meters * unit_length_in_meters); +constexpr double unit_electric_fieldstrength_in_voltpermeters = (unit_voltage_in_volts / unit_length_in_meters); +constexpr double voltpermeter_in_unit_fieldstrengths = 1.0 / unit_electric_fieldstrength_in_voltpermeters; +constexpr double unit_powerdensity_in_watt_per_square_meter = 1.389e122 / (alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor * alpha_scaling_factor); +constexpr double volts_in_unit_voltages = 1.0 / unit_voltage_in_volts; +constexpr double epsilon0_in_si = unit_current_in_amperes * unit_time_in_seconds / (unit_voltage_in_volts * unit_length_in_meters); +constexpr double mu0_in_si = unit_force_in_newtons / (unit_current_in_amperes * unit_current_in_amperes); +constexpr double G = unit_length_in_meters * unit_length_in_meters * unit_length_in_meters / (unit_mass_in_kg * unit_time_in_seconds * unit_time_in_seconds); +constexpr double verification_gravity = unit_mass_in_kg * unit_mass_in_kg / (unit_length_in_meters * unit_length_in_meters) * G; +constexpr double verification_coulomb = (unit_charges_in_coulomb * unit_charges_in_coulomb / (unit_length_in_meters * unit_length_in_meters) * (1.0 / (epsilon0_in_si))) / unit_force_in_newtons; +#endif \ No newline at end of file diff --git a/plotscripts/ampereplot.gp b/plotscripts/ampereplot.gp new file mode 100755 index 000000000..495a05cbd --- /dev/null +++ b/plotscripts/ampereplot.gp @@ -0,0 +1,13 @@ +#! /usr/bin/gnuplot +set terminal svg size 800,600 font "CMU Serif,22" +set logscale x 2 +set logscale y 2 +set key box +set output "ampere.svg" +set ylabel "Magnetic Flux density[T]" +set xlabel "Distance to wire [m]" +#set title "Ampere's Law" +set grid lw 2 +set ytics format "2^{%L}" +set xtics format "2^{%L}" +plot 'ampere_line.txt' u (abs($1)):(abs($2)) w points title "Measurements", 1.256e-6/(2*3.14159)*(1/x) w lines lw 2 title "Radial falloff" diff --git a/plotscripts/gaussplot.gp b/plotscripts/gaussplot.gp new file mode 100755 index 000000000..879584114 --- /dev/null +++ b/plotscripts/gaussplot.gp @@ -0,0 +1,13 @@ +#! /usr/bin/gnuplot +set terminal svg size 900,700 font "CMU Serif, 25" +set logscale x 2 +set logscale y 2 +set key box +set output "gauss.svg" +set ylabel "Electric Field [V/m]" +set xlabel "Distance to charge [m]" +#set title "Gauss's Law" +set grid lw 2 +set ytics format "2^{%L}" +set xtics format "2^{%L}" +plot 'gauss_line.txt' u (abs($1)):(abs($2)) w points title "Measurements" pt 2, (1 / (4 * 3.1416 * 8.854e-12 * x * x)) w lines lw 3 title "Quadratic falloff" diff --git a/src/Expression/IpplOperations.h b/src/Expression/IpplOperations.h index dad442901..3aa3177d2 100644 --- a/src/Expression/IpplOperations.h +++ b/src/Expression/IpplOperations.h @@ -276,13 +276,16 @@ namespace ippl { } return res; } + KOKKOS_INLINE_FUNCTION operator typename E1::value_type() const{ + return apply(); + } /* * This is required for BareField::dot */ template KOKKOS_INLINE_FUNCTION auto operator()(Args... args) const { - return dot(u_m(args...), v_m(args...)).apply(); + return dot(u_m(args...), v_m(args...)); } private: diff --git a/src/MaxwellSolvers/AbsorbingBC.h b/src/MaxwellSolvers/AbsorbingBC.h new file mode 100644 index 000000000..4a87df71c --- /dev/null +++ b/src/MaxwellSolvers/AbsorbingBC.h @@ -0,0 +1,427 @@ +#ifndef ABC_H +#define ABC_H +#include +#include "Types/Vector.h" +#include "Field/Field.h" +template +constexpr KOKKOS_INLINE_FUNCTION auto first(){ + return a; +} +template +constexpr KOKKOS_INLINE_FUNCTION auto second(){ + return b; +} +template +struct second_order_abc_face{ + using scalar = _scalar; + scalar Cweights[5]; + int sign; + constexpr static unsigned main_axis = _main_axis; + KOKKOS_FUNCTION second_order_abc_face(ippl::Vector hr, scalar dt, int _sign) : sign(_sign){ + constexpr scalar c = 1; + constexpr unsigned side_axes[2] = {_side_axes...}; + static_assert( + (main_axis == 0 && first<_side_axes...>() == 1 && second<_side_axes...>() == 2) || + (main_axis == 1 && first<_side_axes...>() == 0 && second<_side_axes...>() == 2) || + (main_axis == 2 && first<_side_axes...>() == 0 && second<_side_axes...>() == 1) + ); + assert(_main_axis != side_axes[0]); + assert(_main_axis != side_axes[1]); + assert(side_axes[0] != side_axes[1]); + constexpr scalar truncation_order = 2.0; + scalar p = ( 1.0 + 1 * 1 ) / ( 1 + 1 ); + scalar q = - 1.0 / ( 1 + 1 ); + + scalar d = 1.0 / ( 2.0 * dt * hr[main_axis]) + p / ( 2.0 * c * dt * dt); + + Cweights[0] = ( 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; + Cweights[1] = ( - 1.0 / ( 2.0 * dt * hr[main_axis] ) - p / (2.0 * c * dt * dt)) / d; + assert(abs(Cweights[1] + 1) < 1e-6); //Like literally + Cweights[2] = ( p / ( c * dt * dt ) + q * (truncation_order - 1.0) * (c / (hr[side_axes[0]] * hr[side_axes[0]]) + c / (hr[side_axes[1]] * hr[side_axes[1]]))) / d; + Cweights[3] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[0]] * hr[side_axes[0]] ) ) / d; + Cweights[4] = -q * (truncation_order - 1.0) * ( c / ( 2.0 * hr[side_axes[1]] * hr[side_axes[1]] ) ) / d; + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + uint32_t i = c[0]; + uint32_t j = c[1]; + uint32_t k = c[2]; + using ippl::apply; + constexpr unsigned side_axes[2] = {_side_axes...}; + ippl::Vector side_axis1_onehot = ippl::Vector{side_axes[0] == 0, side_axes[0] == 1, side_axes[0] == 2}; + ippl::Vector side_axis2_onehot = ippl::Vector{side_axes[1] == 0, side_axes[1] == 1, side_axes[1] == 2}; + ippl::Vector mainaxis_off = ippl::Vector{(main_axis == 0) * sign, (main_axis == 1) * sign, (main_axis == 2) * sign}.cast(); + return advanceBoundaryS( + A_nm1(i,j,k), A_n(i,j,k), + apply(A_nm1, c + mainaxis_off), apply(A_n, c + mainaxis_off), apply(A_np1, c + mainaxis_off), + apply(A_n, c + side_axis1_onehot + mainaxis_off), apply(A_n, c - side_axis1_onehot + mainaxis_off), apply(A_n, c + side_axis2_onehot + mainaxis_off), + apply(A_n, c - side_axis2_onehot + mainaxis_off), apply(A_n, c + side_axis1_onehot), apply(A_n, c - side_axis1_onehot), + apply(A_n, c + side_axis2_onehot), apply(A_n, c - side_axis2_onehot) + ); + } + template + KOKKOS_FUNCTION value_type advanceBoundaryS (const value_type& v1 , const value_type& v2 , + const value_type& v3 , const value_type& v4 , const value_type& v5 , + const value_type& v6 , const value_type& v7 , const value_type& v8 , + const value_type& v9 , const value_type& v10, const value_type& v11, + const value_type& v12, const value_type& v13)const noexcept + { + + value_type v0 = + Cweights[0] * (v1 + v5) + + (Cweights[1]) * v3 + + (Cweights[2]) * ( v2 + v4 ) + + (Cweights[3]) * ( v6 + v7 + v10 + v11 ) + + (Cweights[4]) * ( v8 + v9 + v12 + v13 ); + return v0; + } +}; +template +struct second_order_abc_edge{ + using scalar = _scalar; + // + scalar Eweights[5]; + + KOKKOS_FUNCTION second_order_abc_edge(ippl::Vector hr, scalar dt){ + static_assert(normal_axis1 != normal_axis2); + static_assert(edge_axis != normal_axis2); + static_assert(edge_axis != normal_axis1); + static_assert((edge_axis == 2 && normal_axis1 == 0 && normal_axis2 == 1) || (edge_axis == 0 && normal_axis1 == 1 && normal_axis2 == 2) || (edge_axis == 1 && normal_axis1 == 2 && normal_axis2 == 0)); + constexpr scalar c0_ = scalar(1); + scalar d = ( 1.0 / hr[normal_axis1] + 1.0 / hr[normal_axis2] ) / ( 4.0 * dt ) + 3.0 / ( 8.0 * c0_ * dt * dt ); + if constexpr(normal_axis1 == 0 && normal_axis2 == 1){ // xy edge (along z) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else if constexpr(normal_axis1 == 2 && normal_axis2 == 0){ // zx edge (along y) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else if constexpr(normal_axis1 == 1 && normal_axis2 == 2){ // yz edge (along x) + Eweights[0] = ( - ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[1] = ( ( 1.0 / hr[normal_axis2] - 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[2] = ( ( 1.0 / hr[normal_axis2] + 1.0 / hr[normal_axis1] ) / ( 4.0 * dt ) - 3.0 / ( 8.0 * c0_ * dt * dt )) / d; + Eweights[3] = ( 3.0 / ( 4.0 * c0_ * dt * dt ) - c0_ / (4.0 * hr[edge_axis] * hr[edge_axis])) / d; + Eweights[4] = c0_ / ( 8.0 * hr[edge_axis] * hr[edge_axis] ) / d; + } + else{ + assert(false); + } + + + + + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + uint32_t i = c[0]; + uint32_t j = c[1]; + uint32_t k = c[2]; + using ippl::apply; + //constexpr unsigned nax[2] = {normal_axis1, normal_axis2}; + ippl::Vector normal_axis1_onehot = ippl::Vector{normal_axis1 == 0, normal_axis1 == 1, normal_axis1 == 2} * int32_t(na1_zero ? 1 : -1); + ippl::Vector normal_axis2_onehot = ippl::Vector{normal_axis2 == 0, normal_axis2 == 1, normal_axis2 == 2} * int32_t(na2_zero ? 1 : -1); + ippl::Vector acc0 = {i, j, k}; + ippl::Vector acc1 = acc0 + normal_axis1_onehot.cast(); + ippl::Vector acc2 = acc0 + normal_axis2_onehot.cast(); + ippl::Vector acc3 = acc0 + normal_axis1_onehot.cast() + normal_axis2_onehot.cast(); + //ippl::Vector axism = (-ippl::Vector{edge_axis == 0, edge_axis == 1, edge_axis == 2}).cast(); + ippl::Vector axisp{edge_axis == 0, edge_axis == 1, edge_axis == 2}; + //return A_n(i, j, k); + return advanceEdgeS( + A_n(i, j, k), A_nm1(i, j, k), + apply(A_np1, acc1), apply(A_n, acc1 ), apply(A_nm1, acc1), + apply(A_np1, acc2), apply(A_n, acc2 ), apply(A_nm1, acc2), + apply(A_np1, acc3), apply(A_n, acc3 ), apply(A_nm1, acc3), + apply(A_n, acc0 - axisp), apply(A_n, acc1 - axisp), apply(A_n, acc2 - axisp), apply(A_n, acc3 - axisp), + apply(A_n, acc0 + axisp), apply(A_n, acc1 + axisp), apply(A_n, acc2 + axisp), apply(A_n, acc3 + axisp) + ); + } + template + KOKKOS_INLINE_FUNCTION value_type advanceEdgeS + ( value_type v1 , value_type v2 , + value_type v3 , value_type v4 , value_type v5 , + value_type v6 , value_type v7 , value_type v8 , + value_type v9 , value_type v10, value_type v11, + value_type v12, value_type v13, value_type v14, + value_type v15, value_type v16, value_type v17, + value_type v18, value_type v19)const noexcept{ + value_type v0 = + Eweights[0] * (v3 + v8) + + Eweights[1] * (v5 + v6) + + Eweights[2] * (v2 + v9) + + Eweights[3] * (v1 + v4 + v7 + v10) + + Eweights[4] * (v12 + v13 + v14 + v15 + v16 + v17 + v18 + v19) - v11; + return v0; + } +}; +template +struct second_order_abc_corner{ + using scalar = _scalar; + scalar Cweights[17]; + KOKKOS_FUNCTION second_order_abc_corner(ippl::Vector hr, scalar dt){ + constexpr scalar c0_ = scalar(1); + Cweights[0] = ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[1] = ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[2] = ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[3] = ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[4] = ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[5] = ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[6] = ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[7] = ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[8] = - ( - 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[9] = - ( 1.0 / hr[0] - 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[10] = - ( - 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[11] = - ( - 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[12] = - ( 1.0 / hr[0] + 1.0 / hr[1] - 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[13] = - ( 1.0 / hr[0] - 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[14] = - ( - 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[15] = - ( 1.0 / hr[0] + 1.0 / hr[1] + 1.0 / hr[2] ) / (8.0 * dt) - 1.0 / ( 4.0 * c0_ * dt * dt); + Cweights[16] = 1.0 / (2.0 * c0_ * dt * dt); + } + template + KOKKOS_INLINE_FUNCTION auto operator()(const view_type& A_n, const view_type& A_nm1,const view_type& A_np1, const Coords& c)const -> typename view_type::value_type{ + //First implementation: 0,0,0 corner + constexpr uint32_t xoff = (x0) ? 1 : uint32_t(-1); + constexpr uint32_t yoff = (y0) ? 1 : uint32_t(-1); + constexpr uint32_t zoff = (z0) ? 1 : uint32_t(-1); + using ippl::apply; + const ippl::Vector offsets[8] = { + ippl::Vector{0,0,0}, + ippl::Vector{xoff,0,0}, + ippl::Vector{0,yoff,0}, + ippl::Vector{0,0,zoff}, + ippl::Vector{xoff,yoff,0}, + ippl::Vector{xoff,0,zoff}, + ippl::Vector{0,yoff,zoff}, + ippl::Vector{xoff,yoff,zoff}, + }; + return advanceCornerS( + apply(A_n, c), apply(A_nm1, c), + apply(A_np1, c + offsets[1]), apply(A_n, c + offsets[1]), apply(A_nm1, c + offsets[1]), + apply(A_np1, c + offsets[2]), apply(A_n, c + offsets[2]), apply(A_nm1, c + offsets[2]), + apply(A_np1, c + offsets[3]), apply(A_n, c + offsets[3]), apply(A_nm1, c + offsets[3]), + apply(A_np1, c + offsets[4]), apply(A_n, c + offsets[4]), apply(A_nm1, c + offsets[4]), + apply(A_np1, c + offsets[5]), apply(A_n, c + offsets[5]), apply(A_nm1, c + offsets[5]), + apply(A_np1, c + offsets[6]), apply(A_n, c + offsets[6]), apply(A_nm1, c + offsets[6]), + apply(A_np1, c + offsets[7]), apply(A_n, c + offsets[7]), apply(A_nm1, c + offsets[7]) + ); + } + template + KOKKOS_INLINE_FUNCTION value_type advanceCornerS + ( value_type v1 , value_type v2 , + value_type v3 , value_type v4 , value_type v5 , + value_type v6 , value_type v7 , value_type v8 , + value_type v9 , value_type v10, value_type v11, + value_type v12, value_type v13, value_type v14, + value_type v15, value_type v16, value_type v17, + value_type v18, value_type v19, value_type v20, + value_type v21, value_type v22, value_type v23)const noexcept{ + return - ( v1 * (Cweights[16]) + v2 * (Cweights[8]) + + v3 * Cweights[1] + v4 * Cweights[16] + v5 * Cweights[9] + + v6 * Cweights[2] + v7 * Cweights[16] + v8 * Cweights[10] + + v9 * Cweights[3] + v10 * Cweights[16] + v11 * Cweights[11] + + v12 * Cweights[4] + v13 * Cweights[16] + v14 * Cweights[12] + + v15 * Cweights[5] + v16 * Cweights[16] + v17 * Cweights[13] + + v18 * Cweights[6] + v19 * Cweights[16] + v20 * Cweights[14] + + v21 * Cweights[7] + v22 * Cweights[16] + v23 * Cweights[15]) / Cweights[0]; + } +}; + + + + + + +struct second_order_mur_boundary_conditions{ + template + void apply(field_type& FA_n, field_type& FA_nm1, field_type& FA_np1, dt_type dt, ippl::Vector true_nr, ippl::NDIndex<3> lDom){ + using scalar = decltype(dt); + //TODO: tbh don't know + //const unsigned nghost = 1; + const ippl::Vector hr = FA_n.get_mesh().getMeshSpacing(); + //assert_isreal((betaMur[0])); + //assert_isreal((betaMur[1])); + //assert_isreal((betaMur[2])); + auto A_n = FA_n.getView(); + auto A_np1 = FA_np1.getView(); + auto A_nm1 = FA_nm1.getView(); + ippl::Vector local_nr{ + uint32_t(A_n.extent(0)), + uint32_t(A_n.extent(1)), + uint32_t(A_n.extent(2)) + }; + constexpr uint32_t min_abc_boundary = 1; + constexpr uint32_t max_abc_boundary_sub = min_abc_boundary + 1; + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + if(Kokkos::popcount(lval) > 1)return; + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); + + if(Kokkos::popcount(val) == 1){ + if(ig == min_abc_boundary){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(jg == min_abc_boundary){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(kg == min_abc_boundary){ + second_order_abc_face soa(hr, dt, 1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(ig == true_nr[0] - max_abc_boundary_sub){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(jg == true_nr[1] - max_abc_boundary_sub){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + if(kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_face soa(hr, dt, -1); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + } + }); + Kokkos::fence(); + //FA_np1.fillHalo(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + if(Kokkos::popcount(lval) > 2)return; + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); + if(Kokkos::popcount(val) == 2){ //Edge + if(ig == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(ig == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(ig == true_nr[0] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + + else if(ig == true_nr[0] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_edge soa(hr, dt); + A_np1(i, j, k) = soa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else{ + assert(false); + } + } + }); + Kokkos::fence(); + //FA_np1.fillHalo(); + Kokkos::parallel_for(ippl::getRangePolicy(A_n, 1), KOKKOS_LAMBDA(uint32_t i, uint32_t j, uint32_t k){ + uint32_t ig = i + lDom.first()[0]; + uint32_t jg = j + lDom.first()[1]; + uint32_t kg = k + lDom.first()[2]; + + //uint32_t lval = uint32_t(i == 0) + (uint32_t(j == 0) << 1) + (uint32_t(k == 0) << 2) + // + (uint32_t(i == local_nr[0] - 1) << 3) + (uint32_t(j == local_nr[1] - 1) << 4) + (uint32_t(k == local_nr[2] - 1) << 5); + + //if(Kokkos::popcount(lval) > 1)return; + uint32_t val = uint32_t(ig == min_abc_boundary) + (uint32_t(jg == min_abc_boundary) << 1) + (uint32_t(kg == min_abc_boundary) << 2) + + (uint32_t(ig == true_nr[0] - max_abc_boundary_sub) << 3) + (uint32_t(jg == true_nr[1] - max_abc_boundary_sub) << 4) + (uint32_t(kg == true_nr[2] - max_abc_boundary_sub) << 5); + + if(Kokkos::popcount(val) == 3){ + //printf("Corner: %d, %d, %d\n", i, j, k); + if(ig == min_abc_boundary && jg == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == min_abc_boundary){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == min_abc_boundary && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == min_abc_boundary && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else if(ig == true_nr[0] - max_abc_boundary_sub && jg == true_nr[1] - max_abc_boundary_sub && kg == true_nr[2] - max_abc_boundary_sub){ + second_order_abc_corner coa(hr, dt); + A_np1(i, j, k) = coa(A_n, A_nm1, A_np1, ippl::Vector{i, j, k}); + } + else{ + assert(false); + } + } + }); + Kokkos::fence(); + } +}; +#endif \ No newline at end of file diff --git a/src/MaxwellSolvers/CMakeLists.txt b/src/MaxwellSolvers/CMakeLists.txt index 62d93dd36..d961415ea 100644 --- a/src/MaxwellSolvers/CMakeLists.txt +++ b/src/MaxwellSolvers/CMakeLists.txt @@ -3,6 +3,7 @@ set (_SRCS set (_HDRS Maxwell.h + FDTD.h ) include_DIRECTORIES ( diff --git a/src/MaxwellSolvers/FDTD.h b/src/MaxwellSolvers/FDTD.h new file mode 100644 index 000000000..018758db4 --- /dev/null +++ b/src/MaxwellSolvers/FDTD.h @@ -0,0 +1,403 @@ +#ifndef IPPL_FDTD_H +#define IPPL_FDTD_H +#include +using std::size_t; +#include "Types/Vector.h" + +#include "FieldLayout/FieldLayout.h" +#include "MaxwellSolvers/AbsorbingBC.h" +#include "MaxwellSolvers/Maxwell.h" +#include "Meshes/UniformCartesian.h" +#include "Particle/ParticleBase.h" + +constexpr double sq(double x) { + return x * x; +} +constexpr float sq(float x) { + return x * x; +} +#define assert_isreal(X) assert(!Kokkos::isnan(X) && !Kokkos::isinf(X)) +namespace ippl { + enum fdtd_bc { + periodic, + absorbing + }; + + template + class StandardFDTDSolver : Maxwell { + public: + constexpr static unsigned Dim = EMField::dim; + using scalar = typename EMField::value_type::value_type; + using Vector_t = Vector; + using FourVector_t = typename FourField::value_type; + StandardFDTDSolver(FourField& source, EMField& E, EMField& B) { + Maxwell::setSource(source); + Maxwell::setEMFields(E, B); + initialize(); + } + virtual void solve() override { + step(); + timeShift(); + evaluate_EB(); + } + void setPeriodicBoundaryConditions() { + periodic_bc = true; + typename FourField::BConds_t vector_bcs; + auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { + vector_bcs[Idx] = std::make_shared>(Idx); + return 0; + }; + auto bcsetter = [bcsetter_single](const std::index_sequence&) { + int x = (bcsetter_single(std::index_sequence{}) ^ ...); + (void)x; + }; + bcsetter(std::make_index_sequence{}); + A_n.setFieldBC(vector_bcs); + A_np1.setFieldBC(vector_bcs); + A_nm1.setFieldBC(vector_bcs); + } + + FourField A_n; + FourField A_np1; + FourField A_nm1; + scalar dt; + + private: + void timeShift() { + // Look into this, maybe cyclic swap is better + Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView()); + Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView()); + } + void applyBCs() { + if constexpr (boundary_conditions == periodic) { + A_n.getFieldBC().apply(A_n); + A_nm1.getFieldBC().apply(A_nm1); + A_np1.getFieldBC().apply(A_np1); + } else { + Vector true_nr = nr_m; + true_nr += (A_n.getNghost() * 2); + second_order_mur_boundary_conditions bcs{}; + bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex()); + } + } + + public: + void step() { + const auto& ldom = layout_mp->getLocalNDIndex(); + const int nghost = A_n.getNghost(); + const auto aview = A_n.getView(); + const auto anp1view = A_np1.getView(); + const auto anm1view = A_nm1.getView(); + const auto source_view = Maxwell::source_mp->getView(); + + const scalar a1 = + scalar(2) * (scalar(1) - sq(dt / hr_m[0]) - sq(dt / hr_m[1]) - sq(dt / hr_m[2])); + const scalar a2 = sq(dt / hr_m[0]); + const scalar a4 = sq(dt / hr_m[1]); + const scalar a6 = sq(dt / hr_m[2]); + const scalar a8 = sq(dt); + Vector true_nr = nr_m; + true_nr += (nghost * 2); + constexpr uint32_t one_if_absorbing_otherwise_0 = + boundary_conditions == absorbing ? 1 : 0; + Kokkos::parallel_for( + "Four potential update", ippl::getRangePolicy(aview, nghost), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + // global indices + const uint32_t ig = i + ldom.first()[0]; + const uint32_t jg = j + ldom.first()[1]; + const uint32_t kg = k + ldom.first()[2]; + uint32_t val = + uint32_t(ig == one_if_absorbing_otherwise_0) + + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2) + + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5); + + if (val == 0) { + FourVector_t interior = -anm1view(i, j, k) + a1 * aview(i, j, k) + + a2 * (aview(i + 1, j, k) + aview(i - 1, j, k)) + + a4 * (aview(i, j + 1, k) + aview(i, j - 1, k)) + + a6 * (aview(i, j, k + 1) + aview(i, j, k - 1)) + + a8 * (-source_view(i, j, k)); + anp1view(i, j, k) = interior; + } else { + // std::cout << i << ", " << j << ", " << k << "\n"; + } + }); + Kokkos::fence(); + applyBCs(); + A_np1.fillHalo(); + } + void evaluate_EB() { + ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; + const scalar idt = scalar(1.0) / dt; + auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), + A_nm1 = this->A_nm1.getView(); + auto source = Maxwell::source_mp->getView(); + auto Eview = Maxwell::En_mp->getView(); + auto Bview = Maxwell::Bn_mp->getView(); + + Kokkos::parallel_for( + this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) { + ippl::Vector dAdt = + (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; + ippl::Vector dAdx = + (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; + ippl::Vector dAdy = + (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; + ippl::Vector dAdz = + (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; + + ippl::Vector grad_phi{dAdx[0], dAdy[0], dAdz[0]}; + ippl::Vector curlA{ + dAdy[3] - dAdz[2], + dAdz[1] - dAdx[3], + dAdx[2] - dAdy[1], + }; + Eview(i, j, k) = -dAdt - grad_phi; + Bview(i, j, k) = curlA; + }); + Kokkos::fence(); + } + + bool periodic_bc; + + typename FourField::Mesh_t* mesh_mp; + FieldLayout* layout_mp; + NDIndex domain_m; + Vector_t hr_m; + + Vector nr_m; + + void initialize() { + // get layout and mesh + layout_mp = &(this->source_mp->getLayout()); + mesh_mp = &(this->source_mp->get_mesh()); + + // get mesh spacing, domain, and mesh size + hr_m = mesh_mp->getMeshSpacing(); + dt = hr_m[0] / 2; + for (unsigned int i = 0; i < Dim; ++i) { + dt = std::min(dt, hr_m[i] / 2); + } + domain_m = layout_mp->getDomain(); + for (unsigned int i = 0; i < Dim; ++i) + nr_m[i] = domain_m[i].length(); + + // initialize fields + A_nm1.initialize(*mesh_mp, *layout_mp); + A_n.initialize(*mesh_mp, *layout_mp); + A_np1.initialize(*mesh_mp, *layout_mp); + + A_nm1 = 0.0; + A_n = 0.0; + A_np1 = 0.0; + }; + }; + /** + * @brief Nonstandard Finite-Difference Time-Domain + * + * @tparam EMField + * @tparam FourField + */ + template + class NonStandardFDTDSolver : Maxwell { + public: + constexpr static unsigned Dim = EMField::dim; + using scalar = typename EMField::value_type::value_type; + using Vector_t = Vector; + using FourVector_t = typename FourField::value_type; + NonStandardFDTDSolver(FourField& source, EMField& E, EMField& B) { + auto hx = source.get_mesh().getMeshSpacing(); + if ((hx[2] / hx[0]) * (hx[2] / hx[0]) + (hx[2] / hx[1]) * (hx[2] / hx[1]) >= 1) { + std::cerr << "Dispersion-free CFL condition not satisfiable\n"; + std::abort(); + } + Maxwell::setSource(source); + Maxwell::setEMFields(E, B); + initialize(); + } + virtual void solve() override { + step(); + timeShift(); + evaluate_EB(); + } + FourField A_n; + FourField A_np1; + FourField A_nm1; + void setPeriodicBoundaryConditions() { + periodic_bc = true; + typename FourField::BConds_t vector_bcs; + auto bcsetter_single = [&vector_bcs](const std::index_sequence&) { + vector_bcs[Idx] = std::make_shared>(Idx); + return 0; + }; + auto bcsetter = [bcsetter_single](const std::index_sequence&) { + int x = (bcsetter_single(std::index_sequence{}) ^ ...); + (void)x; + }; + bcsetter(std::make_index_sequence{}); + A_n.setFieldBC(vector_bcs); + A_np1.setFieldBC(vector_bcs); + A_nm1.setFieldBC(vector_bcs); + } + + private: + void timeShift() { + // Look into this, maybe cyclic swap is better + Kokkos::deep_copy(this->A_nm1.getView(), this->A_n.getView()); + Kokkos::deep_copy(this->A_n.getView(), this->A_np1.getView()); + } + + void applyBCs() { + if constexpr (boundary_conditions == periodic) { + A_n.getFieldBC().apply(A_n); + A_nm1.getFieldBC().apply(A_nm1); + A_np1.getFieldBC().apply(A_np1); + } else { + Vector true_nr = nr_m; + true_nr += (A_n.getNghost() * 2); + second_order_mur_boundary_conditions bcs{}; + bcs.apply(A_n, A_nm1, A_np1, this->dt, true_nr, layout_mp->getLocalNDIndex()); + } + } + + public: + template + struct nondispersive { + scalar a1; + scalar a2; + scalar a4; + scalar a6; + scalar a8; + }; + void step() { + const auto& ldom = layout_mp->getLocalNDIndex(); + const int nghost = A_n.getNghost(); + const auto aview = A_n.getView(); + const auto anp1view = A_np1.getView(); + const auto anm1view = A_nm1.getView(); + const auto source_view = Maxwell::source_mp->getView(); + + const scalar calA = 0.25 * (1 + 0.02 / (sq(hr_m[2] / hr_m[0]) + sq(hr_m[2] / hr_m[1]))); + nondispersive ndisp{ + .a1 = 2 + * (1 - (1 - 2 * calA) * sq(dt / hr_m[0]) - (1 - 2 * calA) * sq(dt / hr_m[1]) + - sq(dt / hr_m[2])), + .a2 = sq(dt / hr_m[0]), + .a4 = sq(dt / hr_m[1]), + .a6 = sq(dt / hr_m[2]) - 2 * calA * sq(dt / hr_m[0]) - 2 * calA * sq(dt / hr_m[1]), + .a8 = sq(dt)}; + Vector true_nr = nr_m; + true_nr += (nghost * 2); + constexpr uint32_t one_if_absorbing_otherwise_0 = + boundary_conditions == absorbing ? 1 : 0; + Kokkos::parallel_for( + ippl::getRangePolicy(aview, nghost), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) { + uint32_t ig = i + ldom.first()[0]; + uint32_t jg = j + ldom.first()[1]; + uint32_t kg = k + ldom.first()[2]; + uint32_t val = + uint32_t(ig == one_if_absorbing_otherwise_0) + + (uint32_t(jg == one_if_absorbing_otherwise_0) << 1) + + (uint32_t(kg == one_if_absorbing_otherwise_0) << 2) + + (uint32_t(ig == true_nr[0] - one_if_absorbing_otherwise_0 - 1) << 3) + + (uint32_t(jg == true_nr[1] - one_if_absorbing_otherwise_0 - 1) << 4) + + (uint32_t(kg == true_nr[2] - one_if_absorbing_otherwise_0 - 1) << 5); + + if (!val) { + anp1view(i, j, k) = -anm1view(i, j, k) + ndisp.a1 * aview(i, j, k) + + ndisp.a2 + * (calA * aview(i + 1, j, k - 1) + + (1 - 2 * calA) * aview(i + 1, j, k) + + calA * aview(i + 1, j, k + 1)) + + ndisp.a2 + * (calA * aview(i - 1, j, k - 1) + + (1 - 2 * calA) * aview(i - 1, j, k) + + calA * aview(i - 1, j, k + 1)) + + ndisp.a4 + * (calA * aview(i, j + 1, k - 1) + + (1 - 2 * calA) * aview(i, j + 1, k) + + calA * aview(i, j + 1, k + 1)) + + ndisp.a4 + * (calA * aview(i, j - 1, k - 1) + + (1 - 2 * calA) * aview(i, j - 1, k) + + calA * aview(i, j - 1, k + 1)) + + ndisp.a6 * aview(i, j, k + 1) + + ndisp.a6 * aview(i, j, k - 1) + + ndisp.a8 * source_view(i, j, k); + } + }); + Kokkos::fence(); + A_np1.fillHalo(); + applyBCs(); + } + void evaluate_EB() { + *(Maxwell::En_mp) = typename EMField::value_type(0); + *(Maxwell::Bn_mp) = typename EMField::value_type(0); + ippl::Vector inverse_2_spacing = ippl::Vector(0.5) / hr_m; + const scalar idt = scalar(1.0) / dt; + auto A_np1 = this->A_np1.getView(), A_n = this->A_n.getView(), + A_nm1 = this->A_nm1.getView(); + auto source = Maxwell::source_mp->getView(); + auto Eview = Maxwell::En_mp->getView(); + auto Bview = Maxwell::Bn_mp->getView(); + + Kokkos::parallel_for( + this->A_n.getFieldRangePolicy(), KOKKOS_LAMBDA(size_t i, size_t j, size_t k) { + ippl::Vector dAdt = + (A_n(i, j, k).template tail<3>() - A_nm1(i, j, k).template tail<3>()) * idt; + ippl::Vector dAdx = + (A_n(i + 1, j, k) - A_n(i - 1, j, k)) * inverse_2_spacing[0]; + ippl::Vector dAdy = + (A_n(i, j + 1, k) - A_n(i, j - 1, k)) * inverse_2_spacing[1]; + ippl::Vector dAdz = + (A_n(i, j, k + 1) - A_n(i, j, k - 1)) * inverse_2_spacing[2]; + + ippl::Vector grad_phi{dAdx[0], dAdy[0], dAdz[0]}; + ippl::Vector curlA{ + dAdy[3] - dAdz[2], + dAdz[1] - dAdx[3], + dAdx[2] - dAdy[1], + }; + Eview(i, j, k) = -dAdt - grad_phi; + Bview(i, j, k) = curlA; + }); + Kokkos::fence(); + } + + typename FourField::Mesh_t* mesh_mp; + FieldLayout* layout_mp; + NDIndex domain_m; + Vector_t hr_m; + scalar dt; + Vector nr_m; + bool periodic_bc; + + void initialize() { + // get layout and mesh + layout_mp = &(this->source_mp->getLayout()); + mesh_mp = &(this->source_mp->get_mesh()); + + // get mesh spacing, domain, and mesh size + hr_m = mesh_mp->getMeshSpacing(); + dt = hr_m[2]; + domain_m = layout_mp->getDomain(); + for (unsigned int i = 0; i < Dim; ++i) + nr_m[i] = domain_m[i].length(); + + // initialize fields + A_nm1.initialize(*mesh_mp, *layout_mp); + A_n.initialize(*mesh_mp, *layout_mp); + A_np1.initialize(*mesh_mp, *layout_mp); + + A_nm1 = 0.0; + A_n = 0.0; + A_np1 = 0.0; + }; + }; +} // namespace ippl + +#endif \ No newline at end of file diff --git a/src/MaxwellSolvers/Maxwell.h b/src/MaxwellSolvers/Maxwell.h index 301bd4914..1fb952791 100644 --- a/src/MaxwellSolvers/Maxwell.h +++ b/src/MaxwellSolvers/Maxwell.h @@ -15,7 +15,7 @@ namespace ippl { - template + template class Maxwell { public: constexpr static unsigned Dim = EMField::dim; diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 70168a79b..0702489f3 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -124,12 +124,23 @@ namespace ippl { const ParticleAttrib, Properties...>& pp) const; template - void gather(Field& f, const ParticleAttrib, Properties...>& pp); + void gather(Field& f, const ParticleAttrib, Properties...>& pp, const typename Field::Mesh_t::vector_type::value_type offset = 0.5); T sum(); T max(); T min(); T prod(); + template + requires (std::is_invocable_v) + returnType customReduction(reductionLambda lambda, returnType identity){ + auto dview = this->dview_m; + Kokkos::parallel_reduce(*(this->localNum_mp), KOKKOS_LAMBDA(size_t i, returnType& ref){ + lambda(dview(i), ref); + }, identity); + returnType globaltemp; + Comm->allreduce(identity, globaltemp, 1, std::plus()); + return globaltemp; + } private: view_type dview_m; diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 3b37f920a..7e0a58fbf 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -155,7 +155,8 @@ namespace ippl { template template void ParticleAttrib::gather( - Field& f, const ParticleAttrib, Properties...>& pp) { + Field& f, const ParticleAttrib, Properties...>& pp, + const typename Field::Mesh_t::vector_type::value_type offset) { constexpr unsigned Dim = Field::dim; using PositionType = typename Field::Mesh_t::value_type; @@ -186,7 +187,7 @@ namespace ippl { "ParticleAttrib::gather", policy_type(0, *(this->localNum_mp)), KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point - vector_type l = (pp(idx) - origin) * invdx + 0.5; + vector_type l = (pp(idx) - origin) * invdx + (1.0 - offset); Vector index = l; Vector whi = l - index; Vector wlo = 1.0 - whi; @@ -194,6 +195,7 @@ namespace ippl { Vector args = index - lDom.first() + nghost; // gather + // std::cout << index << std::endl; dview_m(idx) = detail::gatherFromField(std::make_index_sequence<1 << Field::dim>{}, view, wlo, whi, args); }); diff --git a/src/Types/Matrix.h b/src/Types/Matrix.h new file mode 100644 index 000000000..147286f99 --- /dev/null +++ b/src/Types/Matrix.h @@ -0,0 +1,186 @@ +#ifndef MATRIX_H +#define MATRIX_H +#include + +#include "Vector.h" +namespace ippl { + /** + * @brief Column major + * + * @tparam T Scalar type + * @tparam m Number of rows + * @tparam n Number of columns + */ + template + struct matrix { + + ippl::Vector, n> data; + constexpr static bool squareMatrix = (m == n && m > 0); + + constexpr KOKKOS_INLINE_FUNCTION matrix(T diag) + requires(m == n) + { + for (unsigned i = 0; i < n; i++) { + for (unsigned j = 0; j < n; j++) { + data[i][j] = diag * T(i == j); + } + } + } + constexpr matrix() = default; + KOKKOS_INLINE_FUNCTION constexpr static matrix zero() { + matrix ret; + for (unsigned i = 0; i < n; i++) { + for (unsigned j = 0; j < n; j++) { + ret.data[i][j] = 0; + } + } + return ret; + }; + KOKKOS_INLINE_FUNCTION void setZero() { + for (unsigned i = 0; i < n; i++) { + for (unsigned j = 0; j < n; j++) { + data[i][j] = 0; + } + } + } + + KOKKOS_INLINE_FUNCTION T operator()(int i, int j) const noexcept { return data[j][i]; } + KOKKOS_INLINE_FUNCTION T& operator()(int i, int j) noexcept { return data[j][i]; } + template + KOKKOS_INLINE_FUNCTION matrix cast() const noexcept { + matrix ret; + for (unsigned i = 0; i < n; i++) { + ret.data[i] = data[i].template cast(); + } + return ret; + } + KOKKOS_INLINE_FUNCTION matrix operator+(const matrix& other) const { + matrix result; + for (int i = 0; i < n; ++i) { + result.data[i] = data[i] + other.data[i]; + } + return result; + } + + // Implement matrix subtraction + KOKKOS_INLINE_FUNCTION matrix operator-(const matrix& other) const { + matrix result; + for (int i = 0; i < n; ++i) { + result.data[i] = data[i] - other.data[i]; + } + return result; + } + KOKKOS_INLINE_FUNCTION matrix operator*(const T& factor) const { + matrix result; + for (int i = 0; i < n; ++i) { + result.data[i] = data[i] * factor; + } + return result; + } + KOKKOS_INLINE_FUNCTION matrix operator/(const T& divisor) const { + matrix result; + for (int i = 0; i < n; ++i) { + result.data[i] = data[i] / divisor; + } + return result; + } + + // Implement matrix-vector multiplication + template + KOKKOS_INLINE_FUNCTION ippl::Vector operator*( + const ippl::Vector& vec) const { + static_assert((int)other_m == n); + ippl::Vector result(0); + for (int i = 0; i < n; ++i) { + for(int j = 0;j < m;i++){ + //This could be a vector operation, but you never know with ippl::Vector + result[j] += vec[i] * data[i][j]; + } + } + return result; + } + template + requires(n == otherm) + KOKKOS_INLINE_FUNCTION matrix operator*( + const matrix& otherMat) const noexcept { + matrix ret(0); + for (int i = 0; i < m; i++) { + for (int j = 0; j < othern; j++) { + for (int k = 0; k < n; k++) { + ret(i, j) += (*this)(i, k) * otherMat(k, j); + } + } + } + return ret; + } + KOKKOS_INLINE_FUNCTION void addCol(int i, int j, T alpha = 1.0) { + data[j] += data[i] * alpha; + } + KOKKOS_INLINE_FUNCTION matrix inverse() const noexcept + requires(squareMatrix) + { + constexpr int N = m; + + matrix ret(1.0); + matrix dis(*this); + + for (int i = 0; i < N; i++) { + for (int j = i + 1; j < N; j++) { + T alpha = -dis(i, j) / dis(i, i); + dis.addCol(i, j, alpha); + dis(i, j) = 0; + ret.addCol(i, j, alpha); + } + } + for (int i = N - 1; i >= 0; i--) { + for (int j = i - 1; j >= 0; j--) { + T alpha = -dis(i, j) / dis(i, i); + dis.addCol(i, j, alpha); + dis(i, j) = 0; + ret.addCol(i, j, alpha); + } + } + for (int i = 0; i < N; i++) { + T d = dis(i, i); + T oneod = T(1) / d; + dis.data[i] *= oneod; + ret.data[i] *= oneod; + } + + return ret; + } + + template + friend stream_t& operator<<(stream_t& str, const matrix& mat) { + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + str << mat.data[j][i] << " "; + } + str << "\n"; + } + return str; + } + }; + /** + * @brief Computes the outer product l r^T of two vectors + * + * @tparam T Scalar type + * @tparam N Output rows + * @tparam M Output cols + * @param l left operand + * @param r right operand + * @return KOKKOS_INLINE_FUNCTION the matrix l r^T + */ + template + KOKKOS_INLINE_FUNCTION matrix outer_product(const ippl::Vector& l, + const ippl::Vector& r) { + matrix ret; + for (unsigned i = 0; i < N; i++) { + for (unsigned j = 0; j < M; j++) { + ret.data[j][i] = l[i] * r[j]; + } + } + return ret; + } +} // namespace ippl +#endif \ No newline at end of file diff --git a/src/Types/Vector.h b/src/Types/Vector.h index 4380fc8cf..216ddf5b7 100644 --- a/src/Types/Vector.h +++ b/src/Types/Vector.h @@ -26,7 +26,7 @@ namespace ippl { static constexpr unsigned dim = Dim; KOKKOS_FUNCTION - Vector() + constexpr Vector() : Vector(value_type(0)) {} template & rhs) const; + KOKKOS_INLINE_FUNCTION Vector cross(const Vector& rhs) const; + KOKKOS_INLINE_FUNCTION void fill(const T& v){ + for(unsigned k = 0;k < Dim;k++)(*this)[k] = v; + } + template + KOKKOS_INLINE_FUNCTION Vector tail()const noexcept{ + Vector ret; + static_assert(N <= Dim, "N must be smaller than Dim"); + constexpr unsigned diff = Dim - N; + for(unsigned i = 0;i < N;i++){ + ret[i] = (*this)[i + diff]; + } + return ret; + } + template + KOKKOS_INLINE_FUNCTION Vector head()const noexcept{ + Vector ret; + static_assert(N <= Dim, "N must be smaller than Dim"); + for(unsigned i = 0;i < N;i++){ + ret[i] = (*this)[i]; + } + return ret; + } + template + KOKKOS_INLINE_FUNCTION Vector cast()const{ + Vector ret; + for(unsigned k = 0;k < Dim;k++)ret[k] = OtherType((*this)[k]); + return ret; + } + KOKKOS_INLINE_FUNCTION T squaredNorm()const{ + return this->dot(*this); + } + KOKKOS_INLINE_FUNCTION T norm()const{ + using Kokkos::sqrt; + return sqrt(squaredNorm()); + } //Needs to be public to be a standard-layout type //private: diff --git a/src/Types/Vector.hpp b/src/Types/Vector.hpp index aa9a6168b..8723ed431 100644 --- a/src/Types/Vector.hpp +++ b/src/Types/Vector.hpp @@ -187,6 +187,15 @@ namespace ippl { } return res; } + template + KOKKOS_INLINE_FUNCTION Vector Vector::cross(const Vector& rhs) const { + + Vector ret; + ret[0] = data_m[1] * rhs[2] - data_m[2] * rhs[1]; + ret[1] = data_m[2] * rhs[0] - data_m[0] * rhs[2]; + ret[2] = data_m[0] * rhs[1] - data_m[1] * rhs[0]; + return ret; + } template inline std::ostream& operator<<(std::ostream& out, const Vector& v) { diff --git a/test/solver/CMakeLists.txt b/test/solver/CMakeLists.txt index 358c475cf..dda292fff 100644 --- a/test/solver/CMakeLists.txt +++ b/test/solver/CMakeLists.txt @@ -25,6 +25,12 @@ target_link_libraries ( ${IPPL_LIBS} ${MPI_CXX_LIBRARIES} ) +add_executable (TestFDTDSolver TestFDTDSolver.cpp) +target_link_libraries ( + TestFDTDSolver + ${IPPL_LIBS} + ${MPI_CXX_LIBRARIES} +) if (ENABLE_FFT) add_executable (TestGaussian_convergence TestGaussian_convergence.cpp) diff --git a/test/solver/TestFDTDSolver.cpp b/test/solver/TestFDTDSolver.cpp new file mode 100644 index 000000000..62088acc9 --- /dev/null +++ b/test/solver/TestFDTDSolver.cpp @@ -0,0 +1,139 @@ +#include +using std::size_t; +#include +#include "Ippl.h" +#include "Types/Vector.h" +#include "Field/Field.h" +#include "MaxwellSolvers/FDTD.h" +#define STB_IMAGE_WRITE_IMPLEMENTATION +#include +template + requires((std::is_floating_point_v)) +KOKKOS_INLINE_FUNCTION float gauss(scalar1 mean, scalar1 stddev, scalar... x){ + uint32_t dim = sizeof...(scalar); + ippl::Vector vec{scalar1(x - mean)...}; + scalar1 vecsum(0); + for(unsigned d = 0;d < dim;d++){ + vecsum += vec[d] * vec[d]; + + } + #ifndef __CUDA_ARCH__ + using std::exp; + #endif + return exp(-(vecsum) / (stddev * stddev)); +} + +int main(int argc, char* argv[]){ + ippl::initialize(argc, argv); + { + using scalar = float; + const unsigned dim = 3; + using vector_type = ippl::Vector; + using vector4_type = ippl::Vector; + using FourField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + using ThreeField = ippl::Field, typename ippl::UniformCartesian::DefaultCentering>; + constexpr size_t n = 100; + ippl::Vector nr{n / 2, n / 2, 2 * n}; + ippl::NDIndex<3> owned(nr[0], nr[1], nr[2]); + ippl::Vector extents{scalar(1), scalar(1), scalar(1)}; + std::array isParallel; + isParallel.fill(false); + isParallel[2] = true; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(MPI_COMM_WORLD, owned, isParallel); + + ippl::Vector hx; + for(unsigned d = 0;d < 3;d++){ + hx[d] = extents[d] / (scalar)nr[d]; + } + ippl::Vector origin = {0,0,0}; + ippl::UniformCartesian mesh(owned, hx, origin); + FourField source(mesh, layout); + ThreeField E(mesh, layout); + ThreeField B(mesh, layout); + source = vector4_type(0); + ippl::StandardFDTDSolver sfdsolver(source, E, B); + sfdsolver.setPeriodicBoundaryConditions(); + auto aview = sfdsolver.A_n.getView(); + auto am1view = sfdsolver.A_nm1.getView(); + auto lDom = layout.getLocalNDIndex(); + size_t nghost = sfdsolver.A_n.getNghost(); + Kokkos::parallel_for(ippl::getRangePolicy(aview, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k){ + const size_t ig = i + lDom.first()[0] - nghost; + const size_t jg = j + lDom.first()[1] - nghost; + const size_t kg = k + lDom.first()[2] - nghost; + scalar x = scalar(ig) / nr[0]; + scalar y = scalar(jg) / nr[1]; + scalar z = scalar(kg) / nr[2]; + //std::cout << x << ", " << y << ", " << z << "\n"; + (void)x; + (void)y; + (void)z; + const scalar magnitude = gauss(scalar(0.5), scalar(0.05), z); + aview (i,j,k) = vector4_type{scalar(0), scalar(0), magnitude, scalar(0)}; + am1view(i,j,k) = vector4_type{scalar(0), scalar(0), magnitude, scalar(0)}; + }); + Kokkos::fence(); + sfdsolver.A_n.getFieldBC().apply(sfdsolver.A_n); + //Kokkos::fence(); + //std::cout << sfdsolver.A_n.getView()(0,0,0) << "\n"; + //std::cout << sfdsolver.A_n.getView()(0,5,5) << "\n"; + //goto exit; + sfdsolver.A_np1.getFieldBC().apply(sfdsolver.A_np1); + sfdsolver.A_nm1.getFieldBC().apply(sfdsolver.A_nm1); + sfdsolver.A_n.fillHalo(); + sfdsolver.A_nm1.fillHalo(); + int img_width = 500; + int img_height = 500; + float* imagedata = new float[img_width * img_height * 3]; + std::fill(imagedata, imagedata + img_width * img_height * 3, 0.0f); + uint8_t* imagedata_final = new uint8_t[img_width * img_height * 3]; + for(size_t s = 0;s < 4 * n;s++){ + auto ebh = sfdsolver.A_n.getHostMirror(); + Kokkos::deep_copy(ebh, sfdsolver.A_n.getView()); + for(int i = 1;i < img_width;i++){ + for(int j = 1;j < img_height;j++){ + int i_remap = (double(i) / (img_width - 1)) * (nr[2] - 4) + 2; + int j_remap = (double(j) / (img_height - 1)) * (nr[0] - 4) + 2; + if(i_remap >= lDom.first()[2] && i_remap <= lDom.last()[2]){ + if(j_remap >= lDom.first()[0] && j_remap <= lDom.last()[0]){ + ippl::Vector acc = ebh(j_remap + 1 - lDom.first()[0], nr[1] / 2, i_remap + 1 - lDom.first()[2]); + + + float normalized_colorscale_value = acc.norm(); + //int index = (int)std::max(0.0f, std::min(normalized_colorscale_value * 255.0f, 255.0f)); + imagedata[(j * img_width + i) * 3 + 0] = normalized_colorscale_value * 255.0f; + imagedata[(j * img_width + i) * 3 + 1] = normalized_colorscale_value * 255.0f; + imagedata[(j * img_width + i) * 3 + 2] = normalized_colorscale_value * 255.0f; + } + } + } + } + std::transform(imagedata, imagedata + img_height * img_width * 3, imagedata_final, [](float x){return (unsigned char)std::min(255.0f, std::max(0.0f,x));}); + char output[1024] = {0}; + snprintf(output, 1023, "%soutimage%.05lu.bmp", "renderdata/", s); + if(s % 4 == 0) + stbi_write_bmp(output, img_width, img_height, 3, imagedata_final); + sfdsolver.solve(); + } + double sum_error = 0; + Kokkos::parallel_reduce(ippl::getRangePolicy(aview, 1), KOKKOS_LAMBDA(size_t i, size_t j, size_t k, double& ref){ + const size_t ig = i + lDom.first()[0] - nghost; + const size_t jg = j + lDom.first()[1] - nghost; + const size_t kg = k + lDom.first()[2] - nghost; + scalar x = scalar(ig) / nr[0]; + scalar y = scalar(jg) / nr[1]; + scalar z = scalar(kg) / nr[2]; + (void)x; + (void)y; + (void)z; + ref += Kokkos::abs(gauss(scalar(0.5), scalar(0.05), z) - aview(i,j,k)[2]); + }, sum_error); + std::cout << "Sum: " << sum_error / double(n * n * n) << "\n"; + + + } + //exit: + ippl::finalize(); +} \ No newline at end of file