diff --git a/Src/Particle/AMReX_NeighborParticles.H b/Src/Particle/AMReX_NeighborParticles.H index 47dc17413f..10f2fb26e5 100644 --- a/Src/Particle/AMReX_NeighborParticles.H +++ b/Src/Particle/AMReX_NeighborParticles.H @@ -9,6 +9,7 @@ #include #include #include +#include namespace amrex { diff --git a/Src/Particle/AMReX_ParIter.H b/Src/Particle/AMReX_ParIter.H index 459ff31757..d37bef0db5 100644 --- a/Src/Particle/AMReX_ParIter.H +++ b/Src/Particle/AMReX_ParIter.H @@ -18,6 +18,9 @@ struct Particle; template struct SoAParticle; +template +struct RTSoAParticle; + struct DefaultAssignor; // for backwards compatibility @@ -87,11 +90,11 @@ public: [[nodiscard]] SoARef GetStructOfArrays () const { return GetParticleTile().GetStructOfArrays(); } - [[nodiscard]] int numParticles () const { return GetParticleTile().numParticles(); } + [[nodiscard]] auto numParticles () const { return GetParticleTile().numParticles(); } - [[nodiscard]] int numRealParticles () const { return GetParticleTile().numRealParticles(); } + [[nodiscard]] auto numRealParticles () const { return GetParticleTile().numRealParticles(); } - [[nodiscard]] int numNeighborParticles () const { return GetParticleTile().numNeighborParticles(); } + [[nodiscard]] auto numNeighborParticles () const { return GetParticleTile().numNeighborParticles(); } [[nodiscard]] Long capacity () const { return GetParticleTile().capacity(); } @@ -226,6 +229,9 @@ using ParConstIter = ParConstIter_impl, T_ template class Allocator=DefaultAllocator, class CellAssignor=DefaultAssignor> using ParConstIterSoA = ParConstIter_impl, T_NArrayReal, T_NArrayInt, Allocator, CellAssignor>; +template +using ParConstIterRTSoA = ParConstIter_impl, 0, 0, PolymorphicArenaAllocator, CellAssignor>; + template class Allocator=DefaultAllocator, class CellAssignor=DefaultAssignor> using ParIter = ParIter_impl, T_NArrayReal, T_NArrayInt, Allocator, CellAssignor>; @@ -233,6 +239,9 @@ using ParIter = ParIter_impl, T_NArrayReal template class Allocator=DefaultAllocator, class CellAssignor=DefaultAssignor> using ParIterSoA = ParIter_impl, T_NArrayReal, T_NArrayInt, Allocator, CellAssignor>; +template +using ParIterRTSoA = ParIter_impl, 0, 0, PolymorphicArenaAllocator, CellAssignor>; + } #endif diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H index 215e037122..4a1b84cdb0 100644 --- a/Src/Particle/AMReX_Particle.H +++ b/Src/Particle/AMReX_Particle.H @@ -389,6 +389,7 @@ struct SoAParticleBase static constexpr int NReal=0; static constexpr int NInt=0; static constexpr bool is_soa_particle = true; + static constexpr bool is_rtsoa_particle = false; }; /** \brief The struct used to store particles. @@ -401,6 +402,7 @@ struct alignas(sizeof(double)) Particle : ParticleBase { static constexpr bool is_soa_particle = false; + static constexpr bool is_rtsoa_particle = false; using StorageParticleType = Particle; using ConstType = Particle const; diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index 2397a7d482..67130a54d1 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -179,8 +180,14 @@ public: RealDescriptor ParticleRealDescriptor = FPC::Native64RealDescriptor(); #endif + static constexpr bool is_rtsoa_pc = ParticleType::is_rtsoa_particle; + using ParticleContainerType = ParticleContainer_impl; - using ParticleTileType = ParticleTile; + using ParticleTileType = std::conditional_t< + is_rtsoa_pc, + ParticleTileRT, + ParticleTile + >; using ParticleInitData = ParticleInitType; //! A single level worth of particles is indexed (grid id, tile id) @@ -1461,11 +1468,13 @@ protected: int lev_min = 0, int lev_max = -1, int local_grid=-1) const; public: + using FlagsVector = PODVector>; + void WriteParticles (int level, std::ofstream& ofs, int fnum, Vector& which, Vector& count, Vector& where, const Vector& write_real_comp, const Vector& write_int_comp, - const Vector,IntVector>>& particle_io_flags, bool is_checkpoint) const; + const Vector,FlagsVector>>& particle_io_flags, bool is_checkpoint) const; #ifdef AMREX_USE_HDF5 #include "AMReX_ParticlesHDF5.H" #endif @@ -1555,6 +1564,9 @@ using ParticleContainer = ParticleContainer_impl class Allocator=DefaultAllocator, class CellAssignor=DefaultAssignor> using ParticleContainerPureSoA = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator, CellAssignor>; +template +using ParticleContainerRTSoA = ParticleContainer_impl, 0, 0, PolymorphicArenaAllocator, CellAssignor>; + } #include "AMReX_ParticleInit.H" diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index aac96dad87..3e6975bb50 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1145,7 +1145,7 @@ void ParticleContainer_impl:: copyParticles (const PCType& other, bool local) { - using PData = ConstParticleTileData; + using PData = typename ParticleTileType::ConstParticleTileDataType; copyParticles(other, [] AMREX_GPU_HOST_DEVICE (const PData& /*data*/, int /*i*/) { return 1; }, local); } @@ -1156,7 +1156,7 @@ void ParticleContainer_impl:: addParticles (const PCType& other, bool local) { - using PData = ConstParticleTileData; + using PData = typename ParticleTileType::ConstParticleTileDataType; addParticles(other, [] AMREX_GPU_HOST_DEVICE (const PData& /*data*/, int /*i*/) { return 1; }, local); } @@ -1219,7 +1219,8 @@ addParticles (const PCType& other, F const& f, bool local) auto dst_index = ptile.numParticles(); ptile.resize(dst_index + np); - auto count = amrex::filterParticles(ptile, ptile_other, f, 0, dst_index, np); + auto count = amrex::filterParticles(ptile, ptile_other, f, + static_cast(0), dst_index, np); ptile.resize(dst_index + count); } @@ -1267,85 +1268,7 @@ ParticleContainer_impl; - constexpr std::size_t nchunks = sizeof(ParticleType) / sizeof(tmp_t); - Gpu::DeviceVector tmp(np); - auto* ptmp = tmp.data(); - auto* paos = (tmp_t*)(ptile.getParticleTileData().m_aos); - for (std::size_t ichunk = 0; ichunk < nchunks; ++ichunk) { - // Do not need to reorder neighbor particles - AMREX_HOST_DEVICE_FOR_1D(np, i, - { - ptmp[i] = paos[permutations[i]*nchunks+ichunk]; - }); - AMREX_HOST_DEVICE_FOR_1D(np, i, - { - paos[i*nchunks+ichunk] = ptmp[i]; - }); - } - Gpu::streamSynchronize(); - } else { - typename SoA::IdCPU tmp_idcpu; - if constexpr (has_polymorphic_allocator) { - tmp_idcpu.setArena(arena()); - } - tmp_idcpu.resize(np_total); - auto src = ptile.GetStructOfArrays().GetIdCPUData().data(); - uint64_t* dst = tmp_idcpu.data(); - AMREX_HOST_DEVICE_FOR_1D( np_total, i, - { - dst[i] = i < np ? src[permutations[i]] : src[i]; - }); - - Gpu::streamSynchronize(); - - ptile.GetStructOfArrays().GetIdCPUData().swap(tmp_idcpu); - } - - { // Create a scope for the temporary vector below - RealVector tmp_real; - if constexpr (has_polymorphic_allocator) { - tmp_real.setArena(arena()); - } - tmp_real.resize(np_total); - for (int comp = 0; comp < NArrayReal + m_num_runtime_real; ++comp) { - auto src = ptile.GetStructOfArrays().GetRealData(comp).data(); - ParticleReal* dst = tmp_real.data(); - AMREX_HOST_DEVICE_FOR_1D( np_total, i, - { - dst[i] = i < np ? src[permutations[i]] : src[i]; - }); - - Gpu::streamSynchronize(); - - ptile.GetStructOfArrays().GetRealData(comp).swap(tmp_real); - } - } - - IntVector tmp_int; - if constexpr (has_polymorphic_allocator) { - tmp_int.setArena(arena()); - } - tmp_int.resize(np_total); - for (int comp = 0; comp < NArrayInt + m_num_runtime_int; ++comp) { - auto src = ptile.GetStructOfArrays().GetIntData(comp).data(); - int* dst = tmp_int.data(); - AMREX_HOST_DEVICE_FOR_1D( np_total , i, - { - dst[i] = i < np ? src[permutations[i]] : src[i]; - }); - - Gpu::streamSynchronize(); - - ptile.GetStructOfArrays().GetIntData(comp).swap(tmp_int); - } + amrex::ReorderParticles(ptile, permutations); } else { ParticleTileType ptile_tmp; ptile_tmp.define(m_num_runtime_real, m_num_runtime_int, @@ -1679,34 +1602,18 @@ ParticleContainer_impl > > tmp_remote; - Vector, Vector > > tmp_local; - Vector, Vector > > > soa_local; - tmp_local.resize(theEffectiveFinestLevel+1); - soa_local.resize(theEffectiveFinestLevel+1); + Vector, Vector > > ptile_local; + ptile_local.resize(theEffectiveFinestLevel+1); // we resize these buffers outside the parallel region for (int lev = lev_min; lev <= lev_max; lev++) { for (MFIter mfi(*m_dummy_mf[lev], this->do_tiling ? this->tile_size : IntVect::TheZeroVector()); mfi.isValid(); ++mfi) { auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex()); - tmp_local[lev][index].resize(num_threads); - soa_local[lev][index].resize(num_threads); + ptile_local[lev][index].resize(num_threads); for (int t = 0; t < num_threads; ++t) { - soa_local[lev][index][t].define(m_num_runtime_real, m_num_runtime_int, - &m_soa_rdata_names, &m_soa_idata_names); - if constexpr (has_polymorphic_allocator) { - if constexpr (ParticleType::is_soa_particle) { - soa_local[lev][index][t].GetIdCPUData().setArena(arena()); - } else { - tmp_local[lev][index][t].setArena(arena()); - } - for (int j = 0; j < soa_local[lev][index][t].NumRealComps(); ++j) { - soa_local[lev][index][t].GetRealData(j).setArena(arena()); - } - for (int j = 0; j < soa_local[lev][index][t].NumIntComps(); ++j) { - soa_local[lev][index][t].GetIntData(j).setArena(arena()); - } - } + ptile_local[lev][index][t].define(m_num_runtime_real, m_num_runtime_int, + &m_soa_rdata_names, &m_soa_idata_names, arena()); } } } @@ -1741,262 +1648,92 @@ ParticleContainer_implGetStructOfArrays(); - auto& aos = ptile_ptrs[pmap_it]->GetArrayOfStructs(); - // AMREX_ASSERT_WITH_MESSAGE((NumRealComps() == 0 && NumIntComps() == 0) - // || aos.size() == soa.size(), - // "The AoS and SoA data on this tile are different sizes - " - // "perhaps particles have not been initialized correctly?"); unsigned npart = ptile_ptrs[pmap_it]->numParticles(); ParticleLocData pld; - if constexpr (!ParticleType::is_soa_particle){ - - if (npart != 0) { - Long last = npart - 1; - Long pindex = 0; - while (pindex <= last) { - ParticleType& p = aos[pindex]; - - if ((remove_negative == false) && (!p.id().is_valid())) { - ++pindex; - continue; - } - - if (!p.id().is_valid()) - { - aos[pindex] = aos[last]; - for (int comp = 0; comp < NumRealComps(); comp++) { - soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; - } - for (int comp = 0; comp < NumIntComps(); comp++) { - soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; - } - correctCellVectors(last, pindex, grid, aos[pindex]); - --last; - continue; - } - - locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1); - - particlePostLocate(p, pld, lev); - - if (!p.id().is_valid()) - { - aos[pindex] = aos[last]; - for (int comp = 0; comp < NumRealComps(); comp++) { - soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; - } - for (int comp = 0; comp < NumIntComps(); comp++) { - soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; - } - correctCellVectors(last, pindex, grid, aos[pindex]); - --last; - continue; - } - - const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); - if (who == MyProc) { - if (pld.m_lev != lev || pld.m_grid != grid || pld.m_tile != tile) { - // We own it but must shift it to another place. - auto index = std::make_pair(pld.m_grid, pld.m_tile); - AMREX_ASSERT(tmp_local[pld.m_lev][index].size() == num_threads); - tmp_local[pld.m_lev][index][thread_num].push_back(p); - for (int comp = 0; comp < NumRealComps(); ++comp) { - RealVector& arr = soa_local[pld.m_lev][index][thread_num].GetRealData(comp); - arr.push_back(soa.GetRealData(comp)[pindex]); - } - for (int comp = 0; comp < NumIntComps(); ++comp) { - IntVector& arr = soa_local[pld.m_lev][index][thread_num].GetIntData(comp); - arr.push_back(soa.GetIntData(comp)[pindex]); - } - - p.id().make_invalid(); // Invalidate the particle - } - } - else { - auto& particles_to_send = tmp_remote[who][thread_num]; - auto old_size = particles_to_send.size(); - auto new_size = old_size + superparticle_size; - particles_to_send.resize(new_size); - std::memcpy(&particles_to_send[old_size], &p, particle_size); - char* dst = &particles_to_send[old_size] + particle_size; - int array_comp_start = 0; - if constexpr (!ParticleType::is_soa_particle) { - array_comp_start = AMREX_SPACEDIM + NStructReal; - } - for (int comp = 0; comp < NumRealComps(); comp++) { - if (h_redistribute_real_comp[array_comp_start + comp]) { - std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal)); - dst += sizeof(ParticleReal); - } - } - array_comp_start = 2 + NStructInt; - for (int comp = 0; comp < NumIntComps(); comp++) { - if (h_redistribute_int_comp[array_comp_start + comp]) { - std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int)); - dst += sizeof(int); - } - } - - p.id().make_invalid(); // Invalidate the particle - } + auto particle_tile = ptile_ptrs[pmap_it]; + auto ptd = particle_tile->getParticleTileData(); - if (!p.id().is_valid()) - { - aos[pindex] = aos[last]; - for (int comp = 0; comp < NumRealComps(); comp++) { - soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; - } - for (int comp = 0; comp < NumIntComps(); comp++) { - soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; - } - correctCellVectors(last, pindex, grid, aos[pindex]); - --last; - continue; - } + if (npart != 0) { + Long last = npart - 1; + Long pindex = 0; + while (pindex <= last) { + decltype(auto) p = ptd[pindex]; + if ((remove_negative == false) && (!ptd.id(pindex).is_valid())) { ++pindex; + continue; } - aos().erase(aos().begin() + last + 1, aos().begin() + npart); - for (int comp = 0; comp < NumRealComps(); comp++) { - RealVector& rdata = soa.GetRealData(comp); - rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart); - } - for (int comp = 0; comp < NumIntComps(); comp++) { - IntVector& idata = soa.GetIntData(comp); - idata.erase(idata.begin() + last + 1, idata.begin() + npart); + if (!ptd.id(pindex).is_valid()) { + copyParticle(ptd, ptd, last, pindex); + correctCellVectors(last, pindex, grid, p); + --last; + continue; } - } - } else { // soa particle + locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1); - auto particle_tile = ptile_ptrs[pmap_it]; - if (npart != 0) { - Long last = npart - 1; - Long pindex = 0; - auto ptd = particle_tile->getParticleTileData(); - while (pindex <= last) { - ParticleType p = ptd[pindex]; + particlePostLocate(p, pld, lev); - if ((remove_negative == false) && (!ptd.id(pindex).is_valid())) { - ++pindex; - continue; - } + if (!ptd.id(pindex).is_valid()) { + copyParticle(ptd, ptd, last, pindex); + correctCellVectors(last, pindex, grid, p); + --last; + continue; + } - if (!ptd.id(pindex).is_valid()){ - soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last]; - for (int comp = 0; comp < NumRealComps(); comp++) { - soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; - } - for (int comp = 0; comp < NumIntComps(); comp++) { - soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; - } - correctCellVectors(last, pindex, grid, ptd[pindex]); - --last; - continue; - } + const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); + if (who == MyProc) { + if (pld.m_lev != lev || pld.m_grid != grid || pld.m_tile != tile) { + // We own it but must shift it to another place. + auto index = std::make_pair(pld.m_grid, pld.m_tile); + AMREX_ASSERT(ptile_local[pld.m_lev][index].size() == num_threads); - locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1); - - particlePostLocate(p, pld, lev); - - if (!ptd.id(pindex).is_valid()) { - soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last]; - for (int comp = 0; comp < NumRealComps(); comp++) { - soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; - } - for (int comp = 0; comp < NumIntComps(); comp++) { - soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; - } - correctCellVectors(last, pindex, grid, ptd[pindex]); - --last; - continue; - } + auto& dst = ptile_local[pld.m_lev][index][thread_num]; + auto old_size = dst.size(); + auto new_size = old_size + 1; + dst.resize(new_size); + auto dst_ptd = dst.getParticleTileData(); + + copyParticle(dst_ptd, ptd, pindex, old_size); - const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); - if (who == MyProc) { - if (pld.m_lev != lev || pld.m_grid != grid || pld.m_tile != tile) { - // We own it but must shift it to another place. - auto index = std::make_pair(pld.m_grid, pld.m_tile); - AMREX_ASSERT(soa_local[pld.m_lev][index].size() == num_threads); - { - auto& arr = soa_local[pld.m_lev][index][thread_num].GetIdCPUData(); - arr.push_back(soa.GetIdCPUData()[pindex]); - } - for (int comp = 0; comp < NumRealComps(); ++comp) { - RealVector& arr = soa_local[pld.m_lev][index][thread_num].GetRealData(comp); - arr.push_back(soa.GetRealData(comp)[pindex]); - } - for (int comp = 0; comp < NumIntComps(); ++comp) { - IntVector& arr = soa_local[pld.m_lev][index][thread_num].GetIntData(comp); - arr.push_back(soa.GetIntData(comp)[pindex]); - } - - ptd.id(pindex).make_invalid(); // Invalidate the particle - } - } - else { - auto& particles_to_send = tmp_remote[who][thread_num]; - auto old_size = particles_to_send.size(); - auto new_size = old_size + superparticle_size; - particles_to_send.resize(new_size); - - char* dst = &particles_to_send[old_size]; - { - std::memcpy(dst, &soa.GetIdCPUData()[pindex], sizeof(uint64_t)); - dst += sizeof(uint64_t); - } - int array_comp_start = 0; - if constexpr (!ParticleType::is_soa_particle) { - array_comp_start = AMREX_SPACEDIM + NStructReal; - } - for (int comp = 0; comp < NumRealComps(); comp++) { - if (h_redistribute_real_comp[array_comp_start + comp]) { - std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal)); - dst += sizeof(ParticleReal); - } - } - array_comp_start = 2 + NStructInt; - for (int comp = 0; comp < NumIntComps(); comp++) { - if (h_redistribute_int_comp[array_comp_start + comp]) { - std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int)); - dst += sizeof(int); - } - } ptd.id(pindex).make_invalid(); // Invalidate the particle } + } else { + auto& particles_to_send = tmp_remote[who][thread_num]; + auto old_size = particles_to_send.size(); + auto new_size = old_size + superparticle_size; + particles_to_send.resize(new_size); - if (!ptd.id(pindex).is_valid()){ - soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last]; - for (int comp = 0; comp < NumRealComps(); comp++) { - soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; - } - for (int comp = 0; comp < NumIntComps(); comp++) { - soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; - } - correctCellVectors(last, pindex, grid, ptd[pindex]); - --last; - continue; - } + ptd.packParticleData(particles_to_send.data(), pindex, old_size, + h_redistribute_real_comp.data(), h_redistribute_int_comp.data()); - ++pindex; + ptd.id(pindex).make_invalid(); // Invalidate the particle } - { - auto& iddata = soa.GetIdCPUData(); - iddata.erase(iddata.begin() + last + 1, iddata.begin() + npart); - } - for (int comp = 0; comp < NumRealComps(); comp++) { - RealVector& rdata = soa.GetRealData(comp); - rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart); + if (!ptd.id(pindex).is_valid()) { + copyParticle(ptd, ptd, last, pindex); + correctCellVectors(last, pindex, grid, p); + --last; + continue; } - for (int comp = 0; comp < NumIntComps(); comp++) { - IntVector& idata = soa.GetIntData(comp); - idata.erase(idata.begin() + last + 1, idata.begin() + npart); + + ++pindex; + } + + auto tot_npart = ptile_ptrs[pmap_it]->numTotalParticles(); + + if (last != npart - 1) { + pindex = last + 1; + last = npart; + while (last < tot_npart) { + copyParticle(ptd, ptd, last, pindex); + ++pindex; + ++last; } + particle_tile->resize(pindex); } } } @@ -2008,87 +1745,40 @@ ParticleContainer_impl, Vector >::iterator pmap_it; - if constexpr(!ParticleType::is_soa_particle) { - Vector > grid_tile_ids; - Vector* > pvec_ptrs; + Vector > grid_tile_ids; - // we need to create any missing map entries in serial here - for (pmap_it=tmp_local[lev].begin(); pmap_it != tmp_local[lev].end(); pmap_it++) - { - DefineAndReturnParticleTile(lev, pmap_it->first.first, pmap_it->first.second); - grid_tile_ids.push_back(pmap_it->first); - pvec_ptrs.push_back(&(pmap_it->second)); - } + // we need to create any missing map entries in serial here + for (auto pmap_it = ptile_local[lev].begin(); pmap_it != ptile_local[lev].end(); pmap_it++) + { + DefineAndReturnParticleTile(lev, pmap_it->first.first, pmap_it->first.second); + grid_tile_ids.push_back(pmap_it->first); + } #ifdef AMREX_USE_OMP #pragma omp parallel for #endif - for (int pit = 0; pit < static_cast(pvec_ptrs.size()); ++pit) - { - auto index = grid_tile_ids[pit]; - auto& ptile = ParticlesAt(lev, index.first, index.second); - auto& aos = ptile.GetArrayOfStructs(); - auto& soa = ptile.GetStructOfArrays(); - auto& aos_tmp = *(pvec_ptrs[pit]); - auto& soa_tmp = soa_local[lev][index]; - for (int i = 0; i < num_threads; ++i) { - aos.insert(aos.end(), aos_tmp[i].begin(), aos_tmp[i].end()); - aos_tmp[i].erase(aos_tmp[i].begin(), aos_tmp[i].end()); - for (int comp = 0; comp < NumRealComps(); ++comp) { - RealVector& arr = soa.GetRealData(comp); - RealVector& tmp = soa_tmp[i].GetRealData(comp); - arr.insert(arr.end(), tmp.begin(), tmp.end()); - tmp.erase(tmp.begin(), tmp.end()); - } - for (int comp = 0; comp < NumIntComps(); ++comp) { - IntVector& arr = soa.GetIntData(comp); - IntVector& tmp = soa_tmp[i].GetIntData(comp); - arr.insert(arr.end(), tmp.begin(), tmp.end()); - tmp.erase(tmp.begin(), tmp.end()); - } - } - } - } else { // soa particle - Vector > grid_tile_ids; + for (int pit = 0; pit < static_cast(grid_tile_ids.size()); ++pit) // NOLINT(modernize-loop-convert) + { + auto index = grid_tile_ids[pit]; + auto& dst_ptile = ParticlesAt(lev, index.first, index.second); + auto& src_ptile = ptile_local[lev][index]; - // we need to create any missing map entries in serial here - for (auto soa_map_it=soa_local[lev].begin(); soa_map_it != soa_local[lev].end(); soa_map_it++) - { - DefineAndReturnParticleTile(lev, soa_map_it->first.first, soa_map_it->first.second); - grid_tile_ids.push_back(soa_map_it->first); - } + for (int i = 0; i < num_threads; ++i) { -#ifdef AMREX_USE_OMP -#pragma omp parallel for -#endif - for (int pit = 0; pit < static_cast(grid_tile_ids.size()); ++pit) // NOLINT(modernize-loop-convert) - { - auto index = grid_tile_ids[pit]; - auto& ptile = ParticlesAt(lev, index.first, index.second); - auto& soa = ptile.GetStructOfArrays(); - auto& soa_tmp = soa_local[lev][index]; - for (int i = 0; i < num_threads; ++i) { - { - auto& arr = soa.GetIdCPUData(); - auto& tmp = soa_tmp[i].GetIdCPUData(); - arr.insert(arr.end(), tmp.begin(), tmp.end()); - tmp.erase(tmp.begin(), tmp.end()); - } - for (int comp = 0; comp < NumRealComps(); ++comp) { - RealVector& arr = soa.GetRealData(comp); - RealVector& tmp = soa_tmp[i].GetRealData(comp); - arr.insert(arr.end(), tmp.begin(), tmp.end()); - tmp.erase(tmp.begin(), tmp.end()); - } - for (int comp = 0; comp < NumIntComps(); ++comp) { - IntVector& arr = soa.GetIntData(comp); - IntVector& tmp = soa_tmp[i].GetIntData(comp); - arr.insert(arr.end(), tmp.begin(), tmp.end()); - tmp.erase(tmp.begin(), tmp.end()); - } + auto to_copy = src_ptile[i].numParticles(); + auto old_size = dst_ptile.numTotalParticles(); + auto new_size = old_size + to_copy; + dst_ptile.resize(new_size); + + auto dst_ptd = dst_ptile.getParticleTileData(); + auto src_ptd = src_ptile[i].getParticleTileData(); + + for (Long j = 0; j < to_copy; ++j) { + copyParticle(dst_ptd, src_ptd, j, j + old_size); } + + src_ptile[i].resize(0); } } } @@ -2339,18 +2029,22 @@ RedistributeMPI (std::map >& not_ours, { auto& ptile = m_particles[rcv_levs[ipart]][std::make_pair(rcv_grid[ipart], rcv_tile[ipart])]; + auto old_size = ptile.numTotalParticles(); + auto new_size = old_size + 1; + ptile.resize(new_size); + char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size; if constexpr (ParticleType::is_soa_particle) { uint64_t idcpudata; std::memcpy(&idcpudata, pbuf, sizeof(uint64_t)); pbuf += sizeof(uint64_t); - ptile.GetStructOfArrays().GetIdCPUData().push_back(idcpudata); + ptile.GetStructOfArrays().GetIdCPUData()[old_size] = idcpudata; } else { ParticleType p; std::memcpy(&p, pbuf, sizeof(ParticleType)); pbuf += sizeof(ParticleType); - ptile.push_back(p); + ptile.GetArrayOfStructs()[old_size] = p; } int array_comp_start = 0; @@ -2362,9 +2056,9 @@ RedistributeMPI (std::map >& not_ours, ParticleReal rdata; std::memcpy(&rdata, pbuf, sizeof(ParticleReal)); pbuf += sizeof(ParticleReal); - ptile.push_back_real(comp, rdata); + ptile.GetStructOfArrays().GetRealData(comp)[old_size] = rdata; } else { - ptile.push_back_real(comp, 0.0); + ptile.GetStructOfArrays().GetRealData(comp)[old_size] = 0.0; } } @@ -2374,9 +2068,9 @@ RedistributeMPI (std::map >& not_ours, int idata; std::memcpy(&idata, pbuf, sizeof(int)); pbuf += sizeof(int); - ptile.push_back_int(comp, idata); + ptile.GetStructOfArrays().GetIntData(comp)[old_size] = idata; } else { - ptile.push_back_int(comp, 0); + ptile.GetStructOfArrays().GetIntData(comp)[old_size] = 0; } } ++ipart; diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H index 105fa06b46..0744b683ba 100644 --- a/Src/Particle/AMReX_ParticleIO.H +++ b/Src/Particle/AMReX_ParticleIO.H @@ -575,7 +575,7 @@ ParticleContainer_impl& which, Vector& count, Vector& where, const Vector& write_real_comp, const Vector& write_int_comp, - const Vector, IntVector>>& particle_io_flags, + const Vector, FlagsVector>>& particle_io_flags, bool is_checkpoint) const { BL_PROFILE("ParticleContainer::WriteParticles()"); diff --git a/Src/Particle/AMReX_ParticleMesh.H b/Src/Particle/AMReX_ParticleMesh.H index 8c0d56da48..c10eead740 100644 --- a/Src/Particle/AMReX_ParticleMesh.H +++ b/Src/Particle/AMReX_ParticleMesh.H @@ -36,6 +36,21 @@ auto call_f (F const& f, return f(p, i, fabarr); } } + +template class PTDType, class RType, class IType> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +auto call_f (F const& f, + const PTDType& p, + const int i, Array4 const& fabarr, + GpuArray const& plo, + GpuArray const& dxi) noexcept +{ + if constexpr (IsCallable::value) { + return f(p, i, fabarr, plo, dxi); + } else { + return f(p, i, fabarr); + } +} } template ::value, int> foo = 0> diff --git a/Src/Particle/AMReX_ParticleReduce.H b/Src/Particle/AMReX_ParticleReduce.H index 6df792be33..b4e7478fd3 100644 --- a/Src/Particle/AMReX_ParticleReduce.H +++ b/Src/Particle/AMReX_ParticleReduce.H @@ -33,6 +33,15 @@ auto call_f (F const& f, return f(p, i); } } + +template class PTDType, class RType, class IType> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +auto call_f (F const& f, + const PTDType& p, + const int i) noexcept +{ + return f(p, i); +} } /** diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 4b9c7135dd..44dcb570a2 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -328,6 +328,7 @@ struct alignas(sizeof(double)) ConstSoAParticle : SoAParticleBase using StorageParticleType = SoAParticleBase; using ConstPTD = ConstParticleTileData; static constexpr bool is_soa_particle = true; + static constexpr bool is_rtsoa_particle = false; static constexpr bool is_constsoa_particle = true; using RealType = ParticleReal; @@ -391,6 +392,7 @@ struct alignas(sizeof(double)) SoAParticle : SoAParticleBase using StorageParticleType = SoAParticleBase; using PTD = ParticleTileData; static constexpr bool is_soa_particle = true; + static constexpr bool is_rtsoa_particle = false; static constexpr bool is_constsoa_particle = false; using ConstType = ConstSoAParticle; diff --git a/Src/Particle/AMReX_ParticleTileRT.H b/Src/Particle/AMReX_ParticleTileRT.H new file mode 100644 index 0000000000..0be3665f38 --- /dev/null +++ b/Src/Particle/AMReX_ParticleTileRT.H @@ -0,0 +1,789 @@ +#ifndef AMREX_PARTICLETILERT_H_ +#define AMREX_PARTICLETILERT_H_ +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +namespace amrex { + +// Forward Declaration +template +struct RTSoAParticle; + +template +struct ArrayView { + using size_type = std::size_t; + + T* m_data = nullptr; + size_type m_capacity = 0; + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T& operator[] (const size_type i) const { + AMREX_ASSERT(i < m_capacity); + return m_data[i]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T* data () const { + return m_data; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T* dataPtr () const { + return m_data; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T* begin () const { + return m_data; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T* end () const { + return m_data + m_capacity; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + explicit operator T* () const { + return m_data; + } +}; + +template +ArrayView(T* data, std::size_t capacity) -> ArrayView; + +template +struct ParticleTileDataRT +{ + using Self = ParticleTileDataRT; + using size_type = std::size_t; + + using ParticleType = RTSoAParticle; + using RealType = RType; + using IntType = IType; + + static constexpr bool is_particle_tile_data = true; + static constexpr bool is_const = std::is_const_v; + + size_type m_capacity = 0; + std::conditional_t m_idcpu = nullptr; + RType* AMREX_RESTRICT m_rdata = nullptr; + IType* AMREX_RESTRICT m_idata = nullptr; + int m_n_real = 0; + int m_n_int = 0; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + constexpr ParticleTileDataRT () = default; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + constexpr ParticleTileDataRT (size_type a_capacity, + decltype(m_idcpu) a_idcpu, decltype(m_rdata) a_rdata, + decltype(m_idata) a_idata, int a_n_real, int a_n_int) noexcept : + m_capacity{a_capacity}, + m_idcpu{a_idcpu}, + m_rdata{a_rdata}, + m_idata{a_idata}, + m_n_real{a_n_real}, + m_n_int{a_n_int} {} + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + constexpr ParticleTileDataRT (ParticleTileDataRT const& rhs) noexcept : + m_capacity{rhs.m_capacity}, + m_idcpu{rhs.m_idcpu}, + m_rdata{rhs.m_rdata}, + m_idata{rhs.m_idata}, + m_n_real{rhs.m_n_real}, + m_n_int{rhs.m_n_int} {} + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RType& pos (const int dir, const size_type index) const & + { + AMREX_ASSERT(index < m_capacity && 0 <= dir && dir < m_n_real); + return this->m_rdata[dir * m_capacity + index]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + decltype(auto) id (const size_type index) const & + { + AMREX_ASSERT(index < m_capacity); + if constexpr (is_const) { + return ConstParticleIDWrapper(this->m_idcpu[index]); + } else { + return ParticleIDWrapper(this->m_idcpu[index]); + } + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + decltype(auto) cpu (const size_type index) const & + { + AMREX_ASSERT(index < m_capacity); + if constexpr (is_const) { + return ConstParticleCPUWrapper(this->m_idcpu[index]); + } else { + return ParticleCPUWrapper(this->m_idcpu[index]); + } + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + decltype(auto) idcpu (const size_type index) const & + { + AMREX_ASSERT(index < m_capacity); + return this->m_idcpu[index]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RType * rdata (const int comp_index) const & + { + AMREX_ASSERT(0 <= comp_index && comp_index < m_n_real); + return this->m_rdata + comp_index * m_capacity; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + IType * idata (const int comp_index) const & + { + AMREX_ASSERT(0 <= comp_index && comp_index < m_n_int); + return this->m_idata + comp_index * m_capacity; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RType& rdata (const int comp_index, const size_type partice_index) const & + { + AMREX_ASSERT(partice_index < m_capacity && 0 <= comp_index && comp_index < m_n_real); + return this->m_rdata[comp_index * m_capacity + partice_index]; + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + IType& idata (const int comp_index, const size_type partice_index) const & + { + AMREX_ASSERT(partice_index < m_capacity && 0 <= comp_index && comp_index < m_n_int); + return this->m_idata[comp_index * m_capacity + partice_index]; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + decltype(auto) operator[] (const size_type index) const + { + AMREX_ASSERT(index < m_capacity); + return RTSoAParticle(*this, index); + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void packParticleData (char* buffer, size_type src_index, std::size_t dst_offset, + const int* comm_real, const int * comm_int) const noexcept + { + AMREX_ASSERT(src_index < m_capacity); + auto* dst = buffer + dst_offset; + + memcpy(dst, m_idcpu + src_index, sizeof(uint64_t)); + dst += sizeof(uint64_t); + + for (int i = 0; i < m_n_real; ++i) { + if (comm_real[i]) { + memcpy(dst, m_rdata + i * m_capacity + src_index, sizeof(RType)); + dst += sizeof(RType); + } + } + + for (int i = 0; i < m_n_int; ++i) { + if (comm_int[i]) { + memcpy(dst, m_idata + i * m_capacity + src_index, sizeof(IType)); + dst += sizeof(IType); + } + } + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void unpackParticleData (const char* buffer, Long src_offset, size_type dst_index, + const int* comm_real, const int* comm_int) const noexcept + { + AMREX_ASSERT(dst_index < m_capacity); + const auto* src = buffer + src_offset; + + memcpy(m_idcpu + dst_index, src, sizeof(uint64_t)); + src += sizeof(uint64_t); + + for (int i = 0; i < m_n_real; ++i) { + if (comm_real[i]) { + memcpy(m_rdata + i * m_capacity + dst_index, src, sizeof(RType)); + src += sizeof(RType); + } + } + + for (int i = 0; i < m_n_int; ++i) { + if (comm_int[i]) { + memcpy(m_idata + i * m_capacity + dst_index, src, sizeof(IType)); + src += sizeof(IType); + } + } + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + Particle<0, 0> getSuperParticle (size_type index) const noexcept + { + return {{ + { AMREX_D_DECL( pos(0, index), pos(1, index), pos(2, index) ) }, + idcpu(index) + }}; + } +}; + +template +struct RTSoAParticle +{ + using PTD = ParticleTileDataRT; + using ConstType = RTSoAParticle, std::add_const_t>; + static constexpr bool is_constsoa_particle = PTD::is_const; + static constexpr int NReal = 0; + static constexpr int NInt = 0; + static constexpr bool is_soa_particle = true; + static constexpr bool is_rtsoa_particle = true; + using size_type = typename PTD::size_type; + using RealType = RType; + using IntType = IType; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RTSoAParticle (PTD const& ptd, size_type i) noexcept : + m_particle_tile_data(ptd), + m_index(i) {} + + template + friend struct RTSoAParticle; + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RTSoAParticle (RTSoAParticle const& rhs) noexcept : + m_particle_tile_data(rhs.m_particle_tile_data), + m_index(rhs.m_index) {} + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + decltype(auto) cpu () const & { return this->m_particle_tile_data.cpu(m_index); } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + decltype(auto) id () const & { return this->m_particle_tile_data.id(m_index); } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + decltype(auto) idcpu () const & { return this->m_particle_tile_data.idcpu(m_index); } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RType& rdata (const int comp_index) const & { + return this->m_particle_tile_data.rdata(comp_index, m_index); + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + IType& idata (const int comp_index) const & { + return this->m_particle_tile_data.idata(comp_index, m_index); + } + + //functions to get positions of the particle in the SOA data + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RealVect pos () const & { + return RealVect(AMREX_D_DECL( + this->m_particle_tile_data.pos(0, m_index), + this->m_particle_tile_data.pos(1, m_index), + this->m_particle_tile_data.pos(2, m_index) + )); + } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RType& pos (int position_index) const & { + return this->m_particle_tile_data.pos(position_index, m_index); + } + + static Long NextID (); + + /** + * \brief This version can only be used inside omp critical. + */ + static Long UnprotectedNextID (); + + /** + * \brief Reset on restart. + * + * \param nextid + */ + static void NextID (Long nextid); + +private: + + static_assert(std::is_trivially_copyable(), "ParticleTileData is not trivially copyable"); + + PTD m_particle_tile_data; + size_type m_index; +}; + +struct NextIDRTSoA { + inline static Long the_next_id = 1; +}; + +template +Long +RTSoAParticle::NextID () +{ + Long next; +// we should be able to test on _OPENMP < 201107 for capture (version 3.1) +// but we must work around a bug in gcc < 4.9 +#if defined(AMREX_USE_OMP) && defined(_OPENMP) && _OPENMP < 201307 +#pragma omp critical (amrex_particle_nextid) +#elif defined(AMREX_USE_OMP) +#pragma omp atomic capture +#endif + next = NextIDRTSoA::the_next_id++; + + if (next > LongParticleIds::LastParticleID) { + amrex::Abort("RTSoAParticle::NextID() -- too many particles"); + } + + return next; +} + +template +Long +RTSoAParticle::UnprotectedNextID () +{ + Long next = NextIDRTSoA::the_next_id++; + if (next > LongParticleIds::LastParticleID) { + amrex::Abort("RTSoAParticle::NextID() -- too many particles"); + } + return next; +} + +template +void +RTSoAParticle::NextID (Long nextid) +{ + NextIDRTSoA::the_next_id = nextid; +} + +template +using ConstParticleTileDataRT = ParticleTileDataRT; + +template +struct ParticleTileRT +{ + using ParticleType = RTSoAParticle; + using RealType = RType; + using IntType = IType; + + using ParticleTileDataType = ParticleTileDataRT; + using ConstParticleTileDataType = ParticleTileDataRT; + + using size_type = typename ParticleTileDataType::size_type; + + using AoS = ThisParticleTileHasNoAoS; + using SoA = ParticleTileRT; + + using RealVector = ArrayView; + using IntVector = ArrayView; + + ParticleTileRT () = default; + +#ifndef _WIN32 // workaround windows compiler bug + ~ParticleTileRT () = default; + + ParticleTileRT (ParticleTileRT const&) = delete; + ParticleTileRT (ParticleTileRT &&) noexcept = default; + + ParticleTileRT& operator= (ParticleTileRT const&) = delete; + ParticleTileRT& operator= (ParticleTileRT &&) noexcept = default; +#endif + + void define ( + int a_num_real_comps, + int a_num_int_comps, + std::vector* a_rdata_names=nullptr, + std::vector* a_idata_names=nullptr, + Arena* a_arena=nullptr + ) + { + if (m_defined) { + AMREX_ALWAYS_ASSERT(a_arena == nullptr || a_arena == m_arena); + if (a_num_real_comps != m_n_real || a_num_int_comps != m_n_int) { + realloc_and_move(m_size, m_capacity, a_num_real_comps, a_num_int_comps); + } + } else { + AMREX_ALWAYS_ASSERT(a_arena != nullptr); + m_defined = true; + m_arena = a_arena; + m_n_real = a_num_real_comps; + m_n_int = a_num_int_comps; + } + + if (a_rdata_names != nullptr) { + m_real_names = a_rdata_names; + } + if (a_idata_names != nullptr) { + m_int_names = a_idata_names; + } + } + + [[nodiscard]] bool empty () const + { + AMREX_ALWAYS_ASSERT(m_defined); + return size() == 0; + } + + /** + * \brief Returns the total number of particles (real and neighbor) + * + */ + size_type size () const + { + AMREX_ALWAYS_ASSERT(m_defined); + return m_size; + } + + /** + * \brief Returns the number of real particles (excluding neighbors) + * + */ + size_type numParticles () const + { + AMREX_ALWAYS_ASSERT(m_defined); + return numRealParticles(); + } + + /** + * \brief Returns the number of real particles (excluding neighbors) + * + */ + size_type numRealParticles () const + { + AMREX_ALWAYS_ASSERT(m_defined); + return m_size; + } + + /** + * \brief Returns the number of neighbor particles (excluding reals) + * + */ + size_type numNeighborParticles () const + { + AMREX_ALWAYS_ASSERT(m_defined); + return 0; + } + + /** + * \brief Returns the total number of particles, real and neighbor + * + */ + size_type numTotalParticles () const + { + AMREX_ALWAYS_ASSERT(m_defined); + return m_size; + } + + size_type getNumNeighbors () const + { + AMREX_ALWAYS_ASSERT(m_defined); + return 0; + } + + [[nodiscard]] int NumRealComps () const noexcept + { + AMREX_ALWAYS_ASSERT(m_defined); + return m_n_real; + } + + [[nodiscard]] int NumIntComps () const noexcept + { + AMREX_ALWAYS_ASSERT(m_defined); + return m_n_int; + } + + [[nodiscard]] int NumRuntimeRealComps () const noexcept + { + AMREX_ALWAYS_ASSERT(m_defined); + return m_n_real; + } + + [[nodiscard]] int NumRuntimeIntComps () const noexcept + { + AMREX_ALWAYS_ASSERT(m_defined); + return m_n_int; + } + + /** Get the names for the real SoA components **/ + [[nodiscard]] std::vector GetRealNames () const + { + AMREX_ALWAYS_ASSERT(m_defined); + if (m_real_names != nullptr) { + return *m_real_names; + } else { + return std::vector(); + } + } + + /** Get the names for the int SoA components **/ + [[nodiscard]] std::vector GetIntNames () const + { + AMREX_ALWAYS_ASSERT(m_defined); + if (m_int_names != nullptr) { + return *m_int_names; + } else { + return std::vector(); + } + } + + [[nodiscard]] ArrayView GetIdCPUData () { + AMREX_ALWAYS_ASSERT(m_defined); + return {m_idcpu_data.data(), m_capacity}; + } + + [[nodiscard]] ArrayView GetIdCPUData () const { + AMREX_ALWAYS_ASSERT(m_defined); + return {m_idcpu_data.data(), m_capacity}; + } + + [[nodiscard]] ArrayView GetRealData (int comp_index) { + AMREX_ALWAYS_ASSERT(m_defined); + AMREX_ALWAYS_ASSERT(0 <= comp_index && comp_index < m_n_real); + return {m_real_data.data() + comp_index * m_capacity, m_capacity}; + } + + [[nodiscard]] ArrayView GetRealData (int comp_index) const { + AMREX_ALWAYS_ASSERT(m_defined); + AMREX_ALWAYS_ASSERT(0 <= comp_index && comp_index < m_n_real); + return {m_real_data.data() + comp_index * m_capacity, m_capacity}; + } + + [[nodiscard]] ArrayView GetIntData (int comp_index) { + AMREX_ALWAYS_ASSERT(m_defined); + AMREX_ALWAYS_ASSERT(0 <= comp_index && comp_index < m_n_int); + return {m_int_data.data() + comp_index * m_capacity, m_capacity}; + } + + [[nodiscard]] ArrayView GetIntData (int comp_index) const { + AMREX_ALWAYS_ASSERT(m_defined); + AMREX_ALWAYS_ASSERT(0 <= comp_index && comp_index < m_n_int); + return {m_int_data.data() + comp_index * m_capacity, m_capacity}; + } + + [[nodiscard]] ArrayView GetRealData (std::string const & name) { + return GetRealData(get_idx_from_str(name, m_real_names)); + } + + [[nodiscard]] ArrayView GetRealData (std::string const & name) const { + return GetRealData(get_idx_from_str(name, m_real_names)); + } + + [[nodiscard]] ArrayView GetIntData (std::string const & name) { + return GetIntData(get_idx_from_str(name, m_int_names)); + } + + [[nodiscard]] ArrayView GetIntData (std::string const & name) const { + return GetIntData(get_idx_from_str(name, m_int_names)); + } + + void resize (size_type new_size) + { + AMREX_ALWAYS_ASSERT(m_defined); + + if (new_size <= m_capacity && new_size * 10 > m_capacity) { + m_size = new_size; + } else { + realloc_and_move(new_size, new_size, m_n_real, m_n_int); + } + } + + void resize_geometric_grow (size_type new_size) + { + AMREX_ALWAYS_ASSERT(m_defined); + + if (new_size <= m_capacity && new_size * 10 > m_capacity) { + m_size = new_size; + } else { + realloc_and_move(new_size, std::max(new_size, m_capacity + m_capacity / 4), + m_n_real, m_n_int); + } + } + + void reserve (size_type new_capacity) + { + AMREX_ALWAYS_ASSERT(m_defined); + + if (new_capacity > m_capacity) { + realloc_and_move(m_size, new_capacity, m_n_real, m_n_int); + } + } + + void realloc_and_move (size_type new_size, size_type new_capacity, + int new_n_real, int new_n_int) + { + AMREX_ALWAYS_ASSERT(m_defined); + + if (new_capacity <= new_size) { + new_capacity = new_size; + } + + decltype(m_idcpu_data) new_idcpu_data; + decltype(m_real_data) new_real_data; + decltype(m_int_data) new_int_data; + + new_idcpu_data.setArena(m_arena); + new_real_data.setArena(m_arena); + new_int_data.setArena(m_arena); + + new_idcpu_data.resize(new_capacity); + new_real_data.resize(new_capacity * new_n_real); + new_int_data.resize(new_capacity * new_n_int); + + auto p_idcpu = new_idcpu_data.data(); + auto p_real = new_real_data.data(); + auto p_int = new_int_data.data(); + + auto old_ptd = getParticleTileData(); + auto copy_size = std::min(m_size, new_size); + auto copy_n_real = std::min(m_n_real, new_n_real); + auto copy_n_int = std::min(m_n_int, new_n_int); + + m_size = new_size; + m_capacity = new_capacity; + m_n_real = new_n_real; + m_n_int = new_n_int; + m_idcpu_data.swap(new_idcpu_data); + m_real_data.swap(new_real_data); + m_int_data.swap(new_int_data); + + auto new_ptd = getParticleTileData(); + + if (new_size == 0) { + return; + } + + if (m_arena->isDeviceAccessible()) { + ParallelFor(copy_size, + [=] AMREX_GPU_DEVICE (size_type i) noexcept { + new_ptd.idcpu(i) = old_ptd.idcpu(i); + + for (int n = 0; n < copy_n_real; ++n) { + new_ptd.rdata(n, i) = old_ptd.rdata(n, i); + } + + for (int n = 0; n < copy_n_int; ++n) { + new_ptd.idata(n, i) = old_ptd.idata(n, i); + } + }); + Gpu::streamSynchronize(); + } else { + for (size_type i = 0; i < copy_size; ++i) { + new_ptd.idcpu(i) = old_ptd.idcpu(i); + + for (int n = 0; n < copy_n_real; ++n) { + new_ptd.rdata(n, i) = old_ptd.rdata(n, i); + } + + for (int n = 0; n < copy_n_int; ++n) { + new_ptd.idata(n, i) = old_ptd.idata(n, i); + } + } + } + } + + void shrink_to_fit () + { + AMREX_ALWAYS_ASSERT(m_defined); + + if (m_size != m_capacity) { + realloc_and_move(m_size, m_size, m_n_real, m_n_int); + } + } + + size_type capacity () const + { + AMREX_ALWAYS_ASSERT(m_defined); + return m_capacity * (sizeof(uint64_t) + m_n_real * sizeof(RType) + m_n_int * sizeof(IType)); + } + + void swap (ParticleTileRT& other) noexcept + { + std::swap(m_defined, other.m_defined); + std::swap(m_arena, other.m_arena); + std::swap(m_capacity, other.m_capacity); + std::swap(m_size, other.m_size); + std::swap(m_n_real, other.m_n_real); + std::swap(m_n_int, other.m_n_int); + m_idcpu_data.swap(other.m_idcpu_data); + m_real_data.swap(other.m_real_data); + m_int_data.swap(other.m_int_data); + std::swap(m_real_names, other.m_real_names); + std::swap(m_int_names, other.m_int_names); + } + + ParticleTileDataType getParticleTileData () + { + AMREX_ALWAYS_ASSERT(m_defined); + return ParticleTileDataType{ + m_capacity, + m_idcpu_data.data(), + m_real_data.data(), + m_int_data.data(), + m_n_real, + m_n_int + }; + } + + ConstParticleTileDataType getConstParticleTileData () const + { + AMREX_ALWAYS_ASSERT(m_defined); + return ConstParticleTileDataType{ + m_capacity, + m_idcpu_data.data(), + m_real_data.data(), + m_int_data.data(), + m_n_real, + m_n_int + }; + } + + auto& GetStructOfArrays () { return *this; } + const auto& GetStructOfArrays () const { return *this; } + + [[nodiscard]] Arena* arena() const { + return m_arena; + } + +private: + + static int get_idx_from_str (std::string const & name, std::vector* name_list) { + AMREX_ALWAYS_ASSERT(name_list != nullptr); + auto const pos = std::find(name_list->begin(), name_list->end(), name); + AMREX_ALWAYS_ASSERT(pos != name_list->end()); + return static_cast(std::distance(name_list->begin(), pos)); + } + + bool m_defined = false; + + Arena* m_arena = nullptr; + + size_type m_capacity = 0; + size_type m_size = 0; + + int m_n_real = 0; + int m_n_int = 0; + + amrex::PODVector> m_idcpu_data; + amrex::PODVector> m_real_data; + amrex::PODVector> m_int_data; + + std::vector* m_real_names = nullptr; + std::vector* m_int_names = nullptr; +}; + +} // namespace amrex + +#endif // AMREX_PARTICLETILERT_H_ diff --git a/Src/Particle/AMReX_ParticleTransformation.H b/Src/Particle/AMReX_ParticleTransformation.H index 1b0b29b662..bcd9e992f4 100644 --- a/Src/Particle/AMReX_ParticleTransformation.H +++ b/Src/Particle/AMReX_ParticleTransformation.H @@ -7,6 +7,7 @@ #include #include #include +#include #include namespace amrex @@ -146,6 +147,60 @@ void swapParticle (const ParticleTileData& dst, } } +/** + * \brief A general single particle copying routine that can run on the GPU. + * + * \param dst the destination tile + * \param src the source tile + * \param src_i the index in the source to read from + * \param dst_i the index in the destination to write to + * + */ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void copyParticle (const ParticleTileDataRT& dst, + const ParticleTileDataRT& src, + typename ParticleTileDataRT::size_type src_i, + typename ParticleTileDataRT::size_type dst_i) noexcept +{ + dst.idcpu(dst_i) = src.idcpu(src_i); + + for (int j = 0; j < dst.m_n_real && j < src.m_n_real; ++j) { + dst.rdata(j)[dst_i] = src.rdata(j)[src_i]; + } + + for (int j = 0; j < dst.m_n_int && j < src.m_n_int; ++j) { + dst.idata(j)[dst_i] = src.idata(j)[src_i]; + } +} + +/** + * \brief A general single particle swapping routine that can run on the GPU. + * + * \param dst the destination tile + * \param src the source tile + * \param src_i the index in the source to read from + * \param dst_i the index in the destination to write to + * + */ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void swapParticle (const ParticleTileDataRT& dst, + const ParticleTileDataRT& src, + typename ParticleTileDataRT::size_type src_i, + typename ParticleTileDataRT::size_type dst_i) noexcept +{ + amrex::Swap(dst.idcpu(dst_i), src.idcpu(src_i)); + + for (int j = 0; j < dst.m_n_real && j < src.m_n_real; ++j) { + amrex::Swap(dst.rdata(j)[dst_i], src.rdata(j)[src_i]); + } + + for (int j = 0; j < dst.m_n_int && j < src.m_n_int; ++j) { + amrex::Swap(dst.idata(j)[dst_i], src.idata(j)[src_i]); + } +} + /** * \brief Copy particles from src to dst. This version copies all the * particles, writing them to the beginning of dst. @@ -213,7 +268,8 @@ template void transformParticles (DstTile& dst, const SrcTile& src, F&& f) noexcept { auto np = src.numParticles(); - transformParticles(dst, src, 0, 0, np, std::forward(f)); + using Index = decltype(np); + transformParticles(dst, src, Index{0}, Index{0}, np, std::forward(f)); } /** @@ -270,7 +326,8 @@ template void transformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src, F&& f) noexcept { auto np = src.numParticles(); - transformParticles(dst1, dst2, src, 0, 0, 0, np, std::forward(f)); + using Index = decltype(np); + transformParticles(dst1, dst2, src, Index{0}, Index{0}, Index{0}, np, std::forward(f)); } /** @@ -329,7 +386,7 @@ template , int> foo = 0> Index filterParticles (DstTile& dst, const SrcTile& src, const Index* mask) noexcept { - return filterParticles(dst, src, mask, 0, 0, src.numParticles()); + return filterParticles(dst, src, mask, Index{0}, Index{0}, src.numParticles()); } /** @@ -500,7 +557,7 @@ template , int> foo = 0> Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Index* mask, F&& f) noexcept { - return filterAndTransformParticles(dst, src, mask, std::forward(f), 0, 0); + return filterAndTransformParticles(dst, src, mask, std::forward(f), Index{0}, Index{0}); } /** @@ -522,7 +579,9 @@ template >,int> foo = 0> int filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred&& p, F&& f) noexcept { - return filterAndTransformParticles(dst, src, std::forward(p), std::forward(f), 0, 0); + using Index = decltype(src.numParticles()); + return filterAndTransformParticles(dst, src, std::forward(p), std::forward(f), + Index{0}, Index{0}); } /** diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index ac91573f85..918fc67252 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -377,17 +378,12 @@ IntVect getParticleCell (PTD const& ptd, int i, amrex::GpuArray const& dxi, const Box& domain) noexcept { - if constexpr (PTD::ParticleType::is_soa_particle) - { - IntVect iv( - AMREX_D_DECL(int(amrex::Math::floor((ptd.m_rdata[0][i]-plo[0])*dxi[0])), - int(amrex::Math::floor((ptd.m_rdata[1][i]-plo[1])*dxi[1])), - int(amrex::Math::floor((ptd.m_rdata[2][i]-plo[2])*dxi[2])))); - iv += domain.smallEnd(); - return iv; - } else { - return getParticleCell(ptd.m_aos[i], plo, dxi, domain);; - } + IntVect iv( + AMREX_D_DECL(int(amrex::Math::floor((ptd.pos(0, i)-plo[0])*dxi[0])), + int(amrex::Math::floor((ptd.pos(1, i)-plo[1])*dxi[1])), + int(amrex::Math::floor((ptd.pos(2, i)-plo[2])*dxi[2])))); + iv += domain.smallEnd(); + return iv; } struct DefaultAssignor @@ -865,6 +861,157 @@ std::string getDefaultCompNameInt (const int i) { return name; } +template foo = 0> +void +ReorderParticles (PTile& ptile, const index_type* permutations) +{ + const size_t np = ptile.numParticles(); + const size_t np_total = np + ptile.numNeighborParticles(); + +#if defined(AMREX_USE_CUDA) && defined(_WIN32) + if (!PTile::ParticleType::is_soa_particle) +#else + if constexpr (!PTile::ParticleType::is_soa_particle) +#endif + { + static_assert(sizeof(typename PTile::ParticleType)%4 == 0 && sizeof(uint32_t) == 4); + using tmp_t = std::conditional_t; + constexpr std::size_t nchunks = sizeof(typename PTile::ParticleType) / sizeof(tmp_t); + Gpu::DeviceVector tmp(np); + auto* ptmp = tmp.data(); + auto* paos = (tmp_t*)(ptile.getParticleTileData().m_aos); + for (std::size_t ichunk = 0; ichunk < nchunks; ++ichunk) { + // Do not need to reorder neighbor particles + AMREX_HOST_DEVICE_FOR_1D(np, i, + { + ptmp[i] = paos[permutations[i]*nchunks+ichunk]; + }); + AMREX_HOST_DEVICE_FOR_1D(np, i, + { + paos[i*nchunks+ichunk] = ptmp[i]; + }); + } + Gpu::streamSynchronize(); + } else { + typename PTile::SoA::IdCPU tmp_idcpu; + if constexpr (PTile::has_polymorphic_allocator) { + tmp_idcpu.setArena(ptile.GetStructOfArrays().GetIdCPUData().arena()); + } + tmp_idcpu.resize(np_total); + auto src = ptile.GetStructOfArrays().GetIdCPUData().data(); + uint64_t* dst = tmp_idcpu.data(); + AMREX_HOST_DEVICE_FOR_1D( np_total, i, + { + dst[i] = i < np ? src[permutations[i]] : src[i]; + }); + + Gpu::streamSynchronize(); + + ptile.GetStructOfArrays().GetIdCPUData().swap(tmp_idcpu); + } + + { // Create a scope for the temporary vector below + typename PTile::RealVector tmp_real; + if (ptile.NumRealComps() > 0) { + if constexpr (PTile::has_polymorphic_allocator) { + tmp_real.setArena(ptile.GetStructOfArrays().GetRealData(0).arena()); + } + tmp_real.resize(np_total); + } + for (int comp = 0; comp < ptile.NumRealComps(); ++comp) { + auto src = ptile.GetStructOfArrays().GetRealData(comp).data(); + ParticleReal* dst = tmp_real.data(); + AMREX_HOST_DEVICE_FOR_1D( np_total, i, + { + dst[i] = i < np ? src[permutations[i]] : src[i]; + }); + + Gpu::streamSynchronize(); + + ptile.GetStructOfArrays().GetRealData(comp).swap(tmp_real); + } + } + + typename PTile::IntVector tmp_int; + if (ptile.NumIntComps() > 0) { + if constexpr (PTile::has_polymorphic_allocator) { + tmp_int.setArena(ptile.GetStructOfArrays().GetIntData(0).arena()); + } + tmp_int.resize(np_total); + } + + for (int comp = 0; comp < ptile.NumIntComps(); ++comp) { + auto src = ptile.GetStructOfArrays().GetIntData(comp).data(); + int* dst = tmp_int.data(); + AMREX_HOST_DEVICE_FOR_1D( np_total , i, + { + dst[i] = i < np ? src[permutations[i]] : src[i]; + }); + + Gpu::streamSynchronize(); + + ptile.GetStructOfArrays().GetIntData(comp).swap(tmp_int); + } +} + +template foo = 0> +void +ReorderParticles (PTile& ptile, const index_type* permutations) +{ + const size_t np = ptile.numParticles(); + { + amrex::Gpu::AsyncVector tmp_idcpu(np); + + auto src = ptile.GetIdCPUData().data(); + uint64_t* dst = tmp_idcpu.data(); + amrex::ParallelFor(np, + [=] AMREX_GPU_DEVICE (size_t i) { + dst[i] = src[permutations[i]]; + }); + amrex::ParallelFor(np, + [=] AMREX_GPU_DEVICE (size_t i) { + src[i] = dst[i]; + }); + } + { + amrex::Gpu::AsyncVector tmp_real(np); + + for (int comp = 0; comp < ptile.NumRealComps(); ++comp) { + auto src = ptile.GetRealData(comp).data(); + auto dst = tmp_real.data(); + amrex::ParallelFor(np, + [=] AMREX_GPU_DEVICE (size_t i) { + dst[i] = src[permutations[i]]; + }); + amrex::ParallelFor(np, + [=] AMREX_GPU_DEVICE (size_t i) { + src[i] = dst[i]; + }); + } + } + { + amrex::Gpu::AsyncVector tmp_int(np); + + for (int comp = 0; comp < ptile.NumIntComps(); ++comp) { + auto src = ptile.GetIntData(comp).data(); + auto dst = tmp_int.data(); + amrex::ParallelFor(np, + [=] AMREX_GPU_DEVICE (size_t i) { + dst[i] = src[permutations[i]]; + }); + amrex::ParallelFor(np, + [=] AMREX_GPU_DEVICE (size_t i) { + src[i] = dst[i]; + }); + } + } + // stream sync for deallocation of permutations variable in user code + Gpu::streamSynchronize(); +} + #ifdef AMREX_USE_HDF5_ASYNC void async_vol_es_wait_particle(); void async_vol_es_wait_close_particle(); diff --git a/Src/Particle/AMReX_Particles.H b/Src/Particle/AMReX_Particles.H index 281f350b32..d36e02273f 100644 --- a/Src/Particle/AMReX_Particles.H +++ b/Src/Particle/AMReX_Particles.H @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index 51a05a6a33..255d692ffa 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -237,47 +237,56 @@ void packParticleIDs (I* idata, const std::uint64_t idcpu, bool is_checkpoint) n template AMREX_GPU_HOST_DEVICE void -rPackParticleData (const PTD& ptd, int idx, typename PTD::RealType * rdata_ptr, +rPackParticleData (const PTD& ptd, int idx, std::remove_const_t * rdata_ptr, const int * write_real_comp) { std::size_t rout_index = 0; - for (int j = 0; j < AMREX_SPACEDIM; ++j) { - rdata_ptr[rout_index] = ptd.pos(j, idx); - rout_index++; - } + if constexpr (!PTD::ParticleType::is_rtsoa_particle) { + for (int j = 0; j < AMREX_SPACEDIM; ++j) { + rdata_ptr[rout_index] = ptd.pos(j, idx); + rout_index++; + } - if constexpr (!PTD::ParticleType::is_soa_particle) { - const auto& p = ptd[idx]; + if constexpr (!PTD::ParticleType::is_soa_particle) { + const auto& p = ptd[idx]; - for (int j = 0; j < PTD::ParticleType::NReal; ++j) { - if (write_real_comp[j]) { - rdata_ptr[rout_index] = p.rdata(j); - rout_index++; + for (int j = 0; j < PTD::ParticleType::NReal; ++j) { + if (write_real_comp[j]) { + rdata_ptr[rout_index] = p.rdata(j); + rout_index++; + } } } - } - constexpr int real_start_offset = PTD::ParticleType::is_soa_particle ? AMREX_SPACEDIM : 0; + constexpr int real_start_offset = PTD::ParticleType::is_soa_particle ? AMREX_SPACEDIM : 0; - for (int j = real_start_offset; j < PTD::NAR; ++j) { - if (write_real_comp[PTD::ParticleType::NReal + j - real_start_offset]) { - rdata_ptr[rout_index] = ptd.rdata(j)[idx]; - rout_index++; + for (int j = real_start_offset; j < PTD::NAR; ++j) { + if (write_real_comp[PTD::ParticleType::NReal + j - real_start_offset]) { + rdata_ptr[rout_index] = ptd.rdata(j)[idx]; + rout_index++; + } } - } - for (int j = 0; j < ptd.m_num_runtime_real; ++j) { - if (write_real_comp[PTD::ParticleType::NReal + PTD::NAR + j - real_start_offset]) { - rdata_ptr[rout_index] = ptd.m_runtime_rdata[j][idx]; - rout_index++; + for (int j = 0; j < ptd.m_num_runtime_real; ++j) { + if (write_real_comp[PTD::ParticleType::NReal + PTD::NAR + j - real_start_offset]) { + rdata_ptr[rout_index] = ptd.m_runtime_rdata[j][idx]; + rout_index++; + } + } + } else { + for (int j = 0; j < ptd.m_n_real; ++j) { + if ((j < AMREX_SPACEDIM) || write_real_comp[j - AMREX_SPACEDIM]) { + rdata_ptr[rout_index] = ptd.rdata(j)[idx]; + rout_index++; + } } } } template AMREX_GPU_HOST_DEVICE void -iPackParticleData (const PTD& ptd, int idx, typename PTD::IntType * idata_ptr, +iPackParticleData (const PTD& ptd, int idx, std::remove_const_t * idata_ptr, const int * write_int_comp, bool is_checkpoint) { std::size_t iout_index = 0; @@ -285,28 +294,37 @@ iPackParticleData (const PTD& ptd, int idx, typename PTD::IntType * idata_ptr, packParticleIDs(&idata_ptr[iout_index], ptd.idcpu(idx), is_checkpoint); iout_index += 2; - if constexpr (!PTD::ParticleType::is_soa_particle) { - const auto& p = ptd[idx]; + if constexpr (!PTD::ParticleType::is_rtsoa_particle) { + if constexpr (!PTD::ParticleType::is_soa_particle) { + const auto& p = ptd[idx]; - for (int j = 0; j < PTD::ParticleType::NInt; ++j) { - if (write_int_comp[j]) { - idata_ptr[iout_index] = p.idata(j); - iout_index++; + for (int j = 0; j < PTD::ParticleType::NInt; ++j) { + if (write_int_comp[j]) { + idata_ptr[iout_index] = p.idata(j); + iout_index++; + } } } - } - for (int j = 0; j < PTD::NAI; ++j) { - if (write_int_comp[PTD::ParticleType::NInt + j]) { - idata_ptr[iout_index] = ptd.idata(j)[idx]; - iout_index++; + for (int j = 0; j < PTD::NAI; ++j) { + if (write_int_comp[PTD::ParticleType::NInt + j]) { + idata_ptr[iout_index] = ptd.idata(j)[idx]; + iout_index++; + } } - } - for (int j = 0; j < ptd.m_num_runtime_int; ++j) { - if (write_int_comp[PTD::ParticleType::NInt + PTD::NAI + j]) { - idata_ptr[iout_index] = ptd.m_runtime_idata[j][idx]; - iout_index++; + for (int j = 0; j < ptd.m_num_runtime_int; ++j) { + if (write_int_comp[PTD::ParticleType::NInt + PTD::NAI + j]) { + idata_ptr[iout_index] = ptd.m_runtime_idata[j][idx]; + iout_index++; + } + } + } else { + for (int j = 0; j < ptd.m_n_int; ++j) { + if (write_int_comp[j]) { + idata_ptr[iout_index] = ptd.rdata(j)[idx]; + iout_index++; + } } } } @@ -315,7 +333,7 @@ template void packIODataGpu (Vector& idata, Vector& rdata, const PC& pc, int lev, int grid, const Vector& write_real_comp, const Vector& write_int_comp, - const Vector, typename PC::IntVector>>& particle_io_flags, + const Vector, typename PC::FlagsVector>>& particle_io_flags, const Vector& tiles, int np, bool is_checkpoint) { int num_output_int = 0; @@ -389,7 +407,7 @@ template void packIODataCpu (Vector& idata, Vector& rdata, const PC& pc, int lev, int grid, const Vector& write_real_comp, const Vector& write_int_comp, - const Vector, typename PC::IntVector>>& particle_io_flags, + const Vector, typename PC::FlagsVector>>& particle_io_flags, const Vector& tiles, int np, bool is_checkpoint) { int num_output_int = 0; @@ -433,10 +451,10 @@ template void packIOData (Vector& idata, Vector& rdata, const PC& pc, int lev, int grid, const Vector& write_real_comp, const Vector& write_int_comp, - const Vector, typename PC::IntVector>>& particle_io_flags, + const Vector, typename PC::FlagsVector>>& particle_io_flags, const Vector& tiles, int np, bool is_checkpoint) { - if constexpr (IsPolymorphicArenaAllocator::value) { + if constexpr (IsPolymorphicArenaAllocator::value) { if (pc.arena()->isManaged() || pc.arena()->isDevice()) { packIODataGpu(idata, rdata, pc, lev, grid, write_real_comp, write_int_comp, particle_io_flags, tiles, np, is_checkpoint); @@ -445,7 +463,7 @@ packIOData (Vector& idata, Vector& rdata, const PC& pc, int l particle_io_flags, tiles, np, is_checkpoint); } } else { - if constexpr (RunOnGpu::value) { + if constexpr (RunOnGpu::value) { packIODataGpu(idata, rdata, pc, lev, grid, write_real_comp, write_int_comp, particle_io_flags, tiles, np, is_checkpoint); } else { @@ -506,8 +524,9 @@ void WriteBinaryParticleDataSync (PC const& pc, Long maxnextid; // evaluate f for every particle to determine which ones to output - Vector, typename PC::IntVector > > + Vector, typename PC::FlagsVector > > particle_io_flags(pc.GetParticles().size()); + for (int lev = 0; lev < pc.GetParticles().size(); lev++) { const auto& pmap = pc.GetParticles(lev); @@ -898,8 +917,11 @@ void WriteBinaryParticleDataAsync (PC const& pc, } // make tmp particle tiles in pinned memory to write - using PinnedPTile = ParticleTile; + using PinnedPTile = std::conditional_t< + PC::is_rtsoa_pc, + typename PC::ParticleTileType, + ParticleTile + >; auto myptiles = std::make_shared,PinnedPTile> > >(); myptiles->resize(pc.finestLevel()+1); for (int lev = 0; lev <= pc.finestLevel(); lev++) diff --git a/Src/Particle/CMakeLists.txt b/Src/Particle/CMakeLists.txt index 7f475b0dbc..1ffa29775c 100644 --- a/Src/Particle/CMakeLists.txt +++ b/Src/Particle/CMakeLists.txt @@ -30,6 +30,7 @@ foreach(D IN LISTS AMReX_SPACEDIM) AMReX_StructOfArrays.H AMReX_ArrayOfStructs.H AMReX_ParticleTile.H + AMReX_ParticleTileRT.H AMReX_MakeParticle.H AMReX_NeighborParticlesCPUImpl.H AMReX_NeighborParticlesGPUImpl.H diff --git a/Src/Particle/Make.package b/Src/Particle/Make.package index 7959368ef8..95ad4670d7 100644 --- a/Src/Particle/Make.package +++ b/Src/Particle/Make.package @@ -13,6 +13,7 @@ CEXE_headers += AMReX_ParIter.H CEXE_headers += AMReX_StructOfArrays.H CEXE_headers += AMReX_ArrayOfStructs.H CEXE_headers += AMReX_ParticleTile.H +CEXE_headers += AMReX_ParticleTileRT.H CEXE_headers += AMReX_MakeParticle.H CEXE_headers += AMReX_NeighborParticles.H