diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 9d0e3bc62f..4f3efbf6d7 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -20,6 +20,7 @@ jobs: - uses: actions/checkout@v5 - name: Dependencies run: | + .github/workflows/dependencies/ubuntu_free_disk_space.sh .github/workflows/dependencies/dependencies_nvcc.sh 12.6 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache @@ -82,6 +83,7 @@ jobs: - uses: actions/checkout@v5 - name: Dependencies run: | + .github/workflows/dependencies/ubuntu_free_disk_space.sh .github/workflows/dependencies/dependencies_llvm_cuda11_clang15.sh .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache @@ -193,6 +195,7 @@ jobs: - uses: actions/checkout@v5 - name: Dependencies run: | + .github/workflows/dependencies/ubuntu_free_disk_space.sh .github/workflows/dependencies/dependencies_nvcc_2404.sh 13.0 .github/workflows/dependencies/dependencies_ccache.sh - name: Set Up Cache diff --git a/Src/Particle/AMReX_ArrayOfStructs.H b/Src/Particle/AMReX_ArrayOfStructs.H index 0d18aec5d3..49c36091d5 100644 --- a/Src/Particle/AMReX_ArrayOfStructs.H +++ b/Src/Particle/AMReX_ArrayOfStructs.H @@ -112,6 +112,11 @@ public: [[nodiscard]] typename ParticleVector::const_iterator end () const { return m_data.end(); } [[nodiscard]] typename ParticleVector::const_iterator cend () const { return m_data.cend(); } + void collectVectors (Vector& pvs) /* Appends the address of this object's m_data to pvs. */ + { + pvs.push_back(&m_data); + } + int m_num_neighbor_particles{0}; private: diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index 2397a7d482..9611760d08 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -1275,6 +1275,8 @@ public: void RedistributeGPU (int lev_min = 0, int lev_max = -1, int nGrow = 0, int local=0, bool remove_negative=true); + void ReserveForRedistribute (ParticleCopyPlan const& plan); /* Reserves particle-tile capacity from the counts stored in plan; defined in AMReX_ParticleContainerI.H. */ + Long superParticleSize() const { 
return superparticle_size; } void AddRealComp (std::string const & name, int communicate=1) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index aac96dad87..1bcef69f29 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1587,6 +1587,7 @@ ParticleContainer_implReserveForRedistribute(plan); unpackBuffer(*this, plan, snd_buffer, RedistributeUnpackPolicy()); communicateParticlesFinish(plan); unpackRemotes(*this, plan, rcv_buffer, RedistributeUnpackPolicy()); @@ -1609,6 +1610,8 @@ ParticleContainer_implReserveForRedistribute(plan); + rcv_buffer.resize(pinned_rcv_buffer.size()); unpackBuffer(*this, plan, snd_buffer, RedistributeUnpackPolicy()); communicateParticlesFinish(plan); @@ -1623,6 +1626,43 @@ ParticleContainer_impl class Allocator, class CellAssignor> +void +ParticleContainer_impl +::ReserveForRedistribute (ParticleCopyPlan const& plan) +{ + BL_PROFILE("ParticleContainer::ReserveForRedistribute()"); + /* Totals, per particle tile, the extra element counts taken from plan (m_box_counts_h for every tile visited through MakeMFIter, plus m_rcv_box_counts when m_nrcvs > 0) and passes the totals to ParticleTileType::reserve in a single call. */ + std::map addsizes; + /* addsizes is keyed by tile address (&tile); tiles whose bucket count is not positive get no entry from this loop. */ + for (int lev = 0; lev < this->BufferMap().numLevels(); ++lev) { + for (MFIter mfi = this->MakeMFIter(lev); mfi.isValid(); ++mfi) { + int gid = mfi.index(); + int tid = mfi.LocalTileIndex(); + auto& tile = this->DefineAndReturnParticleTile(lev, gid, tid); + int num_copies = plan.m_box_counts_h[this->BufferMap().gridAndLevToBucket(gid, lev)]; + if (num_copies > 0) { + addsizes[&tile] += num_copies; + } + } + } + /* Add the per-box counts from the plan's m_rcv_box_* arrays; the assertion requires do_tiling to be false on this path. */ + if (plan.m_nrcvs > 0) { + AMREX_ALWAYS_ASSERT(do_tiling == false); + for (int i = 0, N = int(plan.m_rcv_box_counts.size()); i < N; ++i) { + int copy_size = plan.m_rcv_box_counts[i]; + int lev = plan.m_rcv_box_levs[i]; + int gid = plan.m_rcv_box_ids[i]; + int tid = 0; // It's always 0 because this function is for RedistributeGPU only and the tiling is off. 
+ auto& tile = this->DefineAndReturnParticleTile(lev, gid, tid); + addsizes[&tile] += copy_size; + } + } + + ParticleTileType::reserve(addsizes); +} + // // The CPU implementation of Redistribute // diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 4b9c7135dd..a9f4abf05b 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -735,7 +735,7 @@ struct ParticleTile ParticleType::is_soa_particle, ThisParticleTileHasNoAoS, ArrayOfStructs>; - //using ParticleVector = typename AoS::ParticleVector; + using ParticleVector = typename AoS::ParticleVector; using SoA = std::conditional_t< ParticleType::is_soa_particle, @@ -1351,6 +1351,133 @@ struct ParticleTile return ptd; } + void collectVectors (Vector& pv, + Vector& idcpuv, + Vector& rv, Vector& iv) /* Collects pointers to this tile's storage vectors: m_aos_tile's vector into pv (skipped when ParticleType::is_soa_particle) and m_soa_tile's vectors into idcpuv, rv and iv. */ + { + if constexpr (!ParticleType::is_soa_particle) { + m_aos_tile.collectVectors(pv); + } else { + amrex::ignore_unused(pv); + } + m_soa_tile.collectVectors(idcpuv, rv, iv); + } + /* For every (tile pointer, s) entry of addsizes, raises capacity to size() + s. When the IsArenaAllocator check is false each tile is reserved directly; otherwise the component vectors of every tile with s > 0 are gathered into four lists (AoS, idcpu, real, int) and reserved one vector at a time, always taking the list head with the largest target byte size. */ + static void reserve (std::map*, int> const& addsizes) + { + if constexpr (!IsArenaAllocator>::value) { + for (auto [p,s] : addsizes) { + p->reserve(p->size()+s); + } + } else { + using PV = std::conditional_t; + Vector> pvs; + Vector> ids; + Vector> rvs; + Vector> ivs; + for (auto [p,s] : addsizes) { + if (s > 0) { + Vector pv; + Vector idcpuv; + Vector rv; + Vector iv; + p->collectVectors(pv, idcpuv, rv, iv); + if constexpr (!ParticleType::is_soa_particle) { + for (auto* v : pv) { + pvs.emplace_back(v, s); + } + } + for (auto* v : idcpuv) { + ids.emplace_back(v, s); + } + for (auto* v : rv) { + rvs.emplace_back(v, s); + } + for (auto* v : iv) { + ivs.emplace_back(v, s); + } + } + } + /* Order each list by descending target element count (current size + extra). */ + std::sort(pvs.begin(), pvs.end(), [] (auto const& a, auto const& b) { + return (a.first->size() + a.second) > + (b.first->size() + b.second); + }); + std::sort(ids.begin(), ids.end(), [] (auto const& a, auto const& b) { + return (a.first->size() + a.second) > + (b.first->size() + b.second); + 
}); + std::sort(rvs.begin(), rvs.end(), [] (auto const& a, auto const& b) { + return (a.first->size() + a.second) > + (b.first->size() + b.second); + }); + std::sort(ivs.begin(), ivs.end(), [] (auto const& a, auto const& b) { + return (a.first->size() + a.second) > + (b.first->size() + b.second); + }); + + // Handle big vectors first + { + int i_pvs = 0, i_ids = 0, i_rvs = 0, i_ivs = 0; + auto n_pvs = int(pvs.size()); + auto n_ids = int(ids.size()); + auto n_rvs = int(rvs.size()); + auto n_ivs = int(ivs.size()); + while ((i_pvs < n_pvs) || (i_ids < n_ids) || (i_rvs < n_rvs) || + (i_ivs < n_ivs)) { + std::size_t nbytes = 0; + int ii = -1; /* which list currently has the largest head in bytes: 0=pvs, 1=ids, 2=rvs, 3=ivs */ + if (i_pvs < n_pvs) { + std::size_t my_bytes = (pvs[i_pvs].first->size() + + pvs[i_pvs].second) * sizeof(typename PV::value_type); + if (my_bytes > nbytes) { + nbytes = my_bytes; + ii = 0; + } + } + if (i_ids < n_ids) { + std::size_t my_bytes = (ids[i_ids].first->size() + + ids[i_ids].second) * sizeof(typename SoA::IdCPU::value_type); + if (my_bytes > nbytes) { + nbytes = my_bytes; + ii = 1; + } + } + if (i_rvs < n_rvs) { + std::size_t my_bytes = (rvs[i_rvs].first->size() + + rvs[i_rvs].second) * sizeof(typename RealVector::value_type); + if (my_bytes > nbytes) { + nbytes = my_bytes; + ii = 2; + } + } + if (i_ivs < n_ivs) { + std::size_t my_bytes = (ivs[i_ivs].first->size() + + ivs[i_ivs].second) * sizeof(typename IntVector::value_type); + if (my_bytes > nbytes) { + nbytes = my_bytes; + ii = 3; + } + } + if (ii == 0) { + auto [p,s] = pvs[i_pvs++]; + p->reserve(p->size() + s); + } else if (ii == 1) { + auto [p,s] = ids[i_ids++]; + p->reserve(p->size() + s); + } else if (ii == 2) { + auto [p,s] = rvs[i_rvs++]; + p->reserve(p->size() + s); + } else { + auto [p,s] = ivs[i_ivs++]; + p->reserve(p->size() + s); + } + } + } + } + } + private: AoS m_aos_tile; diff --git a/Src/Particle/AMReX_StructOfArrays.H b/Src/Particle/AMReX_StructOfArrays.H index fc7e1eb28c..67a9faeea8 100644 --- a/Src/Particle/AMReX_StructOfArrays.H +++ 
b/Src/Particle/AMReX_StructOfArrays.H @@ -333,6 +333,29 @@ struct StructOfArrays { return arr; } + void collectVectors (Vector& idcpuv, Vector& rv, + Vector& iv) + /* Appends pointers to this object's component vectors: m_idcpu to idcpuv (only when use64BitIdCpu is true), the NReal entries of m_rdata followed by every m_runtime_rdata entry to rv, and the NInt entries of m_idata followed by every m_runtime_idata entry to iv. */ + { + if constexpr (use64BitIdCpu == true) { + idcpuv.push_back(&m_idcpu); + } else { + amrex::ignore_unused(idcpuv); + } + if constexpr (NReal > 0) { + for (int i = 0; i < NReal; ++i) { rv.push_back(&(m_rdata[i])); } + } + if constexpr (NInt > 0) { + for (int i = 0; i < NInt; ++i) { iv.push_back(&(m_idata[i])); } + } + for (int i = 0; i < int(m_runtime_rdata.size()); ++i) { + rv.push_back(&(m_runtime_rdata[i])); + } + for (int i = 0; i < int(m_runtime_idata.size()); ++i) { + iv.push_back(&(m_runtime_idata[i])); + } + } + int m_num_neighbor_particles{0}; private: