AMReX-Codes
diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
Lines changed: 58 additions & 19 deletions
diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H
Lines changed: 11 additions & 3 deletions
diff --git a/Src/Particle/AMReX_ParticleInit.H b/Src/Particle/AMReX_ParticleInit.H
Lines changed: 24 additions & 4 deletions
@@ -18,10 +18,10 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
         if (h_redistribute_int_comp[i]) {++num_int_comm_comps;}
     }
 
-    if constexpr(!ParticleType::is_soa_particle) {
-        particle_size = sizeof(ParticleType);
+    if constexpr (ParticleType::is_soa_particle) {
+        particle_size = sizeof(uint64_t);  // idcpu
     } else {
-        particle_size = 0;
+        particle_size = sizeof(ParticleType);
     }
     superparticle_size = particle_size +
         num_real_comm_comps*sizeof(ParticleReal) + num_int_comm_comps*sizeof(int);
@@ -1095,7 +1095,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
     const size_t np_total = np + ptile.numNeighborParticles();
 
     if (memEfficientSort) {
-        if constexpr(!ParticleType::is_soa_particle) {
+        if constexpr (!ParticleType::is_soa_particle) {
             static_assert(sizeof(ParticleType)%4 == 0 && sizeof(uint32_t) == 4);
             using tmp_t = std::conditional_t<sizeof(ParticleType)%8 == 0,
                                              uint64_t, uint32_t>;
@@ -1530,7 +1530,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
             unsigned npart = ptile_ptrs[pmap_it]->numParticles();
             ParticleLocData pld;
 
-            if constexpr(!ParticleType::is_soa_particle){
+            if constexpr (!ParticleType::is_soa_particle){
 
                 if (npart != 0) {
                     Long last = npart - 1;
@@ -1647,7 +1647,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
                     }
                 }
 
-            } else{ // soa particle
+            } else { // soa particle
 
                 auto particle_tile = ptile_ptrs[pmap_it];
                 if (npart != 0) {
@@ -1663,6 +1663,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
                         }
 
                         if (p.id() < 0){
+                            soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
                             for (int comp = 0; comp < NumRealComps(); comp++) {
                                 soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
                             }
@@ -1679,6 +1680,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
                         particlePostLocate(p, pld, lev);
 
                         if (p.id() < 0) {
+                            soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
                             for (int comp = 0; comp < NumRealComps(); comp++) {
                                 soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
                             }
@@ -1696,6 +1698,10 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
                                 // We own it but must shift it to another place.
                                 auto index = std::make_pair(pld.m_grid, pld.m_tile);
                                 AMREX_ASSERT(soa_local[pld.m_lev][index].size() == num_threads);
+                                {
+                                    auto& arr = soa_local[pld.m_lev][index][thread_num].GetIdCPUData();
+                                    arr.push_back(soa.GetIdCPUData()[pindex]);
+                                }
                                 for (int comp = 0; comp < NumRealComps(); ++comp) {
                                     RealVector& arr = soa_local[pld.m_lev][index][thread_num].GetRealData(comp);
                                     arr.push_back(soa.GetRealData(comp)[pindex]);
@@ -1715,6 +1721,10 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
                             particles_to_send.resize(new_size);
 
                             char* dst = &particles_to_send[old_size];
+                            {
+                                std::memcpy(dst, &soa.GetIdCPUData()[pindex], sizeof(uint64_t));
+                                dst += sizeof(uint64_t);
+                            }
                             int array_comp_start = AMREX_SPACEDIM + NStructReal;
                             for (int comp = 0; comp < NumRealComps(); comp++) {
                                 if (h_redistribute_real_comp[array_comp_start + comp]) {
@@ -1733,6 +1743,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
                         }
 
                         if (p.id() < 0){
+                            soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
                             for (int comp = 0; comp < NumRealComps(); comp++) {
                                 soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
                             }
@@ -1747,6 +1758,10 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
                         ++pindex;
                     }
 
+                    {
+                        auto& iddata = soa.GetIdCPUData();
+                        iddata.erase(iddata.begin() + last + 1, iddata.begin() + npart);
+                    }
                     for (int comp = 0; comp < NumRealComps(); comp++) {
                         RealVector& rdata = soa.GetRealData(comp);
                         rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart);
@@ -1828,6 +1843,12 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
                 auto& soa = ptile.GetStructOfArrays();
                 auto& soa_tmp = soa_local[lev][index];
                 for (int i = 0; i < num_threads; ++i) {
+                    {
+                        auto& arr = soa.GetIdCPUData();
+                        auto& tmp = soa_tmp[i].GetIdCPUData();
+                        arr.insert(arr.end(), tmp.begin(), tmp.end());
+                        tmp.erase(tmp.begin(), tmp.end());
+                    }
                     for (int comp = 0; comp < NumRealComps(); ++comp) {
                         RealVector& arr = soa.GetRealData(comp);
                         RealVector& tmp = soa_tmp[i].GetRealData(comp);
@@ -2045,20 +2066,16 @@ RedistributeMPI (std::map<int, Vector<char> >& not_ours,
 
                 Particle<NStructReal, NStructInt> p;
 
-                if constexpr (!ParticleType::is_soa_particle) {
-                   std::memcpy(&p, pbuf, sizeof(ParticleType));
-                } else {
+                if constexpr (ParticleType::is_soa_particle) {
+                    std::memcpy(&p.m_idcpu, pbuf, sizeof(uint64_t));
+
                     ParticleReal pos[AMREX_SPACEDIM];
-                    std::memcpy(&pos[0], pbuf, AMREX_SPACEDIM*sizeof(ParticleReal));
+                    std::memcpy(&pos[0], pbuf + sizeof(uint64_t), AMREX_SPACEDIM*sizeof(ParticleReal));
                     AMREX_D_TERM(p.pos(0) = pos[0];,
                                  p.pos(1) = pos[1];,
                                  p.pos(2) = pos[2]);
-
-                    int idcpu[2];
-                    std::memcpy(&idcpu[0], pbuf + NumRealComps()*sizeof(ParticleReal), 2*sizeof(int));
-
-                    p.id() = idcpu[0];
-                    p.cpu() = idcpu[1];
+                } else {
+                   std::memcpy(&p, pbuf, sizeof(ParticleType));
                 }
 
                 bool success = Where(p, pld, lev_min, lev_max, 0);
@@ -2097,7 +2114,12 @@ RedistributeMPI (std::map<int, Vector<char> >& not_ours,
                                                                           rcv_tile[ipart])];
                 char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size;
 
-                if constexpr(! ParticleType::is_soa_particle) {
+                if constexpr (ParticleType::is_soa_particle) {
+                    uint64_t idcpudata;
+                    std::memcpy(&idcpudata, pbuf, sizeof(uint64_t));
+                    pbuf += sizeof(uint64_t);
+                    ptile.GetStructOfArrays().GetIdCPUData().push_back(idcpudata);
+                } else {
                     ParticleType p;
                     std::memcpy(&p, pbuf, sizeof(ParticleType));
                     pbuf += sizeof(ParticleType);
@@ -2146,6 +2168,10 @@ RedistributeMPI (std::map<int, Vector<char> >& not_ours,
         host_int_attribs.reserve(15);
         host_int_attribs.resize(finestLevel()+1);
 
+        Vector<std::map<std::pair<int, int>, Gpu::HostVector<uint64_t> > > host_idcpu;
+        host_idcpu.reserve(15);
+        host_idcpu.resize(finestLevel()+1);
+
         ipart = 0;
         for (int i = 0; i < nrcvs; ++i)
         {
@@ -2159,7 +2185,15 @@ RedistributeMPI (std::map<int, Vector<char> >& not_ours,
 
                 char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size;
 
-                if constexpr(! ParticleType::is_soa_particle) {
+                host_real_attribs[lev][ind].resize(NumRealComps());
+                host_int_attribs[lev][ind].resize(NumIntComps());
+
+                if constexpr (ParticleType::is_soa_particle) {
+                    uint64_t idcpudata;
+                    std::memcpy(&idcpudata, pbuf, sizeof(uint64_t));
+                    pbuf += sizeof(uint64_t);
+                    host_idcpu[lev][ind].push_back(idcpudata);
+                } else {
                     ParticleType p;
                     std::memcpy(&p, pbuf, sizeof(ParticleType));
                     pbuf += sizeof(ParticleType);
@@ -2210,7 +2244,12 @@ RedistributeMPI (std::map<int, Vector<char> >& not_ours,
               auto new_size = old_size + src_tile.size();
               dst_tile.resize(new_size);
 
-              if constexpr(! ParticleType::is_soa_particle) {
+              if constexpr (ParticleType::is_soa_particle) {
+                  Gpu::copyAsync(Gpu::hostToDevice,
+                                 host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),
+                                 host_idcpu[host_lev][std::make_pair(grid,tile)].end(),
+                                 dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);
+              } else {
                   Gpu::copyAsync(Gpu::hostToDevice,
                                  src_tile.begin(), src_tile.end(),
                                  dst_tile.GetArrayOfStructs().begin() + old_size);
 
@@ -954,6 +954,10 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
     host_int_attribs.reserve(15);
     host_int_attribs.resize(finest_level_in_file+1);
 
+    Vector<std::map<std::pair<int, int>, Gpu::HostVector<uint64_t> > > host_idcpu;
+    host_idcpu.reserve(15);
+    host_idcpu.resize(finestLevel()+1);
+
     for (int i = 0; i < cnt; i++) {
         // note: for pure SoA particle layouts, we do write the id, cpu and positions as a struct
         //       for backwards compatibility with readers
@@ -1021,8 +1025,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
                 host_real_attribs[pld.m_lev][ind][j].push_back(ptemp.pos(j));
             }
 
-            host_int_attribs[pld.m_lev][ind][0].push_back(ptemp.id());
-            host_int_attribs[pld.m_lev][ind][1].push_back(ptemp.cpu());
+            host_idcpu[pld.m_lev][ind].push_back(ptemp.m_idcpu);
 
             // read all other SoA
             // add the real...
@@ -1032,7 +1035,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
             }
 
             // ... and int array data
-            for (int icomp = 2; icomp < NumIntComps(); icomp++) {
+            for (int icomp = 0; icomp < NumIntComps(); icomp++) {
                 host_int_attribs[lev][ind][icomp].push_back(*iptr);
                 ++iptr;
             }
@@ -1061,6 +1064,11 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
             {
                 Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),
                                dst_tile.GetArrayOfStructs().begin() + old_size);
+            } else {
+                Gpu::copyAsync(Gpu::hostToDevice,
+                               host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),
+                               host_idcpu[host_lev][std::make_pair(grid,tile)].end(),
+                               dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);
             }
 
             for (int i = 0; i < NumRealComps(); ++i) { // NOLINT(readability-misleading-indentation)
 
@@ -1062,6 +1062,10 @@ InitRandom (Long                    icount,
         host_int_attribs.reserve(15);
         host_int_attribs.resize(finestLevel()+1);
 
+        Vector<std::map<std::pair<int, int>, Gpu::HostVector<uint64_t> > > host_idcpu;
+        host_idcpu.reserve(15);
+        host_idcpu.resize(finestLevel()+1);
+
         for (Long j = 0; j < icount; j++)
         {
             Particle<0, 0> ptest;
@@ -1117,8 +1121,9 @@ InitRandom (Long                    icount,
                         host_real_attribs[pld.m_lev][ind][i].push_back(pos[j*AMREX_SPACEDIM+i]);
                     }
 
-                    host_int_attribs[pld.m_lev][ind][0].push_back(ParticleType::NextID());
-                    host_int_attribs[pld.m_lev][ind][1].push_back(MyProc);
+                    host_idcpu[pld.m_lev][ind].push_back(0);
+                    ParticleIDWrapper(host_idcpu[pld.m_lev][ind].back()) = ParticleType::NextID();
+                    ParticleCPUWrapper(host_idcpu[pld.m_lev][ind].back()) = ParallelDescriptor::MyProc();
 
                     host_particles[pld.m_lev][ind];
 
@@ -1157,6 +1162,11 @@ InitRandom (Long                    icount,
                 {
                     Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),
                                    dst_tile.GetArrayOfStructs().begin() + old_size);
+                } else {
+                    Gpu::copyAsync(Gpu::hostToDevice,
+                                   host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),
+                                   host_idcpu[host_lev][std::make_pair(grid,tile)].end(),
+                                   dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);
                 }
 
                 for (int i = 0; i < NArrayReal; ++i) { // NOLINT(readability-misleading-indentation)
@@ -1201,6 +1211,10 @@ InitRandom (Long                    icount,
         host_int_attribs.reserve(15);
         host_int_attribs.resize(finestLevel()+1);
 
+        Vector<std::map<std::pair<int, int>, Gpu::HostVector<uint64_t> > > host_idcpu;
+        host_idcpu.reserve(15);
+        host_idcpu.resize(finestLevel()+1);
+
         for (Long icnt = 0; icnt < M; icnt++) {
             Particle<0, 0> ptest;
             for (int i = 0; i < AMREX_SPACEDIM; i++) {
@@ -1261,8 +1275,9 @@ InitRandom (Long                    icount,
                     host_real_attribs[pld.m_lev][ind][i].push_back(ptest.pos(i));
                 }
 
-                host_int_attribs[pld.m_lev][ind][0].push_back(ptest.id());
-                host_int_attribs[pld.m_lev][ind][1].push_back(ptest.cpu());
+                host_idcpu[pld.m_lev][ind].push_back(0);
+                ParticleIDWrapper(host_idcpu[pld.m_lev][ind].back()) = ParticleType::NextID();
+                ParticleCPUWrapper(host_idcpu[pld.m_lev][ind].back()) = ParallelDescriptor::MyProc();
 
                 host_particles[pld.m_lev][ind];
 
@@ -1300,6 +1315,11 @@ InitRandom (Long                    icount,
                 {
                     Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),
                                    dst_tile.GetArrayOfStructs().begin() + old_size);
+                } else {
+                    Gpu::copyAsync(Gpu::hostToDevice,
+                                   host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),
+                                   host_idcpu[host_lev][std::make_pair(grid,tile)].end(),
+                                   dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);
                 }
 
                 for (int i = 0; i < NArrayReal; ++i) { // NOLINT(readability-misleading-indentation)