@@ -67,14 +67,13 @@ InitParticles(const IntVect& a_num_particles_per_cell,
6767 {
6868 const Box& tile_box = mfi.tilebox ();
6969
70- const auto lo = amrex::lbound (tile_box);
71- const auto hi = amrex::ubound (tile_box);
72-
7370 Gpu::DeviceVector<unsigned int > counts (tile_box.numPts (), 0 );
74- unsigned int * pcount = counts.dataPtr ();
71+ Array4<unsigned int > counts_arr {counts.dataPtr (), amrex::begin (tile_box),
72+ amrex::end (tile_box), 1 };
7573
7674 Gpu::DeviceVector<unsigned int > offsets (tile_box.numPts ());
77- unsigned int * poffset = offsets.dataPtr ();
75+ Array4<unsigned int > offsets_arr {offsets.dataPtr (), amrex::begin (tile_box),
76+ amrex::end (tile_box), 1 };
7877
7978 amrex::ParallelFor (tile_box,
8079 [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
@@ -93,63 +92,28 @@ InitParticles(const IntVect& a_num_particles_per_cell,
9392 y >= a_bounds.hi (1 ) || y < a_bounds.lo (1 ) ||
9493 z >= a_bounds.hi (2 ) || z < a_bounds.lo (2 ) ) continue ;
9594
96- int ix = i - lo.x ;
97- int iy = j - lo.y ;
98- int iz = k - lo.z ;
99- int nx = hi.x -lo.x +1 ;
100- int ny = hi.y -lo.y +1 ;
101- int nz = hi.z -lo.z +1 ;
102- unsigned int uix = amrex::min (nx-1 ,amrex::max (0 ,ix));
103- unsigned int uiy = amrex::min (ny-1 ,amrex::max (0 ,iy));
104- unsigned int uiz = amrex::min (nz-1 ,amrex::max (0 ,iz));
105- unsigned int cellid = (uix * ny + uiy) * nz + uiz;
106- pcount[cellid] += 1 ;
95+ counts_arr (i, j, k) += 1 ;
10796 }
10897 });
10998
110- Gpu::exclusive_scan (counts.begin (), counts.end (), offsets.begin ());
111-
112- unsigned int last_offset;
113- unsigned int last_count;
114- #ifdef AMREX_USE_GPU
115- Gpu::dtoh_memcpy (&last_offset,offsets.dataPtr ()+tile_box.numPts ()-1 ,sizeof (unsigned int ));
116- Gpu::dtoh_memcpy (&last_count,counts.dataPtr ()+tile_box.numPts ()-1 ,sizeof (unsigned int ));
117- #else
118- std::memcpy (&last_offset,offsets.dataPtr ()+tile_box.numPts ()-1 ,sizeof (unsigned int )
119- std::memcpy (&last_count,counts.dataPtr ()+tile_box.numPts ()-1 ,sizeof (unsigned int )
120- #endif
121- int num_to_add = last_offset + last_count;
99+ int num_to_add = Scan::ExclusiveSum (counts.size (), counts.data (), offsets.data ());
122100
123- auto & particles = GetParticles (lev);
124- auto & particle_tile = particles[std::make_pair (mfi.index (), mfi.LocalTileIndex ())];
101+ auto & particle_tile = DefineAndReturnParticleTile (lev, mfi.index (), mfi.LocalTileIndex ());
125102
126103 auto old_size = particle_tile.GetArrayOfStructs ().size ();
127104 auto new_size = old_size + num_to_add;
128105 particle_tile.resize (new_size);
129106
130107 if (num_to_add == 0 ) continue ;
131108
132- ParticleType* pstruct = particle_tile.GetArrayOfStructs ()().data ();
133-
134- auto arrdata = particle_tile.GetStructOfArrays ().realarray ();
109+ auto ptd = particle_tile.getParticleTileData ();
135110
136111 int procID = ParallelDescriptor::MyProc ();
137112
138113 amrex::ParallelForRNG (tile_box,
139114 [=] AMREX_GPU_DEVICE (int i, int j, int k, amrex::RandomEngine const & engine) noexcept
140115 {
141- int ix = i - lo.x ;
142- int iy = j - lo.y ;
143- int iz = k - lo.z ;
144- int nx = hi.x -lo.x +1 ;
145- int ny = hi.y -lo.y +1 ;
146- int nz = hi.z -lo.z +1 ;
147- unsigned int uix = amrex::min (nx-1 ,amrex::max (0 ,ix));
148- unsigned int uiy = amrex::min (ny-1 ,amrex::max (0 ,iy));
149- unsigned int uiz = amrex::min (nz-1 ,amrex::max (0 ,iz));
150- unsigned int cellid = (uix * ny + uiy) * nz + uiz;
151-
152- int pidx = poffset[cellid] + old_size;
116+ int pidx = offsets_arr (i, j, k) + old_size;
153117
154118 for (int i_part=0 ; i_part<num_ppc;i_part++)
155119 {
@@ -179,24 +143,23 @@ InitParticles(const IntVect& a_num_particles_per_cell,
179143 y >= a_bounds.hi (1 ) || y < a_bounds.lo (1 ) ||
180144 z >= a_bounds.hi (2 ) || z < a_bounds.lo (2 ) ) continue ;
181145
182- ParticleType& p = pstruct[pidx];
183- p.id () = pidx + 1 ;
184- p.cpu () = procID;
185- p.pos (0 ) = x;
186- p.pos (1 ) = y;
187- p.pos (2 ) = z;
188-
189- arrdata[PIdx::ux ][pidx] = u[0 ] * PhysConst::c;
190- arrdata[PIdx::uy ][pidx] = u[1 ] * PhysConst::c;
191- arrdata[PIdx::uz ][pidx] = u[2 ] * PhysConst::c;
192- arrdata[PIdx::w ][pidx] = a_density * scale_fac;
193- arrdata[PIdx::Ex ][pidx] = 0.0 ;
194- arrdata[PIdx::Ey ][pidx] = 0.0 ;
195- arrdata[PIdx::Ez ][pidx] = 0.0 ;
196- arrdata[PIdx::Bx ][pidx] = 0.0 ;
197- arrdata[PIdx::By ][pidx] = 0.0 ;
198- arrdata[PIdx::Bz ][pidx] = 0.0 ;
199- arrdata[PIdx::ginv][pidx] = 0.0 ;
146+ ptd.id (pidx) = pidx + 1 ;
147+ ptd.cpu (pidx) = procID;
148+ ptd.pos (0 , pidx) = x;
149+ ptd.pos (1 , pidx) = y;
150+ ptd.pos (2 , pidx) = z;
151+
152+ ptd.rdata (PIdx::ux )[pidx] = u[0 ] * PhysConst::c;
153+ ptd.rdata (PIdx::uy )[pidx] = u[1 ] * PhysConst::c;
154+ ptd.rdata (PIdx::uz )[pidx] = u[2 ] * PhysConst::c;
155+ ptd.rdata (PIdx::w )[pidx] = a_density * scale_fac;
156+ ptd.rdata (PIdx::Ex )[pidx] = 0.0 ;
157+ ptd.rdata (PIdx::Ey )[pidx] = 0.0 ;
158+ ptd.rdata (PIdx::Ez )[pidx] = 0.0 ;
159+ ptd.rdata (PIdx::Bx )[pidx] = 0.0 ;
160+ ptd.rdata (PIdx::By )[pidx] = 0.0 ;
161+ ptd.rdata (PIdx::Bz )[pidx] = 0.0 ;
162+ ptd.rdata (PIdx::ginv)[pidx] = 0.0 ;
200163
201164 ++pidx;
202165 }
0 commit comments