@@ -237,64 +237,18 @@ void ABLStats::compute_zi()
237
237
.create_scratch_field (3 , m_temperature.num_grow ()[0 ]);
238
238
fvm::gradient (*gradT, m_temperature);
239
239
240
- // // Only compute zi using coarsest level
241
- // BL_PROFILE_VAR("amr-wind::ABLStats::compute_zi_a", ab);
240
+ // Only compute zi using coarsest level
242
241
const int lev = 0 ;
243
242
const int dir = m_normal_dir;
244
243
const auto & geom = (this ->m_sim .repo ()).mesh ().Geom (lev);
245
244
auto const & domain_box = geom.Domain ();
246
- // const auto& gradT_arrs = (*gradT)(lev).const_arrays();
247
- // auto device_tg_fab = amrex::ReduceToPlane<
248
- // amrex::ReduceOpMax, amrex::KeyValuePair<amrex::Real, int>>(
249
- // dir, domain_box, m_temperature(lev),
250
- // [=] AMREX_GPU_DEVICE(int nbx, int i, int j, int k)
251
- // -> amrex::KeyValuePair<amrex::Real, int> {
252
- // const amrex::IntVect iv(i, j, k);
253
- // return {gradT_arrs[nbx](i, j, k, dir), iv[dir]};
254
- // });
255
- // BL_PROFILE_VAR_STOP(ab);
256
-
257
- // BL_PROFILE_VAR("amr-wind::ABLStats::compute_zi_b", bb);
258
- // #ifdef AMREX_USE_GPU
259
- // amrex::BaseFab<amrex::KeyValuePair<amrex::Real, int>> pinned_tg_fab(
260
- // device_tg_fab.box(), device_tg_fab.nComp(), amrex::The_Pinned_Arena());
261
- // amrex::Gpu::dtoh_memcpy(
262
- // pinned_tg_fab.dataPtr(), device_tg_fab.dataPtr(),
263
- // pinned_tg_fab.nBytes());
264
- // #else
265
- // auto& pinned_tg_fab = device_tg_fab;
266
- // #endif
267
- // BL_PROFILE_VAR_STOP(bb);
268
-
269
- // BL_PROFILE_VAR("amr-wind::ABLStats::compute_zi_barrier", barrier);
270
- // amrex::ParallelDescriptor::Barrier();
271
- // BL_PROFILE_VAR_STOP(barrier);
272
-
273
- // BL_PROFILE_VAR("amr-wind::ABLStats::compute_zi_c", cb);
274
- // amrex::ParallelReduce::Max(
275
- // pinned_tg_fab.dataPtr(), static_cast<int>(pinned_tg_fab.size()),
276
- // amrex::ParallelDescriptor::IOProcessorNumber(),
277
- // amrex::ParallelDescriptor::Communicator());
278
- // BL_PROFILE_VAR_STOP(cb);
279
-
280
- // BL_PROFILE_VAR("amr-wind::ABLStats::compute_zi_d", db);
281
- // if (amrex::ParallelDescriptor::IOProcessor()) {
282
- // const auto dnval = m_dn;
283
- // auto* p = pinned_tg_fab.dataPtr();
284
- // m_zi = amrex::Reduce::Sum<amrex::Real>(
285
- // pinned_tg_fab.size(),
286
- // [=] AMREX_GPU_DEVICE(int i) noexcept -> amrex::Real {
287
- // return (p[i].second() + 0.5) * dnval;
288
- // },
289
- // 0.0);
290
- // m_zi /= static_cast<amrex::Real>(pinned_tg_fab.size());
291
- // }
292
- // BL_PROFILE_VAR_STOP(db);
293
-
294
- AMREX_ALWAYS_ASSERT (domain_box.smallEnd () == 0 ); // We could relax this if necessary.
295
- amrex::Array<bool ,AMREX_SPACEDIM> decomp{AMREX_D_DECL (true ,true ,true )};
245
+
246
+ AMREX_ALWAYS_ASSERT (
247
+ domain_box.smallEnd () == 0 ); // We could relax this if necessary.
248
+ amrex::Array<bool , AMREX_SPACEDIM> decomp{AMREX_D_DECL (true , true , true )};
296
249
decomp[dir] = false ; // no domain decompose in the dir direction.
297
- auto new_ba = amrex::decompose (domain_box, amrex::ParallelDescriptor::NProcs (), decomp);
250
+ auto new_ba = amrex::decompose (
251
+ domain_box, amrex::ParallelDescriptor::NProcs (), decomp);
298
252
299
253
amrex::Vector<int > pmap (new_ba.size ());
300
254
std::iota (pmap.begin (), pmap.end (), 0 );
@@ -308,7 +262,8 @@ void ABLStats::compute_zi()
308
262
if (myproc < new_mf.size ()) {
309
263
auto const & a = new_mf.const_array (myproc);
310
264
amrex::Box box2d = amrex::makeSlab (amrex::Box (a), dir, 0 );
311
- AMREX_ALWAYS_ASSERT (dir == 2 ); // xxxxx TODO: we can support other directions later
265
+ AMREX_ALWAYS_ASSERT (
266
+ dir == 2 ); // xxxxx TODO: we can support other directions later
312
267
// xxxxx TODO: sycl can be supported in the future.
313
268
// xxxxx TODO: we can support CPU later.
314
269
const int nblocks = box2d.numPts ();
@@ -319,35 +274,40 @@ void ABLStats::compute_zi()
319
274
const int loy = box2d.smallEnd (1 );
320
275
amrex::Gpu::DeviceVector<int > tmp (nblocks);
321
276
auto * ptmp = tmp.data ();
322
- amrex::launch<nthreads>(nblocks, amrex::Gpu::gpuStream (),
323
- [=] AMREX_GPU_DEVICE ()
324
- {
325
- const int j = int (blockIdx.x ) / lenx + loy;
326
- const int i = int (blockIdx.x ) - j*lenx +lox;
327
- amrex::KeyValuePair<amrex::Real,int > r{std::numeric_limits<amrex::Real>::lowest (),0 };
328
- for (int k = threadIdx.x ; k < lenz; k += nthreads) {
329
- if (a (i,j,k) > r.first ()) { r.second () = k; r.first () = a (i,j,k);}
330
- }
331
- r = amrex::Gpu::blockReduceMax<nthreads>(r);
332
- if (threadIdx.x == 0 ) {
333
- ptmp[blockIdx.x ] = r.second ();
334
- }
335
- });
277
+ amrex::launch<nthreads>(
278
+ nblocks, amrex::Gpu::gpuStream (), [=] AMREX_GPU_DEVICE () {
279
+ const int j = int (blockIdx.x ) / lenx + loy;
280
+ const int i = int (blockIdx.x ) - j * lenx + lox;
281
+ amrex::KeyValuePair<amrex::Real, int > r{
282
+ std::numeric_limits<amrex::Real>::lowest (), 0 };
283
+ for (int k = threadIdx.x ; k < lenz; k += nthreads) {
284
+ if (a (i, j, k) > r.first ()) {
285
+ r.second () = k;
286
+ r.first () = a (i, j, k);
287
+ }
288
+ }
289
+ r = amrex::Gpu::blockReduceMax<nthreads>(r);
290
+ if (threadIdx.x == 0 ) {
291
+ ptmp[blockIdx.x ] = r.second ();
292
+ }
293
+ });
336
294
337
295
const auto dnval = m_dn;
338
- zi_sum = amrex::Reduce::Sum<amrex::Real>
339
- (nblocks, [=] AMREX_GPU_DEVICE (int iblock)
340
- {
341
- return (ptmp[iblock] + amrex::Real (0.5 )) * dnval;
342
- });
296
+ zi_sum = amrex::Reduce::Sum<amrex::Real>(
297
+ nblocks, [=] AMREX_GPU_DEVICE (int iblock) {
298
+ return (ptmp[iblock] + amrex::Real (0.5 )) * dnval;
299
+ });
343
300
}
344
301
345
- amrex::ParallelReduce::Sum (zi_sum, amrex::ParallelDescriptor::IOProcessorNumber (),
346
- amrex::ParallelDescriptor::Communicator ());
302
+ amrex::ParallelReduce::Sum (
303
+ zi_sum, amrex::ParallelDescriptor::IOProcessorNumber (),
304
+ amrex::ParallelDescriptor::Communicator ());
347
305
348
306
amrex::Long npts = 1 ;
349
307
for (int idim = 0 ; idim < AMREX_SPACEDIM; ++idim) {
350
- if (idim != dir) { npts *= domain_box.length (idim); }
308
+ if (idim != dir) {
309
+ npts *= domain_box.length (idim);
310
+ }
351
311
}
352
312
m_zi = zi_sum / static_cast <amrex::Real>(npts);
353
313
}
0 commit comments