Skip to content

Commit df1e7d0

Browse files
authored
Parallel Copy with Offset (#4510)
Add new versions of ParallelCopy and ParallelAdd that effectively shift the source multifab first and then perform copy/add.
1 parent 11d0e80 commit df1e7d0

File tree

4 files changed

+109
-13
lines changed

4 files changed

+109
-13
lines changed

Src/Base/AMReX_FabArray.H

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -980,6 +980,18 @@ public:
980980
const FabArrayBase::CPC* a_cpc = nullptr,
981981
bool to_ghost_cells_only = false);
982982

983+
void ParallelCopy_nowait (const FabArray<FAB>& src,
984+
int scomp,
985+
int dcomp,
986+
int ncomp,
987+
const IntVect& snghost,
988+
const IntVect& dnghost,
989+
const IntVect& offset,
990+
const Periodicity& period = Periodicity::NonPeriodic(),
991+
CpOp op = FabArrayBase::COPY,
992+
const FabArrayBase::CPC* a_cpc = nullptr,
993+
bool to_ghost_cells_only = false);
994+
983995
void ParallelCopy_finish ();
984996

985997
void ParallelCopyToGhost (const FabArray<FAB>& src,
@@ -1000,6 +1012,38 @@ public:
10001012

10011013
void ParallelCopyToGhost_finish();
10021014

1015+
/**
1016+
* \brief This function copies data from src, shifted by offset, to this FabArray.
1017+
*
1018+
* \param src source FabArray
1019+
* \param src_comp starting component of source involved in this function
1020+
* \param dest_comp starting component of this FabArray involved in this function
1021+
* \param num_comp number of components involved in this function
1022+
* \param snghost number of source ghost cells involved in this function
1023+
* \param dnghost number of destination ghost cells involved in this function
1024+
* \param offset index shift. dest(iv,dest_comp+n) = src(iv-offset,src_comp+n)
1025+
* \param period periodicity of the data
1026+
*/
1027+
void ParallelCopy (const FabArray<FAB>& src, int src_comp, int dest_comp, int num_comp,
1028+
const IntVect& snghost, const IntVect& dnghost,
1029+
const IntVect& offset, const Periodicity& period);
1030+
1031+
/**
1032+
* \brief This function adds data from src, shifted by offset, to this FabArray.
1033+
*
1034+
* \param src source FabArray
1035+
* \param src_comp starting component of source involved in this function
1036+
* \param dest_comp starting component of this FabArray involved in this function
1037+
* \param num_comp number of components involved in this function
1038+
* \param snghost number of source ghost cells involved in this function
1039+
* \param dnghost number of destination ghost cells involved in this function
1040+
* \param offset index shift. dest(iv,dest_comp+n) += src(iv-offset,src_comp+n)
1041+
* \param period periodicity of the data
1042+
*/
1043+
void ParallelAdd (const FabArray<FAB>& src, int src_comp, int dest_comp, int num_comp,
1044+
const IntVect& snghost, const IntVect& dnghost,
1045+
const IntVect& offset, const Periodicity& period);
1046+
10031047
[[deprecated("Use FabArray::ParallelCopy() instead.")]]
10041048
void copy (const FabArray<FAB>& src,
10051049
int src_comp,

Src/Base/AMReX_FabArrayBase.H

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,8 @@ public:
535535
{
536536
CPC (const FabArrayBase& dstfa, const IntVect& dstng,
537537
const FabArrayBase& srcfa, const IntVect& srcng,
538-
const Periodicity& period, bool to_ghost_cells_only = false);
538+
const Periodicity& period, bool to_ghost_cells_only = false,
539+
const IntVect& offset = IntVect(0));
539540
CPC (const BoxArray& dstba, const DistributionMapping& dstdm,
540541
const Vector<int>& dstidx, const IntVect& dstng,
541542
const BoxArray& srcba, const DistributionMapping& srcdm,
@@ -551,6 +552,7 @@ public:
551552
BDKey m_dstbdk;
552553
IntVect m_srcng;
553554
IntVect m_dstng;
555+
IntVect m_offset;
554556
Periodicity m_period;
555557
bool m_tgco;
556558
BoxArray m_srcba;
@@ -574,7 +576,8 @@ public:
574576
static CacheStats m_CPC_stats;
575577
//
576578
const CPC& getCPC (const IntVect& dstng, const FabArrayBase& src, const IntVect& srcng,
577-
const Periodicity& period, bool to_ghost_cells_only = false) const;
579+
const Periodicity& period, bool to_ghost_cells_only = false,
580+
const IntVect& offset = IntVect(0)) const;
578581

579582
//
580583
//! Rotate Boundary by 90

Src/Base/AMReX_FabArrayBase.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -291,12 +291,14 @@ FabArrayBase::TileArray::bytes () const
291291

292292
FabArrayBase::CPC::CPC (const FabArrayBase& dstfa, const IntVect& dstng,
293293
const FabArrayBase& srcfa, const IntVect& srcng,
294-
const Periodicity& period, bool to_ghost_cells_only)
294+
const Periodicity& period, bool to_ghost_cells_only,
295+
const IntVect& offset)
295296
: m_id(comm_meta_data_id++),
296297
m_srcbdk(srcfa.getBDKey()),
297298
m_dstbdk(dstfa.getBDKey()),
298299
m_srcng(srcng),
299300
m_dstng(dstng),
301+
m_offset(offset),
300302
m_period(period),
301303
m_tgco(to_ghost_cells_only),
302304
m_srcba(srcfa.boxArray()),
@@ -347,7 +349,8 @@ FabArrayBase::CPC::define (const BoxArray& ba_dst, const DistributionMapping& dm
347349

348350
std::vector< std::pair<int,Box> > isects;
349351

350-
const std::vector<IntVect>& pshifts = m_period.shiftIntVect(ng_dst);
352+
std::vector<IntVect> pshifts = m_period.shiftIntVect(ng_dst);
353+
for (auto& pit : pshifts) { pit += m_offset; }
351354

352355
auto& send_tags = *m_SndTags;
353356

@@ -408,26 +411,26 @@ FabArrayBase::CPC::define (const BoxArray& ba_dst, const DistributionMapping& dm
408411

409412
for (auto const& pit : pshifts)
410413
{
411-
ba_src.intersections(bx_dst+pit, isects, false, ng_src);
414+
ba_src.intersections(bx_dst-pit, isects, false, ng_src);
412415

413416
for (auto const& is : isects)
414417
{
415418
const int k_src = is.first;
416-
const Box& bx = is.second - pit;
419+
const Box& bx = is.second + pit;
417420
const int src_owner = dm_src[k_src];
418421

419422
BoxList const bl_dst = m_tgco ? boxDiff(bx,bx_dst_valid) : BoxList(bx);
420423
for (auto const& b : bl_dst) {
421424
if (ParallelDescriptor::sameTeam(src_owner, MyProc)) { // local copy
422425
const BoxList tilelist(b, FabArrayBase::comm_tile_size);
423426
for (auto const& btile : tilelist) {
424-
m_LocTags->emplace_back(btile, btile+pit, k_dst, k_src);
427+
m_LocTags->emplace_back(btile, btile-pit, k_dst, k_src);
425428
}
426429
if (check_local) {
427430
bl_local.push_back(b);
428431
}
429432
} else if (MyProc == dm_dst[k_dst]) {
430-
recv_tags[src_owner].emplace_back(b, b+pit, k_dst, k_src);
433+
recv_tags[src_owner].emplace_back(b, b-pit, k_dst, k_src);
431434
if (check_remote) {
432435
bl_remote.push_back(b);
433436
}
@@ -574,7 +577,8 @@ FabArrayBase::flushCPCache ()
574577

575578
const FabArrayBase::CPC&
576579
FabArrayBase::getCPC (const IntVect& dstng, const FabArrayBase& src, const IntVect& srcng,
577-
const Periodicity& period, bool to_ghost_cells_only) const
580+
const Periodicity& period, bool to_ghost_cells_only,
581+
const IntVect& offset) const
578582
{
579583
BL_PROFILE("FabArrayBase::getCPC()");
580584

@@ -591,6 +595,7 @@ FabArrayBase::getCPC (const IntVect& dstng, const FabArrayBase& src, const IntVe
591595
{
592596
if (it->second->m_srcng == srcng &&
593597
it->second->m_dstng == dstng &&
598+
it->second->m_offset == offset &&
594599
it->second->m_srcbdk == srckey &&
595600
it->second->m_dstbdk == dstkey &&
596601
it->second->m_period == period &&
@@ -605,7 +610,7 @@ FabArrayBase::getCPC (const IntVect& dstng, const FabArrayBase& src, const IntVe
605610
}
606611

607612
// Have to build a new one
608-
CPC* new_cpc = new CPC(*this, dstng, src, srcng, period, to_ghost_cells_only);
613+
CPC* new_cpc = new CPC(*this, dstng, src, srcng, period, to_ghost_cells_only, offset);
609614

610615
#ifdef AMREX_MEM_PROFILING
611616
m_CPC_stats.bytes += new_cpc->bytes();

Src/Base/AMReX_FabArrayCommI.H

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,31 @@ FabArray<FAB>::ParallelCopy (const FabArray<FAB>& src,
271271
ParallelCopy_finish();
272272
}
273273

274+
template <class FAB>
275+
void
276+
FabArray<FAB>::ParallelCopy (const FabArray<FAB>& src, int src_comp, int dest_comp,
277+
int num_comp, const IntVect& snghost, const IntVect& dnghost,
278+
const IntVect& offset, const Periodicity& period)
279+
{
280+
BL_PROFILE("FabArray::ParallelCopy()");
281+
282+
ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,snghost,dnghost,offset,period);
283+
ParallelCopy_finish();
284+
}
285+
286+
template <class FAB>
287+
void
288+
FabArray<FAB>::ParallelAdd (const FabArray<FAB>& src, int src_comp, int dest_comp,
289+
int num_comp, const IntVect& snghost, const IntVect& dnghost,
290+
const IntVect& offset, const Periodicity& period)
291+
{
292+
BL_PROFILE("FabArray::ParallelAdd()");
293+
294+
ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,snghost,dnghost,offset,period,
295+
FabArray::ADD);
296+
ParallelCopy_finish();
297+
}
298+
274299
template <class FAB>
275300
void
276301
FabArray<FAB>::ParallelCopyToGhost (const FabArray<FAB>& src,
@@ -322,6 +347,24 @@ FabArray<FAB>::ParallelCopy_nowait (const FabArray<FAB>& src,
322347
CpOp op,
323348
const FabArrayBase::CPC * a_cpc,
324349
bool to_ghost_cells_only)
350+
{
351+
ParallelCopy_nowait(src,scomp,dcomp,ncomp,snghost,dnghost,IntVect(0),period,op,a_cpc,
352+
to_ghost_cells_only);
353+
}
354+
355+
template <class FAB>
356+
void
357+
FabArray<FAB>::ParallelCopy_nowait (const FabArray<FAB>& src,
358+
int scomp,
359+
int dcomp,
360+
int ncomp,
361+
const IntVect& snghost,
362+
const IntVect& dnghost,
363+
const IntVect& offset,
364+
const Periodicity& period,
365+
CpOp op,
366+
const FabArrayBase::CPC * a_cpc,
367+
bool to_ghost_cells_only)
325368
{
326369
BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: PC");
327370
BL_PROFILE("FabArray::ParallelCopy_nowait()");
@@ -341,7 +384,7 @@ FabArray<FAB>::ParallelCopy_nowait (const FabArray<FAB>& src,
341384

342385
if ((ParallelDescriptor::NProcs() == 1) &&
343386
(this->size() == 1) && (src.size() == 1) &&
344-
!period.isAnyPeriodic() && !to_ghost_cells_only)
387+
!period.isAnyPeriodic() && !to_ghost_cells_only && (offset == 0))
345388
{
346389
if (this != &src) { // avoid self copy or plus
347390
auto const& da = this->array(0, dcomp);
@@ -409,7 +452,7 @@ FabArray<FAB>::ParallelCopy_nowait (const FabArray<FAB>& src,
409452
(boxarray == src.boxarray && distributionMap == src.distributionMap) &&
410453
snghost == IntVect::TheZeroVector() &&
411454
dnghost == IntVect::TheZeroVector() &&
412-
!period.isAnyPeriodic() && !to_ghost_cells_only)
455+
!period.isAnyPeriodic() && !to_ghost_cells_only && (offset == 0))
413456
{
414457
//
415458
// Short-circuit full intersection code if we're doing copy()s or if
@@ -426,7 +469,8 @@ FabArray<FAB>::ParallelCopy_nowait (const FabArray<FAB>& src,
426469
return;
427470
}
428471

429-
const CPC& thecpc = (a_cpc) ? *a_cpc : getCPC(dnghost, src, snghost, period, to_ghost_cells_only);
472+
const CPC& thecpc = (a_cpc) ? *a_cpc : getCPC(dnghost, src, snghost, period,
473+
to_ghost_cells_only,offset);
430474

431475
if (ParallelContext::NProcsSub() == 1)
432476
{

0 commit comments

Comments
 (0)