Skip to content

Commit b2f27eb

Browse files
committed
set up solver outside kernel
1 parent 0abb33a commit b2f27eb

File tree

5 files changed

+216
-83
lines changed

5 files changed

+216
-83
lines changed

Src/AmrCore/AMReX_Interp_3D_C.H

Lines changed: 103 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ nodebilin_interp (Box const& bx, Array4<T> const& fine, const int fcomp, const i
127127
}
128128
}
129129

130-
131130
template<typename T>
132131
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
133132
facediv_face_interp (int ci, int cj, int ck,
@@ -174,8 +173,8 @@ facediv_face_interp (int ci, int cj, int ck,
174173
const Real dys = Real(1.)/ratio[1];
175174
const Real dzs = Real(1.)/ratio[2];
176175

177-
for (int j=0; j<ratio[1]; ++j){
178176
for (int k=0; k<ratio[2]; ++k){
177+
for (int j=0; j<ratio[1]; ++j){
179178

180179
Real ysloc = (j+Real(0.5))*dys - Real(0.5);
181180
Real zsloc = (k+Real(0.5))*dzs - Real(0.5);
@@ -212,8 +211,8 @@ facediv_face_interp (int ci, int cj, int ck,
212211
const Real dxs = Real(1.)/ratio[0];
213212
const Real dzs = Real(1.)/ratio[2];
214213

215-
for (int i=0; i<ratio[0]; ++i){
216214
for (int k=0; k<ratio[2]; ++k){
215+
for (int i=0; i<ratio[0]; ++i){
217216

218217
Real xsloc = (i+Real(0.5))*dxs - Real(0.5);
219218
Real zsloc = (k+Real(0.5))*dzs - Real(0.5);
@@ -251,8 +250,8 @@ facediv_face_interp (int ci, int cj, int ck,
251250
const Real dxs = Real(1.)/ratio[0];
252251
const Real dys = Real(1.)/ratio[1];
253252

254-
for (int i=0; i<ratio[0]; ++i){
255253
for (int j=0; j<ratio[2]; ++j){
254+
for (int i=0; i<ratio[0]; ++i){
256255

257256
Real xsloc = (i+Real(0.5))*dxs - Real(0.5);
258257
Real ysloc = (j+Real(0.5))*dys - Real(0.5);
@@ -393,12 +392,101 @@ facediv_int (int ci, int cj, int ck, int nf,
393392
+ dz3/(8*dy*zspxs)*(v000+v021-v001-v020);
394393
}
395394

396-
template<typename T>
397-
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
395+
396+
template <int i, std::enable_if_t<(i>=0 && i<8),int> = 0>
397+
constexpr IntVect facediv_get_refratio ()
398+
{
399+
if constexpr (i == 0) {
400+
return IntVect(2,2,2);
401+
} else if constexpr (i == 1) {
402+
return IntVect(4,2,2);
403+
} else if constexpr (i == 2) {
404+
return IntVect(2,4,2);
405+
} else if constexpr (i == 3) {
406+
return IntVect(4,4,2);
407+
} else if constexpr (i == 4) {
408+
return IntVect(2,2,4);
409+
} else if constexpr (i == 5) {
410+
return IntVect(4,2,4);
411+
} else if constexpr (i == 6) {
412+
return IntVect(2,4,4);
413+
} else {
414+
return IntVect(4,4,4);
415+
}
416+
}
417+
418+
template <int N>
419+
LUSolver<N,Real>
420+
facediv_init_lusolver (IntVect const& ratio,
421+
GpuArray<Real, AMREX_SPACEDIM> const& cellSize)
422+
{
423+
// set up matrix for local MAC projection
424+
Array2D<Real,0,N-1,0,N-1,Order::C> mat;
425+
426+
for (int irow = 0; irow < N; irow++) {
427+
for (int icol = 0; icol < N; icol++) {
428+
mat(irow,icol) = Real(0.0);
429+
}
430+
}
431+
432+
Real dxinv = Real(1.)/cellSize[0];
433+
Real dyinv = Real(1.)/cellSize[1];
434+
Real dzinv = Real(1.)/cellSize[2];
435+
436+
Real ddx2 = dxinv*dxinv;
437+
Real ddy2 = dyinv*dyinv;
438+
Real ddz2 = dzinv*dzinv;
439+
440+
for( int k=0; k < ratio[2]; ++k){
441+
for( int j=0; j < ratio[1]; ++j){
442+
for( int i=0; i < ratio[0]; ++i){
443+
444+
int irow = i + j*ratio[0] +k*ratio[0]*ratio[1];
445+
446+
if(i > 0){
447+
mat(irow,irow) += ddx2;
448+
mat(irow,irow-1) += -ddx2;
449+
}
450+
451+
if(i < ratio[0]-1){
452+
mat(irow,irow) += ddx2;
453+
mat(irow,irow+1) += -ddx2;
454+
}
455+
456+
if(j > 0){
457+
mat(irow,irow) += ddy2;
458+
mat(irow,irow-ratio[0]) += -ddy2;
459+
}
460+
461+
if(j < ratio[1]-1){
462+
mat(irow,irow) += ddy2;
463+
mat(irow,irow+ratio[0]) += -ddy2;
464+
}
465+
466+
if(k > 0){
467+
mat(irow,irow) += ddz2;
468+
mat(irow,irow-ratio[0]*ratio[1]) += -ddz2;
469+
}
470+
471+
if(k < ratio[2]-1){
472+
mat(irow,irow) += ddz2;
473+
mat(irow,irow+ratio[0]*ratio[1]) += -ddz2;
474+
}
475+
}}}
476+
477+
// hcak matrxi to be nonsingular
478+
mat(N-1,N-1) +=ddx2;
479+
480+
return LUSolver<N,Real>{mat};
481+
}
482+
483+
template<typename T, int N>
484+
AMREX_GPU_DEVICE AMREX_FORCE_INLINE void
398485
facediv_int_gen (int ci, int cj, int ck, int nf,
399486
GpuArray<Array4<T>, AMREX_SPACEDIM> const& fine,
400487
IntVect const& ratio,
401-
GpuArray<Real, AMREX_SPACEDIM> const& cellSize) noexcept
488+
GpuArray<Real, AMREX_SPACEDIM> const& cellSize,
489+
LUSolver<N,Real> const& lusolver) noexcept
402490
{
403491
const int fi = ci*ratio[0];
404492
const int fj = cj*ratio[1];
@@ -408,6 +496,10 @@ facediv_int_gen (int ci, int cj, int ck, int nf,
408496
const Real dy = cellSize[1];
409497
const Real dz = cellSize[2];
410498

499+
const Real dxinv = Real(1.)/cellSize[0];
500+
const Real dyinv = Real(1.)/cellSize[1];
501+
const Real dzinv = Real(1.)/cellSize[2];
502+
411503
// interpolate initial veloc
412504

413505
for( int k=0; k < ratio[2]; ++k){
@@ -478,76 +570,17 @@ facediv_int_gen (int ci, int cj, int ck, int nf,
478570
}
479571
}
480572

481-
// xxxxx int prodrat = ratio[0]*ratio[1]*ratio[2];
482-
constexpr int prodrat = 8;
483-
484-
Real target_div = (uplus-uminus)/(prodrat*dx) + (vplus-vminus)/(prodrat*dy) + (wplus-wminus)/(prodrat*dz);
573+
Real target_div = (uplus-uminus)/(N*dx) + (vplus-vminus)/(N*dy) + (wplus-wminus)/(N*dz);
485574

486575
// set up matrix for local MAC projection
487576

488-
Array2D<Real,0,prodrat-1,0,prodrat-1,Order::C> mat;
489-
490-
491-
for (int irow = 0; irow < prodrat; irow++) {
492-
for (int icol = 0; icol < prodrat; icol++) {
493-
mat(irow,icol) = Real(0.0);
494-
}
495-
}
496-
497-
Real dxinv = Real(1.)/dx;
498-
Real dyinv = Real(1.)/dy;
499-
Real dzinv = Real(1.)/dz;
500-
501-
Real ddx2 = dxinv*dxinv;
502-
Real ddy2 = dyinv*dyinv;
503-
Real ddz2 = dzinv*dzinv;
504-
505-
for( int k=0; k < ratio[2]; ++k){
506-
for( int j=0; j < ratio[1]; ++j){
507-
for( int i=0; i < ratio[0]; ++i){
508-
509-
int irow = i + j*ratio[0] +k*ratio[0]*ratio[1];
510-
511-
if(i > 0){
512-
mat(irow,irow) += ddx2;
513-
mat(irow,irow-1) += -ddx2;
514-
}
515-
516-
if(i < ratio[0]-1){
517-
mat(irow,irow) += ddx2;
518-
mat(irow,irow+1) += -ddx2;
519-
}
520-
521-
if(j > 0){
522-
mat(irow,irow) += ddy2;
523-
mat(irow,irow-ratio[0]) += -ddy2;
524-
}
525-
526-
if(j < ratio[1]-1){
527-
mat(irow,irow) += ddy2;
528-
mat(irow,irow+ratio[0]) += -ddy2;
529-
}
530-
531-
if(k > 0){
532-
mat(irow,irow) += ddz2;
533-
mat(irow,irow-ratio[0]*ratio[1]) += -ddz2;
534-
}
535-
536-
if(k < ratio[2]-1){
537-
mat(irow,irow) += ddz2;
538-
mat(irow,irow+ratio[0]*ratio[1]) += -ddz2;
539-
}
540-
}
541-
}
542-
}
577+
Array2D<Real,0,N-1,0,N-1,Order::C> mat;
543578

544-
// hcak matrxi to be nonsingular
545579

546-
mat(prodrat-1,prodrat-1) +=ddx2;
547580

548581
// form rhs
549-
Array1D<Real,0,prodrat-1> rhs;
550-
Array1D<Real,0,prodrat-1> phi;
582+
Array1D<Real,0,N-1> rhs;
583+
Array1D<Real,0,N-1> phi;
551584

552585
for( int k=0; k < ratio[2]; ++k){
553586
for( int j=0; j < ratio[1]; ++j){
@@ -564,7 +597,6 @@ facediv_int_gen (int ci, int cj, int ck, int nf,
564597

565598
// solver the system
566599

567-
LUSolver<prodrat,Real> lusolver(mat);
568600
lusolver(phi.begin(), rhs.begin());
569601

570602
// this is for sanity check

Src/AmrCore/AMReX_Interpolater.H

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
#include <AMReX_Extension.H>
66
#include <AMReX_GpuControl.H>
77
#include <AMReX_InterpBase.H>
8+
#include <AMReX_LUSolver.H>
9+
#include <AMReX_Vector.H>
10+
11+
#include <map>
12+
#include <variant>
813

914
/**
1015
* \file AMReX_Interpolater.H
@@ -543,6 +548,9 @@ class FaceDivFree
543548
public Interpolater
544549
{
545550
public:
551+
552+
FaceDivFree ();
553+
546554
/**
547555
* \brief Returns coarsened box given fine box and refinement ratio.
548556
*
@@ -622,8 +630,29 @@ public:
622630
int actual_comp,
623631
int actual_state,
624632
RunOn runon) override;
625-
};
626633

634+
private:
635+
636+
#if (AMREX_SPACEDIM == 2)
637+
using Solver_t = std::variant<LUSolver< 4,Real>, // IntVect(2,2): refinement
638+
LUSolver< 8,Real>, // IntVect(4,2)
639+
LUSolver< 8,Real>, // IntVect(2,4)
640+
LUSolver<16,Real>>;// IntVect(4,4)
641+
#elif (AMREX_SPACEDIM == 3)
642+
using Solver_t = std::variant<LUSolver< 8,Real>, // IntVect(2,2,2): refinement
643+
LUSolver<16,Real>, // IntVect(4,2,2)
644+
LUSolver<16,Real>, // IntVect(2,4,2)
645+
LUSolver<32,Real>, // IntVect(4,4,2)
646+
LUSolver<16,Real>, // IntVect(2,2,4)
647+
LUSolver<32,Real>, // IntVect(4,2,4)
648+
LUSolver<32,Real>, // IntVect(2,4,4)
649+
LUSolver<64,Real>>;// IntVect(4,4,4)
650+
#endif
651+
652+
#if (AMREX_SPACEDIM > 1)
653+
Vector<std::map<Box,Solver_t>> m_lusolver;
654+
#endif
655+
};
627656

628657
/**
629658
* \brief Piecewise constant tangential interpolation / linear normal interpolation of face data.

0 commit comments

Comments
 (0)