@@ -127,7 +127,6 @@ nodebilin_interp (Box const& bx, Array4<T> const& fine, const int fcomp, const i
127127 }
128128}
129129
130-
131130template <typename T>
132131AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
133132facediv_face_interp (int ci, int cj, int ck,
@@ -174,8 +173,8 @@ facediv_face_interp (int ci, int cj, int ck,
174173 const Real dys = Real (1 .)/ratio[1 ];
175174 const Real dzs = Real (1 .)/ratio[2 ];
176175
177- for (int j=0 ; j<ratio[1 ]; ++j){
178176 for (int k=0 ; k<ratio[2 ]; ++k){
177+ for (int j=0 ; j<ratio[1 ]; ++j){
179178
180179 Real ysloc = (j+Real (0.5 ))*dys - Real (0.5 );
181180 Real zsloc = (k+Real (0.5 ))*dzs - Real (0.5 );
@@ -212,8 +211,8 @@ facediv_face_interp (int ci, int cj, int ck,
212211 const Real dxs = Real (1 .)/ratio[0 ];
213212 const Real dzs = Real (1 .)/ratio[2 ];
214213
215- for (int i=0 ; i<ratio[0 ]; ++i){
216214 for (int k=0 ; k<ratio[2 ]; ++k){
215+ for (int i=0 ; i<ratio[0 ]; ++i){
217216
218217 Real xsloc = (i+Real (0.5 ))*dxs - Real (0.5 );
219218 Real zsloc = (k+Real (0.5 ))*dzs - Real (0.5 );
@@ -251,8 +250,8 @@ facediv_face_interp (int ci, int cj, int ck,
251250 const Real dxs = Real (1 .)/ratio[0 ];
252251 const Real dys = Real (1 .)/ratio[1 ];
253252
254- for (int i=0 ; i<ratio[0 ]; ++i){
255253 for (int j=0 ; j<ratio[2 ]; ++j){
254+ for (int i=0 ; i<ratio[0 ]; ++i){
256255
257256 Real xsloc = (i+Real (0.5 ))*dxs - Real (0.5 );
258257 Real ysloc = (j+Real (0.5 ))*dys - Real (0.5 );
@@ -393,12 +392,101 @@ facediv_int (int ci, int cj, int ck, int nf,
393392 + dz3/(8 *dy*zspxs)*(v000+v021-v001-v020);
394393}
395394
396- template <typename T>
397- AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
395+
396+ template <int i, std::enable_if_t <(i>=0 && i<8 ),int > = 0 >
397+ constexpr IntVect facediv_get_refratio ()
398+ {
399+ if constexpr (i == 0 ) {
400+ return IntVect (2 ,2 ,2 );
401+ } else if constexpr (i == 1 ) {
402+ return IntVect (4 ,2 ,2 );
403+ } else if constexpr (i == 2 ) {
404+ return IntVect (2 ,4 ,2 );
405+ } else if constexpr (i == 3 ) {
406+ return IntVect (4 ,4 ,2 );
407+ } else if constexpr (i == 4 ) {
408+ return IntVect (2 ,2 ,4 );
409+ } else if constexpr (i == 5 ) {
410+ return IntVect (4 ,2 ,4 );
411+ } else if constexpr (i == 6 ) {
412+ return IntVect (2 ,4 ,4 );
413+ } else {
414+ return IntVect (4 ,4 ,4 );
415+ }
416+ }
417+
418+ template <int N>
419+ LUSolver<N,Real>
420+ facediv_init_lusolver (IntVect const & ratio,
421+ GpuArray<Real, AMREX_SPACEDIM> const & cellSize)
422+ {
423+ // set up matrix for local MAC projection
424+ Array2D<Real,0 ,N-1 ,0 ,N-1 ,Order::C> mat;
425+
426+ for (int irow = 0 ; irow < N; irow++) {
427+ for (int icol = 0 ; icol < N; icol++) {
428+ mat (irow,icol) = Real (0.0 );
429+ }
430+ }
431+
432+ Real dxinv = Real (1 .)/cellSize[0 ];
433+ Real dyinv = Real (1 .)/cellSize[1 ];
434+ Real dzinv = Real (1 .)/cellSize[2 ];
435+
436+ Real ddx2 = dxinv*dxinv;
437+ Real ddy2 = dyinv*dyinv;
438+ Real ddz2 = dzinv*dzinv;
439+
440+ for ( int k=0 ; k < ratio[2 ]; ++k){
441+ for ( int j=0 ; j < ratio[1 ]; ++j){
442+ for ( int i=0 ; i < ratio[0 ]; ++i){
443+
444+ int irow = i + j*ratio[0 ] +k*ratio[0 ]*ratio[1 ];
445+
446+ if (i > 0 ){
447+ mat (irow,irow) += ddx2;
448+ mat (irow,irow-1 ) += -ddx2;
449+ }
450+
451+ if (i < ratio[0 ]-1 ){
452+ mat (irow,irow) += ddx2;
453+ mat (irow,irow+1 ) += -ddx2;
454+ }
455+
456+ if (j > 0 ){
457+ mat (irow,irow) += ddy2;
458+ mat (irow,irow-ratio[0 ]) += -ddy2;
459+ }
460+
461+ if (j < ratio[1 ]-1 ){
462+ mat (irow,irow) += ddy2;
463+ mat (irow,irow+ratio[0 ]) += -ddy2;
464+ }
465+
466+ if (k > 0 ){
467+ mat (irow,irow) += ddz2;
468+ mat (irow,irow-ratio[0 ]*ratio[1 ]) += -ddz2;
469+ }
470+
471+ if (k < ratio[2 ]-1 ){
472+ mat (irow,irow) += ddz2;
473+ mat (irow,irow+ratio[0 ]*ratio[1 ]) += -ddz2;
474+ }
475+ }}}
476+
477+ // hcak matrxi to be nonsingular
478+ mat (N-1 ,N-1 ) +=ddx2;
479+
480+ return LUSolver<N,Real>{mat};
481+ }
482+
483+ template <typename T, int N>
484+ AMREX_GPU_DEVICE AMREX_FORCE_INLINE void
398485facediv_int_gen (int ci, int cj, int ck, int nf,
399486 GpuArray<Array4<T>, AMREX_SPACEDIM> const & fine,
400487 IntVect const & ratio,
401- GpuArray<Real, AMREX_SPACEDIM> const & cellSize) noexcept
488+ GpuArray<Real, AMREX_SPACEDIM> const & cellSize,
489+ LUSolver<N,Real> const & lusolver) noexcept
402490{
403491 const int fi = ci*ratio[0 ];
404492 const int fj = cj*ratio[1 ];
@@ -408,6 +496,10 @@ facediv_int_gen (int ci, int cj, int ck, int nf,
408496 const Real dy = cellSize[1 ];
409497 const Real dz = cellSize[2 ];
410498
499+ const Real dxinv = Real (1 .)/cellSize[0 ];
500+ const Real dyinv = Real (1 .)/cellSize[1 ];
501+ const Real dzinv = Real (1 .)/cellSize[2 ];
502+
411503// interpolate initial veloc
412504
413505 for ( int k=0 ; k < ratio[2 ]; ++k){
@@ -478,76 +570,17 @@ facediv_int_gen (int ci, int cj, int ck, int nf,
478570 }
479571 }
480572
481- // xxxxx int prodrat = ratio[0]*ratio[1]*ratio[2];
482- constexpr int prodrat = 8 ;
483-
484- Real target_div = (uplus-uminus)/(prodrat*dx) + (vplus-vminus)/(prodrat*dy) + (wplus-wminus)/(prodrat*dz);
573+ Real target_div = (uplus-uminus)/(N*dx) + (vplus-vminus)/(N*dy) + (wplus-wminus)/(N*dz);
485574
486575// set up matrix for local MAC projection
487576
488- Array2D<Real,0 ,prodrat-1 ,0 ,prodrat-1 ,Order::C> mat;
489-
490-
491- for (int irow = 0 ; irow < prodrat; irow++) {
492- for (int icol = 0 ; icol < prodrat; icol++) {
493- mat (irow,icol) = Real (0.0 );
494- }
495- }
496-
497- Real dxinv = Real (1 .)/dx;
498- Real dyinv = Real (1 .)/dy;
499- Real dzinv = Real (1 .)/dz;
500-
501- Real ddx2 = dxinv*dxinv;
502- Real ddy2 = dyinv*dyinv;
503- Real ddz2 = dzinv*dzinv;
504-
505- for ( int k=0 ; k < ratio[2 ]; ++k){
506- for ( int j=0 ; j < ratio[1 ]; ++j){
507- for ( int i=0 ; i < ratio[0 ]; ++i){
508-
509- int irow = i + j*ratio[0 ] +k*ratio[0 ]*ratio[1 ];
510-
511- if (i > 0 ){
512- mat (irow,irow) += ddx2;
513- mat (irow,irow-1 ) += -ddx2;
514- }
515-
516- if (i < ratio[0 ]-1 ){
517- mat (irow,irow) += ddx2;
518- mat (irow,irow+1 ) += -ddx2;
519- }
520-
521- if (j > 0 ){
522- mat (irow,irow) += ddy2;
523- mat (irow,irow-ratio[0 ]) += -ddy2;
524- }
525-
526- if (j < ratio[1 ]-1 ){
527- mat (irow,irow) += ddy2;
528- mat (irow,irow+ratio[0 ]) += -ddy2;
529- }
530-
531- if (k > 0 ){
532- mat (irow,irow) += ddz2;
533- mat (irow,irow-ratio[0 ]*ratio[1 ]) += -ddz2;
534- }
535-
536- if (k < ratio[2 ]-1 ){
537- mat (irow,irow) += ddz2;
538- mat (irow,irow+ratio[0 ]*ratio[1 ]) += -ddz2;
539- }
540- }
541- }
542- }
577+ Array2D<Real,0 ,N-1 ,0 ,N-1 ,Order::C> mat;
543578
544- // hcak matrxi to be nonsingular
545579
546- mat (prodrat-1 ,prodrat-1 ) +=ddx2;
547580
548581// form rhs
549- Array1D<Real,0 ,prodrat -1 > rhs;
550- Array1D<Real,0 ,prodrat -1 > phi;
582+ Array1D<Real,0 ,N -1 > rhs;
583+ Array1D<Real,0 ,N -1 > phi;
551584
552585 for ( int k=0 ; k < ratio[2 ]; ++k){
553586 for ( int j=0 ; j < ratio[1 ]; ++j){
@@ -564,7 +597,6 @@ facediv_int_gen (int ci, int cj, int ck, int nf,
564597
565598// solver the system
566599
567- LUSolver<prodrat,Real> lusolver (mat);
568600 lusolver (phi.begin (), rhs.begin ());
569601
570602// this is for sanity check
0 commit comments