diff --git a/Src/AmrCore/AMReX_Interp_1D_C.H b/Src/AmrCore/AMReX_Interp_1D_C.H index cddb819ac7f..399f40cc5a5 100644 --- a/Src/AmrCore/AMReX_Interp_1D_C.H +++ b/Src/AmrCore/AMReX_Interp_1D_C.H @@ -73,7 +73,7 @@ nodebilin_interp (Box const& bx, Array4 const& fine, const int fcomp, const i template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void facediv_face_interp (int /*ci*/, int /*cj*/, int /*ck*/, - int /*nc*/, int /*nf*/, int /*idir*/, + int /*n*/, int /*idir*/, Array4 const& /*crse*/, Array4 const& /*fine*/, Array4 const& /*mask*/, diff --git a/Src/AmrCore/AMReX_Interp_2D_C.H b/Src/AmrCore/AMReX_Interp_2D_C.H index 9ddb7144e86..2ffbf9791f6 100644 --- a/Src/AmrCore/AMReX_Interp_2D_C.H +++ b/Src/AmrCore/AMReX_Interp_2D_C.H @@ -4,6 +4,7 @@ #include #include +#include namespace amrex { @@ -97,14 +98,14 @@ nodebilin_interp (Box const& bx, Array4 const& fine, const int fcomp, const i template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void facediv_face_interp (int ci, int cj, int /*ck*/, - int nc, int nf, int idir, + int n, int idir, Array4 const& crse, Array4 const& fine, Array4 const& mask, IntVect const& ratio) noexcept { if (mask) { - if (!mask(ci, cj, 0, nc)) + if (!mask(ci, cj, 0, n)) { return; } } @@ -114,23 +115,25 @@ facediv_face_interp (int ci, int cj, int /*ck*/, switch (idir) { case 0: { - const Real neg = crse(ci, cj-1, 0, nc); - const Real cen = crse(ci, cj , 0, nc); - const Real pos = crse(ci, cj+1, 0, nc); + const Real slpy = Real(0.5)*(crse(ci, cj+1, 0, n) - crse(ci, cj-1, 0, n)); + const Real dys = Real(1.)/Real(ratio[1]); - fine(fi, fj , 0, nf) = Real(0.125)*(8*cen + neg - pos); - fine(fi, fj+1, 0, nf) = Real(0.125)*(8*cen + pos - neg); + for (int j = 0; j < ratio[1]; ++j) { + Real ysloc = (Real(j)+Real(0.5))*dys - Real(0.5); + fine(fi, fj+j, 0, n) = crse(ci, cj, 0, n) + ysloc*slpy; + } break; } case 1: { - const Real neg = crse(ci-1, cj, 0, nc); - const Real cen = crse(ci , cj, 0, nc); - const Real pos = crse(ci+1, cj, 0, nc); + const Real slpx = Real(0.5)*(crse(ci+1, cj, 0, n) - crse(ci-1, cj, 0, n)); + const Real dxs = Real(1.)/Real(ratio[0]); - fine(fi , fj, 0, nf) = Real(0.125)*(8*cen + neg - pos); - fine(fi+1, fj, 0, nf) = Real(0.125)*(8*cen + pos - neg); + for (int i = 0; i < ratio[0]; ++i) { + Real xsloc = (Real(i)+Real(0.5))*dxs - Real(0.5); + fine(fi+i, fj, 0, n) = crse(ci, cj, 0, n) + xsloc*slpx; + } break; } @@ -140,7 +143,7 @@ facediv_face_interp (int ci, int cj, int /*ck*/, template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void -facediv_int (int ci, int cj, int /*ck*/, int nf, +facediv_int (int ci, int cj, int /*ck*/, int n, GpuArray, AMREX_SPACEDIM> const& fine, IntVect const& ratio, GpuArray const& cellSize) noexcept @@ -149,26 +152,167 @@ facediv_int (int ci, int cj, int /*ck*/, int nf, const int fj = cj*ratio[1]; // References to fine exterior values. - const Real umm = fine[0](fi, fj, 0, nf); - const Real ump = fine[0](fi, fj+1, 0, nf); - const Real upm = fine[0](fi+2, fj, 0, nf); - const Real upp = fine[0](fi+2, fj+1, 0, nf); + const Real umm = fine[0](fi, fj, 0, n); + const Real ump = fine[0](fi, fj+1, 0, n); + const Real upm = fine[0](fi+2, fj, 0, n); + const Real upp = fine[0](fi+2, fj+1, 0, n); - const Real vmm = fine[1](fi, fj, 0, nf); - const Real vmp = fine[1](fi+1, fj, 0, nf); - const Real vpm = fine[1](fi, fj+2, 0, nf); - const Real vpp = fine[1](fi+1, fj+2, 0, nf); + const Real vmm = fine[1](fi, fj, 0, n); + const Real vmp = fine[1](fi+1, fj, 0, n); + const Real vpm = fine[1](fi, fj+2, 0, n); + const Real vpp = fine[1](fi+1, fj+2, 0, n); const Real dxdy = cellSize[0]/cellSize[1]; const Real x_corr = Real(0.25)*dxdy * (vpp+vmm-vmp-vpm); const Real y_corr = Real(0.25)/dxdy * (upp+umm-ump-upm); // Calc fine faces on interior of coarse cells. - fine[0](fi+1,fj ,0,nf) = Real(0.5)*(umm+upm) + x_corr; - fine[0](fi+1,fj+1,0,nf) = Real(0.5)*(ump+upp) + x_corr; - fine[1](fi, fj+1,0,nf) = Real(0.5)*(vmm+vpm) + y_corr; - fine[1](fi+1,fj+1,0,nf) = Real(0.5)*(vmp+vpp) + y_corr; + fine[0](fi+1,fj ,0,n) = Real(0.5)*(umm+upm) + x_corr; + fine[0](fi+1,fj+1,0,n) = Real(0.5)*(ump+upp) + x_corr; + fine[1](fi, fj+1,0,n) = Real(0.5)*(vmm+vpm) + y_corr; + fine[1](fi+1,fj+1,0,n) = Real(0.5)*(vmp+vpp) + y_corr; + +} + +template =0 && i<4), int> = 0> +constexpr IntVect facediv_get_refratio () +{ + if constexpr (i == 0) { + return IntVect(2,2); + } else if constexpr (i == 1) { + return IntVect(4,2); + } else if constexpr (i == 2) { + return IntVect(2,4); + } else { + return IntVect(4,4); + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +LUSolver +facediv_init_lusolver (IntVect const& ratio, + GpuArray const& cellSize) +{ + Array2D mat; + + for (int irow = 0; irow < N; ++irow) { + for (int icol = 0; icol < N; ++icol) { + mat(irow,icol) = Real(0.0); + } + } + + Real dxinv = Real(1.)/cellSize[0]; + Real dyinv = Real(1.)/cellSize[1]; + Real ddx2 = dxinv*dxinv; + Real ddy2 = dyinv*dyinv; + + for (int j = 0; j < ratio[1]; ++j) { + for (int i = 0; i < ratio[0]; ++i) { + int irow = i + j*ratio[0]; + + if (i > 0) { + mat(irow,irow) += ddx2; + mat(irow,irow-1) += -ddx2; + } + + if (i < ratio[0]-1) { + mat(irow,irow) += ddx2; + mat(irow,irow+1) += -ddx2; + } + + if (j > 0) { + mat(irow,irow) += ddy2; + mat(irow,irow-ratio[0]) += -ddy2; + } + + if (j < ratio[1]-1) { + mat(irow,irow) += ddy2; + mat(irow,irow+ratio[0]) += -ddy2; + } + }} + + // Hack matrix to be nonsingular. This just picks a particular gauge + // with the last row being zero, doesn’t change the gradients we will + // apply. + mat(N-1,N-1) += ddx2; + + return LUSolver{mat}; +} + +template +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void +facediv_int_gen (int ci, int cj, int /*ck*/, int nf, + GpuArray, AMREX_SPACEDIM> const& fine, + IntVect const& ratio, + GpuArray const& cellSize, + LUSolver const& lusolver) noexcept +{ + const int fi = ci*ratio[0]; + const int fj = cj*ratio[1]; + + const Real dx = cellSize[0]; + const Real dy = cellSize[1]; + const Real dxinv = Real(1.)/dx; + const Real dyinv = Real(1.)/dy; + + for (int j = 0; j < ratio[1]; ++j) { + for (int i = 1; i < ratio[0]; ++i) { + fine[0](fi+i, fj+j, 0, nf) = + ((ratio[0]-i)*fine[0](fi, fj+j, 0, nf) + + i *fine[0](fi+ratio[0], fj+j, 0, nf))/ratio[0]; + }} + + for (int j = 1; j < ratio[1]; ++j) { + for (int i = 0; i < ratio[0]; ++i) { + fine[1](fi+i, fj+j, 0, nf) = + ((ratio[1]-j)*fine[1](fi+i, fj, 0, nf) + + j *fine[1](fi+i, fj+ratio[1], 0, nf))/ratio[1]; + }} + + Real uplus = Real(0.); + Real uminus = Real(0.); + for (int j = 0; j < ratio[1]; ++j) { + uplus += fine[0](fi+ratio[0], fj+j, 0, nf); + uminus += fine[0](fi, fj+j, 0, nf); + } + + Real vplus = Real(0.); + Real vminus = Real(0.); + for (int i = 0; i < ratio[0]; ++i) { + vplus += fine[1](fi+i, fj+ratio[1], 0, nf); + vminus += fine[1](fi+i, fj, 0, nf); + } + Real target_div = (uplus-uminus)/(N*dx) + (vplus-vminus)/(N*dy); + + Array1D rhs; + Array1D phi; + + for (int j = 0; j < ratio[1]; ++j) { + for (int i = 0; i < ratio[0]; ++i) { + int irow = i + j*ratio[0]; + rhs(irow) = + (fine[0](fi+i+1, fj+j, 0, nf) - fine[0](fi+i, fj+j, 0, nf))*dxinv + + (fine[1](fi+i, fj+j+1, 0, nf) - fine[1](fi+i, fj+j, 0, nf))*dyinv + - target_div; + }} + + lusolver(phi.begin(), rhs.begin()); + + for (int j = 0; j < ratio[1]; ++j) { + for (int i = 1; i < ratio[0]; ++i) { + int plus = i + j*ratio[0]; + int minus = i-1 + j*ratio[0]; + fine[0](fi+i, fj+j, 0, nf) += (phi(plus) - phi(minus))*dxinv; + }} + + for (int j = 1; j < ratio[1]; ++j) { + for (int i = 0; i < ratio[0]; ++i) { + int plus = i + j *ratio[0]; + int minus = i + (j-1)*ratio[0]; + fine[1](fi+i, fj+j, 0, nf) += (phi(plus) - phi(minus))*dyinv; + }} } template diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index c8836a6a186..ca45ba16d92 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -4,6 +4,7 @@ #include #include +#include namespace amrex { @@ -126,11 +127,10 @@ nodebilin_interp (Box const& bx, Array4 const& fine, const int fcomp, const i } } - template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void facediv_face_interp (int ci, int cj, int ck, - int nc, int nf, int idir, + int n, int idir, Array4 const& crse, Array4 const& fine, Array4 const& mask, @@ -138,7 +138,7 @@ facediv_face_interp (int ci, int cj, int ck, { if (mask) { - if (!mask(ci, cj, ck, nc)) + if (!mask(ci, cj, ck, n)) { return; } } @@ -149,64 +149,117 @@ facediv_face_interp (int ci, int cj, int ck, switch (idir) { case 0: { - const Real ll = crse(ci, cj-1, ck-1, nc); - const Real cl = crse(ci, cj-1, ck, nc); - const Real rl = crse(ci, cj-1, ck+1, nc); +// const Real ll = crse(ci, cj-1, ck-1, nc); +// const Real cl = crse(ci, cj-1, ck, nc); +// const Real rl = crse(ci, cj-1, ck+1, nc); +// +// const Real lc = crse(ci, cj , ck-1, nc); +// const Real cc = crse(ci, cj , ck, nc); +// const Real rc = crse(ci, cj , ck+1, nc); + +// const Real lu = crse(ci, cj+1, ck-1, nc); +// const Real cu = crse(ci, cj+1, ck, nc); +// const Real ru = crse(ci, cj+1, ck+1, nc); + +// fine(fi,fj ,fk ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+lc-cu-rc) + (ll+ru-lu-rl) ); +// fine(fi,fj ,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+rc-cu-lc) + (lu+rl-ll-ru) ); +// fine(fi,fj+1,fk ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) ); +// fine(fi,fj+1,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) ); - const Real lc = crse(ci, cj , ck-1, nc); - const Real cc = crse(ci, cj , ck, nc); - const Real rc = crse(ci, cj , ck+1, nc); + const Real slpy = Real(0.5)* (crse(ci,cj+1,ck ,n) - crse(ci,cj-1,ck ,n)); + const Real slpz = Real(0.5)* (crse(ci,cj, ck+1,n) - crse(ci,cj, ck-1,n)); + const Real slpyz = Real(0.25)*(crse(ci,cj+1,ck+1,n) - crse(ci,cj+1,ck-1,n) - crse(ci,cj-1,ck+1,n) + crse(ci,cj-1,ck-1,n)); - const Real lu = crse(ci, cj+1, ck-1, nc); - const Real cu = crse(ci, cj+1, ck, nc); - const Real ru = crse(ci, cj+1, ck+1, nc); + const Real dys = Real(1.)/Real(ratio[1]); + const Real dzs = Real(1.)/Real(ratio[2]); - fine(fi,fj ,fk ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+lc-cu-rc) + (ll+ru-lu-rl) ); - fine(fi,fj ,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+rc-cu-lc) + (lu+rl-ll-ru) ); - fine(fi,fj+1,fk ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) ); - fine(fi,fj+1,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) ); + for (int k=0; k AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void -facediv_int (int ci, int cj, int ck, int nf, +facediv_int (int ci, int cj, int ck, int n, GpuArray, AMREX_SPACEDIM> const& fine, IntVect const& ratio, GpuArray const& cellSize) noexcept @@ -226,32 +279,32 @@ facediv_int (int ci, int cj, int ck, int nf, const int fk = ck*ratio[2]; // References to fine exterior values needed for interior calculation. - const Real u000 = fine[0](fi, fj , fk , nf); - const Real u200 = fine[0](fi+2, fj , fk , nf); - const Real u010 = fine[0](fi, fj+1, fk , nf); - const Real u210 = fine[0](fi+2, fj+1, fk , nf); - const Real u001 = fine[0](fi, fj , fk+1, nf); - const Real u201 = fine[0](fi+2, fj , fk+1, nf); - const Real u011 = fine[0](fi, fj+1, fk+1, nf); - const Real u211 = fine[0](fi+2, fj+1, fk+1, nf); - - const Real v000 = fine[1](fi , fj , fk , nf); - const Real v020 = fine[1](fi , fj+2, fk , nf); - const Real v100 = fine[1](fi+1, fj , fk , nf); - const Real v120 = fine[1](fi+1, fj+2, fk , nf); - const Real v001 = fine[1](fi , fj , fk+1, nf); - const Real v021 = fine[1](fi , fj+2, fk+1, nf); - const Real v101 = fine[1](fi+1, fj , fk+1, nf); - const Real v121 = fine[1](fi+1, fj+2, fk+1, nf); - - const Real w000 = fine[2](fi , fj , fk , nf); - const Real w002 = fine[2](fi , fj , fk+2, nf); - const Real w100 = fine[2](fi+1, fj , fk , nf); - const Real w102 = fine[2](fi+1, fj , fk+2, nf); - const Real w010 = fine[2](fi , fj+1, fk , nf); - const Real w012 = fine[2](fi , fj+1, fk+2, nf); - const Real w110 = fine[2](fi+1, fj+1, fk , nf); - const Real w112 = fine[2](fi+1, fj+1, fk+2, nf); + const Real u000 = fine[0](fi, fj , fk , n); + const Real u200 = fine[0](fi+2, fj , fk , n); + const Real u010 = fine[0](fi, fj+1, fk , n); + const Real u210 = fine[0](fi+2, fj+1, fk , n); + const Real u001 = fine[0](fi, fj , fk+1, n); + const Real u201 = fine[0](fi+2, fj , fk+1, n); + const Real u011 = fine[0](fi, fj+1, fk+1, n); + const Real u211 = fine[0](fi+2, fj+1, fk+1, n); + + const Real v000 = fine[1](fi , fj , fk , n); + const Real v020 = fine[1](fi , fj+2, fk , n); + const Real v100 = fine[1](fi+1, fj , fk , n); + const Real v120 = fine[1](fi+1, fj+2, fk , n); + const Real v001 = fine[1](fi , fj , fk+1, n); + const Real v021 = fine[1](fi , fj+2, fk+1, n); + const Real v101 = fine[1](fi+1, fj , fk+1, n); + const Real v121 = fine[1](fi+1, fj+2, fk+1, n); + + const Real w000 = fine[2](fi , fj , fk , n); + const Real w002 = fine[2](fi , fj , fk+2, n); + const Real w100 = fine[2](fi+1, fj , fk , n); + const Real w102 = fine[2](fi+1, fj , fk+2, n); + const Real w010 = fine[2](fi , fj+1, fk , n); + const Real w012 = fine[2](fi , fj+1, fk+2, n); + const Real w110 = fine[2](fi+1, fj+1, fk , n); + const Real w112 = fine[2](fi+1, fj+1, fk+2, n); const Real dx = cellSize[0]; const Real dy = cellSize[1]; @@ -266,79 +319,345 @@ facediv_int (int ci, int cj, int ck, int nf, const Real yspzs = dy*dy + dz*dz; const Real zspxs = dz*dz + dx*dx; - fine[0](fi+1, fj , fk , nf) = Real(0.5)*(u000+u200) + fine[0](fi+1, fj , fk , n) = Real(0.5)*(u000+u200) + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v000+v120-v020-v100) + dx3/(8*dy*zspxs)*(v001+v121-v021-v101) + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w000+w102-w002-w100) + dx3/(8*dz*xspys)*(w010+w112-w012-w110); - fine[0](fi+1, fj+1, fk , nf) = Real(0.5)*(u010+u210) + fine[0](fi+1, fj+1, fk , n) = Real(0.5)*(u010+u210) + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v000+v120-v020-v100) + dx3/(8*dy*zspxs)*(v001+v121-v021-v101) + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w010+w112-w012-w110) + dx3/(8*dz*xspys)*(w000+w102-w100-w002); - fine[0](fi+1, fj , fk+1, nf) = Real(0.5)*(u001+u201) + fine[0](fi+1, fj , fk+1, n) = Real(0.5)*(u001+u201) + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v001+v121-v021-v101) + dx3/(8*dy*zspxs)*(v000+v120-v020-v100) + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w000+w102-w002-w100) + dx3/(8*dz*xspys)*(w010+w112-w012-w110); - fine[0](fi+1, fj+1, fk+1, nf) = Real(0.5)*(u011+u211) + fine[0](fi+1, fj+1, fk+1, n) = Real(0.5)*(u011+u211) + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v001+v121-v021-v101) + dx3/(8*dy*zspxs)*(v000+v120-v020-v100) + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w010+w112-w012-w110) + dx3/(8*dz*xspys)*(w000+w102-w002-w100); - fine[1](fi , fj+1, fk , nf) = Real(0.5)*(v000+v020) + fine[1](fi , fj+1, fk , n) = Real(0.5)*(v000+v020) + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u000+u210-u010-u200) + dy3/(8*dx*yspzs)*(u001+u211-u011-u201) + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w000+w012-w002-w010) + dy3/(8*dz*xspys)*(w100+w112-w102-w110); - fine[1](fi+1, fj+1, fk , nf) = Real(0.5)*(v100+v120) + fine[1](fi+1, fj+1, fk , n) = Real(0.5)*(v100+v120) + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u000+u210-u010-u200) + dy3/(8*dx*yspzs)*(u001+u211-u011-u201) + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w100+w112-w102-w110) + dy3/(8*dz*xspys)*(w000+w012-w002-w010); - fine[1](fi , fj+1, fk+1, nf) = Real(0.5)*(v001+v021) + fine[1](fi , fj+1, fk+1, n) = Real(0.5)*(v001+v021) + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u001+u211-u011-u201) + dy3/(8*dx*yspzs)*(u000+u210-u010-u200) + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w000+w012-w002-w010) + dy3/(8*dz*xspys)*(w100+w112-w102-w110); - fine[1](fi+1, fj+1, fk+1, nf) = Real(0.5)*(v101+v121) + fine[1](fi+1, fj+1, fk+1, n) = Real(0.5)*(v101+v121) + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u001+u211-u011-u201) + dy3/(8*dx*yspzs)*(u000+u210-u010-u200) + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w100+w112-w102-w110) + dy3/(8*dz*xspys)*(w000+w012-w002-w010); - fine[2](fi , fj , fk+1, nf) = Real(0.5)*(w000+w002) + fine[2](fi , fj , fk+1, n) = Real(0.5)*(w000+w002) + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u000+u201-u001-u200) + dz3/(8*dx*yspzs)*(u010+u211-u011-u210) + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v000+v021-v001-v020) + dz3/(8*dy*zspxs)*(v100+v121-v101-v120); - fine[2](fi , fj+1, fk+1, nf) = Real(0.5)*(w010+w012) + fine[2](fi , fj+1, fk+1, n) = Real(0.5)*(w010+w012) + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u010+u211-u011-u210) + dz3/(8*dx*yspzs)*(u000+u201-u001-u200) + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v000+v021-v001-v020) + dz3/(8*dy*zspxs)*(v100+v121-v101-v120); - fine[2](fi+1, fj , fk+1, nf) = Real(0.5)*(w100+w102) + fine[2](fi+1, fj , fk+1, n) = Real(0.5)*(w100+w102) + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u000+u201-u001-u200) + dz3/(8*dx*yspzs)*(u010+u211-u011-u210) + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v100+v121-v101-v120) + dz3/(8*dy*zspxs)*(v000+v021-v001-v020); - fine[2](fi+1, fj+1, fk+1, nf) = Real(0.5)*(w110+w112) + fine[2](fi+1, fj+1, fk+1, n) = Real(0.5)*(w110+w112) + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u010+u211-u011-u210) + dz3/(8*dx*yspzs)*(u000+u201-u001-u200) + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v100+v121-v101-v120) + dz3/(8*dy*zspxs)*(v000+v021-v001-v020); } + +template =0 && i<8),int> = 0> +constexpr IntVect facediv_get_refratio () +{ + if constexpr (i == 0) { + return IntVect(2,2,2); + } else if constexpr (i == 1) { + return IntVect(4,2,2); + } else if constexpr (i == 2) { + return IntVect(2,4,2); + } else if constexpr (i == 3) { + return IntVect(4,4,2); + } else if constexpr (i == 4) { + return IntVect(2,2,4); + } else if constexpr (i == 5) { + return IntVect(4,2,4); + } else if constexpr (i == 6) { + return IntVect(2,4,4); + } else { + return IntVect(4,4,4); + } +} + +template +LUSolver +facediv_init_lusolver (IntVect const& ratio, + GpuArray const& cellSize) +{ + // set up matrix for local MAC projection + Array2D mat; + + for (int irow = 0; irow < N; irow++) { + for (int icol = 0; icol < N; icol++) { + mat(irow,icol) = Real(0.0); + } + } + + Real dxinv = Real(1.)/cellSize[0]; + Real dyinv = Real(1.)/cellSize[1]; + Real dzinv = Real(1.)/cellSize[2]; + + Real ddx2 = dxinv*dxinv; + Real ddy2 = dyinv*dyinv; + Real ddz2 = dzinv*dzinv; + + for( int k=0; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ + + int irow = i + j*ratio[0] +k*ratio[0]*ratio[1]; + + if(i > 0){ + mat(irow,irow) += ddx2; + mat(irow,irow-1) += -ddx2; + } + + if(i < ratio[0]-1){ + mat(irow,irow) += ddx2; + mat(irow,irow+1) += -ddx2; + } + + if(j > 0){ + mat(irow,irow) += ddy2; + mat(irow,irow-ratio[0]) += -ddy2; + } + + if(j < ratio[1]-1){ + mat(irow,irow) += ddy2; + mat(irow,irow+ratio[0]) += -ddy2; + } + + if(k > 0){ + mat(irow,irow) += ddz2; + mat(irow,irow-ratio[0]*ratio[1]) += -ddz2; + } + + if(k < ratio[2]-1){ + mat(irow,irow) += ddz2; + mat(irow,irow+ratio[0]*ratio[1]) += -ddz2; + } + }}} + + // Hack matrix to be nonsingular. This just picks a particular gauge + // with the last row being zero, doesn’t change the gradients we will + // apply. + mat(N-1,N-1) +=ddx2; + + return LUSolver{mat}; +} + +template +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void +facediv_int_gen (int ci, int cj, int ck, int nf, + GpuArray, AMREX_SPACEDIM> const& fine, + IntVect const& ratio, + GpuArray const& cellSize, + LUSolver const& lusolver) noexcept +{ + const int fi = ci*ratio[0]; + const int fj = cj*ratio[1]; + const int fk = ck*ratio[2]; + + const Real dx = cellSize[0]; + const Real dy = cellSize[1]; + const Real dz = cellSize[2]; + + const Real dxinv = Real(1.)/cellSize[0]; + const Real dyinv = Real(1.)/cellSize[1]; + const Real dzinv = Real(1.)/cellSize[2]; + +// interpolate initial veloc + + for( int k=0; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + for( int i=1; i < ratio[0]; ++i){ + + fine[0](fi + i, fj + j, fk + k, nf) = ((ratio[0]-i)*fine[0](fi, fj + j, fk + k, nf) + + i *fine[0](fi+ratio[0], fj + j, fk + k, nf) )/ratio[0]; + + } + } + } + + for( int k=0; k < ratio[2]; ++k){ + for( int j=1; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ + + fine[1](fi + i, fj + j, fk + k, nf) = ((ratio[1]-j)*fine[1](fi + i, fj, fk + k, nf) + + j *fine[1](fi + i, fj+ratio[1], fk + k, nf) )/ratio[1]; + + } + } + } + + for( int k=1; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ + + fine[2](fi + i, fj + j, fk + k, nf) = ((ratio[2]-k)*fine[2](fi + i, fj + j, fk, nf) + + k *fine[2](fi + i, fj + j, fk+ratio[2], nf) )/ratio[2]; + + } + } + } + +// compute target divergence + + Real uplus = 0; + Real uminus = 0; + + for( int k=0; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + + uplus += fine[0](fi+ratio[0],fj+j,fk+k,nf); + uminus += fine[0](fi ,fj+j,fk+k,nf); + + } + } + + Real vplus = 0; + Real vminus = 0; + + for( int k=0; k < ratio[2]; ++k){ + for( int i=0; i < ratio[0]; ++i){ + + vplus += fine[1](fi+i,fj+ratio[1],fk+k,nf); + vminus += fine[1](fi+i,fj, fk+k,nf); + + } + } + + Real wplus = 0; + Real wminus = 0; + + for( int j=0; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ + + wplus += fine[2](fi+i,fj+j,fk+ratio[2],nf); + wminus += fine[2](fi+i,fj+j,fk ,nf); + + } + } + + Real target_div = (uplus-uminus)/(N*dx) + (vplus-vminus)/(N*dy) + (wplus-wminus)/(N*dz); + +// form rhs + Array1D rhs; + Array1D phi; + + for( int k=0; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ + + int irow = i + j*ratio[0] +k*ratio[0]*ratio[1]; + + rhs(irow) = ((fine[0](fi+i+1,fj+j, fk+k, nf) -fine[0](fi+i,fj+j,fk+k,nf))*dxinv + + (fine[1](fi+i ,fj+j+1,fk+k, nf) - fine[1](fi+i,fj+j,fk+k,nf))*dyinv + + (fine[2](fi+i, fj+j, fk+k+1,nf) - fine[2](fi+i,fj+j,fk+k,nf))*dzinv) - target_div; + } + } + } + +// solver the system + + lusolver(phi.begin(), rhs.begin()); + +// this is for sanity check +// can be put in if needed + +// Array3D phi_reshape; + +// for( int i=0; i < ratio[0] , ++j){ +// for( int j=0; j < ratio[1] , ++j){ +// for( int k=0; k < ratio[2] , ++k){ + +// int irow = i + j*ratio[0] +k*ratio[0]*ratio[1]; + +// phi_reshape(i,j,k) = phi(irow) + +// } +// } +// } + + + for( int k=0; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + for( int i=1; i < ratio[0]; ++i){ + + int plus = i + j*ratio[0] +k*ratio[0]*ratio[1]; + int minus = i - 1 + j*ratio[0] +k*ratio[0]*ratio[1]; +// fine[0](fi + i, fj + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i-1,j,k))*dxinv + fine[0](fi + i, fj + j, fk + k, nf) += ( phi(plus) - phi(minus))*dxinv; + + } + } + } + + for( int k=0; k < ratio[2]; ++k){ + for( int j=1; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ + + int plus = i + j*ratio[0] +k*ratio[0]*ratio[1]; + int minus = i + (j-1)*ratio[0] +k*ratio[0]*ratio[1]; +// fine[1](fi + i, fj + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i,j-1,k))*dyinv + fine[1](fi + i, fj + j, fk + k, nf) += ( phi(plus) - phi(minus))*dyinv; + + } + } + } + + for( int k=1; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ + + int plus = i + j*ratio[0] + k *ratio[0]*ratio[1]; + int minus = i + j*ratio[0] + (k-1)*ratio[0]*ratio[1]; +// fine[2](fi + i, fj + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i,j,k-1))*dzinv + fine[2](fi + i, fj + j, fk + k, nf) += ( phi(plus) - phi(minus))*dzinv; + + } + } + } + + +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void face_linear_interp_x (int i, int j, int k, int n, Array4 const& fine, @@ -414,7 +733,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int k = klo; k <= khi; ++k) { for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { - if ((fine_state(i,j,k,n) + fine(i,j,k,n)) < 0.0) { + if ((fine_state(i,j,k,n) + fine(i,j,k,n)) < Real(0.0)) { redo_me = true; } } @@ -444,14 +763,14 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, * * SumP = volume-weighted sum of all positive values of fine_state */ - Real crseTot = 0.0; - Real SumN = 0.0; - Real SumP = 0.0; + Real crseTot = Real(0.0); + Real SumN = Real(0.0); + Real SumP = Real(0.0); for (int k = klo; k <= khi; ++k) { for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { crseTot += fine(i,j,k,n); - if (fine_state(i,j,k,n) <= 0.0) { + if (fine_state(i,j,k,n) <= Real(0.0)) { SumN += fine_state(i,j,k,n); } else { SumP += fine_state(i,j,k,n); @@ -477,13 +796,13 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int i = ilo; i <= ihi; ++i) { // Fill in negative values first. - if (fine_state(i,j,k,n) < 0.0) { + if (fine_state(i,j,k,n) < Real(0.0)) { fine(i,j,k,n) = -fine_state(i,j,k,n); } // Then, add the remaining positive proportionally. - if (SumP > 0.0) { - if (fine_state(i,j,k,n) > 0.0) { + if (SumP > Real(0.0)) { + if (fine_state(i,j,k,n) > Real(0.0)) { Real alpha = (crseTot - std::abs(SumN)) / SumP; fine(i,j,k,n) = alpha * fine_state(i,j,k,n); } @@ -512,12 +831,12 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { Real alpha = crseTot / std::abs(SumN); - if (fine_state(i,j,k,n) < 0.0) { + if (fine_state(i,j,k,n) < Real(0.0)) { // Add correction to negative cells proportionally. fine(i,j,k,n) = alpha * std::abs(fine_state(i,j,k,n)); } else { // Don't touch the positive states. - fine(i,j,k,n) = 0.0; + fine(i,j,k,n) = Real(0.0); } } @@ -548,7 +867,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, } } else if ( (crseTot < 0) && (std::abs(crseTot) < SumP) && - ((SumP+SumN+crseTot) > 0.0) ) { + ((SumP+SumN+crseTot) > Real(0.0)) ) { /* * Special case 4: @@ -562,7 +881,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { - if (fine_state(i,j,k,n) < 0.0) { + if (fine_state(i,j,k,n) < Real(0.0)) { // Absorb the negative correction fine(i,j,k,n) = -fine_state(i,j,k,n); } else { @@ -576,7 +895,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, } } else if ( (crseTot < 0) && (std::abs(crseTot) < SumP) && - ((SumP+SumN+crseTot) < 0.0) ) { + ((SumP+SumN+crseTot) < Real(0.0)) ) { /* * Special case 5: * @@ -592,7 +911,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { - if (fine_state(i,j,k,n) > 0.0) { + if (fine_state(i,j,k,n) > Real(0.0)) { // Don't touch the negatives fine(i,j,k,n) = -fine_state(i,j,k,n); } else { @@ -613,7 +932,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int k = klo; k <= khi; ++k) { for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { - fine(i,j,k,0) = 0.0; + fine(i,j,k,0) = Real(0.0); for (int n = 1; n < nvar-1; ++n) { fine(i,j,k,0) += fine(i,j,k,n); } diff --git a/Src/AmrCore/AMReX_Interpolater.H b/Src/AmrCore/AMReX_Interpolater.H index 10e60eef41d..ae9bfc430d2 100644 --- a/Src/AmrCore/AMReX_Interpolater.H +++ b/Src/AmrCore/AMReX_Interpolater.H @@ -5,6 +5,11 @@ #include #include #include +#include +#include + +#include +#include /** * \file AMReX_Interpolater.H @@ -543,6 +548,9 @@ class FaceDivFree public Interpolater { public: + + FaceDivFree (); + /** * \brief Returns coarsened box given fine box and refinement ratio. * @@ -622,8 +630,29 @@ public: int actual_comp, int actual_state, RunOn runon) override; -}; +private: + +#if (AMREX_SPACEDIM == 2) + using Solver_t = std::variant, // IntVect(2,2): refinement + LUSolver< 8,Real>, // IntVect(4,2) + LUSolver< 8,Real>, // IntVect(2,4) + LUSolver<16,Real>>;// IntVect(4,4) +#elif (AMREX_SPACEDIM == 3) + using Solver_t = std::variant, // IntVect(2,2,2): refinement + LUSolver<16,Real>, // IntVect(4,2,2) + LUSolver<16,Real>, // IntVect(2,4,2) + LUSolver<32,Real>, // IntVect(4,4,2) + LUSolver<16,Real>, // IntVect(2,2,4) + LUSolver<32,Real>, // IntVect(4,2,4) + LUSolver<32,Real>, // IntVect(2,4,4) + LUSolver<64,Real>>;// IntVect(4,4,4) +#endif + +#if (AMREX_SPACEDIM > 1) + Vector> m_lusolver; +#endif +}; /** * \brief Piecewise constant tangential interpolation / linear normal interpolation of face data. diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index c25b53fa4f2..a0b247996db 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -1,10 +1,12 @@ +#include #include -#include #include +#include #include #include #include +#include #include @@ -1295,6 +1297,13 @@ CellConservativeQuartic::interp (const FArrayBox& crse, }); } +FaceDivFree::FaceDivFree () + : Interpolater() +#if (AMREX_SPACEDIM > 1) + , m_lusolver(OpenMP::get_max_threads()) +#endif +{} + Box FaceDivFree::CoarseBox (const Box& fine, int ratio) @@ -1345,14 +1354,26 @@ FaceDivFree::interp_arr (Array const& crse, const int /*actual_state*/, const RunOn runon) { +#if (AMREX_SPACEDIM == 1) + amrex::ignore_unused(crse,crse_comp,fine,fine_comp,ncomp,fine_region,ratio, + solve_mask,fine_geom,runon); +#else + BL_PROFILE("FaceDivFree::interp()"); Array types; for (int d=0; d cell_size = fine_geom.CellSizeArray(); @@ -1368,6 +1389,11 @@ FaceDivFree::interp_arr (Array const& crse, { maskarr[d] = solve_mask[d]->const_array(0); } } + + +// JBB 1. don't know what mark does here +// JBB 2. build and factor matrix here or build and factor it earlier somehow + // Fuse the launches, 1 for each dimension, into a single launch. AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(runon, amrex::convert(c_fine_region,types[0]), bx0, @@ -1376,7 +1402,7 @@ FaceDivFree::interp_arr (Array const& crse, { for (int n=0; n (i,j,k,crse_comp+n,fine_comp+n, 0, + amrex::facediv_face_interp (i,j,k,n, 0, crsearr[0], finearr[0], maskarr[0], ratio); } }); @@ -1387,7 +1413,7 @@ FaceDivFree::interp_arr (Array const& crse, { for (int n=0; n (i,j,k,crse_comp+n,fine_comp+n, 1, + amrex::facediv_face_interp (i,j,k,n, 1, crsearr[1], finearr[1], maskarr[1], ratio); } }); @@ -1398,16 +1424,62 @@ FaceDivFree::interp_arr (Array const& crse, { for (int n=0; n (i,j,k,crse_comp+n,fine_comp+n, 2, + amrex::facediv_face_interp (i,j,k,n, 2, crsearr[2], finearr[2], maskarr[2], ratio); } }); }); - AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n, - { - amrex::facediv_int(i, j, k, fine_comp+n, finearr, ratio, cell_size); - }); + if (ratio == 2) { + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n, + { + amrex::facediv_int(i, j, k, n, finearr, ratio, cell_size); + }); + } else { + auto& lusolver = m_lusolver[OpenMP::get_thread_num()][fine_geom.Domain()]; + void* psolver_h = nullptr; + std::size_t solver_size = 0; + + constexpr int ncases = AMREX_D_TERM(2,*2,*2); + int rr_case = -1; + // Case 0 is ratio == 2. It has been handled in the if-block. + constexpr_for<1,ncases>([&] (auto i) + { + if (facediv_get_refratio() == ratio) { + AMREX_ASSERT(rr_case == -1); + rr_case = i; + if (lusolver.index() != i) { + lusolver.template emplace( + facediv_init_lusolver + ::size>(ratio, cell_size)); + } + psolver_h = (void*) &(std::get(lusolver)); + solver_size = sizeof(std::variant_alternative_t); + } + }); + AMREX_ASSERT(rr_case > 0 && rr_case < ncases && psolver_h); + +#ifdef AMREX_USE_GPU + auto* psolver_d = (void*) The_Async_Arena()->alloc(solver_size); + Gpu::htod_memcpy_async(psolver_d, psolver_h, solver_size); +#else + auto* psolver_d = psolver_h; +#endif + + ParallelFor(TypeList>{}, {rr_case}, + c_fine_region, ncomp, + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, auto rrc) + { + auto const* ps = (std::variant_alternative_t const*)psolver_d; + amrex::facediv_int_gen(i, j, k, n, finearr, ratio, cell_size, *ps); + }); + +#ifdef AMREX_USE_GPU + The_Async_Arena()->free(psolver_d); +#endif + } + +#endif } Box diff --git a/Src/Base/AMReX_CTOParallelForImpl.H b/Src/Base/AMReX_CTOParallelForImpl.H index 99dc20b452a..80abe87b1ae 100644 --- a/Src/Base/AMReX_CTOParallelForImpl.H +++ b/Src/Base/AMReX_CTOParallelForImpl.H @@ -6,6 +6,7 @@ #include #include +#include #include /* This header is not for the users to include directly. It's meant to be @@ -73,9 +74,17 @@ namespace detail amrex::ignore_unused(found_option); AMREX_ASSERT(found_option); } + + template + constexpr auto MakeCTOImpl(std::integer_sequence) + -> CompileTimeOptions; } /// \endcond +template +using CTOSeq = decltype(detail::MakeCTOImpl( + std::make_integer_sequence{})); + /** * \brief Compile time optimization of kernels with run time options. * diff --git a/Src/Base/AMReX_LUSolver.H b/Src/Base/AMReX_LUSolver.H index bd60fc609dc..37ed6f8e2ef 100644 --- a/Src/Base/AMReX_LUSolver.H +++ b/Src/Base/AMReX_LUSolver.H @@ -16,6 +16,8 @@ class LUSolver { public: + static constexpr int size = N; + LUSolver () = default; AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE diff --git a/Tests/DivFreePatch/inputs b/Tests/DivFreePatch/inputs index e5245751940..9fb2965336e 100644 --- a/Tests/DivFreePatch/inputs +++ b/Tests/DivFreePatch/inputs @@ -3,6 +3,11 @@ f_offset = 8 max_grid_size = 128 +# default refinement ratio (can set anisotropic values like 4 2 2) +ref_ratio = 2 2 2 + +div_tol = 1e-9 + # For testing nested vs. non-nested InterpFromCoarse #nghost_f = 1 diff --git a/Tests/DivFreePatch/main.cpp b/Tests/DivFreePatch/main.cpp index 857461a1f64..fde3919394f 100644 --- a/Tests/DivFreePatch/main.cpp +++ b/Tests/DivFreePatch/main.cpp @@ -29,10 +29,10 @@ void setupMF(MultiFab& mf, const int type = 0, const BoxArray& exclude = BoxArra { Box bx = ba[bid]; - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + amrex::ParallelForRNG(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k, RandomEngine const& eng) noexcept { if (type == 0) - { arr(i,j,k) = amrex::Random()*10; } + { arr(i,j,k) = amrex::Random(eng)*10; } else if (type == 1) { arr(i,j,k) = double(i)+double(j)+double(k); } else if (type == 2) @@ -135,6 +135,8 @@ void main_main () int f_offset = 4; int nghost_c = 1; int nghost_f = 2; + Real div_tol = 1.0e-9; + bool test_pass = true; amrex::Vector c_lo(AMREX_SPACEDIM, 0); amrex::Vector c_hi(AMREX_SPACEDIM, 32); @@ -142,6 +144,8 @@ void main_main () amrex::Vector f_hi(AMREX_SPACEDIM, 4); int max_grid_size = 64; + Vector ref_ratio_vec(AMREX_SPACEDIM, 2); + { ParmParse pp; pp.query("n_cell", n_cell); @@ -149,11 +153,29 @@ void main_main () pp.query("max_grid_size", max_grid_size); pp.query("nghost_c", nghost_c); pp.query("nghost_f", nghost_f); + pp.query("div_tol", div_tol); pp.queryarr("c_hi", c_hi, 0, AMREX_SPACEDIM); pp.queryarr("f_lo", f_lo, 0, AMREX_SPACEDIM); pp.queryarr("f_hi", f_hi, 0, AMREX_SPACEDIM); + int n_ref_ratio = pp.countval("ref_ratio"); + if (n_ref_ratio == 1) { + int rr = 0; + pp.get("ref_ratio", rr); + for (int i=0; i= AMREX_SPACEDIM) { + Vector tmp(n_ref_ratio); + pp.getarr("ref_ratio", tmp, 0, n_ref_ratio); + for (int i=0; i 0) { + amrex::Abort("ref_ratio must contain either 1 value or >= AMREX_SPACEDIM values"); + } + if (n_cell != 0) { for (int i=0; i < AMREX_SPACEDIM; ++i) @@ -165,8 +187,15 @@ void main_main () } } + for (int i = 0; i < AMREX_SPACEDIM; ++i) { + if (ref_ratio_vec[i] != 2 && ref_ratio_vec[i] != 4) { + amrex::Abort("DivFreePatch requires ref_ratio entries of 2 or 4"); + } + } + + IntVect ratio(ref_ratio_vec); + int ncomp = 1; - IntVect ratio{AMREX_D_DECL(2,2,2)}; // For this stencil (octree), always 2. IntVect ghost_c{AMREX_D_DECL(nghost_c, nghost_c, nghost_c)}; // For this stencil (octree), need 1 coarse ghost. IntVect ghost_f{AMREX_D_DECL(nghost_f, nghost_f, nghost_f)}; // For this stencil (octree), need 1 fine ghost. Geometry c_geom, f_geom, f_geom_wghost, f_geom_all, f_geom_partial; @@ -375,12 +404,43 @@ void main_main () ratio, mapper, bcrec, 0); } - // Check for errors + bool any_ratio_ne_two = false; + for (int d=0; d averaged_faces; + for (int d=0; d fine_face_ptrs + {AMREX_D_DECL(&f_mf_faces[0], &f_mf_faces[1], &f_mf_faces[2])}; + Array avg_face_ptrs + {AMREX_D_DECL(&averaged_faces[0], &averaged_faces[1], &averaged_faces[2])}; + amrex::average_down_faces(fine_face_ptrs, avg_face_ptrs, ratio, 0); + for (int d=0; d div_tol) { + amrex::Print() << " InterpFromCoarse abs error exceeds tolerance " << div_tol << '\n'; + test_pass = false; + } + // *************************************************************** // Change coarse values, save current fine values for checking @@ -509,24 +578,34 @@ void main_main () calcDiv(f_mf_faces, div_fine, f_geom.CellSizeArray()); amrex::VisMF::Write(div_fine, std::string("pltfiles/fineFP")); + Real max_abs_fp = MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "diffFP"); + Real max_rel_fp = MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "", true); amrex::Print() << " Max FillPatchTwoLevels divergence error: absolute relative\n " << " " - < div_tol) { + amrex::Print() << " FillPatch abs error exceeds tolerance " << div_tol << '\n'; + test_pass = false; + } for (int i=0; i