From 6f34555990e3a4890c42ed35f760560fd370e94a Mon Sep 17 00:00:00 2001 From: jbb Date: Sat, 28 Mar 2026 12:56:55 -0700 Subject: [PATCH 01/15] added general divergence preserving interpolation of face centered fields in 3D --- Src/AmrCore/AMReX_Interp_3D_C.H | 365 ++++++++++++++++++++++++++--- Src/AmrCore/AMReX_Interpolater.cpp | 21 +- 2 files changed, 342 insertions(+), 44 deletions(-) diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index c8836a6a186..78d1a9e494c 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -4,6 +4,7 @@ #include #include +#include namespace amrex { @@ -149,64 +150,117 @@ facediv_face_interp (int ci, int cj, int ck, switch (idir) { case 0: { - const Real ll = crse(ci, cj-1, ck-1, nc); - const Real cl = crse(ci, cj-1, ck, nc); - const Real rl = crse(ci, cj-1, ck+1, nc); +// const Real ll = crse(ci, cj-1, ck-1, nc); +// const Real cl = crse(ci, cj-1, ck, nc); +// const Real rl = crse(ci, cj-1, ck+1, nc); +// +// const Real lc = crse(ci, cj , ck-1, nc); +// const Real cc = crse(ci, cj , ck, nc); +// const Real rc = crse(ci, cj , ck+1, nc); - const Real lc = crse(ci, cj , ck-1, nc); - const Real cc = crse(ci, cj , ck, nc); - const Real rc = crse(ci, cj , ck+1, nc); +// const Real lu = crse(ci, cj+1, ck-1, nc); +// const Real cu = crse(ci, cj+1, ck, nc); +// const Real ru = crse(ci, cj+1, ck+1, nc); - const Real lu = crse(ci, cj+1, ck-1, nc); - const Real cu = crse(ci, cj+1, ck, nc); - const Real ru = crse(ci, cj+1, ck+1, nc); +// fine(fi,fj ,fk ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+lc-cu-rc) + (ll+ru-lu-rl) ); +// fine(fi,fj ,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+rc-cu-lc) + (lu+rl-ll-ru) ); +// fine(fi,fj+1,fk ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) ); +// fine(fi,fj+1,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) ); - fine(fi,fj ,fk ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+lc-cu-rc) + (ll+ru-lu-rl) ); - fine(fi,fj ,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+rc-cu-lc) + (lu+rl-ll-ru) ); - fine(fi,fj+1,fk ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) ); - fine(fi,fj+1,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) ); + const Real slpy = 0.5* (crse(ci,cj+1,ck ,nc) - crse(ci,cj-1,ck ,nc)) + const Real slpz = 0.5* (crse(ci,cj, ck+1,nc) - crse(ci,cj, ck-1,nc)) + const Real slpyz = 0.25*(crse(ci,cj+1,ck+1,nc) - crse(ci,cj+1,ck-1,nc) - crse(ci,cj-1,ck+1,nc) + crse(ci,cj-1,ck-1,nc)) + + const Real dys = 1./ratio[1]; + const Real dzs = 1./ratio[2]; + + for int(j=0; j +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void +facediv_int_gen (int ci, int cj, int ck, int nf, + GpuArray, AMREX_SPACEDIM> const& fine, + IntVect const& ratio, + GpuArray const& cellSize) noexcept +{ + const int fi = ci*ratio[0]; + const int fj = cj*ratio[1]; + const int fk = ck*ratio[2]; + + const Real dx = cellSize[0]; + const Real dy = cellSize[1]; + const Real dy = cellSize[2]; + +// interpolate initial veloc + + for( int i=1; i < ratio[0] , ++j){ + for( int j=0; j < ratio[1] , ++j){ + for( int k=0; k < ratio[2] , ++k){ + + fine[0](fi + i, jf + j, fk + k, nf) = ((ratio[0]-i)*fine[0](fi, fj , fk , nf) + i* fine[0](fi+ratio[0], fj , fk , nf) )/ratio[0]; + + } + } + } + + for( int i=0; i < ratio[0] , ++j){ + for( int j=1; j < ratio[1] , ++j){ + for( int k=0; k < ratio[2] , ++k){ + + fine[1](fi + i, jf + j, fk + k, nf) = ((ratio[1]-j)*fine[1](fi, fj , fk , nf) + j* fine[1](fi, fj +ratio[1] , fk , nf) )/ratio[1]; + + } + } + } + + for( int i=0; i < ratio[0] , ++j){ + for( int j=0; j < ratio[1] , ++j){ + for( int k=1; k < ratio[2] , ++k){ + + fine[2](fi + i, jf + j, fk + k, nf) = ((ratio[2]-k)*fine[2](fi, fj , fk , nf) + k* fine[2](fi, fj , fk +ratio[2] , nf) )/ratio[2]; + + } + } + } + +// compute target divergence + + Real uplus = 0; + Real uminus = 0; + + for( int j=0; j < ratio[1], ++j){ + for( int k=0; k < ratio[2], ++k){ + + uplus += fine[0](fi+ratio[0],fj+j,fk+k); + uminus += fine[0](fi ,fj+j,fk+k); + + } + } + + Real vplus = 0; + Real vminus = 0; + + for( int i=0; i < ratio[0], ++i){ + for( int k=0; i < ratio[2], ++k){ + + vplus += fine[1](fi+i,fj+ratio[1],fk+k); + vminus += fine[1](fi+i,fj, fk+k); + + } + } + + Real wplus = 0; + Real wminus = 0; + + for( int i=0; i < ratio[0], ++i){ + for( int j=0; j < ratio[0], ++j){ + + wplus += fine[2](fi+i,fj+j,fk+ratio[2]); + wminus += fine[2](fi+i,fj+j,fk); + + } + } + + int prodrat = ratio[0]*ratio[1]*ratio[2]; + + Real target_div = (uplus-uminus)/(prodrat*dx) + (vplus-vminus)/(prodrat*dy) + (wplus-wminus)/(prodrat*dz); + +// set up matrix for local MAC projection + + Array2D mat; + + + for (int irow = 0; irow < prodrat; irow++) { + for (int icol = 0; icol < prodrat; icol++) { + mat(irow,icol) = 0.0; + } + } + + Real dxinv = 1./dx; + Real dyinv = 1./dy; + Real dzinv = 1./dz; + + Real ddx2 = dxinv*dxinv; + Real ddy2 = dyinv*dyinv; + Real ddz2 = dzinv*dzinv; + + for( int i=0; i < ratio[0] , ++j){ + for( int j=0; j < ratio[1] , ++j){ + for( int k=0; k < ratio[2] , ++k){ + + int irow = i + j*ratio[0] +k*ratio[0]*ratio[1]; + + if(i > 0){ + mat(irow,irow) += ddx2; + mat(irow,irow-1) += -ddx2; + } + + if(i < ratio[0]-1){ + mat(irow,irow) += ddx2; + mat(irow,irow+1) += -ddx2; + } + + if(j > 0){ + mat(irow,irow) += ddy2; + mat(irow,irow-ratio[0]) += -ddy2; + } + + if(j < ratio[1]-1){ + mat(irow,irow) += ddy2; + mat(irow,irow+ratio[0]) += -ddy2; + } + + if(k > 0){ + mat(irow,irow) += ddz2; + mat(irow,irow-ratio[0]*ratio[1]) += -ddz2; + } + + if(k < ratio[2]-1){ + mat(irow,irow) += ddz2; + mat(irow,irow+ratio[0]*ratio[1]) += -ddz2; + } + } + } + } + +// hcak matrxi to be nonsingular + + mat(prodrat-1,prodrat-1) +=ddx2; + +// form rhs + Array1D rhs; + Array1D phi; + + for( int i=0; i < ratio[0] , ++j){ + for( int j=0; j < ratio[1] , ++j){ + for( int k=0; k < ratio[2] , ++k){ + + int irow = i + j*ratio[0] +k*ratio[0]*ratio[1]; + + rhs[irow] = targetdiv - ((fine[0](fi+i+1,fj+j, fk+k, nf) -fine[0](fi+i,fj+j,f+k,nf))*dxinv + + (fine[1](fi+i ,fj+j+1,fk+k, nf) - fine[1](fi+i,fj+j,f+k,nf))*dyinv + + (fine[2](fi+i, fj+j, fk+k+1,nf) - fine[2](fi+i,fj+j,f+k,nf))*dzinv); + } + } + } + +// solver the system + + LUSolver lusolver(mat); + lusolver(phi.data(), rhs.data()); + +// this is for sanity check +// can be put in if needed + +// Array3D phi_reshape; + +// for( int i=0; i < ratio[0] , ++j){ +// for( int j=0; j < ratio[1] , ++j){ +// for( int k=0; k < ratio[2] , ++k){ + +// int irow = i + j*ratio[0] +k*ratio[0]*ratio[1]; + +// phi_reshape(i,j,k) = phi(irow) + +// } +// } +// } + + + for( int i=1; i < ratio[0] , ++j){ + for( int j=0; j < ratio[1] , ++j){ + for( int k=0; k < ratio[2] , ++k){ + + int plus = i + j*ratio[0] +k*ratio[0]*ratio[1]; + int minus = i - 1 + j*ratio[0] +k*ratio[0]*ratio[1]; +// fine[0](fi + i, jf + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i-1,j,k))*dxinv + fine[0](fi + i, jf + j, fk + k, nf) += ( phi(plus) - phi(minus))*dxinv; + + } + } + } + + for( int i=0; i < ratio[0] , ++j){ + for( int j=1; j < ratio[1] , ++j){ + for( int k=0; k < ratio[2] , ++k){ + + int plus = i + j*ratio[0] +k*ratio[0]*ratio[1]; + int minus = i + (j-1)*ratio[0] +k*ratio[0]*ratio[1]; +// fine[1](fi + i, jf + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i,j-1,k))*dyinv + fine[1](fi + i, jf + j, fk + k, nf) += ( phi(plus) - phi(minus))*dyinv; + + } + } + } + + for( int i=0; i < ratio[0] , ++j){ + for( int j=0; j < ratio[1] , ++j){ + for( int k=1; k < ratio[2] , ++k){ + + int plus = i + j*ratio[0] + k *ratio[0]*ratio[1]; + int minus = i + j*ratio[0] + (k-1)*ratio[0]*ratio[1]; +// fine[2](fi + i, jf + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i,j,k-1))*dzinv + fine[2](fi + i, jf + j, fk + k, nf) += ( phi(plus) - phi(minus))*dzinv; + + } + } + } + + +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void face_linear_interp_x (int i, int j, int k, int n, Array4 const& fine, diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index c25b53fa4f2..90c784dd990 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -1352,7 +1352,7 @@ FaceDivFree::interp_arr (Array const& crse, { types[d].set(d); } // This is currently only designed for octree, where ratio = 2. - AMREX_ALWAYS_ASSERT(ratio == 2); + // AMREX_ALWAYS_ASSERT(ratio == 2); const Box c_fine_region = amrex::coarsen(fine_region, ratio); GpuArray cell_size = fine_geom.CellSizeArray(); @@ -1368,6 +1368,9 @@ FaceDivFree::interp_arr (Array const& crse, { maskarr[d] = solve_mask[d]->const_array(0); } } +// JBB 1. don't know what mark does here +// JBB 2. build and factor matrix here or build and factor it earlier somehow + // Fuse the launches, 1 for each dimension, into a single launch. AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(runon, amrex::convert(c_fine_region,types[0]), bx0, @@ -1404,10 +1407,18 @@ FaceDivFree::interp_arr (Array const& crse, }); }); - AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n, - { - amrex::facediv_int(i, j, k, fine_comp+n, finearr, ratio, cell_size); - }); + if( AMREX_D_TERM (ratio[0] == 2, && ratio[1] == 2, && ratio[2] == 2);){ + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n, + { + amrex::facediv_int(i, j, k, fine_comp+n, finearr, ratio, cell_size); + }); + } else { + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n, + { +// JBB want to pass through LU factored matrix here + amrex::facediv_int_gen(i, j, k, fine_comp+n, finearr, ratio, cell_size); + }); + } } Box From 85ceefd2e9b247a33518e30726b1b0e5928fd1ee Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 28 Mar 2026 17:18:43 -0700 Subject: [PATCH 02/15] Fix typos --- Src/AmrCore/AMReX_Interp_3D_C.H | 189 +++++++++++++++-------------- Src/AmrCore/AMReX_Interpolater.cpp | 2 +- 2 files changed, 96 insertions(+), 95 deletions(-) diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index 78d1a9e494c..9de7d732b1e 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -167,19 +167,19 @@ facediv_face_interp (int ci, int cj, int ck, // fine(fi,fj+1,fk ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) ); // fine(fi,fj+1,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) ); - const Real slpy = 0.5* (crse(ci,cj+1,ck ,nc) - crse(ci,cj-1,ck ,nc)) - const Real slpz = 0.5* (crse(ci,cj, ck+1,nc) - crse(ci,cj, ck-1,nc)) - const Real slpyz = 0.25*(crse(ci,cj+1,ck+1,nc) - crse(ci,cj+1,ck-1,nc) - crse(ci,cj-1,ck+1,nc) + crse(ci,cj-1,ck-1,nc)) + const Real slpy = Real(0.5)* (crse(ci,cj+1,ck ,nc) - crse(ci,cj-1,ck ,nc)); + const Real slpz = Real(0.5)* (crse(ci,cj, ck+1,nc) - crse(ci,cj, ck-1,nc)); + const Real slpyz = Real(0.25)*(crse(ci,cj+1,ck+1,nc) - crse(ci,cj+1,ck-1,nc) - crse(ci,cj-1,ck+1,nc) + crse(ci,cj-1,ck-1,nc)); - const Real dys = 1./ratio[1]; - const Real dzs = 1./ratio[2]; + const Real dys = Real(1.)/ratio[1]; + const Real dzs = Real(1.)/ratio[2]; - for int(j=0; j rhs; Array1D phi; - for( int i=0; i < ratio[0] , ++j){ - for( int j=0; j < ratio[1] , ++j){ - for( int k=0; k < ratio[2] , ++k){ + for( int k=0; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ int irow = i + j*ratio[0] +k*ratio[0]*ratio[1]; - rhs[irow] = targetdiv - ((fine[0](fi+i+1,fj+j, fk+k, nf) -fine[0](fi+i,fj+j,f+k,nf))*dxinv - + (fine[1](fi+i ,fj+j+1,fk+k, nf) - fine[1](fi+i,fj+j,f+k,nf))*dyinv - + (fine[2](fi+i, fj+j, fk+k+1,nf) - fine[2](fi+i,fj+j,f+k,nf))*dzinv); + rhs(irow) = target_div - ((fine[0](fi+i+1,fj+j, fk+k, nf) -fine[0](fi+i,fj+j,fk+k,nf))*dxinv + + (fine[1](fi+i ,fj+j+1,fk+k, nf) - fine[1](fi+i,fj+j,fk+k,nf))*dyinv + + (fine[2](fi+i, fj+j, fk+k+1,nf) - fine[2](fi+i,fj+j,fk+k,nf))*dzinv); } } } @@ -564,7 +565,7 @@ facediv_int_gen (int ci, int cj, int ck, int nf, // solver the system LUSolver lusolver(mat); - lusolver(phi.data(), rhs.data()); + lusolver(phi.begin(), rhs.begin()); // this is for sanity check // can be put in if needed @@ -584,40 +585,40 @@ facediv_int_gen (int ci, int cj, int ck, int nf, // } - for( int i=1; i < ratio[0] , ++j){ - for( int j=0; j < ratio[1] , ++j){ - for( int k=0; k < ratio[2] , ++k){ + for( int k=0; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + for( int i=1; i < ratio[0]; ++i){ int plus = i + j*ratio[0] +k*ratio[0]*ratio[1]; int minus = i - 1 + j*ratio[0] +k*ratio[0]*ratio[1]; -// fine[0](fi + i, jf + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i-1,j,k))*dxinv - fine[0](fi + i, jf + j, fk + k, nf) += ( phi(plus) - phi(minus))*dxinv; +// fine[0](fi + i, fj + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i-1,j,k))*dxinv + fine[0](fi + i, fj + j, fk + k, nf) += ( phi(plus) - phi(minus))*dxinv; } } } - for( int i=0; i < ratio[0] , ++j){ - for( int j=1; j < ratio[1] , ++j){ - for( int k=0; k < ratio[2] , ++k){ + for( int k=0; k < ratio[2]; ++k){ + for( int j=1; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ int plus = i + j*ratio[0] +k*ratio[0]*ratio[1]; int minus = i + (j-1)*ratio[0] +k*ratio[0]*ratio[1]; -// fine[1](fi + i, jf + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i,j-1,k))*dyinv - fine[1](fi + i, jf + j, fk + k, nf) += ( phi(plus) - phi(minus))*dyinv; +// fine[1](fi + i, fj + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i,j-1,k))*dyinv + fine[1](fi + i, fj + j, fk + k, nf) += ( phi(plus) - phi(minus))*dyinv; } } } - for( int i=0; i < ratio[0] , ++j){ - for( int j=0; j < ratio[1] , ++j){ - for( int k=1; k < ratio[2] , ++k){ + for( int k=1; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ int plus = i + j*ratio[0] + k *ratio[0]*ratio[1]; int minus = i + j*ratio[0] + (k-1)*ratio[0]*ratio[1]; -// fine[2](fi + i, jf + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i,j,k-1))*dzinv - fine[2](fi + i, jf + j, fk + k, nf) += ( phi(plus) - phi(minus))*dzinv; +// fine[2](fi + i, fj + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i,j,k-1))*dzinv + fine[2](fi + i, fj + j, fk + k, nf) += ( phi(plus) - phi(minus))*dzinv; } } @@ -701,7 +702,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int k = klo; k <= khi; ++k) { for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { - if ((fine_state(i,j,k,n) + fine(i,j,k,n)) < 0.0) { + if ((fine_state(i,j,k,n) + fine(i,j,k,n)) < Real(0.0)) { redo_me = true; } } @@ -731,14 +732,14 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, * * SumP = volume-weighted sum of all positive values of fine_state */ - Real crseTot = 0.0; - Real SumN = 0.0; - Real SumP = 0.0; + Real crseTot = Real(0.0); + Real SumN = Real(0.0); + Real SumP = Real(0.0); for (int k = klo; k <= khi; ++k) { for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { crseTot += fine(i,j,k,n); - if (fine_state(i,j,k,n) <= 0.0) { + if (fine_state(i,j,k,n) <= Real(0.0)) { SumN += fine_state(i,j,k,n); } else { SumP += fine_state(i,j,k,n); @@ -764,13 +765,13 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int i = ilo; i <= ihi; ++i) { // Fill in negative values first. - if (fine_state(i,j,k,n) < 0.0) { + if (fine_state(i,j,k,n) < Real(0.0)) { fine(i,j,k,n) = -fine_state(i,j,k,n); } // Then, add the remaining positive proportionally. - if (SumP > 0.0) { - if (fine_state(i,j,k,n) > 0.0) { + if (SumP > Real(0.0)) { + if (fine_state(i,j,k,n) > Real(0.0)) { Real alpha = (crseTot - std::abs(SumN)) / SumP; fine(i,j,k,n) = alpha * fine_state(i,j,k,n); } @@ -799,12 +800,12 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { Real alpha = crseTot / std::abs(SumN); - if (fine_state(i,j,k,n) < 0.0) { + if (fine_state(i,j,k,n) < Real(0.0)) { // Add correction to negative cells proportionally. fine(i,j,k,n) = alpha * std::abs(fine_state(i,j,k,n)); } else { // Don't touch the positive states. - fine(i,j,k,n) = 0.0; + fine(i,j,k,n) = Real(0.0); } } @@ -835,7 +836,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, } } else if ( (crseTot < 0) && (std::abs(crseTot) < SumP) && - ((SumP+SumN+crseTot) > 0.0) ) { + ((SumP+SumN+crseTot) > Real(0.0)) ) { /* * Special case 4: @@ -849,7 +850,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { - if (fine_state(i,j,k,n) < 0.0) { + if (fine_state(i,j,k,n) < Real(0.0)) { // Absorb the negative correction fine(i,j,k,n) = -fine_state(i,j,k,n); } else { @@ -863,7 +864,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, } } else if ( (crseTot < 0) && (std::abs(crseTot) < SumP) && - ((SumP+SumN+crseTot) < 0.0) ) { + ((SumP+SumN+crseTot) < Real(0.0)) ) { /* * Special case 5: * @@ -879,7 +880,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { - if (fine_state(i,j,k,n) > 0.0) { + if (fine_state(i,j,k,n) > Real(0.0)) { // Don't touch the negatives fine(i,j,k,n) = -fine_state(i,j,k,n); } else { @@ -900,7 +901,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar, for (int k = klo; k <= khi; ++k) { for (int j = jlo; j <= jhi; ++j) { for (int i = ilo; i <= ihi; ++i) { - fine(i,j,k,0) = 0.0; + fine(i,j,k,0) = Real(0.0); for (int n = 1; n < nvar-1; ++n) { fine(i,j,k,0) += fine(i,j,k,n); } diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index 90c784dd990..ee7efede33e 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -1407,7 +1407,7 @@ FaceDivFree::interp_arr (Array const& crse, }); }); - if( AMREX_D_TERM (ratio[0] == 2, && ratio[1] == 2, && ratio[2] == 2);){ + if( AMREX_D_TERM (ratio[0] == 2, && ratio[1] == 2, && ratio[2] == 2)){ AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n, { amrex::facediv_int(i, j, k, fine_comp+n, finearr, ratio, cell_size); From 0abb33a88d40c73d8ec8595205c9ebeb3f810c8f Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 28 Mar 2026 17:29:22 -0700 Subject: [PATCH 03/15] Use Order::C --- Src/AmrCore/AMReX_Interp_3D_C.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index 9de7d732b1e..ce2b6c4ae04 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -485,7 +485,7 @@ facediv_int_gen (int ci, int cj, int ck, int nf, // set up matrix for local MAC projection - Array2D mat; + Array2D mat; for (int irow = 0; irow < prodrat; irow++) { From b2f27eb2f6d4104111222ab5b20d036526593184 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 28 Mar 2026 21:40:04 -0700 Subject: [PATCH 04/15] set up solver outside kernel --- Src/AmrCore/AMReX_Interp_3D_C.H | 174 ++++++++++++++++------------ Src/AmrCore/AMReX_Interpolater.H | 31 ++++- Src/AmrCore/AMReX_Interpolater.cpp | 83 +++++++++++-- Src/Base/AMReX_CTOParallelForImpl.H | 9 ++ Src/Base/AMReX_LUSolver.H | 2 + 5 files changed, 216 insertions(+), 83 deletions(-) diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index ce2b6c4ae04..57a894f238f 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -127,7 +127,6 @@ nodebilin_interp (Box const& bx, Array4 const& fine, const int fcomp, const i } } - template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void facediv_face_interp (int ci, int cj, int ck, @@ -174,8 +173,8 @@ facediv_face_interp (int ci, int cj, int ck, const Real dys = Real(1.)/ratio[1]; const Real dzs = Real(1.)/ratio[2]; - for (int j=0; j -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void + +template =0 && i<8),int> = 0> +constexpr IntVect facediv_get_refratio () +{ + if constexpr (i == 0) { + return IntVect(2,2,2); + } else if constexpr (i == 1) { + return IntVect(4,2,2); + } else if constexpr (i == 2) { + return IntVect(2,4,2); + } else if constexpr (i == 3) { + return IntVect(4,4,2); + } else if constexpr (i == 4) { + return IntVect(2,2,4); + } else if constexpr (i == 5) { + return IntVect(4,2,4); + } else if constexpr (i == 6) { + return IntVect(2,4,4); + } else { + return IntVect(4,4,4); + } +} + +template +LUSolver +facediv_init_lusolver (IntVect const& ratio, + GpuArray const& cellSize) +{ + // set up matrix for local MAC projection + Array2D mat; + + for (int irow = 0; irow < N; irow++) { + for (int icol = 0; icol < N; icol++) { + mat(irow,icol) = Real(0.0); + } + } + + Real dxinv = Real(1.)/cellSize[0]; + Real dyinv = Real(1.)/cellSize[1]; + Real dzinv = Real(1.)/cellSize[2]; + + Real ddx2 = dxinv*dxinv; + Real ddy2 = dyinv*dyinv; + Real ddz2 = dzinv*dzinv; + + for( int k=0; k < ratio[2]; ++k){ + for( int j=0; j < ratio[1]; ++j){ + for( int i=0; i < ratio[0]; ++i){ + + int irow = i + j*ratio[0] +k*ratio[0]*ratio[1]; + + if(i > 0){ + mat(irow,irow) += ddx2; + mat(irow,irow-1) += -ddx2; + } + + if(i < ratio[0]-1){ + mat(irow,irow) += ddx2; + mat(irow,irow+1) += -ddx2; + } + + if(j > 0){ + mat(irow,irow) += ddy2; + mat(irow,irow-ratio[0]) += -ddy2; + } + + if(j < ratio[1]-1){ + mat(irow,irow) += ddy2; + mat(irow,irow+ratio[0]) += -ddy2; + } + + if(k > 0){ + mat(irow,irow) += ddz2; + mat(irow,irow-ratio[0]*ratio[1]) += -ddz2; + } + + if(k < ratio[2]-1){ + mat(irow,irow) += ddz2; + mat(irow,irow+ratio[0]*ratio[1]) += -ddz2; + } + }}} + + // hcak matrxi to be nonsingular + mat(N-1,N-1) +=ddx2; + + return LUSolver{mat}; +} + +template +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void facediv_int_gen (int ci, int cj, int ck, int nf, GpuArray, AMREX_SPACEDIM> const& fine, IntVect const& ratio, - GpuArray const& cellSize) noexcept + GpuArray const& cellSize, + LUSolver const& lusolver) noexcept { const int fi = ci*ratio[0]; const int fj = cj*ratio[1]; @@ -408,6 +496,10 @@ facediv_int_gen (int ci, int cj, int ck, int nf, const Real dy = cellSize[1]; const Real dz = cellSize[2]; + const Real dxinv = Real(1.)/cellSize[0]; + const Real dyinv = Real(1.)/cellSize[1]; + const Real dzinv = Real(1.)/cellSize[2]; + // interpolate initial veloc for( int k=0; k < ratio[2]; ++k){ @@ -478,76 +570,17 @@ facediv_int_gen (int ci, int cj, int ck, int nf, } } - // xxxxx int prodrat = ratio[0]*ratio[1]*ratio[2]; - constexpr int prodrat = 8; - - Real target_div = (uplus-uminus)/(prodrat*dx) + (vplus-vminus)/(prodrat*dy) + (wplus-wminus)/(prodrat*dz); + Real target_div = (uplus-uminus)/(N*dx) + (vplus-vminus)/(N*dy) + (wplus-wminus)/(N*dz); // set up matrix for local MAC projection - Array2D mat; - - - for (int irow = 0; irow < prodrat; irow++) { - for (int icol = 0; icol < prodrat; icol++) { - mat(irow,icol) = Real(0.0); - } - } - - Real dxinv = Real(1.)/dx; - Real dyinv = Real(1.)/dy; - Real dzinv = Real(1.)/dz; - - Real ddx2 = dxinv*dxinv; - Real ddy2 = dyinv*dyinv; - Real ddz2 = dzinv*dzinv; - - for( int k=0; k < ratio[2]; ++k){ - for( int j=0; j < ratio[1]; ++j){ - for( int i=0; i < ratio[0]; ++i){ - - int irow = i + j*ratio[0] +k*ratio[0]*ratio[1]; - - if(i > 0){ - mat(irow,irow) += ddx2; - mat(irow,irow-1) += -ddx2; - } - - if(i < ratio[0]-1){ - mat(irow,irow) += ddx2; - mat(irow,irow+1) += -ddx2; - } - - if(j > 0){ - mat(irow,irow) += ddy2; - mat(irow,irow-ratio[0]) += -ddy2; - } - - if(j < ratio[1]-1){ - mat(irow,irow) += ddy2; - mat(irow,irow+ratio[0]) += -ddy2; - } - - if(k > 0){ - mat(irow,irow) += ddz2; - mat(irow,irow-ratio[0]*ratio[1]) += -ddz2; - } - - if(k < ratio[2]-1){ - mat(irow,irow) += ddz2; - mat(irow,irow+ratio[0]*ratio[1]) += -ddz2; - } - } - } - } + Array2D mat; -// hcak matrxi to be nonsingular - mat(prodrat-1,prodrat-1) +=ddx2; // form rhs - Array1D rhs; - Array1D phi; + Array1D rhs; + Array1D phi; for( int k=0; k < ratio[2]; ++k){ for( int j=0; j < ratio[1]; ++j){ @@ -564,7 +597,6 @@ facediv_int_gen (int ci, int cj, int ck, int nf, // solver the system - LUSolver lusolver(mat); lusolver(phi.begin(), rhs.begin()); // this is for sanity check diff --git a/Src/AmrCore/AMReX_Interpolater.H b/Src/AmrCore/AMReX_Interpolater.H index 10e60eef41d..ae9bfc430d2 100644 --- a/Src/AmrCore/AMReX_Interpolater.H +++ b/Src/AmrCore/AMReX_Interpolater.H @@ -5,6 +5,11 @@ #include #include #include +#include +#include + +#include +#include /** * \file AMReX_Interpolater.H @@ -543,6 +548,9 @@ class FaceDivFree public Interpolater { public: + + FaceDivFree (); + /** * \brief Returns coarsened box given fine box and refinement ratio. * @@ -622,8 +630,29 @@ public: int actual_comp, int actual_state, RunOn runon) override; -}; +private: + +#if (AMREX_SPACEDIM == 2) + using Solver_t = std::variant, // IntVect(2,2): refinement + LUSolver< 8,Real>, // IntVect(4,2) + LUSolver< 8,Real>, // IntVect(2,4) + LUSolver<16,Real>>;// IntVect(4,4) +#elif (AMREX_SPACEDIM == 3) + using Solver_t = std::variant, // IntVect(2,2,2): refinement + LUSolver<16,Real>, // IntVect(4,2,2) + LUSolver<16,Real>, // IntVect(2,4,2) + LUSolver<32,Real>, // IntVect(4,4,2) + LUSolver<16,Real>, // IntVect(2,2,4) + LUSolver<32,Real>, // IntVect(4,2,4) + LUSolver<32,Real>, // IntVect(2,4,4) + LUSolver<64,Real>>;// IntVect(4,4,4) +#endif + +#if (AMREX_SPACEDIM > 1) + Vector> m_lusolver; +#endif +}; /** * \brief Piecewise constant tangential interpolation / linear normal interpolation of face data. diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index ee7efede33e..bd6e32b130e 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -1,10 +1,12 @@ +#include #include -#include #include +#include #include #include #include +#include #include @@ -1295,6 +1297,13 @@ CellConservativeQuartic::interp (const FArrayBox& crse, }); } +FaceDivFree::FaceDivFree () + : Interpolater() +#if (AMREX_SPACEDIM > 1) + , m_lusolver(OpenMP::get_max_threads()) +#endif +{} + Box FaceDivFree::CoarseBox (const Box& fine, int ratio) @@ -1345,14 +1354,26 @@ FaceDivFree::interp_arr (Array const& crse, const int /*actual_state*/, const RunOn runon) { +#if (AMREX_SPACEDIM == 1) + amrex::ignore_unused(crse,crse_comp,fine,fine_comp,ncomp,fine_region,ratio, + solve_mask,fine_geom,runon); +#else + BL_PROFILE("FaceDivFree::interp()"); Array types; for (int d=0; d cell_size = fine_geom.CellSizeArray(); @@ -1368,6 +1389,8 @@ FaceDivFree::interp_arr (Array const& crse, { maskarr[d] = solve_mask[d]->const_array(0); } } + + // JBB 1. don't know what mark does here // JBB 2. build and factor matrix here or build and factor it earlier somehow @@ -1407,18 +1430,56 @@ FaceDivFree::interp_arr (Array const& crse, }); }); - if( AMREX_D_TERM (ratio[0] == 2, && ratio[1] == 2, && ratio[2] == 2)){ - AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n, - { + if (ratio == 2) { + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n, + { amrex::facediv_int(i, j, k, fine_comp+n, finearr, ratio, cell_size); }); } else { - AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n, - { -// JBB want to pass through LU factored matrix here - amrex::facediv_int_gen(i, j, k, fine_comp+n, finearr, ratio, cell_size); - }); + auto& lusolver = m_lusolver[OpenMP::get_thread_num()][fine_geom.Domain()]; + void* psolver_h = nullptr; + std::size_t solver_size = 0; + + constexpr int ncases = AMREX_D_TERM(2,*2,*2); + int rr_case = -1; + // Case 0 is ratio == 2. It has been handled in the if-block. + constexpr_for<1,ncases>([&] (auto i) + { + if (facediv_get_refratio() == ratio) { + AMREX_ASSERT(rr_case == -1); + rr_case = i; + if (lusolver.index() != i) { + lusolver.template emplace( + facediv_init_lusolver + ::size>(ratio, cell_size)); + } + psolver_h = (void*) &(std::get(lusolver)); + solver_size = sizeof(std::variant_alternative_t); + } + }); + AMREX_ASSERT(rr_case > 0 && rr_case < ncases && psolver_h); + +#ifdef AMREX_USE_GPU + auto* psolver_d = (void*) The_Async_Arena()->alloc(solver_size); + Gpu::htod_memcpy_async(psolver_d, psolver_h, solver_size); +#else + auto* psolver_d = psolver_h; +#endif + + ParallelFor(TypeList>{}, {rr_case}, + fine_region, ncomp, + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, auto rrc) + { + auto const* ps = (std::variant_alternative_t const*)psolver_d; + amrex::facediv_int_gen(i, j, k, fine_comp+n, finearr, ratio, cell_size, *ps); + }); + +#ifdef AMREX_USE_GPU + The_Async_Arena()->free(psolver_d); +#endif } + +#endif } Box diff --git a/Src/Base/AMReX_CTOParallelForImpl.H b/Src/Base/AMReX_CTOParallelForImpl.H index 99dc20b452a..80abe87b1ae 100644 --- a/Src/Base/AMReX_CTOParallelForImpl.H +++ b/Src/Base/AMReX_CTOParallelForImpl.H @@ -6,6 +6,7 @@ #include #include +#include #include /* This header is not for the users to include directly. It's meant to be @@ -73,9 +74,17 @@ namespace detail amrex::ignore_unused(found_option); AMREX_ASSERT(found_option); } + + template + constexpr auto MakeCTOImpl(std::integer_sequence) + -> CompileTimeOptions; } /// \endcond +template +using CTOSeq = decltype(detail::MakeCTOImpl( + std::make_integer_sequence{})); + /** * \brief Compile time optimization of kernels with run time options. * diff --git a/Src/Base/AMReX_LUSolver.H b/Src/Base/AMReX_LUSolver.H index bd60fc609dc..37ed6f8e2ef 100644 --- a/Src/Base/AMReX_LUSolver.H +++ b/Src/Base/AMReX_LUSolver.H @@ -16,6 +16,8 @@ class LUSolver { public: + static constexpr int size = N; + LUSolver () = default; AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE From 9eab38735d0af786053466dadde2869f9a41aab6 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 28 Mar 2026 21:55:39 -0700 Subject: [PATCH 05/15] use c_fine_region --- Src/AmrCore/AMReX_Interpolater.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index bd6e32b130e..b2cd66ffae9 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -1467,7 +1467,7 @@ FaceDivFree::interp_arr (Array const& crse, #endif ParallelFor(TypeList>{}, {rr_case}, - fine_region, ncomp, + c_fine_region, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, auto rrc) { auto const* ps = (std::variant_alternative_t const*)psolver_d; From cb7d3a1f32db6b85497a2b4e259a343763cce05f Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 28 Mar 2026 22:28:57 -0700 Subject: [PATCH 06/15] Fix --- Src/AmrCore/AMReX_Interp_3D_C.H | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index 57a894f238f..3ef8569dab7 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -178,7 +178,7 @@ facediv_face_interp (int ci, int cj, int ck, Real ysloc = (j+Real(0.5))*dys - Real(0.5); Real zsloc = (k+Real(0.5))*dzs - Real(0.5); - fine(fi,fj+j,fk+k,nc) = crse(ci,cj,ck,nc) + ysloc*slpy + zsloc*slpz + ysloc*zsloc*slpyz; + fine(fi,fj+j,fk+k,nf) = crse(ci,cj,ck,nc) + ysloc*slpy + zsloc*slpz + ysloc*zsloc*slpyz; } } @@ -216,7 +216,7 @@ facediv_face_interp (int ci, int cj, int ck, Real xsloc = (i+Real(0.5))*dxs - Real(0.5); Real zsloc = (k+Real(0.5))*dzs - Real(0.5); - fine(fi+i,fj,fk+k,nc) = crse(ci,cj,ck,nc) + xsloc*slpx + zsloc*slpz + xsloc*zsloc*slpxz; + fine(fi+i,fj,fk+k,nf) = crse(ci,cj,ck,nc) + xsloc*slpx + zsloc*slpz + xsloc*zsloc*slpxz; } } @@ -250,12 +250,12 @@ facediv_face_interp (int ci, int cj, int ck, const Real dxs = Real(1.)/ratio[0]; const Real dys = Real(1.)/ratio[1]; - for (int j=0; j Date: Sat, 28 Mar 2026 22:30:12 -0700 Subject: [PATCH 07/15] cleanup --- Src/AmrCore/AMReX_Interp_3D_C.H | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index 3ef8569dab7..c0fd65f5ddd 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -575,12 +575,6 @@ facediv_int_gen (int ci, int cj, int ck, int nf, Real target_div = (uplus-uminus)/(N*dx) + (vplus-vminus)/(N*dy) + (wplus-wminus)/(N*dz); -// set up matrix for local MAC projection - - Array2D mat; - - - // form rhs Array1D rhs; Array1D phi; From 46c36d0d879efae6b8417f80fcbd6e0a261104fe Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 28 Mar 2026 22:35:51 -0700 Subject: [PATCH 08/15] Add ref_ratio --- Tests/DivFreePatch/inputs | 3 +++ Tests/DivFreePatch/main.cpp | 12 +++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Tests/DivFreePatch/inputs b/Tests/DivFreePatch/inputs index e5245751940..b820ea30b22 100644 --- a/Tests/DivFreePatch/inputs +++ b/Tests/DivFreePatch/inputs @@ -3,6 +3,9 @@ f_offset = 8 max_grid_size = 128 +# default refinement ratio (can set anisotropic values like 4 2 2) +ref_ratio = 2 2 2 + # For testing nested vs. non-nested InterpFromCoarse #nghost_f = 1 diff --git a/Tests/DivFreePatch/main.cpp b/Tests/DivFreePatch/main.cpp index 857461a1f64..664d23cd96f 100644 --- a/Tests/DivFreePatch/main.cpp +++ b/Tests/DivFreePatch/main.cpp @@ -142,6 +142,8 @@ void main_main () amrex::Vector f_hi(AMREX_SPACEDIM, 4); int max_grid_size = 64; + Vector ref_ratio_vec(AMREX_SPACEDIM, 2); + { ParmParse pp; pp.query("n_cell", n_cell); @@ -153,6 +155,7 @@ void main_main () pp.queryarr("c_hi", c_hi, 0, AMREX_SPACEDIM); pp.queryarr("f_lo", f_lo, 0, AMREX_SPACEDIM); pp.queryarr("f_hi", f_hi, 0, AMREX_SPACEDIM); + pp.queryarr("ref_ratio", ref_ratio_vec, 0, AMREX_SPACEDIM); if (n_cell != 0) { @@ -165,8 +168,15 @@ void main_main () } } + for (int i = 0; i < AMREX_SPACEDIM; ++i) { + if (ref_ratio_vec[i] < 1) { + amrex::Abort("ref_ratio entries must be >= 1"); + } + } + + IntVect ratio(ref_ratio_vec); + int ncomp = 1; - IntVect ratio{AMREX_D_DECL(2,2,2)}; // For this stencil (octree), always 2. IntVect ghost_c{AMREX_D_DECL(nghost_c, nghost_c, nghost_c)}; // For this stencil (octree), need 1 coarse ghost. IntVect ghost_f{AMREX_D_DECL(nghost_f, nghost_f, nghost_f)}; // For this stencil (octree), need 1 fine ghost. Geometry c_geom, f_geom, f_geom_wghost, f_geom_all, f_geom_partial; From bc7e8785ce9d0c7188a7a5bde3622687067e7600 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 28 Mar 2026 22:48:14 -0700 Subject: [PATCH 09/15] Add abort to test --- Src/AmrCore/AMReX_Interpolater.cpp | 2 +- Tests/DivFreePatch/inputs | 2 ++ Tests/DivFreePatch/main.cpp | 44 ++++++++++++++++++++++++++---- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index b2cd66ffae9..11d8e1b8661 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -1366,7 +1366,7 @@ FaceDivFree::interp_arr (Array const& crse, { types[d].set(d); } #ifdef AMREX_USE_GPU - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ratio == 2 || runon = RunOn::Device, + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ratio == 2 || runon == RunOn::Device, "Must run on GPU if ratio is not 2"); #endif diff --git a/Tests/DivFreePatch/inputs b/Tests/DivFreePatch/inputs index b820ea30b22..9fb2965336e 100644 --- a/Tests/DivFreePatch/inputs +++ b/Tests/DivFreePatch/inputs @@ -6,6 +6,8 @@ max_grid_size = 128 # default refinement ratio (can set anisotropic values like 4 2 2) ref_ratio = 2 2 2 +div_tol = 1e-9 + # For testing nested vs. non-nested InterpFromCoarse #nghost_f = 1 diff --git a/Tests/DivFreePatch/main.cpp b/Tests/DivFreePatch/main.cpp index 664d23cd96f..5e7846fd313 100644 --- a/Tests/DivFreePatch/main.cpp +++ b/Tests/DivFreePatch/main.cpp @@ -135,6 +135,8 @@ void main_main () int f_offset = 4; int nghost_c = 1; int nghost_f = 2; + Real div_tol = 1.0e-9; + bool test_pass = true; amrex::Vector c_lo(AMREX_SPACEDIM, 0); amrex::Vector c_hi(AMREX_SPACEDIM, 32); @@ -151,6 +153,7 @@ void main_main () pp.query("max_grid_size", max_grid_size); pp.query("nghost_c", nghost_c); pp.query("nghost_f", nghost_f); + pp.query("div_tol", div_tol); pp.queryarr("c_hi", c_hi, 0, AMREX_SPACEDIM); pp.queryarr("f_lo", f_lo, 0, AMREX_SPACEDIM); @@ -385,12 +388,16 @@ void main_main () ratio, mapper, bcrec, 0); } - // Check for errors + bool interp_nan = false; for (int i=0; i div_tol) { + amrex::Print() << " InterpFromCoarse abs error exceeds tolerance " << div_tol << '\n'; + test_pass = false; + } + // *************************************************************** // Change coarse values, save current fine values for checking @@ -519,24 +534,34 @@ void main_main () calcDiv(f_mf_faces, div_fine, f_geom.CellSizeArray()); amrex::VisMF::Write(div_fine, std::string("pltfiles/fineFP")); + Real max_abs_fp = MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "diffFP"); + Real max_rel_fp = MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "", true); amrex::Print() << " Max FillPatchTwoLevels divergence error: absolute relative\n " << " " - < div_tol) { + amrex::Print() << " FillPatch abs error exceeds tolerance " << div_tol << '\n'; + test_pass = false; + } for (int i=0; i Date: Sat, 28 Mar 2026 23:34:07 -0700 Subject: [PATCH 10/15] Fix bug --- Src/AmrCore/AMReX_Interp_3D_C.H | 6 ++-- Src/AmrCore/AMReX_Interpolater.cpp | 5 ++-- Tests/DivFreePatch/main.cpp | 47 +++++++++++++++++++++++++++--- 3 files changed, 49 insertions(+), 9 deletions(-) diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index c0fd65f5ddd..0bd0f3eabd4 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -585,9 +585,9 @@ facediv_int_gen (int ci, int cj, int ck, int nf, int irow = i + j*ratio[0] +k*ratio[0]*ratio[1]; - rhs(irow) = target_div - ((fine[0](fi+i+1,fj+j, fk+k, nf) -fine[0](fi+i,fj+j,fk+k,nf))*dxinv - + (fine[1](fi+i ,fj+j+1,fk+k, nf) - fine[1](fi+i,fj+j,fk+k,nf))*dyinv - + (fine[2](fi+i, fj+j, fk+k+1,nf) - fine[2](fi+i,fj+j,fk+k,nf))*dzinv); + rhs(irow) = ((fine[0](fi+i+1,fj+j, fk+k, nf) -fine[0](fi+i,fj+j,fk+k,nf))*dxinv + + (fine[1](fi+i ,fj+j+1,fk+k, nf) - fine[1](fi+i,fj+j,fk+k,nf))*dyinv + + (fine[2](fi+i, fj+j, fk+k+1,nf) - fine[2](fi+i,fj+j,fk+k,nf))*dzinv) - target_div; } } } diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index 11d8e1b8661..f7af40c4f15 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -1383,8 +1383,9 @@ FaceDivFree::interp_arr (Array const& crse, GpuArray, AMREX_SPACEDIM> maskarr; for (int d=0; dconst_array(crse_comp); - finearr[d] = fine[d]->array(fine_comp); + // Use whole-array views so facediv kernels can index absolute components. + crsearr[d] = crse[d]->const_array(); + finearr[d] = fine[d]->array(); if (solve_mask[d] != nullptr) { maskarr[d] = solve_mask[d]->const_array(0); } } diff --git a/Tests/DivFreePatch/main.cpp b/Tests/DivFreePatch/main.cpp index 5e7846fd313..05ae62b11cb 100644 --- a/Tests/DivFreePatch/main.cpp +++ b/Tests/DivFreePatch/main.cpp @@ -158,7 +158,23 @@ void main_main () pp.queryarr("c_hi", c_hi, 0, AMREX_SPACEDIM); pp.queryarr("f_lo", f_lo, 0, AMREX_SPACEDIM); pp.queryarr("f_hi", f_hi, 0, AMREX_SPACEDIM); - pp.queryarr("ref_ratio", ref_ratio_vec, 0, AMREX_SPACEDIM); + + int n_ref_ratio = pp.countval("ref_ratio"); + if (n_ref_ratio == 1) { + int rr = 0; + pp.get("ref_ratio", rr); + for (int i=0; i= AMREX_SPACEDIM) { + Vector tmp(n_ref_ratio); + pp.getarr("ref_ratio", tmp, 0, n_ref_ratio); + for (int i=0; i 0) { + amrex::Abort("ref_ratio must contain either 1 value or >= AMREX_SPACEDIM values"); + } if (n_cell != 0) { @@ -172,8 +188,8 @@ void main_main () } for (int i = 0; i < AMREX_SPACEDIM; ++i) { - if (ref_ratio_vec[i] < 1) { - amrex::Abort("ref_ratio entries must be >= 1"); + if (ref_ratio_vec[i] != 2 && ref_ratio_vec[i] != 4) { + amrex::Abort("DivFreePatch requires ref_ratio entries of 2 or 4"); } } @@ -388,6 +404,28 @@ void main_main () ratio, mapper, bcrec, 0); } + if ((ratio[0] != 2) || (ratio[1] != 2) || (ratio[2] != 2)) { + Array averaged_faces; + for (int d=0; d fine_face_ptrs + {AMREX_D_DECL(&f_mf_faces[0], &f_mf_faces[1], &f_mf_faces[2])}; + Array avg_face_ptrs + {AMREX_D_DECL(&averaged_faces[0], &averaged_faces[1], &averaged_faces[2])}; + amrex::average_down_faces(fine_face_ptrs, avg_face_ptrs, ratio, 0); + for (int d=0; d Date: Sat, 28 Mar 2026 23:48:37 -0700 Subject: [PATCH 11/15] clean up --- Src/AmrCore/AMReX_Interp_1D_C.H | 2 +- Src/AmrCore/AMReX_Interp_2D_C.H | 50 +++++++------- Src/AmrCore/AMReX_Interp_3D_C.H | 106 ++++++++++++++--------------- Src/AmrCore/AMReX_Interpolater.cpp | 15 ++-- 4 files changed, 86 insertions(+), 87 deletions(-) diff --git a/Src/AmrCore/AMReX_Interp_1D_C.H b/Src/AmrCore/AMReX_Interp_1D_C.H index cddb819ac7f..399f40cc5a5 100644 --- a/Src/AmrCore/AMReX_Interp_1D_C.H +++ b/Src/AmrCore/AMReX_Interp_1D_C.H @@ -73,7 +73,7 @@ nodebilin_interp (Box const& bx, Array4 const& fine, const int fcomp, const i template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void facediv_face_interp (int /*ci*/, int /*cj*/, int /*ck*/, - int /*nc*/, int /*nf*/, int /*idir*/, + int /*n*/, int /*idir*/, Array4 const& /*crse*/, Array4 const& /*fine*/, Array4 const& /*mask*/, diff --git a/Src/AmrCore/AMReX_Interp_2D_C.H b/Src/AmrCore/AMReX_Interp_2D_C.H index 9ddb7144e86..029403d7e00 100644 --- a/Src/AmrCore/AMReX_Interp_2D_C.H +++ b/Src/AmrCore/AMReX_Interp_2D_C.H @@ -97,14 +97,14 @@ nodebilin_interp (Box const& bx, Array4 const& fine, const int fcomp, const i template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void facediv_face_interp (int ci, int cj, int /*ck*/, - int nc, int nf, int idir, + int n, int idir, Array4 const& crse, Array4 const& fine, Array4 const& mask, IntVect const& ratio) noexcept { if (mask) { - if (!mask(ci, cj, 0, nc)) + if (!mask(ci, cj, 0, n)) { return; } } @@ -114,23 +114,23 @@ facediv_face_interp (int ci, int cj, int /*ck*/, switch (idir) { case 0: { - const Real neg = crse(ci, cj-1, 0, nc); - const Real cen = crse(ci, cj , 0, nc); - const Real pos = crse(ci, cj+1, 0, nc); + const Real neg = crse(ci, cj-1, 0, n); + const Real cen = crse(ci, cj , 0, n); + const Real pos = crse(ci, cj+1, 0, n); - fine(fi, fj , 0, nf) = Real(0.125)*(8*cen + neg - pos); - fine(fi, fj+1, 0, nf) = Real(0.125)*(8*cen + pos - neg); + fine(fi, fj , 0, n) = Real(0.125)*(8*cen + neg - pos); + fine(fi, fj+1, 0, n) = Real(0.125)*(8*cen + pos - neg); break; } case 1: { - const Real neg = crse(ci-1, cj, 0, nc); - const Real cen = crse(ci , cj, 0, nc); - const Real pos = crse(ci+1, cj, 0, nc); + const Real neg = crse(ci-1, cj, 0, n); + const Real cen = crse(ci , cj, 0, n); + const Real pos = crse(ci+1, cj, 0, n); - fine(fi , fj, 0, nf) = Real(0.125)*(8*cen + neg - pos); - fine(fi+1, fj, 0, nf) = Real(0.125)*(8*cen + pos - neg); + fine(fi , fj, 0, n) = Real(0.125)*(8*cen + neg - pos); + fine(fi+1, fj, 0, n) = Real(0.125)*(8*cen + pos - neg); break; } @@ -140,7 +140,7 @@ facediv_face_interp (int ci, int cj, int /*ck*/, template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void -facediv_int (int ci, int cj, int /*ck*/, int nf, +facediv_int (int ci, int cj, int /*ck*/, int n, GpuArray, AMREX_SPACEDIM> const& fine, IntVect const& ratio, GpuArray const& cellSize) noexcept @@ -149,25 +149,25 @@ facediv_int (int ci, int cj, int /*ck*/, int nf, const int fj = cj*ratio[1]; // References to fine exterior values. - const Real umm = fine[0](fi, fj, 0, nf); - const Real ump = fine[0](fi, fj+1, 0, nf); - const Real upm = fine[0](fi+2, fj, 0, nf); - const Real upp = fine[0](fi+2, fj+1, 0, nf); + const Real umm = fine[0](fi, fj, 0, n); + const Real ump = fine[0](fi, fj+1, 0, n); + const Real upm = fine[0](fi+2, fj, 0, n); + const Real upp = fine[0](fi+2, fj+1, 0, n); - const Real vmm = fine[1](fi, fj, 0, nf); - const Real vmp = fine[1](fi+1, fj, 0, nf); - const Real vpm = fine[1](fi, fj+2, 0, nf); - const Real vpp = fine[1](fi+1, fj+2, 0, nf); + const Real vmm = fine[1](fi, fj, 0, n); + const Real vmp = fine[1](fi+1, fj, 0, n); + const Real vpm = fine[1](fi, fj+2, 0, n); + const Real vpp = fine[1](fi+1, fj+2, 0, n); const Real dxdy = cellSize[0]/cellSize[1]; const Real x_corr = Real(0.25)*dxdy * (vpp+vmm-vmp-vpm); const Real y_corr = Real(0.25)/dxdy * (upp+umm-ump-upm); // Calc fine faces on interior of coarse cells. - fine[0](fi+1,fj ,0,nf) = Real(0.5)*(umm+upm) + x_corr; - fine[0](fi+1,fj+1,0,nf) = Real(0.5)*(ump+upp) + x_corr; - fine[1](fi, fj+1,0,nf) = Real(0.5)*(vmm+vpm) + y_corr; - fine[1](fi+1,fj+1,0,nf) = Real(0.5)*(vmp+vpp) + y_corr; + fine[0](fi+1,fj ,0,n) = Real(0.5)*(umm+upm) + x_corr; + fine[0](fi+1,fj+1,0,n) = Real(0.5)*(ump+upp) + x_corr; + fine[1](fi, fj+1,0,n) = Real(0.5)*(vmm+vpm) + y_corr; + fine[1](fi+1,fj+1,0,n) = Real(0.5)*(vmp+vpp) + y_corr; } diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index 0bd0f3eabd4..bfd6b689728 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -130,7 +130,7 @@ nodebilin_interp (Box const& bx, Array4 const& fine, const int fcomp, const i template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void facediv_face_interp (int ci, int cj, int ck, - int nc, int nf, int idir, + int n, int idir, Array4 const& crse, Array4 const& fine, Array4 const& mask, @@ -138,7 +138,7 @@ facediv_face_interp (int ci, int cj, int ck, { if (mask) { - if (!mask(ci, cj, ck, nc)) + if (!mask(ci, cj, ck, n)) { return; } } @@ -166,9 +166,9 @@ facediv_face_interp (int ci, int cj, int ck, // fine(fi,fj+1,fk ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) ); // fine(fi,fj+1,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) ); - const Real slpy = Real(0.5)* (crse(ci,cj+1,ck ,nc) - crse(ci,cj-1,ck ,nc)); - const Real slpz = Real(0.5)* (crse(ci,cj, ck+1,nc) - crse(ci,cj, ck-1,nc)); - const Real slpyz = Real(0.25)*(crse(ci,cj+1,ck+1,nc) - crse(ci,cj+1,ck-1,nc) - crse(ci,cj-1,ck+1,nc) + crse(ci,cj-1,ck-1,nc)); + const Real slpy = Real(0.5)* (crse(ci,cj+1,ck ,n) - crse(ci,cj-1,ck ,n)); + const Real slpz = Real(0.5)* (crse(ci,cj, ck+1,n) - crse(ci,cj, ck-1,n)); + const Real slpyz = Real(0.25)*(crse(ci,cj+1,ck+1,n) - crse(ci,cj+1,ck-1,n) - crse(ci,cj-1,ck+1,n) + crse(ci,cj-1,ck-1,n)); const Real dys = Real(1.)/ratio[1]; const Real dzs = Real(1.)/ratio[2]; @@ -178,7 +178,7 @@ facediv_face_interp (int ci, int cj, int ck, Real ysloc = (j+Real(0.5))*dys - Real(0.5); Real zsloc = (k+Real(0.5))*dzs - Real(0.5); - fine(fi,fj+j,fk+k,nf) = crse(ci,cj,ck,nc) + ysloc*slpy + zsloc*slpz + ysloc*zsloc*slpyz; + fine(fi,fj+j,fk+k,n) = crse(ci,cj,ck,n) + ysloc*slpy + zsloc*slpz + ysloc*zsloc*slpyz; } } @@ -204,9 +204,9 @@ facediv_face_interp (int ci, int cj, int ck, // fine(fi ,fj,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) ); // fine(fi+1,fj,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) ); - const Real slpx = Real(0.5)* (crse(ci+1,cj,ck ,nc) - crse(ci-1,cj,ck ,nc)); - const Real slpz = Real(0.5)* (crse(ci ,cj,ck+1,nc) - crse(ci ,cj,ck-1,nc)); - const Real slpxz = Real(0.25)*(crse(ci+1,cj,ck+1,nc) - crse(ci+1,cj,ck-1,nc) - crse(ci-1,cj,ck+1,nc) + crse(ci-1,cj,ck-1,nc)); + const Real slpx = Real(0.5)* (crse(ci+1,cj,ck ,n) - crse(ci-1,cj,ck ,n)); + const Real slpz = Real(0.5)* (crse(ci ,cj,ck+1,n) - crse(ci ,cj,ck-1,n)); + const Real slpxz = Real(0.25)*(crse(ci+1,cj,ck+1,n) - crse(ci+1,cj,ck-1,n) - crse(ci-1,cj,ck+1,n) + crse(ci-1,cj,ck-1,n)); const Real dxs = Real(1.)/ratio[0]; const Real dzs = Real(1.)/ratio[2]; @@ -216,7 +216,7 @@ facediv_face_interp (int ci, int cj, int ck, Real xsloc = (i+Real(0.5))*dxs - Real(0.5); Real zsloc = (k+Real(0.5))*dzs - Real(0.5); - fine(fi+i,fj,fk+k,nf) = crse(ci,cj,ck,nc) + xsloc*slpx + zsloc*slpz + xsloc*zsloc*slpxz; + fine(fi+i,fj,fk+k,n) = crse(ci,cj,ck,n) + xsloc*slpx + zsloc*slpz + xsloc*zsloc*slpxz; } } @@ -243,9 +243,9 @@ facediv_face_interp (int ci, int cj, int ck, // fine(fi ,fj+1,fk,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) ); // fine(fi+1,fj+1,fk,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) ); - const Real slpx = Real(0.5)* (crse(ci+1,cj, ck,nc) - crse(ci-1,cj, ck ,nc)); - const Real slpy = Real(0.5)* (crse(ci ,cj+1,ck,nc) - crse(ci, cj-1,ck ,nc)); - const Real slpxy = Real(0.25)*(crse(ci+1,cj+1,ck,nc) - crse(ci+1,cj-1,ck,nc) - crse(ci-1,cj+1,ck,nc) + crse(ci-1,cj-1,ck,nc)); + const Real slpx = Real(0.5)* (crse(ci+1,cj, ck,n) - crse(ci-1,cj, ck ,n)); + const Real slpy = Real(0.5)* (crse(ci ,cj+1,ck,n) - crse(ci, cj-1,ck ,n)); + const Real slpxy = Real(0.25)*(crse(ci+1,cj+1,ck,n) - crse(ci+1,cj-1,ck,n) - crse(ci-1,cj+1,ck,n) + crse(ci-1,cj-1,ck,n)); const Real dxs = Real(1.)/ratio[0]; const Real dys = Real(1.)/ratio[1]; @@ -255,7 +255,7 @@ facediv_face_interp (int ci, int cj, int ck, Real xsloc = (i+Real(0.5))*dxs - Real(0.5); Real ysloc = (j+Real(0.5))*dys - Real(0.5); - fine(fi+i,fj+j,fk,nf) = crse(ci,cj,ck,nc) + xsloc*slpx + ysloc*slpy + xsloc*ysloc*slpxy; + fine(fi+i,fj+j,fk,n) = crse(ci,cj,ck,n) + xsloc*slpx + ysloc*slpy + xsloc*ysloc*slpxy; } } @@ -269,7 +269,7 @@ facediv_face_interp (int ci, int cj, int ck, template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void -facediv_int (int ci, int cj, int ck, int nf, +facediv_int (int ci, int cj, int ck, int n, GpuArray, AMREX_SPACEDIM> const& fine, IntVect const& ratio, GpuArray const& cellSize) noexcept @@ -279,32 +279,32 @@ facediv_int (int ci, int cj, int ck, int nf, const int fk = ck*ratio[2]; // References to fine exterior values needed for interior calculation. - const Real u000 = fine[0](fi, fj , fk , nf); - const Real u200 = fine[0](fi+2, fj , fk , nf); - const Real u010 = fine[0](fi, fj+1, fk , nf); - const Real u210 = fine[0](fi+2, fj+1, fk , nf); - const Real u001 = fine[0](fi, fj , fk+1, nf); - const Real u201 = fine[0](fi+2, fj , fk+1, nf); - const Real u011 = fine[0](fi, fj+1, fk+1, nf); - const Real u211 = fine[0](fi+2, fj+1, fk+1, nf); - - const Real v000 = fine[1](fi , fj , fk , nf); - const Real v020 = fine[1](fi , fj+2, fk , nf); - const Real v100 = fine[1](fi+1, fj , fk , nf); - const Real v120 = fine[1](fi+1, fj+2, fk , nf); - const Real v001 = fine[1](fi , fj , fk+1, nf); - const Real v021 = fine[1](fi , fj+2, fk+1, nf); - const Real v101 = fine[1](fi+1, fj , fk+1, nf); - const Real v121 = fine[1](fi+1, fj+2, fk+1, nf); - - const Real w000 = fine[2](fi , fj , fk , nf); - const Real w002 = fine[2](fi , fj , fk+2, nf); - const Real w100 = fine[2](fi+1, fj , fk , nf); - const Real w102 = fine[2](fi+1, fj , fk+2, nf); - const Real w010 = fine[2](fi , fj+1, fk , nf); - const Real w012 = fine[2](fi , fj+1, fk+2, nf); - const Real w110 = fine[2](fi+1, fj+1, fk , nf); - const Real w112 = fine[2](fi+1, fj+1, fk+2, nf); + const Real u000 = fine[0](fi, fj , fk , n); + const Real u200 = fine[0](fi+2, fj , fk , n); + const Real u010 = fine[0](fi, fj+1, fk , n); + const Real u210 = fine[0](fi+2, fj+1, fk , n); + const Real u001 = fine[0](fi, fj , fk+1, n); + const Real u201 = fine[0](fi+2, fj , fk+1, n); + const Real u011 = fine[0](fi, fj+1, fk+1, n); + const Real u211 = fine[0](fi+2, fj+1, fk+1, n); + + const Real v000 = fine[1](fi , fj , fk , n); + const Real v020 = fine[1](fi , fj+2, fk , n); + const Real v100 = fine[1](fi+1, fj , fk , n); + const Real v120 = fine[1](fi+1, fj+2, fk , n); + const Real v001 = fine[1](fi , fj , fk+1, n); + const Real v021 = fine[1](fi , fj+2, fk+1, n); + const Real v101 = fine[1](fi+1, fj , fk+1, n); + const Real v121 = fine[1](fi+1, fj+2, fk+1, n); + + const Real w000 = fine[2](fi , fj , fk , n); + const Real w002 = fine[2](fi , fj , fk+2, n); + const Real w100 = fine[2](fi+1, fj , fk , n); + const Real w102 = fine[2](fi+1, fj , fk+2, n); + const Real w010 = fine[2](fi , fj+1, fk , n); + const Real w012 = fine[2](fi , fj+1, fk+2, n); + const Real w110 = fine[2](fi+1, fj+1, fk , n); + const Real w112 = fine[2](fi+1, fj+1, fk+2, n); const Real dx = cellSize[0]; const Real dy = cellSize[1]; @@ -319,73 +319,73 @@ facediv_int (int ci, int cj, int ck, int nf, const Real yspzs = dy*dy + dz*dz; const Real zspxs = dz*dz + dx*dx; - fine[0](fi+1, fj , fk , nf) = Real(0.5)*(u000+u200) + fine[0](fi+1, fj , fk , n) = Real(0.5)*(u000+u200) + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v000+v120-v020-v100) + dx3/(8*dy*zspxs)*(v001+v121-v021-v101) + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w000+w102-w002-w100) + dx3/(8*dz*xspys)*(w010+w112-w012-w110); - fine[0](fi+1, fj+1, fk , nf) = Real(0.5)*(u010+u210) + fine[0](fi+1, fj+1, fk , n) = Real(0.5)*(u010+u210) + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v000+v120-v020-v100) + dx3/(8*dy*zspxs)*(v001+v121-v021-v101) + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w010+w112-w012-w110) + dx3/(8*dz*xspys)*(w000+w102-w100-w002); - fine[0](fi+1, fj , fk+1, nf) = Real(0.5)*(u001+u201) + fine[0](fi+1, fj , fk+1, n) = Real(0.5)*(u001+u201) + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v001+v121-v021-v101) + dx3/(8*dy*zspxs)*(v000+v120-v020-v100) + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w000+w102-w002-w100) + dx3/(8*dz*xspys)*(w010+w112-w012-w110); - fine[0](fi+1, fj+1, fk+1, nf) = Real(0.5)*(u011+u211) + fine[0](fi+1, fj+1, fk+1, n) = Real(0.5)*(u011+u211) + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v001+v121-v021-v101) + dx3/(8*dy*zspxs)*(v000+v120-v020-v100) + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w010+w112-w012-w110) + dx3/(8*dz*xspys)*(w000+w102-w002-w100); - fine[1](fi , fj+1, fk , nf) = Real(0.5)*(v000+v020) + fine[1](fi , fj+1, fk , n) = Real(0.5)*(v000+v020) + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u000+u210-u010-u200) + dy3/(8*dx*yspzs)*(u001+u211-u011-u201) + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w000+w012-w002-w010) + dy3/(8*dz*xspys)*(w100+w112-w102-w110); - fine[1](fi+1, fj+1, fk , nf) = Real(0.5)*(v100+v120) + fine[1](fi+1, fj+1, fk , n) = Real(0.5)*(v100+v120) + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u000+u210-u010-u200) + dy3/(8*dx*yspzs)*(u001+u211-u011-u201) + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w100+w112-w102-w110) + dy3/(8*dz*xspys)*(w000+w012-w002-w010); - fine[1](fi , fj+1, fk+1, nf) = Real(0.5)*(v001+v021) + fine[1](fi , fj+1, fk+1, n) = Real(0.5)*(v001+v021) + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u001+u211-u011-u201) + dy3/(8*dx*yspzs)*(u000+u210-u010-u200) + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w000+w012-w002-w010) + dy3/(8*dz*xspys)*(w100+w112-w102-w110); - fine[1](fi+1, fj+1, fk+1, nf) = Real(0.5)*(v101+v121) + fine[1](fi+1, fj+1, fk+1, n) = Real(0.5)*(v101+v121) + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u001+u211-u011-u201) + dy3/(8*dx*yspzs)*(u000+u210-u010-u200) + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w100+w112-w102-w110) + dy3/(8*dz*xspys)*(w000+w012-w002-w010); - fine[2](fi , fj , fk+1, nf) = Real(0.5)*(w000+w002) + fine[2](fi , fj , fk+1, n) = Real(0.5)*(w000+w002) + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u000+u201-u001-u200) + dz3/(8*dx*yspzs)*(u010+u211-u011-u210) + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v000+v021-v001-v020) + dz3/(8*dy*zspxs)*(v100+v121-v101-v120); - fine[2](fi , fj+1, fk+1, nf) = Real(0.5)*(w010+w012) + fine[2](fi , fj+1, fk+1, n) = Real(0.5)*(w010+w012) + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u010+u211-u011-u210) + dz3/(8*dx*yspzs)*(u000+u201-u001-u200) + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v000+v021-v001-v020) + dz3/(8*dy*zspxs)*(v100+v121-v101-v120); - fine[2](fi+1, fj , fk+1, nf) = Real(0.5)*(w100+w102) + fine[2](fi+1, fj , fk+1, n) = Real(0.5)*(w100+w102) + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u000+u201-u001-u200) + dz3/(8*dx*yspzs)*(u010+u211-u011-u210) + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v100+v121-v101-v120) + dz3/(8*dy*zspxs)*(v000+v021-v001-v020); - fine[2](fi+1, fj+1, fk+1, nf) = Real(0.5)*(w110+w112) + fine[2](fi+1, fj+1, fk+1, n) = Real(0.5)*(w110+w112) + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u010+u211-u011-u210) + dz3/(8*dx*yspzs)*(u000+u201-u001-u200) + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v100+v121-v101-v120) diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index f7af40c4f15..a0b247996db 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -1383,9 +1383,8 @@ FaceDivFree::interp_arr (Array const& crse, GpuArray, AMREX_SPACEDIM> maskarr; for (int d=0; dconst_array(); - finearr[d] = fine[d]->array(); + crsearr[d] = crse[d]->const_array(crse_comp); + finearr[d] = fine[d]->array(fine_comp); if (solve_mask[d] != nullptr) { maskarr[d] = solve_mask[d]->const_array(0); } } @@ -1403,7 +1402,7 @@ FaceDivFree::interp_arr (Array const& crse, { for (int n=0; n (i,j,k,crse_comp+n,fine_comp+n, 0, + amrex::facediv_face_interp (i,j,k,n, 0, crsearr[0], finearr[0], maskarr[0], ratio); } }); @@ -1414,7 +1413,7 @@ FaceDivFree::interp_arr (Array const& crse, { for (int n=0; n (i,j,k,crse_comp+n,fine_comp+n, 1, + amrex::facediv_face_interp (i,j,k,n, 1, crsearr[1], finearr[1], maskarr[1], ratio); } }); @@ -1425,7 +1424,7 @@ FaceDivFree::interp_arr (Array const& crse, { for (int n=0; n (i,j,k,crse_comp+n,fine_comp+n, 2, + amrex::facediv_face_interp (i,j,k,n, 2, crsearr[2], finearr[2], maskarr[2], ratio); } }); @@ -1434,7 +1433,7 @@ FaceDivFree::interp_arr (Array const& crse, if (ratio == 2) { AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n, { - amrex::facediv_int(i, j, k, fine_comp+n, finearr, ratio, cell_size); + amrex::facediv_int(i, j, k, n, finearr, ratio, cell_size); }); } else { auto& lusolver = m_lusolver[OpenMP::get_thread_num()][fine_geom.Domain()]; @@ -1472,7 +1471,7 @@ FaceDivFree::interp_arr (Array const& crse, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, auto rrc) { auto const* ps = (std::variant_alternative_t const*)psolver_d; - amrex::facediv_int_gen(i, j, k, fine_comp+n, finearr, ratio, cell_size, *ps); + amrex::facediv_int_gen(i, j, k, n, finearr, ratio, cell_size, *ps); }); #ifdef AMREX_USE_GPU From 6a62707e2653be34048097336ac761d9195167c4 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 28 Mar 2026 23:55:08 -0700 Subject: [PATCH 12/15] 2D --- Src/AmrCore/AMReX_Interp_2D_C.H | 161 ++++++++++++++++++++++++++++++-- Tests/DivFreePatch/main.cpp | 7 +- 2 files changed, 157 insertions(+), 11 deletions(-) diff --git a/Src/AmrCore/AMReX_Interp_2D_C.H b/Src/AmrCore/AMReX_Interp_2D_C.H index 029403d7e00..fdcf0cf6220 100644 --- a/Src/AmrCore/AMReX_Interp_2D_C.H +++ b/Src/AmrCore/AMReX_Interp_2D_C.H @@ -4,6 +4,7 @@ #include #include +#include namespace amrex { @@ -114,23 +115,25 @@ facediv_face_interp (int ci, int cj, int /*ck*/, switch (idir) { case 0: { - const Real neg = crse(ci, cj-1, 0, n); - const Real cen = crse(ci, cj , 0, n); - const Real pos = crse(ci, cj+1, 0, n); + const Real slpy = Real(0.5)*(crse(ci, cj+1, 0, n) - crse(ci, cj-1, 0, n)); + const Real dys = Real(1.)/ratio[1]; - fine(fi, fj , 0, n) = Real(0.125)*(8*cen + neg - pos); - fine(fi, fj+1, 0, n) = Real(0.125)*(8*cen + pos - neg); + for (int j = 0; j < ratio[1]; ++j) { + Real ysloc = (j+Real(0.5))*dys - Real(0.5); + fine(fi, fj+j, 0, n) = crse(ci, cj, 0, n) + ysloc*slpy; + } break; } case 1: { - const Real neg = crse(ci-1, cj, 0, n); - const Real cen = crse(ci , cj, 0, n); - const Real pos = crse(ci+1, cj, 0, n); + const Real slpx = Real(0.5)*(crse(ci+1, cj, 0, n) - crse(ci-1, cj, 0, n)); + const Real dxs = Real(1.)/ratio[0]; - fine(fi , fj, 0, n) = Real(0.125)*(8*cen + neg - pos); - fine(fi+1, fj, 0, n) = Real(0.125)*(8*cen + pos - neg); + for (int i = 0; i < ratio[0]; ++i) { + Real xsloc = (i+Real(0.5))*dxs - Real(0.5); + fine(fi+i, fj, 0, n) = crse(ci, cj, 0, n) + xsloc*slpx; + } break; } @@ -171,6 +174,144 @@ facediv_int (int ci, int cj, int /*ck*/, int n, } +template =0 && i<4), int> = 0> +constexpr IntVect facediv_get_refratio () +{ + if constexpr (i == 0) { + return IntVect(2,2); + } else if constexpr (i == 1) { + return IntVect(4,2); + } else if constexpr (i == 2) { + return IntVect(2,4); + } else { + return IntVect(4,4); + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +LUSolver +facediv_init_lusolver (IntVect const& ratio, + GpuArray const& cellSize) +{ + Array2D mat; + + for (int irow = 0; irow < N; ++irow) { + for (int icol = 0; icol < N; ++icol) { + mat(irow,icol) = Real(0.0); + } + } + + Real dxinv = Real(1.)/cellSize[0]; + Real dyinv = Real(1.)/cellSize[1]; + Real ddx2 = dxinv*dxinv; + Real ddy2 = dyinv*dyinv; + + for (int j = 0; j < ratio[1]; ++j) { + for (int i = 0; i < ratio[0]; ++i) { + int irow = i + j*ratio[0]; + + if (i > 0) { + mat(irow,irow) += ddx2; + mat(irow,irow-1) += -ddx2; + } + + if (i < ratio[0]-1) { + mat(irow,irow) += ddx2; + mat(irow,irow+1) += -ddx2; + } + + if (j > 0) { + mat(irow,irow) += ddy2; + mat(irow,irow-ratio[0]) += -ddy2; + } + + if (j < ratio[1]-1) { + mat(irow,irow) += ddy2; + mat(irow,irow+ratio[0]) += -ddy2; + } + }} + + mat(N-1,N-1) += ddx2; + + return LUSolver{mat}; +} + +template +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void +facediv_int_gen (int ci, int cj, int /*ck*/, int nf, + GpuArray, AMREX_SPACEDIM> const& fine, + IntVect const& ratio, + GpuArray const& cellSize, + LUSolver const& lusolver) noexcept +{ + const int fi = ci*ratio[0]; + const int fj = cj*ratio[1]; + + const Real dx = cellSize[0]; + const Real dy = cellSize[1]; + const Real dxinv = Real(1.)/dx; + const Real dyinv = Real(1.)/dy; + + for (int j = 0; j < ratio[1]; ++j) { + for (int i = 1; i < ratio[0]; ++i) { + fine[0](fi+i, fj+j, 0, nf) = + ((ratio[0]-i)*fine[0](fi, fj+j, 0, nf) + + i *fine[0](fi+ratio[0], fj+j, 0, nf))/ratio[0]; + }} + + for (int j = 1; j < ratio[1]; ++j) { + for (int i = 0; i < ratio[0]; ++i) { + fine[1](fi+i, fj+j, 0, nf) = + ((ratio[1]-j)*fine[1](fi+i, fj, 0, nf) + + j *fine[1](fi+i, fj+ratio[1], 0, nf))/ratio[1]; + }} + + Real uplus = Real(0.); + Real uminus = Real(0.); + for (int j = 0; j < ratio[1]; ++j) { + uplus += fine[0](fi+ratio[0], fj+j, 0, nf); + uminus += fine[0](fi, fj+j, 0, nf); + } + + Real vplus = Real(0.); + Real vminus = Real(0.); + for (int i = 0; i < ratio[0]; ++i) { + vplus += fine[1](fi+i, fj+ratio[1], 0, nf); + vminus += fine[1](fi+i, fj, 0, nf); + } + + Real target_div = (uplus-uminus)/(N*dx) + (vplus-vminus)/(N*dy); + + Array1D rhs; + Array1D phi; + + for (int j = 0; j < ratio[1]; ++j) { + for (int i = 0; i < ratio[0]; ++i) { + int irow = i + j*ratio[0]; + rhs(irow) = + (fine[0](fi+i+1, fj+j, 0, nf) - fine[0](fi+i, fj+j, 0, nf))*dxinv + + (fine[1](fi+i, fj+j+1, 0, nf) - fine[1](fi+i, fj+j, 0, nf))*dyinv + - target_div; + }} + + lusolver(phi.begin(), rhs.begin()); + + for (int j = 0; j < ratio[1]; ++j) { + for (int i = 1; i < ratio[0]; ++i) { + int plus = i + j*ratio[0]; + int minus = i-1 + j*ratio[0]; + fine[0](fi+i, fj+j, 0, nf) += (phi(plus) - phi(minus))*dxinv; + }} + + for (int j = 1; j < ratio[1]; ++j) { + for (int i = 0; i < ratio[0]; ++i) { + int plus = i + j *ratio[0]; + int minus = i + (j-1)*ratio[0]; + fine[1](fi+i, fj+j, 0, nf) += (phi(plus) - phi(minus))*dyinv; + }} +} + template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void face_linear_interp_x (int i, int j, int /*k*/, int n, Array4 const& fine, diff --git a/Tests/DivFreePatch/main.cpp b/Tests/DivFreePatch/main.cpp index 05ae62b11cb..befc83646cd 100644 --- a/Tests/DivFreePatch/main.cpp +++ b/Tests/DivFreePatch/main.cpp @@ -404,7 +404,12 @@ void main_main () ratio, mapper, bcrec, 0); } - if ((ratio[0] != 2) || (ratio[1] != 2) || (ratio[2] != 2)) { + bool any_ratio_ne_two = false; + for (int d=0; d averaged_faces; for (int d=0; d Date: Sun, 29 Mar 2026 00:01:35 -0700 Subject: [PATCH 13/15] Fix test --- Tests/DivFreePatch/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tests/DivFreePatch/main.cpp b/Tests/DivFreePatch/main.cpp index befc83646cd..fde3919394f 100644 --- a/Tests/DivFreePatch/main.cpp +++ b/Tests/DivFreePatch/main.cpp @@ -29,10 +29,10 @@ void setupMF(MultiFab& mf, const int type = 0, const BoxArray& exclude = BoxArra { Box bx = ba[bid]; - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + amrex::ParallelForRNG(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k, RandomEngine const& eng) noexcept { if (type == 0) - { arr(i,j,k) = amrex::Random()*10; } + { arr(i,j,k) = amrex::Random(eng)*10; } else if (type == 1) { arr(i,j,k) = double(i)+double(j)+double(k); } else if (type == 2) From 0bf05a943da12bd65f5a687c7756032fe685add1 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 29 Mar 2026 07:44:14 -0700 Subject: [PATCH 14/15] comments --- Src/AmrCore/AMReX_Interp_2D_C.H | 3 +++ Src/AmrCore/AMReX_Interp_3D_C.H | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Src/AmrCore/AMReX_Interp_2D_C.H b/Src/AmrCore/AMReX_Interp_2D_C.H index fdcf0cf6220..49bc28c3e72 100644 --- a/Src/AmrCore/AMReX_Interp_2D_C.H +++ b/Src/AmrCore/AMReX_Interp_2D_C.H @@ -232,6 +232,9 @@ facediv_init_lusolver (IntVect const& ratio, } }} + // Hack matrix to be nonsingular. This just picks a particular gauge + // with the last row being zero, doesn’t change the gradients we will + // apply. mat(N-1,N-1) += ddx2; return LUSolver{mat}; diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index bfd6b689728..487a332c5c9 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -474,7 +474,9 @@ facediv_init_lusolver (IntVect const& ratio, } }}} - // hcak matrxi to be nonsingular + // Hack matrix to be nonsingular. This just picks a particular gauge + // with the last row being zero, doesn’t change the gradients we will + // apply. mat(N-1,N-1) +=ddx2; return LUSolver{mat}; From 149e78ec526503a4c4a5556d0a329aefb36b5f0f Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 29 Mar 2026 07:45:24 -0700 Subject: [PATCH 15/15] Fix warning --- Src/AmrCore/AMReX_Interp_2D_C.H | 8 ++++---- Src/AmrCore/AMReX_Interp_3D_C.H | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Src/AmrCore/AMReX_Interp_2D_C.H b/Src/AmrCore/AMReX_Interp_2D_C.H index 49bc28c3e72..2ffbf9791f6 100644 --- a/Src/AmrCore/AMReX_Interp_2D_C.H +++ b/Src/AmrCore/AMReX_Interp_2D_C.H @@ -116,10 +116,10 @@ facediv_face_interp (int ci, int cj, int /*ck*/, case 0: { const Real slpy = Real(0.5)*(crse(ci, cj+1, 0, n) - crse(ci, cj-1, 0, n)); - const Real dys = Real(1.)/ratio[1]; + const Real dys = Real(1.)/Real(ratio[1]); for (int j = 0; j < ratio[1]; ++j) { - Real ysloc = (j+Real(0.5))*dys - Real(0.5); + Real ysloc = (Real(j)+Real(0.5))*dys - Real(0.5); fine(fi, fj+j, 0, n) = crse(ci, cj, 0, n) + ysloc*slpy; } @@ -128,10 +128,10 @@ facediv_face_interp (int ci, int cj, int /*ck*/, case 1: { const Real slpx = Real(0.5)*(crse(ci+1, cj, 0, n) - crse(ci-1, cj, 0, n)); - const Real dxs = Real(1.)/ratio[0]; + const Real dxs = Real(1.)/Real(ratio[0]); for (int i = 0; i < ratio[0]; ++i) { - Real xsloc = (i+Real(0.5))*dxs - Real(0.5); + Real xsloc = (Real(i)+Real(0.5))*dxs - Real(0.5); fine(fi+i, fj, 0, n) = crse(ci, cj, 0, n) + xsloc*slpx; } diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H index 487a332c5c9..ca45ba16d92 100644 --- a/Src/AmrCore/AMReX_Interp_3D_C.H +++ b/Src/AmrCore/AMReX_Interp_3D_C.H @@ -170,14 +170,14 @@ facediv_face_interp (int ci, int cj, int ck, const Real slpz = Real(0.5)* (crse(ci,cj, ck+1,n) - crse(ci,cj, ck-1,n)); const Real slpyz = Real(0.25)*(crse(ci,cj+1,ck+1,n) - crse(ci,cj+1,ck-1,n) - crse(ci,cj-1,ck+1,n) + crse(ci,cj-1,ck-1,n)); - const Real dys = Real(1.)/ratio[1]; - const Real dzs = Real(1.)/ratio[2]; + const Real dys = Real(1.)/Real(ratio[1]); + const Real dzs = Real(1.)/Real(ratio[2]); for (int k=0; k