diff --git a/Src/AmrCore/AMReX_Interp_1D_C.H b/Src/AmrCore/AMReX_Interp_1D_C.H
index cddb819ac7f..399f40cc5a5 100644
--- a/Src/AmrCore/AMReX_Interp_1D_C.H
+++ b/Src/AmrCore/AMReX_Interp_1D_C.H
@@ -73,7 +73,7 @@ nodebilin_interp (Box const& bx, Array4<T> const& fine, const int fcomp, const i
 template<typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
 facediv_face_interp (int /*ci*/, int /*cj*/, int /*ck*/,
-                     int /*nc*/, int /*nf*/, int /*idir*/,
+                     int /*n*/, int /*idir*/,
                      Array4<T const> const& /*crse*/,
                      Array4<T> const& /*fine*/,
                      Array4<const int> const& /*mask*/,
diff --git a/Src/AmrCore/AMReX_Interp_2D_C.H b/Src/AmrCore/AMReX_Interp_2D_C.H
index 9ddb7144e86..2ffbf9791f6 100644
--- a/Src/AmrCore/AMReX_Interp_2D_C.H
+++ b/Src/AmrCore/AMReX_Interp_2D_C.H
@@ -4,6 +4,7 @@
 
 #include <AMReX_Array.H>
 #include <AMReX_Geometry.H>
+#include <AMReX_LUSolver.H>
 
 namespace amrex {
 
@@ -97,14 +98,14 @@ nodebilin_interp (Box const& bx, Array4<T> const& fine, const int fcomp, const i
 template<typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
 facediv_face_interp (int ci, int cj, int /*ck*/,
-                     int nc, int nf, int idir,
+                     int n, int idir,
                      Array4<T const> const& crse,
                      Array4<T> const& fine,
                      Array4<int const> const& mask,
                      IntVect const& ratio) noexcept
 {
     if (mask) {
-        if (!mask(ci, cj, 0, nc))
+        if (!mask(ci, cj, 0, n))
             { return; }
     }
 
@@ -114,23 +115,25 @@ facediv_face_interp (int ci, int cj, int /*ck*/,
     switch (idir) {
         case 0:
         {
-            const Real neg = crse(ci, cj-1, 0, nc);
-            const Real cen = crse(ci, cj  , 0, nc);
-            const Real pos = crse(ci, cj+1, 0, nc);
+            const Real slpy = Real(0.5)*(crse(ci, cj+1, 0, n) - crse(ci, cj-1, 0, n));
+            const Real dys = Real(1.)/Real(ratio[1]);
 
-            fine(fi, fj  , 0, nf) = Real(0.125)*(8*cen + neg - pos);
-            fine(fi, fj+1, 0, nf) = Real(0.125)*(8*cen + pos - neg);
+            for (int j = 0; j < ratio[1]; ++j) {
+                Real ysloc = (Real(j)+Real(0.5))*dys - Real(0.5);
+                fine(fi, fj+j, 0, n) = crse(ci, cj, 0, n) + ysloc*slpy;
+            }
 
             break;
         }
         case 1:
         {
-            const Real neg = crse(ci-1, cj, 0, nc);
-            const Real cen = crse(ci  , cj, 0, nc);
-            const Real pos = crse(ci+1, cj, 0, nc);
+            const Real slpx = Real(0.5)*(crse(ci+1, cj, 0, n) - crse(ci-1, cj, 0, n));
+            const Real dxs = Real(1.)/Real(ratio[0]);
 
-            fine(fi  , fj, 0, nf) = Real(0.125)*(8*cen + neg - pos);
-            fine(fi+1, fj, 0, nf) = Real(0.125)*(8*cen + pos - neg);
+            for (int i = 0; i < ratio[0]; ++i) {
+                Real xsloc = (Real(i)+Real(0.5))*dxs - Real(0.5);
+                fine(fi+i, fj, 0, n) = crse(ci, cj, 0, n) + xsloc*slpx;
+            }
 
             break;
         }
@@ -140,7 +143,7 @@ facediv_face_interp (int ci, int cj, int /*ck*/,
 
 template<typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
-facediv_int (int ci, int cj, int /*ck*/, int nf,
+facediv_int (int ci, int cj, int /*ck*/, int n,
              GpuArray<Array4<T>, AMREX_SPACEDIM> const& fine,
              IntVect const& ratio,
              GpuArray<Real, AMREX_SPACEDIM> const& cellSize) noexcept
@@ -149,26 +152,167 @@ facediv_int (int ci, int cj, int /*ck*/, int nf,
     const int fj = cj*ratio[1];
 
     // References to fine exterior values.
-    const Real umm = fine[0](fi,   fj,   0, nf);
-    const Real ump = fine[0](fi,   fj+1, 0, nf);
-    const Real upm = fine[0](fi+2, fj,   0, nf);
-    const Real upp = fine[0](fi+2, fj+1, 0, nf);
+    const Real umm = fine[0](fi,   fj,   0, n);
+    const Real ump = fine[0](fi,   fj+1, 0, n);
+    const Real upm = fine[0](fi+2, fj,   0, n);
+    const Real upp = fine[0](fi+2, fj+1, 0, n);
 
-    const Real vmm = fine[1](fi,   fj,   0, nf);
-    const Real vmp = fine[1](fi+1, fj,   0, nf);
-    const Real vpm = fine[1](fi,   fj+2, 0, nf);
-    const Real vpp = fine[1](fi+1, fj+2, 0, nf);
+    const Real vmm = fine[1](fi,   fj,   0, n);
+    const Real vmp = fine[1](fi+1, fj,   0, n);
+    const Real vpm = fine[1](fi,   fj+2, 0, n);
+    const Real vpp = fine[1](fi+1, fj+2, 0, n);
 
     const Real dxdy = cellSize[0]/cellSize[1];
     const Real x_corr = Real(0.25)*dxdy * (vpp+vmm-vmp-vpm);
     const Real y_corr = Real(0.25)/dxdy * (upp+umm-ump-upm);
 
     // Calc fine faces on interior of coarse cells.
-    fine[0](fi+1,fj  ,0,nf) = Real(0.5)*(umm+upm) + x_corr;
-    fine[0](fi+1,fj+1,0,nf) = Real(0.5)*(ump+upp) + x_corr;
-    fine[1](fi,  fj+1,0,nf) = Real(0.5)*(vmm+vpm) + y_corr;
-    fine[1](fi+1,fj+1,0,nf) = Real(0.5)*(vmp+vpp) + y_corr;
+    fine[0](fi+1,fj  ,0,n) = Real(0.5)*(umm+upm) + x_corr;
+    fine[0](fi+1,fj+1,0,n) = Real(0.5)*(ump+upp) + x_corr;
+    fine[1](fi,  fj+1,0,n) = Real(0.5)*(vmm+vpm) + y_corr;
+    fine[1](fi+1,fj+1,0,n) = Real(0.5)*(vmp+vpp) + y_corr;
+
+}
+
+template <int i, std::enable_if_t<(i>=0 && i<4), int> = 0>
+constexpr IntVect facediv_get_refratio ()
+{
+    if constexpr (i == 0) {
+        return IntVect(2,2);
+    } else if constexpr (i == 1) {
+        return IntVect(4,2);
+    } else if constexpr (i == 2) {
+        return IntVect(2,4);
+    } else {
+        return IntVect(4,4);
+    }
+}
+
+template <int N>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+LUSolver<N,Real>
+facediv_init_lusolver (IntVect const& ratio,
+                       GpuArray<Real, AMREX_SPACEDIM> const& cellSize)
+{
+    Array2D<Real,0,N-1,0,N-1,Order::C> mat;
+
+    for (int irow = 0; irow < N; ++irow) {
+        for (int icol = 0; icol < N; ++icol) {
+            mat(irow,icol) = Real(0.0);
+        }
+    }
+
+    Real dxinv = Real(1.)/cellSize[0];
+    Real dyinv = Real(1.)/cellSize[1];
+    Real ddx2 = dxinv*dxinv;
+    Real ddy2 = dyinv*dyinv;
+
+    for (int j = 0; j < ratio[1]; ++j) {
+    for (int i = 0; i < ratio[0]; ++i) {
+        int irow = i + j*ratio[0];
+
+        if (i > 0) {
+            mat(irow,irow) += ddx2;
+            mat(irow,irow-1) += -ddx2;
+        }
+
+        if (i < ratio[0]-1) {
+            mat(irow,irow) += ddx2;
+            mat(irow,irow+1) += -ddx2;
+        }
+
+        if (j > 0) {
+            mat(irow,irow) += ddy2;
+            mat(irow,irow-ratio[0]) += -ddy2;
+        }
+
+        if (j < ratio[1]-1) {
+            mat(irow,irow) += ddy2;
+            mat(irow,irow+ratio[0]) += -ddy2;
+        }
+    }}
+
+    // Hack matrix to be nonsingular. This just picks a particular gauge
+    // with the last row being zero, doesn’t change the gradients we will
+    // apply.
+    mat(N-1,N-1) += ddx2;
+
+    return LUSolver<N,Real>{mat};
+}
+
+template<typename T, int N>
+AMREX_GPU_DEVICE AMREX_FORCE_INLINE void
+facediv_int_gen (int ci, int cj, int /*ck*/, int nf,
+                 GpuArray<Array4<T>, AMREX_SPACEDIM> const& fine,
+                 IntVect const& ratio,
+                 GpuArray<Real, AMREX_SPACEDIM> const& cellSize,
+                 LUSolver<N,Real> const& lusolver) noexcept
+{
+    const int fi = ci*ratio[0];
+    const int fj = cj*ratio[1];
+
+    const Real dx = cellSize[0];
+    const Real dy = cellSize[1];
+    const Real dxinv = Real(1.)/dx;
+    const Real dyinv = Real(1.)/dy;
+
+    for (int j = 0; j < ratio[1]; ++j) {
+    for (int i = 1; i < ratio[0]; ++i) {
+        fine[0](fi+i, fj+j, 0, nf) =
+            ((ratio[0]-i)*fine[0](fi,           fj+j, 0, nf)
+           +          i *fine[0](fi+ratio[0], fj+j, 0, nf))/ratio[0];
+    }}
+
+    for (int j = 1; j < ratio[1]; ++j) {
+    for (int i = 0; i < ratio[0]; ++i) {
+        fine[1](fi+i, fj+j, 0, nf) =
+            ((ratio[1]-j)*fine[1](fi+i, fj,           0, nf)
+           +          j *fine[1](fi+i, fj+ratio[1], 0, nf))/ratio[1];
+    }}
+
+    Real uplus = Real(0.);
+    Real uminus = Real(0.);
+    for (int j = 0; j < ratio[1]; ++j) {
+        uplus  += fine[0](fi+ratio[0], fj+j, 0, nf);
+        uminus += fine[0](fi,          fj+j, 0, nf);
+    }
+
+    Real vplus = Real(0.);
+    Real vminus = Real(0.);
+    for (int i = 0; i < ratio[0]; ++i) {
+        vplus  += fine[1](fi+i, fj+ratio[1], 0, nf);
+        vminus += fine[1](fi+i, fj,          0, nf);
+    }
 
+    Real target_div = (uplus-uminus)/(N*dx) + (vplus-vminus)/(N*dy);
+
+    Array1D<Real,0,N-1> rhs;
+    Array1D<Real,0,N-1> phi;
+
+    for (int j = 0; j < ratio[1]; ++j) {
+    for (int i = 0; i < ratio[0]; ++i) {
+        int irow = i + j*ratio[0];
+        rhs(irow) =
+            (fine[0](fi+i+1, fj+j,   0, nf) - fine[0](fi+i,   fj+j,   0, nf))*dxinv
+          + (fine[1](fi+i,   fj+j+1, 0, nf) - fine[1](fi+i,   fj+j,   0, nf))*dyinv
+          - target_div;
+    }}
+
+    lusolver(phi.begin(), rhs.begin());
+
+    for (int j = 0; j < ratio[1]; ++j) {
+    for (int i = 1; i < ratio[0]; ++i) {
+        int plus  = i   + j*ratio[0];
+        int minus = i-1 + j*ratio[0];
+        fine[0](fi+i, fj+j, 0, nf) += (phi(plus) - phi(minus))*dxinv;
+    }}
+
+    for (int j = 1; j < ratio[1]; ++j) {
+    for (int i = 0; i < ratio[0]; ++i) {
+        int plus  = i + j   *ratio[0];
+        int minus = i + (j-1)*ratio[0];
+        fine[1](fi+i, fj+j, 0, nf) += (phi(plus) - phi(minus))*dyinv;
+    }}
 }
 
 template<typename T>
diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H
index c8836a6a186..ca45ba16d92 100644
--- a/Src/AmrCore/AMReX_Interp_3D_C.H
+++ b/Src/AmrCore/AMReX_Interp_3D_C.H
@@ -4,6 +4,7 @@
 
 #include <AMReX_FArrayBox.H>
 #include <AMReX_Array.H>
+#include <AMReX_LUSolver.H>
 
 namespace amrex {
 
@@ -126,11 +127,10 @@ nodebilin_interp (Box const& bx, Array4<T> const& fine, const int fcomp, const i
     }
 }
 
-
 template<typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
 facediv_face_interp (int ci, int cj, int ck,
-                     int nc, int nf, int idir,
+                     int n, int idir,
                      Array4<T const> const& crse,
                      Array4<T> const& fine,
                      Array4<int const> const& mask,
@@ -138,7 +138,7 @@ facediv_face_interp (int ci, int cj, int ck,
 {
 
     if (mask) {
-        if (!mask(ci, cj, ck, nc))
+        if (!mask(ci, cj, ck, n))
             { return; }
     }
 
@@ -149,64 +149,117 @@ facediv_face_interp (int ci, int cj, int ck,
     switch (idir) {
         case 0:
         {
-            const Real ll = crse(ci, cj-1, ck-1, nc);
-            const Real cl = crse(ci, cj-1, ck,   nc);
-            const Real rl = crse(ci, cj-1, ck+1, nc);
+//            const Real ll = crse(ci, cj-1, ck-1, nc);
+//            const Real cl = crse(ci, cj-1, ck,   nc);
+//            const Real rl = crse(ci, cj-1, ck+1, nc);
+//
+//            const Real lc = crse(ci, cj  , ck-1, nc);
+//            const Real cc = crse(ci, cj  , ck,   nc);
+//            const Real rc = crse(ci, cj  , ck+1, nc);
+
+//            const Real lu = crse(ci, cj+1, ck-1, nc);
+//            const Real cu = crse(ci, cj+1, ck,   nc);
+//            const Real ru = crse(ci, cj+1, ck+1, nc);
+
+//            fine(fi,fj  ,fk  ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+lc-cu-rc) + (ll+ru-lu-rl) );
+//            fine(fi,fj  ,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+rc-cu-lc) + (lu+rl-ll-ru) );
+//            fine(fi,fj+1,fk  ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) );
+//            fine(fi,fj+1,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) );
 
-            const Real lc = crse(ci, cj  , ck-1, nc);
-            const Real cc = crse(ci, cj  , ck,   nc);
-            const Real rc = crse(ci, cj  , ck+1, nc);
+            const Real slpy  = Real(0.5)* (crse(ci,cj+1,ck  ,n) - crse(ci,cj-1,ck  ,n));
+            const Real slpz  = Real(0.5)* (crse(ci,cj,  ck+1,n) - crse(ci,cj,  ck-1,n));
+            const Real slpyz = Real(0.25)*(crse(ci,cj+1,ck+1,n) - crse(ci,cj+1,ck-1,n) - crse(ci,cj-1,ck+1,n) + crse(ci,cj-1,ck-1,n));
 
-            const Real lu = crse(ci, cj+1, ck-1, nc);
-            const Real cu = crse(ci, cj+1, ck,   nc);
-            const Real ru = crse(ci, cj+1, ck+1, nc);
+            const Real dys = Real(1.)/Real(ratio[1]);
+            const Real dzs = Real(1.)/Real(ratio[2]);
 
-            fine(fi,fj  ,fk  ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+lc-cu-rc) + (ll+ru-lu-rl) );
-            fine(fi,fj  ,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+rc-cu-lc) + (lu+rl-ll-ru) );
-            fine(fi,fj+1,fk  ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) );
-            fine(fi,fj+1,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) );
+            for (int k=0; k<ratio[2]; ++k){
+            for (int j=0; j<ratio[1]; ++j){
+
+              Real ysloc = (Real(j)+Real(0.5))*dys - Real(0.5);
+              Real zsloc = (Real(k)+Real(0.5))*dzs - Real(0.5);
+              fine(fi,fj+j,fk+k,n) = crse(ci,cj,ck,n) +  ysloc*slpy + zsloc*slpz + ysloc*zsloc*slpyz;
+
+            }
+            }
 
             break;
         }
         case 1:
         {
-            const Real ll = crse(ci-1, cj, ck-1, nc);
-            const Real cl = crse(ci  , cj, ck-1, nc);
-            const Real rl = crse(ci+1, cj, ck-1, nc);
+//            const Real ll = crse(ci-1, cj, ck-1, nc);
+//            const Real cl = crse(ci  , cj, ck-1, nc);
+//            const Real rl = crse(ci+1, cj, ck-1, nc);
+
+//            const Real lc = crse(ci-1, cj, ck  , nc);
+//            const Real cc = crse(ci  , cj, ck  , nc);
+//            const Real rc = crse(ci+1, cj, ck  , nc);
+
+//            const Real lu = crse(ci-1, cj, ck+1, nc);
+//            const Real cu = crse(ci  , cj, ck+1, nc);
+//            const Real ru = crse(ci+1, cj, ck+1, nc);
 
-            const Real lc = crse(ci-1, cj, ck  , nc);
-            const Real cc = crse(ci  , cj, ck  , nc);
-            const Real rc = crse(ci+1, cj, ck  , nc);
+//            fine(fi  ,fj,fk  ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+lc-cu-rc) + (ll+ru-lu-rl) );
+//            fine(fi+1,fj,fk  ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+rc-cu-lc) + (lu+rl-ll-ru) );
+//            fine(fi  ,fj,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) );
+//            fine(fi+1,fj,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) );
 
-            const Real lu = crse(ci-1, cj, ck+1, nc);
-            const Real cu = crse(ci  , cj, ck+1, nc);
-            const Real ru = crse(ci+1, cj, ck+1, nc);
+            const Real slpx  = Real(0.5)* (crse(ci+1,cj,ck  ,n) - crse(ci-1,cj,ck  ,n));
+            const Real slpz  = Real(0.5)* (crse(ci  ,cj,ck+1,n) - crse(ci  ,cj,ck-1,n));
+            const Real slpxz = Real(0.25)*(crse(ci+1,cj,ck+1,n) - crse(ci+1,cj,ck-1,n) - crse(ci-1,cj,ck+1,n) + crse(ci-1,cj,ck-1,n));
+
+            const Real dxs = Real(1.)/Real(ratio[0]);
+            const Real dzs = Real(1.)/Real(ratio[2]);
+
+            for (int k=0; k<ratio[2]; ++k){
+            for (int i=0; i<ratio[0]; ++i){
+
+              Real xsloc = (Real(i)+Real(0.5))*dxs - Real(0.5);
+              Real zsloc = (Real(k)+Real(0.5))*dzs - Real(0.5);
+              fine(fi+i,fj,fk+k,n) = crse(ci,cj,ck,n) + xsloc*slpx + zsloc*slpz + xsloc*zsloc*slpxz;
+
+            }
+            }
 
-            fine(fi  ,fj,fk  ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+lc-cu-rc) + (ll+ru-lu-rl) );
-            fine(fi+1,fj,fk  ,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+rc-cu-lc) + (lu+rl-ll-ru) );
-            fine(fi  ,fj,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) );
-            fine(fi+1,fj,fk+1,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) );
 
             break;
         }
         case 2:
         {
-            const Real ll = crse(ci-1, cj-1, ck, nc);
-            const Real cl = crse(ci  , cj-1, ck, nc);
-            const Real rl = crse(ci+1, cj-1, ck, nc);
+//            const Real ll = crse(ci-1, cj-1, ck, nc);
+//            const Real cl = crse(ci  , cj-1, ck, nc);
+//            const Real rl = crse(ci+1, cj-1, ck, nc);
+
+//            const Real lc = crse(ci-1, cj  , ck, nc);
+//            const Real cc = crse(ci  , cj  , ck, nc);
+//            const Real rc = crse(ci+1, cj  , ck, nc);
+
+//            const Real lu = crse(ci-1, cj+1, ck, nc);
+//            const Real cu = crse(ci  , cj+1, ck, nc);
+//            const Real ru = crse(ci+1, cj+1, ck, nc);
+
+//            fine(fi  ,fj  ,fk,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+lc-cu-rc) + (ll+ru-lu-rl) );
+//            fine(fi+1,fj  ,fk,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+rc-cu-lc) + (lu+rl-ll-ru) );
+//            fine(fi  ,fj+1,fk,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) );
+//            fine(fi+1,fj+1,fk,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) );
 
-            const Real lc = crse(ci-1, cj  , ck, nc);
-            const Real cc = crse(ci  , cj  , ck, nc);
-            const Real rc = crse(ci+1, cj  , ck, nc);
+            const Real slpx  = Real(0.5)* (crse(ci+1,cj,  ck,n) - crse(ci-1,cj,  ck  ,n));
+            const Real slpy  = Real(0.5)* (crse(ci  ,cj+1,ck,n) - crse(ci,  cj-1,ck  ,n));
+            const Real slpxy = Real(0.25)*(crse(ci+1,cj+1,ck,n) - crse(ci+1,cj-1,ck,n) - crse(ci-1,cj+1,ck,n) + crse(ci-1,cj-1,ck,n));
 
-            const Real lu = crse(ci-1, cj+1, ck, nc);
-            const Real cu = crse(ci  , cj+1, ck, nc);
-            const Real ru = crse(ci+1, cj+1, ck, nc);
+            const Real dxs = Real(1.)/Real(ratio[0]);
+            const Real dys = Real(1.)/Real(ratio[1]);
+
+            for (int j=0; j<ratio[1]; ++j){
+            for (int i=0; i<ratio[0]; ++i){
+
+              Real xsloc = (Real(i)+Real(0.5))*dxs - Real(0.5);
+              Real ysloc = (Real(j)+Real(0.5))*dys - Real(0.5);
+              fine(fi+i,fj+j,fk,n) = crse(ci,cj,ck,n) +  xsloc*slpx + ysloc*slpy + xsloc*ysloc*slpxy;
+
+            }
+            }
 
-            fine(fi  ,fj  ,fk,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+lc-cu-rc) + (ll+ru-lu-rl) );
-            fine(fi+1,fj  ,fk,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cl+rc-cu-lc) + (lu+rl-ll-ru) );
-            fine(fi  ,fj+1,fk,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+lc-cl-rc) + (lu+rl-ll-ru) );
-            fine(fi+1,fj+1,fk,nf) = Real(1.)/Real(64.)*( 64*cc + 8*(cu+rc-cl-lc) + (ll+ru-lu-rl) );
 
             break;
         }
@@ -216,7 +269,7 @@ facediv_face_interp (int ci, int cj, int ck,
 
 template<typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
-facediv_int (int ci, int cj, int ck, int nf,
+facediv_int (int ci, int cj, int ck, int n,
              GpuArray<Array4<T>, AMREX_SPACEDIM> const& fine,
              IntVect const& ratio,
              GpuArray<Real, AMREX_SPACEDIM> const& cellSize) noexcept
@@ -226,32 +279,32 @@ facediv_int (int ci, int cj, int ck, int nf,
     const int fk = ck*ratio[2];
 
     // References to fine exterior values needed for interior calculation.
-    const Real u000 = fine[0](fi,   fj  , fk  , nf);
-    const Real u200 = fine[0](fi+2, fj  , fk  , nf);
-    const Real u010 = fine[0](fi,   fj+1, fk  , nf);
-    const Real u210 = fine[0](fi+2, fj+1, fk  , nf);
-    const Real u001 = fine[0](fi,   fj  , fk+1, nf);
-    const Real u201 = fine[0](fi+2, fj  , fk+1, nf);
-    const Real u011 = fine[0](fi,   fj+1, fk+1, nf);
-    const Real u211 = fine[0](fi+2, fj+1, fk+1, nf);
-
-    const Real v000 = fine[1](fi  , fj  , fk  , nf);
-    const Real v020 = fine[1](fi  , fj+2, fk  , nf);
-    const Real v100 = fine[1](fi+1, fj  , fk  , nf);
-    const Real v120 = fine[1](fi+1, fj+2, fk  , nf);
-    const Real v001 = fine[1](fi  , fj  , fk+1, nf);
-    const Real v021 = fine[1](fi  , fj+2, fk+1, nf);
-    const Real v101 = fine[1](fi+1, fj  , fk+1, nf);
-    const Real v121 = fine[1](fi+1, fj+2, fk+1, nf);
-
-    const Real w000 = fine[2](fi  , fj  , fk  , nf);
-    const Real w002 = fine[2](fi  , fj  , fk+2, nf);
-    const Real w100 = fine[2](fi+1, fj  , fk  , nf);
-    const Real w102 = fine[2](fi+1, fj  , fk+2, nf);
-    const Real w010 = fine[2](fi  , fj+1, fk  , nf);
-    const Real w012 = fine[2](fi  , fj+1, fk+2, nf);
-    const Real w110 = fine[2](fi+1, fj+1, fk  , nf);
-    const Real w112 = fine[2](fi+1, fj+1, fk+2, nf);
+    const Real u000 = fine[0](fi,   fj  , fk  , n);
+    const Real u200 = fine[0](fi+2, fj  , fk  , n);
+    const Real u010 = fine[0](fi,   fj+1, fk  , n);
+    const Real u210 = fine[0](fi+2, fj+1, fk  , n);
+    const Real u001 = fine[0](fi,   fj  , fk+1, n);
+    const Real u201 = fine[0](fi+2, fj  , fk+1, n);
+    const Real u011 = fine[0](fi,   fj+1, fk+1, n);
+    const Real u211 = fine[0](fi+2, fj+1, fk+1, n);
+
+    const Real v000 = fine[1](fi  , fj  , fk  , n);
+    const Real v020 = fine[1](fi  , fj+2, fk  , n);
+    const Real v100 = fine[1](fi+1, fj  , fk  , n);
+    const Real v120 = fine[1](fi+1, fj+2, fk  , n);
+    const Real v001 = fine[1](fi  , fj  , fk+1, n);
+    const Real v021 = fine[1](fi  , fj+2, fk+1, n);
+    const Real v101 = fine[1](fi+1, fj  , fk+1, n);
+    const Real v121 = fine[1](fi+1, fj+2, fk+1, n);
+
+    const Real w000 = fine[2](fi  , fj  , fk  , n);
+    const Real w002 = fine[2](fi  , fj  , fk+2, n);
+    const Real w100 = fine[2](fi+1, fj  , fk  , n);
+    const Real w102 = fine[2](fi+1, fj  , fk+2, n);
+    const Real w010 = fine[2](fi  , fj+1, fk  , n);
+    const Real w012 = fine[2](fi  , fj+1, fk+2, n);
+    const Real w110 = fine[2](fi+1, fj+1, fk  , n);
+    const Real w112 = fine[2](fi+1, fj+1, fk+2, n);
 
     const Real dx = cellSize[0];
     const Real dy = cellSize[1];
@@ -266,79 +319,345 @@ facediv_int (int ci, int cj, int ck, int nf,
     const Real yspzs = dy*dy + dz*dz;
     const Real zspxs = dz*dz + dx*dx;
 
-    fine[0](fi+1, fj  , fk  , nf) = Real(0.5)*(u000+u200)
+    fine[0](fi+1, fj  , fk  , n) = Real(0.5)*(u000+u200)
                                   + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v000+v120-v020-v100)
                                   +                dx3/(8*dy*zspxs)*(v001+v121-v021-v101)
                                   + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w000+w102-w002-w100)
                                   +                dx3/(8*dz*xspys)*(w010+w112-w012-w110);
 
-    fine[0](fi+1, fj+1, fk  , nf) = Real(0.5)*(u010+u210)
+    fine[0](fi+1, fj+1, fk  , n) = Real(0.5)*(u010+u210)
                                   + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v000+v120-v020-v100)
                                   +                dx3/(8*dy*zspxs)*(v001+v121-v021-v101)
                                   + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w010+w112-w012-w110)
                                   +                dx3/(8*dz*xspys)*(w000+w102-w100-w002);
 
-    fine[0](fi+1, fj  , fk+1, nf) = Real(0.5)*(u001+u201)
+    fine[0](fi+1, fj  , fk+1, n) = Real(0.5)*(u001+u201)
                                   + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v001+v121-v021-v101)
                                   +                dx3/(8*dy*zspxs)*(v000+v120-v020-v100)
                                   + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w000+w102-w002-w100)
                                   +                dx3/(8*dz*xspys)*(w010+w112-w012-w110);
 
-    fine[0](fi+1, fj+1, fk+1, nf) = Real(0.5)*(u011+u211)
+    fine[0](fi+1, fj+1, fk+1, n) = Real(0.5)*(u011+u211)
                                   + dx*(2*dz*dz+dx*dx)/(8*dy*zspxs)*(v001+v121-v021-v101)
                                   +                dx3/(8*dy*zspxs)*(v000+v120-v020-v100)
                                   + dx*(2*dy*dy+dx*dx)/(8*dz*xspys)*(w010+w112-w012-w110)
                                   +                dx3/(8*dz*xspys)*(w000+w102-w002-w100);
 
-    fine[1](fi  , fj+1, fk  , nf) = Real(0.5)*(v000+v020)
+    fine[1](fi  , fj+1, fk  , n) = Real(0.5)*(v000+v020)
                                   + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u000+u210-u010-u200)
                                   +                dy3/(8*dx*yspzs)*(u001+u211-u011-u201)
                                   + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w000+w012-w002-w010)
                                   +                dy3/(8*dz*xspys)*(w100+w112-w102-w110);
 
-    fine[1](fi+1, fj+1, fk  , nf) = Real(0.5)*(v100+v120)
+    fine[1](fi+1, fj+1, fk  , n) = Real(0.5)*(v100+v120)
                                   + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u000+u210-u010-u200)
                                   +                dy3/(8*dx*yspzs)*(u001+u211-u011-u201)
                                   + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w100+w112-w102-w110)
                                   +                dy3/(8*dz*xspys)*(w000+w012-w002-w010);
 
-    fine[1](fi  , fj+1, fk+1, nf) = Real(0.5)*(v001+v021)
+    fine[1](fi  , fj+1, fk+1, n) = Real(0.5)*(v001+v021)
                                   + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u001+u211-u011-u201)
                                   +                dy3/(8*dx*yspzs)*(u000+u210-u010-u200)
                                   + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w000+w012-w002-w010)
                                   +                dy3/(8*dz*xspys)*(w100+w112-w102-w110);
 
-    fine[1](fi+1, fj+1, fk+1, nf) = Real(0.5)*(v101+v121)
+    fine[1](fi+1, fj+1, fk+1, n) = Real(0.5)*(v101+v121)
                                   + dy*(2*dz*dz+dy*dy)/(8*dx*yspzs)*(u001+u211-u011-u201)
                                   +                dy3/(8*dx*yspzs)*(u000+u210-u010-u200)
                                   + dy*(2*dx*dx+dy*dy)/(8*dz*xspys)*(w100+w112-w102-w110)
                                   +                dy3/(8*dz*xspys)*(w000+w012-w002-w010);
 
-    fine[2](fi  , fj  , fk+1, nf) = Real(0.5)*(w000+w002)
+    fine[2](fi  , fj  , fk+1, n) = Real(0.5)*(w000+w002)
                                   + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u000+u201-u001-u200)
                                   +                dz3/(8*dx*yspzs)*(u010+u211-u011-u210)
                                   + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v000+v021-v001-v020)
                                   +                dz3/(8*dy*zspxs)*(v100+v121-v101-v120);
 
-    fine[2](fi  , fj+1, fk+1, nf) = Real(0.5)*(w010+w012)
+    fine[2](fi  , fj+1, fk+1, n) = Real(0.5)*(w010+w012)
                                   + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u010+u211-u011-u210)
                                   +                dz3/(8*dx*yspzs)*(u000+u201-u001-u200)
                                   + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v000+v021-v001-v020)
                                   +                dz3/(8*dy*zspxs)*(v100+v121-v101-v120);
 
-    fine[2](fi+1, fj  , fk+1, nf) = Real(0.5)*(w100+w102)
+    fine[2](fi+1, fj  , fk+1, n) = Real(0.5)*(w100+w102)
                                   + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u000+u201-u001-u200)
                                   +                dz3/(8*dx*yspzs)*(u010+u211-u011-u210)
                                   + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v100+v121-v101-v120)
                                   +                dz3/(8*dy*zspxs)*(v000+v021-v001-v020);
 
-    fine[2](fi+1, fj+1, fk+1, nf) = Real(0.5)*(w110+w112)
+    fine[2](fi+1, fj+1, fk+1, n) = Real(0.5)*(w110+w112)
                                   + dz*(2*dy*dy+dz*dz)/(8*dx*yspzs)*(u010+u211-u011-u210)
                                   +                dz3/(8*dx*yspzs)*(u000+u201-u001-u200)
                                   + dz*(2*dx*dx+dz*dz)/(8*dy*zspxs)*(v100+v121-v101-v120)
                                   +                dz3/(8*dy*zspxs)*(v000+v021-v001-v020);
 }
 
+
+template <int i, std::enable_if_t<(i>=0 && i<8),int> = 0>
+constexpr IntVect facediv_get_refratio ()
+{
+    if constexpr (i == 0) {
+        return IntVect(2,2,2);
+    } else if constexpr (i == 1) {
+        return IntVect(4,2,2);
+    } else if constexpr (i == 2) {
+        return IntVect(2,4,2);
+    } else if constexpr (i == 3) {
+        return IntVect(4,4,2);
+    } else if constexpr (i == 4) {
+        return IntVect(2,2,4);
+    } else if constexpr (i == 5) {
+        return IntVect(4,2,4);
+    } else if constexpr (i == 6) {
+        return IntVect(2,4,4);
+    } else {
+        return IntVect(4,4,4);
+    }
+}
+
+template <int N>
+LUSolver<N,Real>
+facediv_init_lusolver (IntVect const& ratio,
+                       GpuArray<Real, AMREX_SPACEDIM> const& cellSize)
+{
+    //  set up matrix for local MAC projection
+    Array2D<Real,0,N-1,0,N-1,Order::C> mat;
+
+    for (int irow = 0; irow < N; irow++) {
+        for (int icol = 0; icol < N; icol++) {
+            mat(irow,icol) = Real(0.0);
+        }
+    }
+
+    Real dxinv = Real(1.)/cellSize[0];
+    Real dyinv = Real(1.)/cellSize[1];
+    Real dzinv = Real(1.)/cellSize[2];
+
+    Real ddx2 = dxinv*dxinv;
+    Real ddy2 = dyinv*dyinv;
+    Real ddz2 = dzinv*dzinv;
+
+    for( int k=0; k < ratio[2]; ++k){
+    for( int j=0; j < ratio[1]; ++j){
+    for( int i=0; i < ratio[0]; ++i){
+
+        int irow = i + j*ratio[0] +k*ratio[0]*ratio[1];
+
+        if(i > 0){
+            mat(irow,irow) += ddx2;
+            mat(irow,irow-1) += -ddx2;
+        }
+
+        if(i < ratio[0]-1){
+            mat(irow,irow) += ddx2;
+            mat(irow,irow+1) += -ddx2;
+        }
+
+        if(j > 0){
+            mat(irow,irow) += ddy2;
+            mat(irow,irow-ratio[0]) += -ddy2;
+        }
+
+        if(j < ratio[1]-1){
+            mat(irow,irow) += ddy2;
+            mat(irow,irow+ratio[0]) += -ddy2;
+        }
+
+        if(k > 0){
+            mat(irow,irow) += ddz2;
+            mat(irow,irow-ratio[0]*ratio[1]) += -ddz2;
+        }
+
+        if(k < ratio[2]-1){
+            mat(irow,irow) += ddz2;
+            mat(irow,irow+ratio[0]*ratio[1]) += -ddz2;
+        }
+    }}}
+
+    // Hack matrix to be nonsingular. This just picks a particular gauge
+    // with the last row being zero, doesn’t change the gradients we will
+    // apply.
+    mat(N-1,N-1) +=ddx2;
+
+    return LUSolver<N,Real>{mat};
+}
+
+template<typename T, int N>
+AMREX_GPU_DEVICE AMREX_FORCE_INLINE void
+facediv_int_gen (int ci, int cj, int ck, int nf,
+                 GpuArray<Array4<T>, AMREX_SPACEDIM> const& fine,
+                 IntVect const& ratio,
+                 GpuArray<Real, AMREX_SPACEDIM> const& cellSize,
+                 LUSolver<N,Real> const& lusolver) noexcept
+{
+    const int fi = ci*ratio[0];
+    const int fj = cj*ratio[1];
+    const int fk = ck*ratio[2];
+
+    const Real dx = cellSize[0];
+    const Real dy = cellSize[1];
+    const Real dz = cellSize[2];
+
+    const Real dxinv = Real(1.)/cellSize[0];
+    const Real dyinv = Real(1.)/cellSize[1];
+    const Real dzinv = Real(1.)/cellSize[2];
+
+// interpolate initial veloc
+
+    for( int k=0; k < ratio[2]; ++k){
+    for( int j=0; j < ratio[1]; ++j){
+    for( int i=1; i < ratio[0]; ++i){
+
+       fine[0](fi + i, fj + j, fk + k, nf) = ((ratio[0]-i)*fine[0](fi,           fj + j, fk + k, nf)
+                                        +     i       *fine[0](fi+ratio[0], fj + j, fk + k, nf) )/ratio[0];
+
+    }
+    }
+    }
+
+    for( int k=0; k < ratio[2]; ++k){
+    for( int j=1; j < ratio[1]; ++j){
+    for( int i=0; i < ratio[0]; ++i){
+
+       fine[1](fi + i, fj + j, fk + k, nf) = ((ratio[1]-j)*fine[1](fi + i, fj,           fk + k, nf)
+                                        +     j       *fine[1](fi + i, fj+ratio[1], fk + k, nf) )/ratio[1];
+
+    }
+    }
+    }
+
+    for( int k=1; k < ratio[2]; ++k){
+    for( int j=0; j < ratio[1]; ++j){
+    for( int i=0; i < ratio[0]; ++i){
+
+       fine[2](fi + i, fj + j, fk + k, nf) = ((ratio[2]-k)*fine[2](fi + i, fj + j, fk,           nf)
+                                        +     k       *fine[2](fi + i, fj + j, fk+ratio[2], nf) )/ratio[2];
+
+    }
+    }
+    }
+
+// compute target divergence
+
+    Real uplus  = 0;
+    Real uminus = 0;
+
+    for( int k=0; k < ratio[2]; ++k){
+    for( int j=0; j < ratio[1]; ++j){
+
+         uplus  += fine[0](fi+ratio[0],fj+j,fk+k,nf);
+         uminus += fine[0](fi         ,fj+j,fk+k,nf);
+
+    }
+    }
+
+    Real vplus  = 0;
+    Real vminus = 0;
+
+    for( int k=0; k < ratio[2]; ++k){
+    for( int i=0; i < ratio[0]; ++i){
+
+         vplus  += fine[1](fi+i,fj+ratio[1],fk+k,nf);
+         vminus += fine[1](fi+i,fj,         fk+k,nf);
+
+    }
+    }
+
+    Real wplus  = 0;
+    Real wminus = 0;
+
+    for( int j=0; j < ratio[1]; ++j){
+    for( int i=0; i < ratio[0]; ++i){
+
+         wplus  += fine[2](fi+i,fj+j,fk+ratio[2],nf);
+         wminus += fine[2](fi+i,fj+j,fk         ,nf);
+
+    }
+    }
+
+    Real target_div = (uplus-uminus)/(N*dx) + (vplus-vminus)/(N*dy) + (wplus-wminus)/(N*dz);
+
+// form rhs
+    Array1D<Real,0,N-1> rhs;
+    Array1D<Real,0,N-1> phi;
+
+    for( int k=0; k < ratio[2]; ++k){
+    for( int j=0; j < ratio[1]; ++j){
+    for( int i=0; i < ratio[0]; ++i){
+
+       int irow = i + j*ratio[0] +k*ratio[0]*ratio[1];
+
+       rhs(irow) = ((fine[0](fi+i+1,fj+j,  fk+k,  nf)  -fine[0](fi+i,fj+j,fk+k,nf))*dxinv
+                 +  (fine[1](fi+i  ,fj+j+1,fk+k,  nf) - fine[1](fi+i,fj+j,fk+k,nf))*dyinv
+                 +  (fine[2](fi+i,  fj+j,  fk+k+1,nf) - fine[2](fi+i,fj+j,fk+k,nf))*dzinv) - target_div;
+    }
+    }
+    }
+
+//  solver the system
+
+    lusolver(phi.begin(), rhs.begin());
+
+//  this is for sanity check
+//  can be put in if needed
+
+//    Array3D<Real,0,ratio[0]-1,0,ratio[1]-1,0,ratio[2]-1> phi_reshape;
+
+//    for( int i=0; i < ratio[0]  , ++j){
+//    for( int j=0; j < ratio[1]  , ++j){
+//    for( int k=0; k < ratio[2]  , ++k){
+
+//       int irow = i + j*ratio[0] +k*ratio[0]*ratio[1];
+
+//       phi_reshape(i,j,k) = phi(irow)
+
+//    }
+//    }
+//    }
+
+
+    for( int k=0; k < ratio[2]; ++k){
+    for( int j=0; j < ratio[1]; ++j){
+    for( int i=1; i < ratio[0]; ++i){
+
+       int plus  = i     + j*ratio[0] +k*ratio[0]*ratio[1];
+       int minus = i - 1 + j*ratio[0] +k*ratio[0]*ratio[1];
+//       fine[0](fi + i, fj + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i-1,j,k))*dxinv
+       fine[0](fi + i, fj + j, fk + k, nf) += ( phi(plus) - phi(minus))*dxinv;
+
+    }
+    }
+    }
+
+    for( int k=0; k < ratio[2]; ++k){
+    for( int j=1; j < ratio[1]; ++j){
+    for( int i=0; i < ratio[0]; ++i){
+
+       int plus  = i  +     j*ratio[0] +k*ratio[0]*ratio[1];
+       int minus = i  + (j-1)*ratio[0] +k*ratio[0]*ratio[1];
+//       fine[1](fi + i, fj + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i,j-1,k))*dyinv
+       fine[1](fi + i, fj + j, fk + k, nf) += ( phi(plus) - phi(minus))*dyinv;
+
+    }
+    }
+    }
+
+    for( int k=1; k < ratio[2]; ++k){
+    for( int j=0; j < ratio[1]; ++j){
+    for( int i=0; i < ratio[0]; ++i){
+
+       int plus  = i  + j*ratio[0] + k    *ratio[0]*ratio[1];
+       int minus = i  + j*ratio[0] + (k-1)*ratio[0]*ratio[1];
+//       fine[2](fi + i, fj + j, fk + k, nf) += ( phi_reshape(i,j,k) - phi_reshape(i,j,k-1))*dzinv
+       fine[2](fi + i, fj + j, fk + k, nf) += ( phi(plus) - phi(minus))*dzinv;
+
+    }
+    }
+    }
+
+
+}
+
 template<typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
 face_linear_interp_x (int i, int j, int k, int n, Array4<T> const& fine,
@@ -414,7 +733,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar,
         for         (int k = klo; k <= khi; ++k) {
             for     (int j = jlo; j <= jhi; ++j) {
                 for (int i = ilo; i <= ihi; ++i) {
-                    if ((fine_state(i,j,k,n) + fine(i,j,k,n)) < 0.0) {
+                    if ((fine_state(i,j,k,n) + fine(i,j,k,n)) < Real(0.0)) {
                         redo_me = true;
                     }
                 }
@@ -444,14 +763,14 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar,
              *
              * SumP = volume-weighted sum of all positive values of fine_state
              */
-            Real crseTot = 0.0;
-            Real SumN = 0.0;
-            Real SumP = 0.0;
+            Real crseTot = Real(0.0);
+            Real SumN = Real(0.0);
+            Real SumP = Real(0.0);
             for         (int k = klo; k <= khi; ++k) {
                 for     (int j = jlo; j <= jhi; ++j) {
                     for (int i = ilo; i <= ihi; ++i) {
                         crseTot += fine(i,j,k,n);
-                        if (fine_state(i,j,k,n) <= 0.0) {
+                        if (fine_state(i,j,k,n) <= Real(0.0)) {
                             SumN += fine_state(i,j,k,n);
                         } else {
                             SumP += fine_state(i,j,k,n);
@@ -477,13 +796,13 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar,
                         for (int i = ilo; i <= ihi; ++i) {
 
                             // Fill in negative values first.
-                            if (fine_state(i,j,k,n) < 0.0) {
+                            if (fine_state(i,j,k,n) < Real(0.0)) {
                                 fine(i,j,k,n) = -fine_state(i,j,k,n);
                             }
 
                             // Then, add the remaining positive proportionally.
-                            if (SumP > 0.0) {
-                                if (fine_state(i,j,k,n) > 0.0) {
+                            if (SumP > Real(0.0)) {
+                                if (fine_state(i,j,k,n) > Real(0.0)) {
                                     Real alpha = (crseTot - std::abs(SumN)) / SumP;
                                     fine(i,j,k,n) = alpha * fine_state(i,j,k,n);
                                 }
@@ -512,12 +831,12 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar,
                     for     (int j = jlo; j <= jhi; ++j) {
                         for (int i = ilo; i <= ihi; ++i) {
                             Real alpha = crseTot / std::abs(SumN);
-                            if (fine_state(i,j,k,n) < 0.0) {
+                            if (fine_state(i,j,k,n) < Real(0.0)) {
                                 // Add correction to negative cells proportionally.
                                 fine(i,j,k,n) = alpha * std::abs(fine_state(i,j,k,n));
                             } else {
                                 // Don't touch the positive states.
-                                fine(i,j,k,n) = 0.0;
+                                fine(i,j,k,n) = Real(0.0);
                             }
 
                         }
@@ -548,7 +867,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar,
                 }
 
             } else if ( (crseTot < 0) && (std::abs(crseTot) < SumP) &&
-                        ((SumP+SumN+crseTot) > 0.0) )  {
+                        ((SumP+SumN+crseTot) > Real(0.0)) )  {
 
                 /*
                  * Special case 4:
@@ -562,7 +881,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar,
                     for     (int j = jlo; j <= jhi; ++j) {
                         for (int i = ilo; i <= ihi; ++i) {
 
-                            if (fine_state(i,j,k,n) < 0.0) {
+                            if (fine_state(i,j,k,n) < Real(0.0)) {
                                 // Absorb the negative correction
                                 fine(i,j,k,n) = -fine_state(i,j,k,n);
                             } else {
@@ -576,7 +895,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar,
                 }
 
             } else if ( (crseTot < 0) && (std::abs(crseTot) < SumP) &&
-                        ((SumP+SumN+crseTot) < 0.0) )  {
+                        ((SumP+SumN+crseTot) < Real(0.0)) )  {
                 /*
                  * Special case 5:
                  *
@@ -592,7 +911,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar,
                     for     (int j = jlo; j <= jhi; ++j) {
                         for (int i = ilo; i <= ihi; ++i) {
 
-                            if (fine_state(i,j,k,n) > 0.0) {
+                            if (fine_state(i,j,k,n) > Real(0.0)) {
                                 // Don't touch the negatives
                                 fine(i,j,k,n) = -fine_state(i,j,k,n);
                             } else {
@@ -613,7 +932,7 @@ void ccprotect_3d (int ic, int jc, int kc, int nvar,
     for         (int k = klo; k <= khi; ++k) {
         for     (int j = jlo; j <= jhi; ++j) {
             for (int i = ilo; i <= ihi; ++i) {
-                fine(i,j,k,0) = 0.0;
+                fine(i,j,k,0) = Real(0.0);
                 for (int n = 1; n < nvar-1; ++n) {
                     fine(i,j,k,0) += fine(i,j,k,n);
                 }
diff --git a/Src/AmrCore/AMReX_Interpolater.H b/Src/AmrCore/AMReX_Interpolater.H
index 10e60eef41d..ae9bfc430d2 100644
--- a/Src/AmrCore/AMReX_Interpolater.H
+++ b/Src/AmrCore/AMReX_Interpolater.H
@@ -5,6 +5,11 @@
 #include <AMReX_Extension.H>
 #include <AMReX_GpuControl.H>
 #include <AMReX_InterpBase.H>
+#include <AMReX_LUSolver.H>
+#include <AMReX_Vector.H>
+
+#include <map>
+#include <variant>
 
 /**
  * \file AMReX_Interpolater.H
@@ -543,6 +548,9 @@ class FaceDivFree
     public Interpolater
 {
 public:
+
+    FaceDivFree ();
+
     /**
     * \brief Returns coarsened box given fine box and refinement ratio.
     *
@@ -622,8 +630,29 @@ public:
                      int         actual_comp,
                      int         actual_state,
                      RunOn       runon) override;
-};
 
+private:
+
+#if (AMREX_SPACEDIM == 2)
+    using Solver_t = std::variant<LUSolver< 4,Real>, // IntVect(2,2): refinement
+                                  LUSolver< 8,Real>, // IntVect(4,2)
+                                  LUSolver< 8,Real>, // IntVect(2,4)
+                                  LUSolver<16,Real>>;// IntVect(4,4)
+#elif (AMREX_SPACEDIM == 3)
+    using Solver_t = std::variant<LUSolver< 8,Real>, // IntVect(2,2,2): refinement
+                                  LUSolver<16,Real>, // IntVect(4,2,2)
+                                  LUSolver<16,Real>, // IntVect(2,4,2)
+                                  LUSolver<32,Real>, // IntVect(4,4,2)
+                                  LUSolver<16,Real>, // IntVect(2,2,4)
+                                  LUSolver<32,Real>, // IntVect(4,2,4)
+                                  LUSolver<32,Real>, // IntVect(2,4,4)
+                                  LUSolver<64,Real>>;// IntVect(4,4,4)
+#endif
+
+#if (AMREX_SPACEDIM > 1)
+    Vector<std::map<Box,Solver_t>> m_lusolver;
+#endif
+};
 
 /**
 * \brief Piecewise constant tangential interpolation / linear normal interpolation of face data.
diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp
index c25b53fa4f2..a0b247996db 100644
--- a/Src/AmrCore/AMReX_Interpolater.cpp
+++ b/Src/AmrCore/AMReX_Interpolater.cpp
@@ -1,10 +1,12 @@
 
+#include <AMReX_ConstexprFor.H>
 #include <AMReX_FArrayBox.H>
-#include <AMReX_IArrayBox.H>
 #include <AMReX_Geometry.H>
+#include <AMReX_IArrayBox.H>
 #include <AMReX_Interpolater.H>
 #include <AMReX_Interp_C.H>
 #include <AMReX_MFInterp_C.H>
+#include <AMReX_OpenMP.H>
 
 #include <climits>
 
@@ -1295,6 +1297,13 @@ CellConservativeQuartic::interp (const FArrayBox&  crse,
     });
 }
 
+FaceDivFree::FaceDivFree ()
+    : Interpolater()
+#if (AMREX_SPACEDIM > 1)
+      , m_lusolver(OpenMP::get_max_threads())
+#endif
+{}
+
 Box
 FaceDivFree::CoarseBox (const Box& fine,
                         int        ratio)
@@ -1345,14 +1354,26 @@ FaceDivFree::interp_arr (Array<FArrayBox*, AMREX_SPACEDIM> const& crse,
                          const int         /*actual_state*/,
                          const RunOn       runon)
 {
+#if (AMREX_SPACEDIM == 1)
+    amrex::ignore_unused(crse,crse_comp,fine,fine_comp,ncomp,fine_region,ratio,
+                         solve_mask,fine_geom,runon);
+#else
+
     BL_PROFILE("FaceDivFree::interp()");
 
     Array<IndexType, AMREX_SPACEDIM> types;
     for (int d=0; d<AMREX_SPACEDIM; ++d)
         { types[d].set(d); }
 
-    // This is currently only designed for octree, where ratio = 2.
-    AMREX_ALWAYS_ASSERT(ratio == 2);
+#ifdef AMREX_USE_GPU
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ratio == 2 || runon == RunOn::Device,
+                                     "Must run on GPU if ratio is not 2");
+#endif
+
+    for (int d = 0; d < AMREX_SPACEDIM; ++d) {
+        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ratio[d] == 2 || ratio[d] == 4,
+                                         "Only refinement ratio of 2 or 4 is supported");
+    }
 
     const Box c_fine_region = amrex::coarsen(fine_region, ratio);
     GpuArray<Real, AMREX_SPACEDIM> cell_size = fine_geom.CellSizeArray();
@@ -1368,6 +1389,11 @@ FaceDivFree::interp_arr (Array<FArrayBox*, AMREX_SPACEDIM> const& crse,
             { maskarr[d] = solve_mask[d]->const_array(0); }
     }
 
+
+
+//  JBB 1.  don't know what mark does here
+//  JBB 2.  build and factor matrix here or build and factor it earlier somehow
+
     // Fuse the launches, 1 for each dimension, into a single launch.
     AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(runon,
               amrex::convert(c_fine_region,types[0]), bx0,
@@ -1376,7 +1402,7 @@ FaceDivFree::interp_arr (Array<FArrayBox*, AMREX_SPACEDIM> const& crse,
                   {
                       for (int n=0; n<ncomp; ++n)
                       {
-                          amrex::facediv_face_interp<Real> (i,j,k,crse_comp+n,fine_comp+n, 0,
+                          amrex::facediv_face_interp<Real> (i,j,k,n, 0,
                                                             crsearr[0], finearr[0], maskarr[0], ratio);
                       }
                   });
@@ -1387,7 +1413,7 @@ FaceDivFree::interp_arr (Array<FArrayBox*, AMREX_SPACEDIM> const& crse,
                   {
                       for (int n=0; n<ncomp; ++n)
                       {
-                          amrex::facediv_face_interp<Real> (i,j,k,crse_comp+n,fine_comp+n, 1,
+                          amrex::facediv_face_interp<Real> (i,j,k,n, 1,
                                                             crsearr[1], finearr[1], maskarr[1], ratio);
                       }
                   });
@@ -1398,16 +1424,62 @@ FaceDivFree::interp_arr (Array<FArrayBox*, AMREX_SPACEDIM> const& crse,
                   {
                       for (int n=0; n<ncomp; ++n)
                       {
-                          amrex::facediv_face_interp<Real> (i,j,k,crse_comp+n,fine_comp+n, 2,
+                          amrex::facediv_face_interp<Real> (i,j,k,n, 2,
                                                             crsearr[2], finearr[2], maskarr[2], ratio);
                       }
                   });
               });
 
-    AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n,
-    {
-        amrex::facediv_int<Real>(i, j, k, fine_comp+n, finearr, ratio, cell_size);
-    });
+    if (ratio == 2) {
+        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,c_fine_region,ncomp,i,j,k,n,
+        {
+            amrex::facediv_int<Real>(i, j, k, n, finearr, ratio, cell_size);
+        });
+    } else {
+        auto& lusolver = m_lusolver[OpenMP::get_thread_num()][fine_geom.Domain()];
+        void* psolver_h = nullptr;
+        std::size_t solver_size = 0;
+
+        constexpr int ncases = AMREX_D_TERM(2,*2,*2);
+        int rr_case = -1;
+        // Case 0 is ratio == 2. It has been handled in the if-block.
+        constexpr_for<1,ncases>([&] (auto i)
+        {
+            if (facediv_get_refratio<i>() == ratio) {
+                AMREX_ASSERT(rr_case == -1);
+                rr_case = i;
+                if (lusolver.index() != i) {
+                    lusolver.template emplace<i>(
+                        facediv_init_lusolver
+                        <std::variant_alternative_t<i,Solver_t>::size>(ratio, cell_size));
+                }
+                psolver_h = (void*) &(std::get<i>(lusolver));
+                solver_size = sizeof(std::variant_alternative_t<i,Solver_t>);
+            }
+        });
+        AMREX_ASSERT(rr_case > 0 && rr_case < ncases && psolver_h);
+
+#ifdef AMREX_USE_GPU
+        auto* psolver_d = (void*) The_Async_Arena()->alloc(solver_size);
+        Gpu::htod_memcpy_async(psolver_d, psolver_h, solver_size);
+#else
+        auto* psolver_d = psolver_h;
+#endif
+
+        ParallelFor(TypeList<CTOSeq<0,ncases>>{}, {rr_case},
+                    c_fine_region, ncomp,
+                    [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, auto rrc)
+        {
+            auto const* ps = (std::variant_alternative_t<rrc,Solver_t> const*)psolver_d;
+            amrex::facediv_int_gen<Real>(i, j, k, n, finearr, ratio, cell_size, *ps);
+        });
+
+#ifdef AMREX_USE_GPU
+        The_Async_Arena()->free(psolver_d);
+#endif
+    }
+
+#endif
 }
 
 Box
diff --git a/Src/Base/AMReX_CTOParallelForImpl.H b/Src/Base/AMReX_CTOParallelForImpl.H
index 99dc20b452a..80abe87b1ae 100644
--- a/Src/Base/AMReX_CTOParallelForImpl.H
+++ b/Src/Base/AMReX_CTOParallelForImpl.H
@@ -6,6 +6,7 @@
 #include <AMReX_TypeList.H>
 
 #include <array>
+#include <utility>
 #include <type_traits>
 
 /* This header is not for the users to include directly.  It's meant to be
@@ -73,9 +74,17 @@ namespace detail
         amrex::ignore_unused(found_option);
         AMREX_ASSERT(found_option);
     }
+
+    template <int Begin, int End, int... I>
+    constexpr auto MakeCTOImpl(std::integer_sequence<int, I...>)
+        -> CompileTimeOptions<Begin + I...>;
 }
 /// \endcond
 
+template <int Begin, int End>
+using CTOSeq = decltype(detail::MakeCTOImpl<Begin, End>(
+                            std::make_integer_sequence<int, End - Begin>{}));
+
 /**
  * \brief Compile time optimization of kernels with run time options.
  *
diff --git a/Src/Base/AMReX_LUSolver.H b/Src/Base/AMReX_LUSolver.H
index bd60fc609dc..37ed6f8e2ef 100644
--- a/Src/Base/AMReX_LUSolver.H
+++ b/Src/Base/AMReX_LUSolver.H
@@ -16,6 +16,8 @@ class LUSolver
 {
 public:
 
+    static constexpr int size = N;
+
     LUSolver () = default;
 
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
diff --git a/Tests/DivFreePatch/inputs b/Tests/DivFreePatch/inputs
index e5245751940..9fb2965336e 100644
--- a/Tests/DivFreePatch/inputs
+++ b/Tests/DivFreePatch/inputs
@@ -3,6 +3,11 @@ f_offset = 8
 
 max_grid_size = 128 
 
+# default refinement ratio (can set anisotropic values like 4 2 2)
+ref_ratio = 2 2 2
+
+div_tol = 1e-9
+
 # For testing nested vs. non-nested InterpFromCoarse
 #nghost_f = 1
 
diff --git a/Tests/DivFreePatch/main.cpp b/Tests/DivFreePatch/main.cpp
index 857461a1f64..fde3919394f 100644
--- a/Tests/DivFreePatch/main.cpp
+++ b/Tests/DivFreePatch/main.cpp
@@ -29,10 +29,10 @@ void setupMF(MultiFab& mf, const int type = 0, const BoxArray& exclude = BoxArra
         {
             Box bx = ba[bid];
 
-            amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+            amrex::ParallelForRNG(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k, RandomEngine const& eng) noexcept
             {
                 if (type == 0)
-                    { arr(i,j,k) = amrex::Random()*10; }
+                    { arr(i,j,k) = amrex::Random(eng)*10; }
                 else if (type == 1)
                     { arr(i,j,k) = double(i)+double(j)+double(k); }
                 else if (type == 2)
@@ -135,6 +135,8 @@ void main_main ()
     int f_offset = 4;
     int nghost_c = 1;
     int nghost_f = 2;
+    Real div_tol = 1.0e-9;
+    bool test_pass = true;
 
     amrex::Vector<int> c_lo(AMREX_SPACEDIM,  0);
     amrex::Vector<int> c_hi(AMREX_SPACEDIM, 32);
@@ -142,6 +144,8 @@ void main_main ()
     amrex::Vector<int> f_hi(AMREX_SPACEDIM,  4);
     int max_grid_size = 64;
 
+    Vector<int> ref_ratio_vec(AMREX_SPACEDIM, 2);
+
     {
         ParmParse pp;
         pp.query("n_cell", n_cell);
@@ -149,11 +153,29 @@ void main_main ()
         pp.query("max_grid_size", max_grid_size);
         pp.query("nghost_c", nghost_c);
         pp.query("nghost_f", nghost_f);
+        pp.query("div_tol", div_tol);
 
         pp.queryarr("c_hi",  c_hi, 0, AMREX_SPACEDIM);
         pp.queryarr("f_lo",  f_lo, 0, AMREX_SPACEDIM);
         pp.queryarr("f_hi",  f_hi, 0, AMREX_SPACEDIM);
 
+        int n_ref_ratio = pp.countval("ref_ratio");
+        if (n_ref_ratio == 1) {
+            int rr = 0;
+            pp.get("ref_ratio", rr);
+            for (int i=0; i<AMREX_SPACEDIM; ++i) {
+                ref_ratio_vec[i] = rr;
+            }
+        } else if (n_ref_ratio >= AMREX_SPACEDIM) {
+            Vector<int> tmp(n_ref_ratio);
+            pp.getarr("ref_ratio", tmp, 0, n_ref_ratio);
+            for (int i=0; i<AMREX_SPACEDIM; ++i) {
+                ref_ratio_vec[i] = tmp[i];
+            }
+        } else if (n_ref_ratio > 0) {
+            amrex::Abort("ref_ratio must contain either 1 value or >= AMREX_SPACEDIM values");
+        }
+
         if (n_cell != 0)
         {
             for (int i=0; i < AMREX_SPACEDIM; ++i)
@@ -165,8 +187,15 @@ void main_main ()
         }
     }
 
+    for (int i = 0; i < AMREX_SPACEDIM; ++i) {
+        if (ref_ratio_vec[i] != 2 && ref_ratio_vec[i] != 4) {
+            amrex::Abort("DivFreePatch requires ref_ratio entries of 2 or 4");
+        }
+    }
+
+    IntVect ratio(ref_ratio_vec);
+
     int ncomp = 1;
-    IntVect ratio{AMREX_D_DECL(2,2,2)};    // For this stencil (octree), always 2.
     IntVect ghost_c{AMREX_D_DECL(nghost_c, nghost_c, nghost_c)};  // For this stencil (octree), need 1 coarse ghost.
     IntVect ghost_f{AMREX_D_DECL(nghost_f, nghost_f, nghost_f)};  // For this stencil (octree), need 1 fine ghost.
     Geometry c_geom, f_geom, f_geom_wghost, f_geom_all, f_geom_partial;
@@ -375,12 +404,43 @@ void main_main ()
                               ratio, mapper, bcrec, 0);
     }
 
-    // Check for errors
+    bool any_ratio_ne_two = false;
+    for (int d=0; d<AMREX_SPACEDIM; ++d) {
+        if (ratio[d] != 2) { any_ratio_ne_two = true; break; }
+    }
+
+    if (any_ratio_ne_two) {
+        Array<MultiFab, AMREX_SPACEDIM> averaged_faces;
+        for (int d=0; d<AMREX_SPACEDIM; ++d) {
+            averaged_faces[d].define(c_mf_faces[d].boxArray(),
+                                     c_mf_faces[d].DistributionMap(),
+                                     c_mf_faces[d].nComp(), 0);
+        }
+        Array<const MultiFab*, AMREX_SPACEDIM> fine_face_ptrs
+            {AMREX_D_DECL(&f_mf_faces[0], &f_mf_faces[1], &f_mf_faces[2])};
+        Array<MultiFab*, AMREX_SPACEDIM> avg_face_ptrs
+            {AMREX_D_DECL(&averaged_faces[0], &averaged_faces[1], &averaged_faces[2])};
+        amrex::average_down_faces(fine_face_ptrs, avg_face_ptrs, ratio, 0);
+        for (int d=0; d<AMREX_SPACEDIM; ++d) {
+            MultiFab diff(averaged_faces[d].boxArray(), averaged_faces[d].DistributionMap(), 1, 0);
+            MultiFab::Copy(diff, averaged_faces[d], 0, 0, 1, 0);
+            diff.minus(c_mf_faces[d], 0, 1, 0);
+            diff.abs(0, 1);
+            Real max_face_diff = diff.max(0);
+            amrex::Print() << "Face average diff dir " << d << " = " << max_face_diff << '\n';
+        }
+    }
+
+    bool interp_nan = false;
     for (int i=0; i<AMREX_SPACEDIM; ++i) {
         if (f_mf_faces[i].contains_nan()) {
             amrex::Print() << "******** Nans present in fine velocity in dimension " << i << '\n';
+            interp_nan = true;
         }
     }
+    if (interp_nan) {
+        test_pass = false;
+    }
 
     AMREX_D_TERM( amrex::VisMF::Write(f_mf_faces[0], std::string("pltfiles/fx"));,
                   amrex::VisMF::Write(f_mf_faces[1], std::string("pltfiles/fy"));,
@@ -396,6 +456,8 @@ void main_main ()
     div_fine_wg.ParallelCopy(div_fine, 0, 0, 1, ghost_f, IntVect::TheZeroVector());
     amrex::VisMF::Write(div_fine, std::string("pltfiles/fine"));
     amrex::VisMF::Write(div_fine_wg, std::string("pltfiles/finewg"));
+    amrex::Print() << "   max|div_fine| = " << div_fine.norm0(0) << '\n';
+    amrex::Print() << "   max|div_refined_coarse| = " << div_refined_coarse.norm0(0) << '\n';
 
     for (int i=0; i<AMREX_SPACEDIM; ++i)
     {
@@ -406,13 +468,20 @@ void main_main ()
                   amrex::VisMF::Write(f_mf_faces_wg[1], std::string("pltfiles/fwgy"));,
                   amrex::VisMF::Write(f_mf_faces_wg[2], std::string("pltfiles/fwgz"));  );
 
+    Real max_abs_interp = MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "diff");
+    Real max_rel_interp = MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "", true);
     amrex::Print() << " Max InterpFromCoarse divergence error: absolute         relative\n "
                    << "                                       "
-                   <<MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "diff")
+                   << max_abs_interp
                    << "  "
-                   <<MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "", true)
+                   << max_rel_interp
                    << '\n';
 
+    if (max_abs_interp > div_tol) {
+        amrex::Print() << "  InterpFromCoarse abs error exceeds tolerance " << div_tol << '\n';
+        test_pass = false;
+    }
+
 // ***************************************************************
 
     // Change coarse values, save current fine values for checking
@@ -509,24 +578,34 @@ void main_main ()
     calcDiv(f_mf_faces, div_fine, f_geom.CellSizeArray());
     amrex::VisMF::Write(div_fine, std::string("pltfiles/fineFP"));
 
+    Real max_abs_fp = MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "diffFP");
+    Real max_rel_fp = MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "", true);
     amrex::Print() << " Max FillPatchTwoLevels divergence error:  absolute         relative\n "
                    << "                                       "
-                   <<MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "diffFP")
+                   << max_abs_fp
                    << "  "
-                   <<MFdiff(div_fine, div_refined_coarse, 0, 1, nghost_f, "", true)
+                   << max_rel_fp
                    << '\n';
+    if (max_abs_fp > div_tol) {
+        amrex::Print() << "  FillPatch abs error exceeds tolerance " << div_tol << '\n';
+        test_pass = false;
+    }
 
     for (int i=0; i<AMREX_SPACEDIM; ++i)
     {
         f_mf_faces_wg[i].ParallelCopy(f_mf_faces[i], 0, 0, 1, ghost_f, IntVect::TheZeroVector());
     }
 
-    // Check for errors
+    bool fp_nan = false;
     for (int i=0; i<AMREX_SPACEDIM; ++i) {
         if (f_mf_faces_wg[i].contains_nan()) {
             amrex::Print() << "******** Nans present in fine velocity after FillPatch (including ghosts)  in dimension " << i << '\n';
+            fp_nan = true;
         }
     }
+    if (fp_nan) {
+        test_pass = false;
+    }
 
     AMREX_D_TERM( amrex::VisMF::Write(f_mf_faces_wg[0], std::string("pltfiles/fwgxFP"));,
                   amrex::VisMF::Write(f_mf_faces_wg[1], std::string("pltfiles/fwgyFP"));,
@@ -534,4 +613,11 @@ void main_main ()
 
 // ***************************************************************
 
+    if (test_pass) {
+        amrex::Print() << "*** DivFreePatch: PASS (tol = " << div_tol << ")\n";
+    } else {
+        amrex::AllPrint() << "*** DivFreePatch: FAIL (tol = " << div_tol << ")\n";
+        amrex::Abort("DivFreePatch failed");
+    }
+
 }