AMReX-Codes · WeiqunZhang · Feb 22, 2026 · Mar 26, 2026 · Mar 26, 2026
diff --git a/Src/LinearSolvers/AMReX_AlgPartition.H b/Src/LinearSolvers/AMReX_AlgPartition.H
@@ -10,27 +10,50 @@
 
 namespace amrex {
 
+/**
+ * \file AMReX_AlgPartition.H
+ *
+ * Lightweight helpers that describe how global sparse rows are distributed
+ * across MPI ranks for algebra kernels.
+ */
+
 class AlgPartition
 {
 public:
+    //! Construct an empty partition that must be defined later.
     AlgPartition ();
+    //! Evenly partition \p global_size rows across all ranks.
     explicit AlgPartition (Long global_size);
+    //! Adopt an explicit prefix array \p rows describing local row extents.
     explicit AlgPartition (Vector<Long> const& rows);
+    //! Adopt an explicit prefix array \p rows, transferring ownership.
     explicit AlgPartition (Vector<Long>&& rows) noexcept;
 
+    //! Recompute an even partition spanning \p global_size rows.
     void define (Long global_size);
+    //! Replace the partition with the explicit prefix array \p rows.
     void define (Vector<Long> const& rows);
+    //! Replace the partition, taking ownership of \p rows.
     void define (Vector<Long>&& rows);
 
+    //! True if the partition contains no rows.
     [[nodiscard]] bool empty () const { return m_ref->m_row.empty(); }
 
+    //! Starting global row index owned by rank \p i.
     [[nodiscard]] Long operator[] (int i) const { return m_ref->m_row[i]; }
+    /**
+     * \brief Total number of rows covered by the partition.
+     */
     [[nodiscard]] Long numGlobalRows () const { return m_ref->m_row.back(); }
+    //! Number of MPI ranks that own at least one row.
     [[nodiscard]] int numActiveProcs () const { return m_ref->m_n_active_procs; }
 
+    //! Underlying prefix array describing row offsets (size nproc+1).
     [[nodiscard]] Vector<Long> const& dataVector () const { return m_ref->m_row; }
 
+    //! Compare partitions for identical layouts.
     [[nodiscard]] bool operator== (AlgPartition const& rhs) const noexcept;
+    //! Negation of operator==.
     [[nodiscard]] bool operator!= (AlgPartition const& rhs) const noexcept;
 
 private:

diff --git a/Src/LinearSolvers/AMReX_AlgVecUtil.H b/Src/LinearSolvers/AMReX_AlgVecUtil.H
@@ -6,6 +6,13 @@
 
 namespace amrex {
 
+/**
+ * \file AMReX_AlgVecUtil.H
+ *
+ * Device-friendly helpers for element-wise operations on AlgVector
+ * specializations (ForEach, BLAS-like routines, etc.).
+ */
+
 template <class V, class Enable = void> struct IsAlgVector : std::false_type {};
 //
 template <class V>
@@ -14,6 +21,13 @@ struct IsAlgVector<V, std::enable_if_t<std::is_same_v<AlgVector<typename V::valu
                                                       V> > >
     : std::true_type {};
 
+/**
+ * \brief Apply \p f to every element of \p x.
+ *
+ * \param x AlgVector whose entries are visited in place.
+ * \param f Callable with signature `void(Value&)` that will be executed on the
+ *          device/host for each local row.
+ */
 template <typename V1, typename F>
 std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value>
 ForEach (V1 & x, F const& f)
@@ -26,6 +40,13 @@ ForEach (V1 & x, F const& f)
     });
 }
 
+/**
+ * \brief Apply \p f to paired entries from \p x and \p y.
+ *
+ * \param x First AlgVector; entries may be modified by \p f.
+ * \param y Second AlgVector; entries may be modified by \p f.
+ * \param f Callable with signature `void(X&, Y&)`.
+ */
 template <typename V1, typename V2, typename F>
 std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                  IsAlgVector<std::decay_t<V2> >::value>
@@ -41,6 +62,14 @@ ForEach (V1 & x, V2 & y, F const& f)
     });
 }
 
+/**
+ * \brief Apply \p f to triplets drawn from \p x, \p y, and \p z.
+ *
+ * \param x First AlgVector; entries may be modified by \p f.
+ * \param y Second AlgVector; entries may be modified by \p f.
+ * \param z Third AlgVector; entries may be modified by \p f.
+ * \param f Callable with signature `void(X&, Y&, Z&)`.
+ */
 template <typename V1, typename V2, typename V3, typename F>
 std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                  IsAlgVector<std::decay_t<V2> >::value &&
@@ -59,6 +88,15 @@ ForEach (V1 & x, V2 & y, V3 & z, F const& f)
     });
 }
 
+/**
+ * \brief Apply \p f to tuples drawn from four AlgVectors.
+ *
+ * \param x First AlgVector; entries may be modified by \p f.
+ * \param y Second AlgVector; entries may be modified by \p f.
+ * \param z Third AlgVector; entries may be modified by \p f.
+ * \param a Fourth AlgVector; entries may be modified by \p f.
+ * \param f Callable with signature `void(X&, Y&, Z&, A&)`.
+ */
 template <typename V1, typename V2, typename V3, typename V4, typename F>
 std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                  IsAlgVector<std::decay_t<V2> >::value &&
@@ -80,6 +118,16 @@ ForEach (V1 & x, V2 & y, V3 & z, V4 & a, F const& f)
     });
 }
 
+/**
+ * \brief Apply \p f to tuples spanning five AlgVectors.
+ *
+ * \param x First AlgVector; entries may be modified by \p f.
+ * \param y Second AlgVector; entries may be modified by \p f.
+ * \param z Third AlgVector; entries may be modified by \p f.
+ * \param a Fourth AlgVector; entries may be modified by \p f.
+ * \param b Fifth AlgVector; entries may be modified by \p f.
+ * \param f Callable with signature `void(X&, Y&, Z&, A&, B&)`.
+ */
 template <typename V1, typename V2, typename V3, typename V4, typename V5, typename F>
 std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
                  IsAlgVector<std::decay_t<V2> >::value &&
@@ -104,9 +152,15 @@ ForEach (V1 & x, V2 & y, V3 & z, V4 & a, V5 & b, F const& f)
     });
 }
 
-//! Return dot product of two vectors. By default, this returns the global
-//! result over all MPI ranks. If `local` is true, it returns the result
-//! over the locally stored entries only.
+/**
+ * \brief Dot product of two AlgVectors with optional local-only reduction.
+ *
+ * \param x First vector.
+ * \param y Second vector.
+ * \param local Set true to skip the MPI reduction and return only this rank's
+ *              contribution.
+ * \return Dot product of \p x and \p y (global unless \p local is true).
+ */
 template <typename T, typename Allocator>
 T Dot (AlgVector<T,Allocator> const& x, AlgVector<T,Allocator> const& y, bool local = false)
 {
@@ -124,24 +178,21 @@ T Dot (AlgVector<T,Allocator> const& x, AlgVector<T,Allocator> const& y, bool lo
     return r;
 }
 
-//! y = ax + y. For GPU guilds, this function is asynchronous with respect
-//! to the host.
+//! y = ax + y. For GPU builds this is asynchronous with respect to the host.
 template <typename T, typename Allocator>
 void Axpy (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& x)
 {
     ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi += a*xi; });
 }
 
-//! y = x + a*y. For GPU guilds, this function is asynchronous with respect
-//! to the host.
+//! y = x + a*y. For GPU builds this is asynchronous with respect to the host.
 template <typename T, typename Allocator>
 void Xpay (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& x)
 {
     ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi = a*yi + xi; });
 }
 
-//! y = a*xa + b*xb. For GPU guilds, this function is asynchronous with
-//! respect to the host.
+//! y = a*xa + b*xb. For GPU builds this is asynchronous with respect to the host.
 template <typename T, typename Allocator>
 void LinComb (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& xa,
               T b, AlgVector<T,Allocator> const& xb)

diff --git a/Src/LinearSolvers/AMReX_AlgVector.H b/Src/LinearSolvers/AMReX_AlgVector.H
@@ -15,6 +15,15 @@
 
 namespace amrex {
 
+/**
+ * \file AMReX_AlgVector.H
+ *
+ * Distributed dense vector container backed by AlgPartition with host/GPU helpers.
+ */
+
+/**
+ * \brief Distributed dense vector that mirrors the layout of an AlgPartition.
+ */
 template <typename T, typename Allocator = DefaultAllocator<T> >
 class AlgVector
 {
@@ -24,8 +33,11 @@ public:
 
     using Vec = PODVector<T,Allocator>;
 
+    //! Create an empty vector; call define() before use.
     AlgVector () = default;
+    //! Allocate and partition \p global_size rows across MPI ranks.
     explicit AlgVector (Long global_size);
+    //! Adopt an explicit partition that describes the data layout.
     explicit AlgVector (AlgPartition partition);
 
     AlgVector (AlgVector<T, Allocator> const&) = delete;
@@ -36,14 +48,31 @@ public:
 
     ~AlgVector () = default;
 
+    /**
+     * \brief Resize/repartition the vector to span \p global_size rows.
+     *
+     * Existing contents are discarded; rows are divided evenly across
+     * participating ranks.
+     *
+     * \param global_size Total number of entries in the vector.
+     */
     void define (Long global_size);
+    /**
+     * \brief Replace the current partition and storage with \p partition.
+     *
+     * \param partition Partition describing the global layout to adopt.
+     */
     void define (AlgPartition partition);
 
+    //! True if the local storage holds zero entries.
     [[nodiscard]] bool empty () const { return m_partition.empty(); }
 
+    //! Partition describing the global layout of the vector.
     [[nodiscard]] AlgPartition const& partition () const { return m_partition; }
 
+    //! Number of entries stored on this rank.
     [[nodiscard]] Long numLocalRows () const { return m_end - m_begin; }
+    //! Global number of rows.
     [[ nodiscard]] Long numGlobalRows () const { return m_partition.numGlobalRows(); }
 
     //! Inclusive global index begin.
@@ -388,6 +417,11 @@ std::pair<T,T> AlgVector<T,Allocator>::norm1and2 (bool local) const
     }
 }
 
+/**
+ * \brief Flatten a cell-centered FabArray into this vector's storage.
+ *
+ * \param fa Source data; each valid box is copied in IndexType order.
+ */
 template <typename T, typename Allocator>
 template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
                                          std::is_same_v<T,typename FAB::value_type>, int> >
@@ -410,6 +444,11 @@ void AlgVector<T,Allocator>::copyFrom (FabArray<FAB> const& fa)
     }
 }
 
+/**
+ * \brief Scatter this vector back into a cell-centered FabArray.
+ *
+ * \param fa Destination data that receives entries owned by this rank.
+ */
 template <typename T, typename Allocator>
 template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
                                          std::is_same_v<T,typename FAB::value_type>, int> >

diff --git a/Src/LinearSolvers/AMReX_Algebra.H b/Src/LinearSolvers/AMReX_Algebra.H
@@ -8,4 +8,10 @@
 #include <AMReX_SpMatUtil.H>
 #include <AMReX_SpMV.H>
 
+/**
+ * \file AMReX_Algebra.H
+ *
+ * Convenience header that pulls in the core algebra data structures and helpers.
+ */
+
 #endif
diff --git a/Src/LinearSolvers/AMReX_CSR.H b/Src/LinearSolvers/AMReX_CSR.H
@@ -16,6 +16,19 @@
 
 namespace amrex {
 
+/**
+ * \file AMReX_CSR.H
+ *
+ * CSR containers and helper utilities shared by sparse solvers that rely on
+ * compressed-sparse-row storage.
+ */
+
+/**
+ * \brief Lightweight non-owning CSR view that can point to host or device buffers.
+ *
+ * The template parameter \p T controls constness of the value array; the
+ * column/row arrays follow suit.
+ */
 template <typename T>
 struct CsrView {
     using U = std::conditional_t<std::is_const_v<T>, Long const, Long>;
@@ -26,48 +39,80 @@ struct CsrView {
     Long nrows = 0;
 };
 
+/**
+ * \brief Owning CSR container backed by AMReX resizable vectors.
+ *
+ * @tparam T Value type stored in the matrix.
+ * @tparam V Vector-like container template (e.g., PODVector, Gpu::DeviceVector).
+ */
 template <typename T, template <typename> class V>
 struct CSR {
     V<T> mat;
     V<Long> col_index;
     V<Long> row_offset;
     Long nnz = 0;
 
+    //! Number of logical rows represented by the CSR offset array.
     [[nodiscard]] Long nrows () const {
         return row_offset.empty() ? Long(0) : Long(row_offset.size())-1;
     }
 
+    /**
+     * \brief Resize the storage to accommodate \p num_rows and \p num_non_zeros entries.
+     *
+     * Existing data are discarded; value/column arrays are resized to the
+     * specified nonzero count and row offsets to `num_rows+1`.
+     *
+     * \param num_rows       Row count.
+     * \param num_non_zeros  Number of nonzeros.
+     */
     void resize (Long num_rows, Long num_non_zeros) {
         mat.resize(num_non_zeros);
         col_index.resize(num_non_zeros);
         row_offset.resize(num_rows+1);
         nnz = num_non_zeros;
     }
 
+    //! Mutable view of the underlying buffers.
     CsrView<T> view () {
         return CsrView<T>{mat.data(), col_index.data(), row_offset.data(),
                           nnz, Long(row_offset.size())-1};
     }
 
+    //! Const view of the underlying buffers.
     [[nodiscard]] CsrView<T const> view () const {
         return CsrView<T>{mat.data(), col_index.data(), row_offset.data(),
                           nnz, Long(row_offset.size())-1};
     }
 
+    //! Convenience alias for view() const.
     [[nodiscard]] CsrView<T const> const_view () const {
         return CsrView<T const>{mat.data(), col_index.data(), row_offset.data(),
                                 nnz, Long(row_offset.size())-1};
     }
 
+    /**
+     * \brief Sort each row by column index. Uses GPU acceleration when possible.
+     */
     void sort ();
 
+    /**
+     * \brief Host-only fallback that sorts column indices row by row.
+     */
     void sort_on_host ();
 };
 
 template <typename C, typename T, template<typename> class AD, template<typename> class AS,
           std::enable_if_t<std::is_same_v<C,Gpu::HostToDevice> ||
                            std::is_same_v<C,Gpu::DeviceToHost> ||
                            std::is_same_v<C,Gpu::DeviceToDevice>, int> = 0>
+/**
+ * \brief Copy CSR buffers between memory spaces asynchronously.
+ *
+ * \param c   Copy policy describing the direction (HostToDevice, etc.).
+ * \param dst Destination CSR that will be resized and filled.
+ * \param src Source CSR whose buffers are copied verbatim.
+ */
 void duplicateCSR (C c, CSR<T,AD>& dst, CSR<T,AS> const& src)
 {
     dst.mat.resize(src.mat.size());