Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions Src/LinearSolvers/AMReX_AlgPartition.H
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,50 @@

namespace amrex {

/**
* \file AMReX_AlgPartition.H
*
* Lightweight helpers that describe how global sparse rows are distributed
* across MPI ranks for algebra kernels.
*/

class AlgPartition
{
public:
//! Construct an empty partition that must be defined later.
AlgPartition ();
//! Evenly partition \p global_size rows across all ranks.
explicit AlgPartition (Long global_size);
//! Adopt an explicit prefix array \p rows describing local row extents.
explicit AlgPartition (Vector<Long> const& rows);
//! Adopt an explicit prefix array \p rows, transferring ownership.
explicit AlgPartition (Vector<Long>&& rows) noexcept;

//! Recompute an even partition spanning \p global_size rows.
void define (Long global_size);
//! Replace the partition with the explicit prefix array \p rows.
void define (Vector<Long> const& rows);
//! Replace the partition, taking ownership of \p rows.
void define (Vector<Long>&& rows);

//! True if the partition contains no rows.
[[nodiscard]] bool empty () const { return m_ref->m_row.empty(); }

//! Starting global row index owned by rank \p i.
[[nodiscard]] Long operator[] (int i) const { return m_ref->m_row[i]; }
/**
* \brief Total number of rows covered by the partition.
*/
[[nodiscard]] Long numGlobalRows () const { return m_ref->m_row.back(); }
//! Number of MPI ranks that own at least one row.
[[nodiscard]] int numActiveProcs () const { return m_ref->m_n_active_procs; }

//! Underlying prefix array describing row offsets (size nproc+1).
[[nodiscard]] Vector<Long> const& dataVector () const { return m_ref->m_row; }

//! Compare partitions for identical layouts.
[[nodiscard]] bool operator== (AlgPartition const& rhs) const noexcept;
//! Negation of operator==.
[[nodiscard]] bool operator!= (AlgPartition const& rhs) const noexcept;

private:
Expand Down
69 changes: 60 additions & 9 deletions Src/LinearSolvers/AMReX_AlgVecUtil.H
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@

namespace amrex {

/**
* \file AMReX_AlgVecUtil.H
*
* Device-friendly helpers for element-wise operations on AlgVector
* specializations (ForEach, BLAS-like routines, etc.).
*/

template <class V, class Enable = void> struct IsAlgVector : std::false_type {};
//
template <class V>
Expand All @@ -14,6 +21,13 @@ struct IsAlgVector<V, std::enable_if_t<std::is_same_v<AlgVector<typename V::valu
V> > >
: std::true_type {};

/**
* \brief Apply \p f to every element of \p x.
*
* \param x AlgVector whose entries are visited in place.
* \param f Callable with signature `void(Value&)` that will be executed on the
* device/host for each local row.
*/
template <typename V1, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value>
ForEach (V1 & x, F const& f)
Expand All @@ -26,6 +40,13 @@ ForEach (V1 & x, F const& f)
});
}

/**
* \brief Apply \p f to paired entries from \p x and \p y.
*
* \param x First AlgVector; entries may be modified by \p f.
* \param y Second AlgVector; entries may be modified by \p f.
* \param f Callable with signature `void(X&, Y&)`.
*/
template <typename V1, typename V2, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
IsAlgVector<std::decay_t<V2> >::value>
Expand All @@ -41,6 +62,14 @@ ForEach (V1 & x, V2 & y, F const& f)
});
}

/**
* \brief Apply \p f to triplets drawn from \p x, \p y, and \p z.
*
* \param x First AlgVector; entries may be modified by \p f.
* \param y Second AlgVector; entries may be modified by \p f.
* \param z Third AlgVector; entries may be modified by \p f.
* \param f Callable with signature `void(X&, Y&, Z&)`.
*/
template <typename V1, typename V2, typename V3, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
IsAlgVector<std::decay_t<V2> >::value &&
Expand All @@ -59,6 +88,15 @@ ForEach (V1 & x, V2 & y, V3 & z, F const& f)
});
}

/**
* \brief Apply \p f to tuples drawn from four AlgVectors.
*
* \param x First AlgVector; entries may be modified by \p f.
* \param y Second AlgVector; entries may be modified by \p f.
* \param z Third AlgVector; entries may be modified by \p f.
* \param a Fourth AlgVector; entries may be modified by \p f.
* \param f Callable with signature `void(X&, Y&, Z&, A&)`.
*/
template <typename V1, typename V2, typename V3, typename V4, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
IsAlgVector<std::decay_t<V2> >::value &&
Expand All @@ -80,6 +118,16 @@ ForEach (V1 & x, V2 & y, V3 & z, V4 & a, F const& f)
});
}

/**
* \brief Apply \p f to tuples spanning five AlgVectors.
*
* \param x First AlgVector; entries may be modified by \p f.
* \param y Second AlgVector; entries may be modified by \p f.
* \param z Third AlgVector; entries may be modified by \p f.
* \param a Fourth AlgVector; entries may be modified by \p f.
* \param b Fifth AlgVector; entries may be modified by \p f.
* \param f Callable with signature `void(X&, Y&, Z&, A&, B&)`.
*/
template <typename V1, typename V2, typename V3, typename V4, typename V5, typename F>
std::enable_if_t<IsAlgVector<std::decay_t<V1> >::value &&
IsAlgVector<std::decay_t<V2> >::value &&
Expand All @@ -104,9 +152,15 @@ ForEach (V1 & x, V2 & y, V3 & z, V4 & a, V5 & b, F const& f)
});
}

//! Return dot product of two vectors. By default, this returns the global
//! result over all MPI ranks. If `local` is true, it returns the result
//! over the locally stored entries only.
/**
* \brief Dot product of two AlgVectors with optional local-only reduction.
*
* \param x First vector.
* \param y Second vector.
* \param local Set true to skip the MPI reduction and return only this rank's
* contribution.
* \return Dot product of \p x and \p y (global unless \p local is true).
*/
template <typename T, typename Allocator>
T Dot (AlgVector<T,Allocator> const& x, AlgVector<T,Allocator> const& y, bool local = false)
{
Expand All @@ -124,24 +178,21 @@ T Dot (AlgVector<T,Allocator> const& x, AlgVector<T,Allocator> const& y, bool lo
return r;
}

//! y = ax + y. For GPU guilds, this function is asynchronous with respect
//! to the host.
//! y = ax + y. For GPU builds this is asynchronous with respect to the host.
template <typename T, typename Allocator>
void Axpy (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& x)
{
ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi += a*xi; });
}

//! y = x + a*y. For GPU guilds, this function is asynchronous with respect
//! to the host.
//! y = x + a*y. For GPU builds this is asynchronous with respect to the host.
template <typename T, typename Allocator>
void Xpay (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& x)
{
ForEach(y, x, [=] AMREX_GPU_DEVICE (T& yi, T const& xi) { yi = a*yi + xi; });
}

//! y = a*xa + b*xb. For GPU guilds, this function is asynchronous with
//! respect to the host.
//! y = a*xa + b*xb. For GPU builds this is asynchronous with respect to the host.
template <typename T, typename Allocator>
void LinComb (AlgVector<T,Allocator>& y, T a, AlgVector<T,Allocator> const& xa,
T b, AlgVector<T,Allocator> const& xb)
Expand Down
39 changes: 39 additions & 0 deletions Src/LinearSolvers/AMReX_AlgVector.H
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@

namespace amrex {

/**
* \file AMReX_AlgVector.H
*
* Distributed dense vector container backed by AlgPartition with host/GPU helpers.
*/

/**
* \brief Distributed dense vector that mirrors the layout of an AlgPartition.
*/
template <typename T, typename Allocator = DefaultAllocator<T> >
class AlgVector
{
Expand All @@ -24,8 +33,11 @@ public:

using Vec = PODVector<T,Allocator>;

//! Create an empty vector; call define() before use.
AlgVector () = default;
//! Allocate and partition \p global_size rows across MPI ranks.
explicit AlgVector (Long global_size);
//! Adopt an explicit partition that describes the data layout.
explicit AlgVector (AlgPartition partition);

AlgVector (AlgVector<T, Allocator> const&) = delete;
Expand All @@ -36,14 +48,31 @@ public:

~AlgVector () = default;

/**
* \brief Resize/repartition the vector to span \p global_size rows.
*
* Existing contents are discarded; rows are divided evenly across
* participating ranks.
*
* \param global_size Total number of entries in the vector.
*/
void define (Long global_size);
/**
* \brief Replace the current partition and storage with \p partition.
*
* \param partition Partition describing the global layout to adopt.
*/
void define (AlgPartition partition);

//! True if the local storage holds zero entries.
[[nodiscard]] bool empty () const { return m_partition.empty(); }

//! Partition describing the global layout of the vector.
[[nodiscard]] AlgPartition const& partition () const { return m_partition; }

//! Number of entries stored on this rank.
[[nodiscard]] Long numLocalRows () const { return m_end - m_begin; }
//! Global number of rows.
[[ nodiscard]] Long numGlobalRows () const { return m_partition.numGlobalRows(); }

//! Inclusive global index begin.
Expand Down Expand Up @@ -388,6 +417,11 @@ std::pair<T,T> AlgVector<T,Allocator>::norm1and2 (bool local) const
}
}

/**
* \brief Flatten a cell-centered FabArray into this vector's storage.
*
* \param fa Source data; each valid box is copied in IndexType order.
*/
template <typename T, typename Allocator>
template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
std::is_same_v<T,typename FAB::value_type>, int> >
Expand All @@ -410,6 +444,11 @@ void AlgVector<T,Allocator>::copyFrom (FabArray<FAB> const& fa)
}
}

/**
* \brief Scatter this vector back into a cell-centered FabArray.
*
* \param fa Destination data that receives entries owned by this rank.
*/
template <typename T, typename Allocator>
template <typename FAB, std::enable_if_t<amrex::IsBaseFab<FAB>::value &&
std::is_same_v<T,typename FAB::value_type>, int> >
Expand Down
6 changes: 6 additions & 0 deletions Src/LinearSolvers/AMReX_Algebra.H
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,10 @@
#include <AMReX_SpMatUtil.H>
#include <AMReX_SpMV.H>

/**
* \file AMReX_Algebra.H
*
* Convenience header that pulls in the core algebra data structures and helpers.
*/

#endif
45 changes: 45 additions & 0 deletions Src/LinearSolvers/AMReX_CSR.H
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,19 @@

namespace amrex {

/**
* \file AMReX_CSR.H
*
* CSR containers and helper utilities shared by sparse solvers that rely on
* compressed-sparse-row storage.
*/

/**
* \brief Lightweight non-owning CSR view that can point to host or device buffers.
*
* The template parameter \p T controls constness of the value array; the
* column/row arrays follow suit.
*/
template <typename T>
struct CsrView {
using U = std::conditional_t<std::is_const_v<T>, Long const, Long>;
Expand All @@ -26,48 +39,80 @@ struct CsrView {
Long nrows = 0;
};

/**
* \brief Owning CSR container backed by AMReX resizable vectors.
*
* @tparam T Value type stored in the matrix.
* @tparam V Vector-like container template (e.g., PODVector, Gpu::DeviceVector).
*/
template <typename T, template <typename> class V>
struct CSR {
V<T> mat;
V<Long> col_index;
V<Long> row_offset;
Long nnz = 0;

//! Number of logical rows represented by the CSR offset array.
[[nodiscard]] Long nrows () const {
return row_offset.empty() ? Long(0) : Long(row_offset.size())-1;
}

/**
* \brief Resize the storage to accommodate \p num_rows and \p num_non_zeros entries.
*
* Existing data are discarded; value/column arrays are resized to the
* specified nonzero count and row offsets to `num_rows+1`.
*
* \param num_rows Row count.
* \param num_non_zeros Number of nonzeros.
*/
void resize (Long num_rows, Long num_non_zeros) {
mat.resize(num_non_zeros);
col_index.resize(num_non_zeros);
row_offset.resize(num_rows+1);
nnz = num_non_zeros;
}

//! Mutable view of the underlying buffers.
CsrView<T> view () {
return CsrView<T>{mat.data(), col_index.data(), row_offset.data(),
nnz, Long(row_offset.size())-1};
}

//! Const view of the underlying buffers.
[[nodiscard]] CsrView<T const> view () const {
return CsrView<T>{mat.data(), col_index.data(), row_offset.data(),
nnz, Long(row_offset.size())-1};
}

//! Convenience alias for view() const.
[[nodiscard]] CsrView<T const> const_view () const {
return CsrView<T const>{mat.data(), col_index.data(), row_offset.data(),
nnz, Long(row_offset.size())-1};
}

/**
* \brief Sort each row by column index. Uses GPU acceleration when possible.
*/
void sort ();

/**
* \brief Host-only fallback that sorts column indices row by row.
*/
void sort_on_host ();
};

template <typename C, typename T, template<typename> class AD, template<typename> class AS,
std::enable_if_t<std::is_same_v<C,Gpu::HostToDevice> ||
std::is_same_v<C,Gpu::DeviceToHost> ||
std::is_same_v<C,Gpu::DeviceToDevice>, int> = 0>
/**
* \brief Copy CSR buffers between memory spaces asynchronously.
*
* \param c Copy policy describing the direction (HostToDevice, etc.).
* \param dst Destination CSR that will be resized and filled.
* \param src Source CSR whose buffers are copied verbatim.
*/
void duplicateCSR (C c, CSR<T,AD>& dst, CSR<T,AS> const& src)
{
dst.mat.resize(src.mat.size());
Expand Down
Loading
Loading