Skip to content
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ add_example(simple_spmm)
add_example(simple_spgemm)
add_example(simple_sptrsv)
add_example(matrix_opt_example)
add_example(spmm_csc)
53 changes: 53 additions & 0 deletions examples/spmm_csc.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#include <iostream>
#include <spblas/spblas.hpp>

#include <fmt/core.h>
#include <fmt/ranges.h>

int main(int argc, char** argv) {
using namespace spblas;
namespace md = spblas::__mdspan;

using T = float;

spblas::index_t m = 100;
spblas::index_t n = 10;
spblas::index_t k = 100;
spblas::index_t nnz_in = 10;

fmt::print("\n\t###########################################################"
"######################");
fmt::print("\n\t### Running SpMM Example:");
fmt::print("\n\t###");
fmt::print("\n\t### Y = alpha * A * X");
fmt::print("\n\t###");
fmt::print("\n\t### with ");
fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, k,
nnz_in);
fmt::print("\n\t### x, a dense matrix, of size ({}, {})", k, n);
fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, n);
fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
sizeof(spblas::index_t));
fmt::print("\n\t###########################################################"
"######################");
fmt::print("\n");

auto&& [values, colptr, rowind, shape, nnz] = generate_csc<T>(m, k, nnz_in);

csc_view<T> a(values, colptr, rowind, shape, nnz);

std::vector<T> x_values(k * n, 1);
std::vector<T> y_values(m * n, 0);

md::mdspan x(x_values.data(), k, n);
md::mdspan y(y_values.data(), m, n);

// y = A * (alpha * x)
multiply(a, scaled(2.f, x), y);

fmt::print("{}\n", spblas::__backend::values(y));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are you sure we want to print out the 100x10 = 1000 values of y ? maybe we want to have a "print dense matrix" routine that prints the first 4 rows (first 4 columns, ... , last 4 columns) and then ... then last 4 rows (first 4 columns, ... , last 4 columns) in a nicely printed and tab aligned to commas view ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could do similar thing with dense vector, only printing out first 4 elements, then last 4 elements. And also a similar thing for sparse matrix with printing first 4 rows and last 4 rows ...

maybe it could take in a parameter that represents block_size (default = 4) to be printed ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think writing a pretty printer and then using that in the examples would be nice. (We can implement a ostream / fmt formatter.) I think that probably belongs in another PR, though.


fmt::print("\tExample is completed!\n");

return 0;
}
47 changes: 45 additions & 2 deletions include/spblas/algorithms/multiply_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <spblas/concepts.hpp>
#include <spblas/detail/log.hpp>

#include <spblas/algorithms/transposed.hpp>
#include <spblas/backend/csr_builder.hpp>
#include <spblas/backend/spa_accumulator.hpp>
#include <spblas/detail/operation_info_t.hpp>
Expand Down Expand Up @@ -95,14 +96,27 @@ void multiply(A&& a, B&& b, C&& c) {
try {
c_builder.insert_row(i, c_row.get());
} catch (...) {
throw std::runtime_error("multiply: ran out of memory. CSR output view "
"has insufficient memory.");
throw std::runtime_error("multiply: SpGEMM ran out of memory.");
}
}
c.update(c.values(), c.rowptr(), c.colind(), c.shape(),
c.rowptr()[c.shape()[0]]);
}

template <matrix A, matrix B, matrix C>
requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
__detail::is_csc_view_v<C>)
void multiply(A&& a, B&& b, C&& c) {
log_trace("");
if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
__backend::shape(b)[1] != __backend::shape(c)[1] ||
__backend::shape(a)[1] != __backend::shape(b)[0]) {
throw std::invalid_argument(
"multiply: matrix dimensions are incompatible.");
}
multiply(transposed(b), transposed(a), transposed(c));
}

template <matrix A, matrix B, matrix C>
operation_info_t multiply_inspect(A&& a, B&& b, C&& c) {
return operation_info_t{};
Expand Down Expand Up @@ -147,6 +161,26 @@ operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
return operation_info_t{__backend::shape(c), nnz};
}

// C = AB
// SpGEMM (Gustavson's Algorithm, transposed)
template <matrix A, matrix B, matrix C>
requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
__detail::is_csc_view_v<C>)
operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
log_trace("");
if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
__backend::shape(b)[1] != __backend::shape(c)[1] ||
__backend::shape(a)[1] != __backend::shape(b)[0]) {
throw std::invalid_argument(
"multiply: matrix dimensions are incompatible.");
}

auto info = multiply_compute(transposed(b), transposed(a), transposed(c));
info.update_impl_({info.result_shape()[1], info.result_shape()[0]},
info.result_nnz());
return info;
}

template <matrix A, matrix B, matrix C>
requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
__detail::is_csr_view_v<C>)
Expand All @@ -156,6 +190,15 @@ void multiply_compute(operation_info_t& info, A&& a, B&& b, C&& c) {
info.update_impl_(new_info.result_shape(), new_info.result_nnz());
}

template <matrix A, matrix B, matrix C>
requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
__detail::is_csc_view_v<C>)
void multiply_compute(operation_info_t& info, A&& a, B&& b, C&& c) {
auto new_info = multiply_compute(std::forward<A>(a), std::forward<B>(b),
std::forward<C>(c));
info.update_impl_(new_info.result_shape(), new_info.result_nnz());
}

// C = AB
template <matrix A, matrix B, matrix C>
void multiply_fill(operation_info_t info, A&& a, B&& b, C&& c) {
Expand Down
10 changes: 10 additions & 0 deletions include/spblas/algorithms/transpose.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#pragma once

#include <spblas/concepts.hpp>

namespace spblas {

template <matrix M>
auto transposed(M&& m);

}
23 changes: 23 additions & 0 deletions include/spblas/algorithms/transposed.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#pragma once

#include <spblas/detail/view_inspectors.hpp>

namespace spblas {

template <matrix M>
requires(__detail::is_csr_view_v<M>)
auto transposed(M&& m) {
return csc_view<tensor_scalar_t<M>, tensor_index_t<M>, tensor_offset_t<M>>(
m.values(), m.rowptr(), m.colind(), {m.shape()[1], m.shape()[0]},
m.size());
}

template <matrix M>
requires(__detail::is_csc_view_v<M>)
auto transposed(M&& m) {
return csr_view<tensor_scalar_t<M>, tensor_index_t<M>, tensor_offset_t<M>>(
m.values(), m.colptr(), m.rowind(), {m.shape()[1], m.shape()[0]},
m.size());
}
Comment on lines +7 to +21
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here is where the relationship between CSR and CSC through transpose (not conjugate transpose though) comes into play with the switching of nRows/nCols and reinterpretting rowptr <-> colptr and rowind <-> colind ...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you had

csr_view<float> a(/* ... */);

// Explicit transpose class
transposed_view<csr_view<float>> a_t = transposed(a);

// Versus csc_view
csc_view<float> a_csc_t = transposed(a);

If a.shape() was (10, 20), wouldn't both a_t.shape() and a_csc_t.shape() both be (20, 10)? It seems to me the two should behave identically (except the explicit one has a .base() and the CSC one doesn't).


} // namespace spblas
10 changes: 10 additions & 0 deletions include/spblas/backend/algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ void for_each(M&& m, F&& f) {
}
}

template <matrix M, typename F>
requires(__backend::column_iterable<M>)
void for_each(M&& m, F&& f) {
for (auto&& [j, column] : __backend::columns(m)) {
for (auto&& [i, v] : column) {
f(std::make_tuple(std::tuple{i, j}, std::reference_wrapper(v)));
}
}
}

template <vector V, typename F>
requires(__backend::lookupable<V> && __ranges::random_access_range<V>)
void for_each(V&& v, F&& f) {
Expand Down
7 changes: 7 additions & 0 deletions include/spblas/backend/concepts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@ namespace __backend {
template <typename T>
concept row_iterable = requires(T& t) { rows(t); };

template <typename T>
concept column_iterable = requires(T& t) { columns(t); };

template <typename T>
concept row_lookupable = requires(T& t) { lookup_row(t, tensor_index_t<T>{}); };

template <typename T>
concept column_lookupable =
requires(T& t) { lookup_column(t, tensor_index_t<T>{}); };

namespace {

template <typename T>
Expand Down
22 changes: 22 additions & 0 deletions include/spblas/backend/cpos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ struct rows_fn_ {

inline constexpr auto rows = rows_fn_{};

struct columns_fn_ {
template <typename T>
requires(spblas::is_tag_invocable_v<columns_fn_, T>)
constexpr auto operator()(T&& t) const {
return spblas::tag_invoke(columns_fn_{}, std::forward<T>(t));
}
};

inline constexpr auto columns = columns_fn_{};

struct lookup_fn_ {
template <typename T, typename... Args>
requires(spblas::is_tag_invocable_v<lookup_fn_, T, Args...>)
Expand All @@ -70,6 +80,18 @@ struct lookup_row_fn_ {

inline constexpr auto lookup_row = lookup_row_fn_{};

struct lookup_column_fn_ {
template <typename T, typename... Args>
requires(spblas::is_tag_invocable_v<lookup_column_fn_, T, Args...>)
constexpr tag_invoke_result_t<lookup_column_fn_, T, Args...>
operator()(T&& t, Args&&... args) const {
return spblas::tag_invoke(lookup_column_fn_{}, std::forward<T>(t),
std::forward<Args>(args)...);
}
};

inline constexpr auto lookup_column = lookup_column_fn_{};

} // namespace __backend

} // namespace spblas
62 changes: 61 additions & 1 deletion include/spblas/backend/view_customizations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,44 @@

namespace spblas {

// Customization point implementations for csr_view.
// Customization point implementations for csr_view and csc_view.

template <typename M>
requires(__detail::is_csr_view_v<M>)
auto tag_invoke(__backend::size_fn_, M&& m) {
return m.size();
}

template <typename M>
requires(__detail::is_csc_view_v<M>)
auto tag_invoke(__backend::size_fn_, M&& m) {
return m.size();
}

template <typename M>
requires(__detail::is_csr_view_v<M>)
auto tag_invoke(__backend::shape_fn_, M&& m) {
return m.shape();
}

template <typename M>
requires(__detail::is_csc_view_v<M>)
auto tag_invoke(__backend::shape_fn_, M&& m) {
return m.shape();
}

template <typename M>
requires(__detail::is_csr_view_v<M>)
auto tag_invoke(__backend::values_fn_, M&& m) {
return m.values();
}

template <typename M>
requires(__detail::is_csc_view_v<M>)
auto tag_invoke(__backend::values_fn_, M&& m) {
return m.values();
}

namespace {

template <typename M>
Expand All @@ -47,6 +65,26 @@ auto row(M&& m, typename std::remove_cvref_t<M>::index_type row_index) {
return __ranges::views::zip(column_indices, row_values);
}

template <typename M>
requires(__detail::is_csc_view_v<M>)
auto column(M&& m, typename std::remove_cvref_t<M>::index_type column_index) {
using O = typename std::remove_cvref_t<M>::offset_type;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we probably need to go through and update all the

  using T = tensor_scalar_t<A>;
  using I = tensor_index_t<A>;
  using O = tensor_offset_t<A>;

usages everywhere to include a std::remove_cvref_t<> call similar to what you are doing here ...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually tensor_scalar_t<A>, etc. will automatically apply a std::remove_cvref_t. I think the only reason I don't use it here is include order.

O first = m.colptr()[column_index];
O last = m.colptr()[column_index + 1];

using row_iter_t = decltype(m.rowind().data());
using value_iter_t = decltype(m.values().data());

__ranges::subrange<row_iter_t> column_indices(
__ranges::next(m.rowind().data(), first),
__ranges::next(m.rowind().data(), last));
__ranges::subrange<value_iter_t> column_values(
__ranges::next(m.values().data(), first),
__ranges::next(m.values().data(), last));

return __ranges::views::zip(column_indices, column_values);
}

} // namespace

template <typename M>
Expand All @@ -62,6 +100,20 @@ auto tag_invoke(__backend::rows_fn_, M&& m) {
return __ranges::views::zip(row_indices, row_values);
}

template <typename M>
requires(__detail::is_csc_view_v<M>)
auto tag_invoke(__backend::columns_fn_, M&& m) {
using I = typename std::remove_cvref_t<M>::index_type;
auto column_indices = __ranges::views::iota(I(0), I(m.shape()[1]));

auto column_values =
column_indices | __ranges::views::transform([=](auto column_index) {
return column(m, column_index);
});

return __ranges::views::zip(column_indices, column_values);
}

template <typename M>
requires(__detail::is_csr_view_v<M>)
auto tag_invoke(__backend::lookup_row_fn_, M&& m,
Expand All @@ -70,6 +122,14 @@ auto tag_invoke(__backend::lookup_row_fn_, M&& m,
return row(m, row_index);
}

template <typename M>
requires(__detail::is_csc_view_v<M>)
auto tag_invoke(__backend::lookup_column_fn_, M&& m,
typename std::remove_cvref_t<M>::index_type column_index) {
using I = typename std::remove_cvref_t<M>::index_type;
return column(m, column_index);
}

// Customization point implementations for vectors

template <__ranges::random_access_range V>
Expand Down
3 changes: 3 additions & 0 deletions include/spblas/detail/view_inspectors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ using ultimate_base_type_t = decltype(get_ultimate_base(std::declval<T>()));
template <typename T>
concept has_csr_base = is_csr_view_v<ultimate_base_type_t<T>>;

template <typename T>
concept has_csc_base = is_csc_view_v<ultimate_base_type_t<T>>;

template <typename T>
concept has_mdspan_matrix_base =
is_matrix_instantiation_of_mdspan_v<ultimate_base_type_t<T>>;
Expand Down
15 changes: 15 additions & 0 deletions include/spblas/vendor/armpl/detail/armpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,21 @@ template <>
inline constexpr auto create_spmat_csr<std::complex<double>> =
&armpl_spmat_create_csr_z;

template <typename T>
armpl_status_t (*create_spmat_csc)(armpl_spmat_t*, armpl_int_t, armpl_int_t,
const armpl_int_t*, const armpl_int_t*,
const T*, armpl_int_t);
template <>
inline constexpr auto create_spmat_csc<float> = &armpl_spmat_create_csc_s;
template <>
inline constexpr auto create_spmat_csc<double> = &armpl_spmat_create_csc_d;
template <>
inline constexpr auto create_spmat_csc<std::complex<float>> =
&armpl_spmat_create_csc_c;
template <>
inline constexpr auto create_spmat_csc<std::complex<double>> =
&armpl_spmat_create_csc_z;

Comment on lines +29 to +43
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you prefer this kind of template specialization over the kind I have in https://github.com/SparseBLAS/spblas-reference/pull/23/files#diff-6ac61c9e281330753acb230d10581803329208dd22ef5d9fdb5492b845c8acf1R19-R42 for instance ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I actually prefer what you're doing there with IESparseBLAS. I think Chris originally wrote this code, but the two styles are pretty much equivalent.

template <typename T>
armpl_status_t (*create_spmat_dense)(armpl_spmat_t*, enum armpl_dense_layout,
armpl_int_t, armpl_int_t, armpl_int_t,
Expand Down
Loading