SparseBLAS · BenBrock · Mar 6, 2025 · Feb 27, 2025 · Feb 27, 2025 · Feb 27, 2025
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -8,3 +8,4 @@ add_example(simple_spmm)
 add_example(simple_spgemm)
 add_example(simple_sptrsv)
 add_example(matrix_opt_example)
+add_example(spmm_csc)
diff --git a/examples/spmm_csc.cpp b/examples/spmm_csc.cpp
@@ -0,0 +1,53 @@
+#include <iostream>
+#include <spblas/spblas.hpp>
+
+#include <fmt/core.h>
+#include <fmt/ranges.h>
+
+int main(int argc, char** argv) {
+  using namespace spblas;
+  namespace md = spblas::__mdspan;
+
+  using T = float;
+
+  spblas::index_t m = 100;
+  spblas::index_t n = 10;
+  spblas::index_t k = 100;
+  spblas::index_t nnz_in = 10;
+
+  fmt::print("\n\t###########################################################"
+             "######################");
+  fmt::print("\n\t### Running SpMM Example:");
+  fmt::print("\n\t###");
+  fmt::print("\n\t###   Y = alpha * A * X");
+  fmt::print("\n\t###");
+  fmt::print("\n\t### with ");
+  fmt::print("\n\t### A, in CSR format, of size ({}, {}) with nnz = {}", m, k,
+             nnz_in);
+  fmt::print("\n\t### x, a dense matrix, of size ({}, {})", k, n);
+  fmt::print("\n\t### y, a dense vector, of size ({}, {})", m, n);
+  fmt::print("\n\t### using float and spblas::index_t (size = {} bytes)",
+             sizeof(spblas::index_t));
+  fmt::print("\n\t###########################################################"
+             "######################");
+  fmt::print("\n");
+
+  auto&& [values, colptr, rowind, shape, nnz] = generate_csc<T>(m, k, nnz_in);
+
+  csc_view<T> a(values, colptr, rowind, shape, nnz);
+
+  std::vector<T> x_values(k * n, 1);
+  std::vector<T> y_values(m * n, 0);
+
+  md::mdspan x(x_values.data(), k, n);
+  md::mdspan y(y_values.data(), m, n);
+
+  // y = A * (alpha * x)
+  multiply(a, scaled(2.f, x), y);
+
+  fmt::print("{}\n", spblas::__backend::values(y));
+
+  fmt::print("\tExample is completed!\n");
+
+  return 0;
+}
diff --git a/include/spblas/algorithms/multiply_impl.hpp b/include/spblas/algorithms/multiply_impl.hpp
@@ -4,6 +4,7 @@
 #include <spblas/concepts.hpp>
 #include <spblas/detail/log.hpp>
 
+#include <spblas/algorithms/transposed.hpp>
 #include <spblas/backend/csr_builder.hpp>
 #include <spblas/backend/spa_accumulator.hpp>
 #include <spblas/detail/operation_info_t.hpp>
@@ -95,14 +96,27 @@ void multiply(A&& a, B&& b, C&& c) {
     try {
       c_builder.insert_row(i, c_row.get());
     } catch (...) {
-      throw std::runtime_error("multiply: ran out of memory.  CSR output view "
-                               "has insufficient memory.");
+      throw std::runtime_error("multiply: SpGEMM ran out of memory.");
     }
   }
   c.update(c.values(), c.rowptr(), c.colind(), c.shape(),
            c.rowptr()[c.shape()[0]]);
 }
 
+template <matrix A, matrix B, matrix C>
+  requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
+           __detail::is_csc_view_v<C>)
+void multiply(A&& a, B&& b, C&& c) {
+  log_trace("");
+  if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
+      __backend::shape(b)[1] != __backend::shape(c)[1] ||
+      __backend::shape(a)[1] != __backend::shape(b)[0]) {
+    throw std::invalid_argument(
+        "multiply: matrix dimensions are incompatible.");
+  }
+  multiply(transposed(b), transposed(a), transposed(c));
+}
+
 template <matrix A, matrix B, matrix C>
 operation_info_t multiply_inspect(A&& a, B&& b, C&& c) {
   return operation_info_t{};
@@ -147,6 +161,26 @@ operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
   return operation_info_t{__backend::shape(c), nnz};
 }
 
+// C = AB
+// SpGEMM (Gustavson's Algorithm, transposed)
+template <matrix A, matrix B, matrix C>
+  requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
+           __detail::is_csc_view_v<C>)
+operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
+  log_trace("");
+  if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
+      __backend::shape(b)[1] != __backend::shape(c)[1] ||
+      __backend::shape(a)[1] != __backend::shape(b)[0]) {
+    throw std::invalid_argument(
+        "multiply: matrix dimensions are incompatible.");
+  }
+
+  auto info = multiply_compute(transposed(b), transposed(a), transposed(c));
+  info.update_impl_({info.result_shape()[1], info.result_shape()[0]},
+                    info.result_nnz());
+  return info;
+}
+
 template <matrix A, matrix B, matrix C>
   requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
            __detail::is_csr_view_v<C>)
@@ -156,6 +190,15 @@ void multiply_compute(operation_info_t& info, A&& a, B&& b, C&& c) {
   info.update_impl_(new_info.result_shape(), new_info.result_nnz());
 }
 
+template <matrix A, matrix B, matrix C>
+  requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
+           __detail::is_csc_view_v<C>)
+void multiply_compute(operation_info_t& info, A&& a, B&& b, C&& c) {
+  auto new_info = multiply_compute(std::forward<A>(a), std::forward<B>(b),
+                                   std::forward<C>(c));
+  info.update_impl_(new_info.result_shape(), new_info.result_nnz());
+}
+
 // C = AB
 template <matrix A, matrix B, matrix C>
 void multiply_fill(operation_info_t info, A&& a, B&& b, C&& c) {

diff --git a/include/spblas/algorithms/transpose.hpp b/include/spblas/algorithms/transpose.hpp
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <spblas/concepts.hpp>
+
+namespace spblas {
+
+template <matrix M>
+auto transposed(M&& m);
+
+}
diff --git a/include/spblas/algorithms/transposed.hpp b/include/spblas/algorithms/transposed.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <spblas/detail/view_inspectors.hpp>
+
+namespace spblas {
+
+template <matrix M>
+  requires(__detail::is_csr_view_v<M>)
+auto transposed(M&& m) {
+  return csc_view<tensor_scalar_t<M>, tensor_index_t<M>, tensor_offset_t<M>>(
+      m.values(), m.rowptr(), m.colind(), {m.shape()[1], m.shape()[0]},
+      m.size());
+}
+
+template <matrix M>
+  requires(__detail::is_csc_view_v<M>)
+auto transposed(M&& m) {
+  return csr_view<tensor_scalar_t<M>, tensor_index_t<M>, tensor_offset_t<M>>(
+      m.values(), m.colptr(), m.rowind(), {m.shape()[1], m.shape()[0]},
+      m.size());
+}
+
+} // namespace spblas
diff --git a/include/spblas/backend/algorithms.hpp b/include/spblas/backend/algorithms.hpp
@@ -18,6 +18,16 @@ void for_each(M&& m, F&& f) {
   }
 }
 
+template <matrix M, typename F>
+  requires(__backend::column_iterable<M>)
+void for_each(M&& m, F&& f) {
+  for (auto&& [j, column] : __backend::columns(m)) {
+    for (auto&& [i, v] : column) {
+      f(std::make_tuple(std::tuple{i, j}, std::reference_wrapper(v)));
+    }
+  }
+}
+
 template <vector V, typename F>
   requires(__backend::lookupable<V> && __ranges::random_access_range<V>)
 void for_each(V&& v, F&& f) {

diff --git a/include/spblas/backend/concepts.hpp b/include/spblas/backend/concepts.hpp
@@ -10,9 +10,16 @@ namespace __backend {
 template <typename T>
 concept row_iterable = requires(T& t) { rows(t); };
 
+template <typename T>
+concept column_iterable = requires(T& t) { columns(t); };
+
 template <typename T>
 concept row_lookupable = requires(T& t) { lookup_row(t, tensor_index_t<T>{}); };
 
+template <typename T>
+concept column_lookupable =
+    requires(T& t) { lookup_column(t, tensor_index_t<T>{}); };
+
 namespace {
 
 template <typename T>

diff --git a/include/spblas/backend/cpos.hpp b/include/spblas/backend/cpos.hpp
@@ -46,6 +46,16 @@ struct rows_fn_ {
 
 inline constexpr auto rows = rows_fn_{};
 
+struct columns_fn_ {
+  template <typename T>
+    requires(spblas::is_tag_invocable_v<columns_fn_, T>)
+  constexpr auto operator()(T&& t) const {
+    return spblas::tag_invoke(columns_fn_{}, std::forward<T>(t));
+  }
+};
+
+inline constexpr auto columns = columns_fn_{};
+
 struct lookup_fn_ {
   template <typename T, typename... Args>
     requires(spblas::is_tag_invocable_v<lookup_fn_, T, Args...>)
@@ -70,6 +80,18 @@ struct lookup_row_fn_ {
 
 inline constexpr auto lookup_row = lookup_row_fn_{};
 
+struct lookup_column_fn_ {
+  template <typename T, typename... Args>
+    requires(spblas::is_tag_invocable_v<lookup_column_fn_, T, Args...>)
+  constexpr tag_invoke_result_t<lookup_column_fn_, T, Args...>
+  operator()(T&& t, Args&&... args) const {
+    return spblas::tag_invoke(lookup_column_fn_{}, std::forward<T>(t),
+                              std::forward<Args>(args)...);
+  }
+};
+
+inline constexpr auto lookup_column = lookup_column_fn_{};
+
 } // namespace __backend
 
 } // namespace spblas
diff --git a/include/spblas/backend/view_customizations.hpp b/include/spblas/backend/view_customizations.hpp
@@ -5,26 +5,44 @@
 
 namespace spblas {
 
-// Customization point implementations for csr_view.
+// Customization point implementations for csr_view and csc_view.
 
 template <typename M>
   requires(__detail::is_csr_view_v<M>)
 auto tag_invoke(__backend::size_fn_, M&& m) {
   return m.size();
 }
 
+template <typename M>
+  requires(__detail::is_csc_view_v<M>)
+auto tag_invoke(__backend::size_fn_, M&& m) {
+  return m.size();
+}
+
 template <typename M>
   requires(__detail::is_csr_view_v<M>)
 auto tag_invoke(__backend::shape_fn_, M&& m) {
   return m.shape();
 }
 
+template <typename M>
+  requires(__detail::is_csc_view_v<M>)
+auto tag_invoke(__backend::shape_fn_, M&& m) {
+  return m.shape();
+}
+
 template <typename M>
   requires(__detail::is_csr_view_v<M>)
 auto tag_invoke(__backend::values_fn_, M&& m) {
   return m.values();
 }
 
+template <typename M>
+  requires(__detail::is_csc_view_v<M>)
+auto tag_invoke(__backend::values_fn_, M&& m) {
+  return m.values();
+}
+
 namespace {
 
 template <typename M>
@@ -47,6 +65,26 @@ auto row(M&& m, typename std::remove_cvref_t<M>::index_type row_index) {
   return __ranges::views::zip(column_indices, row_values);
 }
 
+template <typename M>
+  requires(__detail::is_csc_view_v<M>)
+auto column(M&& m, typename std::remove_cvref_t<M>::index_type column_index) {
+  using O = typename std::remove_cvref_t<M>::offset_type;
+  O first = m.colptr()[column_index];
+  O last = m.colptr()[column_index + 1];
+
+  using row_iter_t = decltype(m.rowind().data());
+  using value_iter_t = decltype(m.values().data());
+
+  __ranges::subrange<row_iter_t> column_indices(
+      __ranges::next(m.rowind().data(), first),
+      __ranges::next(m.rowind().data(), last));
+  __ranges::subrange<value_iter_t> column_values(
+      __ranges::next(m.values().data(), first),
+      __ranges::next(m.values().data(), last));
+
+  return __ranges::views::zip(column_indices, column_values);
+}
+
 } // namespace
 
 template <typename M>
@@ -62,6 +100,20 @@ auto tag_invoke(__backend::rows_fn_, M&& m) {
   return __ranges::views::zip(row_indices, row_values);
 }
 
+template <typename M>
+  requires(__detail::is_csc_view_v<M>)
+auto tag_invoke(__backend::columns_fn_, M&& m) {
+  using I = typename std::remove_cvref_t<M>::index_type;
+  auto column_indices = __ranges::views::iota(I(0), I(m.shape()[1]));
+
+  auto column_values =
+      column_indices | __ranges::views::transform([=](auto column_index) {
+        return column(m, column_index);
+      });
+
+  return __ranges::views::zip(column_indices, column_values);
+}
+
 template <typename M>
   requires(__detail::is_csr_view_v<M>)
 auto tag_invoke(__backend::lookup_row_fn_, M&& m,
@@ -70,6 +122,14 @@ auto tag_invoke(__backend::lookup_row_fn_, M&& m,
   return row(m, row_index);
 }
 
+template <typename M>
+  requires(__detail::is_csc_view_v<M>)
+auto tag_invoke(__backend::lookup_column_fn_, M&& m,
+                typename std::remove_cvref_t<M>::index_type column_index) {
+  using I = typename std::remove_cvref_t<M>::index_type;
+  return column(m, column_index);
+}
+
 // Customization point implementations for vectors
 
 template <__ranges::random_access_range V>

diff --git a/include/spblas/detail/view_inspectors.hpp b/include/spblas/detail/view_inspectors.hpp
@@ -95,6 +95,9 @@ using ultimate_base_type_t = decltype(get_ultimate_base(std::declval<T>()));
 template <typename T>
 concept has_csr_base = is_csr_view_v<ultimate_base_type_t<T>>;
 
+template <typename T>
+concept has_csc_base = is_csc_view_v<ultimate_base_type_t<T>>;
+
 template <typename T>
 concept has_mdspan_matrix_base =
     is_matrix_instantiation_of_mdspan_v<ultimate_base_type_t<T>>;

diff --git a/include/spblas/vendor/armpl/detail/armpl.hpp b/include/spblas/vendor/armpl/detail/armpl.hpp
@@ -26,6 +26,21 @@ template <>
 inline constexpr auto create_spmat_csr<std::complex<double>> =
     &armpl_spmat_create_csr_z;
 
+template <typename T>
+armpl_status_t (*create_spmat_csc)(armpl_spmat_t*, armpl_int_t, armpl_int_t,
+                                   const armpl_int_t*, const armpl_int_t*,
+                                   const T*, armpl_int_t);
+template <>
+inline constexpr auto create_spmat_csc<float> = &armpl_spmat_create_csc_s;
+template <>
+inline constexpr auto create_spmat_csc<double> = &armpl_spmat_create_csc_d;
+template <>
+inline constexpr auto create_spmat_csc<std::complex<float>> =
+    &armpl_spmat_create_csc_c;
+template <>
+inline constexpr auto create_spmat_csc<std::complex<double>> =
+    &armpl_spmat_create_csc_z;
+
 template <typename T>
 armpl_status_t (*create_spmat_dense)(armpl_spmat_t*, enum armpl_dense_layout,
                                      armpl_int_t, armpl_int_t, armpl_int_t,