Skip to content
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
de12da2
Implement `csc_view`, implement `transposed`.
BenBrock Feb 27, 2025
ae5af30
Fix tests---should be recovered from force push.
BenBrock Feb 27, 2025
6632825
Add CSC SpMV test.
BenBrock Feb 27, 2025
3750a1e
Make `scaled_view` work with `csc_view`.
BenBrock Feb 27, 2025
e1f2cd2
Update MKL to support CSC.
BenBrock Mar 3, 2025
3fde64c
Merge branch 'main' into dev/brock/implement-transposed
BenBrock Mar 3, 2025
1f2be4a
Support operations with CSC in ArmPL.
BenBrock Mar 3, 2025
dd7bfb4
Update include/spblas/vendor/onemkl_sycl/detail/create_matrix_handle.hpp
BenBrock Mar 3, 2025
907539b
Update examples/spmm_csc.cpp
BenBrock Mar 4, 2025
273ae73
Fix formatting
BenBrock Mar 4, 2025
cf16b4c
Add CSC View SpMV BScaled test.
BenBrock Mar 4, 2025
7a8cb29
First stab at implementing CSR x CSC -> CSR.
BenBrock Mar 4, 2025
53a59c0
Merge branch 'main' into dev/brock/mixed-csr-csc
BenBrock Mar 6, 2025
543ed24
Support mixed CSR/CSC in MKL.
BenBrock Mar 6, 2025
91c017e
Implement dot product with accumulator to avoid having to sort indices
BenBrock Mar 6, 2025
a83b0a0
Bugfix: wrong dimension.
BenBrock Mar 6, 2025
dae6de6
Implement outer product SpGEMM.
BenBrock Mar 7, 2025
b13e998
Remove unnecessary transposed inner product implementations.
BenBrock Mar 7, 2025
4a2e3a1
Implement "scattered Gustavson's," add testing, ensure works in ArmPL.
BenBrock Mar 7, 2025
3a77817
Commit missing files
BenBrock Mar 7, 2025
0ca3447
Enable CSC output in MKL.
BenBrock Mar 7, 2025
1188be9
Use `hash_accumulator` for outer product, use `sparse_intersection` for
BenBrock Mar 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(CMAKE_CXX_FLAGS "-O3 -march=native")

option(ENABLE_SANITIZERS "Enable Clang sanitizers" OFF)

# Get includes, which declares the `spblas` library
add_subdirectory(include)

Expand Down Expand Up @@ -36,6 +38,13 @@ if (LOG_LEVEL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLOG_LEVEL=${LOG_LEVEL}") # SPBLAS_DEBUG | SPBLAS_WARNING | SPBLAS_TRACE | SPBLAS_INFO
endif()

# Enable sanitizers
if (ENABLE_SANITIZERS)
set(SANITIZER_FLAGS "-fsanitize=address,undefined")
target_compile_options(spblas INTERFACE ${SANITIZER_FLAGS} -g -O1 -fno-omit-frame-pointer)
target_link_options(spblas INTERFACE ${SANITIZER_FLAGS})
endif()

# mdspan
FetchContent_Declare(
mdspan
Expand Down
38 changes: 38 additions & 0 deletions include/spblas/algorithms/detail/sparse_dot_product.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#pragma once

#include <optional>

#include <spblas/backend/spa_accumulator.hpp>

namespace spblas {

namespace __detail {

// Sparse-sparse dot product of two index/value ranges `a` and `b`.
//
// Scatters `a` into the dense accumulator `acc`, then gathers along `b`,
// multiplying matching entries. Returns an empty optional when the two
// ranges share no common index (the result is an implicit zero), so callers
// can avoid storing explicit zeros.
template <typename T, typename I, typename A, typename B>
std::optional<T> sparse_dot_product(__backend::spa_accumulator<T, I>& acc,
                                    A&& a, B&& b) {
  acc.clear();

  // Scatter phase: make the entries of `a` addressable by index.
  for (auto&& [index, value] : a) {
    acc[index] = value;
  }

  // Gather phase: accumulate products over the index intersection. The
  // optional stays empty until the first matching index is found.
  std::optional<T> result;
  for (auto&& [index, value] : b) {
    if (acc.contains(index)) {
      result = result.value_or(T(0)) + acc[index] * value;
    }
  }

  return result;
}

} // namespace __detail

} // namespace spblas
5 changes: 5 additions & 0 deletions include/spblas/algorithms/detail/spgemm/spgemm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#pragma once

#include "spgemm_gustavsons.hpp"
#include "spgemm_innerproduct.hpp"
#include "spgemm_outerproduct.hpp"
217 changes: 217 additions & 0 deletions include/spblas/algorithms/detail/spgemm/spgemm_gustavsons.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
#pragma once

#include <spblas/backend/backend.hpp>
#include <spblas/concepts.hpp>
#include <spblas/detail/log.hpp>

#include <spblas/algorithms/transposed.hpp>
#include <spblas/backend/csr_builder.hpp>
#include <spblas/backend/spa_accumulator.hpp>
#include <spblas/detail/operation_info_t.hpp>

namespace spblas {

// C = AB
// CSR * CSR -> CSR
// SpGEMM (Gustavson's Algorithm)
//
// Numeric phase: for each row i of A, scatter a_ik * B[k, :] into a dense
// accumulator, then compact the accumulated row into C in sorted column
// order.
template <matrix A, matrix B, matrix C>
  requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
           __detail::is_csr_view_v<C>)
void multiply(A&& a, B&& b, C&& c) {
  log_trace("");
  bool shapes_compatible =
      __backend::shape(a)[0] == __backend::shape(c)[0] &&
      __backend::shape(b)[1] == __backend::shape(c)[1] &&
      __backend::shape(a)[1] == __backend::shape(b)[0];
  if (!shapes_compatible) {
    throw std::invalid_argument(
        "multiply: matrix dimensions are incompatible.");
  }

  using T = tensor_scalar_t<C>;
  using I = tensor_index_t<C>;

  // One dense accumulator, reused across all rows of C.
  __backend::spa_accumulator<T, I> accumulated_row(__backend::shape(c)[1]);
  __backend::csr_builder output_builder(c);

  for (auto&& [row_index, row_of_a] : __backend::rows(a)) {
    accumulated_row.clear();
    for (auto&& [inner_index, a_value] : row_of_a) {
      for (auto&& [col_index, b_value] :
           __backend::lookup_row(b, inner_index)) {
        accumulated_row[col_index] += a_value * b_value;
      }
    }
    // CSR requires column indices within a row to be sorted.
    accumulated_row.sort();

    try {
      output_builder.insert_row(row_index, accumulated_row.get());
    } catch (...) {
      throw std::runtime_error("multiply: SpGEMM ran out of memory.");
    }
  }
  c.update(c.values(), c.rowptr(), c.colind(), c.shape(),
           c.rowptr()[c.shape()[0]]);
}

// C = AB
// CSR * CSR -> CSR
// SpGEMM (Gustavson's Algorithm)
//
// Symbolic phase: counts the nonzeros the product will contain, without
// computing any values, so the caller can allocate storage for C.
template <matrix A, matrix B, matrix C>
  requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
           __detail::is_csr_view_v<C>)
operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
  log_trace("");
  if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
      __backend::shape(b)[1] != __backend::shape(c)[1] ||
      __backend::shape(a)[1] != __backend::shape(b)[0]) {
    throw std::invalid_argument(
        "multiply: matrix dimensions are incompatible.");
  }

  using I = tensor_index_t<C>;
  using O = tensor_offset_t<C>;

  // Set of distinct column indices in the current output row.
  __backend::spa_set<I> nonzero_columns(__backend::shape(c)[1]);
  O total_nnz = 0;

  for (auto&& [row_index, row_of_a] : __backend::rows(a)) {
    nonzero_columns.clear();

    for (auto&& [inner_index, a_value] : row_of_a) {
      for (auto&& [col_index, b_value] :
           __backend::lookup_row(b, inner_index)) {
        nonzero_columns.insert(col_index);
      }
    }

    total_nnz += nonzero_columns.size();
  }

  return operation_info_t{__backend::shape(c), total_nnz};
}

// C = AB
// CSC * CSC -> CSC
// SpGEMM (Gustavson's Algorithm, transposed)
//
// Delegates to the row-based kernel using the identity (B^T A^T)^T = AB.
template <matrix A, matrix B, matrix C>
  requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
           __detail::is_csc_view_v<C>)
void multiply(A&& a, B&& b, C&& c) {
  log_trace("");
  auto a_shape = __backend::shape(a);
  auto b_shape = __backend::shape(b);
  auto c_shape = __backend::shape(c);
  if (a_shape[0] != c_shape[0] || b_shape[1] != c_shape[1] ||
      a_shape[1] != b_shape[0]) {
    throw std::invalid_argument(
        "multiply: matrix dimensions are incompatible.");
  }
  multiply(transposed(b), transposed(a), transposed(c));
}

// C = AB
// CSC * CSC -> CSC
// SpGEMM (Gustavson's Algorithm, transposed)
//
// Symbolic phase for the transposed formulation: the nnz count is computed
// on the transposed product, then the reported shape is swapped back to C's
// orientation.
template <matrix A, matrix B, matrix C>
  requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
           __detail::is_csc_view_v<C>)
operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
  log_trace("");
  if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
      __backend::shape(b)[1] != __backend::shape(c)[1] ||
      __backend::shape(a)[1] != __backend::shape(b)[0]) {
    throw std::invalid_argument(
        "multiply: matrix dimensions are incompatible.");
  }

  auto info = multiply_compute(transposed(b), transposed(a), transposed(c));
  // The nested call reported the shape of C^T; restore C's orientation.
  auto transposed_shape = info.result_shape();
  info.update_impl_({transposed_shape[1], transposed_shape[0]},
                    info.result_nnz());
  return info;
}

// C = AB
// CSR * CSR -> CSC
// SpGEMM (Gustavson's Algorithm, scattered)
//
// Computes each row of C with a dense accumulator, scatters the resulting
// entries into per-column buckets, then writes the buckets out column by
// column in row-sorted order to produce a CSC result.
template <matrix A, matrix B, matrix C>
  requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
           __detail::is_csc_view_v<C>)
void multiply(A&& a, B&& b, C&& c) {
  log_trace("");
  if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
      __backend::shape(b)[1] != __backend::shape(c)[1] ||
      __backend::shape(a)[1] != __backend::shape(b)[0]) {
    throw std::invalid_argument(
        "multiply: matrix dimensions are incompatible.");
  }

  using T = tensor_scalar_t<C>;
  using I = tensor_index_t<C>;

  auto num_columns = __backend::shape(c)[1];
  __backend::spa_accumulator<T, I> accumulated_row(num_columns);

  // columns_of_c[j] collects the (row, value) entries of column j of C.
  std::vector<std::vector<std::pair<I, T>>> columns_of_c(num_columns);

  for (auto&& [row_index, row_of_a] : __backend::rows(a)) {
    accumulated_row.clear();
    for (auto&& [inner_index, a_value] : row_of_a) {
      for (auto&& [col_index, b_value] :
           __backend::lookup_row(b, inner_index)) {
        accumulated_row[col_index] += a_value * b_value;
      }
    }
    for (auto&& [col_index, value] : accumulated_row.get()) {
      columns_of_c[col_index].push_back({row_index, value});
    }
  }

  __backend::csc_builder output_builder(c);

  for (std::size_t col_index = 0; col_index < columns_of_c.size();
       col_index++) {
    auto&& column = columns_of_c[col_index];
    // CSC requires row indices within a column to be sorted.
    std::sort(column.begin(), column.end(),
              [](auto&& lhs, auto&& rhs) { return lhs.first < rhs.first; });

    try {
      output_builder.insert_column(col_index, column);
    } catch (...) {
      throw std::runtime_error("multiply: SpGEMM ran out of memory.");
    }
  }
  c.update(c.values(), c.colptr(), c.rowind(), c.shape(),
           c.colptr()[c.shape()[1]]);
}

// C = AB
// CSR * CSR -> CSC
// SpGEMM (Gustavson's Algorithm, scattered)
//
// Symbolic phase: the nnz count is the same as for the row-based kernel,
// since only the output layout differs.
template <matrix A, matrix B, matrix C>
  requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
           __detail::is_csc_view_v<C>)
operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
  log_trace("");
  if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
      __backend::shape(b)[1] != __backend::shape(c)[1] ||
      __backend::shape(a)[1] != __backend::shape(b)[0]) {
    throw std::invalid_argument(
        "multiply: matrix dimensions are incompatible.");
  }

  using I = tensor_index_t<C>;
  using O = tensor_offset_t<C>;

  O result_nnz = 0;
  // Distinct column indices of the current output row.
  __backend::spa_set<I> seen_columns(__backend::shape(c)[1]);

  for (auto&& [row_index, row_of_a] : __backend::rows(a)) {
    seen_columns.clear();
    for (auto&& [inner_index, a_value] : row_of_a) {
      for (auto&& [col_index, b_value] :
           __backend::lookup_row(b, inner_index)) {
        seen_columns.insert(col_index);
      }
    }
    result_nnz += seen_columns.size();
  }

  return operation_info_t{__backend::shape(c), result_nnz};
}

} // namespace spblas
Loading