Skip to content

Commit c512782

Browse files
Support mixed CSR/CSC operations (#43)
* Support mixed CSR/CSC operations. * Update ArmPL backend to support mixed CSR/CSC operations. * Update oneMKL backend to support mixed CSR/CSC operations. --------- Co-authored-by: Spencer Patty <[email protected]>
1 parent 0372684 commit c512782

19 files changed

+1164
-182
lines changed

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
66

77
set(CMAKE_CXX_FLAGS "-O3 -march=native")
88

9+
option(ENABLE_SANITIZERS "Enable Clang sanitizers" OFF)
10+
911
# Get includes, which declares the `spblas` library
1012
add_subdirectory(include)
1113

@@ -36,6 +38,13 @@ if (LOG_LEVEL)
3638
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLOG_LEVEL=${LOG_LEVEL}") # SPBLAS_DEBUG | SPBLAS_WARNING | SPBLAS_TRACE | SPBLAS_INFO
3739
endif()
3840

41+
# Enable sanitizers
42+
if (ENABLE_SANITIZERS)
43+
set(SANITIZER_FLAGS "-fsanitize=address,undefined")
44+
target_compile_options(spblas INTERFACE ${SANITIZER_FLAGS} -g -O1 -fno-omit-frame-pointer)
45+
target_link_options(spblas INTERFACE ${SANITIZER_FLAGS})
46+
endif()
47+
3948
# mdspan
4049
FetchContent_Declare(
4150
mdspan
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#pragma once
2+
3+
#include <optional>
4+
5+
#include <spblas/backend/spa_accumulator.hpp>
6+
7+
namespace spblas {
8+
9+
namespace __detail {
10+
11+
template <typename T, typename I, typename A, typename B>
12+
std::optional<T> sparse_dot_product(__backend::spa_accumulator<T, I>& acc,
13+
A&& a, B&& b) {
14+
acc.clear();
15+
16+
for (auto&& [i, v] : a) {
17+
acc[i] = v;
18+
}
19+
20+
T sum = 0;
21+
bool implicit_zero = true;
22+
for (auto&& [i, v] : b) {
23+
if (acc.contains(i)) {
24+
sum += acc[i] * v;
25+
implicit_zero = false;
26+
}
27+
}
28+
29+
if (implicit_zero) {
30+
return {};
31+
} else {
32+
return sum;
33+
}
34+
}
35+
36+
template <typename Set, typename A, typename B>
37+
bool sparse_intersection(Set&& set, A&& a, B&& b) {
38+
set.clear();
39+
40+
for (auto&& [i, v] : a) {
41+
set.insert(i);
42+
}
43+
44+
for (auto&& [i, v] : b) {
45+
if (set.contains(i)) {
46+
return true;
47+
}
48+
}
49+
50+
return false;
51+
}
52+
53+
} // namespace __detail
54+
55+
} // namespace spblas
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#pragma once
2+
3+
#include "spgemm_gustavsons.hpp"
4+
#include "spgemm_innerproduct.hpp"
5+
#include "spgemm_outerproduct.hpp"
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
#pragma once
2+
3+
#include <spblas/backend/backend.hpp>
4+
#include <spblas/concepts.hpp>
5+
#include <spblas/detail/log.hpp>
6+
7+
#include <spblas/algorithms/transposed.hpp>
8+
#include <spblas/backend/csr_builder.hpp>
9+
#include <spblas/backend/spa_accumulator.hpp>
10+
#include <spblas/detail/operation_info_t.hpp>
11+
12+
namespace spblas {
13+
14+
// C = AB
15+
// CSR * CSR -> CSR
16+
// SpGEMM (Gustavson's Algorithm)
17+
template <matrix A, matrix B, matrix C>
18+
requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
19+
__detail::is_csr_view_v<C>)
20+
void multiply(A&& a, B&& b, C&& c) {
21+
log_trace("");
22+
if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
23+
__backend::shape(b)[1] != __backend::shape(c)[1] ||
24+
__backend::shape(a)[1] != __backend::shape(b)[0]) {
25+
throw std::invalid_argument(
26+
"multiply: matrix dimensions are incompatible.");
27+
}
28+
29+
using T = tensor_scalar_t<C>;
30+
using I = tensor_index_t<C>;
31+
32+
__backend::spa_accumulator<T, I> c_row(__backend::shape(c)[1]);
33+
__backend::csr_builder c_builder(c);
34+
35+
for (auto&& [i, a_row] : __backend::rows(a)) {
36+
c_row.clear();
37+
for (auto&& [k, a_v] : a_row) {
38+
for (auto&& [j, b_v] : __backend::lookup_row(b, k)) {
39+
c_row[j] += a_v * b_v;
40+
}
41+
}
42+
c_row.sort();
43+
44+
try {
45+
c_builder.insert_row(i, c_row.get());
46+
} catch (...) {
47+
throw std::runtime_error("multiply: SpGEMM ran out of memory.");
48+
}
49+
}
50+
c.update(c.values(), c.rowptr(), c.colind(), c.shape(),
51+
c.rowptr()[c.shape()[0]]);
52+
}
53+
54+
// C = AB
55+
// CSR * CSR -> CSR
56+
// SpGEMM (Gustavson's Algorithm)
57+
template <matrix A, matrix B, matrix C>
58+
requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
59+
__detail::is_csr_view_v<C>)
60+
operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
61+
log_trace("");
62+
if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
63+
__backend::shape(b)[1] != __backend::shape(c)[1] ||
64+
__backend::shape(a)[1] != __backend::shape(b)[0]) {
65+
throw std::invalid_argument(
66+
"multiply: matrix dimensions are incompatible.");
67+
}
68+
69+
using T = tensor_scalar_t<C>;
70+
using I = tensor_index_t<C>;
71+
using O = tensor_offset_t<C>;
72+
73+
O nnz = 0;
74+
__backend::spa_set<I> c_row(__backend::shape(c)[1]);
75+
76+
for (auto&& [i, a_row] : __backend::rows(a)) {
77+
c_row.clear();
78+
79+
for (auto&& [k, a_v] : a_row) {
80+
for (auto&& [j, b_v] : __backend::lookup_row(b, k)) {
81+
c_row.insert(j);
82+
}
83+
}
84+
85+
nnz += c_row.size();
86+
}
87+
88+
return operation_info_t{__backend::shape(c), nnz};
89+
}
90+
91+
// C = AB
92+
// CSC * CSC -> CSC
93+
// SpGEMM (Gustavson's Algorithm, transposed)
94+
template <matrix A, matrix B, matrix C>
95+
requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
96+
__detail::is_csc_view_v<C>)
97+
void multiply(A&& a, B&& b, C&& c) {
98+
log_trace("");
99+
if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
100+
__backend::shape(b)[1] != __backend::shape(c)[1] ||
101+
__backend::shape(a)[1] != __backend::shape(b)[0]) {
102+
throw std::invalid_argument(
103+
"multiply: matrix dimensions are incompatible.");
104+
}
105+
multiply(transposed(b), transposed(a), transposed(c));
106+
}
107+
108+
// C = AB
109+
// CSC * CSC -> CSC
110+
// SpGEMM (Gustavson's Algorithm, transposed)
111+
template <matrix A, matrix B, matrix C>
112+
requires(__backend::column_iterable<A> && __backend::column_iterable<B> &&
113+
__detail::is_csc_view_v<C>)
114+
operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
115+
log_trace("");
116+
if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
117+
__backend::shape(b)[1] != __backend::shape(c)[1] ||
118+
__backend::shape(a)[1] != __backend::shape(b)[0]) {
119+
throw std::invalid_argument(
120+
"multiply: matrix dimensions are incompatible.");
121+
}
122+
123+
auto info = multiply_compute(transposed(b), transposed(a), transposed(c));
124+
info.update_impl_({info.result_shape()[1], info.result_shape()[0]},
125+
info.result_nnz());
126+
return info;
127+
}
128+
129+
// C = AB
130+
// CSR * CSR -> CSC
131+
// SpGEMM (Gustavson's Algorithm, scattered)
132+
template <matrix A, matrix B, matrix C>
133+
requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
134+
__detail::is_csc_view_v<C>)
135+
void multiply(A&& a, B&& b, C&& c) {
136+
log_trace("");
137+
if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
138+
__backend::shape(b)[1] != __backend::shape(c)[1] ||
139+
__backend::shape(a)[1] != __backend::shape(b)[0]) {
140+
throw std::invalid_argument(
141+
"multiply: matrix dimensions are incompatible.");
142+
}
143+
144+
using T = tensor_scalar_t<C>;
145+
using I = tensor_index_t<C>;
146+
147+
__backend::spa_accumulator<T, I> c_row(__backend::shape(c)[1]);
148+
149+
std::vector<std::vector<std::pair<I, T>>> columns(__backend::shape(c)[1]);
150+
151+
for (auto&& [i, a_row] : __backend::rows(a)) {
152+
c_row.clear();
153+
for (auto&& [k, a_v] : a_row) {
154+
for (auto&& [j, b_v] : __backend::lookup_row(b, k)) {
155+
c_row[j] += a_v * b_v;
156+
}
157+
}
158+
for (auto&& [j, v] : c_row.get()) {
159+
columns[j].push_back({i, v});
160+
}
161+
}
162+
163+
__backend::csc_builder c_builder(c);
164+
165+
for (std::size_t j = 0; j < columns.size(); j++) {
166+
auto&& column = columns[j];
167+
std::sort(column.begin(), column.end(),
168+
[](auto&& a, auto&& b) { return a.first < b.first; });
169+
170+
try {
171+
c_builder.insert_column(j, column);
172+
} catch (...) {
173+
throw std::runtime_error("multiply: SpGEMM ran out of memory.");
174+
}
175+
}
176+
c.update(c.values(), c.colptr(), c.rowind(), c.shape(),
177+
c.colptr()[c.shape()[1]]);
178+
}
179+
180+
// C = AB
181+
// CSR * CSR -> CSC
182+
// SpGEMM (Gustavson's Algorithm, scattered)
183+
template <matrix A, matrix B, matrix C>
184+
requires(__backend::row_iterable<A> && __backend::row_iterable<B> &&
185+
__detail::is_csc_view_v<C>)
186+
operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
187+
log_trace("");
188+
if (__backend::shape(a)[0] != __backend::shape(c)[0] ||
189+
__backend::shape(b)[1] != __backend::shape(c)[1] ||
190+
__backend::shape(a)[1] != __backend::shape(b)[0]) {
191+
throw std::invalid_argument(
192+
"multiply: matrix dimensions are incompatible.");
193+
}
194+
195+
using T = tensor_scalar_t<C>;
196+
using I = tensor_index_t<C>;
197+
using O = tensor_offset_t<C>;
198+
199+
O nnz = 0;
200+
__backend::spa_set<I> c_row(__backend::shape(c)[1]);
201+
202+
for (auto&& [i, a_row] : __backend::rows(a)) {
203+
c_row.clear();
204+
205+
for (auto&& [k, a_v] : a_row) {
206+
for (auto&& [j, b_v] : __backend::lookup_row(b, k)) {
207+
c_row.insert(j);
208+
}
209+
}
210+
211+
nnz += c_row.size();
212+
}
213+
214+
return operation_info_t{__backend::shape(c), nnz};
215+
}
216+
217+
} // namespace spblas

0 commit comments

Comments
 (0)