 #include <spblas/concepts.hpp>
 #include <spblas/detail/log.hpp>
 
+#include <spblas/algorithms/detail/sparse_dot_product.hpp>
 #include <spblas/algorithms/transposed.hpp>
 #include <spblas/backend/csr_builder.hpp>
 #include <spblas/backend/spa_accumulator.hpp>
@@ -190,44 +191,6 @@ void multiply_compute(operation_info_t& info, A&& a, B&& b, C&& c) {
   info.update_impl_(new_info.result_shape(), new_info.result_nnz());
 }
 
-template <typename T, typename A, typename B>
-std::optional<T> sparse_dot_product(A&& a, B&& b) {
-  auto sort_by_index = [](auto&& a, auto&& b) {
-    auto&& [a_i, a_v] = a;
-    auto&& [b_i, b_v] = b;
-    return a_i < b_i;
-  };
-  std::sort(a.begin(), a.end(), sort_by_index);
-  std::sort(b.begin(), b.end(), sort_by_index);
-
-  auto a_iter = a.begin();
-  auto b_iter = b.begin();
-
-  T sum = 0;
-  bool implicit_zero = true;
-  for (; a_iter != a.end() && b_iter != b.end();) {
-    auto&& [a_i, a_v] = *a_iter;
-    auto&& [b_i, b_v] = *b_iter;
-
-    if (a_i == b_i) {
-      sum += a_v * b_v;
-      implicit_zero = false;
-      ++a_iter;
-      ++b_iter;
-    } else if (a_i < b_i) {
-      ++a_iter;
-    } else {
-      ++b_iter;
-    }
-  }
-
-  if (implicit_zero) {
-    return {};
-  } else {
-    return sum;
-  }
-}
-
 // C = AB
 // SpGEMM (Inner Product)
 template <matrix A, matrix B, matrix C>
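
For reference, the routine removed above sorts both operands by index, then merges them in lockstep, returning std::nullopt when the index sets never intersect (an implicit zero rather than a computed 0). A minimal standalone demo of those semantics; the entry alias and merge_dot_product name are illustrative, not spblas identifiers:

// Standalone sketch of the removed merge-based dot product.
#include <algorithm>
#include <iostream>
#include <optional>
#include <utility>
#include <vector>

using entry = std::pair<int, double>; // (index, value)

std::optional<double> merge_dot_product(std::vector<entry> a,
                                        std::vector<entry> b) {
  auto by_index = [](const entry& x, const entry& y) {
    return x.first < y.first;
  };
  std::sort(a.begin(), a.end(), by_index);
  std::sort(b.begin(), b.end(), by_index);

  double sum = 0;
  bool implicit_zero = true;
  // Walk both sorted ranges in lockstep, multiplying matching indices.
  for (auto ai = a.begin(), bi = b.begin(); ai != a.end() && bi != b.end();) {
    if (ai->first == bi->first) {
      sum += ai->second * bi->second;
      implicit_zero = false;
      ++ai;
      ++bi;
    } else if (ai->first < bi->first) {
      ++ai;
    } else {
      ++bi;
    }
  }
  // No overlapping indices: the result is an implicit zero, not 0.0.
  if (implicit_zero) {
    return std::nullopt;
  }
  return sum;
}

int main() {
  std::vector<entry> a{{3, 2.0}, {0, 1.0}};
  std::vector<entry> b{{3, 4.0}, {5, 7.0}};
  std::cout << merge_dot_product(a, b).value_or(0) << '\n';          // prints 8
  std::cout << merge_dot_product(a, {{1, 9.0}}).has_value() << '\n'; // prints 0
}
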
@@ -245,6 +208,7 @@ void multiply(A&& a, B&& b, C&& c) {
   using T = tensor_scalar_t<C>;
   using I = tensor_index_t<C>;
 
+  __backend::spa_accumulator<T, I> dot_product_acc(__backend::shape(c)[1]);
   __backend::spa_accumulator<T, I> c_row(__backend::shape(c)[1]);
   __backend::csr_builder c_builder(c);
 
@@ -254,7 +218,8 @@ void multiply(A&& a, B&& b, C&& c) {
     if (!__ranges::empty(a_row)) {
       for (auto&& [j, b_column] : __backend::columns(b)) {
         if (!__ranges::empty(b_column)) {
-          auto v = sparse_dot_product<T>(a_row, b_column);
+          auto v =
+              __detail::sparse_dot_product<T>(dot_product_acc, a_row, b_column);
 
           if (v.has_value()) {
             c_row[j] += v.value();
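
The body of the new <spblas/algorithms/detail/sparse_dot_product.hpp> is not shown in this diff. Below is a plausible sketch of the accumulator-based variant it might contain, assuming spa_accumulator exposes clear(), contains(), and operator[] (the actual backend interface may differ): scatter one operand into the pre-allocated accumulator, then gather along the other, trading the two sorts above for O(nnz(a) + nnz(b)) work per dot product.

// Hypothetical sketch, not the actual spblas implementation.
#include <optional>

template <typename T, typename Acc, typename A, typename B>
std::optional<T> sparse_dot_product(Acc&& acc, A&& a, B&& b) {
  acc.clear(); // reuse the caller's allocation across calls

  // Scatter a's values into the accumulator, keyed by index.
  for (auto&& [i, v] : a) {
    acc[i] += v;
  }

  T sum = 0;
  bool implicit_zero = true;
  // Gather: only indices present in both operands contribute.
  for (auto&& [i, v] : b) {
    if (acc.contains(i)) {
      sum += acc[i] * v;
      implicit_zero = false;
    }
  }

  if (implicit_zero) {
    return {};
  }
  return sum;
}
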
@@ -295,11 +260,14 @@ operation_info_t multiply_compute(A&& a, B&& b, C&& c) {
 
   O nnz = 0;
 
+  __backend::spa_accumulator<T, I> dot_product_acc(__backend::shape(c)[1]);
+
   for (auto&& [i, a_row] : __backend::rows(a)) {
     if (!__ranges::empty(a_row)) {
       for (auto&& [j, b_column] : __backend::columns(b)) {
         if (!__ranges::empty(b_column)) {
-          auto v = sparse_dot_product<T>(a_row, b_column);
+          auto v =
+              __detail::sparse_dot_product<T>(dot_product_acc, a_row, b_column);
 
           if (v.has_value()) {
             nnz++;
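
In both multiply and multiply_compute, dot_product_acc is constructed once, outside the row/column loops, and passed into every __detail::sparse_dot_product call, so its storage is reused across all inner-product evaluations instead of being set up per call; the old routine instead sorted both input ranges in place on every invocation.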