Add performance documentation to LinearAlgebraTorch GPU operations

Copilot · pmaciel · commit ccb0c520aff6 · 2026-03-03T10:16:09.000Z
diff --git a/src/eckit/linalg/sparse/LinearAlgebraTorch.cc b/src/eckit/linalg/sparse/LinearAlgebraTorch.cc
@@ -48,6 +48,13 @@ void LinearAlgebraTorch::spmv(const SparseMatrix& A, const Vector& x, Vector& y)
     ASSERT(Ni == y.rows());
     ASSERT(Nj == x.rows());
 
+    // Note: every call copies A and x to the Torch device selected by name() (typically GPU
+    // memory) and immediately copies the result back to host (CPU) memory. This data transfer
+    // overhead can be significant and may negate the performance benefit of device computation
+    // for small matrices or for frequently repeated calls. Acceleration is most beneficial for
+    // large matrices, where computation time dominates transfer time. For optimal performance,
+    // consider keeping data on the device across multiple operations instead of per-call copies.
+
     // multiplication
     auto A_tensor = make_torch_sparse_csr(A, get_torch_device(name()));
     auto x_tensor = make_torch_dense_tensor(x, get_torch_device(name()));
@@ -66,6 +73,13 @@ void LinearAlgebraTorch::spmm(const SparseMatrix& A, const Matrix& X, Matrix& Y)
     ASSERT(Nj == X.rows());
     ASSERT(Nk == Y.cols());
 
+    // Note: every call copies A and X to the Torch device selected by name() (typically GPU
+    // memory) and immediately copies the result back to host (CPU) memory. This data transfer
+    // overhead can be significant and may negate the performance benefit of device computation
+    // for small matrices or for frequently repeated calls. Acceleration is most beneficial for
+    // large matrices, where computation time dominates transfer time. For optimal performance,
+    // consider keeping data on the device across multiple operations instead of per-call copies.
+
     // multiplication and conversion from column-major to row-major (and back)
     auto A_tensor = make_torch_sparse_csr(A, get_torch_device(name()));
     auto X_tensor = make_torch_dense_tensor(X, get_torch_device(name()));