Skip to content

Commit 3de04a9

Browse files
gmgunterGitHub Enterprise
authored andcommitted
Patch DenseMatrix to fix matrix multiplication on CUDA 11.3 (#809)
* Patch DenseMatrix to fix matrix multiplication on CUDA 11.3

This update patches the DenseMatrix class to work around an issue with recent releases of Eigen and some CUDA configurations. When compiled with nvcc 11.3, Eigen's matrix-matrix and matrix-vector product operations compile to nothing (no assembly instructions are emitted). In addition, these operations result in "illegal memory access" errors when compiled with nvcc 11.2 in debug mode. These issues seem to be resolved in Eigen's 'master' branch at the time of writing, but are present in multiple recent releases that are supported by isce3. The patch implements matrix multiplication as a sequence of dot products. This implementation probably isn't optimal in general (and may inhibit expression template chaining) but, in the common case of small, fixed-size matrices/vectors, it generates the same assembly as before on nvcc 11.2 and works around the bug with nvcc 11.3.
1 parent 1939ccc commit 3de04a9

File tree

2 files changed

+36
-2
lines changed

2 files changed

+36
-2
lines changed

cxx/isce3/core/DenseMatrix.h

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,26 @@
66
#define EIGEN_MPL2_ONLY
77
#include <Eigen/Dense>
88
#include "Common.h"
9+
#include "Vector.h"
910

1011
namespace isce3 { namespace core {
1112

1213
template<int N, typename T>
1314
class DenseMatrix : public Eigen::Matrix<T, N, N> {
1415
using super_t = Eigen::Matrix<T, N, N>;
1516
using super_t::super_t;
17+
18+
static_assert(N > 0);
1619
public:
1720
DenseMatrix() = default;
1821
CUDA_HOSTDEV auto operator[](int i) { return this->row(i); }
1922
CUDA_HOSTDEV auto operator[](int i) const { return this->row(i); }
2023

21-
CUDA_HOSTDEV auto dot(const super_t& other) const {
24+
CUDA_HOSTDEV auto dot(const DenseMatrix& other) const {
2225
return *this * other;
2326
}
2427

25-
CUDA_HOSTDEV auto dot(const Eigen::Matrix<T, N, 1>& other) const {
28+
CUDA_HOSTDEV auto dot(const Vector<N, T>& other) const {
2629
return *this * other;
2730
}
2831

@@ -78,4 +81,33 @@ CUDA_HOSTDEV Mat3 DenseMatrix<N, T>::enuToXyz(double lat, double lon)
7881
{0, cos(lat), sin(lat)}}};
7982
}
8083

84+
// XXX
85+
// These overloads are a workaround to resolve an issue observed with certain
86+
// Eigen & CUDA version combinations where matrix-matrix and matrix-vector
87+
// multiplication produced incorrect results (or raised "illegal memory access"
88+
// errors in debug mode).
89+
template<int N, typename T>
90+
CUDA_HOSTDEV auto
91+
operator*(const DenseMatrix<N, T>& a, const DenseMatrix<N, T>& b)
92+
{
93+
DenseMatrix<N, T> out;
94+
for (int i = 0; i < N; ++i) {
95+
for (int j = 0; j < N; ++j) {
96+
out(i, j) = a.row(i).dot(b.col(j));
97+
}
98+
}
99+
return out;
100+
}
101+
102+
template<int N, typename T>
103+
CUDA_HOSTDEV auto
104+
operator*(const DenseMatrix<N, T>& m, const Vector<N, T>& v)
105+
{
106+
Vector<N, T> out;
107+
for (int i = 0; i < N; ++i) {
108+
out[i] = m.row(i).dot(v);
109+
}
110+
return out;
111+
}
112+
81113
}}

cxx/isce3/core/Vector.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ template<int N, typename T>
1414
/**
 * Fixed-size column vector with N elements of type T.
 *
 * Derives from Eigen::Matrix<T, N, 1> and inherits all of its constructors.
 */
class Vector : public Eigen::Matrix<T, N, 1> {
    // Reject non-positive lengths at compile time.
    static_assert(N > 0);

    // Alias for the Eigen base type, used to pull in its constructors.
    using super_t = Eigen::Matrix<T, N, 1>;
    using super_t::super_t;
};
};
1820

1921
// Function to compute normal vector to a plane given three points

0 commit comments

Comments
 (0)