Patch DenseMatrix to fix matrix multiplication on CUDA 11.3 (#809)

gmgunter · GitHub Enterprise · commit 3de04a90919e · 2021-06-15T11:54:23.000-07:00
* Patch DenseMatrix to fix matrix multiplication on CUDA 11.3

This update patches the DenseMatrix class to work around an issue with
recent releases of Eigen and some CUDA configurations.

When compiled with nvcc 11.3, Eigen's matrix-matrix and matrix-vector
product operations compile to nothing (no assembly instructions are
emitted). In addition, these operations result in "illegal memory
access" errors when compiled with nvcc 11.2 in debug mode. These issues
seem to be resolved in Eigen's 'master' branch at the time of writing,
but are present in multiple recent releases that are supported by isce3.

The patch implements matrix multiplication as a sequence of dot
products. This implementation probably isn't optimal in general (and may
inhibit expression template chaining) but, in the common case of small,
fixed-size matrices/vectors, this implementation generates the same
assembly as before on nvcc 11.2 and works around the bug with nvcc 11.3.
diff --git a/cxx/isce3/core/DenseMatrix.h b/cxx/isce3/core/DenseMatrix.h
@@ -6,23 +6,26 @@
 #define EIGEN_MPL2_ONLY
 #include <Eigen/Dense>
 #include "Common.h"
+#include "Vector.h"
 
 namespace isce3 { namespace core {
 
 template<int N, typename T>
 class DenseMatrix : public Eigen::Matrix<T, N, N> {
     using super_t = Eigen::Matrix<T, N, N>;
     using super_t::super_t;
+
+    static_assert(N > 0);
 public:
     DenseMatrix() = default;
     CUDA_HOSTDEV auto operator[](int i)       { return this->row(i); }
     CUDA_HOSTDEV auto operator[](int i) const { return this->row(i); }
 
-    CUDA_HOSTDEV auto dot(const super_t& other) const {
+    CUDA_HOSTDEV auto dot(const DenseMatrix& other) const {
         return *this * other;
     }
 
-    CUDA_HOSTDEV auto dot(const Eigen::Matrix<T, N, 1>& other) const {
+    CUDA_HOSTDEV auto dot(const Vector<N, T>& other) const {
         return *this * other;
     }
 
@@ -78,4 +81,33 @@ CUDA_HOSTDEV Mat3 DenseMatrix<N, T>::enuToXyz(double lat, double lon)
                   {0, cos(lat), sin(lat)}}};
 }
 
+// XXX Workaround: with certain Eigen & CUDA version combinations, Eigen's
+// matrix-matrix and matrix-vector products gave incorrect results (or raised
+// "illegal memory access" errors in debug mode), so each output element is
+// computed explicitly. cwiseProduct().sum() is used rather than dot() because
+// Eigen's dot() conjugates its first operand when T is complex.
+template<int N, typename T>
+CUDA_HOSTDEV auto
+operator*(const DenseMatrix<N, T>& a, const DenseMatrix<N, T>& b)
+{
+    DenseMatrix<N, T> out;
+    for (int i = 0; i < N; ++i) {
+        for (int j = 0; j < N; ++j) {
+            out(i, j) = a.row(i).transpose().cwiseProduct(b.col(j)).sum();
+        }
+    }
+    return out;
+}
+
+// Matrix-vector product; cwiseProduct().sum() is used instead of dot()
+// because Eigen's dot() conjugates its first operand when T is complex.
+template<int N, typename T>
+CUDA_HOSTDEV auto operator*(const DenseMatrix<N, T>& m, const Vector<N, T>& v)
+{
+    Vector<N, T> out;
+    for (int i = 0; i < N; ++i)
+        out[i] = m.row(i).transpose().cwiseProduct(v).sum();
+    return out;
+}
+
 }}
diff --git a/cxx/isce3/core/Vector.h b/cxx/isce3/core/Vector.h
@@ -14,6 +14,8 @@ template<int N, typename T>
 class Vector : public Eigen::Matrix<T, N, 1> {
     using super_t = Eigen::Matrix<T, N, 1>;
     using super_t::super_t;
+
+    static_assert(N > 0);
 };
 
 // Function to compute normal vector to a plane given three points