-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathcoo_mumps_solver.h
More file actions
224 lines (195 loc) · 9.48 KB
/
coo_mumps_solver.h
File metadata and controls
224 lines (195 loc) · 9.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#pragma once
#ifdef GMGPOLAR_USE_MUMPS
#include <string>
#include <stdexcept>
#include "dmumps_c.h"
#include "../../LinearAlgebra/Matrix/coo_matrix.h"
#include "../../LinearAlgebra/Vector/vector.h"
/*
* Wraps MUMPS for solving sparse linear systems given in COO format.
* For general matrices, all non-zero entries must be provided.
* For symmetric matrices (is_symmetric = true), only the upper or lower
* triangular entries should be provided, and the matrix is assumed to be
* positive definite. In GMGPolar this holds true since the domain mapping is invertible.
*/
class CooMumpsSolver
{
public:
explicit CooMumpsSolver(SparseMatrixCOO<double> matrix)
: matrix_(std::move(matrix))
{
initialize();
}
~CooMumpsSolver()
{
finalize();
}
// rhs is overwritten in-place with the solution on return.
void solve(Vector<double>& rhs)
{
assert(std::ssize(rhs) == mumps_solver_.n);
mumps_solver_.job = JOB_COMPUTE_SOLUTION;
mumps_solver_.nrhs = 1;
mumps_solver_.lrhs = mumps_solver_.n; // leading dimension: must equal n for dense centralized RHS
mumps_solver_.rhs = rhs.data(); // in: RHS, out: solution (overwritten in-place)
dmumps_c(&mumps_solver_);
if (INFOG(1) != 0) {
throw std::runtime_error("MUMPS reported an error during solution phase "
"(INFOG(1) = " +
std::to_string(INFOG(1)) + ").");
}
}
private:
void initialize()
{
assert(matrix_.rows() == matrix_.columns());
/*
* MUMPS uses 1-based indexing; our COO matrix uses 0-based indexing.
* Adjust row and column indices to match MUMPS' requirements.
*/
for (int i = 0; i < matrix_.non_zero_size(); i++) {
matrix_.row_index(i) += 1;
matrix_.col_index(i) += 1;
}
mumps_solver_.job = JOB_INIT;
mumps_solver_.par = PAR_PARALLEL;
mumps_solver_.sym = matrix_.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC;
mumps_solver_.comm_fortran = USE_COMM_WORLD;
dmumps_c(&mumps_solver_);
configureICNTL();
configureCNTL();
mumps_solver_.job = JOB_ANALYSIS_AND_FACTORIZATION;
mumps_solver_.n = matrix_.rows();
mumps_solver_.nz = matrix_.non_zero_size();
mumps_solver_.irn = matrix_.row_indices_data();
mumps_solver_.jcn = matrix_.column_indices_data();
mumps_solver_.a = matrix_.values_data();
dmumps_c(&mumps_solver_);
if (INFOG(1) != 0) {
throw std::runtime_error("MUMPS reported an error during analysis/factorization "
"(INFOG(1) = " +
std::to_string(INFOG(1)) + ").");
}
if (mumps_solver_.sym == SYM_POSITIVE_DEFINITE && INFOG(12) != 0) {
throw std::runtime_error("Matrix declared positive definite, "
"but negative pivots were encountered during factorization "
"(INFOG(12) = " +
std::to_string(INFOG(12)) + ").");
}
}
void finalize()
{
mumps_solver_.job = JOB_END;
dmumps_c(&mumps_solver_);
}
void configureICNTL()
{
// All ICNTL values are left at their defaults unless noted below.
// ICNTL(1) = 0: suppress error message output
// ICNTL(3) = 0: suppress global information output
// ICNTL(6) = 7: automatically choose permutation/scaling strategy
// ICNTL(7) = 5: use METIS for fill-reducing ordering
// ICNTL(48) = 0: disable tree parallelism (conflicts with OpenMP in newer MUMPS releases)
ICNTL(1) = 0; // Output stream for error messages
ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process
ICNTL(3) = 0; // Output stream for global information, collected on the host
ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages
ICNTL(5) = 0; // Controls the matrix input format
ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scales the matrix
ICNTL(7) = 5; // Symmetric permutation (ordering) to determine pivot order for sequential analysis
ICNTL(8) = 77; // Scaling strategy
ICNTL(9) = 1; // Computes the solution using A or A^T
ICNTL(10) = 0; // Iterative refinement steps applied to the computed solution
ICNTL(11) = 0; // Error analysis statistics
ICNTL(12) = 0; // Ordering strategy for symmetric matrices
ICNTL(13) = 0; // Controls the parallelism of the root node
ICNTL(14) = matrix_.is_symmetric() ? 5 : 20; // Percentage increase in estimated working space
ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format
ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads
// ICNTL(17) does not exist
ICNTL(18) = 0; // Strategy for the distributed input matrix
ICNTL(19) = 0; // Computes the Schur complement matrix
ICNTL(20) = 0; // Format of the right-hand sides (dense, sparse, or distributed)
ICNTL(21) = 0; // Distribution of the solution vectors (centralized or distributed)
ICNTL(22) = 0; // In-core/out-of-core (OOC) factorization and solve
ICNTL(23) = 0; // Maximum working memory in MegaBytes per working process
ICNTL(24) = 0; // Detection of null pivot rows
ICNTL(25) = 0; // Solution of a deficient matrix and null space basis computation
ICNTL(26) = 0; // Solution phase when Schur complement has been computed
ICNTL(27) = -32; // Blocking size for multiple right-hand sides
ICNTL(28) = 0; // Sequential or parallel computation of the ordering
ICNTL(29) = 0; // Parallel ordering tool when ICNTL(28)=1
ICNTL(30) = 0; // User-specified entries in the inverse A^-1
ICNTL(31) = 0; // Which factors may be discarded during factorization
ICNTL(32) = 0; // Forward elimination of the right-hand sides during factorization
ICNTL(33) = 0; // Computes the determinant of the input matrix
ICNTL(34) = 0; // Conservation of OOC files during JOB=-3
ICNTL(35) = 0; // Activation of the BLR feature
ICNTL(36) = 0; // Choice of BLR factorization variant
ICNTL(37) = 0; // BLR compression of the contribution blocks
ICNTL(38) = 600; // Estimated compression rate of LU factors
ICNTL(39) = 500; // Estimated compression rate of contribution blocks
// ICNTL(40-47) do not exist
ICNTL(48) = 0; // Multithreading with tree parallelism
ICNTL(49) = 0; // Compact workarray id%S at end of factorization phase
// ICNTL(50-55) do not exist
ICNTL(56) = 0; // Detects pseudo-singularities; rank-revealing factorization of root node
// ICNTL(57) does not exist
ICNTL(58) = 2; // Options for symbolic factorization
// ICNTL(59-60) do not exist
}
void configureCNTL()
{
// All CNTL values are left at their defaults unless noted below.
CNTL(1) = -1.0; // Relative threshold for numerical pivoting
CNTL(2) = -1.0; // Stopping criterion for iterative refinement
CNTL(3) = 0.0; // Threshold for null pivot row detection
CNTL(4) = -1.0; // Threshold for static pivoting
CNTL(5) = 0.0; // Fixation for null pivots (effective only when null pivot detection is active)
// CNTL(6) does not exist
CNTL(7) = 0.0; // Dropping parameter precision for BLR compression
// CNTL(8-15) do not exist
}
private:
SparseMatrixCOO<double> matrix_;
DMUMPS_STRUC_C mumps_solver_ = {};
/* ------------------------------------------------ */
/* MUMPS uses 1-based indexing in the documentation */
/* ------------------------------------------------ */
int& ICNTL(int i)
{
return mumps_solver_.icntl[i - 1];
}
double& CNTL(int i)
{
return mumps_solver_.cntl[i - 1];
}
int& INFOG(int i)
{
return mumps_solver_.infog[i - 1];
}
/* ----------------------------------- */
/* MUMPS jobs and constant definitions */
/* ----------------------------------- */
static constexpr int USE_COMM_WORLD = -987654;
static constexpr int PAR_NOT_PARALLEL = 0;
static constexpr int PAR_PARALLEL = 1;
static constexpr int JOB_INIT = -1;
static constexpr int JOB_END = -2;
static constexpr int JOB_REMOVE_SAVED_DATA = -3;
static constexpr int JOB_FREE_INTERNAL_DATA = -4;
static constexpr int JOB_SUPPRESS_OOC_FILES = -200;
static constexpr int JOB_ANALYSIS_PHASE = 1;
static constexpr int JOB_FACTORIZATION_PHASE = 2;
static constexpr int JOB_COMPUTE_SOLUTION = 3;
static constexpr int JOB_ANALYSIS_AND_FACTORIZATION = 4;
static constexpr int JOB_FACTORIZATION_AND_SOLUTION = 5;
static constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6;
static constexpr int JOB_SAVE_INTERNAL_DATA = 7;
static constexpr int JOB_RESTORE_INTERNAL_DATA = 8;
static constexpr int JOB_DISTRIBUTE_RHS = 9;
static constexpr int SYM_UNSYMMETRIC = 0;
static constexpr int SYM_POSITIVE_DEFINITE = 1;
static constexpr int SYM_GENERAL_SYMMETRIC = 2;
};
#endif // GMGPOLAR_USE_MUMPS