Skip to content

Commit 3eebc5b

Browse files
authored
Merge pull request #2339 from borglab/feature/fasterSolver
Cache optimization
2 parents ec51716 + e5e01f4 commit 3eebc5b

File tree

10 files changed

+501
-162
lines changed

10 files changed

+501
-162
lines changed

gtsam/base/SymmetricBlockMatrix.cpp

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,20 +89,30 @@ void SymmetricBlockMatrix::choleskyPartial(DenseIndex nFrontals) {
8989
}
9090

9191
/* ************************************************************************* */
92-
VerticalBlockMatrix SymmetricBlockMatrix::split(DenseIndex nFrontals) {
92+
void SymmetricBlockMatrix::split(DenseIndex nFrontals,
93+
VerticalBlockMatrix* RSd) {
9394
gttic(VerticalBlockMatrix_split);
95+
assert(RSd);
9496

9597
// Construct a VerticalBlockMatrix that contains [R Sd]
96-
const size_t n1 = offset(nFrontals);
97-
VerticalBlockMatrix RSd = VerticalBlockMatrix::LikeActiveViewOf(*this, n1);
98+
const DenseIndex n1 = offset(nFrontals);
99+
assert(RSd->rows() == n1);
100+
assert(RSd->cols() == cols());
101+
assert(RSd->nBlocks() == nBlocks());
98102

99103
// Copy into it.
100-
RSd.full() = matrix_.topRows(n1);
101-
RSd.full().triangularView<Eigen::StrictlyLower>().setZero();
104+
RSd->full() = matrix_.topRows(n1);
105+
RSd->full().triangularView<Eigen::StrictlyLower>().setZero();
102106

103107
// Take lower-right block of Ab_ to get the remaining factor
104108
blockStart() = nFrontals;
109+
}
105110

111+
VerticalBlockMatrix SymmetricBlockMatrix::split(DenseIndex nFrontals) {
112+
// Construct a VerticalBlockMatrix that contains [R Sd]
113+
const DenseIndex n1 = offset(nFrontals);
114+
VerticalBlockMatrix RSd = VerticalBlockMatrix::LikeActiveViewOf(*this, n1);
115+
split(nFrontals, &RSd);
106116
return RSd;
107117
}
108118

@@ -126,6 +136,30 @@ void SymmetricBlockMatrix::updateFromMappedBlocks(
126136
}
127137
}
128138

139+
/* ************************************************************************* */
140+
void SymmetricBlockMatrix::updateFromOuterProductBlocks(
141+
const VerticalBlockMatrix& other,
142+
const std::vector<DenseIndex>& blockIndices) {
143+
assert(static_cast<DenseIndex>(blockIndices.size()) == other.nBlocks());
144+
const DenseIndex otherBlocks = other.nBlocks();
145+
for (DenseIndex i = 0; i < otherBlocks; ++i) {
146+
const DenseIndex I = blockIndices[i];
147+
if (I < 0) continue;
148+
assert(I < nBlocks());
149+
const auto Si = other(i);
150+
Matrix diag = Si.transpose() * Si;
151+
updateDiagonalBlock(I, diag);
152+
for (DenseIndex j = i + 1; j < otherBlocks; ++j) {
153+
const DenseIndex J = blockIndices[j];
154+
if (J < 0) continue;
155+
assert(J < nBlocks());
156+
const auto Sj = other(j);
157+
Matrix off = Si.transpose() * Sj;
158+
updateOffDiagonalBlock(I, J, off);
159+
}
160+
}
161+
}
162+
129163
/* ************************************************************************* */
130164

131165
} //\ namespace gtsam

gtsam/base/SymmetricBlockMatrix.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,19 @@ namespace gtsam {
234234
}
235235
}
236236

237+
/// Add a vector to the diagonal entries of block I.
238+
void addToDiagonalBlock(DenseIndex I, const Vector& deltaDiag) {
239+
auto dest = block_(I, I);
240+
assert(dest.rows() == deltaDiag.size());
241+
dest.diagonal().array() += deltaDiag.array();
242+
}
243+
244+
/// Add lambda * I to the diagonal block I.
245+
void addScaledIdentity(DenseIndex I, double lambda) {
246+
auto dest = block_(I, I);
247+
dest.diagonal().array() += lambda;
248+
}
249+
237250
/// Update an off diagonal block.
238251
/// NOTE(emmett): This assumes noalias().
239252
template <typename XprType>
@@ -251,6 +264,12 @@ namespace gtsam {
251264
void updateFromMappedBlocks(const SymmetricBlockMatrix& other,
252265
const std::vector<DenseIndex>& blockIndices);
253266

267+
/// Update this matrix with blockwise outer products from a vertical block matrix.
268+
/// Adds S_i^T S_j into block (I,J), using a block mapping; entries with index -1 are skipped.
269+
/// The range to use is controlled by other.firstBlock().
270+
void updateFromOuterProductBlocks(const VerticalBlockMatrix& other,
271+
const std::vector<DenseIndex>& blockIndices);
272+
254273
/// @}
255274
/// @name Accessing the full matrix.
256275
/// @{
@@ -317,6 +336,9 @@ namespace gtsam {
317336
*/
318337
VerticalBlockMatrix split(DenseIndex nFrontals);
319338

339+
/// I n-place version of split.
340+
void split(DenseIndex nFrontals, VerticalBlockMatrix* RSd);
341+
320342
protected:
321343

322344
/// Number of offsets in the full matrix.

gtsam/base/tests/testSymmetricBlockMatrix.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include <CppUnitLite/TestHarness.h>
1919
#include <gtsam/base/SymmetricBlockMatrix.h>
20+
#include <gtsam/base/VerticalBlockMatrix.h>
2021

2122
using namespace std;
2223
using namespace gtsam;
@@ -187,6 +188,29 @@ TEST(SymmetricBlockMatrix, UpdateFromMappedBlocks)
187188
Matrix(doubled.selfadjointView())));
188189
}
189190

191+
/* ************************************************************************* */
192+
// Update via blockwise outer products from a VerticalBlockMatrix view.
193+
TEST(SymmetricBlockMatrix, UpdateFromOuterProductBlocks)
194+
{
195+
const std::vector<size_t> vbmDims{2, 1};
196+
VerticalBlockMatrix vbm(vbmDims, 4, true);
197+
vbm.matrix() = (Matrix(4, 4) <<
198+
1, 2, 3, 4,
199+
5, 6, 7, 8,
200+
9, 10, 11, 12,
201+
13, 14, 15, 16).finished();
202+
203+
const std::vector<size_t> destDims{1};
204+
SymmetricBlockMatrix actual(destDims, true);
205+
actual.setZero();
206+
const std::vector<DenseIndex> mapping{0, 1};
207+
const Matrix S = vbm.range(1, 3);
208+
const Matrix expected = S.transpose() * S;
209+
vbm.firstBlock() = 1;
210+
actual.updateFromOuterProductBlocks(vbm, mapping);
211+
EXPECT(assert_equal(expected, Matrix(actual.selfadjointView())));
212+
}
213+
190214
/* ************************************************************************* */
191215
// In-place inversion path.
192216
TEST(SymmetricBlockMatrix, inverseInPlace) {

gtsam/linear/MultifrontalClique.cpp

Lines changed: 103 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* @date December 2025
1717
*/
1818

19+
#include <gtsam/base/Matrix.h>
1920
#include <gtsam/linear/GaussianConditional.h>
2021
#include <gtsam/linear/HessianFactor.h>
2122
#include <gtsam/linear/JacobianFactor.h>
@@ -81,9 +82,8 @@ bool validateFactorKeys(const GaussianFactorGraph& graph,
8182
MultifrontalClique::MultifrontalClique(
8283
std::vector<size_t> factorIndices,
8384
const std::weak_ptr<MultifrontalClique>& parent, const KeyVector& frontals,
84-
const KeySet& separatorKeys, const KeyDimMap& dims,
85-
const GaussianFactorGraph& graph, VectorValues* solution,
86-
const std::unordered_set<Key>* fixedKeys) {
85+
const KeySet& separatorKeys, const KeyDimMap& dims, size_t vbmRows,
86+
VectorValues* solution, const std::unordered_set<Key>* fixedKeys) {
8787
factorIndices_ = std::move(factorIndices);
8888
this->parent = parent;
8989
fixedKeys_ = fixedKeys;
@@ -111,10 +111,8 @@ MultifrontalClique::MultifrontalClique(
111111
cacheSolutionPointers(solution, frontals, separatorKeys);
112112

113113
// Pre-allocate matrices once per structure.
114-
std::vector<size_t> blockDims =
115-
this->blockDims(dims, frontals, separatorKeys);
116-
size_t vbmRows = countRows(graph);
117-
initializeMatrices(blockDims, vbmRows);
114+
blockDims_ = this->blockDims(dims, frontals, separatorKeys);
115+
initializeMatrices(blockDims_, vbmRows);
118116
}
119117

120118
void MultifrontalClique::finalize(std::vector<ChildInfo> children) {
@@ -136,6 +134,8 @@ void MultifrontalClique::finalize(std::vector<ChildInfo> children) {
136134
indices.push_back(static_cast<DenseIndex>(orderedKeys_.size()));
137135
child.clique->setParentIndices(indices);
138136
}
137+
138+
// Allocation is deferred until load.
139139
}
140140

141141
DenseIndex MultifrontalClique::blockIndex(Key key) const {
@@ -169,24 +169,18 @@ std::vector<size_t> MultifrontalClique::blockDims(
169169
return blockDims;
170170
}
171171

172-
size_t MultifrontalClique::countRows(const GaussianFactorGraph& graph) const {
173-
size_t vbmRows = 0;
174-
for (size_t index : factorIndices_) {
175-
assert(index < graph.size());
176-
if (auto jacobianFactor =
177-
std::dynamic_pointer_cast<JacobianFactor>(graph[index])) {
178-
vbmRows += jacobianFactor->rows();
179-
}
180-
}
181-
return vbmRows;
182-
}
183-
184172
void MultifrontalClique::initializeMatrices(
185173
const std::vector<size_t>& blockDims, size_t verticalBlockMatrixRows) {
186-
sbm_ = SymmetricBlockMatrix(blockDims, true);
187174
Ab_ = VerticalBlockMatrix(blockDims, verticalBlockMatrixRows, true);
188175
// Ab's structure is fixed; clear it once and reuse across loads.
189176
Ab_.matrix().setZero();
177+
RSd_ =
178+
VerticalBlockMatrix(blockDims, static_cast<DenseIndex>(frontalDim), true);
179+
}
180+
181+
void MultifrontalClique::allocateSbm() {
182+
if (sbm_.nBlocks() > 0) return;
183+
sbm_ = SymmetricBlockMatrix(blockDims_, true);
190184
}
191185

192186
size_t MultifrontalClique::addJacobianFactor(
@@ -233,8 +227,8 @@ void MultifrontalClique::addHessianFactor(const HessianFactor& hessianFactor) {
233227

234228
void MultifrontalClique::fillAb(const GaussianFactorGraph& graph) {
235229
assert(validateFactorKeys(graph, factorIndices_, orderedKeys_, fixedKeys_));
236-
sbm_.setZero(); // Zero the active SBM once before accumulating factors.
237230

231+
hessianFactors_.clear();
238232
size_t rowOffset = 0;
239233
for (size_t index : factorIndices_) {
240234
assert(index < graph.size());
@@ -244,53 +238,119 @@ void MultifrontalClique::fillAb(const GaussianFactorGraph& graph) {
244238
rowOffset += addJacobianFactor(*jacobianFactor, rowOffset);
245239
} else if (auto hessianFactor =
246240
std::dynamic_pointer_cast<HessianFactor>(gf)) {
247-
addHessianFactor(*hessianFactor);
241+
hessianFactors_.push_back(hessianFactor);
248242
}
249243
}
244+
245+
// Lock in QR only for leaf cliques with no Hessian factors.
246+
const bool isLeaf = children.empty();
247+
const bool useQR =
248+
isLeaf && hessianFactors_.empty() && (frontalDim > 0) &&
249+
(frontalDim + separatorDim > kQrAspectRatio * frontalDim) &&
250+
(Ab_.matrix().rows() >= static_cast<DenseIndex>(frontalDim));
251+
solveMode_ = useQR ? SolveMode::QrLeaf : SolveMode::Cholesky;
252+
RSdReady_ = false;
253+
assert((useQR && RSd_.matrix().rows() ==
254+
static_cast<DenseIndex>(Ab_.matrix().rows()) ||
255+
RSd_.matrix().rows() == static_cast<DenseIndex>(frontalDim)));
256+
allocateSbm();
250257
}
251258

252-
void MultifrontalClique::eliminateInPlace() {
253-
// Update SBM with the local factors, Ab^T * Ab
254-
sbm_.selfadjointView().rankUpdate(Ab_.matrix().transpose());
259+
void MultifrontalClique::prepareForElimination() {
260+
// QR leaf cliques skip SBM assembly entirely.
261+
if (useQR()) return;
262+
assert(sbm_.nBlocks() > 0);
263+
sbm_.setZero();
264+
for (const auto& hf : hessianFactors_) {
265+
addHessianFactor(*hf);
266+
}
267+
268+
if (Ab_.matrix().rows() > 0) {
269+
sbm_.selfadjointView().rankUpdate(Ab_.matrix().transpose());
270+
}
255271

256272
for (const auto& child : children) {
257273
if (!child) continue;
258274
child->updateParent(*this);
259275
}
276+
}
277+
278+
void MultifrontalClique::factorize() {
279+
if (useQR()) {
280+
// Copy Ab_ to preserve its invariant; QR writes in place.
281+
assert(RSd_.matrix().rows() == Ab_.matrix().rows());
282+
assert(RSd_.matrix().cols() == Ab_.matrix().cols());
283+
RSd_.matrix() = Ab_.matrix();
284+
inplace_QR(RSd_.matrix());
285+
} else {
286+
sbm_.choleskyPartial(numFrontals());
287+
sbm_.split(numFrontals(), &RSd_);
288+
sbm_.blockStart() = 0;
289+
}
290+
RSdReady_ = true;
291+
}
292+
293+
void MultifrontalClique::addIdentityDamping(double lambda) {
294+
const size_t nf = numFrontals();
295+
for (size_t j = 0; j < nf; ++j) {
296+
sbm_.addScaledIdentity(j, lambda);
297+
}
298+
}
260299

261-
// Form normal equations and factor the frontal block (Schur complement step).
262-
sbm_.choleskyPartial(numFrontals());
300+
void MultifrontalClique::addDiagonalDamping(double lambda, double minDiagonal,
301+
double maxDiagonal) {
302+
const size_t nf = numFrontals();
303+
for (size_t j = 0; j < nf; ++j) {
304+
const Vector scaled =
305+
lambda * sbm_.diagonal(j).cwiseMax(minDiagonal).cwiseMin(maxDiagonal);
306+
sbm_.addToDiagonalBlock(j, scaled);
307+
}
308+
}
309+
310+
void MultifrontalClique::eliminateInPlace() {
311+
prepareForElimination();
312+
factorize();
263313
}
264314

265315
void MultifrontalClique::updateParent(MultifrontalClique& parent) const {
266-
// Expose only the separator+RHS view when contributing to the parent.
267-
assert(sbm_.blockStart() == 0);
268-
sbm_.blockStart() = numFrontals();
269-
assert(sbm_.nBlocks() == parentIndices_.size());
270-
parent.sbm_.updateFromMappedBlocks(sbm_, parentIndices_);
271-
sbm_.blockStart() = 0;
316+
if (useQR()) {
317+
// Accumulate separator (and RHS) normal equations (S^T S) into the parent.
318+
assert(RSdReady_);
319+
assert(RSd_.firstBlock() == 0);
320+
assert(parent.sbm_.nBlocks() > 0);
321+
const DenseIndex nfBlocks = static_cast<DenseIndex>(numFrontals());
322+
RSd_.firstBlock() = nfBlocks;
323+
parent.sbm_.updateFromOuterProductBlocks(RSd_, parentIndices_);
324+
RSd_.firstBlock() = 0;
325+
} else {
326+
// Accumulate the S^T S part from this clique's SBM into the parent.
327+
assert(sbm_.nBlocks() > 0 && sbm_.blockStart() == 0);
328+
sbm_.blockStart() = numFrontals();
329+
parent.sbm_.updateFromMappedBlocks(sbm_, parentIndices_);
330+
sbm_.blockStart() = 0;
331+
}
272332
}
273333

274334
std::shared_ptr<GaussianConditional> MultifrontalClique::conditional() const {
275-
const KeyVector& keys = orderedKeys_;
276-
assert(sbm_.blockStart() == 0);
277-
VerticalBlockMatrix Ab = sbm_.split(numFrontals());
278-
sbm_.blockStart() = 0; // Split sets it to numFrontals(), reset to 0.
279-
return std::make_shared<GaussianConditional>(keys, numFrontals(),
280-
std::move(Ab));
335+
assert(RSdReady_);
336+
// RSd_ is cached at elimination time.
337+
return std::make_shared<GaussianConditional>(orderedKeys_, numFrontals(),
338+
RSd_);
281339
}
282340

283341
// Solve with block back-substitution on the Cholesky-stored SBM.
284342
void MultifrontalClique::updateSolution() const {
343+
assert(RSdReady_);
344+
// Use cached [R S d] for fast back-substitution.
285345
const size_t nf = numFrontals();
286-
const size_t n = sbm_.nBlocks() - 1; // # frontals + # separators
346+
const size_t n = RSd_.nBlocks() - 1; // # frontals + # separators
287347

288348
// The in-place factorization yields an upper-triangular system [R S d]:
289349
// R * x_f + S * x_s = d,
290350
// with x_f the frontals and x_s the separators.
291-
const auto R = sbm_.triangularView(0, nf);
292-
const auto S = sbm_.aboveDiagonalRange(0, nf, nf, n);
293-
const auto d = sbm_.aboveDiagonalRange(0, nf, n, n + 1);
351+
const auto R = RSd_.range(0, nf).triangularView<Eigen::Upper>();
352+
const auto S = RSd_.range(nf, n);
353+
const auto d = RSd_.range(n, n + 1);
294354

295355
// We first solve rhs = d - S * x_s
296356
rhsScratch_.noalias() = d;

0 commit comments

Comments
 (0)