From ad3551bb1323e0ff1812ee40acdd790e8dcfc69a Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Sun, 4 Jan 2026 13:45:44 -0500 Subject: [PATCH 01/12] Constructor with PrecomputedData --- gtsam/base/SymmetricBlockMatrix.h | 13 +++ gtsam/linear/MultifrontalClique.cpp | 53 ++++++---- gtsam/linear/MultifrontalClique.h | 39 ++++--- gtsam/linear/MultifrontalSolver.cpp | 100 +++++++++++------- gtsam/linear/MultifrontalSolver.h | 28 ++++- gtsam/linear/tests/testMultifrontalSolver.cpp | 37 ++++++- timing/timeMultifrontalSolver.cpp | 8 +- timing/timeSFMBAL.h | 14 ++- 8 files changed, 206 insertions(+), 86 deletions(-) diff --git a/gtsam/base/SymmetricBlockMatrix.h b/gtsam/base/SymmetricBlockMatrix.h index c6a767012d..f46dc72fae 100644 --- a/gtsam/base/SymmetricBlockMatrix.h +++ b/gtsam/base/SymmetricBlockMatrix.h @@ -234,6 +234,19 @@ namespace gtsam { } } + /// Add a vector to the diagonal entries of block I. + void addToDiagonalBlock(DenseIndex I, const Vector& deltaDiag) { + auto dest = block_(I, I); + assert(dest.rows() == deltaDiag.size()); + dest.diagonal().array() += deltaDiag.array(); + } + + /// Add lambda * I to the diagonal block I. + void addScaledIdentity(DenseIndex I, double lambda) { + auto dest = block_(I, I); + dest.diagonal().array() += lambda; + } + /// Update an off diagonal block. /// NOTE(emmett): This assumes noalias(). template diff --git a/gtsam/linear/MultifrontalClique.cpp b/gtsam/linear/MultifrontalClique.cpp index 29397a52b1..5c95b91e90 100644 --- a/gtsam/linear/MultifrontalClique.cpp +++ b/gtsam/linear/MultifrontalClique.cpp @@ -82,7 +82,7 @@ MultifrontalClique::MultifrontalClique( std::vector factorIndices, const std::weak_ptr& parent, const KeyVector& frontals, const KeySet& separatorKeys, const KeyDimMap& dims, - const GaussianFactorGraph& graph, VectorValues* solution, + size_t vbmRows, VectorValues* solution, const std::unordered_set* fixedKeys) { factorIndices_ = std::move(factorIndices); this->parent = parent; @@ -113,7 +113,6 @@ MultifrontalClique::MultifrontalClique( // Pre-allocate matrices once per structure. std::vector blockDims = this->blockDims(dims, frontals, separatorKeys); - size_t vbmRows = countRows(graph); initializeMatrices(blockDims, vbmRows); } @@ -169,18 +168,6 @@ std::vector MultifrontalClique::blockDims( return blockDims; } -size_t MultifrontalClique::countRows(const GaussianFactorGraph& graph) const { - size_t vbmRows = 0; - for (size_t index : factorIndices_) { - assert(index < graph.size()); - if (auto jacobianFactor = - std::dynamic_pointer_cast(graph[index])) { - vbmRows += jacobianFactor->rows(); - } - } - return vbmRows; -} - void MultifrontalClique::initializeMatrices( const std::vector& blockDims, size_t verticalBlockMatrixRows) { sbm_ = SymmetricBlockMatrix(blockDims, true); @@ -233,8 +220,9 @@ void MultifrontalClique::addHessianFactor(const HessianFactor& hessianFactor) { void MultifrontalClique::fillAb(const GaussianFactorGraph& graph) { assert(validateFactorKeys(graph, factorIndices_, orderedKeys_, fixedKeys_)); - sbm_.setZero(); // Zero the active SBM once before accumulating factors. + assert(validateFactorKeys(graph, factorIndices_, orderedKeys_, fixedKeys_)); + hessianFactors_.clear(); size_t rowOffset = 0; for (size_t index : factorIndices_) { assert(index < graph.size()); @@ -244,12 +232,17 @@ void MultifrontalClique::fillAb(const GaussianFactorGraph& graph) { rowOffset += addJacobianFactor(*jacobianFactor, rowOffset); } else if (auto hessianFactor = std::dynamic_pointer_cast(gf)) { - addHessianFactor(*hessianFactor); + hessianFactors_.push_back(hessianFactor); } } } -void MultifrontalClique::eliminateInPlace() { +void MultifrontalClique::prepareForElimination() { + sbm_.setZero(); + for (const auto& hf : hessianFactors_) { + addHessianFactor(*hf); + } + // Update SBM with the local factors, Ab^T * Ab sbm_.selfadjointView().rankUpdate(Ab_.matrix().transpose()); @@ -257,9 +250,31 @@ void MultifrontalClique::eliminateInPlace() { if (!child) continue; child->updateParent(*this); } +} - // Form normal equations and factor the frontal block (Schur complement step). - sbm_.choleskyPartial(numFrontals()); +void MultifrontalClique::factorize() { sbm_.choleskyPartial(numFrontals()); } + +void MultifrontalClique::addIdentityDamping(double lambda) { + const size_t nf = numFrontals(); + for (size_t j = 0; j < nf; ++j) { + sbm_.addScaledIdentity(j, lambda); + } +} + +void MultifrontalClique::addDiagonalDamping(double lambda, double minDiagonal, + double maxDiagonal) { + const size_t nf = numFrontals(); + for (size_t j = 0; j < nf; ++j) { + Vector diag = sbm_.diagonal(j); + diag = diag.cwiseMax(minDiagonal).cwiseMin(maxDiagonal); + const Vector scaled = lambda * diag; + sbm_.addToDiagonalBlock(j, scaled); + } +} + +void MultifrontalClique::eliminateInPlace() { + prepareForElimination(); + factorize(); } void MultifrontalClique::updateParent(MultifrontalClique& parent) const { diff --git a/gtsam/linear/MultifrontalClique.h b/gtsam/linear/MultifrontalClique.h index bc5c5f527d..93e1251b61 100644 --- a/gtsam/linear/MultifrontalClique.h +++ b/gtsam/linear/MultifrontalClique.h @@ -43,12 +43,13 @@ using KeyDimMap = std::map; namespace internal { -/// Helper class to track original factor indices. +/// Helper class to track original factor indices and row counts. class IndexedSymbolicFactor : public SymbolicFactor { public: size_t index_; - IndexedSymbolicFactor(const KeyVector& keys, size_t index) - : SymbolicFactor(), index_(index) { + size_t rows_; + IndexedSymbolicFactor(const KeyVector& keys, size_t index, size_t rows) + : SymbolicFactor(), index_(index), rows_(rows) { keys_ = keys; } }; @@ -89,7 +90,7 @@ class GTSAM_EXPORT MultifrontalClique { /// @param frontals Frontal keys for this clique. /// @param separatorKeys Separator keys for this clique. /// @param dims Key->dimension map. - /// @param graph Factor graph for sizing and constraints. + /// @param vbmRows Number of rows needed for the vertical block matrix. /// @param solution Solution storage for cached pointers. /// @param fixedKeys Keys fixed to zero by constraints (may be null). explicit MultifrontalClique(std::vector factorIndices, @@ -97,7 +98,7 @@ class GTSAM_EXPORT MultifrontalClique { const KeyVector& frontals, const KeySet& separatorKeys, const KeyDimMap& dims, - const GaussianFactorGraph& graph, + size_t vbmRows, VectorValues* solution, const std::unordered_set* fixedKeys); @@ -107,10 +108,23 @@ class GTSAM_EXPORT MultifrontalClique { /// Cache the children list and compute parent indices. void finalize(std::vector children); - /// Load factor values into the pre-allocated Ab matrix and Hessians into - /// sbm_. + /// Load factor values into the pre-allocated Ab matrix. /// @param graph The factor graph with updated values. void fillAb(const GaussianFactorGraph& graph); + + /// Zero out sbm, re-add Hessians, accumulate Jacobians and children. + void prepareForElimination(); + + /// Perform Cholesky factorization on the frontal block. + void factorize(); + + /// Add identity damping to the frontal block. + void addIdentityDamping(double lambda); + + /// Add diagonal damping to the frontal block, with clamping. + void addDiagonalDamping(double lambda, double minDiagonal, + double maxDiagonal); + /// @} /// @name Read-only methods @@ -130,19 +144,9 @@ class GTSAM_EXPORT MultifrontalClique { /// Get the vertical block matrix Ab. const VerticalBlockMatrix& Ab() const { return Ab_; } - /// Get the symmetric block matrix (mutable). - SymmetricBlockMatrix& sbm() { return sbm_; } - /// Get the symmetric block matrix (const). const SymmetricBlockMatrix& sbm() const { return sbm_; } - /** - * Count rows needed for the vertical block matrix. - * @param graph The factor graph. - * @return Total number of rows. - */ - size_t countRows(const GaussianFactorGraph& graph) const; - /** * Print this clique. * @param s Optional string prefix. @@ -231,6 +235,7 @@ class GTSAM_EXPORT MultifrontalClique { std::vector frontalPtrs_; ///< Pointers into solution frontals. std::vector separatorPtrs_; ///< Pointers into solution separator. + std::vector hessianFactors_; ///< Hessian factors. }; std::ostream& operator<<(std::ostream& os, const MultifrontalClique& clique); diff --git a/gtsam/linear/MultifrontalSolver.cpp b/gtsam/linear/MultifrontalSolver.cpp index 6cf6b2d459..01d4022f60 100644 --- a/gtsam/linear/MultifrontalSolver.cpp +++ b/gtsam/linear/MultifrontalSolver.cpp @@ -137,6 +137,15 @@ std::map computeDims(const GaussianFactorGraph& graph) { return dims; } +size_t factorRowCount(const GaussianFactorGraph& graph, size_t index) { + if (index >= graph.size() || !graph[index]) return 0; + if (auto jacobianFactor = + std::dynamic_pointer_cast(graph[index])) { + return jacobianFactor->rows(); + } + return 0; +} + // Build SymbolicFactorGraph from GaussianFactorGraph SymbolicFactorGraph buildSymbolicGraph( const GaussianFactorGraph& graph, @@ -154,7 +163,8 @@ SymbolicFactorGraph buildSymbolicGraph( } // Skip factors that are fully constrained away. if (keys.empty()) continue; - symbolicGraph.emplace_shared(keys, i); + symbolicGraph.emplace_shared( + keys, i, factorRowCount(graph, i)); } return symbolicGraph; } @@ -238,19 +248,6 @@ void reportStructure(const SymbolicJunctionTree& junctionTree, stats.report(name, reportStream); } -std::vector factorIndicesForSymbolicCluster( - const SymbolicJunctionTree::sharedNode& cluster) { - std::vector indices; - indices.reserve(cluster->factors.size()); - for (const auto& factor : cluster->factors) { - assert(factor); - auto indexed = - std::static_pointer_cast(factor); - indices.push_back(indexed->index_); - } - return indices; -} - struct BuiltClique { MultifrontalSolver::CliquePtr clique; KeySet separatorKeys; @@ -259,7 +256,6 @@ struct BuiltClique { // Build cliques from a symbolic junction tree and wire parent/child metadata. struct CliqueBuilder { const std::map& dims; - const GaussianFactorGraph& graph; VectorValues* solution; std::vector* cliques; const std::unordered_set* fixedKeys; @@ -272,11 +268,21 @@ struct CliqueBuilder { // Gather symbolic metadata for this clique. const KeyVector& frontals = cluster->orderedFrontalKeys; const KeySet& separatorKeys = cluster->separatorKeys(&separatorCache); + std::vector factorIndices; + factorIndices.reserve(cluster->factors.size()); + size_t vbmRows = 0; + for (const auto& factor : cluster->factors) { + assert(factor); + auto indexed = + std::static_pointer_cast(factor); + factorIndices.push_back(indexed->index_); + vbmRows += indexed->rows_; + } // Create the clique node and cache static structure. auto clique = std::make_shared( - factorIndicesForSymbolicCluster(cluster), parent, frontals, - separatorKeys, dims, graph, solution, fixedKeys); + std::move(factorIndices), parent, frontals, separatorKeys, dims, + vbmRows, solution, fixedKeys); // Build children and collect separator keys. std::vector childInfos; @@ -300,21 +306,23 @@ struct CliqueBuilder { /* ************************************************************************* */ MultifrontalSolver::MultifrontalSolver(const GaussianFactorGraph& graph, + const Ordering& ordering, + size_t mergeDimCap, + std::ostream* reportStream) + : MultifrontalSolver(Precompute(graph, ordering), ordering, + mergeDimCap, reportStream) {} + +/* ************************************************************************* */ +MultifrontalSolver::MultifrontalSolver(PrecomputedData data, const Ordering& ordering, size_t mergeDimCap, std::ostream* reportStream) { - // Pre-compute variable dimensions and fixed keys. - dims_ = computeDims(graph); - fixedKeys_ = collectFixedKeys(graph); + dims_ = std::move(data.dims); + fixedKeys_ = std::move(data.fixedKeys); // Seed the cached solution with zero vectors for all variables. - Ordering reducedOrdering; - reducedOrdering.reserve(ordering.size()); for (Key key : ordering) { solution_.insert(key, Vector::Zero(dims_.at(key))); - if (!fixedKeys_.count(key)) { - reducedOrdering.push_back(key); - } } for (Key key : fixedKeys_) { if (!solution_.exists(key)) { @@ -322,29 +330,22 @@ MultifrontalSolver::MultifrontalSolver(const GaussianFactorGraph& graph, } } - // Convert to SymbolicFactorGraph to build the elimination tree - SymbolicFactorGraph symbolicGraph = buildSymbolicGraph(graph, fixedKeys_); - - // Build SymbolicEliminationTree and then SymbolicJunctionTree - SymbolicEliminationTree eliminationTree(symbolicGraph, reducedOrdering); - SymbolicJunctionTree junctionTree(eliminationTree); - // Report the symbolic structure before any merge. - reportStructure(junctionTree, dims_, "Symbolic cluster structure", + reportStructure(data.junctionTree, dims_, "Symbolic cluster structure", reportStream); // If applicable, merge small child cliques bottom-up. if (mergeDimCap > 0) { - for (const auto& rootCluster : junctionTree.roots()) { + for (const auto& rootCluster : data.junctionTree.roots()) { mergeSmallClusters(rootCluster, dims_, mergeDimCap); } - reportStructure(junctionTree, dims_, "Clique structure after merge", + reportStructure(data.junctionTree, dims_, "Clique structure after merge", reportStream); } // Build the actual MultifrontalClique structure. - CliqueBuilder builder{dims_, graph, &solution_, &cliques_, &fixedKeys_}; - for (const auto& rootCluster : junctionTree.roots()) { + CliqueBuilder builder{dims_, &solution_, &cliques_, &fixedKeys_}; + for (const auto& rootCluster : data.junctionTree.roots()) { if (rootCluster) { roots_.push_back( builder.build(rootCluster, std::weak_ptr()) @@ -352,8 +353,29 @@ MultifrontalSolver::MultifrontalSolver(const GaussianFactorGraph& graph, } } - // Load initial numerical values after the structure is built. - load(graph); + // Caller is responsible for loading numerical values via load(). +} + +/* ************************************************************************* */ +MultifrontalSolver::PrecomputedData MultifrontalSolver::Precompute( + const GaussianFactorGraph& graph, const Ordering& ordering) { + auto dims = computeDims(graph); + auto fixedKeys = collectFixedKeys(graph); + + Ordering reducedOrdering; + reducedOrdering.reserve(ordering.size()); + for (Key key : ordering) { + if (!fixedKeys.count(key)) { + reducedOrdering.push_back(key); + } + } + + SymbolicFactorGraph symbolicGraph = buildSymbolicGraph(graph, fixedKeys); + SymbolicEliminationTree eliminationTree(symbolicGraph, reducedOrdering); + SymbolicJunctionTree junctionTree(eliminationTree); + + return MultifrontalSolver::PrecomputedData{ + std::move(dims), std::move(fixedKeys), std::move(junctionTree)}; } /* ************************************************************************* */ diff --git a/gtsam/linear/MultifrontalSolver.h b/gtsam/linear/MultifrontalSolver.h index 3880daea7a..cc67fcba53 100644 --- a/gtsam/linear/MultifrontalSolver.h +++ b/gtsam/linear/MultifrontalSolver.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -43,12 +44,18 @@ class MultifrontalClique; */ class GTSAM_EXPORT MultifrontalSolver { public: + struct PrecomputedData { + std::map dims; + std::unordered_set fixedKeys; + SymbolicJunctionTree junctionTree; + }; + /// Shared pointer to a MultifrontalClique. using CliquePtr = std::shared_ptr; /// Node type for tree traversal utilities. using Node = MultifrontalClique; - private: + protected: std::vector roots_; ///< Roots of the elimination tree. std::vector cliques_; ///< All cliques in the solver. std::map dims_; ///< Map from variable key to dimension. @@ -59,6 +66,7 @@ class GTSAM_EXPORT MultifrontalSolver { /** * Construct the solver from a factor graph and an ordering. * This builds the symbolic junction tree and pre-allocates all matrices. + * Call load() before eliminating to populate numerical values. * @param graph The factor graph to solve. * @param ordering The variable ordering to use for elimination. * @param mergeDimCap Merge a child if its frontal dimension plus the @@ -70,6 +78,24 @@ class GTSAM_EXPORT MultifrontalSolver { size_t mergeDimCap = 0, std::ostream* reportStream = nullptr); + /** + * Construct the solver from precomputed symbolic data. + * Call load() before eliminating to populate numerical values. + * @param data Precomputed symbolic structure and sizing data. + * @param ordering The variable ordering to use for seeding solution storage. + * @param mergeDimCap Merge a child if its frontal dimension plus the + * parent's total dimension is below this threshold (0 disables merging). + * @param reportStream Optional stream to report clique structure stats + * (frontals, separators, total dims, and children). + */ + MultifrontalSolver(PrecomputedData data, const Ordering& ordering, + size_t mergeDimCap = 0, + std::ostream* reportStream = nullptr); + + /// Precompute symbolic structure and sizing data from a factor graph. + static PrecomputedData Precompute(const GaussianFactorGraph& graph, + const Ordering& ordering); + /** * Load new numerical values from the factor graph. * This overwrites the values in the pre-allocated matrices. diff --git a/gtsam/linear/tests/testMultifrontalSolver.cpp b/gtsam/linear/tests/testMultifrontalSolver.cpp index afe5d8e7a8..e2f2ca2082 100644 --- a/gtsam/linear/tests/testMultifrontalSolver.cpp +++ b/gtsam/linear/tests/testMultifrontalSolver.cpp @@ -50,9 +50,10 @@ const Ordering chainOrdering{x2, x1, x3, x4}; } // namespace /* ************************************************************************* */ -// Build the solver and validate initial structure and load. +// Build the solver and validate initial structure and explicit load. TEST(MultifrontalSolver, Constructor) { MultifrontalSolver solver(chain, chainOrdering); + solver.load(chain); // Verify roots EXPECT(solver.roots().size() == 1); @@ -78,6 +79,32 @@ TEST(MultifrontalSolver, Constructor) { EXPECT(assert_equal((Matrix(2, 1) << 2., 1.).finished(), Ab)); } +/* ************************************************************************* */ +// Build the solver from precomputed data and validate structure and load. +TEST(MultifrontalSolver, ConstructorPrecomputed) { + auto data = MultifrontalSolver::Precompute(chain, chainOrdering); + MultifrontalSolver solver(std::move(data), chainOrdering); + solver.load(chain); + + // Verify roots + EXPECT(solver.roots().size() == 1); + auto root = solver.roots()[0]; + EXPECT(root != nullptr); + + // Root should have 1 child {x2, x1} + EXPECT_LONGS_EQUAL(1, root->children.size()); + auto childClique = root->children[0]; + + // Verify matrices in leaf (childClique) + EXPECT_LONGS_EQUAL(4, childClique->sbm().nBlocks()); + EXPECT_LONGS_EQUAL(2, childClique->Ab().rows()); + EXPECT_LONGS_EQUAL(4, childClique->Ab().nBlocks()); + + // Verify load for childClique + Matrix A0 = childClique->Ab()(0); + EXPECT(assert_equal((Matrix(2, 1) << 2., 1.).finished(), A0)); +} + /* ************************************************************************* */ // Reload numerical values and ensure Ab updates match whitening. TEST(MultifrontalSolver, Load) { @@ -110,6 +137,7 @@ TEST(MultifrontalSolver, Load) { // Compare solver output against multifrontal elimination baseline. TEST(MultifrontalSolver, Eliminate) { MultifrontalSolver solver(chain, chainOrdering); + solver.load(chain); solver.eliminateInPlace(); // Solve @@ -126,6 +154,7 @@ TEST(MultifrontalSolver, Eliminate) { // Compare marginals from in-place Bayes tree against standard elimination. TEST(MultifrontalSolver, ComputeBayesTreeMarginals) { MultifrontalSolver solver(chain, chainOrdering); + solver.load(chain); solver.eliminateInPlace(); GaussianBayesTree actualBT = solver.computeBayesTree(); @@ -161,6 +190,7 @@ TEST(MultifrontalSolver, ComputeBayesTreeMarginalsConstrainedChain) { x2, I_1x1, (Vector(1) << 0.0).finished(), hardConstraint); MultifrontalSolver solver(constrainedChain, chainOrdering); + solver.load(constrainedChain); solver.eliminateInPlace(); GaussianBayesTree actualBT = solver.computeBayesTree(); @@ -189,6 +219,7 @@ TEST(MultifrontalSolver, ConstrainedNoiseFeasible) { const Ordering ordering{x1}; MultifrontalSolver solver(graph, ordering); + solver.load(graph); solver.eliminateInPlace(); const VectorValues& actual = solver.updateSolution(); @@ -225,6 +256,7 @@ TEST(MultifrontalSolver, ConstrainedNoiseUnaryFeasible) { const Ordering ordering{x1}; MultifrontalSolver solver(graph, ordering); + solver.load(graph); solver.eliminateInPlace(); const VectorValues& actual = solver.updateSolution(); @@ -261,6 +293,7 @@ TEST(MultifrontalSolver, WeightedScalarMeasurements) { const Ordering ordering{x1}; MultifrontalSolver solver(graph, ordering); + solver.load(graph); solver.eliminateInPlace(); const VectorValues& actual = solver.updateSolution(); @@ -276,6 +309,7 @@ TEST(MultifrontalSolver, HessianFactors) { const Ordering ordering{x1}; MultifrontalSolver solver(graph, ordering); + solver.load(graph); solver.eliminateInPlace(); const VectorValues& actual = solver.updateSolution(); @@ -332,6 +366,7 @@ TEST(MultifrontalSolver, BalancedSmoother) { EXPECT_LONGS_EQUAL(3, minBlocks); // Eliminate and solve + solver.load(smoother); solver.eliminateInPlace(); const VectorValues& actual = solver.updateSolution(); diff --git a/timing/timeMultifrontalSolver.cpp b/timing/timeMultifrontalSolver.cpp index 598c87873b..ba1313ce2f 100644 --- a/timing/timeMultifrontalSolver.cpp +++ b/timing/timeMultifrontalSolver.cpp @@ -73,13 +73,7 @@ int main() { const Values initial = buildGeneralSfmInitial(db); const GaussianFactorGraph linear = *graph.linearize(initial); - const std::vector> orderings = { - {"Burn", createSchurOrdering(db, false)}, - {"Metis", Ordering::Metis(linear)}, - {"Schur", createSchurOrdering(db, false)}, - {"Colamd", Ordering::Colamd(linear)}, - }; - + auto orderings = createOrderings(db, linear); for (const auto& [label, ordering] : orderings) { cout << "\nBAL Benchmark (" << label << ", iterations=" << bal_iterations << "):" << std::endl; diff --git a/timing/timeSFMBAL.h b/timing/timeSFMBAL.h index f83cca9f80..74cc13e6e7 100644 --- a/timing/timeSFMBAL.h +++ b/timing/timeSFMBAL.h @@ -123,6 +123,16 @@ inline Ordering createSchurOrdering(const SfmData& db, return ordering; } +inline std::vector> createOrderings( + const SfmData& db, const GaussianFactorGraph& linear) { + return { + {"Burn", createSchurOrdering(db, false)}, + {"Metis", Ordering::Metis(linear)}, + {"Schur", createSchurOrdering(db, false)}, + {"Colamd", Ordering::Colamd(linear)}, + }; +} + // Create ordering and optimize int optimize(const SfmData& db, const NonlinearFactorGraph& graph, const Values& initial, bool separateCalibration = false) { @@ -132,8 +142,8 @@ int optimize(const SfmData& db, const NonlinearFactorGraph& graph, LevenbergMarquardtParams params; LevenbergMarquardtParams::SetCeresDefaults(¶ms); // params.setLinearSolverType("SEQUENTIAL_CHOLESKY"); - params.setVerbosityLM("SUMMARY"); - params.setRelativeErrorTol(0.01); // 1% relative error tol + params.setVerbosityLM("SUMMARY"); + params.setRelativeErrorTol(0.01); // 1% relative error tol if (gUseSchur) { // Create Schur-complement ordering From 11e4c89d0a52c0b20d9f1333c0bc1271e0de7392 Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Sun, 4 Jan 2026 14:25:31 -0500 Subject: [PATCH 02/12] Check load occurred --- gtsam/linear/MultifrontalSolver.cpp | 6 ++ gtsam/linear/MultifrontalSolver.h | 1 + timing/timeMultifrontalSolver.cpp | 101 +++++++++++++++++----------- 3 files changed, 70 insertions(+), 38 deletions(-) diff --git a/gtsam/linear/MultifrontalSolver.cpp b/gtsam/linear/MultifrontalSolver.cpp index 01d4022f60..01b04e9d44 100644 --- a/gtsam/linear/MultifrontalSolver.cpp +++ b/gtsam/linear/MultifrontalSolver.cpp @@ -383,10 +383,16 @@ void MultifrontalSolver::load(const GaussianFactorGraph& graph) { for (auto& clique : cliques_) { clique->fillAb(graph); } + loaded_ = true; } /* ************************************************************************* */ void MultifrontalSolver::eliminateInPlace() { + if (!loaded_) { + throw std::runtime_error( + "MultifrontalSolver::eliminateInPlace: load() must be called before " + "eliminating."); + } // Parallel elimination uses PostOrderForestParallel, which will be // multi-threaded if GTSAM was compiled with TBB. TbbOpenMPMixedScope threadLimiter; diff --git a/gtsam/linear/MultifrontalSolver.h b/gtsam/linear/MultifrontalSolver.h index cc67fcba53..cb2b7c4a8e 100644 --- a/gtsam/linear/MultifrontalSolver.h +++ b/gtsam/linear/MultifrontalSolver.h @@ -61,6 +61,7 @@ class GTSAM_EXPORT MultifrontalSolver { std::map dims_; ///< Map from variable key to dimension. mutable VectorValues solution_; ///< Cached solution vector. std::unordered_set fixedKeys_; ///< Keys fixed by constrained factors. + bool loaded_ = false; ///< Whether load() has been called. public: /** diff --git a/timing/timeMultifrontalSolver.cpp b/timing/timeMultifrontalSolver.cpp index ba1313ce2f..307d7f1ae7 100644 --- a/timing/timeMultifrontalSolver.cpp +++ b/timing/timeMultifrontalSolver.cpp @@ -52,53 +52,71 @@ static void runMultifrontalSolver(MultifrontalSolver& solver, const GaussianFactorGraph& smoother, size_t iterations) { for (size_t i = 0; i < iterations; ++i) { - if (i > 0) solver.load(smoother); + solver.load(smoother); solver.eliminateInPlace(); const VectorValues& solution = solver.updateSolution(); (void)solution; } } -int main() { - cout << "Merging dim cap " << kMergeDimCap << std::endl; +void runBAL135Benchmark() { + const size_t iterations = 1; + const std::string filename = + findExampleDataFile("dubrovnik-135-90642-pre.txt"); + cout << "\nSingle MFS test: " << filename << " (iterations=" << iterations + << ")" << std::endl; + + const SfmData db = SfmData::FromBalFile(filename); + const NonlinearFactorGraph graph = buildGeneralSfmGraph(db, 0.1); + const Values initial = buildGeneralSfmInitial(db); + const GaussianFactorGraph linear = *graph.linearize(initial); + const Ordering ordering = Ordering::Metis(linear); + + MultifrontalSolver solver(linear, ordering, kMergeDimCap, nullptr); + auto start = std::chrono::high_resolution_clock::now(); + runMultifrontalSolver(solver, linear, iterations); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration t_imperative = end - start; + cout << " MultifrontalSolver: " << t_imperative.count() << " s" << std::endl; +} - { - const size_t bal_iterations = 5; - const string bal16 = findExampleDataFile("dubrovnik-16-22106-pre"); - const string bal88 = findExampleDataFile("dubrovnik-88-64298-pre"); - for (const auto& filename : {bal16, bal88}) { - cout << "\nProcessing BAL file: " << filename << std::endl; - const SfmData db = SfmData::FromBalFile(filename); - const NonlinearFactorGraph graph = buildGeneralSfmGraph(db, 0.1); - const Values initial = buildGeneralSfmInitial(db); - const GaussianFactorGraph linear = *graph.linearize(initial); - - auto orderings = createOrderings(db, linear); - for (const auto& [label, ordering] : orderings) { - cout << "\nBAL Benchmark (" << label - << ", iterations=" << bal_iterations << "):" << std::endl; - - auto start = std::chrono::high_resolution_clock::now(); - MultifrontalSolver solver(linear, ordering, kMergeDimCap, nullptr); - runMultifrontalSolver(solver, linear, bal_iterations); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration t_imperative = end - start; - cout << " MultifrontalSolver: " << t_imperative.count() << " s" - << std::endl; - - start = std::chrono::high_resolution_clock::now(); - runStandardSolver(linear, ordering, bal_iterations); - end = std::chrono::high_resolution_clock::now(); - std::chrono::duration t_standard = end - start; - cout << " Standard GTSAM: " << t_standard.count() << " s" - << std::endl; - - cout << " Speedup: " - << t_standard.count() / t_imperative.count() << "x" << std::endl; - } +void runBALBenchmark() { + const size_t bal_iterations = 5; + const string bal16 = findExampleDataFile("dubrovnik-16-22106-pre"); + const string bal88 = findExampleDataFile("dubrovnik-88-64298-pre"); + for (const auto& filename : {bal16, bal88}) { + cout << "\nProcessing BAL file: " << filename << std::endl; + const SfmData db = SfmData::FromBalFile(filename); + const NonlinearFactorGraph graph = buildGeneralSfmGraph(db, 0.1); + const Values initial = buildGeneralSfmInitial(db); + const GaussianFactorGraph linear = *graph.linearize(initial); + + auto orderings = createOrderings(db, linear); + for (const auto& [label, ordering] : orderings) { + cout << "\nBAL Benchmark (" << label << ", iterations=" << bal_iterations + << "):" << std::endl; + + auto start = std::chrono::high_resolution_clock::now(); + MultifrontalSolver solver(linear, ordering, kMergeDimCap, nullptr); + runMultifrontalSolver(solver, linear, bal_iterations); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration t_imperative = end - start; + cout << " MultifrontalSolver: " << t_imperative.count() << " s" + << std::endl; + + start = std::chrono::high_resolution_clock::now(); + runStandardSolver(linear, ordering, bal_iterations); + end = std::chrono::high_resolution_clock::now(); + std::chrono::duration t_standard = end - start; + cout << " Standard GTSAM: " << t_standard.count() << " s" + << std::endl; + + cout << " Speedup: " + << t_standard.count() / t_imperative.count() << "x" << std::endl; } } - +} +void runChainBenchmark() { const std::vector T_values = {10, 50, 100, 500, 1000, 5000}; const size_t iterations = 1000; @@ -126,5 +144,12 @@ int main() { cout << " Speedup: " << t_standard.count() / t_imperative.count() << "x" << std::endl; } +} +int main() { + cout << "Merging dim cap " << kMergeDimCap << std::endl; + + runBAL135Benchmark(); + // runBALBenchmark(); + // runChainBenchmark(); return 0; } From b3657fab5ef24b7e69f33817f03c7aa0608831bc Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Sun, 4 Jan 2026 14:55:11 -0500 Subject: [PATCH 03/12] In solver, different problem-size threshold --- gtsam/linear/MultifrontalSolver.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gtsam/linear/MultifrontalSolver.cpp b/gtsam/linear/MultifrontalSolver.cpp index 01b04e9d44..714b62d262 100644 --- a/gtsam/linear/MultifrontalSolver.cpp +++ b/gtsam/linear/MultifrontalSolver.cpp @@ -309,8 +309,8 @@ MultifrontalSolver::MultifrontalSolver(const GaussianFactorGraph& graph, const Ordering& ordering, size_t mergeDimCap, std::ostream* reportStream) - : MultifrontalSolver(Precompute(graph, ordering), ordering, - mergeDimCap, reportStream) {} + : MultifrontalSolver(Precompute(graph, ordering), ordering, mergeDimCap, + reportStream) {} /* ************************************************************************* */ MultifrontalSolver::MultifrontalSolver(PrecomputedData data, @@ -455,11 +455,16 @@ const VectorValues& MultifrontalSolver::updateSolution() const { }; auto visitorPost = [](const CliquePtr&, int) {}; + // Threshold chosen to balance task overhead vs parallelism. Small cliques + // (like points in SFM) are better handled sequentially within larger tasks + // to minimize scheduling overhead and maximize cache locality. + constexpr int kSolutionParallelThreshold = 4096; + // Cast to non-const because treeTraversal expects a non-const reference, // even though we are only calling const methods on the nodes. treeTraversal::DepthFirstForestParallel( const_cast(*this), rootData, visitorPre, visitorPost, - 10); + kSolutionParallelThreshold); for (Key key : fixedKeys_) { solution_.at(key).setZero(); From 37ea3b24c431d780b7594d919bcbcf74eccf8da6 Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Sun, 4 Jan 2026 15:38:17 -0500 Subject: [PATCH 04/12] eliminated_ flag --- gtsam/linear/MultifrontalSolver.cpp | 5 +++++ gtsam/linear/MultifrontalSolver.h | 1 + 2 files changed, 6 insertions(+) diff --git a/gtsam/linear/MultifrontalSolver.cpp b/gtsam/linear/MultifrontalSolver.cpp index 714b62d262..d8ab2868b9 100644 --- a/gtsam/linear/MultifrontalSolver.cpp +++ b/gtsam/linear/MultifrontalSolver.cpp @@ -384,6 +384,7 @@ void MultifrontalSolver::load(const GaussianFactorGraph& graph) { clique->fillAb(graph); } loaded_ = true; + eliminated_ = false; } /* ************************************************************************* */ @@ -393,6 +394,7 @@ void MultifrontalSolver::eliminateInPlace() { "MultifrontalSolver::eliminateInPlace: load() must be called before " "eliminating."); } + eliminated_ = false; // Parallel elimination uses PostOrderForestParallel, which will be // multi-threaded if GTSAM was compiled with TBB. TbbOpenMPMixedScope threadLimiter; @@ -400,10 +402,12 @@ void MultifrontalSolver::eliminateInPlace() { if (node) node->eliminateInPlace(); }; treeTraversal::PostOrderForestParallel(*this, visitorPost, 10); + eliminated_ = true; } /* ************************************************************************* */ GaussianBayesTree MultifrontalSolver::computeBayesTree() const { + assert(loaded_ && eliminated_); GaussianBayesTree bayesTree; using Clique = GaussianBayesTreeClique; using BayesCliquePtr = GaussianBayesTree::sharedClique; @@ -445,6 +449,7 @@ GaussianBayesTree MultifrontalSolver::computeBayesTree() const { /* ************************************************************************* */ const VectorValues& MultifrontalSolver::updateSolution() const { + assert(loaded_ && eliminated_); // Parallel solve uses treeTraversal::DepthFirstForestParallel (Pre-order / // Top-Down). TbbOpenMPMixedScope threadLimiter; diff --git a/gtsam/linear/MultifrontalSolver.h b/gtsam/linear/MultifrontalSolver.h index cb2b7c4a8e..502f5d8c41 100644 --- a/gtsam/linear/MultifrontalSolver.h +++ b/gtsam/linear/MultifrontalSolver.h @@ -62,6 +62,7 @@ class GTSAM_EXPORT MultifrontalSolver { mutable VectorValues solution_; ///< Cached solution vector. std::unordered_set fixedKeys_; ///< Keys fixed by constrained factors. bool loaded_ = false; ///< Whether load() has been called. + bool eliminated_ = false; ///< Whether eliminateInPlace() ran. public: /** From 3b9b48f166c21a613d5b7cea5c58ce469c28c926 Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Sun, 4 Jan 2026 15:38:54 -0500 Subject: [PATCH 05/12] Store RSd_ to avoid trashing cache. --- gtsam/linear/MultifrontalClique.cpp | 30 +++++++++++++++-------------- gtsam/linear/MultifrontalClique.h | 4 ++-- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/gtsam/linear/MultifrontalClique.cpp b/gtsam/linear/MultifrontalClique.cpp index 5c95b91e90..8d16ac716d 100644 --- a/gtsam/linear/MultifrontalClique.cpp +++ b/gtsam/linear/MultifrontalClique.cpp @@ -16,6 +16,7 @@ * @date December 2025 */ +#include #include #include #include @@ -81,9 +82,8 @@ bool validateFactorKeys(const GaussianFactorGraph& graph, MultifrontalClique::MultifrontalClique( std::vector factorIndices, const std::weak_ptr& parent, const KeyVector& frontals, - const KeySet& separatorKeys, const KeyDimMap& dims, - size_t vbmRows, VectorValues* solution, - const std::unordered_set* fixedKeys) { + const KeySet& separatorKeys, const KeyDimMap& dims, size_t vbmRows, + VectorValues* solution, const std::unordered_set* fixedKeys) { factorIndices_ = std::move(factorIndices); this->parent = parent; fixedKeys_ = fixedKeys; @@ -252,7 +252,11 @@ void MultifrontalClique::prepareForElimination() { } } -void MultifrontalClique::factorize() { sbm_.choleskyPartial(numFrontals()); } +void MultifrontalClique::factorize() { + sbm_.choleskyPartial(numFrontals()); + RSd_ = sbm_.split(numFrontals()); + sbm_.blockStart() = 0; +} void MultifrontalClique::addIdentityDamping(double lambda) { const size_t nf = numFrontals(); @@ -287,25 +291,23 @@ void MultifrontalClique::updateParent(MultifrontalClique& parent) const { } std::shared_ptr MultifrontalClique::conditional() const { - const KeyVector& keys = orderedKeys_; - assert(sbm_.blockStart() == 0); - VerticalBlockMatrix Ab = sbm_.split(numFrontals()); - sbm_.blockStart() = 0; // Split sets it to numFrontals(), reset to 0. - return std::make_shared(keys, numFrontals(), - std::move(Ab)); + assert(RSd_.nBlocks() > 0); + return std::make_shared(orderedKeys_, numFrontals(), + RSd_); } // Solve with block back-substitution on the Cholesky-stored SBM. void MultifrontalClique::updateSolution() const { + assert(RSd_.nBlocks() > 0); const size_t nf = numFrontals(); - const size_t n = sbm_.nBlocks() - 1; // # frontals + # separators + const size_t n = RSd_.nBlocks() - 1; // # frontals + # separators // The in-place factorization yields an upper-triangular system [R S d]: // R * x_f + S * x_s = d, // with x_f the frontals and x_s the separators. - const auto R = sbm_.triangularView(0, nf); - const auto S = sbm_.aboveDiagonalRange(0, nf, nf, n); - const auto d = sbm_.aboveDiagonalRange(0, nf, n, n + 1); + const auto R = RSd_.range(0, nf).triangularView(); + const auto S = RSd_.range(nf, n); + const auto d = RSd_.range(n, n + 1); // We first solve rhs = d - S * x_s rhsScratch_.noalias() = d; diff --git a/gtsam/linear/MultifrontalClique.h b/gtsam/linear/MultifrontalClique.h index 93e1251b61..ec646fc26b 100644 --- a/gtsam/linear/MultifrontalClique.h +++ b/gtsam/linear/MultifrontalClique.h @@ -97,8 +97,7 @@ class GTSAM_EXPORT MultifrontalClique { const std::weak_ptr& parent, const KeyVector& frontals, const KeySet& separatorKeys, - const KeyDimMap& dims, - size_t vbmRows, + const KeyDimMap& dims, size_t vbmRows, VectorValues* solution, const std::unordered_set* fixedKeys); @@ -223,6 +222,7 @@ class GTSAM_EXPORT MultifrontalClique { } VerticalBlockMatrix Ab_; mutable SymmetricBlockMatrix sbm_; + VerticalBlockMatrix RSd_; ///< Cached [R S d] from the last elimination. mutable Vector rhsScratch_; ///< Cached RHS workspace for back-substitution. mutable Vector separatorScratch_; ///< Cached separator stack for back-substitution. From a25afc0b014b578e6e3e96f0d142c07786ded1e9 Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Sun, 4 Jan 2026 17:28:23 -0500 Subject: [PATCH 06/12] QR mode if n>2*m --- gtsam/linear/MultifrontalClique.cpp | 66 +++++++++++++++++-- gtsam/linear/MultifrontalClique.h | 37 +++++++++-- gtsam/linear/tests/testMultifrontalSolver.cpp | 2 +- timing/timeMultifrontalSolver.cpp | 25 +++---- 4 files changed, 105 insertions(+), 25 deletions(-) diff --git a/gtsam/linear/MultifrontalClique.cpp b/gtsam/linear/MultifrontalClique.cpp index 8d16ac716d..1174df166a 100644 --- a/gtsam/linear/MultifrontalClique.cpp +++ b/gtsam/linear/MultifrontalClique.cpp @@ -16,6 +16,7 @@ * @date December 2025 */ +#include #include #include #include @@ -111,9 +112,8 @@ MultifrontalClique::MultifrontalClique( cacheSolutionPointers(solution, frontals, separatorKeys); // Pre-allocate matrices once per structure. - std::vector blockDims = - this->blockDims(dims, frontals, separatorKeys); - initializeMatrices(blockDims, vbmRows); + blockDims_ = this->blockDims(dims, frontals, separatorKeys); + initializeMatrices(blockDims_, vbmRows); } void MultifrontalClique::finalize(std::vector children) { @@ -135,6 +135,8 @@ void MultifrontalClique::finalize(std::vector children) { indices.push_back(static_cast(orderedKeys_.size())); child.clique->setParentIndices(indices); } + + // Allocation is deferred until load. } DenseIndex MultifrontalClique::blockIndex(Key key) const { @@ -170,12 +172,23 @@ std::vector MultifrontalClique::blockDims( void MultifrontalClique::initializeMatrices( const std::vector& blockDims, size_t verticalBlockMatrixRows) { - sbm_ = SymmetricBlockMatrix(blockDims, true); + separatorBlockDims_.assign(blockDims.begin() + numFrontals(), + blockDims.end()); Ab_ = VerticalBlockMatrix(blockDims, verticalBlockMatrixRows, true); // Ab's structure is fixed; clear it once and reuse across loads. Ab_.matrix().setZero(); } +void MultifrontalClique::allocateSbm() { + if (sbm_.nBlocks() > 0) return; + sbm_ = SymmetricBlockMatrix(blockDims_, true); +} + +void MultifrontalClique::allocateSeparatorSbm() { + if (separatorSbm_.nBlocks() > 0) return; + separatorSbm_ = SymmetricBlockMatrix(separatorBlockDims_, true); +} + size_t MultifrontalClique::addJacobianFactor( const JacobianFactor& jacobianFactor, size_t rowOffset) { // We only overwrite the fixed sparsity pattern, so Ab must be zeroed once in @@ -235,16 +248,33 @@ void MultifrontalClique::fillAb(const GaussianFactorGraph& graph) { hessianFactors_.push_back(hessianFactor); } } + + // Lock in QR only for leaf cliques with no Hessian factors. + const bool isLeaf = children.empty(); + const bool useQr = + isLeaf && hessianFactors_.empty() && (frontalDim > 0) && + (frontalDim + separatorDim > kQrAspectRatio * frontalDim) && + (Ab_.matrix().rows() >= static_cast(frontalDim)); + solveMode_ = useQr ? SolveMode::QrLeaf : SolveMode::Cholesky; + if (useQr) { + allocateSeparatorSbm(); // QR only needs separator updates for the parent. + } else { + allocateSbm(); + } } void MultifrontalClique::prepareForElimination() { + // QR leaf cliques skip SBM assembly entirely. + if (useQr()) return; + assert(sbm_.nBlocks() > 0); sbm_.setZero(); for (const auto& hf : hessianFactors_) { addHessianFactor(*hf); } - // Update SBM with the local factors, Ab^T * Ab - sbm_.selfadjointView().rankUpdate(Ab_.matrix().transpose()); + if (Ab_.matrix().rows() > 0) { + sbm_.selfadjointView().rankUpdate(Ab_.matrix().transpose()); + } for (const auto& child : children) { if (!child) continue; @@ -253,6 +283,16 @@ void MultifrontalClique::prepareForElimination() { } void MultifrontalClique::factorize() { + if (useQr()) { + const DenseIndex nfRows = static_cast(frontalDim); + assert(Ab_.matrix().rows() >= nfRows); + // Copy Ab_ to preserve its invariant; QR writes in place. + Matrix qr = Ab_.matrix(); + inplace_QR(qr); + RSd_ = VerticalBlockMatrix(blockDims_, nfRows, true); + RSd_.matrix() = qr.topRows(nfRows); + return; + } sbm_.choleskyPartial(numFrontals()); RSd_ = sbm_.split(numFrontals()); sbm_.blockStart() = 0; @@ -282,7 +322,19 @@ void MultifrontalClique::eliminateInPlace() { } void MultifrontalClique::updateParent(MultifrontalClique& parent) const { + if (useQr()) { + assert(separatorSbm_.nBlocks() > 0); + const DenseIndex nfBlocks = static_cast(numFrontals()); + const DenseIndex totalBlocks = RSd_.nBlocks(); + const Matrix Sd = RSd_.range(nfBlocks, totalBlocks); + // Accumulate separator normal equations (S^T S) for the parent update. + separatorSbm_.setZero(); + separatorSbm_.selfadjointView().rankUpdate(Sd.transpose()); + parent.sbm_.updateFromMappedBlocks(separatorSbm_, parentIndices_); + return; + } // Expose only the separator+RHS view when contributing to the parent. + assert(sbmAllocated_); assert(sbm_.blockStart() == 0); sbm_.blockStart() = numFrontals(); assert(sbm_.nBlocks() == parentIndices_.size()); @@ -292,6 +344,7 @@ void MultifrontalClique::updateParent(MultifrontalClique& parent) const { std::shared_ptr MultifrontalClique::conditional() const { assert(RSd_.nBlocks() > 0); + // RSd_ is cached at elimination time. return std::make_shared(orderedKeys_, numFrontals(), RSd_); } @@ -299,6 +352,7 @@ std::shared_ptr MultifrontalClique::conditional() const { // Solve with block back-substitution on the Cholesky-stored SBM. void MultifrontalClique::updateSolution() const { assert(RSd_.nBlocks() > 0); + // Use cached [R S d] for fast back-substitution. const size_t nf = numFrontals(); const size_t n = RSd_.nBlocks() - 1; // # frontals + # separators diff --git a/gtsam/linear/MultifrontalClique.h b/gtsam/linear/MultifrontalClique.h index ec646fc26b..5c4453043b 100644 --- a/gtsam/linear/MultifrontalClique.h +++ b/gtsam/linear/MultifrontalClique.h @@ -188,6 +188,8 @@ class GTSAM_EXPORT MultifrontalClique { const MultifrontalClique& clique); private: + enum class SolveMode { Cholesky, QrLeaf }; + /// Cache pointers to frontal and separator update vectors. void cacheSolutionPointers(VectorValues* delta, const KeyVector& frontals, const KeySet& separatorKeys); @@ -208,6 +210,14 @@ class GTSAM_EXPORT MultifrontalClique { void initializeMatrices(const std::vector& blockDims, size_t totalNumRows); + /// Allocate the symmetric block matrix if needed. + void allocateSbm(); + + /// Allocate the separator-only SBM used for QR leaf updates. + void allocateSeparatorSbm(); + + bool useQr() const { return solveMode_ == SolveMode::QrLeaf; } + /** * Add a Jacobian factor's contributions into the Ab matrix. * @return Number of rows added. @@ -220,22 +230,35 @@ class GTSAM_EXPORT MultifrontalClique { void setParentIndices(const std::vector& indices) { parentIndices_ = indices; } - VerticalBlockMatrix Ab_; - mutable SymmetricBlockMatrix sbm_; - VerticalBlockMatrix RSd_; ///< Cached [R S d] from the last elimination. - mutable Vector rhsScratch_; ///< Cached RHS workspace for back-substitution. - mutable Vector - separatorScratch_; ///< Cached separator stack for back-substitution. - + // Build-time metadata. std::vector factorIndices_; KeyVector orderedKeys_; ///< Keys ordered by block index (frontals+seps). const std::unordered_set* fixedKeys_ = nullptr; + std::vector blockDims_; + std::vector separatorBlockDims_; std::vector parentIndices_; ///< Parent block indices for separators + RHS. std::vector frontalPtrs_; ///< Pointers into solution frontals. std::vector separatorPtrs_; ///< Pointers into solution separator. + + // Load-time state. + VerticalBlockMatrix Ab_; std::vector hessianFactors_; ///< Hessian factors. + SolveMode solveMode_ = SolveMode::Cholesky; + + // Elimination-time state. + mutable SymmetricBlockMatrix sbm_; + mutable SymmetricBlockMatrix + separatorSbm_; ///< Cached separator update for QR. + VerticalBlockMatrix RSd_; ///< Cached [R S d] from elimination. + + // Solve-time scratch space. + mutable Vector rhsScratch_; ///< Cached RHS workspace for back-substitution. + mutable Vector + separatorScratch_; ///< Cached separator stack for back-substitution. + + static constexpr double kQrAspectRatio = 2.0; }; std::ostream& operator<<(std::ostream& os, const MultifrontalClique& clique); diff --git a/gtsam/linear/tests/testMultifrontalSolver.cpp b/gtsam/linear/tests/testMultifrontalSolver.cpp index e2f2ca2082..cbe4145f45 100644 --- a/gtsam/linear/tests/testMultifrontalSolver.cpp +++ b/gtsam/linear/tests/testMultifrontalSolver.cpp @@ -338,6 +338,7 @@ TEST(MultifrontalSolver, BalancedSmoother) { const Ordering ordering{X(1), X(3), X(5), X(7), X(2), X(6), X(4)}; MultifrontalSolver solver(smoother, ordering); + solver.load(smoother); // Verify roots EXPECT(solver.roots().size() == 1); @@ -366,7 +367,6 @@ TEST(MultifrontalSolver, BalancedSmoother) { EXPECT_LONGS_EQUAL(3, minBlocks); // Eliminate and solve - solver.load(smoother); solver.eliminateInPlace(); const VectorValues& actual = solver.updateSolution(); diff --git a/timing/timeMultifrontalSolver.cpp b/timing/timeMultifrontalSolver.cpp index 307d7f1ae7..9c1767e6f5 100644 --- a/timing/timeMultifrontalSolver.cpp +++ b/timing/timeMultifrontalSolver.cpp @@ -59,14 +59,16 @@ static void runMultifrontalSolver(MultifrontalSolver& solver, } } +namespace { +const std::string bal135 = findExampleDataFile("dubrovnik-135-90642-pre.txt"); +} + void runBAL135Benchmark() { const size_t iterations = 1; - const std::string filename = - findExampleDataFile("dubrovnik-135-90642-pre.txt"); - cout << "\nSingle MFS test: " << filename << " (iterations=" << iterations + cout << "\nSingle MFS test: " << bal135 << " (iterations=" << iterations << ")" << std::endl; - const SfmData db = SfmData::FromBalFile(filename); + const SfmData db = SfmData::FromBalFile(bal135); const NonlinearFactorGraph graph = buildGeneralSfmGraph(db, 0.1); const Values initial = buildGeneralSfmInitial(db); const GaussianFactorGraph linear = *graph.linearize(initial); @@ -78,13 +80,14 @@ void runBAL135Benchmark() { auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration t_imperative = end - start; cout << " MultifrontalSolver: " << t_imperative.count() << " s" << std::endl; + tictoc_print(); } void runBALBenchmark() { - const size_t bal_iterations = 5; + const size_t bal_iterations = 2; const string bal16 = findExampleDataFile("dubrovnik-16-22106-pre"); const string bal88 = findExampleDataFile("dubrovnik-88-64298-pre"); - for (const auto& filename : {bal16, bal88}) { + for (const auto& filename : {bal16, bal88, bal135}) { cout << "\nProcessing BAL file: " << filename << std::endl; const SfmData db = SfmData::FromBalFile(filename); const NonlinearFactorGraph graph = buildGeneralSfmGraph(db, 0.1); @@ -96,8 +99,8 @@ void runBALBenchmark() { cout << "\nBAL Benchmark (" << label << ", iterations=" << bal_iterations << "):" << std::endl; - auto start = std::chrono::high_resolution_clock::now(); MultifrontalSolver solver(linear, ordering, kMergeDimCap, nullptr); + auto start = std::chrono::high_resolution_clock::now(); runMultifrontalSolver(solver, linear, bal_iterations); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration t_imperative = end - start; @@ -118,7 +121,7 @@ void runBALBenchmark() { } void runChainBenchmark() { const std::vector T_values = {10, 50, 100, 500, 1000, 5000}; - const size_t iterations = 1000; + const size_t iterations = 500; for (size_t T : T_values) { cout << "\nBenchmark (T=" << T << ", iterations=" << iterations @@ -148,8 +151,8 @@ void runChainBenchmark() { int main() { cout << "Merging dim cap " << kMergeDimCap << std::endl; - runBAL135Benchmark(); - // runBALBenchmark(); - // runChainBenchmark(); + // runBAL135Benchmark(); + runBALBenchmark(); + runChainBenchmark(); return 0; } From d57dbccf816ada1cce93be2168f98732d76f565d Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Sun, 4 Jan 2026 18:32:55 -0500 Subject: [PATCH 07/12] Fused path --- gtsam/linear/MultifrontalSolver.cpp | 14 ++++++++++++++ gtsam/linear/MultifrontalSolver.h | 6 ++++++ gtsam/linear/tests/testMultifrontalSolver.cpp | 14 ++++++++++++++ timing/timeMultifrontalSolver.cpp | 5 ++--- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/gtsam/linear/MultifrontalSolver.cpp b/gtsam/linear/MultifrontalSolver.cpp index d8ab2868b9..ae84dce926 100644 --- a/gtsam/linear/MultifrontalSolver.cpp +++ b/gtsam/linear/MultifrontalSolver.cpp @@ -405,6 +405,20 @@ void MultifrontalSolver::eliminateInPlace() { eliminated_ = true; } +/* ************************************************************************* */ +void MultifrontalSolver::eliminateInPlace(const GaussianFactorGraph& graph) { + // Combine load + eliminate in one post-order traversal to improve locality. + TbbOpenMPMixedScope threadLimiter; + auto visitorPost = [&graph](const CliquePtr& node) { + if (!node) return; + node->fillAb(graph); + node->eliminateInPlace(); + }; + treeTraversal::PostOrderForestParallel(*this, visitorPost, 10); + loaded_ = true; + eliminated_ = true; +} + /* ************************************************************************* */ GaussianBayesTree MultifrontalSolver::computeBayesTree() const { assert(loaded_ && eliminated_); diff --git a/gtsam/linear/MultifrontalSolver.h b/gtsam/linear/MultifrontalSolver.h index 502f5d8c41..63a02d1067 100644 --- a/gtsam/linear/MultifrontalSolver.h +++ b/gtsam/linear/MultifrontalSolver.h @@ -112,6 +112,12 @@ class GTSAM_EXPORT MultifrontalSolver { */ void eliminateInPlace(); + /** + * Load and eliminate the graph in a single traversal. + * This calls fillAb() and eliminateInPlace() per clique in post-order. + */ + void eliminateInPlace(const GaussianFactorGraph& graph); + /** * Compute a Bayes tree from the in-place Cholesky factorization. * Requires eliminateInPlace() to have been called beforehand. diff --git a/gtsam/linear/tests/testMultifrontalSolver.cpp b/gtsam/linear/tests/testMultifrontalSolver.cpp index cbe4145f45..c8d08efb9a 100644 --- a/gtsam/linear/tests/testMultifrontalSolver.cpp +++ b/gtsam/linear/tests/testMultifrontalSolver.cpp @@ -150,6 +150,20 @@ TEST(MultifrontalSolver, Eliminate) { EXPECT(assert_equal(expected, actual, 1e-9)); } +/* ************************************************************************* */ +// Load + eliminate in one traversal matches standard elimination. +TEST(MultifrontalSolver, EliminateWithLoad) { + MultifrontalSolver solver(chain, chainOrdering); + solver.eliminateInPlace(chain); + + const VectorValues& actual = solver.updateSolution(); + + GaussianBayesTree expectedBT = *chain.eliminateMultifrontal(chainOrdering); + VectorValues expected = expectedBT.optimize(); + + EXPECT(assert_equal(expected, actual, 1e-9)); +} + /* ************************************************************************* */ // Compare marginals from in-place Bayes tree against standard elimination. TEST(MultifrontalSolver, ComputeBayesTreeMarginals) { diff --git a/timing/timeMultifrontalSolver.cpp b/timing/timeMultifrontalSolver.cpp index 9c1767e6f5..5d7f8d90a7 100644 --- a/timing/timeMultifrontalSolver.cpp +++ b/timing/timeMultifrontalSolver.cpp @@ -52,8 +52,7 @@ static void runMultifrontalSolver(MultifrontalSolver& solver, const GaussianFactorGraph& smoother, size_t iterations) { for (size_t i = 0; i < iterations; ++i) { - solver.load(smoother); - solver.eliminateInPlace(); + solver.eliminateInPlace(smoother); const VectorValues& solution = solver.updateSolution(); (void)solution; } @@ -151,7 +150,7 @@ void runChainBenchmark() { int main() { cout << "Merging dim cap " << kMergeDimCap << std::endl; - // runBAL135Benchmark(); + runBAL135Benchmark(); runBALBenchmark(); runChainBenchmark(); return 0; From ab62e92c22daf521066d51975f548b17a042a091 Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Sun, 4 Jan 2026 22:53:53 -0500 Subject: [PATCH 08/12] Review comments --- gtsam/linear/MultifrontalClique.cpp | 22 ++++++++----------- gtsam/linear/MultifrontalClique.h | 17 ++++++++++---- gtsam/linear/MultifrontalSolver.h | 6 ++--- gtsam/linear/tests/testMultifrontalSolver.cpp | 1 + timing/timeMultifrontalSolver.cpp | 4 ++-- 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/gtsam/linear/MultifrontalClique.cpp b/gtsam/linear/MultifrontalClique.cpp index 1174df166a..81791ad209 100644 --- a/gtsam/linear/MultifrontalClique.cpp +++ b/gtsam/linear/MultifrontalClique.cpp @@ -17,7 +17,6 @@ */ #include -#include #include #include #include @@ -233,7 +232,6 @@ void MultifrontalClique::addHessianFactor(const HessianFactor& hessianFactor) { void MultifrontalClique::fillAb(const GaussianFactorGraph& graph) { assert(validateFactorKeys(graph, factorIndices_, orderedKeys_, fixedKeys_)); - assert(validateFactorKeys(graph, factorIndices_, orderedKeys_, fixedKeys_)); hessianFactors_.clear(); size_t rowOffset = 0; @@ -251,12 +249,12 @@ void MultifrontalClique::fillAb(const GaussianFactorGraph& graph) { // Lock in QR only for leaf cliques with no Hessian factors. const bool isLeaf = children.empty(); - const bool useQr = + const bool useQR = isLeaf && hessianFactors_.empty() && (frontalDim > 0) && (frontalDim + separatorDim > kQrAspectRatio * frontalDim) && (Ab_.matrix().rows() >= static_cast(frontalDim)); - solveMode_ = useQr ? SolveMode::QrLeaf : SolveMode::Cholesky; - if (useQr) { + solveMode_ = useQR ? SolveMode::QrLeaf : SolveMode::Cholesky; + if (useQR) { allocateSeparatorSbm(); // QR only needs separator updates for the parent. } else { allocateSbm(); @@ -265,7 +263,7 @@ void MultifrontalClique::fillAb(const GaussianFactorGraph& graph) { void MultifrontalClique::prepareForElimination() { // QR leaf cliques skip SBM assembly entirely. - if (useQr()) return; + if (useQR()) return; assert(sbm_.nBlocks() > 0); sbm_.setZero(); for (const auto& hf : hessianFactors_) { @@ -283,7 +281,7 @@ void MultifrontalClique::prepareForElimination() { } void MultifrontalClique::factorize() { - if (useQr()) { + if (useQR()) { const DenseIndex nfRows = static_cast(frontalDim); assert(Ab_.matrix().rows() >= nfRows); // Copy Ab_ to preserve its invariant; QR writes in place. @@ -309,9 +307,8 @@ void MultifrontalClique::addDiagonalDamping(double lambda, double minDiagonal, double maxDiagonal) { const size_t nf = numFrontals(); for (size_t j = 0; j < nf; ++j) { - Vector diag = sbm_.diagonal(j); - diag = diag.cwiseMax(minDiagonal).cwiseMin(maxDiagonal); - const Vector scaled = lambda * diag; + const Vector scaled = + lambda * sbm_.diagonal(j).cwiseMax(minDiagonal).cwiseMin(maxDiagonal); sbm_.addToDiagonalBlock(j, scaled); } } @@ -322,7 +319,7 @@ void MultifrontalClique::eliminateInPlace() { } void MultifrontalClique::updateParent(MultifrontalClique& parent) const { - if (useQr()) { + if (useQR()) { assert(separatorSbm_.nBlocks() > 0); const DenseIndex nfBlocks = static_cast(numFrontals()); const DenseIndex totalBlocks = RSd_.nBlocks(); @@ -334,8 +331,7 @@ void MultifrontalClique::updateParent(MultifrontalClique& parent) const { return; } // Expose only the separator+RHS view when contributing to the parent. - assert(sbmAllocated_); - assert(sbm_.blockStart() == 0); + assert(sbm_.nBlocks() > 0 && sbm_.blockStart() == 0); sbm_.blockStart() = numFrontals(); assert(sbm_.nBlocks() == parentIndices_.size()); parent.sbm_.updateFromMappedBlocks(sbm_, parentIndices_); diff --git a/gtsam/linear/MultifrontalClique.h b/gtsam/linear/MultifrontalClique.h index 5c4453043b..3fde296abb 100644 --- a/gtsam/linear/MultifrontalClique.h +++ b/gtsam/linear/MultifrontalClique.h @@ -117,10 +117,18 @@ class GTSAM_EXPORT MultifrontalClique { /// Perform Cholesky factorization on the frontal block. void factorize(); - /// Add identity damping to the frontal block. + /** + * Add identity damping to the frontal block. + * @param lambda Damping factor + */ void addIdentityDamping(double lambda); - /// Add diagonal damping to the frontal block, with clamping. + /** + * Add diagonal damping to the frontal block. + * @param lambda Damping factor + * @param minDiagonal Minimum diagonal value + * @param maxDiagonal Maximum diagonal value + */ void addDiagonalDamping(double lambda, double minDiagonal, double maxDiagonal); @@ -146,6 +154,9 @@ class GTSAM_EXPORT MultifrontalClique { /// Get the symmetric block matrix (const). const SymmetricBlockMatrix& sbm() const { return sbm_; } + /// Check if this clique is using QR elimination. + bool useQR() const { return solveMode_ == SolveMode::QrLeaf; } + /** * Print this clique. * @param s Optional string prefix. @@ -216,8 +227,6 @@ class GTSAM_EXPORT MultifrontalClique { /// Allocate the separator-only SBM used for QR leaf updates. void allocateSeparatorSbm(); - bool useQr() const { return solveMode_ == SolveMode::QrLeaf; } - /** * Add a Jacobian factor's contributions into the Ab matrix. * @return Number of rows added. diff --git a/gtsam/linear/MultifrontalSolver.h b/gtsam/linear/MultifrontalSolver.h index 63a02d1067..57e2291304 100644 --- a/gtsam/linear/MultifrontalSolver.h +++ b/gtsam/linear/MultifrontalSolver.h @@ -45,9 +45,9 @@ class MultifrontalClique; class GTSAM_EXPORT MultifrontalSolver { public: struct PrecomputedData { - std::map dims; - std::unordered_set fixedKeys; - SymbolicJunctionTree junctionTree; + std::map dims; ///< Map from variable key to dimension. + std::unordered_set fixedKeys; ///< Keys fixed by constrained factors. + SymbolicJunctionTree junctionTree; ///< Precomputed symbolic junction tree. }; /// Shared pointer to a MultifrontalClique. diff --git a/gtsam/linear/tests/testMultifrontalSolver.cpp b/gtsam/linear/tests/testMultifrontalSolver.cpp index c8d08efb9a..e8708fff7e 100644 --- a/gtsam/linear/tests/testMultifrontalSolver.cpp +++ b/gtsam/linear/tests/testMultifrontalSolver.cpp @@ -96,6 +96,7 @@ TEST(MultifrontalSolver, ConstructorPrecomputed) { auto childClique = root->children[0]; // Verify matrices in leaf (childClique) + CHECK(childClique->useQR() == false); EXPECT_LONGS_EQUAL(4, childClique->sbm().nBlocks()); EXPECT_LONGS_EQUAL(2, childClique->Ab().rows()); EXPECT_LONGS_EQUAL(4, childClique->Ab().nBlocks()); diff --git a/timing/timeMultifrontalSolver.cpp b/timing/timeMultifrontalSolver.cpp index 5d7f8d90a7..5428a7a709 100644 --- a/timing/timeMultifrontalSolver.cpp +++ b/timing/timeMultifrontalSolver.cpp @@ -49,10 +49,10 @@ static void runStandardSolver(const GaussianFactorGraph& smoother, /// Run new MultifrontalSolver elimination and optimization. static void runMultifrontalSolver(MultifrontalSolver& solver, - const GaussianFactorGraph& smoother, + const GaussianFactorGraph& graph, size_t iterations) { for (size_t i = 0; i < iterations; ++i) { - solver.eliminateInPlace(smoother); + solver.eliminateInPlace(graph); const VectorValues& solution = solver.updateSolution(); (void)solution; } From fa50032447f13aa295a8326bada13a14c4183a25 Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Mon, 5 Jan 2026 00:17:05 -0500 Subject: [PATCH 09/12] UpdateFromOuterProductBlocks --- gtsam/base/SymmetricBlockMatrix.cpp | 24 +++++++++++++++++++ gtsam/base/SymmetricBlockMatrix.h | 6 +++++ gtsam/base/tests/testSymmetricBlockMatrix.cpp | 24 +++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/gtsam/base/SymmetricBlockMatrix.cpp b/gtsam/base/SymmetricBlockMatrix.cpp index 6e67418757..9ee96fd93e 100644 --- a/gtsam/base/SymmetricBlockMatrix.cpp +++ b/gtsam/base/SymmetricBlockMatrix.cpp @@ -126,6 +126,30 @@ void SymmetricBlockMatrix::updateFromMappedBlocks( } } +/* ************************************************************************* */ +void SymmetricBlockMatrix::updateFromOuterProductBlocks( + const VerticalBlockMatrix& other, + const std::vector& blockIndices) { + assert(static_cast(blockIndices.size()) == other.nBlocks()); + const DenseIndex otherBlocks = other.nBlocks(); + for (DenseIndex i = 0; i < otherBlocks; ++i) { + const DenseIndex I = blockIndices[i]; + if (I < 0) continue; + assert(I < nBlocks()); + const auto Si = other(i); + Matrix diag = Si.transpose() * Si; + updateDiagonalBlock(I, diag); + for (DenseIndex j = i + 1; j < otherBlocks; ++j) { + const DenseIndex J = blockIndices[j]; + if (J < 0) continue; + assert(J < nBlocks()); + const auto Sj = other(j); + Matrix off = Si.transpose() * Sj; + updateOffDiagonalBlock(I, J, off); + } + } +} + /* ************************************************************************* */ } //\ namespace gtsam diff --git a/gtsam/base/SymmetricBlockMatrix.h b/gtsam/base/SymmetricBlockMatrix.h index f46dc72fae..dbb740a633 100644 --- a/gtsam/base/SymmetricBlockMatrix.h +++ b/gtsam/base/SymmetricBlockMatrix.h @@ -264,6 +264,12 @@ namespace gtsam { void updateFromMappedBlocks(const SymmetricBlockMatrix& other, const std::vector& blockIndices); + /// Update this matrix with blockwise outer products from a vertical block matrix. + /// Adds S_i^T S_j into block (I,J), using a block mapping; entries with index -1 are skipped. + /// The range to use is controlled by other.firstBlock(). + void updateFromOuterProductBlocks(const VerticalBlockMatrix& other, + const std::vector& blockIndices); + /// @} /// @name Accessing the full matrix. /// @{ diff --git a/gtsam/base/tests/testSymmetricBlockMatrix.cpp b/gtsam/base/tests/testSymmetricBlockMatrix.cpp index 4989ea1d84..b2049ada26 100644 --- a/gtsam/base/tests/testSymmetricBlockMatrix.cpp +++ b/gtsam/base/tests/testSymmetricBlockMatrix.cpp @@ -17,6 +17,7 @@ #include #include +#include using namespace std; using namespace gtsam; @@ -187,6 +188,29 @@ TEST(SymmetricBlockMatrix, UpdateFromMappedBlocks) Matrix(doubled.selfadjointView()))); } +/* ************************************************************************* */ +// Update via blockwise outer products from a VerticalBlockMatrix view. +TEST(SymmetricBlockMatrix, UpdateFromOuterProductBlocks) +{ + const std::vector vbmDims{2, 1}; + VerticalBlockMatrix vbm(vbmDims, 4, true); + vbm.matrix() = (Matrix(4, 4) << + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16).finished(); + + const std::vector destDims{1}; + SymmetricBlockMatrix actual(destDims, true); + actual.setZero(); + const std::vector mapping{0, 1}; + const Matrix S = vbm.range(1, 3); + const Matrix expected = S.transpose() * S; + vbm.firstBlock() = 1; + actual.updateFromOuterProductBlocks(vbm, mapping); + EXPECT(assert_equal(expected, Matrix(actual.selfadjointView()))); +} + /* ************************************************************************* */ // In-place inversion path. TEST(SymmetricBlockMatrix, inverseInPlace) { From 178ed2d0d8a42b9c6f3c66292db4e6196a75585b Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Mon, 5 Jan 2026 00:17:30 -0500 Subject: [PATCH 10/12] Use UpdateFromOuterProductBlocks --- gtsam/linear/MultifrontalClique.cpp | 37 ++++++++++------------------- gtsam/linear/MultifrontalClique.h | 8 +------ 2 files changed, 14 insertions(+), 31 deletions(-) diff --git a/gtsam/linear/MultifrontalClique.cpp b/gtsam/linear/MultifrontalClique.cpp index 81791ad209..2dc7d624f4 100644 --- a/gtsam/linear/MultifrontalClique.cpp +++ b/gtsam/linear/MultifrontalClique.cpp @@ -171,8 +171,6 @@ std::vector MultifrontalClique::blockDims( void MultifrontalClique::initializeMatrices( const std::vector& blockDims, size_t verticalBlockMatrixRows) { - separatorBlockDims_.assign(blockDims.begin() + numFrontals(), - blockDims.end()); Ab_ = VerticalBlockMatrix(blockDims, verticalBlockMatrixRows, true); // Ab's structure is fixed; clear it once and reuse across loads. Ab_.matrix().setZero(); @@ -183,11 +181,6 @@ void MultifrontalClique::allocateSbm() { sbm_ = SymmetricBlockMatrix(blockDims_, true); } -void MultifrontalClique::allocateSeparatorSbm() { - if (separatorSbm_.nBlocks() > 0) return; - separatorSbm_ = SymmetricBlockMatrix(separatorBlockDims_, true); -} - size_t MultifrontalClique::addJacobianFactor( const JacobianFactor& jacobianFactor, size_t rowOffset) { // We only overwrite the fixed sparsity pattern, so Ab must be zeroed once in @@ -254,9 +247,7 @@ void MultifrontalClique::fillAb(const GaussianFactorGraph& graph) { (frontalDim + separatorDim > kQrAspectRatio * frontalDim) && (Ab_.matrix().rows() >= static_cast(frontalDim)); solveMode_ = useQR ? SolveMode::QrLeaf : SolveMode::Cholesky; - if (useQR) { - allocateSeparatorSbm(); // QR only needs separator updates for the parent. - } else { + if (!useQR) { allocateSbm(); } } @@ -320,22 +311,20 @@ void MultifrontalClique::eliminateInPlace() { void MultifrontalClique::updateParent(MultifrontalClique& parent) const { if (useQR()) { - assert(separatorSbm_.nBlocks() > 0); + // Accumulate separator (and RHS) normal equations (S^T S) into the parent. + assert(RSd_.nBlocks() > 0 && RSd_.firstBlock() == 0); + assert(parent.sbm_.nBlocks() > 0); const DenseIndex nfBlocks = static_cast(numFrontals()); - const DenseIndex totalBlocks = RSd_.nBlocks(); - const Matrix Sd = RSd_.range(nfBlocks, totalBlocks); - // Accumulate separator normal equations (S^T S) for the parent update. - separatorSbm_.setZero(); - separatorSbm_.selfadjointView().rankUpdate(Sd.transpose()); - parent.sbm_.updateFromMappedBlocks(separatorSbm_, parentIndices_); - return; + RSd_.firstBlock() = nfBlocks; + parent.sbm_.updateFromOuterProductBlocks(RSd_, parentIndices_); + RSd_.firstBlock() = 0; + } else { + // Accumulate the S^T S part from this clique's SBM into the parent. + assert(sbm_.nBlocks() > 0 && sbm_.blockStart() == 0); + sbm_.blockStart() = numFrontals(); + parent.sbm_.updateFromMappedBlocks(sbm_, parentIndices_); + sbm_.blockStart() = 0; } - // Expose only the separator+RHS view when contributing to the parent. - assert(sbm_.nBlocks() > 0 && sbm_.blockStart() == 0); - sbm_.blockStart() = numFrontals(); - assert(sbm_.nBlocks() == parentIndices_.size()); - parent.sbm_.updateFromMappedBlocks(sbm_, parentIndices_); - sbm_.blockStart() = 0; } std::shared_ptr MultifrontalClique::conditional() const { diff --git a/gtsam/linear/MultifrontalClique.h b/gtsam/linear/MultifrontalClique.h index 3fde296abb..435e3aa20f 100644 --- a/gtsam/linear/MultifrontalClique.h +++ b/gtsam/linear/MultifrontalClique.h @@ -224,9 +224,6 @@ class GTSAM_EXPORT MultifrontalClique { /// Allocate the symmetric block matrix if needed. void allocateSbm(); - /// Allocate the separator-only SBM used for QR leaf updates. - void allocateSeparatorSbm(); - /** * Add a Jacobian factor's contributions into the Ab matrix. * @return Number of rows added. @@ -244,7 +241,6 @@ class GTSAM_EXPORT MultifrontalClique { KeyVector orderedKeys_; ///< Keys ordered by block index (frontals+seps). const std::unordered_set* fixedKeys_ = nullptr; std::vector blockDims_; - std::vector separatorBlockDims_; std::vector parentIndices_; ///< Parent block indices for separators + RHS. std::vector frontalPtrs_; ///< Pointers into solution frontals. @@ -258,9 +254,7 @@ class GTSAM_EXPORT MultifrontalClique { // Elimination-time state. mutable SymmetricBlockMatrix sbm_; - mutable SymmetricBlockMatrix - separatorSbm_; ///< Cached separator update for QR. - VerticalBlockMatrix RSd_; ///< Cached [R S d] from elimination. + mutable VerticalBlockMatrix RSd_; ///< Cached [R S d] from elimination. // Solve-time scratch space. mutable Vector rhsScratch_; ///< Cached RHS workspace for back-substitution. From 0f7e2a638d6d2d4d89e79b6468d6c35486820f09 Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Mon, 5 Jan 2026 12:35:55 -0500 Subject: [PATCH 11/12] In-place split --- gtsam/base/SymmetricBlockMatrix.cpp | 20 +++++++++++++++----- gtsam/base/SymmetricBlockMatrix.h | 3 +++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/gtsam/base/SymmetricBlockMatrix.cpp b/gtsam/base/SymmetricBlockMatrix.cpp index 9ee96fd93e..96149ab538 100644 --- a/gtsam/base/SymmetricBlockMatrix.cpp +++ b/gtsam/base/SymmetricBlockMatrix.cpp @@ -89,20 +89,30 @@ void SymmetricBlockMatrix::choleskyPartial(DenseIndex nFrontals) { } /* ************************************************************************* */ -VerticalBlockMatrix SymmetricBlockMatrix::split(DenseIndex nFrontals) { +void SymmetricBlockMatrix::split(DenseIndex nFrontals, + VerticalBlockMatrix* RSd) { gttic(VerticalBlockMatrix_split); + assert(RSd); // Construct a VerticalBlockMatrix that contains [R Sd] - const size_t n1 = offset(nFrontals); - VerticalBlockMatrix RSd = VerticalBlockMatrix::LikeActiveViewOf(*this, n1); + const DenseIndex n1 = offset(nFrontals); + assert(RSd->rows() == n1); + assert(RSd->cols() == cols()); + assert(RSd->nBlocks() == nBlocks()); // Copy into it. - RSd.full() = matrix_.topRows(n1); - RSd.full().triangularView().setZero(); + RSd->full() = matrix_.topRows(n1); + RSd->full().triangularView().setZero(); // Take lower-right block of Ab_ to get the remaining factor blockStart() = nFrontals; +} +VerticalBlockMatrix SymmetricBlockMatrix::split(DenseIndex nFrontals) { + // Construct a VerticalBlockMatrix that contains [R Sd] + const DenseIndex n1 = offset(nFrontals); + VerticalBlockMatrix RSd = VerticalBlockMatrix::LikeActiveViewOf(*this, n1); + split(nFrontals, &RSd); return RSd; } diff --git a/gtsam/base/SymmetricBlockMatrix.h b/gtsam/base/SymmetricBlockMatrix.h index dbb740a633..97bf3bd817 100644 --- a/gtsam/base/SymmetricBlockMatrix.h +++ b/gtsam/base/SymmetricBlockMatrix.h @@ -336,6 +336,9 @@ namespace gtsam { */ VerticalBlockMatrix split(DenseIndex nFrontals); + /// I n-place version of split. + void split(DenseIndex nFrontals, VerticalBlockMatrix* RSd); + protected: /// Number of offsets in the full matrix. From e5e01f45acf6727ec6cb3a454a47ba8aa74cf4fc Mon Sep 17 00:00:00 2001 From: Frank Dellaert Date: Mon, 5 Jan 2026 12:36:15 -0500 Subject: [PATCH 12/12] No more mallocs in QR path --- gtsam/linear/MultifrontalClique.cpp | 36 ++++++++++++++++------------- gtsam/linear/MultifrontalClique.h | 1 + 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/gtsam/linear/MultifrontalClique.cpp b/gtsam/linear/MultifrontalClique.cpp index 2dc7d624f4..b0a797447c 100644 --- a/gtsam/linear/MultifrontalClique.cpp +++ b/gtsam/linear/MultifrontalClique.cpp @@ -174,6 +174,8 @@ void MultifrontalClique::initializeMatrices( Ab_ = VerticalBlockMatrix(blockDims, verticalBlockMatrixRows, true); // Ab's structure is fixed; clear it once and reuse across loads. Ab_.matrix().setZero(); + RSd_ = + VerticalBlockMatrix(blockDims, static_cast(frontalDim), true); } void MultifrontalClique::allocateSbm() { @@ -247,9 +249,11 @@ void MultifrontalClique::fillAb(const GaussianFactorGraph& graph) { (frontalDim + separatorDim > kQrAspectRatio * frontalDim) && (Ab_.matrix().rows() >= static_cast(frontalDim)); solveMode_ = useQR ? SolveMode::QrLeaf : SolveMode::Cholesky; - if (!useQR) { - allocateSbm(); - } + RSdReady_ = false; + assert((useQR && RSd_.matrix().rows() == + static_cast(Ab_.matrix().rows()) || + RSd_.matrix().rows() == static_cast(frontalDim))); + allocateSbm(); } void MultifrontalClique::prepareForElimination() { @@ -273,18 +277,17 @@ void MultifrontalClique::prepareForElimination() { void MultifrontalClique::factorize() { if (useQR()) { - const DenseIndex nfRows = static_cast(frontalDim); - assert(Ab_.matrix().rows() >= nfRows); // Copy Ab_ to preserve its invariant; QR writes in place. - Matrix qr = Ab_.matrix(); - inplace_QR(qr); - RSd_ = VerticalBlockMatrix(blockDims_, nfRows, true); - RSd_.matrix() = qr.topRows(nfRows); - return; + assert(RSd_.matrix().rows() == Ab_.matrix().rows()); + assert(RSd_.matrix().cols() == Ab_.matrix().cols()); + RSd_.matrix() = Ab_.matrix(); + inplace_QR(RSd_.matrix()); + } else { + sbm_.choleskyPartial(numFrontals()); + sbm_.split(numFrontals(), &RSd_); + sbm_.blockStart() = 0; } - sbm_.choleskyPartial(numFrontals()); - RSd_ = sbm_.split(numFrontals()); - sbm_.blockStart() = 0; + RSdReady_ = true; } void MultifrontalClique::addIdentityDamping(double lambda) { @@ -312,7 +315,8 @@ void MultifrontalClique::eliminateInPlace() { void MultifrontalClique::updateParent(MultifrontalClique& parent) const { if (useQR()) { // Accumulate separator (and RHS) normal equations (S^T S) into the parent. - assert(RSd_.nBlocks() > 0 && RSd_.firstBlock() == 0); + assert(RSdReady_); + assert(RSd_.firstBlock() == 0); assert(parent.sbm_.nBlocks() > 0); const DenseIndex nfBlocks = static_cast(numFrontals()); RSd_.firstBlock() = nfBlocks; @@ -328,7 +332,7 @@ void MultifrontalClique::updateParent(MultifrontalClique& parent) const { } std::shared_ptr MultifrontalClique::conditional() const { - assert(RSd_.nBlocks() > 0); + assert(RSdReady_); // RSd_ is cached at elimination time. return std::make_shared(orderedKeys_, numFrontals(), RSd_); @@ -336,7 +340,7 @@ std::shared_ptr MultifrontalClique::conditional() const { // Solve with block back-substitution on the Cholesky-stored SBM. void MultifrontalClique::updateSolution() const { - assert(RSd_.nBlocks() > 0); + assert(RSdReady_); // Use cached [R S d] for fast back-substitution. const size_t nf = numFrontals(); const size_t n = RSd_.nBlocks() - 1; // # frontals + # separators diff --git a/gtsam/linear/MultifrontalClique.h b/gtsam/linear/MultifrontalClique.h index 435e3aa20f..f9e6c3029d 100644 --- a/gtsam/linear/MultifrontalClique.h +++ b/gtsam/linear/MultifrontalClique.h @@ -255,6 +255,7 @@ class GTSAM_EXPORT MultifrontalClique { // Elimination-time state. mutable SymmetricBlockMatrix sbm_; mutable VerticalBlockMatrix RSd_; ///< Cached [R S d] from elimination. + mutable bool RSdReady_ = false; // Solve-time scratch space. mutable Vector rhsScratch_; ///< Cached RHS workspace for back-substitution.