Skip to content

Commit 76e0a78

Browse files
yuvaltassacopybara-github
authored andcommitted
Use two-step sparse Cholesky in Newton solver.
17% overall speedup for `100_humanoids.xml` as measured by `testspeed` PiperOrigin-RevId: 846723342 Change-Id: Ibc41487bfc576c992f14853640d0b6000f5b47b2
1 parent 45b0153 commit 76e0a78

File tree

1 file changed

+31
-48
lines changed

1 file changed

+31
-48
lines changed

src/engine/engine_solver.c

Lines changed: 31 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -815,18 +815,24 @@ struct _mjCGContext {
815815
mjtNum* D; // constraint inertia (nefc x 1)
816816
int* H_rowadr; // Hessian row addresses (nv x 1)
817817
int* H_rownnz; // Hessian row nonzeros (nv x 1)
818-
int* H_lowernnz; // Hessian lower triangle row nonzeros (nv x 1)
818+
int* HT_rownnz; // Hessian transpose row nonzeros (nv x 1)
819+
int* HT_rowadr; // Hessian transpose row addresses (nv x 1)
819820
int* L_rownnz; // Hessian factor row nonzeros (nv x 1)
820821
int* L_rowadr; // Hessian factor row addresses (nv x 1)
822+
int* LT_rownnz; // Hessian factor transpose row nonzeros (nv x 1)
823+
int* LT_rowadr; // Hessian factor transpose row addresses (nv x 1)
821824
int* buf_ind; // index buffer for sparse addition (nv x 1)
822825
mjtNum* buf_val; // value buffer for sparse addition (nv x 1)
823826

824827
// Newton arrays, computed-size (MakeHessian)
825828
int nH; // number of nonzeros in Hessian H
826829
int* H_colind; // Hessian column indices (nH x 1)
830+
int* HT_colind; // Hessian transpose column indices (nH x 1)
827831
mjtNum* H; // Hessian (nH x 1)
828832
int nL; // number of nonzeros in Cholesky factor L
829833
int* L_colind; // Cholesky factor column indices (nL x 1)
834+
int* LT_colind; // Cholesky factor transpose column indices (nL x 1)
835+
int* LT_map; // CSC-to-CSR index mapping (nL x 1)
830836
mjtNum* L; // Cholesky factor (nL x 1)
831837
mjtNum* Lcone; // Cholesky factor with cone contributions (nL x 1)
832838

@@ -989,9 +995,12 @@ static void CGallocate(mjData* d, mjCGContext* ctx, int flg_Newton) {
989995
if (ctx->is_sparse) {
990996
ctx->H_rowadr = mjSTACKALLOC(d, nv, int);
991997
ctx->H_rownnz = mjSTACKALLOC(d, nv, int);
992-
ctx->H_lowernnz = mjSTACKALLOC(d, nv, int);
998+
ctx->HT_rownnz = mjSTACKALLOC(d, nv, int);
999+
ctx->HT_rowadr = mjSTACKALLOC(d, nv, int);
9931000
ctx->L_rownnz = mjSTACKALLOC(d, nv, int);
9941001
ctx->L_rowadr = mjSTACKALLOC(d, nv, int);
1002+
ctx->LT_rownnz = mjSTACKALLOC(d, nv, int);
1003+
ctx->LT_rowadr = mjSTACKALLOC(d, nv, int);
9951004
ctx->buf_val = mjSTACKALLOC(d, nv, mjtNum);
9961005
ctx->buf_ind = mjSTACKALLOC(d, nv, int);
9971006
}
@@ -1549,22 +1558,16 @@ static void MakeHessian(mjData* d, mjCGContext* ctx) {
15491558
ctx->M, ctx->M_rownnz, ctx->M_rowadr, ctx->M_colind,
15501559
ctx->buf_val, ctx->buf_ind);
15511560

1552-
// transiently compute H'; mju_cholFactorNNZ is memory-contiguous in upper triangle layout
1553-
mj_markStack(d);
1554-
int* HT_rownnz = mjSTACKALLOC(d, nv, int);
1555-
int* HT_rowadr = mjSTACKALLOC(d, nv, int);
1556-
int* HT_colind = mjSTACKALLOC(d, ctx->nH, int);
1557-
mju_transposeSparse(NULL, NULL, nv, nv,
1558-
HT_rownnz, HT_rowadr, HT_colind, NULL,
1561+
// compute H' (upper triangle, required for symbolic Cholesky)
1562+
ctx->HT_colind = mjSTACKALLOC(d, ctx->nH, int);
1563+
mju_transposeSparse(NULL, NULL, nv, nv, ctx->HT_rownnz, ctx->HT_rowadr, ctx->HT_colind, NULL,
15591564
ctx->H_rownnz, ctx->H_rowadr, ctx->H_colind);
15601565

15611566
// count total and row non-zeros of reverse-Cholesky factors L and LT
1562-
int* LT_rownnz_temp = mjSTACKALLOC(d, nv, int);
1563-
int* LT_rowadr_temp = mjSTACKALLOC(d, nv, int);
15641567
ctx->nL = mju_cholFactorSymbolic(NULL, ctx->L_rownnz, ctx->L_rowadr, NULL,
1565-
LT_rownnz_temp, LT_rowadr_temp, NULL,
1566-
HT_rownnz, HT_rowadr, HT_colind, nv, d);
1567-
mj_freeStack(d);
1568+
ctx->LT_rownnz, ctx->LT_rowadr, NULL,
1569+
ctx->HT_rownnz, ctx->HT_rowadr, ctx->HT_colind,
1570+
nv, d);
15681571

15691572
// allocate L_colind, L, Lcone
15701573
ctx->L_colind = mjSTACKALLOC(d, ctx->nL, int);
@@ -1573,25 +1576,15 @@ static void MakeHessian(mjData* d, mjCGContext* ctx) {
15731576
ctx->Lcone = mjSTACKALLOC(d, ctx->nL, mjtNum);
15741577
}
15751578

1576-
// count nonzeros in rows of H lower triangle
1577-
for (int r = 0; r < nv; r++) {
1578-
const int* colind = ctx->H_colind + ctx->H_rowadr[r];
1579-
int rownnz = ctx->H_rownnz[r];
1579+
// allocate LT (CSC representation of L)
1580+
ctx->LT_colind = mjSTACKALLOC(d, ctx->nL, int);
1581+
ctx->LT_map = mjSTACKALLOC(d, ctx->nL, int);
15801582

1581-
// count nonzeros up to diagonal (inclusive) for row r
1582-
int nnz = 1;
1583-
while (nnz < rownnz && colind[nnz - 1] < r) {
1584-
nnz++;
1585-
}
1586-
1587-
// last row element is not the diagonal; SHOULD NOT OCCUR
1588-
if (colind[nnz - 1] != r) {
1589-
mjERROR("Newton solver Hessian has zero diagonal on row %d", r);
1590-
}
1591-
1592-
// save row nonzeros
1593-
ctx->H_lowernnz[r] = nnz;
1594-
}
1583+
// symbolic Cholesky: populate L_colind and LT structures
1584+
mju_cholFactorSymbolic(ctx->L_colind, ctx->L_rownnz, ctx->L_rowadr,
1585+
ctx->LT_colind, ctx->LT_rownnz, ctx->LT_rowadr, ctx->LT_map,
1586+
ctx->HT_rownnz, ctx->HT_rowadr, ctx->HT_colind,
1587+
nv, d);
15951588
}
15961589

15971590
// dense
@@ -1643,27 +1636,17 @@ static void FactorizeHessian(mjData* d, mjCGContext* ctx, int flg_recompute) {
16431636
ctx->buf_val, ctx->buf_ind);
16441637
}
16451638

1646-
// copy H lower-triangle into L, fill-in already accounted for
1647-
for (int r = 0; r < nv; r++) {
1648-
int nnz = ctx->H_lowernnz[r];
1649-
mju_copy(ctx->L + ctx->L_rowadr[r], ctx->H + ctx->H_rowadr[r], nnz);
1650-
mju_copyInt(ctx->L_colind + ctx->L_rowadr[r], ctx->H_colind + ctx->H_rowadr[r], nnz);
1651-
ctx->L_rownnz[r] = nnz;
1652-
}
1653-
1654-
// in-place sparse factorization: L = chol(H)
1655-
int rank = mju_cholFactorSparse(ctx->L, nv, mjMINVAL,
1656-
ctx->L_rownnz, ctx->L_rowadr, ctx->L_colind, d);
1639+
// numeric sparse factorization: L = chol(H) using pre-computed sparsity pattern
1640+
int rank = mju_cholFactorNumeric(
1641+
ctx->L, nv, mjMINVAL,
1642+
ctx->L_rownnz, ctx->L_rowadr, ctx->L_colind,
1643+
ctx->LT_rownnz, ctx->LT_rowadr, ctx->LT_colind, ctx->LT_map,
1644+
ctx->H, ctx->H_rownnz, ctx->H_rowadr, ctx->H_colind, d);
16571645

16581646
// rank-deficient; SHOULD NOT OCCUR
16591647
if (rank != nv) {
16601648
mjERROR("rank-deficient sparse Hessian");
16611649
}
1662-
1663-
// pre-counted nL does not match post-factorization nL; SHOULD NOT OCCUR
1664-
if (ctx->nL != ctx->L_rowadr[nv-1] + ctx->L_rownnz[nv-1]) {
1665-
mjERROR("mismatch between pre-counted and post-factorization L nonzeros");
1666-
}
16671650
}
16681651

16691652
// dense

0 commit comments

Comments
 (0)