@@ -187,21 +187,52 @@ int mju_cholFactorSparse(mjtNum* mat, int n, mjtNum mindiag,
187187 return rank ;
188188}
189189
190-
191- // precount row non-zeros of reverse-Cholesky factor L, return total non-zeros
192- // based on ldl_symbolic from 'Algorithm 8xx: a concise sparse Cholesky factorization package'
193- // reads pattern from upper triangle
194- int mju_cholFactorCount (int * L_rownnz , const int * rownnz , const int * rowadr , const int * colind ,
195- int n , mjData * d ) {
190+ // symbolic reverse-Cholesky: compute both L (CSR) and LT (CSC) structures
191+ // if L_colind is NULL, perform counting logic (fill rownnz/rowadr arrays and return total nnz)
192+ // if L_colind is not NULL, assume rownnz/rowadr are precomputed and fill colind/map arrays
193+ // reads pattern from upper triangle
194+ // based on ldl_symbolic from 'Algorithm 8xx: a concise sparse Cholesky factorization package'
195+ int mju_cholFactorSymbolic (int * restrict L_colind , int * restrict L_rownnz , int * restrict L_rowadr ,
196+ int * restrict LT_colind , int * restrict LT_rownnz ,
197+ int * restrict LT_rowadr , int * restrict LT_map ,
198+ const int * rownnz , const int * rowadr , const int * colind , int n ,
199+ mjData * d ) {
196200 mj_markStack (d );
197- int * parent = mjSTACKALLOC (d , n , int );
198- int * flag = mjSTACKALLOC (d , n , int );
201+ int * restrict parent = mjSTACKALLOC (d , n , int );
202+ int * restrict flag = mjSTACKALLOC (d , n , int );
203+ int * restrict cursor = NULL ;
204+ int * LT_write = NULL ;
205+
206+ // filling phase: initialize write positions
207+ if (L_colind ) {
208+ cursor = mjSTACKALLOC (d , n , int );
209+ LT_write = mjSTACKALLOC (d , n , int );
210+ for (int r = 0 ; r < n ; r ++ ) {
211+ cursor [r ] = L_rowadr [r ] + L_rownnz [r ] - 2 ; // end of row r (before diagonal)
212+ LT_write [r ] = LT_rowadr [r ]; // start of LT row r
213+ }
214+ }
199215
200216 // loop over rows in reverse order
201217 for (int r = n - 1 ; r >= 0 ; r -- ) {
202218 parent [r ] = -1 ;
203219 flag [r ] = r ;
204- L_rownnz [r ] = 1 ; // start with 1 for diagonal
220+
221+ // counting phase: start with 1 for diagonal
222+ if (!L_colind ) {
223+ L_rownnz [r ] = 1 ;
224+ LT_rownnz [r ] = 1 ;
225+ }
226+
227+ // filling phase: write diagonals
228+ else {
229+ int diag_idx = L_rowadr [r ] + L_rownnz [r ] - 1 ;
230+ L_colind [diag_idx ] = r ;
231+ int write_idx = LT_write [r ];
232+ LT_colind [write_idx ] = r ;
233+ LT_map [write_idx ] = diag_idx ;
234+ LT_write [r ]++ ;
235+ }
205236
206237 // loop over non-zero columns of upper triangle
207238 int start = rowadr [r ];
@@ -221,8 +252,23 @@ int mju_cholFactorCount(int* L_rownnz, const int* rownnz, const int* rowadr, con
221252 parent [i ] = r ;
222253 }
223254
224- // increment non-zeros, flag row i, advance to parent
225- L_rownnz [i ]++ ;
255+ // counting phase: increment non-zeros
256+ if (!L_colind ) {
257+ L_rownnz [i ]++ ;
258+ LT_rownnz [r ]++ ;
259+ }
260+
261+ // filling phase: write L[i, r] and LT[r, i]
262+ else {
263+ int L_idx = cursor [i ];
264+ cursor [i ]-- ;
265+ L_colind [L_idx ] = r ;
266+ LT_colind [LT_write [r ]] = i ;
267+ LT_map [LT_write [r ]] = L_idx ;
268+ LT_write [r ]++ ;
269+ }
270+
271+ // flag row i, advance to parent
226272 flag [i ] = r ;
227273 i = parent [i ];
228274 }
@@ -231,15 +277,98 @@ int mju_cholFactorCount(int* L_rownnz, const int* rownnz, const int* rowadr, con
231277
232278 mj_freeStack (d );
233279
234- // sum up all row non-zeros
280+ // counting phase: compute row addresses, add up total non-zeros
235281 int nnz = 0 ;
236- for (int r = 0 ; r < n ; r ++ ) {
237- nnz += L_rownnz [r ];
282+ if (!L_colind ) {
283+ nnz = L_rownnz [0 ];
284+ L_rowadr [0 ] = 0 ;
285+ LT_rowadr [0 ] = 0 ;
286+ for (int r = 1 ; r < n ; r ++ ) {
287+ L_rowadr [r ] = L_rowadr [r - 1 ] + L_rownnz [r - 1 ];
288+ LT_rowadr [r ] = LT_rowadr [r - 1 ] + LT_rownnz [r - 1 ];
289+ nnz += L_rownnz [r ];
290+ }
238291 }
239292
240293 return nnz ;
241294}
242295
296+ // numeric reverse-Cholesky: compute L values given fixed sparsity pattern, returns rank
297+ // L_colind must already contain the correct sparsity pattern (from mju_cholFactorSymbolic)
298+ // LT_map[k] gives index in L for LT_colind[k]
299+ int mju_cholFactorNumeric (mjtNum * restrict L , int n , mjtNum mindiag ,
300+ const int * L_rownnz , const int * L_rowadr , const int * L_colind ,
301+ const int * LT_rownnz , const int * LT_rowadr , const int * LT_colind ,
302+ const int * LT_map , const mjtNum * H ,
303+ const int * H_rownnz , const int * H_rowadr , const int * H_colind ,
304+ mjData * d ) {
305+ int rank = n ;
306+
307+ // single-row dense accumulator
308+ mj_markStack (d );
309+ mjtNum * restrict dense = mjSTACKALLOC (d , n , mjtNum );
310+ mju_zero (dense , n );
311+
312+ // backpass over rows
313+ for (int r = n - 1 ; r >= 0 ; r -- ) {
314+ // scatter H[r, 0:r] into dense
315+ mju_scatter (dense , H + H_rowadr [r ], H_colind + H_rowadr [r ], H_rownnz [r ]);
316+
317+ // accumulate updates from rows c > r where L[c,r] != 0
318+ // use CSC transpose: LT column r contains rows that have column r
319+ // start from k=1 to skip the diagonal entry (LT_colind[LT_adr] = r)
320+ int LT_adr = LT_rowadr [r ];
321+ int LT_nnz = LT_rownnz [r ];
322+ for (int k = 1 ; k < LT_nnz ; k ++ ) {
323+ int c = LT_colind [LT_adr + k ]; // row c has L[c,r] != 0, c > r guaranteed
324+
325+ // get L[c,r] index directly from LT_map
326+ int L_cr_idx = LT_map [LT_adr + k ];
327+ mjtNum L_cr = L [L_cr_idx ];
328+
329+ // get row c info
330+ int c_adr = L_rowadr [c ];
331+
332+ // dense[j] -= L[c,r] * L[c,j] for all j <= r in L[c]
333+ // L_cr_idx - c_adr gives the position of r in row c
334+ int num_cols = L_cr_idx - c_adr + 1 ;
335+ const int * colptr = L_colind + c_adr ;
336+ const mjtNum * Lptr = L + c_adr ;
337+ for (int i = 0 ; i < num_cols ; i ++ ) {
338+ dense [colptr [i ]] -= L_cr * Lptr [i ];
339+ }
340+ }
341+
342+ // factor row r diagonal, handle rank-deficient case
343+ mjtNum diag = dense [r ];
344+ if (diag < mindiag ) {
345+ diag = mindiag ;
346+ rank -- ;
347+ }
348+
349+ // scale off-diagonals
350+ mjtNum L_rr = mju_sqrt (diag );
351+ mjtNum L_rr_inv = 1.0 / L_rr ;
352+ int L_adr = L_rowadr [r ];
353+ int L_nnz = L_rownnz [r ];
354+ const int * colptr = L_colind + L_adr ;
355+ mjtNum * Lptr = L + L_adr ;
356+ for (int i = 0 ; i < L_nnz - 1 ; i ++ ) {
357+ Lptr [i ] = dense [colptr [i ]] * L_rr_inv ;
358+ }
359+
360+ // store diagonal
361+ L [L_adr + L_nnz - 1 ] = L_rr ;
362+
363+ // clear dense workspace
364+ for (int i = 0 ; i < L_nnz ; i ++ ) {
365+ dense [colptr [i ]] = 0 ;
366+ }
367+ }
368+
369+ mj_freeStack (d );
370+ return rank ;
371+ }
243372
244373// sparse reverse-order Cholesky solve
245374void mju_cholSolveSparse (mjtNum * res , const mjtNum * mat , const mjtNum * vec , int n ,
@@ -528,7 +657,7 @@ void mju_band2Dense(mjtNum* res, const mjtNum* mat, int ntotal, int nband, int n
528657 mju_zero (res , ntotal * ntotal );
529658
530659 // sparse part
531- for (int i = 0 ; i < nsparse ; i ++ ) {
660+ for (int i = 0 ; i < nsparse ; i ++ ) {
532661 // number of non-zeros left of (i,i)
533662 int width = mjMIN (i , nband - 1 );
534663
@@ -537,13 +666,13 @@ void mju_band2Dense(mjtNum* res, const mjtNum* mat, int ntotal, int nband, int n
537666 }
538667
539668 // dense part
540- for (int i = nsparse ; i < ntotal ; i ++ ) {
669+ for (int i = nsparse ; i < ntotal ; i ++ ) {
541670 mju_copy (res + i * ntotal , mat + nsparse * nband + (i - nsparse )* ntotal , i + 1 );
542671 }
543672
544673 // make symmetric
545674 if (flg_sym ) {
546- for (int i = 0 ; i < ntotal ; i ++ ) {
675+ for (int i = 0 ; i < ntotal ; i ++ ) {
547676 for (int j = i + 1 ; j < ntotal ; j ++ ) {
548677 res [i * ntotal + j ] = res [j * ntotal + i ];
549678 }
@@ -557,7 +686,7 @@ void mju_dense2Band(mjtNum* res, const mjtNum* mat, int ntotal, int nband, int n
557686 int nsparse = ntotal - ndense ;
558687
559688 // sparse part
560- for (int i = 0 ; i < nsparse ; i ++ ) {
689+ for (int i = 0 ; i < nsparse ; i ++ ) {
561690 // number of non-zeros left of (i,i)
562691 int width = mjMIN (i , nband - 1 );
563692
@@ -566,7 +695,7 @@ void mju_dense2Band(mjtNum* res, const mjtNum* mat, int ntotal, int nband, int n
566695 }
567696
568697 // dense part
569- for (int i = nsparse ; i < ntotal ; i ++ ) {
698+ for (int i = nsparse ; i < ntotal ; i ++ ) {
570699 mju_copy (res + nsparse * nband + (i - nsparse )* ntotal , mat + i * ntotal , i + 1 );
571700 }
572701}
@@ -578,13 +707,13 @@ void mju_bandMulMatVec(mjtNum* res, const mjtNum* mat, const mjtNum* vec,
578707 int nsparse = ntotal - ndense ;
579708
580709 // handle multiple vectors
581- for (int j = 0 ; j < nvec ; j ++ ) {
710+ for (int j = 0 ; j < nvec ; j ++ ) {
582711 // precompute pointer to corresponding vector in vec and res
583712 const mjtNum * vec_j = vec + ntotal * j ;
584713 mjtNum * res_j = res + ntotal * j ;
585714
586715 // sparse part
587- for (int i = 0 ; i < nsparse ; i ++ ) {
716+ for (int i = 0 ; i < nsparse ; i ++ ) {
588717 int width = mjMIN (i + 1 , nband );
589718 int adr = i * nband + nband - width ;
590719 int offset = mjMAX (0 , i - nband + 1 );
@@ -596,7 +725,7 @@ void mju_bandMulMatVec(mjtNum* res, const mjtNum* mat, const mjtNum* vec,
596725 }
597726
598727 // dense part
599- for (int i = nsparse ; i < ntotal ; i ++ ) {
728+ for (int i = nsparse ; i < ntotal ; i ++ ) {
600729 int adr = nsparse * nband + (i - nsparse )* ntotal ;
601730 res_j [i ] = mju_dot (mat + adr , vec_j , i + 1 );
602731 if (flg_sym ) {
@@ -808,7 +937,7 @@ int mju_eig3(mjtNum eigval[3], mjtNum eigvec[9], mjtNum quat[4], const mjtNum ma
808937 mju_normalize4 (quat );
809938 }
810939
811- // sort eigenvalues in decreasing order (bubblesort : 0, 1, 0)
940+ // sort eigenvalues in decreasing order (bubble sort : 0, 1, 0)
812941 for (int j = 0 ; j < 3 ; j ++ ) {
813942 int j1 = j %2 ; // lead index
814943
0 commit comments