@@ -488,42 +488,114 @@ def prox_vec(w, z, penalty, lipschitz):
488
488
return w
489
489
490
490
491
+ POWER_MAX_ITER = 20
492
+ TOL = 1e-6 ** 2
493
+
494
+
491
495
@njit
492
- def power_method (X_data , X_indptr , X_indices , n_iter ):
493
- """Power method to compute largest eigenvalue of sparse matrix X.
494
-
496
+ def spectral_norm2 (X_data , X_indptr , X_indices , n_samples ):
497
+ """Compute the squared spectral norm of sparse matrix ``X``.
498
+
499
+ Find the largest eigenvalue of ``X @ X.T`` using the power method.
500
+
495
501
Parameters
496
502
----------
497
503
X_data : array, shape (n_elements,)
498
- `data` attribute of the sparse CSC matrix X .
504
+ `data` attribute of the sparse CSC matrix ``X`` .
499
505
500
506
X_indptr : array, shape (n_features + 1,)
501
- `indptr` attribute of the sparse CSC matrix X .
507
+ `indptr` attribute of the sparse CSC matrix ``X`` .
502
508
503
509
X_indices : array, shape (n_elements,)
504
- `indices` attribute of the sparse CSC matrix X .
505
-
506
- n_iter : int
507
- Number of iterations for the power method .
508
-
510
+ `indices` attribute of the sparse CSC matrix ``X`` .
511
+
512
+ n_samples : int
513
+ number of rows of ``X`` .
514
+
509
515
Returns
510
516
-------
511
- rayleigh : float
512
- Rayleigh quotient or spectral radius of X^TX
517
+ eigenvalue : float
518
+ The largest eigenvalue value of ``X.T @ X``, aka the squared spectral of ``X``.
519
+
520
+ References
521
+ ----------
522
+ .. [1] Alfio Quarteroni, Riccardo Sacco, Fausto Saleri "Numerical Mathematics",
523
+ chapiter 5, page 192-195.
513
524
"""
525
+ # init vec with norm(vec) == 1.
526
+ eigenvector = np .full (n_samples , 1 / np .sqrt (n_samples ))
527
+ eigenvalue = 1.
528
+
529
+ for _ in range (POWER_MAX_ITER ):
530
+ vec = _XXT_dot_vec (X_data , X_indptr , X_indices , eigenvector , n_samples )
531
+ norm_vec = norm (vec )
532
+ eigenvalue = vec @ eigenvector
533
+
534
+ # norm(X @ X.T @ eigenvector - eigenvalue * eigenvector) <= tol
535
+ if norm_vec ** 2 - eigenvalue ** 2 <= TOL :
536
+ break
537
+
538
+ eigenvector = vec / norm_vec
539
+
540
+ return eigenvalue
541
+
542
+
543
+ @njit
544
+ def _XXT_dot_vec (X_data , X_indptr , X_indices , vec , n_samples ):
545
+ # computes X @ X.T @ vec, with X csc encoded
546
+ return _X_dot_vec (X_data , X_indptr , X_indices ,
547
+ _XT_dot_vec (X_data , X_indptr , X_indices , vec ),
548
+ n_samples )
549
+
550
+
551
+ @njit
552
+ def _X_dot_vec (X_data , X_indptr , X_indices , vec , n_samples ):
553
+ # compute X @ vec, with X csc encoded
554
+ result = np .zeros (n_samples )
555
+
556
+ # loop over features
557
+ for j in range (len (X_indptr ) - 1 ):
558
+ if vec [j ] == 0 :
559
+ continue
560
+
561
+ col_j_rows_idx = slice (X_indptr [j ], X_indptr [j + 1 ])
562
+ result [X_indices [col_j_rows_idx ]] += vec [j ] * X_data [col_j_rows_idx ]
563
+
564
+ return result
565
+
566
+
567
+ @njit
568
+ def _XT_dot_vec (X_data , X_indptr , X_indices , vec ):
569
+ # compute X.T @ vec, with X csc encoded
514
570
n_features = len (X_indptr ) - 1
515
- b_k = np .random .rand (n_features )
516
- for _ in range (n_iter ):
517
- b_k1 = np .zeros (n_features )
518
- for j in range (n_features ):
519
- b_k1 [j ] =
520
- b_k1 = A @ b_k # write the full loop
521
- b_k1_nrm = norm (b_k1 )
522
- b_k = b_k1 / b_k1_nrm
523
- rayleigh = b_k1 @ b_k / (norm (b_k ) ** 2 )
524
- return rayleigh
571
+ result = np .zeros (n_features )
572
+
573
+ for j in range (n_features ):
574
+ for idx in range (X_indptr [j ], X_indptr [j + 1 ]):
575
+ result [j ] += X_data [idx ] * vec [X_indices [idx ]]
576
+
577
+ return result
578
+
525
579
580
+ if __name__ == '__main__' :
581
+ from scipy .sparse import csc_matrix , random
582
+ import time
583
+ n_samples , n_features = 500 , 600
584
+ A = random (n_samples , n_features , density = 0.5 , format = 'csc' )
526
585
586
+ X = csc_matrix (A )
587
+ X_dense = X .toarray ()
527
588
589
+ # cache numba compilation
590
+ M = random (5 , 7 , density = 0.9 , format = 'csc' )
591
+ spectral_norm2 (M .data , M .indptr , M .indices , 5 )
528
592
593
+ start = time .time ()
594
+ spectral_norm2 (X .data , X .indptr , X .indices , n_samples )
595
+ end = time .time ()
596
+ print ("our: " , end - start )
529
597
598
+ start = time .time ()
599
+ norm (X_dense , ord = 2 ) ** 2
600
+ end = time .time ()
601
+ print ("np: " , end - start )
0 commit comments