|
1 | | -# single_svdlib |
| 1 | +# Single-SVDLib: Singular Value Decomposition for Sparse Matrices |
2 | 2 |
|
3 | | -A Rust library for performing Singular Value Decomposition (SVD) on sparse matrices using the Lanczos algorithm. It is build on the original library and expan |
| 3 | +[![Crates.io](https://img.shields.io/crates/v/single-svdlib.svg)](https://crates.io/crates/single-svdlib) |
| 4 | +[![Documentation](https://docs.rs/single-svdlib/badge.svg)](https://docs.rs/single-svdlib) |
| 5 | +[![License](https://img.shields.io/crates/l/single-svdlib.svg)](LICENSE) |
4 | 6 |
|
5 | | -## Overview |
6 | | - |
7 | | -`svdlibrs` is a Rust port of LAS2 from SVDLIBC, originally developed by Doug Rohde. This library efficiently computes SVD on sparse matrices, particularly large ones, and returns the decomposition as ndarray components. |
8 | | - |
9 | | -This implementation extends the original [svdlibrs](https://github.com/dfarnham/svdlibrs) by Dave Farnham with: |
10 | | -- Updated dependency versions |
11 | | -- Support for a broader range of numeric types (f64, f32, others) |
12 | | -- Column masking capabilities for analyzing specific subsets of data |
| 7 | +A high-performance Rust library for computing Singular Value Decomposition (SVD) on sparse matrices, with support for both Lanczos and randomized SVD algorithms. |
13 | 8 |
|
14 | 9 | ## Features |
15 | 10 |
|
16 | | -- Performs SVD on sparse matrices using the Lanczos algorithm |
17 | | -- Works with various input formats: CSR, CSC, or COO matrices |
18 | | -- Column masking for dimension selection without data copying |
19 | | -- Generic implementation supporting different numeric types |
20 | | -- High numerical precision for critical calculations |
| 11 | +- **Multiple SVD algorithms**: |
| 12 | + - Lanczos algorithm (based on SVDLIBC) |
| 13 | + - Randomized SVD for very large and sparse matrices |
| 14 | +- **Sparse matrix support**: |
| 15 | + - Compressed Sparse Row (CSR) format |
| 16 | + - Compressed Sparse Column (CSC) format |
| 17 | + - Coordinate (COO) format |
| 18 | +- **Performance optimizations**: |
| 19 | + - Parallel execution with Rayon |
| 20 | + - Adaptive tuning for highly sparse matrices |
| 21 | + - Column masking for subspace SVD |
| 22 | +- **Generic interface**: |
| 23 | + - Works with both `f32` and `f64` precision |
| 24 | +- **Comprehensive error handling and diagnostics** |
21 | 25 |
|
22 | 26 | ## Installation |
23 | 27 |
|
24 | 28 | Add this to your `Cargo.toml`: |
25 | 29 |
|
26 | 30 | ```toml |
27 | 31 | [dependencies] |
28 | | -single-svdlib = "0.1.0" |
29 | | -nalgebra-sparse = "0.10.0" |
30 | | -ndarray = "0.16.1" |
| 32 | +single-svdlib = "0.6.0" |
31 | 33 | ``` |
32 | 34 |
|
33 | | -## Basic Usage |
| 35 | +## Quick Start |
34 | 36 |
|
35 | 37 | ```rust |
36 | | -use single_svdlib::{svd, svd_dim, svd_dim_seed}; |
37 | 38 | use nalgebra_sparse::{coo::CooMatrix, csr::CsrMatrix}; |
| 39 | +use single_svdlib::laczos::svd_dim_seed; |
38 | 40 |
|
39 | | -// Create a sparse matrix |
| 41 | +// Create a matrix in COO format |
40 | 42 | let mut coo = CooMatrix::<f64>::new(3, 3); |
41 | 43 | coo.push(0, 0, 1.0); coo.push(0, 1, 16.0); coo.push(0, 2, 49.0); |
42 | 44 | coo.push(1, 0, 4.0); coo.push(1, 1, 25.0); coo.push(1, 2, 64.0); |
43 | 45 | coo.push(2, 0, 9.0); coo.push(2, 1, 36.0); coo.push(2, 2, 81.0); |
44 | 46 |
|
| 47 | +// Convert to CSR for better performance |
45 | 48 | let csr = CsrMatrix::from(&coo); |
46 | 49 |
|
47 | | -// Compute SVD |
48 | | -let svd_result = svd(&csr)?; |
| 50 | +// Compute SVD with a fixed random seed |
| 51 | +let svd = svd_dim_seed(&csr, 3, 42).unwrap(); |
49 | 52 |
|
50 | 53 | // Access the results |
51 | | -println!("Rank: {}", svd_result.d); |
52 | | -println!("Singular values: {:?}", svd_result.s); |
53 | | -println!("Left singular vectors (U): {:?}", svd_result.ut.t()); |
54 | | -println!("Right singular vectors (V): {:?}", svd_result.vt.t()); |
| 54 | +let singular_values = &svd.s; |
| 55 | +let left_singular_vectors = &svd.ut; // Note: These are transposed |
| 56 | +let right_singular_vectors = &svd.vt; // Note: These are transposed |
55 | 57 |
|
56 | 58 | // Reconstruct the original matrix |
57 | | -let reconstructed = svd_result.recompose(); |
| 59 | +let reconstructed = svd.recompose(); |
58 | 60 | ``` |
59 | 61 |
|
60 | | -## Column Masking |
| 62 | +## SVD Methods |
61 | 63 |
|
62 | | -The library supports analyzing specific columns without copying the data: |
| 64 | +### Lanczos Algorithm (LAS2) |
63 | 65 |
|
64 | | -```rust |
65 | | -use single_svdlib::{svd, MaskedCSRMatrix}; |
66 | | -use nalgebra_sparse::{coo::CooMatrix, csr::CsrMatrix}; |
| 66 | +The Lanczos algorithm is well-suited for sparse matrices of moderate size: |
67 | 67 |
|
68 | | -// Create a sparse matrix |
69 | | -let mut coo = CooMatrix::<f64>::new(3, 5); |
70 | | -coo.push(0, 0, 1.0); coo.push(0, 2, 2.0); coo.push(0, 4, 3.0); |
71 | | -coo.push(1, 1, 4.0); coo.push(1, 3, 5.0); |
72 | | -coo.push(2, 0, 6.0); coo.push(2, 2, 7.0); coo.push(2, 4, 8.0); |
| 68 | +```rust |
| 69 | +use single_svdlib::laczos; |
73 | 70 |
|
74 | | -let csr = CsrMatrix::from(&coo); |
| 71 | +// Basic SVD computation (uses defaults) |
| 72 | +let svd = laczos::svd(&matrix)?; |
75 | 73 |
|
76 | | -// Method 1: Using a boolean mask (true = include column) |
77 | | -let mask = vec![true, false, true, false, true]; // Only columns 0, 2, 4 |
78 | | -let masked_matrix = MaskedCSRMatrix::new(&csr, mask); |
| 74 | +// SVD with specified target rank |
| 75 | +let svd = laczos::svd_dim(&matrix, 10)?; |
79 | 76 |
|
80 | | -// Method 2: Specifying which columns to include |
81 | | -let columns = vec![0, 2, 4]; |
82 | | -let masked_matrix = MaskedCSRMatrix::with_columns(&csr, &columns); |
| 77 | +// SVD with specified target rank and fixed random seed |
| 78 | +let svd = laczos::svd_dim_seed(&matrix, 10, 42)?; |
83 | 79 |
|
84 | | -// Run SVD on the masked matrix |
85 | | -let svd_result = svd(&masked_matrix)?; |
| 80 | +// Full control over SVD parameters |
| 81 | +let svd = laczos::svd_las2( |
| 82 | + &matrix, |
| 83 | + dimensions, // upper limit of desired number of dimensions |
| 84 | + iterations, // number of Lanczos iterations |
| 85 | + end_interval, // interval containing unwanted eigenvalues, e.g. [-1e-30, 1e-30] |
| 86 | + kappa, // relative accuracy of eigenvalues, e.g. 1e-6 |
| 87 | + random_seed, // random seed (0 for automatic) |
| 88 | +)?; |
86 | 89 | ``` |
87 | 90 |
|
88 | | -## Support for Different Numeric Types |
| 91 | +### Randomized SVD |
89 | 92 |
|
90 | | -The library supports various numeric types: |
| 93 | +For very large sparse matrices, the randomized SVD algorithm offers better performance: |
91 | 94 |
|
92 | 95 | ```rust |
93 | | -// With f64 (double precision) |
94 | | -let csr_f64 = CsrMatrix::<f64>::from(&coo); |
95 | | -let svd_result = svd(&csr_f64)?; |
96 | | - |
97 | | -// With f32 (single precision) |
98 | | -let csr_f32 = CsrMatrix::<f32>::from(&coo); |
99 | | -let svd_result = svd(&csr_f32)?; |
100 | | - |
101 | | -// With integer types (converted internally) |
102 | | -let csr_i32 = CsrMatrix::<i32>::from(&coo); |
103 | | -let masked_i32 = MaskedCSRMatrix::with_columns(&csr_i32, &columns); |
104 | | -let svd_result = svd(&masked_i32)?; |
| 96 | +use single_svdlib::randomized; |
| 97 | + |
| 98 | +let svd = randomized::randomized_svd( |
| 99 | + &matrix, |
| 100 | + target_rank, // desired rank |
| 101 | + n_oversamples, // oversampling parameter (typically 5-10) |
| 102 | + n_power_iterations, // number of power iterations (typically 2-4) |
| 103 | + randomized::PowerIterationNormalizer::QR, // normalization method |
| 104 | + Some(42), // random seed (None for automatic) |
| 105 | +)?; |
105 | 106 | ``` |
106 | 107 |
|
107 | | -## Advanced Usage |
| 108 | +### Column Masking |
108 | 109 |
|
109 | | -For more control over the SVD computation: |
| 110 | +For operations on specific columns of a matrix: |
110 | 111 |
|
111 | 112 | ```rust |
112 | | -use single_svdlib::{svdLAS2, SvdRec}; |
113 | | - |
114 | | -// Customize the SVD calculation |
115 | | -let svd: SvdRec = svdLAS2( |
116 | | - &matrix, // sparse matrix |
117 | | - dimensions, // upper limit of desired dimensions (0 = max) |
118 | | - iterations, // number of algorithm iterations (0 = auto) |
119 | | - &[-1.0e-30, 1.0e-30], // interval for unwanted eigenvalues |
120 | | - 1.0e-6, // relative accuracy threshold |
121 | | - random_seed, // random seed (0 = auto-generate) |
122 | | -)?; |
| 113 | +use single_svdlib::laczos::masked::MaskedCSRMatrix; |
| 114 | + |
| 115 | +// Create a mask for selected columns |
| 116 | +let columns = vec![0, 2, 5, 7]; // Only use these columns |
| 117 | +let masked_matrix = MaskedCSRMatrix::with_columns(&csr_matrix, &columns); |
| 118 | + |
| 119 | +// Compute SVD on the masked matrix |
| 120 | +let svd = laczos::svd(&masked_matrix)?; |
123 | 121 | ``` |
124 | 122 |
|
125 | | -## SVD Results and Diagnostics |
| 123 | +## Result Structure |
126 | 124 |
|
127 | | -The SVD results are returned in a `SvdRec` struct: |
| 125 | +The SVD result contains: |
128 | 126 |
|
129 | 127 | ```rust |
130 | | -pub struct SvdRec { |
131 | | - pub d: usize, // Dimensionality (rank) |
132 | | - pub ut: Array2<f64>, // Transpose of left singular vectors |
133 | | - pub s: Array1<f64>, // Singular values |
134 | | - pub vt: Array2<f64>, // Transpose of right singular vectors |
135 | | - pub diagnostics: Diagnostics, // Computational diagnostics |
| 128 | +struct SvdRec<T> { |
| 129 | + d: usize, // Rank (number of singular values) |
| 130 | + ut: Array2<T>, // Transpose of left singular vectors (d x m) |
| 131 | + s: Array1<T>, // Singular values (d) |
| 132 | + vt: Array2<T>, // Transpose of right singular vectors (d x n) |
| 133 | + diagnostics: Diagnostics<T>, // Computation diagnostics |
136 | 134 | } |
137 | 135 | ``` |
138 | 136 |
|
139 | | -The `Diagnostics` struct provides detailed information about the computation: |
| 137 | +Note that `ut` and `vt` are returned in transposed form: each row holds one singular vector, so call `.t()` (e.g. `svd.ut.t()`) to obtain U and V. |
| 138 | + |
| 139 | +## Diagnostics |
| 140 | + |
| 141 | +Each SVD computation returns detailed diagnostics: |
140 | 142 |
|
141 | 143 | ```rust |
142 | | -pub struct Diagnostics { |
143 | | - pub non_zero: usize, // Number of non-zeros in the input matrix |
144 | | - pub dimensions: usize, // Number of dimensions attempted |
145 | | - pub iterations: usize, // Number of iterations attempted |
146 | | - pub transposed: bool, // True if the matrix was transposed internally |
147 | | - pub lanczos_steps: usize, // Number of Lanczos steps |
148 | | - pub ritz_values_stabilized: usize, // Number of ritz values |
149 | | - pub significant_values: usize, // Number of significant values |
150 | | - pub singular_values: usize, // Number of singular values |
151 | | - pub end_interval: [f64; 2], // Interval for unwanted eigenvalues |
152 | | - pub kappa: f64, // Relative accuracy threshold |
153 | | - pub random_seed: u32, // Random seed used |
154 | | -} |
| 144 | +let svd = laczos::svd(&matrix)?; |
| 145 | +println!("Non-zero elements: {}", svd.diagnostics.non_zero); |
| 146 | +println!("Transposed during computation: {}", svd.diagnostics.transposed); |
| 147 | +println!("Lanczos steps: {}", svd.diagnostics.lanczos_steps); |
| 148 | +println!("Significant values found: {}", svd.diagnostics.significant_values); |
155 | 149 | ``` |
156 | 150 |
|
157 | | -## License |
| 151 | +## Performance Tips |
158 | 152 |
|
159 | | -This library is provided under the BSD License, as per the original SVDLIBC implementation. |
| 153 | +1. **Choose the right algorithm**: |
| 154 | + - For matrices up to ~10,000 x 10,000 with moderate sparsity, use the Lanczos algorithm |
| 155 | + - For larger matrices or very high sparsity (>99%), use randomized SVD |
| 156 | + |
| 157 | +2. **Matrix format matters**: |
| 158 | + - Convert COO matrices to CSR or CSC for computation |
| 159 | + - CSR typically performs better for row-oriented operations |
| 160 | + |
| 161 | +3. **Adjust parameters for very sparse matrices**: |
| 162 | + - Increase power iterations in randomized SVD (e.g., 5-7) |
| 163 | + - Use a higher `kappa` value in Lanczos for very sparse matrices |
| 164 | + |
| 165 | +4. **Consider column masking** for operations that only need a subset of the data |
| 166 | + |
| 167 | +## License |
160 | 168 |
|
161 | | -## Acknowledgments |
| 169 | +This crate is licensed under the BSD License, the same as the original SVDLIBC implementation. See the `SVDLIBC-LICENSE.txt` file for details. |
162 | 170 |
|
163 | | -- Dave Farnham for the original Rust port |
164 | | -- Doug Rohde for the original SVDLIBC implementation |
165 | | -- University of Tennessee Research Foundation for the underlying mathematical library |
| 171 | +## Credits |
166 | 172 |
|
167 | | -[Latest Version]: https://img.shields.io/crates/v/single-svdlib.svg |
168 | | -[crates.io]: https://crates.io/crates/single-svdlib |
| 173 | +- Original SVDLIBC implementation by Doug Rohde |
| 174 | +- Original Rust port of SVDLIBC ([svdlibrs](https://github.com/dfarnham/svdlibrs)): Dave Farnham |
| 175 | +- Extensions and modifications of the original algorithm: Ian F. Diks |
0 commit comments