Skip to content

Commit 8c6acf1

Browse files
author
Ian
committed
Added Clustering, PCA, Sparse utilities
1 parent ac0e099 commit 8c6acf1

File tree

20 files changed

+2562
-49
lines changed

20 files changed

+2562
-49
lines changed

Cargo.lock

Lines changed: 538 additions & 17 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "single_algebra"
3-
version = "0.1.2-alpha.7"
3+
version = "0.2.0-alpha.0"
44
edition = "2021"
55
license-file = "LICENSE.md"
66
description = "A linear algebra convenience library for the single-rust library. Can be used externally as well."
@@ -22,28 +22,36 @@ lapack = ["dep:nalgebra-lapack", "nalgebra-lapack/openblas", "dep:nalgebra"]
2222
nalgebra = ["dep:nalgebra"]
2323
faer = ["dep:faer", "dep:faer-ext"]
2424
simba = ["dep:simba"]
25+
clustering = ["network", "local_moving", "dep:kiddo"]
26+
network = []
27+
local_moving = ["network", "dep:ahash"]
2528

2629

2730
[dependencies]
2831
anyhow = "1.0.95"
2932

3033
env_logger = "0.11.6"
31-
faer = {version = "0.20.1", optional = true}
32-
faer-ext = {version = "0.4.1", features = ["ndarray"], optional = true}
34+
faer = { version = "0.20.1", optional = true }
35+
faer-ext = { version = "0.4.1", features = ["ndarray"], optional = true }
3336
log = "0.4.22"
34-
nalgebra = {version = "0.33", features = ["serde-serialize"], optional = true}
35-
nalgebra-lapack = {version = "0.25.0", optional = true, default-features = false}
37+
nalgebra = { version = "0.33", features = ["serde-serialize"], optional = true }
38+
nalgebra-lapack = { version = "0.25.0", optional = true, default-features = false }
3639
nalgebra-sparse = "0.10"
37-
ndarray = {version = "0.16", features = ["rayon"]}
38-
nshare = {version = "0.10.0", features = ["ndarray", "nalgebra"]}
40+
ndarray = { version = "0.16", features = ["rayon"] }
41+
nshare = { version = "0.10.0", features = ["ndarray", "nalgebra"] }
3942
num-traits = "0.2.19"
4043
rayon = "1.10.0"
41-
simba = {version = "0.9.0", optional = true}
42-
smartcore = {version = "0.4", features=["ndarray-bindings"], optional = true}
44+
simba = { version = "0.9.0", optional = true }
45+
smartcore = { version = "0.4", features = ["ndarray-bindings"], optional = true }
4346
single-svdlib = "0.1.0"
47+
parking_lot = "0.12.3"
48+
petgraph = { version = "0.7.1", features = ["rayon"] }
49+
rand = "0.9.0"
50+
rand_chacha = "0.9.0"
51+
kiddo = { version = "5.0.3", optional = true }
52+
ahash = { version = "0.8.11", optional = true, features = ["compile-time-rng"] }
4453

4554

4655
[dev-dependencies]
4756
criterion = { version = "0.5.1", features = ["html_reports"] }
48-
rand = "0.8.5"
4957
approx = "0.5.1"

README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# single-algebra 🧮
2+
3+
The companion algebra library for single-rust, providing powerful matrix operations and machine learning utilities.
4+
5+
## Features 🚀
6+
7+
- Efficient operations on sparse and dense matrices
8+
- Dimensionality reduction techniques
9+
- Clustering algorithms including Louvain community detection
10+
- More features planned!
11+
12+
## Matrix Operations 📊
13+
14+
- Singular value decomposition (SVD) with parallel and LAPACK implementations
15+
- Matrix convenience functions for statistical operations
16+
- Support for both CSR and CSC sparse matrix formats
17+
18+
## Clustering 🔍
19+
20+
- Louvain community detection
21+
- Similarity network construction
22+
- K-nearest neighbors graph building
23+
- Local moving algorithm for community refinement
24+
25+
## Dimensionality Reduction ⬇️
26+
27+
- Incremental PCA implementation
28+
- Support for sparse matrices in dimensionality reduction
29+
30+
## Acknowledgments 🙏
31+
32+
The Louvain clustering implementation was adapted from [louvain-rs](https://github.com/graphext/louvain-rs/tree/master) written by Juan Morales ([email protected]). The original implementation has been modified to better suit the needs of single-algebra.
33+
34+
## Installation
35+
36+
Add this to your `Cargo.toml`:
37+
38+
```toml
39+
[dependencies]
40+
single_algebra = "0.2.0-alpha.0"
41+
```

benches/csc_matrix_benchmark.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use criterion::{BenchmarkId, Criterion, BenchmarkGroup, criterion_group, criterion_main};
22
use nalgebra_sparse::{CooMatrix, CscMatrix};
3-
use rand::{distributions::Uniform, SeedableRng, rngs::StdRng};
3+
use rand::{SeedableRng, rngs::StdRng};
44
use std::time::Duration;
55
use criterion::measurement::Measurement;
6-
use rand::distributions::Distribution;
6+
use rand::distr::{Distribution, Uniform};
77
use rayon::ThreadPool;
88
use single_algebra::sparse::{MatrixNonZero, MatrixSum};
99

@@ -43,9 +43,9 @@ fn create_csc_matrix(
4343
let mut rng = StdRng::seed_from_u64(seed);
4444
let mut coo = CooMatrix::new(rows, cols);
4545
let total_elements = (rows * cols) as f64 * density;
46-
let value_dist = Uniform::from(0.0..1.0);
47-
let row_dist = Uniform::from(0..rows);
48-
let col_dist = Uniform::from(0..cols);
46+
let value_dist = Uniform::try_from(0.0..1.0).unwrap();
47+
let row_dist = Uniform::try_from(0..rows).unwrap();
48+
let col_dist = Uniform::try_from(0..cols).unwrap();
4949

5050
for _ in 0..total_elements as usize {
5151
let row = row_dist.sample(&mut rng);

benches/csr_matrix_benchmark.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
use criterion::measurement::Measurement;
22
use criterion::{criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion};
33
use nalgebra_sparse::{CooMatrix, CsrMatrix};
4-
use rand::distributions::Distribution;
5-
use rand::{distributions::Uniform, rngs::StdRng, SeedableRng};
4+
use rand::{rngs::StdRng, SeedableRng};
65
use single_algebra::sparse::{MatrixNonZero, MatrixSum};
76
use std::time::Duration;
7+
use rand::distr::{Distribution, Uniform};
88

99
#[derive(Clone)]
1010
pub struct SparseMatrixConfig {
@@ -37,9 +37,9 @@ fn create_test_matrix(rows: usize, cols: usize, density: f64, seed: u64) -> CsrM
3737
let mut rng = StdRng::seed_from_u64(seed);
3838
let mut coo = CooMatrix::new(rows, cols);
3939
let total_elements = (rows * cols) as f64 * density;
40-
let value_dist = Uniform::from(0.0..1.0);
41-
let row_dist = Uniform::from(0..rows);
42-
let col_dist = Uniform::from(0..cols);
40+
let value_dist = Uniform::try_from(0.0..1.0).unwrap();
41+
let row_dist = Uniform::try_from(0..rows).unwrap();
42+
let col_dist = Uniform::try_from(0..cols).unwrap();
4343

4444
for _ in 0..total_elements as usize {
4545
let row = row_dist.sample(&mut rng);

src/clustering/louvain/mod.rs

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
// Louvain method references: https://www.nature.com/articles/s41598-019-41695-z#Sec2 ; original publication: https://iopscience.iop.org/article/10.1088/1742-5468/2008/10/P10008 ; background: https://en.wikipedia.org/wiki/Louvain_method ; source implementation: https://github.com/graphext/louvain-rs/tree/master
2+
// Copyright 2018 Juan Morales ([email protected])
3+
// Repository: https://github.com/graphext/louvain-rs/tree/master
4+
// Licensed under the MIT License.
5+
use std::collections::HashSet;
6+
use num_traits::{Float, FromPrimitive, ToPrimitive};
7+
use std::iter::Sum;
8+
use std::ops::MulAssign;
9+
use rand_chacha::ChaCha20Rng;
10+
use rand_chacha::rand_core::SeedableRng;
11+
use crate::local_moving::standard::StandardLocalMoving;
12+
use crate::network::{Graph, Network};
13+
use crate::network::clustering::{NetworkGrouping, VectorGrouping};
14+
15+
/// Default resolution value (`1.0`). The tests in this file pass it as the
/// `resolution` argument of `Louvain::new`.
pub const DEF_RES: f64 = 1.0;
16+
17+
/// Louvain clustering driver: a seeded `ChaCha20Rng` paired with a
/// `StandardLocalMoving<T>` step that does the per-level work.
///
/// `T` is the numeric type of the resolution handed to `StandardLocalMoving`.
pub struct Louvain<T>
18+
where
19+
T: Float + FromPrimitive + ToPrimitive + Send + Sync + Sum + MulAssign, {
20+
/// RNG passed by mutable reference to every `local_moving.iterate` call.
rng: ChaCha20Rng,
21+
/// Local-moving implementation, built from the resolution given to `new`.
local_moving: StandardLocalMoving<T>
22+
}
23+
24+
impl<T> Louvain<T>
25+
where
26+
T: Float + FromPrimitive + ToPrimitive + Send + Sync + Sum + MulAssign, {
27+
/// Creates a `Louvain` instance.
///
/// * `resolution` - forwarded unchanged to `StandardLocalMoving::new`.
/// * `seed` - seed for the `ChaCha20Rng`; `None` falls back to
///   `u64::default()` (0) via `unwrap_or_default`.
pub fn new(resolution: T, seed: Option<u64>) -> Self {
28+
let seed = seed.unwrap_or_default();
29+
30+
Louvain {
31+
rng: ChaCha20Rng::seed_from_u64(seed),
32+
local_moving: StandardLocalMoving::new(resolution)
33+
}
34+
}
35+
36+
/// Runs a single local-moving pass on `network` without any aggregation.
///
/// `clustering` is handed to the local-moving step by mutable reference.
/// Returns whatever `StandardLocalMoving::iterate` returns.
// NOTE(review): presumably `true` means at least one node changed group —
// confirm against `StandardLocalMoving::iterate`.
pub fn iterate_one_level<N, E>(
37+
&mut self,
38+
network: &Network<N, E>,
39+
clustering: &mut VectorGrouping
40+
) -> bool
41+
where
42+
N: Float + FromPrimitive + ToPrimitive + Send + Sync + Sum + MulAssign,
43+
E: Float + FromPrimitive + ToPrimitive + Send + Sync + Sum + MulAssign, {
44+
self.local_moving.iterate(network, clustering, &mut self.rng)
45+
}
46+
47+
/// Runs the multi-level procedure: a local-moving pass on `network`, then a
/// recursive call on the reduced network, whose result is merged back into
/// `clustering`.
///
/// Returns the logical OR of the flags returned by the local-moving pass at
/// this level and at every recursive level.
pub fn iterate<N, E>(
48+
&mut self,
49+
network: &Network<N, E>,
50+
clustering: &mut VectorGrouping
51+
) -> bool
52+
where
53+
N: Float + FromPrimitive + ToPrimitive + Send + Sync + Sum + MulAssign,
54+
E: Float + FromPrimitive + ToPrimitive + Send + Sync + Sum + MulAssign {
55+
let mut update = self.local_moving.iterate(network, clustering, &mut self.rng);
56+
57+
// As many groups as nodes: nothing to aggregate, so the recursion stops here.
if clustering.group_count() == network.nodes() {
58+
return update;
59+
}
60+
61+
// Recurse on the reduced network, starting from `create_isolated` groups.
let reduced_network = network.create_reduced_network(clustering);
62+
let mut reduced_clustering = VectorGrouping::create_isolated(reduced_network.nodes());
63+
update |= self.iterate(&reduced_network, &mut reduced_clustering);
64+
// Fold the coarser-level result back into the caller's clustering.
clustering.merge(&reduced_clustering);
65+
update
66+
}
67+
68+
/// Builds a `Network<f64, f64>` from a list of `(u32, u32)` node-index pairs.
///
/// * `n_nodes` - number of nodes to create.
/// * `n_edges` - passed to `Graph::with_capacity` together with `n_nodes`.
/// * `adjacency` - endpoint pairs. Each pair is normalized so the smaller
///   index comes first, and a pair already seen (in either order) is skipped.
///
/// Every kept edge is added with weight `1.0`. Afterwards each node's weight
/// is overwritten with the number of kept edge endpoints that touch it
/// (a pair `(i, i)` adds 2 to node `i`; nodes with no edges end up at `0.0`).
///
/// # Panics
///
/// Panics on an out-of-bounds index if any endpoint is `>= n_nodes`.
pub fn build_network<I>(
69+
n_nodes: usize,
70+
n_edges: usize,
71+
adjacency: I
72+
) -> Network<f64, f64>
73+
where
74+
I: Iterator<Item = (u32, u32)> {
75+
let mut graph = Graph::with_capacity(n_nodes, n_edges);
76+
let mut node_indices = Vec::with_capacity(n_nodes);
77+
78+
// Placeholder weight 1.0; replaced by the endpoint count in the final loop.
for _ in 0..n_nodes {
79+
node_indices.push(graph.add_node(1.0));
80+
}
81+
82+
// seen[a] holds every b >= a for which the pair (a, b) was already added.
let mut seen = vec![HashSet::<u32>::new(); n_nodes];
83+
let mut node_weights = vec![0.0; n_nodes];
84+
85+
for (i, j) in adjacency {
86+
// Order the pair so (a, b) and (b, a) map to the same dedup key.
let (i, j) = if i < j { (i, j) } else { (j, i) };
87+
let i_ = i as usize;
88+
let j_ = j as usize;
89+
90+
// `insert` returns true only the first time this pair is encountered.
if seen[i_].insert(j) {
91+
graph.add_edge(
92+
node_indices[i_],
93+
node_indices[j_],
94+
1.0
95+
);
96+
node_weights[j_] += 1.0;
97+
node_weights[i_] += 1.0;
98+
}
99+
}
100+
101+
// NOTE(review): assumes `i.index()` equals the node's insertion order, so it
// lines up with `node_weights`; the indices come from `add_node` above, so
// `node_weight_mut` is expected to return `Some` — confirm.
for &i in &node_indices {
102+
*graph.node_weight_mut(i).unwrap() = node_weights[i.index()];
103+
}
104+
105+
Network::new_from_graph(graph)
106+
}
107+
}
108+
109+
// Unit tests for `Louvain::iterate` and `Louvain::build_network`.
#[cfg(test)]
110+
mod tests {
111+
use super::*;
112+
use petgraph::graph::NodeIndex;
113+
114+
// Fixture: 5 nodes of weight 1.0, a triangle on nodes 0-1-2 and a single
// edge 3-4, all edges with weight 1.0.
fn create_test_network() -> Network<f64, f64> {
115+
let mut graph = Graph::new_undirected();
116+
117+
// Add 5 nodes
118+
for _ in 0..5 {
119+
graph.add_node(1.0);
120+
}
121+
122+
// Add edges to create two communities
123+
graph.add_edge(NodeIndex::new(0), NodeIndex::new(1), 1.0);
124+
graph.add_edge(NodeIndex::new(1), NodeIndex::new(2), 1.0);
125+
graph.add_edge(NodeIndex::new(0), NodeIndex::new(2), 1.0);
126+
graph.add_edge(NodeIndex::new(3), NodeIndex::new(4), 1.0);
127+
128+
Network::new_from_graph(graph)
129+
}
130+
131+
// Runs the full `iterate` on the fixture with seed 42 and checks that the
// triangle and the lone edge end up as two separate groups.
#[test]
132+
fn test_louvain_clustering() {
133+
let network = create_test_network();
134+
let mut clustering = VectorGrouping::create_isolated(network.nodes());
135+
let mut louvain: Louvain<f64> = Louvain::new(DEF_RES.into(), Some(42));
136+
137+
// Starting from isolated groups, `iterate` is expected to return true.
assert!(louvain.iterate(&network, &mut clustering));
138+
139+
// Should identify two communities
140+
assert!(clustering.group_count() == 2);
141+
142+
// Nodes 0, 1 and 2 should be in the same cluster
143+
let cluster1 = clustering.get_group(0);
144+
assert_eq!(clustering.get_group(1), cluster1);
145+
assert_eq!(clustering.get_group(2), cluster1);
146+
147+
// Nodes 3 and 4 should share a second cluster, distinct from the first
148+
let cluster2 = clustering.get_group(3);
149+
assert_eq!(clustering.get_group(4), cluster2);
150+
assert_ne!(cluster1, cluster2);
151+
}
152+
153+
// Builds the same topology through `build_network` and checks node count,
// edge count, and that each node weight equals its number of incident edges.
#[test]
154+
fn test_build_network() {
155+
let edges = vec![(0, 1), (1, 2), (2, 0), (3, 4)];
156+
let network = Louvain::<f64>::build_network(5, edges.len(), edges.into_iter());
157+
158+
assert_eq!(network.nodes(), 5);
159+
// All four input pairs are distinct, so none is dropped by deduplication.
assert_eq!(network.graph.edge_count(), 4);
160+
161+
// Check node weights (should equal degree)
162+
for i in 0..5 {
163+
let weight = network.weight(i);
164+
let expected = match i {
165+
0..=2 => 2.0, // Nodes in triangle
166+
3..=4 => 1.0, // Nodes in single edge
167+
_ => unreachable!(),
168+
};
169+
assert_eq!(weight, expected);
170+
}
171+
}
172+
}

src/clustering/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
pub(crate) mod leiden;
2+
pub(crate) mod louvain;
3+
pub use louvain::Louvain;
4+
pub(crate) mod similarity_network;
5+
pub use similarity_network::build_knn_network_combined_matrix;
6+
pub use similarity_network::build_knn_network_separate_matrix;
7+
pub use similarity_network::create_similarity_network;

0 commit comments

Comments
 (0)