Skip to content
This repository was archived by the owner on Jul 16, 2021. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ datasets = []
num = { version = "0.1.41", default-features = false }
rand = "0.4.1"
rulinalg = { git = "https://github.com/AtheMathmo/rulinalg", rev = "1ed8b937" }
kdtree = "0.5"
100 changes: 55 additions & 45 deletions src/learning/dbscan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,14 @@
//! let clustering = model.clusters().unwrap();
//! ```

use learning::{LearningResult, UnSupModel};
use learning::error::{Error, ErrorKind};
use learning::{LearningResult, UnSupModel};

use linalg::{Matrix, Vector, BaseMatrix};
use rulinalg::utils;
use kdtree::distance::squared_euclidean;
use kdtree::KdTree;
use linalg::{BaseMatrix, Matrix, Vector};
use rulinalg::matrix::Row;
use rulinalg::utils;

/// DBSCAN Model
///
Expand Down Expand Up @@ -80,17 +82,27 @@ impl UnSupModel<Matrix<f64>, Vector<Option<usize>>> for DBSCAN {
fn train(&mut self, inputs: &Matrix<f64>) -> LearningResult<()> {
self.init_params(inputs.rows());
let mut cluster = 0;
let mut neighbours = Vec::with_capacity(inputs.rows());
let mut sub_neighbours = Vec::with_capacity(inputs.rows());

let kdt = Self::kdtree(inputs);

for (idx, point) in inputs.row_iter().enumerate() {
let visited = self._visited[idx];

idx;
point;
visited;
if !visited {
self._visited[idx] = true;

let neighbours = self.region_query(point, inputs);
neighbours.clear();
self.region_query(point, inputs, &mut neighbours, &kdt);
neighbours.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
neighbours.dedup();

if neighbours.len() >= self.min_points {
self.expand_cluster(inputs, idx, neighbours, cluster);
self.expand_cluster(inputs, idx, &mut neighbours, &mut sub_neighbours, cluster, &kdt);
cluster += 1;
}
}
Expand All @@ -105,16 +117,14 @@ impl UnSupModel<Matrix<f64>, Vector<Option<usize>>> for DBSCAN {

fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Vector<Option<usize>>> {
if self.predictive {
if let (&Some(ref cluster_data), &Some(ref clusters)) = (&self._cluster_data,
&self.clusters) {
if let (&Some(ref cluster_data), &Some(ref clusters)) = (&self._cluster_data, &self.clusters) {
let mut classes = Vec::with_capacity(inputs.rows());

for input_point in inputs.row_iter() {
let mut distances = Vec::with_capacity(cluster_data.rows());

for cluster_point in cluster_data.row_iter() {
let point_distance =
utils::vec_bin_op(input_point.raw_slice(), cluster_point.raw_slice(), |x, y| x - y);
let point_distance = utils::vec_bin_op(input_point.raw_slice(), cluster_point.raw_slice(), |x, y| x - y);
distances.push(utils::dot(&point_distance, &point_distance).sqrt());
}

Expand All @@ -131,8 +141,7 @@ impl UnSupModel<Matrix<f64>, Vector<Option<usize>>> for DBSCAN {
Err(Error::new_untrained())
}
} else {
Err(Error::new(ErrorKind::InvalidState,
"Model must be set to predictive. Use `self.set_predictive(true)`."))
Err(Error::new(ErrorKind::InvalidState, "Model must be set to predictive. Use `self.set_predictive(true)`."))
}
}
}
Expand Down Expand Up @@ -167,49 +176,46 @@ impl DBSCAN {
self.clusters.as_ref()
}

fn expand_cluster(&mut self,
inputs: &Matrix<f64>,
point_idx: usize,
neighbour_pts: Vec<usize>,
cluster: usize) {
debug_assert!(point_idx < inputs.rows(),
"Point index too large for inputs");
debug_assert!(neighbour_pts.iter().all(|x| *x < inputs.rows()),
"Neighbour indices too large for inputs");

fn expand_cluster<'a>(&mut self, inputs: &Matrix<f64>, mut point_idx: usize, neighbours: &mut Vec<usize>, sub_neighbours: &mut Vec<usize>, cluster: usize, kdt: &KdTree<f64, usize, &'a [f64]>) {
debug_assert!(point_idx < inputs.rows(), "Point index too large for inputs");
self.clusters.as_mut().map(|x| x.mut_data()[point_idx] = Some(cluster));

for data_point_idx in &neighbour_pts {
let visited = self._visited[*data_point_idx];
while let Some(data_point_idx) = neighbours.pop() {
debug_assert!(data_point_idx < inputs.rows(), "Data point index too large for inputs");
debug_assert!(neighbours.iter().all(|x| *x < inputs.rows()), "Neighbour indices too large for inputs");

self.clusters.as_mut().map(|x| x.mut_data()[point_idx] = Some(cluster));
let visited = self._visited[data_point_idx];
if !visited {
self._visited[*data_point_idx] = true;
let data_point_row = unsafe { inputs.row_unchecked(*data_point_idx) };
let sub_neighbours = self.region_query(data_point_row, inputs);
self._visited[data_point_idx] = true;
let data_point_row = unsafe { inputs.row_unchecked(data_point_idx) };

sub_neighbours.clear();
self.region_query(data_point_row, inputs, sub_neighbours, kdt);

if sub_neighbours.len() >= self.min_points {
self.expand_cluster(inputs, *data_point_idx, sub_neighbours, cluster);
point_idx = data_point_idx;
neighbours.extend_from_slice(&sub_neighbours);
neighbours.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
neighbours.dedup();
}
}
}
}


fn region_query(&self, point: Row<f64>, inputs: &Matrix<f64>) -> Vec<usize> {
debug_assert!(point.cols() == inputs.cols(),
"point must be of same dimension as inputs");

let mut in_neighbourhood = Vec::new();
for (idx, data_point) in inputs.row_iter().enumerate() {
//TODO: Use `MatrixMetric` when rulinalg#154 is fixed.
let point_distance = utils::vec_bin_op(data_point.raw_slice(), point.raw_slice(), |x, y| x - y);
let dist = utils::dot(&point_distance, &point_distance).sqrt();

if dist < self.eps {
in_neighbourhood.push(idx);
}
fn region_query<'a>(&self, point: Row<f64>, inputs: &Matrix<f64>, neighbours: &mut Vec<usize>, kdt: &KdTree<f64, usize, &'a [f64]>) {
debug_assert!(point.cols() == inputs.cols(), "point must be of same dimension as inputs");
for (pnt, idx) in kdt.within(point.raw_slice(), self.eps.powi(2), &squared_euclidean).expect("KdTree error checking point") {
neighbours.push(*idx);
}
}

in_neighbourhood
fn kdtree<'a>(inputs: &'a Matrix<f64>) -> KdTree<f64, usize, &'a [f64]> {
let mut kdt = KdTree::new(inputs.cols());
for (idx, row) in inputs.row_iter().enumerate() {
kdt.add(row.raw_slice(), idx);
}
kdt
}

fn init_params(&mut self, total_points: usize) {
Expand All @@ -229,17 +235,19 @@ impl DBSCAN {
#[cfg(test)]
mod tests {
use super::DBSCAN;
use linalg::{Matrix, BaseMatrix};
use linalg::{BaseMatrix, Matrix};

#[test]
fn test_region_query() {
let model = DBSCAN::new(1.0, 3);

let inputs = Matrix::new(3, 2, vec![1.0, 1.0, 1.1, 1.9, 3.0, 3.0]);
let kdt = DBSCAN::kdtree(&inputs);

let m = matrix![1.0, 1.0];
let row = m.row(0);
let neighbours = model.region_query(row, &inputs);
let mut neighbours = vec![];
model.region_query(row, &inputs, &mut neighbours, &kdt);

assert!(neighbours.len() == 2);
}
Expand All @@ -249,10 +257,12 @@ mod tests {
let model = DBSCAN::new(0.01, 3);

let inputs = Matrix::new(3, 2, vec![1.0, 1.0, 1.1, 1.9, 1.1, 1.1]);
let kdt = DBSCAN::kdtree(&inputs);

let m = matrix![1.0, 1.0];
let row = m.row(0);
let neighbours = model.region_query(row, &inputs);
let mut neighbours = vec![];
model.region_query(row, &inputs, &mut neighbours, &kdt);

assert!(neighbours.len() == 1);
}
Expand Down
30 changes: 11 additions & 19 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@

#[macro_use]
extern crate rulinalg;
extern crate kdtree;
extern crate num as libnum;
extern crate rand;

Expand All @@ -118,9 +119,9 @@ pub mod prelude;
///
/// This module contains reexports of common tools from the rulinalg crate.
pub mod linalg {
pub use rulinalg::matrix::{Axes, Matrix, MatrixSlice, MatrixSliceMut, BaseMatrix, BaseMatrixMut};
pub use rulinalg::vector::Vector;
pub use rulinalg::matrix::{Axes, BaseMatrix, BaseMatrixMut, Matrix, MatrixSlice, MatrixSliceMut};
pub use rulinalg::norm;
pub use rulinalg::vector::Vector;
}

/// Module for data handling
Expand All @@ -133,15 +134,15 @@ pub mod learning {
pub mod dbscan;
pub mod glm;
pub mod gmm;
pub mod gp;
pub mod k_means;
pub mod knn;
pub mod lin_reg;
pub mod logistic_reg;
pub mod k_means;
pub mod nnet;
pub mod gp;
pub mod svm;
pub mod naive_bayes;
pub mod knn;
pub mod nnet;
pub mod pca;
pub mod svm;

pub mod error;

Expand Down Expand Up @@ -177,28 +178,19 @@ pub mod learning {
type Targets;

/// Compute the gradient for the model.
fn compute_grad(&self,
params: &[f64],
inputs: &Self::Inputs,
targets: &Self::Targets)
-> (f64, Vec<f64>);
fn compute_grad(&self, params: &[f64], inputs: &Self::Inputs, targets: &Self::Targets) -> (f64, Vec<f64>);
}

/// Trait for optimization algorithms.
pub trait OptimAlgorithm<M: Optimizable> {
/// Return the optimized parameter using gradient optimization.
///
/// Takes in a set of starting parameters and related model data.
fn optimize(&self,
model: &M,
start: &[f64],
inputs: &M::Inputs,
targets: &M::Targets)
-> Vec<f64>;
fn optimize(&self, model: &M, start: &[f64], inputs: &M::Inputs, targets: &M::Targets) -> Vec<f64>;
}

pub mod grad_desc;
pub mod fmincg;
pub mod grad_desc;
}

/// Module for learning tools.
Expand Down
51 changes: 36 additions & 15 deletions tests/learning/dbscan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,60 @@ use rm::linalg::Matrix;
use rm::learning::dbscan::DBSCAN;
use rm::learning::UnSupModel;

extern crate rand;
use self::rand::{thread_rng, Rng};

#[test]
#[ignore]
fn test_does_not_overflow() {
let mut rng = thread_rng();
let n = 100_000;
let d = 2;
let inputs = Matrix::new(n, d, rng.gen_iter::<f64>().take(n * d).collect::<Vec<f64>>());

let mut model = DBSCAN::new(0.5, 2);
model.train(&inputs).unwrap();

let clustering = dbg!(model.clusters().unwrap());
}

#[test]
fn test_basic_clusters() {
let inputs = Matrix::new(6, 2, vec![1.0, 2.0,
1.1, 2.2,
0.9, 1.9,
1.0, 2.1,
-2.0, 3.0,
-2.2, 3.1]);
let inputs = Matrix::new(8, 2, vec![1.0, 2.0, 1.1, 2.2, 0.9, 1.9, 1.0, 2.1, -2.0, 3.0, -2.2, 3.1, -1.0, -2.0, -2.0, -1.0]);

let mut model = DBSCAN::new(0.5, 2);
model.train(&inputs).unwrap();

let clustering = model.clusters().unwrap();
let clustering = dbg!(model.clusters().unwrap());

assert!(clustering.data().iter().take(4).all(|x| *x == Some(0)));
assert!(clustering.data().iter().skip(4).all(|x| *x == Some(1)));
assert!(clustering.data().iter().skip(4).take(2).all(|x| *x == Some(1)));
assert!(clustering.data().iter().skip(6).all(|x| *x == None));
}

#[test]
fn test_border_points() {
let inputs = Matrix::new(5, 1, vec![1.55, 2.0, 2.1, 2.2, 2.65]);

let mut model = DBSCAN::new(0.5, 3);
model.train(&inputs).unwrap();

let clustering = dbg!(model.clusters().unwrap());

assert!(clustering.data().iter().take(1).all(|x| *x == None));
assert!(clustering.data().iter().skip(1).take(3).all(|x| *x == Some(0)));
assert!(clustering.data().iter().skip(4).all(|x| *x == None));
}

#[test]
fn test_basic_prediction() {
let inputs = Matrix::new(6, 2, vec![1.0, 2.0,
1.1, 2.2,
0.9, 1.9,
1.0, 2.1,
-2.0, 3.0,
-2.2, 3.1]);
let inputs = Matrix::new(6, 2, vec![1.0, 2.0, 1.1, 2.2, 0.9, 1.9, 1.0, 2.1, -2.0, 3.0, -2.2, 3.1]);

let mut model = DBSCAN::new(0.5, 2);
model.set_predictive(true);
model.train(&inputs).unwrap();

let new_points = Matrix::new(2,2, vec![1.0, 2.0, 4.0, 4.0]);
let new_points = Matrix::new(2, 2, vec![1.0, 2.0, 4.0, 4.0]);

let classes = model.predict(&new_points).unwrap();
assert!(classes[0] == Some(0));
Expand Down