diff --git a/Cargo.lock b/Cargo.lock
index 8f2d51a2..fb9b67b1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -161,6 +161,7 @@
 dependencies = [
  "num",
  "rand",
  "rand_pcg",
+ "rayon",
 ]

 [[package]]
diff --git a/benches/benchmark.rs b/benches/benchmark.rs
index dc5d6367..09253483 100644
--- a/benches/benchmark.rs
+++ b/benches/benchmark.rs
@@ -1,8 +1,7 @@
 mod generator;
-use coupe::algorithms::k_means::simplified_k_means;
-use coupe::algorithms::recursive_bisection::{axis_sort, rcb};
-use coupe::geometry::Point2D;
+use coupe::Partition as _;
+use coupe::Point2D;
 use criterion::{criterion_group, criterion_main};
 use criterion::{Criterion, Throughput};
 use rayon::prelude::*;
@@ -10,21 +9,6 @@ use rayon::prelude::*;
 const SAMPLE_SIZE: usize = 5000;
 const NUM_ITER: usize = 2;
-fn bench_axis_sort_random(c: &mut Criterion) {
-    c.benchmark_group("axis_sort_random")
-        .throughput(Throughput::Elements(SAMPLE_SIZE as u64))
-        .bench_function("axis_sort_random", move |b| {
-            let sample_points = generator::uniform_rectangle(
-                Point2D::from([0., 0.]),
-                Point2D::from([30., 10.]),
-                SAMPLE_SIZE,
-            );
-            let mut permutation: Vec<_> = (0..SAMPLE_SIZE).collect();
-
-            b.iter(|| axis_sort(&sample_points, &mut permutation, 0))
-        });
-}
-
 fn bench_raw_pdqsort_random(c: &mut Criterion) {
     c.benchmark_group("raw_pdqsort_random")
         .throughput(Throughput::Elements(SAMPLE_SIZE as u64))
@@ -87,20 +71,6 @@ fn bench_parallel_raw_pdqsort_sorted(c: &mut Criterion) {
     });
 }
-fn bench_axis_sort_sorted(c: &mut Criterion) {
-    c.benchmark_group("axis_sort_sorted")
-        .throughput(Throughput::Elements(SAMPLE_SIZE as u64))
-        .bench_function("axis_sort_sorted", move |b| {
-            let sample_points = generator::already_x_sorted_rectangle(
-                Point2D::from([0., 0.]),
-                Point2D::from([30., 10.]),
-                SAMPLE_SIZE,
-            );
-            let mut permutation: Vec<_> = (0..SAMPLE_SIZE).collect();
-            b.iter(|| axis_sort(&sample_points, &mut permutation, 0))
-        });
-}
-
 fn bench_rcb_random(c: &mut Criterion) {
     c.benchmark_group("rcb_random")
         .throughput(Throughput::Elements(SAMPLE_SIZE as u64))
@@ -110,52 +80,54 @@
             Point2D::from([30., 10.]),
             SAMPLE_SIZE,
         );
-        let weights = vec![1.0; sample_points.len()];
-        let mut ids = vec![0; sample_points.len()];
+        let weights = vec![1; SAMPLE_SIZE];
+        let mut ids = vec![0; SAMPLE_SIZE];
         b.iter(|| {
             use rayon::iter::IntoParallelRefIterator as _;
             use rayon::iter::ParallelIterator as _;
             let p = sample_points.par_iter().cloned();
             let w = weights.par_iter().cloned();
-            rcb(&mut ids, p, w, NUM_ITER)
+            coupe::Rcb {
+                iter_count: NUM_ITER,
+            }
+            .partition(&mut ids, (p, w))
+            .unwrap()
         })
     });
 }
-fn bench_simplified_k_means(c: &mut Criterion) {
-    c.benchmark_group("simplified_k_means")
+fn bench_k_means(c: &mut Criterion) {
+    c.benchmark_group("k_means")
         .throughput(Throughput::Elements(SAMPLE_SIZE as u64))
-        .bench_function("simplified_k_means", move |b| {
+        .bench_function("k_means", move |b| {
             let sample_points = generator::uniform_rectangle(
                 Point2D::new(0., 0.),
                 Point2D::new(30., 10.),
                 SAMPLE_SIZE,
             );
-            let ids: Vec<_> = (0..SAMPLE_SIZE).collect();
-            let weights: Vec<_> = ids.iter().map(|_| 1.).collect();
+            let weights = vec![1.0; SAMPLE_SIZE];
             b.iter(|| {
-                simplified_k_means(
-                    &sample_points,
-                    &weights,
-                    2usize.pow(NUM_ITER as u32),
-                    5.,
-                    1000,
-                    true,
-                )
+                coupe::KMeans {
+                    part_count: 2_usize.pow(NUM_ITER as u32),
+                    imbalance_tol: 5.0,
+                    delta_threshold: 0.0,
+                    hilbert: true,
+                    ..Default::default()
+                }
+                .partition(&mut [0; SAMPLE_SIZE], (&sample_points, &weights))
+                .unwrap()
            })
        });
}
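Editor's note: the benches above exercise the new builder-style `coupe::Partition` trait API introduced by this diff. As a standalone illustration of that calling convention, here is a minimal sketch based only on the API visible in this patch (not part of the patch itself):

```rust
use coupe::Partition as _;
use coupe::Point2D;
use rayon::prelude::*;

fn main() {
    // Four points in a square; one RCB iteration splits them into two parts.
    let points = vec![
        Point2D::new(0., 0.),
        Point2D::new(1., 0.),
        Point2D::new(0., 1.),
        Point2D::new(1., 1.),
    ];
    let weights = vec![1; points.len()];
    let mut partition = vec![0; points.len()];

    // Rcb consumes indexed parallel iterators over points and weights.
    let p = points.par_iter().cloned();
    let w = weights.par_iter().cloned();
    coupe::Rcb { iter_count: 1 }
        .partition(&mut partition, (p, w))
        .unwrap();

    println!("{partition:?}");
}
```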
 criterion_group!(
     benches,
-    bench_axis_sort_random,
-    bench_axis_sort_sorted,
     bench_raw_pdqsort_random,
     bench_parallel_raw_pdqsort_random,
     bench_raw_pdqsort_sorted,
     bench_parallel_raw_pdqsort_sorted,
     bench_rcb_random,
-    bench_simplified_k_means
+    bench_k_means
 );
 criterion_main!(benches);
diff --git a/benches/generator.rs b/benches/generator.rs
index 66bfa5d5..2191fafe 100644
--- a/benches/generator.rs
+++ b/benches/generator.rs
@@ -1,5 +1,4 @@
-use coupe::geometry::Point2D;
-use itertools::Itertools;
+use coupe::Point2D;
 use rand::{self, Rng};

 pub fn uniform_f64(min: f64, max: f64, num_vals: usize) -> Vec<f64> {
@@ -18,20 +17,3 @@ pub fn uniform_rectangle(p_min: Point2D, p_max: Point2D, num_points: usize) -> Vec<Point2D> {
         })
         .collect()
 }
-
-pub fn already_x_sorted_rectangle(
-    p_min: Point2D,
-    p_max: Point2D,
-    num_points: usize,
-) -> Vec<Point2D> {
-    let mut rng = rand::thread_rng();
-    (0..num_points)
-        .map(|_| {
-            Point2D::from([
-                rng.gen_range(p_min.x..p_max.x),
-                rng.gen_range(p_min.y..p_max.y),
-            ])
-        })
-        .sorted_by(|p1, p2| p1.x.partial_cmp(&p2.x).unwrap_or(std::cmp::Ordering::Equal))
-        .collect()
-}
diff --git a/src/algorithms.rs b/src/algorithms.rs
index 45d1df40..906328cf 100644
--- a/src/algorithms.rs
+++ b/src/algorithms.rs
@@ -1,12 +1,88 @@
-pub mod ckk;
-pub mod fiduccia_mattheyses;
-pub mod graph_growth;
-pub mod greedy;
-pub mod hilbert_curve;
-pub mod k_means;
-pub mod kernighan_lin;
-pub mod kk;
-pub mod multi_jagged;
-pub mod recursive_bisection;
-pub mod vn;
-pub mod z_curve;
+use std::fmt;
+
+mod ckk;
+mod fiduccia_mattheyses;
+mod graph_growth;
+mod greedy;
+mod hilbert_curve;
+mod k_means;
+mod kernighan_lin;
+mod kk;
+mod multi_jagged;
+mod recursive_bisection;
+mod vn;
+mod z_curve;
+
+pub use ckk::CompleteKarmarkarKarp;
+pub use fiduccia_mattheyses::FiducciaMattheyses;
+pub use graph_growth::GraphGrowth;
+pub use greedy::Greedy;
+pub use hilbert_curve::Error as HilbertCurveError;
+pub use hilbert_curve::HilbertCurve;
+pub use k_means::KMeans;
+pub use kernighan_lin::KernighanLin;
+pub use kk::KarmarkarKarp;
+pub use multi_jagged::MultiJagged;
+pub use recursive_bisection::Rcb;
+pub use recursive_bisection::Rib;
+pub use vn::VnBest;
+pub use z_curve::ZCurve;
+
+/// Common errors thrown by algorithms.
+#[derive(Debug)]
+#[non_exhaustive]
+pub enum Error {
+    /// No partition that matches the given criteria could be found.
+    NotFound,
+
+    /// Input sets don't have matching lengths.
+    InputLenMismatch { expected: usize, actual: usize },
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Error::NotFound => write!(f, "no partition found"),
+            Error::InputLenMismatch { expected, actual } => write!(
+                f,
+                "input sets don't have the same length (expected {expected} items, got {actual})",
+            ),
+        }
+    }
+}
+
+impl std::error::Error for Error {}
+
+/// Map elements to parts randomly.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+/// use rand;
+///
+/// let mut partition = [0; 12];
+///
+/// coupe::Random { rng: rand::thread_rng(), part_count: 3 }
+///     .partition(&mut partition, ())
+///     .unwrap();
+/// ```
+pub struct Random<R> {
+    pub rng: R,
+    pub part_count: usize,
+}
+
+impl<R> crate::Partition<()> for Random<R>
+where
+    R: rand::Rng,
+{
+    type Metadata = ();
+    type Error = std::convert::Infallible;
+
+    fn partition(&mut self, part_ids: &mut [usize], _: ()) -> Result<Self::Metadata, Self::Error> {
+        for part_id in part_ids {
+            *part_id = self.rng.gen_range(0..self.part_count);
+        }
+        Ok(())
+    }
+}
diff --git a/src/algorithms/ckk.rs b/src/algorithms/ckk.rs
index a3cec67c..9959dea8 100644
--- a/src/algorithms/ckk.rs
+++ b/src/algorithms/ckk.rs
@@ -1,3 +1,4 @@
+use super::Error;
 use num::FromPrimitive;
 use num::ToPrimitive;
 use std::iter::Sum;
@@ -92,7 +93,7 @@ where
     false
 }
-pub fn ckk_bipart<I, T>(partition: &mut [usize], weights: I, tolerance: f64) -> bool
+fn ckk_bipart<I, T>(partition: &mut [usize], weights: I, tolerance: f64) -> Result<(), Error>
 where
     I: IntoIterator<Item = T>,
     T: Sum + Add<Output = T> + Sub<Output = T>,
@@ -100,9 +101,14 @@ where
     T: Ord + Default + Copy,
 {
     let mut weights: Vec<(T, usize)> = weights.into_iter().zip(0..).collect();
-    debug_assert_eq!(weights.len(), partition.len());
+    if weights.len() != partition.len() {
+        return Err(Error::InputLenMismatch {
+            expected: partition.len(),
+            actual: weights.len(),
+        });
+    }
     if weights.is_empty() {
-        return true;
+        return Ok(());
     }
     weights.sort_unstable();
@@ -111,7 +117,48 @@ where
     let mut steps = Vec::new();
-    ckk_bipart_rec(partition, &mut weights, tolerance, &mut steps)
+    if ckk_bipart_rec(partition, &mut weights, tolerance, &mut steps) {
+        Ok(())
+    } else {
+        Err(Error::NotFound)
+    }
+}
+
+/// The exact variant of the [Karmarkar-Karp][crate::KarmarkarKarp] algorithm.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+///
+/// let weights: [i32; 4] = [3, 5, 3, 9];
+/// let mut partition = [0; 4];
+///
+/// coupe::CompleteKarmarkarKarp { tolerance: 0.1 }
+///     .partition(&mut partition, weights)
+///     .unwrap();
+/// ```
+pub struct CompleteKarmarkarKarp {
+    pub tolerance: f64,
+}
+
+impl<W> crate::Partition<W> for CompleteKarmarkarKarp
+where
+    W: IntoIterator,
+    W::Item: Sum + Add<Output = W::Item> + Sub<Output = W::Item>,
+    W::Item: FromPrimitive + ToPrimitive,
+    W::Item: Ord + Default + Copy,
+{
+    type Metadata = ();
+    type Error = Error;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        weights: W,
+    ) -> Result<Self::Metadata, Self::Error> {
+        ckk_bipart(part_ids, weights, self.tolerance)
+    }
+}

 // TODO tests
diff --git a/src/algorithms/fiduccia_mattheyses.rs b/src/algorithms/fiduccia_mattheyses.rs
index 088313ba..8cc31a40 100644
--- a/src/algorithms/fiduccia_mattheyses.rs
+++ b/src/algorithms/fiduccia_mattheyses.rs
@@ -1,12 +1,11 @@
 use itertools::Itertools;
 use sprs::CsMatView;
-use crate::partition::Partition;
-use crate::PointND;
 use std::collections::HashMap;
-pub fn fiduccia_mattheyses<'a, const D: usize>(
-    initial_partition: &mut Partition<'a, PointND<D>, f64>,
+fn fiduccia_mattheyses(
+    part_ids: &mut [usize],
+    weights: &[f64],
     adjacency: CsMatView<f64>,
     max_passes: impl Into<Option<usize>>,
     max_flips_per_pass: impl Into<Option<usize>>,
@@ -16,12 +15,11 @@ pub fn fiduccia_mattheyses<'a, const D: usize>(
     let max_passes = max_passes.into();
     let max_flips_per_pass = max_flips_per_pass.into();
     let max_imbalance_per_flip = max_imbalance_per_flip.into();
-    let (_points, weights, ids) = initial_partition.as_raw_mut();
     fiduccia_mattheyses_impl(
+        part_ids,
         weights,
-        adjacency.view(),
-        ids,
+        adjacency,
         max_passes,
         max_flips_per_pass,
         max_imbalance_per_flip,
@@ -30,9 +28,9 @@
 }
 fn fiduccia_mattheyses_impl(
+    initial_partition: &mut [usize],
     weights: &[f64],
     adjacency: CsMatView<f64>,
-    initial_partition: &mut [usize],
     max_passes: Option<usize>,
     max_flips_per_pass: Option<usize>,
     max_imbalance_per_flip: Option<f64>,
@@ -250,3 +248,107 @@ fn fiduccia_mattheyses_impl(
     println!("final cut size: {}", new_cut_size);
 }
+
+/// FiducciaMattheyses
+///
+/// An implementation of the Fiduccia-Mattheyses topological algorithm
+/// for graph partitioning. This implementation is an extension of the
+/// original algorithm to handle partitioning into more than two parts.
+///
+/// This algorithm repeats an iterative pass during which a set of graph nodes is assigned to
+/// a new part, reducing the overall cutsize of the partition. As opposed to the
+/// Kernighan-Lin algorithm, during each pass iteration, only one node is flipped at a time.
+/// The algorithm thus does not preserve the partition weight balance and may produce an unbalanced
+/// partition.
+///
+/// Original algorithm from "A Linear-Time Heuristic for Improving Network Partitions"
+/// by C.M. Fiduccia and R.M. Mattheyses.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+/// use coupe::Point2D;
+/// use sprs::CsMat;
+///
+/// //    swap
+/// //  0  1  0  1
+/// // +--+--+--+
+/// // |  |  |  |
+/// // +--+--+--+
+/// //  0  0  1  1
+/// let points = [
+///     Point2D::new(0., 0.),
+///     Point2D::new(1., 0.),
+///     Point2D::new(2., 0.),
+///     Point2D::new(3., 0.),
+///     Point2D::new(0., 1.),
+///     Point2D::new(1., 1.),
+///     Point2D::new(2., 1.),
+///     Point2D::new(3., 1.),
+/// ];
+/// let weights = [1.0; 8];
+/// let mut partition = [0, 0, 1, 1, 0, 1, 0, 1];
+///
+/// let mut adjacency = CsMat::empty(sprs::CSR, 8);
+/// adjacency.reserve_outer_dim(8);
+/// eprintln!("shape: {:?}", adjacency.shape());
+/// adjacency.insert(0, 1, 1.);
+/// adjacency.insert(1, 2, 1.);
+/// adjacency.insert(2, 3, 1.);
+/// adjacency.insert(4, 5, 1.);
+/// adjacency.insert(5, 6, 1.);
+/// adjacency.insert(6, 7, 1.);
+/// adjacency.insert(0, 4, 1.);
+/// adjacency.insert(1, 5, 1.);
+/// adjacency.insert(2, 6, 1.);
+/// adjacency.insert(3, 7, 1.);
+///
+/// // symmetry
+/// adjacency.insert(1, 0, 1.);
+/// adjacency.insert(2, 1, 1.);
+/// adjacency.insert(3, 2, 1.);
+/// adjacency.insert(5, 4, 1.);
+/// adjacency.insert(6, 5, 1.);
+/// adjacency.insert(7, 6, 1.);
+/// adjacency.insert(4, 0, 1.);
+/// adjacency.insert(5, 1, 1.);
+/// adjacency.insert(6, 2, 1.);
+/// adjacency.insert(7, 3, 1.);
+///
+/// coupe::FiducciaMattheyses { max_bad_move_in_a_row: 1, ..Default::default() }
+///     .partition(&mut partition, (adjacency.view(), &weights))
+///     .unwrap();
+///
+/// assert_eq!(partition[5], 0);
+/// assert_eq!(partition[6], 1);
+/// ```
+#[derive(Debug, Clone, Copy, Default)]
+pub struct FiducciaMattheyses {
+    pub max_passes: Option<usize>,
+    pub max_flips_per_pass: Option<usize>,
+    pub max_imbalance_per_flip: Option<f64>,
+    pub max_bad_move_in_a_row: usize,
+}
+
+impl<'a> crate::Partition<(CsMatView<'a, f64>, &'a [f64])> for FiducciaMattheyses {
+    type Metadata = ();
+    type Error = std::convert::Infallible;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        (adjacency, weights): (CsMatView<f64>, &'a [f64]),
+    ) -> Result<Self::Metadata, Self::Error> {
+        fiduccia_mattheyses(
+            part_ids,
+            weights,
+            adjacency,
+            self.max_passes,
+            self.max_flips_per_pass,
+            self.max_imbalance_per_flip,
+            self.max_bad_move_in_a_row,
+        );
+        Ok(())
+    }
+}
diff --git a/src/algorithms/graph_growth.rs b/src/algorithms/graph_growth.rs
index b9872195..e6e558fc 100644
--- a/src/algorithms/graph_growth.rs
+++ b/src/algorithms/graph_growth.rs
@@ -1,12 +1,16 @@
 use rand::seq::SliceRandom;
 use sprs::CsMatView;
-pub fn graph_growth(weights: &[f64], adjacency: CsMatView<f64>, num_parts: usize) -> Vec<usize> {
+fn graph_growth(
+    initial_ids: &mut [usize],
+    weights: &[f64],
+    adjacency: CsMatView<f64>,
+    num_parts: usize,
+) {
     let (shape_x, shape_y) = adjacency.shape();
     assert_eq!(shape_x, shape_y);
     assert_eq!(weights.len(), shape_x);
-    let mut initial_ids = vec![0; weights.len()];
     // let total_weight = weights.iter().sum::<f64>();
     // let weight_per_part = total_weight / num_parts as f64;
     let max_expansion_per_pass = 20;
@@ -54,6 +58,81 @@
         }
     }
 }
+}
+
+/// Graph Growth algorithm
+///
+/// A topological algorithm that generates a partition from a topological mesh.
+/// Given a number k of parts, the algorithm selects k nodes at random and assigns each of them to a different part.
+/// Then, at each iteration, each part is expanded to neighboring nodes that are not yet assigned to a part.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+/// use coupe::Point2D;
+/// use sprs::CsMat;
+///
+/// //  +--+--+--+
+/// //  |  |  |  |
+/// //  +--+--+--+
+///
+/// let weights = [1.0; 8];
+/// let mut partition = [0; 8];
+///
+/// let mut adjacency = CsMat::empty(sprs::CSR, 8);
+/// adjacency.reserve_outer_dim(8);
+/// eprintln!("shape: {:?}", adjacency.shape());
+/// adjacency.insert(0, 1, 1.);
+/// adjacency.insert(1, 2, 1.);
+/// adjacency.insert(2, 3, 1.);
+/// adjacency.insert(4, 5, 1.);
+/// adjacency.insert(5, 6, 1.);
+/// adjacency.insert(6, 7, 1.);
+/// adjacency.insert(0, 4, 1.);
+/// adjacency.insert(1, 5, 1.);
+/// adjacency.insert(2, 6, 1.);
+/// adjacency.insert(3, 7, 1.);
+///
+/// // symmetry
+/// adjacency.insert(1, 0, 1.);
+/// adjacency.insert(2, 1, 1.);
+/// adjacency.insert(3, 2, 1.);
+/// adjacency.insert(5, 4, 1.);
+/// adjacency.insert(6, 5, 1.);
+/// adjacency.insert(7, 6, 1.);
+/// adjacency.insert(4, 0, 1.);
+/// adjacency.insert(5, 1, 1.);
+/// adjacency.insert(6, 2, 1.);
+/// adjacency.insert(7, 3, 1.);
+///
+/// coupe::GraphGrowth { part_count: 2 }
+///     .partition(&mut partition, (adjacency.view(), &weights))
+///     .unwrap();
+/// ```
+#[derive(Debug, Clone, Copy)]
+pub struct GraphGrowth {
+    pub part_count: usize,
+}
-    initial_ids
+impl<'a, W> crate::Partition<(CsMatView<'a, f64>, W)> for GraphGrowth
+where
+    W: AsRef<[f64]>,
+{
+    type Metadata = ();
+    type Error = std::convert::Infallible;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        (adjacency, weights): (CsMatView<f64>, W),
+    ) -> Result<Self::Metadata, Self::Error> {
+        graph_growth(
+            part_ids,
+            weights.as_ref(),
+            adjacency.view(),
+            self.part_count,
+        );
+        Ok(())
+    }
}
diff --git a/src/algorithms/greedy.rs b/src/algorithms/greedy.rs
index 4f778d55..db35643b 100644
--- a/src/algorithms/greedy.rs
+++ b/src/algorithms/greedy.rs
@@ -1,14 +1,16 @@
+use super::Error;
 use num::Zero;
 use std::ops::AddAssign;
 /// Implementation of the greedy algorithm.
-pub fn greedy<T: Ord + Zero + Clone + AddAssign>(
+fn greedy<T: Ord + Zero + Clone + AddAssign>(
     partition: &mut [usize],
     weights: impl IntoIterator<Item = T>,
-    num_parts: usize,
-) {
-    if num_parts < 2 {
-        return;
+    part_count: usize,
+) -> Result<(), Error> {
+    if part_count < 2 {
+        partition.fill(0);
+        return Ok(());
     }
     // Initialization: make the partition and record the weight of each part in another vector.
@@ -16,9 +18,16 @@
         .into_iter()
         .zip(0..) // Keep track of the weights' indicies
         .collect();
-    assert_eq!(partition.len(), weights.len());
+
+    if weights.len() != partition.len() {
+        return Err(Error::InputLenMismatch {
+            expected: partition.len(),
+            actual: weights.len(),
+        });
+    }
+
     weights.sort_unstable();
-    let mut part_weights = vec![T::zero(); num_parts];
+    let mut part_weights = vec![T::zero(); part_count];
     // Put each weight in the lightweightest part.
     for (weight, weight_id) in weights.into_iter().rev() {
@@ -26,8 +35,46 @@
             .iter()
             .enumerate()
             .min_by_key(|(_idx, part_weight)| *part_weight)
-            .unwrap();
+            .unwrap(); // Will not panic because !part_weights.is_empty()
         partition[weight_id] = min_part_weight_idx;
         part_weights[min_part_weight_idx] += weight;
     }
+
+    Ok(())
+}
+
+/// Greedily assign weights to each part.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+/// use coupe::Real;
+///
+/// let weights = [3.2, 6.8, 10.0, 7.5].map(Real::from);
+/// let mut partition = [0; 4];
+///
+/// coupe::Greedy { part_count: 2 }
+///     .partition(&mut partition, weights)
+///     .unwrap();
+/// ```
+pub struct Greedy {
+    pub part_count: usize,
+}
+
+impl<W> crate::Partition<W> for Greedy
+where
+    W: IntoIterator,
+    W::Item: Ord + Zero + Clone + AddAssign,
+{
+    type Metadata = ();
+    type Error = Error;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        weights: W,
+    ) -> Result<Self::Metadata, Self::Error> {
+        greedy(part_ids, weights, self.part_count)
+    }
}
diff --git a/src/algorithms/hilbert_curve.rs b/src/algorithms/hilbert_curve.rs
index ee41a7ef..b7d730fc 100644
--- a/src/algorithms/hilbert_curve.rs
+++ b/src/algorithms/hilbert_curve.rs
@@ -12,17 +12,16 @@
 use crate::geometry::{Mbr, Point2D};
 use rayon::prelude::*;
+use std::fmt;
-pub fn hilbert_curve_partition(
+fn hilbert_curve_partition(
+    partition: &mut [usize],
     points: &[Point2D],
     weights: &[f64],
-    num_partitions: usize,
+    part_count: usize,
     order: usize,
-) -> Vec<usize> {
-    assert!(
-        order < 32,
-        "Cannot construct a Hilbert curve of order >= 32 because 2^32 would overflow u32 capacity."
-    );
+) {
+    debug_assert!(order < 32);
     let compute_hilbert_index = hilbert_index_computer(points, order);
     let mut permutation = (0..points.len()).into_par_iter().collect::<Vec<_>>();
@@ -36,14 +35,12 @@
         .par_sort_by_key(|idx| hilbert_indices[*idx]);
     // dummy modifiers to use directly the routine from multi_jagged
-    let modifiers = vec![1. / num_partitions as f64; num_partitions];
-
-    let mut partition = vec![0; points.len()];
+    let modifiers = vec![1. / part_count as f64; part_count];
     let split_positions = super::multi_jagged::compute_split_positions(
         weights,
         &permutation,
-        num_partitions - 1,
+        part_count - 1,
         &modifiers,
     );
@@ -60,8 +57,6 @@
             unsafe { std::ptr::write(ptr.add(*i), part_id) }
         }
     });
-
-    partition
 }
 #[allow(unused)]
@@ -75,15 +70,8 @@ pub(crate) fn hilbert_curve_reorder(points: &[Point2D], order: usize) -> Vec<usize> {
     let mut permutation: Vec<usize> = (0..points.len()).collect();
     hilbert_curve_reorder_permu(points, &mut permutation, order);
     permutation
 }
-fn hilbert_curve_reorder_permu(
-    points: &[Point2D],
-    permutation: &mut [usize],
-    order: usize,
-) {
-    assert!(
-        order < 32,
-        "Cannot construct a Hilbert curve of order >= 32 because 2^32 would overflow u32 capacity."
-    );
+fn hilbert_curve_reorder_permu(points: &[Point2D], permutation: &mut [usize], order: usize) {
+    debug_assert!(order < 32);
     let compute_hilbert_index = hilbert_index_computer(points, order);
@@ -116,17 +104,14 @@ fn hilbert_index_computer(points: &[Point2D], order: usize) -> impl Fn((f64, f64)) -> u32 {
 }
 fn encode(x: u32, y: u32, order: usize) -> u32 {
-    assert!(
-        order < 32,
-        "Cannot construct a Hilbert curve of order >= 32 because 2^32 would overflow u32 capacity."
-    );
-    assert!(
+    debug_assert!(order < 32);
+    debug_assert!(
         x < 2u32.pow(order as u32),
         "Cannot encode the point {:?} on an hilbert curve of order {} because x >= 2^order.",
         (x, y),
         order,
     );
-    assert!(
+    debug_assert!(
         y < 2u32.pow(order as u32),
         "Cannot encode the point {:?} on an hilbert curve of order {} because y >= 2^order.",
         (x, y),
@@ -177,13 +162,13 @@ fn interleave_bits(odd: u32, even: u32) -> u32 {
 // Compute a mapping from [a_min; a_max] to [b_min; b_max]
 fn segment_to_segment(a_min: f64, a_max: f64, b_min: f64, b_max: f64) -> impl Fn(f64) -> f64 {
-    assert!(
+    debug_assert!(
         a_min <= a_max,
         "Cannot construct a segment to segment mapping because a_max < a_min. a_min = {}, a_max = {}.",
         a_min,
         a_max,
     );
-    assert!(
+    debug_assert!(
         b_min <= b_max,
         "Cannot construct a segment to segment mapping because b_max < b_min.
b_min = {}, b_max = {}.", b_min, @@ -196,7 +181,7 @@ fn segment_to_segment(a_min: f64, a_max: f64, b_min: f64, b_max: f64) -> impl Fn let beta = b_min - a_min * alpha; move |x| { - assert!( + debug_assert!( a_min <= x && x <= a_max, "Called a mapping from [{}, {}] to [{}, {}] with the invalid value {}.", a_min, @@ -209,6 +194,102 @@ fn segment_to_segment(a_min: f64, a_max: f64, b_min: f64, b_max: f64) -> impl Fn } } +#[derive(Debug)] +#[non_exhaustive] +pub enum Error { + /// Invalid space filling curve order. + InvalidOrder { max: u32, actual: u32 }, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::InvalidOrder { max, actual } => { + write!( + f, + "given hilbert curve order too high. Got {}, max={}.", + actual, max + ) + } + } + } +} + +impl std::error::Error for Error {} + +/// # Hilbert space-filling curve algorithm +/// +/// An implementation of the Hilbert curve based on +/// "Encoding and Decoding the Hilbert Order" by XIAN LIU and GÜNTHER SCHRACK. +/// +/// This algorithm uses space hashing to reorder points alongside the Hilbert curve ov a giver order. +/// See [wikipedia](https://en.wikipedia.org/wiki/Hilbert_curve) for more details. +/// +/// # Example +/// +/// ```rust +/// use coupe::Partition as _; +/// use coupe::Point2D; +/// +/// let points = [ +/// Point2D::new(0., 0.), +/// Point2D::new(1., 1.), +/// Point2D::new(0., 10.), +/// Point2D::new(1., 9.), +/// Point2D::new(9., 1.), +/// Point2D::new(10., 0.), +/// Point2D::new(10., 10.), +/// Point2D::new(9., 9.), +/// ]; +/// let weights = [1.0; 8]; +/// let mut partition = [0; 8]; +/// +/// // generate a partition of 4 parts +/// coupe::HilbertCurve { part_count: 4, order: 5 } +/// .partition(&mut partition, (points, weights)) +/// .unwrap(); +/// +/// assert_eq!(partition[0], partition[1]); +/// assert_eq!(partition[2], partition[3]); +/// assert_eq!(partition[4], partition[5]); +/// assert_eq!(partition[6], partition[7]); +/// ``` +pub struct HilbertCurve { + pub part_count: usize, + pub order: u32, +} + +// hilbert curve is only implemented in 2d for now +impl crate::Partition<(P, W)> for HilbertCurve +where + P: AsRef<[Point2D]>, + W: AsRef<[f64]>, +{ + type Metadata = (); + type Error = Error; + + fn partition( + &mut self, + part_ids: &mut [usize], + (points, weights): (P, W), + ) -> Result { + if self.order >= 32 { + return Err(Error::InvalidOrder { + max: 31, + actual: self.order, + }); + } + hilbert_curve_partition( + part_ids, + points.as_ref(), + weights.as_ref(), + self.part_count, + self.order as usize, + ); + Ok(()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/algorithms/k_means.rs b/src/algorithms/k_means.rs index 6e78814e..bc8c26d4 100644 --- a/src/algorithms/k_means.rs +++ b/src/algorithms/k_means.rs @@ -15,9 +15,6 @@ use rayon::prelude::*; use std::cmp::Ordering; use std::sync::atomic::{self, AtomicPtr}; -// use super::hilbert_curve; -use super::z_curve; - use itertools::iproduct; use itertools::Itertools; @@ -26,143 +23,6 @@ use itertools::Itertools; /// for the k-means algorithm and not a partition id type ClusterId = usize; -/// A simplified implementation of the algorithm described in the paper -/// by Moritz von Looz et al. that follows the same idea but without the small -/// optimizations that would improve the efficiency of the algorithm. In particular, -/// this version shows some noticeable oscillations when imposing a restrictive balance constraint. 
-/// It also skips the bounding boxes optimization which would slightly reduce the complexity of the -/// algorithm. -pub fn simplified_k_means( - points: &[PointND], - weights: &[f64], - num_partitions: usize, - imbalance_tol: f64, - mut n_iter: isize, - hilbert: bool, -) -> Vec -where - Const: DimSub>, - DefaultAllocator: Allocator, Const, Buffer = ArrayStorage> - + Allocator, Const<1>>>, -{ - let sfc_order = (f64::from(std::u32::MAX)).log(f64::from(2u32.pow(D as u32))) as u32; - let permu = if hilbert { - unimplemented!("hilbert curve currently not implemented for n-dimension"); - // hilbert_curve::hilbert_curve_reorder(points, 15) - } else { - z_curve::z_curve_reorder(points, sfc_order) - }; - - let points_per_center = points.len() / num_partitions; - - let mut centers: Vec<_> = permu - .iter() - .cloned() - .skip(points_per_center / 2) - .step_by(points_per_center) - .map(|idx| points[idx]) - .collect(); - - let center_ids: Vec = centers.par_iter().map(|_| crate::uid()).collect(); - - let mut influences = centers.par_iter().map(|_| 1.).collect::>(); - - let dummy_id = crate::uid(); - let mut assignments: Vec = permu.par_iter().map(|_| dummy_id).collect(); - let atomic_handle = AtomicPtr::from(assignments.as_mut_ptr()); - permu - .par_chunks(points_per_center) - .zip(center_ids.par_iter()) - .for_each(|(chunk, id)| { - let ptr = atomic_handle.load(atomic::Ordering::Relaxed); - for idx in chunk { - unsafe { std::ptr::write(ptr.add(*idx), *id) } - } - }); - - let mut imbalance = std::f64::MAX; - - let target_weight = weights.par_iter().sum::() / num_partitions as f64; - - while imbalance > imbalance_tol && n_iter > 0 { - n_iter -= 1; - - // find new assignments - permu.par_iter().for_each(|&idx| { - // find closest center - let mut distances = ::std::iter::repeat(points[idx]) - .zip(centers.iter()) - .zip(center_ids.iter()) - .zip(influences.iter()) - .map(|(((p, center), id), influence)| (*id, (p - center).norm() * influence)) - .collect::>(); - - distances - .as_mut_slice() - .sort_unstable_by(|(_, d1), (_, d2)| d1.partial_cmp(d2).unwrap_or(Ordering::Equal)); - - // update assignment with closest center found - - let new_assignment = distances.into_iter().next().unwrap().0; - let ptr = atomic_handle.load(atomic::Ordering::Relaxed); - unsafe { std::ptr::write(ptr.add(idx), new_assignment) } - }); - - // update centers position from new assignments - centers - .par_iter_mut() - .zip(center_ids.par_iter()) - .for_each(|(center, id)| { - let new_center = geometry::center( - &permu - .iter() - // TODO: maybe cloning is avoidable here - .map(|&idx| (points[idx], assignments[idx])) - .filter(|(_, point_id)| *id == *point_id) - .map(|(p, _)| p) - .collect::>(), - ); - - *center = new_center; - }); - - // compute cluster weights - let cluster_weights = center_ids - .par_iter() - .map(|id| { - assignments - .iter() - .zip(weights.iter()) - .filter(|(point_id, _)| *id == **point_id) - .fold(0., |acc, (_, weight)| acc + weight) - }) - .collect::>(); - - // update influence - cluster_weights - .par_iter() - .zip(influences.par_iter_mut()) - .for_each(|(cluster_weight, influence)| { - let ratio = target_weight / *cluster_weight as f64; - - let new_influence = *influence / ratio.sqrt(); - let max_diff = 0.05 * *influence; - if (*influence - new_influence).abs() < max_diff { - *influence = new_influence; - } else if new_influence > *influence { - *influence += max_diff; - } else { - *influence -= max_diff; - } - }); - - // update imbalance - imbalance = self::imbalance(&cluster_weights); - } - - 
assignments -} - fn imbalance(weights: &[f64]) -> f64 { match ( weights @@ -216,89 +76,7 @@ impl Default for BalancedKmeansSettings { } } -pub fn balanced_k_means( - points: &[PointND], - weights: &[f64], - settings: impl Into>, -) -> Vec -where - Const: DimSub>, - DefaultAllocator: Allocator, Const, Buffer = ArrayStorage> - + Allocator, Const<1>>>, -{ - let settings = settings.into().unwrap_or_default(); - - // sort points with space filling curve - let sfc_order = (f64::from(std::u32::MAX)).log(f64::from(2u32.pow(D as u32))) as u32; - let mut permu = if settings.hilbert { - unimplemented!("hilbert curve currently not implemented for n-dimension"); - // hilbert_curve::hilbert_curve_reorder(points, 15) - } else { - z_curve::z_curve_reorder(points, sfc_order) - }; - - // Compute how many points will be initially assigned to each cluster - let points_per_center = points.len() / settings.num_partitions; - - // select num_partitions initial centers from the ordered points - let centers: Vec<_> = permu - .iter() - // for each partition yielded by the Z-curve reordering - // we select the median point to be the initial cluster center - // because it is in most cases in the middle of the partition - .skip(points_per_center / 2) - .step_by(points_per_center) - .par_bridge() - .map(|&idx| points[idx]) - .collect(); - - // generate unique ids for each initial partition that will live throughout - // the algorithm (no new id is generated afterwards) - let center_ids: Vec = centers.par_iter().map(|_| crate::uid()).collect(); - - let dummy_id = crate::uid(); - let mut assignments: Vec<_> = permu.par_iter().map(|_| dummy_id).collect(); - let atomic_handle = AtomicPtr::from(assignments.as_mut_ptr()); - permu - .par_chunks(points_per_center) - .zip(center_ids.par_iter()) - .for_each(|(chunk, id)| { - let ptr = atomic_handle.load(atomic::Ordering::Relaxed); - for idx in chunk { - unsafe { std::ptr::write(ptr.add(*idx), *id) } - } - }); - - debug_assert_eq!(assignments.len(), points.len()); - - // Generate initial influences (to 1) - let mut influences: Vec<_> = centers.par_iter().map(|_| 1.).collect(); - - // Generate initial lower and upper bounds. These two variables represent bounds on - // the effective distance between an point and the cluster it is assigned to. - let mut lbs: Vec<_> = points.par_iter().map(|_| 0.).collect(); - let mut ubs: Vec<_> = points.par_iter().map(|_| std::f64::MAX).collect(); // we use f64::MAX to represent infinity - - balanced_k_means_iter( - Inputs { points, weights }, - Clusters { - centers, - center_ids: ¢er_ids, - }, - &mut permu, - AlgorithmState { - assignments: &mut assignments, - influences: &mut influences, - lbs: &mut lbs, - ubs: &mut ubs, - }, - &settings, - settings.max_iter, - ); - assignments -} - -pub fn balanced_k_means_with_initial_partition( +fn balanced_k_means_with_initial_partition( points: &[PointND], weights: &[f64], settings: impl Into>, @@ -737,3 +515,107 @@ fn max_distance(points: &[PointND]) -> f64 { .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)) .unwrap() } + +/// K-means algorithm +/// +/// An implementation of the balanced k-means algorithm inspired from +/// "Balanced k-means for Parallel Geometric Partitioning" by Moritz von Looz, +/// Charilaos Tzovas and Henning Meyerhenke (2018, University of Cologne). 
+///
+/// From an initial partition, the K-means algorithm will generate point clusters that will,
+/// at each iteration, exchange points with other clusters that are "closer", and move by recomputing the cluster's position (defined as
+/// the centroid of the points assigned to the cluster). Eventually the clusters will stop moving, yielding a new partition.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+/// use coupe::Point2D;
+///
+/// let points = [
+///     Point2D::new(0., 0.),
+///     Point2D::new(1., 0.),
+///     Point2D::new(2., 0.),
+///     Point2D::new(0., 5.),
+///     Point2D::new(1., 5.),
+///     Point2D::new(2., 5.),
+///     Point2D::new(0., 10.),
+///     Point2D::new(1., 10.),
+///     Point2D::new(2., 10.),
+/// ];
+/// let weights = [1.; 9];
+///
+/// // create an unbalanced partition:
+/// //  - p1: total weight = 1
+/// //  - p2: total weight = 7
+/// //  - p3: total weight = 1
+/// let mut partition = [1, 2, 2, 2, 2, 2, 2, 2, 3];
+///
+/// coupe::KMeans { part_count: 3, delta_threshold: 0.0, ..Default::default() }
+///     .partition(&mut partition, (&points, &weights))
+///     .unwrap();
+///
+/// assert_eq!(partition[0], partition[1]);
+/// assert_eq!(partition[0], partition[2]);
+///
+/// assert_eq!(partition[3], partition[4]);
+/// assert_eq!(partition[3], partition[5]);
+///
+/// assert_eq!(partition[6], partition[7]);
+/// assert_eq!(partition[6], partition[8]);
+/// ```
+#[derive(Debug, Clone, Copy)]
+pub struct KMeans {
+    pub part_count: usize,
+    pub imbalance_tol: f64,
+    pub delta_threshold: f64,
+    pub max_iter: usize,
+    pub max_balance_iter: usize,
+    pub erode: bool,
+    pub hilbert: bool,
+    pub mbr_early_break: bool,
+}
+
+impl Default for KMeans {
+    fn default() -> Self {
+        Self {
+            part_count: 7,
+            imbalance_tol: 5.,
+            delta_threshold: 0.01,
+            max_iter: 500,
+            max_balance_iter: 20, // for now, `max_balance_iter > 1` yields poor convergence time
+            erode: false, // for now, `erode` enabled yields wrong results
+            hilbert: true,
+            mbr_early_break: false, // for now, `mbr_early_break` enabled yields wrong results
+        }
+    }
+}
+
+impl<'a, const D: usize> crate::Partition<(&'a [PointND<D>], &'a [f64])> for KMeans
+where
+    Const<D>: DimSub<Const<1>>,
+    DefaultAllocator: Allocator<f64, Const<D>, Const<D>, Buffer = ArrayStorage<f64, D, D>>
+        + Allocator<f64, DimDiff<Const<D>, Const<1>>>,
+{
+    type Metadata = ();
+    type Error = std::convert::Infallible;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        (points, weights): (&'a [PointND<D>], &'a [f64]),
+    ) -> Result<Self::Metadata, Self::Error> {
+        let settings = BalancedKmeansSettings {
+            num_partitions: self.part_count,
+            imbalance_tol: self.imbalance_tol,
+            delta_threshold: self.delta_threshold,
+            max_iter: self.max_iter,
+            max_balance_iter: self.max_balance_iter,
+            erode: self.erode,
+            hilbert: self.hilbert,
+            mbr_early_break: self.mbr_early_break,
+        };
+        balanced_k_means_with_initial_partition(points, weights, settings, part_ids);
+        Ok(())
+    }
+}
diff --git a/src/algorithms/kernighan_lin.rs b/src/algorithms/kernighan_lin.rs
index 31654f6d..fe041e0e 100644
--- a/src/algorithms/kernighan_lin.rs
+++ b/src/algorithms/kernighan_lin.rs
@@ -3,18 +3,16 @@
 //! At each iteration, two nodes of different partition will be swapped, decreasing the overall cutsize
 //! of the partition. The swap is performed in such a way that the added partition imbalanced is controlled.
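Editor's note: both the Fiduccia-Mattheyses pass loop above and the Kernighan-Lin implementation below score candidate moves against `crate::topology::cut_size`, whose body is not part of this diff. For readers, here is a minimal sketch of what such a cut-size function computes over a symmetric `sprs` adjacency matrix — an illustration only, not coupe's actual implementation:

```rust
use sprs::CsMatView;

// Sum of the weights of edges whose endpoints lie in different parts.
// Assumes `adjacency` is symmetric, so each undirected edge is stored
// twice; halving corrects for the double count.
fn cut_size(adjacency: CsMatView<f64>, partition: &[usize]) -> f64 {
    let crossing: f64 = adjacency
        .iter()
        .filter(|&(_, (i, j))| partition[i] != partition[j])
        .map(|(w, _)| *w)
        .sum();
    crossing / 2.0
}
```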
-use crate::geometry::PointND;
-use crate::partition::Partition;
-
 use itertools::Itertools;
 use sprs::CsMatView;
-pub(crate) fn kernighan_lin<'a, const D: usize>(
-    initial_partition: &mut Partition<'a, PointND<D>, f64>,
+fn kernighan_lin(
+    part_ids: &mut [usize],
+    weights: &[f64],
     adjacency: CsMatView<f64>,
-    max_passes: impl Into<Option<usize>>,
-    max_flips_per_pass: impl Into<Option<usize>>,
-    max_imbalance_per_flip: impl Into<Option<f64>>,
+    max_passes: Option<usize>,
+    max_flips_per_pass: Option<usize>,
+    max_imbalance_per_flip: Option<f64>,
     max_bad_move_in_a_row: usize,
 ) {
     // To adapt Kernighan-Lin to a partition of more than 2 parts,
@@ -22,15 +20,10 @@
     // are adjacent if there exists an element in one part that is linked to
     // an element in the other part).
-    let max_passes = max_passes.into();
-    let max_flips_per_pass = max_flips_per_pass.into();
-    let max_imbalance_per_flip = max_imbalance_per_flip.into();
-    let (_points, weights, ids) = initial_partition.as_raw_mut();
-
-    kernighan_lin_2_impl::<D>(
+    kernighan_lin_2_impl(
+        part_ids,
         weights,
-        adjacency.view(),
-        ids,
+        adjacency,
         max_passes,
         max_flips_per_pass,
         max_imbalance_per_flip,
@@ -38,10 +31,10 @@
     );
 }
-fn kernighan_lin_2_impl<const D: usize>(
+fn kernighan_lin_2_impl(
+    initial_partition: &mut [usize],
     weights: &[f64],
     adjacency: CsMatView<f64>,
-    initial_partition: &mut [usize],
     max_passes: Option<usize>,
     max_flips_per_pass: Option<usize>,
     _max_imbalance_per_flip: Option<f64>,
@@ -57,7 +50,7 @@
         unimplemented!();
     }
-    let mut cut_size = crate::topology::cut_size(adjacency.view(), initial_partition);
+    let mut cut_size = crate::topology::cut_size(adjacency, initial_partition);
     println!("Initial cut size: {}", cut_size);
     let mut new_cut_size = cut_size;
@@ -180,3 +173,109 @@
     }
     println!("final cut size: {}", new_cut_size);
 }
+
+/// KernighanLin algorithm
+///
+/// An implementation of the Kernighan-Lin topological algorithm
+/// for graph partitioning. The current implementation only handles
+/// partitioning a graph into two parts, as described in the original algorithm in
+/// "An efficient heuristic procedure for partitioning graphs" by B. W. Kernighan and S. Lin.
+///
+/// The algorithm repeats an iterative pass during which several pairs of nodes have
+/// their part assignment swapped in order to reduce the cutsize of the partition.
+/// If all the nodes are equally weighted, the algorithm preserves the partition balance.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+/// use coupe::Point2D;
+/// use sprs::CsMat;
+///
+/// //    swap
+/// //  0  1  0  1
+/// // +--+--+--+
+/// // |  |  |  |
+/// // +--+--+--+
+/// //  0  0  1  1
+/// let points = [
+///     Point2D::new(0., 0.),
+///     Point2D::new(1., 0.),
+///     Point2D::new(2., 0.),
+///     Point2D::new(3., 0.),
+///     Point2D::new(0., 1.),
+///     Point2D::new(1., 1.),
+///     Point2D::new(2., 1.),
+///     Point2D::new(3., 1.),
+/// ];
+/// let weights = [1.; 8];
+/// let mut partition = [0, 0, 1, 1, 0, 1, 0, 1];
+///
+/// let mut adjacency = CsMat::empty(sprs::CSR, 8);
+/// adjacency.reserve_outer_dim(8);
+/// eprintln!("shape: {:?}", adjacency.shape());
+/// adjacency.insert(0, 1, 1.);
+/// adjacency.insert(1, 2, 1.);
+/// adjacency.insert(2, 3, 1.);
+/// adjacency.insert(4, 5, 1.);
+/// adjacency.insert(5, 6, 1.);
+/// adjacency.insert(6, 7, 1.);
+/// adjacency.insert(0, 4, 1.);
+/// adjacency.insert(1, 5, 1.);
+/// adjacency.insert(2, 6, 1.);
+/// adjacency.insert(3, 7, 1.);
+///
+/// // symmetry
+/// adjacency.insert(1, 0, 1.);
+/// adjacency.insert(2, 1, 1.);
+/// adjacency.insert(3, 2, 1.);
+/// adjacency.insert(5, 4, 1.);
+/// adjacency.insert(6, 5, 1.);
+/// adjacency.insert(7, 6, 1.);
+/// adjacency.insert(4, 0, 1.);
+/// adjacency.insert(5, 1, 1.);
+/// adjacency.insert(6, 2, 1.);
+/// adjacency.insert(7, 3, 1.);
+///
+/// // 1 iteration
+/// coupe::KernighanLin {
+///     max_passes: Some(1),
+///     max_flips_per_pass: Some(1),
+///     max_imbalance_per_flip: None,
+///     max_bad_move_in_a_row: 1,
+/// }
+/// .partition(&mut partition, (adjacency.view(), &weights))
+/// .unwrap();
+///
+/// assert_eq!(partition[5], 0);
+/// assert_eq!(partition[6], 1);
+/// ```
+#[derive(Debug, Clone, Copy, Default)]
+pub struct KernighanLin {
+    pub max_passes: Option<usize>,
+    pub max_flips_per_pass: Option<usize>,
+    pub max_imbalance_per_flip: Option<f64>,
+    pub max_bad_move_in_a_row: usize,
+}
+
+impl<'a> crate::Partition<(CsMatView<'a, f64>, &'a [f64])> for KernighanLin {
+    type Metadata = ();
+    type Error = std::convert::Infallible;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        (adjacency, weights): (CsMatView<f64>, &'a [f64]),
+    ) -> Result<Self::Metadata, Self::Error> {
+        kernighan_lin(
+            part_ids,
+            weights,
+            adjacency,
+            self.max_passes,
+            self.max_flips_per_pass,
+            self.max_imbalance_per_flip,
+            self.max_bad_move_in_a_row,
+        );
+        Ok(())
+    }
+}
diff --git a/src/algorithms/kk.rs b/src/algorithms/kk.rs
index ce091aa3..60b970fc 100644
--- a/src/algorithms/kk.rs
+++ b/src/algorithms/kk.rs
@@ -1,3 +1,4 @@
+use super::Error;
 use std::collections::BinaryHeap;
 use std::ops::Sub;
 use std::ops::SubAssign;
@@ -9,7 +10,7 @@
 use num::Zero;
 /// # Differences with the k-partitioning implementation
 ///
 /// This function has better performance than [kk] called with `num_parts == 2`.
-fn kk_bipart<T>(partition: &mut [usize], weights: impl IntoIterator<Item = T>)
+fn kk_bipart<T>(partition: &mut [usize], weights: impl Iterator<Item = T>)
 where
     T: Ord + Sub<Output = T>,
 {
@@ -17,10 +18,6 @@ where
         .into_iter()
         .zip(0..) // Keep track of the weights' indicies
         .collect();
-    assert_eq!(partition.len(), weights.len());
-    if weights.is_empty() {
-        return;
-    }
     // Core algorithm: find the imbalance of the partition. "opposites" is built in this
     // loop to backtrack the solution.
    // It tracks weights that must end up in opposite parts.
@@ -51,33 +48,18 @@
 }
 /// Implementation of the Karmarkar-Karp algorithm
-pub fn kk<I, T>(partition: &mut [usize], weights: I, num_parts: usize)
+fn kk<I, T>(partition: &mut [usize], weights: I, num_parts: usize)
 where
     T: Zero + Ord + Sub<Output = T> + SubAssign + Copy,
-    I: IntoIterator<Item = T>,
-    <I as IntoIterator>::IntoIter: ExactSizeIterator,
+    I: Iterator<Item = T> + ExactSizeIterator,
 {
-    let weights = weights.into_iter();
-    let num_weights = weights.len();
-    assert_eq!(partition.len(), num_weights);
-    if num_weights == 0 {
-        return;
-    }
-    if num_parts < 2 {
-        return;
-    }
-    if num_parts == 2 {
-        // The bi-partitioning is a special case that can be handled faster than
-        // the general case.
-        return kk_bipart(partition, weights);
-    }
-
-    // Initialize "m", a "k*num_weights" matrix whose first column is "weights".
+    let weight_count = weights.len();
     let mut m: BinaryHeap<Vec<(T, usize)>> = weights
         .zip(0..)
         .map(|(w, id)| {
             let mut v: Vec<(T, usize)> = (0..num_parts)
-                .map(|p| (T::zero(), num_weights * p + id))
+                .map(|p| (T::zero(), weight_count * p + id))
                 .collect();
             v[0].0 = w;
             v
@@ -88,7 +70,7 @@
     // largest weights in two different parts, the largest weight of each row is put into the same
     // part as the smallest one, and so on.
-    let mut opposites = Vec::with_capacity(num_weights);
+    let mut opposites = Vec::with_capacity(weight_count);
     while 2 <= m.len() {
         let a = m.pop().unwrap();
         let b = m.pop().unwrap();
@@ -119,7 +101,7 @@
     // Backtracking. Same as the bi-partitioning case.
     // parts = [ [m0i] for m0i in m[0] ]
-    let mut parts: Vec<usize> = vec![0; num_parts * num_weights];
+    let mut parts: Vec<usize> = vec![0; num_parts * weight_count];
     let imbalance = m.pop().unwrap(); // first and last element of "m".
     for (i, w) in imbalance.into_iter().enumerate() {
         // Put each remaining element in a different part.
@@ -134,3 +116,56 @@
     parts.truncate(partition.len());
     partition.copy_from_slice(&parts);
 }
+
+/// The Karmarkar-Karp algorithm, also called the Largest Differencing Method.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+///
+/// let weights = [3, 5, 3, 9];
+/// let mut partition = [0; 4];
+///
+/// coupe::KarmarkarKarp { part_count: 3 }
+///     .partition(&mut partition, weights)
+///     .unwrap();
+/// ```
+pub struct KarmarkarKarp {
+    pub part_count: usize,
+}
+
+impl<W> crate::Partition<W> for KarmarkarKarp
+where
+    W: IntoIterator,
+    W::IntoIter: ExactSizeIterator,
+    W::Item: Zero + Ord + Sub<Output = W::Item> + SubAssign + Copy,
+{
+    type Metadata = ();
+    type Error = Error;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        weights: W,
+    ) -> Result<Self::Metadata, Self::Error> {
+        if self.part_count < 2 || part_ids.len() < 2 {
+            return Ok(());
+        }
+        let weights = weights.into_iter();
+        if weights.len() != part_ids.len() {
+            return Err(Error::InputLenMismatch {
+                expected: part_ids.len(),
+                actual: weights.len(),
+            });
+        }
+        if self.part_count == 2 {
+            // The bi-partitioning is a special case that can be handled faster
+            // than the general case.
+            kk_bipart(part_ids, weights);
+        } else {
+            kk(part_ids, weights, self.part_count);
+        }
+        Ok(())
+    }
+}
diff --git a/src/algorithms/multi_jagged.rs b/src/algorithms/multi_jagged.rs
index 26018af3..cfd3acaf 100644
--- a/src/algorithms/multi_jagged.rs
+++ b/src/algorithms/multi_jagged.rs
@@ -145,35 +145,34 @@ fn compute_modifiers(
         .collect()
 }
-pub fn multi_jagged<const D: usize>(
+fn multi_jagged<const D: usize>(
+    partition: &mut [usize],
     points: &[PointND<D>],
     weights: &[f64],
     num_parts: usize,
     max_iter: usize,
-) -> Vec<usize> {
+) {
     let partition_scheme = partition_scheme(num_parts, max_iter);
-    multi_jagged_with_scheme(points, weights, partition_scheme)
+    multi_jagged_with_scheme(partition, points, weights, partition_scheme);
 }
 fn multi_jagged_with_scheme<const D: usize>(
+    partition: &mut [usize],
     points: &[PointND<D>],
     weights: &[f64],
     partition_scheme: PartitionScheme,
-) -> Vec<usize> {
+) {
     let len = points.len();
     let mut permutation = (0..len).into_par_iter().collect::<Vec<_>>();
-    let mut initial_partition = vec![0; points.len()];
     multi_jagged_recurse(
         points,
         weights,
         &mut permutation,
-        &AtomicPtr::new(initial_partition.as_mut_ptr()),
+        &AtomicPtr::new(partition.as_mut_ptr()),
         0,
         partition_scheme,
     );
-
-    initial_partition
 }
 fn multi_jagged_recurse(
@@ -322,6 +321,73 @@ pub(crate) fn split_at_mut_many<'a, T>(
     head
 }
+/// # Multi-Jagged algorithm
+///
+/// This algorithm is inspired by "Multi-Jagged: A Scalable Parallel Spatial Partitioning Algorithm"
+/// by Mehmet Deveci, Sivasankaran Rajamanickam, Karen D. Devine, Umit V. Catalyurek.
+///
+/// It improves over [RCB](struct.Rcb.html) by following the same idea but by creating more than two subparts
+/// in each iteration, which decreases the recursion depth. It also allows generating a partition
+/// with any number of parts.
+///
+/// More precisely, given a number of parts, the algorithm will generate a "partition scheme", which describes how
+/// to perform splits at each iteration, such that the total number of iterations is less than `max_iter`.
+///
+/// More iterations do not necessarily result in a better partition.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+/// use coupe::Point2D;
+///
+/// let points = vec![
+///     Point2D::new(0., 0.),
+///     Point2D::new(1., 0.),
+///     Point2D::new(2., 0.),
+///     Point2D::new(0., 1.),
+///     Point2D::new(1., 1.),
+///     Point2D::new(2., 1.),
+///     Point2D::new(0., 2.),
+///     Point2D::new(1., 2.),
+///     Point2D::new(2., 2.),
+/// ];
+/// let weights = [4.2; 9];
+/// let mut partition = [0; 9];
+///
+/// // generate a partition of 9 parts
+/// coupe::MultiJagged { part_count: 9, max_iter: 4 }
+///     .partition(&mut partition, (&points, &weights))
+///     .unwrap();
+///
+/// for i in 0..9 {
+///     for j in 0..9 {
+///         if j == i {
+///             continue
+///         }
+///         assert_ne!(partition[i], partition[j])
+///     }
+/// }
+/// ```
+pub struct MultiJagged {
+    pub part_count: usize,
+    pub max_iter: usize,
+}
+
+impl<'a, const D: usize> crate::Partition<(&'a [PointND<D>], &'a [f64])> for MultiJagged {
+    type Metadata = ();
+    type Error = std::convert::Infallible;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        (points, weights): (&'a [PointND<D>], &'a [f64]),
+    ) -> Result<Self::Metadata, Self::Error> {
+        multi_jagged(part_ids, points, weights, self.part_count, self.max_iter);
+        Ok(())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/algorithms/recursive_bisection.rs b/src/algorithms/recursive_bisection.rs
index f4a8ccb9..eaf1acaa 100644
--- a/src/algorithms/recursive_bisection.rs
+++ b/src/algorithms/recursive_bisection.rs
@@ -1,3 +1,4 @@
+use super::Error;
 use crate::geometry::Mbr;
 use crate::geometry::PointND;
 use async_lock::Mutex;
@@ -9,6 +10,7 @@
 use nalgebra::Const;
 use nalgebra::DefaultAllocator;
 use nalgebra::DimDiff;
 use nalgebra::DimSub;
+use nalgebra::ToTypenum;
 use rayon::prelude::*;
 use std::cmp;
 use std::future::Future;
@@ -161,7 +163,7 @@ where
     .map(|item| item.point[coord])
     .minmax()
     .into_option()
-    .unwrap();
+    .unwrap(); // Won't panic because items has at least two elements.
     mem::drop(enter);
@@ -500,7 +502,12 @@ fn rcb_thread(
     futures_lite::future::block_on(task);
 }
-pub fn rcb<const D: usize, P, W>(partition: &mut [usize], points: P, weights: W, iter_count: usize)
+fn rcb<const D: usize, P, W>(
+    partition: &mut [usize],
+    points: P,
+    weights: W,
+    iter_count: usize,
+) -> Result<(), Error>
 where
     P: rayon::iter::IntoParallelIterator<Item = PointND<D>>,
     P::Iter: rayon::iter::IndexedParallelIterator,
@@ -513,8 +520,18 @@ where
     let points = points.into_par_iter();
     let weights = weights.into_par_iter();
-    assert_eq!(points.len(), weights.len());
-    assert_eq!(points.len(), partition.len());
+    if weights.len() != partition.len() {
+        return Err(Error::InputLenMismatch {
+            expected: partition.len(),
+            actual: weights.len(),
+        });
+    }
+    if points.len() != partition.len() {
+        return Err(Error::InputLenMismatch {
+            expected: partition.len(),
+            actual: points.len(),
+        });
+    }
     let init_span = tracing::info_span!("convert input and make initial data structures");
     let enter = init_span.enter();
@@ -545,6 +562,77 @@ where
             s.spawn(move |_| rcb_thread(iteration_ctxs, chunk, iter_count, 0.05));
         }
     });
+
+    Ok(())
+}
+
+/// # Recursive Coordinate Bisection algorithm
+///
+/// Partitions a mesh based on the nodes' coordinates and corresponding weights.
+///
+/// This is the simplest and most straightforward geometric algorithm. It operates as follows for an N-dimensional set of points:
+///
+/// At each iteration, select a vector `n` of the canonical basis `(e_0, ..., e_{n-1})`. Then, split the set of points with a hyperplane orthogonal
+/// to `n`, such that the two parts of the split are evenly weighted.
Finally, recurse by reapplying the algorithm to the two parts with another
+/// normal vector selection.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+/// use coupe::Point2D;
+///
+/// let points = [
+///     Point2D::new(1., 1.),
+///     Point2D::new(-1., 1.),
+///     Point2D::new(1., -1.),
+///     Point2D::new(-1., -1.),
+/// ];
+/// let weights = [1; 4];
+/// let mut partition = [0; 4];
+///
+/// // generate a partition of 4 parts
+/// coupe::Rcb { iter_count: 2 }
+///     .partition(&mut partition, (points, weights))
+///     .unwrap();
+///
+/// for i in 0..4 {
+///     for j in 0..4 {
+///         if j == i {
+///             continue
+///         }
+///         assert_ne!(partition[i], partition[j])
+///     }
+/// }
+/// ```
+pub struct Rcb {
+    pub iter_count: usize,
+}
+
+impl<const D: usize, P, W> crate::Partition<(P, W)> for Rcb
+where
+    P: rayon::iter::IntoParallelIterator<Item = PointND<D>>,
+    P::Iter: rayon::iter::IndexedParallelIterator,
+    W: rayon::iter::IntoParallelIterator,
+    W::Item: Copy + std::fmt::Debug + Default,
+    W::Item: Add<Output = W::Item> + AddAssign + Sub<Output = W::Item> + Sum + PartialOrd,
+    W::Item: num::ToPrimitive,
+    W::Iter: rayon::iter::IndexedParallelIterator,
+{
+    type Metadata = ();
+    type Error = Error;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        (points, weights): (P, W),
+    ) -> Result<Self::Metadata, Self::Error> {
+        if part_ids.len() < 2 {
+            // Would make Itertools::minmax().into_option() return None.
+            return Ok(());
+        }
+        rcb(part_ids, points, weights, self.iter_count)
+    }
}

 // pub because it is also useful for multijagged and required for benchmarks
 pub fn axis_sort(
@@ -580,12 +668,13 @@
 /// The global shape of the data is first considered and the separator is computed to
 /// be parallel to the inertia axis of the global shape, which aims to lead to better shaped
 /// partitions.
-pub fn rib<const D: usize, W>(
+fn rib<const D: usize, W>(
     partition: &mut [usize],
     points: &[PointND<D>],
     weights: W,
     n_iter: usize,
-) where
+) -> Result<(), Error>
+where
     Const<D>: DimSub<Const<1>>,
     DefaultAllocator: Allocator<f64, Const<D>, Const<D>, Buffer = ArrayStorage<f64, D, D>>
         + Allocator<f64, DimDiff<Const<D>, Const<1>>>,
@@ -595,19 +684,79 @@
     W::Item: num::ToPrimitive,
     W::Iter: rayon::iter::IndexedParallelIterator,
 {
-    let weights = weights.into_par_iter();
-
-    assert_eq!(points.len(), weights.len());
-    assert_eq!(points.len(), partition.len());
-
     let mbr = Mbr::from_points(points);
-
     let points = points.par_iter().map(|p| mbr.mbr_to_aabb(p));
-
     // When the rotation is done, we just apply RCB
     rcb(partition, points, weights, n_iter)
 }
+/// # Recursive Inertial Bisection algorithm
+///
+/// Partitions a mesh based on the nodes' coordinates and corresponding weights.
+///
+/// This is a variant of the [`Rcb`](struct.Rcb.html) algorithm, where a basis change is performed beforehand so that
+/// the first coordinate of the new basis is collinear to the inertia axis of the set of points. This has the goal
+/// of producing better shaped partitions than [Rcb](struct.Rcb.html).
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+/// use coupe::Point2D;
+///
+/// // Here, the inertia axis is the y axis.
+/// // We thus expect Rib to split horizontally first.
+/// let points = [
+///     Point2D::new(1., 10.),
+///     Point2D::new(-1., 10.),
+///     Point2D::new(1., -10.),
+///     Point2D::new(-1., -10.),
+/// ];
+/// let weights = [1; 4];
+/// let mut partition = [0; 4];
+///
+/// // generate a partition of 2 parts (1 split)
+/// coupe::Rib { iter_count: 1 }
+///     .partition(&mut partition, (&points, weights))
+///     .unwrap();
+///
+/// // the two points at the top are in the same part
+/// assert_eq!(partition[0], partition[1]);
+///
+/// // the two points at the bottom are in the same part
+/// assert_eq!(partition[2], partition[3]);
+///
+/// // there are two different parts
+/// assert_ne!(partition[1], partition[2]);
+/// ```
+pub struct Rib {
+    /// The number of iterations of the algorithm. This will yield a partition of `2^iter_count` parts.
+    pub iter_count: usize,
+}
+
+impl<'a, const D: usize, W> crate::Partition<(&'a [PointND<D>], W)> for Rib
+where
+    Const<D>: DimSub<Const<1>> + ToTypenum,
+    DefaultAllocator: Allocator<f64, Const<D>, Const<D>, Buffer = ArrayStorage<f64, D, D>>
+        + Allocator<f64, DimDiff<Const<D>, Const<1>>>,
+    W: rayon::iter::IntoParallelIterator,
+    W::Item: Copy + std::fmt::Debug + Default,
+    W::Item: Add<Output = W::Item> + AddAssign + Sub<Output = W::Item> + Sum + PartialOrd,
+    W::Item: num::ToPrimitive,
+    W::Iter: rayon::iter::IndexedParallelIterator,
+{
+    type Metadata = ();
+    type Error = Error;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        (points, weights): (&'a [PointND<D>], W),
+    ) -> Result<Self::Metadata, Self::Error> {
+        rib(part_ids, points, weights, self.iter_count)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -664,7 +813,8 @@
         .num_threads(1) // make the test deterministic
         .build()
         .unwrap()
-        .install(|| rcb(&mut partition, points, weights, 2));
+        .install(|| rcb(&mut partition, points, weights, 2))
+        .unwrap();
     assert_eq!(partition[0], partition[6]);
     assert_eq!(partition[1], partition[7]);
@@ -694,7 +844,7 @@
     let weights: Vec<f64> = (0..points.len()).map(|_| rand::random()).collect();
     let mut partition = vec![0; points.len()];
-    rcb(&mut partition, points, weights.par_iter().cloned(), 3);
+    rcb(&mut partition, points, weights.par_iter().cloned(), 3).unwrap();
     let mut loads: HashMap<usize, f64> = HashMap::new();
     let mut sizes: HashMap<usize, usize> = HashMap::new();
diff --git a/src/algorithms/vn/best.rs b/src/algorithms/vn/best.rs
index 3c519413..e8054533 100644
--- a/src/algorithms/vn/best.rs
+++ b/src/algorithms/vn/best.rs
@@ -1,5 +1,4 @@
 use crate::imbalance::compute_parts_load;
-use crate::RunInfo;
 use itertools::Itertools;
 use num::One;
 use num::Zero;
@@ -8,10 +7,10 @@
 use std::ops::Div;
 use std::ops::Mul;
 use std::ops::Sub;
-pub fn vn_best_mono<T>(partition: &mut [usize], criterion: &[T], nb_parts: usize) -> RunInfo
+fn vn_best_mono<W, T>(partition: &mut [usize], criterion: W, nb_parts: usize) -> usize
 where
+    W: IntoIterator<Item = T>,
     T: AddAssign + Sub<Output = T> + Div<Output = T> + Mul<Output = T>,
-    for<'a> &'a T: Sub<Output = T>,
     T: Zero + One,
     T: Ord + Copy,
 {
     let two = {
         let one = T::one();
         one + one
     };
+    let mut criterion: Vec<(T, usize)> = criterion.into_iter().zip(0..).collect();
+
     assert_eq!(partition.len(), criterion.len());
     // We expect weights to be non-negative values
-    assert!(criterion.iter().all(|weight| *weight >= T::zero()));
+    assert!(criterion
+        .iter()
+        .all(|(weight, _weight_id)| *weight >= T::zero()));
     // We check if all weights are 0 because this screw the gain table
     // initialization and a quick fix could interfere with the algorithm.
     if partition.is_empty()
         || criterion.is_empty()
-        || criterion.iter().all(|item| item.is_zero())
+        || criterion
+            .iter()
+            .all(|(weight, _weight_id)| weight.is_zero())
         || nb_parts < 2
     {
-        return RunInfo::skip();
+        return 0;
     }
-    let mut part_loads = compute_parts_load(partition, nb_parts, criterion);
-    let mut criterion: Vec<(T, usize)> = criterion.iter().copied().zip(0..).collect();
+    let mut part_loads = compute_parts_load(
+        partition,
+        nb_parts,
+        criterion.iter().map(|(weight, _weight_id)| *weight),
+    );
     criterion.sort_unstable();
     let mut algo_iterations = 0;
@@ -109,8 +117,52 @@
         algo_iterations += 1;
     }
-    RunInfo {
-        algo_iterations: Some(algo_iterations),
+    algo_iterations
+}
+
+/// Greedy Vector-of-Numbers algorithm.
+///
+/// This algorithm greedily moves weights from part to part in such a way that
+/// the imbalance gain is maximized on each step.
+///
+/// # Example
+///
+/// ```rust
+/// use coupe::Partition as _;
+/// use rand;
+///
+/// let part_count = 2;
+/// let mut partition = [0; 4];
+/// let weights = [4, 6, 2, 9];
+///
+/// coupe::Random { rng: rand::thread_rng(), part_count }
+///     .partition(&mut partition, ())
+///     .unwrap();
+/// coupe::VnBest { part_count }
+///     .partition(&mut partition, weights)
+///     .unwrap();
+/// ```
+pub struct VnBest {
+    pub part_count: usize,
+}
+
+impl<W> crate::Partition<W> for VnBest
+where
+    W: IntoIterator,
+    W::Item: AddAssign + Sub<Output = W::Item> + Div<Output = W::Item> + Mul<Output = W::Item>,
+    W::Item: Zero + One,
+    W::Item: Ord + Copy,
+{
+    type Metadata = usize;
+    type Error = std::convert::Infallible;
+
+    fn partition(
+        &mut self,
+        part_ids: &mut [usize],
+        weights: W,
+    ) -> Result<Self::Metadata, Self::Error> {
+        let algo_iterations = vn_best_mono(part_ids, weights, self.part_count);
+        Ok(algo_iterations)
+    }
+}
@@ -126,7 +178,7 @@
 mod tests {
     let w = [1, 2, 3, 4, 5, 6];
     let mut part = vec![0; w.len()];
     let imb_ini = imbalance::imbalance(2, &part, &w);
-    vn_best_mono::<u32>(&mut part, &w, 2);
+    vn_best_mono(&mut part, w, 2);
     let imb_end = imbalance::imbalance(2, &part, &w);
     assert!(imb_end < imb_ini);
     println!("imbalance : {} < {}", imb_end, imb_ini);
@@ -144,9 +196,9 @@
             prop::collection::vec(0..1usize, num_weights))
     })
     ) {
-        let imb_ini = imbalance::max_imbalance(2, &partition, &weights);
-        vn_best_mono::<u64>(&mut partition, &weights, 2);
-        let imb_end = imbalance::max_imbalance(2, &partition, &weights);
+        let imb_ini = imbalance::max_imbalance(2, &partition, weights.iter().cloned());
+        vn_best_mono::<_, u64>(&mut partition, weights.iter().cloned(), 2);
+        let imb_end = imbalance::max_imbalance(2, &partition, weights.iter().cloned());
         // Not sure if it is true for max_imbalance (i.e.
heavier - lighter) proptest::prop_assert!(imb_end <= imb_ini, "{} < {}", imb_ini, imb_end); } diff --git a/src/algorithms/vn/mod.rs b/src/algorithms/vn/mod.rs index 3e42fd2c..22f4211c 100644 --- a/src/algorithms/vn/mod.rs +++ b/src/algorithms/vn/mod.rs @@ -1 +1,3 @@ pub mod best; + +pub use best::VnBest; diff --git a/src/algorithms/z_curve.rs b/src/algorithms/z_curve.rs index c8b3ebbb..98492a84 100644 --- a/src/algorithms/z_curve.rs +++ b/src/algorithms/z_curve.rs @@ -25,6 +25,7 @@ use nalgebra::Const; use nalgebra::DefaultAllocator; use nalgebra::DimDiff; use nalgebra::DimSub; +use nalgebra::ToTypenum; use rayon::prelude::*; use std::cmp::Ordering; @@ -36,13 +37,13 @@ use std::sync::atomic::{self, AtomicPtr}; type HashType = u128; const HASH_TYPE_MAX: HashType = std::u128::MAX; -pub fn z_curve_partition( +fn z_curve_partition( + partition: &mut [usize], points: &[PointND], - num_partitions: usize, + part_count: usize, order: u32, -) -> Vec -where - Const: DimSub>, +) where + Const: DimSub> + ToTypenum, DefaultAllocator: Allocator, Const, Buffer = ArrayStorage> + Allocator, Const<1>>>, { @@ -58,15 +59,13 @@ where let mut permutation: Vec<_> = (0..points.len()).into_par_iter().collect(); - let mut ininial_partition = vec![0; points.len()]; - // reorder points z_curve_partition_recurse(points, order, &mbr, &mut permutation); - let points_per_partition = points.len() / num_partitions; - let remainder = points.len() % num_partitions; + let points_per_partition = points.len() / part_count; + let remainder = points.len() % part_count; - let atomic_handle = AtomicPtr::from(ininial_partition.as_mut_ptr()); + let atomic_handle = AtomicPtr::from(partition.as_mut_ptr()); // give an id to each partition // @@ -85,8 +84,6 @@ where unsafe { std::ptr::write(ptr.add(*idx), id) } } }); - - ininial_partition } // reorders `permu` to sort points by increasing z-curve hash @@ -189,6 +186,63 @@ fn compute_hash(point: &PointND, order: u32, mbr: &Mbr) -> { } } +/// # Z space-filling curve algorithm +/// +/// The Z-curve uses space hashing to partition points. The points in the same part of a partition +/// have the same Z-hash. This hash is computed by recursively constructing an N-dimensional region tree.
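The reworked `z_curve_partition` above keeps its lock-free output scheme: once the permutation is sorted along the curve, chunks of it are assigned part IDs in parallel through a shared raw pointer. A stripped-down sketch of that pattern; `assign_part_ids` is a hypothetical helper, and it assumes every index is in bounds and appears in exactly one chunk:

```rust
use rayon::prelude::*;
use std::sync::atomic::{AtomicPtr, Ordering};

// Each chunk owns a disjoint set of indices into `out`, so the parallel
// writes below never alias even though they all go through one pointer.
fn assign_part_ids(out: &mut [usize], chunks: &[Vec<usize>]) {
    let handle = AtomicPtr::from(out.as_mut_ptr());
    chunks.par_iter().enumerate().for_each(|(part_id, chunk)| {
        let ptr = handle.load(Ordering::Relaxed);
        for &idx in chunk {
            // SAFETY: idx < out.len() and no index appears in two chunks.
            unsafe { std::ptr::write(ptr.add(idx), part_id) }
        }
    });
}
```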
+/// +/// # Example +/// +/// ```rust +/// use coupe::Partition as _; +/// use coupe::Point2D; +/// +/// let points = [ +/// Point2D::new(0., 0.), +/// Point2D::new(1., 1.), +/// Point2D::new(0., 10.), +/// Point2D::new(1., 9.), +/// Point2D::new(9., 1.), +/// Point2D::new(10., 0.), +/// Point2D::new(10., 10.), +/// Point2D::new(9., 9.), +/// ]; +/// let mut partition = [0; 8]; +/// +/// // generate a partition of 4 parts +/// coupe::ZCurve { part_count: 4, order: 5 } +/// .partition(&mut partition, &points) +/// .unwrap(); +/// +/// assert_eq!(partition[0], partition[1]); +/// assert_eq!(partition[2], partition[3]); +/// assert_eq!(partition[4], partition[5]); +/// assert_eq!(partition[6], partition[7]); +/// ``` +pub struct ZCurve { + pub part_count: usize, + pub order: u32, +} + +impl<'a, const D: usize> crate::Partition<&'a [PointND]> for ZCurve +where + Const: DimSub> + ToTypenum, + DefaultAllocator: Allocator, Const, Buffer = ArrayStorage> + + Allocator, Const<1>>>, +{ + type Metadata = (); + type Error = std::convert::Infallible; + + fn partition( + &mut self, + part_ids: &mut [usize], + points: &'a [PointND], + ) -> Result { + z_curve_partition(part_ids, points, self.part_count, self.order); + Ok(()) + } +} + #[cfg(test)] mod tests { use super::*; @@ -196,7 +250,7 @@ mod tests { #[test] fn test_partition() { - let points = vec![ + let points = [ Point2D::from([0., 0.]), Point2D::from([20., 10.]), Point2D::from([0., 10.]), @@ -207,8 +261,9 @@ mod tests { Point2D::from([4., 2.]), ]; - let ids = z_curve_partition(&points, 4, 1); - for id in ids.iter() { + let mut ids = [0; 8]; + z_curve_partition(&mut ids, &points, 4, 1); + for id in ids { println!("{}", id); } assert_eq!(ids[0], ids[7]); diff --git a/src/geometry.rs b/src/geometry.rs index 67ea48b0..6bd09e11 100644 --- a/src/geometry.rs +++ b/src/geometry.rs @@ -1,7 +1,6 @@ //! A few useful geometric types use approx::Ulps; -use itertools::Itertools; use nalgebra::allocator::Allocator; use nalgebra::ArrayStorage; use nalgebra::Const; @@ -125,24 +124,6 @@ impl Aabb { Self { p_min, p_max } } - pub fn aspect_ratio(&self) -> f64 { - use itertools::MinMaxResult::*; - - let (min, max) = match self - .p_min() - .iter() - .zip(self.p_max().iter()) - .map(|(min, max)| (max - min).abs()) - .minmax() - { - MinMax(min, max) => (min, max), - _ => unimplemented!(), - }; - - // TODO: What if min == 0.0 ? - max / min - } - pub fn contains(&self, point: &PointND) -> bool { let eps = 10. * std::f64::EPSILON; self.p_min @@ -261,14 +242,6 @@ impl Mbr { } } - /// Computes the aspect ratio of the Mbr. - /// - /// It is defined as follows: - /// `ratio = long_side / short_side` - pub fn aspect_ratio(&self) -> f64 { - self.aabb.aspect_ratio() - } - /// Computes the distance between a point and the current mbr. 
pub fn distance_to_point(&self, point: &PointND) -> f64 { self.aabb.distance_to_point(&(self.mbr_to_aabb * point)) @@ -377,6 +350,7 @@ mod tests { use super::*; use approx::assert_relative_eq; use approx::assert_ulps_eq; + use itertools::Itertools as _; use nalgebra::Matrix2; #[test] @@ -390,11 +364,9 @@ mod tests { ]; let aabb = Aabb::from_points(&points); - let aspect_ratio = aabb.aspect_ratio(); assert_ulps_eq!(aabb.p_min, Point2D::from([0., 0.])); assert_ulps_eq!(aabb.p_max, Point2D::from([5., 5.])); - assert_ulps_eq!(aspect_ratio, 1.); } #[test] @@ -411,21 +383,6 @@ mod tests { let _aabb = Aabb::from_points(&points); } - #[test] - fn test_mbr_2d() { - let points = vec![ - Point2D::from([5., 3.]), - Point2D::from([0., 0.]), - Point2D::from([1., -1.]), - Point2D::from([4., 4.]), - ]; - - let mbr = Mbr::from_points(&points); - let aspect_ratio = mbr.aspect_ratio(); - - assert_relative_eq!(aspect_ratio, 4., epsilon = 1e-14); - } - #[test] fn test_inertia_matrix() { let points = vec![ @@ -567,31 +524,9 @@ mod tests { ]; let aabb = Aabb::from_points(&points); - let aspect_ratio = aabb.aspect_ratio(); assert_ulps_eq!(aabb.p_min, Point3D::from([0., 0., -2.])); assert_ulps_eq!(aabb.p_max, Point3D::from([5., 5., 5.])); - assert_ulps_eq!(aspect_ratio, 7. / 5.); - } - - #[test] - fn test_mbr_3d() { - let points = vec![ - Point3D::from([5., 3., 0.]), - Point3D::from([0., 0., 0.]), - Point3D::from([1., -1., 0.]), - Point3D::from([4., 4., 0.]), - Point3D::from([5., 3., 2.]), - Point3D::from([0., 0., 2.]), - Point3D::from([1., -1., 2.]), - Point3D::from([4., 4., 2.]), - ]; - - let mbr = Mbr::from_points(&points); - println!("mbr = {:?}", mbr); - let aspect_ratio = mbr.aspect_ratio(); - - assert_relative_eq!(aspect_ratio, 4., epsilon = 1e-14); } #[test] diff --git a/src/imbalance.rs b/src/imbalance.rs index 1fad5f4f..ba4ef189 100644 --- a/src/imbalance.rs +++ b/src/imbalance.rs @@ -12,14 +12,14 @@ use num::Zero; pub fn compute_parts_load( partition: &[usize], num_parts: usize, - weights: &[T], + weights: impl IntoIterator, ) -> Vec { debug_assert!(*partition.iter().max().unwrap_or(&0) < num_parts); partition .iter() .zip(weights) .fold(vec![T::zero(); num_parts], |mut acc, (&part, w)| { - acc[part] += w.clone(); + acc[part] += w; acc }) } @@ -64,7 +64,7 @@ where pub fn imbalance_target + PartialOrd + Copy>( targets: &[T], partition: &[usize], - weights: &[T], + weights: impl IntoIterator, ) -> T { let num_parts = targets.len(); debug_assert!(*partition.iter().max().unwrap_or(&0) < num_parts); @@ -81,7 +81,7 @@ pub fn imbalance_target + Pa pub fn max_imbalance>( num_parts: usize, partition: &[usize], - weights: &[T], + weights: impl IntoIterator, ) -> T { compute_parts_load(partition, num_parts, weights) .iter() diff --git a/src/lib.rs b/src/lib.rs index dc39216b..c0470515 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,10 +2,9 @@ //! //! # Crate Layout //! -//! Coupe exposes each algorithm with a struct that implements a trait. There are currently two traits available: -//! -//! - A [`Partitioner`] represents an algorithm that will generate a partition given a set of geometric points and weights. -//! - A [`PartitionImprover`] represents an algorithm that will improve an existing partition (previously generated with a [`Partitioner`]). +//! Coupe exposes a [`Partition`] trait, which is in turn implemented by +//! algorithms. See its documentation for more details. The trait is generic over its input, which means algorithms +//! can partition different types of collections (e.g. 2D and 3D meshes). +//!
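Since the crate docs now hinge on the `Partition` trait (its full definition appears in the lib.rs hunks below), here is a toy sketch of what implementing it looks like. `RoundRobin` is hypothetical and only illustrates the shape of the trait:

```rust
use coupe::Partition;

/// Hypothetical toy algorithm: spread elements over `part_count` parts
/// round-robin, ignoring the input data entirely.
struct RoundRobin {
    part_count: usize, // assumed >= 1
}

impl<M> Partition<M> for RoundRobin {
    type Metadata = ();
    type Error = std::convert::Infallible;

    fn partition(&mut self, part_ids: &mut [usize], _data: M) -> Result<(), Self::Error> {
        for (i, part_id) in part_ids.iter_mut().enumerate() {
            *part_id = i % self.part_count;
        }
        Ok(())
    }
}
```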
//! # Available algorithms //! @@ -26,1324 +25,39 @@ //! - [K-means][KMeans] //! - Number partitioning: //! + [VN-Best][VnBest] +//! - [Fiduccia-Mattheyses][FiducciaMattheyses] +//! - [Kernighan-Lin][KernighanLin] -pub mod algorithms; -pub mod geometry; +mod algorithms; +mod geometry; pub mod imbalance; -pub mod partition; mod real; -mod run_info; pub mod topology; mod uid; -// API - -// SUBMODULES REEXPORT +pub use crate::algorithms::*; pub use crate::geometry::{Point2D, Point3D, PointND}; pub use crate::real::Real; -pub use crate::run_info::RunInfo; pub use crate::uid::uid; -pub mod dimension { - pub use nalgebra::base::dimension::*; -} - -use nalgebra::allocator::Allocator; -use nalgebra::ArrayStorage; -use nalgebra::Const; -use nalgebra::DefaultAllocator; -use nalgebra::DimDiff; -use nalgebra::DimSub; -use ndarray::ArrayView1; -use ndarray::ArrayView2; -use rayon::iter::IntoParallelIterator as _; -use rayon::iter::ParallelIterator as _; -use sprs::CsMatView; - -use crate::partition::*; - -// Trait that allows conversions from/to different kinds of -// points views representation as partitioner inputs -// e.g. &[f64], &[PointND], slice from ndarray, ... -pub trait PointsView<'a, const D: usize> { - fn to_points_nd(self) -> &'a [PointND]; -} - -impl<'a, const D: usize> PointsView<'a, D> for &'a [f64] { - fn to_points_nd(self) -> &'a [PointND] { - if self.len() % D != 0 { - panic!("error: tried to convert a &[f64] to a &[PointND] with D = {}, but input slice has len {}", D, self.len()); - } - unsafe { std::slice::from_raw_parts(self.as_ptr() as *const PointND, self.len() / D) } - } -} - -impl<'a, const D: usize> PointsView<'a, D> for ArrayView1<'a, f64> { - fn to_points_nd(self) -> &'a [PointND] { - let slice = self.to_slice().expect( - "Cannot convert an ArrayView1 with dicontiguous storage repr to a slice. Try cloning the data into a contiguous array first" - ); - slice.to_points_nd() - } -} - -impl<'a, const D: usize> PointsView<'a, D> for ArrayView2<'a, f64> { - fn to_points_nd(self) -> &'a [PointND] { - let slice = self.to_slice().expect( - "Cannot convert an ArrayView2 with dicontiguous storage repr to a slice. Try cloning the data into a contiguous array first" - ); - slice.to_points_nd() - } -} - -impl<'a, const D: usize> PointsView<'a, D> for &'a [PointND] { - fn to_points_nd(self) -> &'a [PointND] { - // nothing to do - self - } -} - -/// # A geometric partitioning algorithm. -/// -/// Algorithms that implement [`Partitioner`](trait.Partitioner.html) operate on a set of geometric points and associated weights and generate -/// a partition. A partition is described by an array of ids, each unique id represents a part of the partition. -/// -/// These algorithms generate a partition from scratch, as opposed to those which implement [`PartitionImprover`](trait.PartitionImprover.html), which work on -/// an existing partition to improve it. -/// -/// See the [implementors](trait.Partitioner.html#implementors) for more information about the currently available algorithms. -pub trait Partitioner { - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - ) -> Partition<'a, PointND, f64>; -} - -/// A geometric algorithm to improve a partition. -/// -/// Algorithms that implement [`PartitionImprover`](trait.PartitionImprover.html) operate on a set of geometric -/// points and associated weights to modify and improve an existing partition (typically generated by a [`Partitioner`](trait.Partitioner.html)). 
-/// -/// See the [implementors](trait.PartitionImprover.html#implementors) for more information about the currently available algorithms. -pub trait PartitionImprover { - fn improve_partition<'a>( - &self, - partition: Partition<'a, PointND, f64>, - ) -> Partition<'a, PointND, f64>; -} - -pub trait TopologicPartitioner { - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - adjacency: CsMatView, - ) -> Partition<'a, PointND, f64>; -} - -pub trait TopologicPartitionImprover { - fn improve_partition<'a>( - &self, - partition: Partition<'a, PointND, f64>, - adjacency: CsMatView, - ) -> Partition<'a, PointND, f64>; -} - -/// # Recursive Coordinate Bisection algorithm -/// -/// Partitions a mesh based on the nodes coordinates and coresponding weights. -/// -/// This is the most simple and straightforward geometric algorithm. It operates as follows for a N-dimensional set of points: -/// -/// At each iteration, select a vector `n` of the canonical basis `(e_0, ..., e_{n-1})`. Then, split the set of points with an hyperplane orthogonal -/// to `n`, such that the two parts of the splits are evenly weighted. Finally, recurse by reapplying the algorithm to the two parts with an other -/// normal vector selection. -/// -/// # Example -/// -/// ```rust -/// use coupe::Point2D; -/// use coupe::Partitioner; -/// -/// let points = vec![ -/// Point2D::new(1., 1.), -/// Point2D::new(-1., 1.), -/// Point2D::new(1., -1.), -/// Point2D::new(-1., -1.), -/// ]; -/// -/// let weights = vec![1., 1., 1., 1.]; -/// -/// // generate a partition of 4 parts -/// let rcb = coupe::Rcb::new(2); -/// let partition = rcb.partition(points.as_slice(), &weights); -/// -/// let ids = partition.ids(); -/// for i in 0..4 { -/// for j in 0..4 { -/// if j == i { -/// continue -/// } -/// assert_ne!(ids[i], ids[j]) -/// } -/// } -/// ``` -pub struct Rcb { - pub num_iter: usize, -} - -impl Rcb { - pub fn new(num_iter: usize) -> Self { - Self { num_iter } - } -} - -impl Partitioner for Rcb { - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - ) -> Partition<'a, PointND, f64> { - let points = points.to_points_nd(); - let mut ids = vec![0; points.len()]; - let p = points.into_par_iter().cloned(); - let w = weights.into_par_iter().cloned(); - crate::algorithms::recursive_bisection::rcb(&mut ids, p, w, self.num_iter); - Partition::from_ids(points, weights, ids) - } -} - -/// # Recursive Inertial Bisection algorithm -/// -/// Partitions a mesh based on the nodes coordinates and coresponding weights -/// -/// This is a variant of the [`Rcb`](struct.Rcb.html) algorithm, where a basis change is performed beforehand so that -/// the first coordinate of the new basis is colinear to the inertia axis of the set of points. This has the goal -/// of producing better shaped partition than [Rcb](struct.Rcb.html). -/// -/// # Example -/// -/// ```rust -/// use coupe::Point2D; -/// use coupe::Partitioner; -/// -/// // Here, the inertia axis is the y axis. -/// // We thus expect Rib to split horizontally first. 
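For comparison with the `Rcb` example deleted above, the equivalent call through the new trait looks roughly like this; the `iter_count` field and the tuple-of-parallel-iterators input follow the usage in this PR's tests and benchmarks:

```rust
use coupe::Partition as _;
use coupe::Point2D;
use rayon::prelude::*;

let points = vec![
    Point2D::new(1., 1.),
    Point2D::new(-1., 1.),
    Point2D::new(1., -1.),
    Point2D::new(-1., -1.),
];
let weights = vec![1; 4];
let mut partition = vec![0; 4];

// Points and weights are now passed as parallel iterators.
coupe::Rcb { iter_count: 2 }
    .partition(
        &mut partition,
        (points.par_iter().cloned(), weights.par_iter().cloned()),
    )
    .unwrap();
```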
-/// let points = vec![ -/// Point2D::new(1., 10.), -/// Point2D::new(-1., 10.), -/// Point2D::new(1., -10.), -/// Point2D::new(-1., -10.), -/// ]; -/// -/// let weights = vec![1., 1., 1., 1.]; -/// -/// // generate a partition of 2 parts (1 split) -/// let rib = coupe::Rib::new(1); -/// let partition = rib.partition(points.as_slice(), &weights); -/// -/// let ids = partition.ids(); -/// -/// // the two points at the top are in the same partition -/// assert_eq!(ids[0], ids[1]); -/// -/// // the two points at the bottom are in the same partition -/// assert_eq!(ids[2], ids[3]); -/// -/// // there are two different partition -/// assert_ne!(ids[1], ids[2]); -/// ``` -pub struct Rib { - /// The number of iterations of the algorithm. This will yield a partition of `2^num_iter` parts. - pub num_iter: usize, -} - -impl Rib { - pub fn new(num_iter: usize) -> Self { - Self { num_iter } - } -} - -impl Partitioner for Rib -where - Const: DimSub>, - DefaultAllocator: Allocator, Const, Buffer = ArrayStorage> - + Allocator, Const<1>>>, -{ - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - ) -> Partition<'a, PointND, f64> { - let points = points.to_points_nd(); - let mut ids = vec![0; points.len()]; - let w = weights.into_par_iter().cloned(); - crate::algorithms::recursive_bisection::rib(&mut ids, points, w, self.num_iter); - Partition::from_ids(points, weights, ids) - } -} - -/// # Multi-Jagged algorithm -/// -/// This algorithm is inspired by Multi-Jagged: A Scalable Parallel Spatial Partitioning Algorithm" -/// by Mehmet Deveci, Sivasankaran Rajamanickam, Karen D. Devine, Umit V. Catalyurek. -/// -/// It improves over [RCB](struct.Rcb.html) by following the same idea but by creating more than two subparts -/// in each iteration which leads to decreasing recursion depth. It also allows to generate a partition -/// of any number of parts. -/// -/// More precisely, given a number of parts, the algorithm will generate a "partition scheme", which describes how -/// to perform splits at each iteration, such that the total number of iteration is less than `max_iter`. -/// -/// More iteration does not necessarily result in a better partition. 
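The `MultiJagged` docs removed here presumably move onto the struct in its own module; a sketch of the new-style call, with field names assumed by analogy with the other reworked structs in this diff:

```rust
use coupe::Partition as _;
use coupe::Point2D;

// A 3x3 grid of unit-weight points.
let points: Vec<Point2D> = (0..9)
    .map(|i| Point2D::new((i % 3) as f64, (i / 3) as f64))
    .collect();
let weights = vec![1.0; 9];
let mut partition = vec![0; 9];

// `part_count` and `max_iter` are assumptions based on the naming used
// by Rcb and ZCurve elsewhere in this diff.
coupe::MultiJagged { part_count: 9, max_iter: 4 }
    .partition(&mut partition, (points.as_slice(), weights.as_slice()))
    .unwrap();
```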
-/// -/// # Example -/// -/// ```rust -/// use coupe::Point2D; -/// use coupe::Partitioner; -/// -/// let points = vec![ -/// Point2D::new(0., 0.), -/// Point2D::new(1., 0.), -/// Point2D::new(2., 0.), -/// Point2D::new(0., 1.), -/// Point2D::new(1., 1.), -/// Point2D::new(2., 1.), -/// Point2D::new(0., 2.), -/// Point2D::new(1., 2.), -/// Point2D::new(2., 2.), -/// ]; -/// -/// let weights = vec![1.; 9]; -/// -/// let num_partitions = 9; -/// let max_iter = 4; -/// -/// // generate a partition of 4 parts -/// let multi_jagged = coupe::MultiJagged::new(num_partitions, max_iter); -/// -/// let partition = multi_jagged.partition(points.as_slice(), &weights); -/// -/// let ids = partition.ids(); -/// -/// for i in 0..9 { -/// for j in 0..9 { -/// if j == i { -/// continue -/// } -/// assert_ne!(ids[i], ids[j]) -/// } -/// } -/// ``` -pub struct MultiJagged { - pub num_partitions: usize, - pub max_iter: usize, -} - -impl MultiJagged { - pub fn new(num_partitions: usize, max_iter: usize) -> Self { - Self { - num_partitions, - max_iter, - } - } -} - -impl Partitioner for MultiJagged { - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - ) -> Partition<'a, PointND, f64> { - let points = points.to_points_nd(); - let ids = crate::algorithms::multi_jagged::multi_jagged( - points, - weights, - self.num_partitions, - self.max_iter, - ); - - Partition::from_ids(points, weights, ids) - } -} - -/// # Z space-filling curve algorithm -/// -/// The Z-curve uses space hashing to partition points. The points in the same part of a partition -/// have the same Z-hash. This hash is computed by recursively constructing a N-dimensional region tree. -/// -/// # Example -/// -/// ```rust -/// use coupe::Point2D; -/// use coupe::Partitioner; -/// -/// let points = vec![ -/// Point2D::new(0., 0.), -/// Point2D::new(1., 1.), -/// Point2D::new(0., 10.), -/// Point2D::new(1., 9.), -/// Point2D::new(9., 1.), -/// Point2D::new(10., 0.), -/// Point2D::new(10., 10.), -/// Point2D::new(9., 9.), -/// ]; -/// -/// let weights = vec![1.; 8]; -/// -/// let num_partitions = 4; -/// let order = 5; -/// -/// // generate a partition of 4 parts -/// let z_curve = coupe::ZCurve::new(num_partitions, order); -/// -/// let partition = z_curve.partition(points.as_slice(), &weights); -/// let ids = partition.ids(); -/// -/// assert_eq!(ids[0], ids[1]); -/// assert_eq!(ids[2], ids[3]); -/// assert_eq!(ids[4], ids[5]); -/// assert_eq!(ids[6], ids[7]); -/// ``` -pub struct ZCurve { - pub num_partitions: usize, - pub order: u32, -} - -impl ZCurve { - pub fn new(num_partitions: usize, order: u32) -> Self { - Self { - num_partitions, - order, - } - } -} - -impl Partitioner for ZCurve -where - Const: DimSub>, - DefaultAllocator: Allocator, Const, Buffer = ArrayStorage> - + Allocator, Const<1>>>, -{ - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - ) -> Partition<'a, PointND, f64> { - let points = points.to_points_nd(); - let ids = - crate::algorithms::z_curve::z_curve_partition(points, self.num_partitions, self.order); - Partition::from_ids(points, weights, ids) - } -} - -/// # Hilbert space-filling curve algorithm -/// -/// An implementation of the Hilbert curve based on -/// "Encoding and Decoding the Hilbert Order" by XIAN LIU and GÜNTHER SCHRACK. -/// -/// This algorithm uses space hashing to reorder points alongside the Hilbert curve ov a giver order. -/// See [wikipedia](https://en.wikipedia.org/wiki/Hilbert_curve) for more details. 
-/// -/// # Example -/// -/// ```rust -/// use coupe::Point2D; -/// use coupe::Partitioner; -/// -/// let points = vec![ -/// Point2D::new(0., 0.), -/// Point2D::new(1., 1.), -/// Point2D::new(0., 10.), -/// Point2D::new(1., 9.), -/// Point2D::new(9., 1.), -/// Point2D::new(10., 0.), -/// Point2D::new(10., 10.), -/// Point2D::new(9., 9.), -/// ]; -/// -/// let weights = vec![1.; 8]; -/// -/// let num_partitions = 4; -/// let order = 5; -/// -/// // generate a partition of 4 parts -/// let hilbert = coupe::HilbertCurve::new(num_partitions, order); -/// -/// let partition = hilbert.partition(points.as_slice(), &weights); -/// let ids = partition.ids(); -/// -/// assert_eq!(ids[0], ids[1]); -/// assert_eq!(ids[2], ids[3]); -/// assert_eq!(ids[4], ids[5]); -/// assert_eq!(ids[6], ids[7]); -/// ``` -pub struct HilbertCurve { - pub num_partitions: usize, - pub order: u32, -} - -impl HilbertCurve { - pub fn new(num_partitions: usize, order: u32) -> Self { - Self { - num_partitions, - order, - } - } -} - -// hilbert curve is only implemented in 2d for now -impl Partitioner<2> for HilbertCurve { - fn partition<'a>( - &self, - points: impl PointsView<'a, 2>, - _weights: &'a [f64], - ) -> Partition<'a, PointND<2>, f64> { - let points = points.to_points_nd(); - let ids = crate::algorithms::hilbert_curve::hilbert_curve_partition( - points, - _weights, - self.num_partitions, - self.order as usize, - ); - - Partition::from_ids(points, _weights, ids) - } -} - -use std::cell::RefCell; - -/// Map mesh elements to partitions randomly. -/// -/// # Example -/// -/// ```rust -/// use coupe::Partitioner; -/// use coupe::Point2D; -/// use coupe::Random; -/// use rand; -/// -/// let num_parts = 3; -/// let points: &[Point2D] = &[Point2D::new(1., 0.), Point2D::new(0., 1.)]; -/// -/// let r = Random::new(rand::thread_rng(), num_parts); -/// r.partition(points, &[1., 1.]); -/// ``` -pub struct Random { - rng: RefCell, - num_parts: usize, -} - -impl Random { - pub fn new(rng: R, num_parts: usize) -> Random { - Random { - rng: RefCell::new(rng), - num_parts, - } - } -} - -impl Partitioner for Random -where - R: rand::Rng, -{ - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - ) -> Partition<'a, PointND, f64> { - let mut rng = self.rng.borrow_mut(); - let ids = (0..weights.len()) - .map(|_| rng.gen_range(0..self.num_parts)) - .collect(); - Partition::from_ids(points.to_points_nd(), weights, ids) - } -} - -/// Greedily assign weights to each part. -/// -/// # Example -/// -/// ```rust -/// use coupe::Partitioner; -/// use coupe::Point2D; -/// use coupe::Greedy; -/// -/// let num_parts = 3; -/// let points: &[Point2D] = &[Point2D::new(1., 0.), Point2D::new(0., 1.)]; -/// -/// let g = Greedy::new(num_parts); -/// g.partition(points, &[1., 1.]); -/// ``` -pub struct Greedy { - num_parts: usize, -} - -impl Greedy { - pub fn new(num_parts: usize) -> Greedy { - Greedy { num_parts } - } -} - -impl Partitioner for Greedy { - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - ) -> Partition<'a, PointND, f64> { - let mut ids = vec![0; weights.len()]; - { - let weights = weights.iter().map(Real::from); - algorithms::greedy::greedy(&mut ids, weights, self.num_parts); - } - Partition::from_ids(points.to_points_nd(), weights, ids) - } -} - -/// The Karmarkar-Karp algorithm, also called the Largest Differencing Method. 
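A struct-literal sketch of the same call after this rework, assuming the `part_count` field name used by the other reworked algorithms and the iterator-of-weights input exercised by the mesh-part tool changes below:

```rust
use coupe::Partition as _;

let weights = vec![9_u32, 8, 7, 6, 5];
let mut partition = vec![0; 5];

// Weights are passed as any IntoIterator of numbers.
coupe::KarmarkarKarp { part_count: 2 }
    .partition(&mut partition, weights)
    .unwrap();
```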
-/// -/// # Example -/// -/// ```rust -/// use coupe::Partitioner; -/// use coupe::Point2D; -/// use coupe::KarmarkarKarp; -/// -/// let num_parts = 3; -/// let points: &[Point2D] = &[Point2D::new(1., 0.), Point2D::new(0., 1.)]; -/// -/// let kk = KarmarkarKarp::new(num_parts); -/// kk.partition(points, &[1., 1.]); -/// ``` -pub struct KarmarkarKarp { - num_parts: usize, -} - -impl KarmarkarKarp { - pub fn new(num_parts: usize) -> KarmarkarKarp { - KarmarkarKarp { num_parts } - } -} - -impl Partitioner for KarmarkarKarp { - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - ) -> Partition<'a, PointND, f64> { - let mut ids = vec![0; weights.len()]; - { - let weights = weights.iter().map(Real::from); - algorithms::kk::kk(&mut ids, weights, self.num_parts); - } - Partition::from_ids(points.to_points_nd(), weights, ids) - } -} - -/// The exact variant of the [Karmarkar-Karp][KarmarkarKarp] algorithm. -/// -/// # Example -/// -/// ```rust -/// use coupe::Partitioner; -/// use coupe::Point2D; -/// use coupe::CompleteKarmarkarKarp; -/// -/// let tolerance = 0.1; -/// let points: &[Point2D] = &[Point2D::new(1., 0.), Point2D::new(0., 1.)]; -/// -/// let ckk = CompleteKarmarkarKarp::new(tolerance); -/// ckk.partition(points, &[1., 1.]); -/// ``` -pub struct CompleteKarmarkarKarp { - tolerance: f64, -} - -impl CompleteKarmarkarKarp { - pub fn new(tolerance: f64) -> CompleteKarmarkarKarp { - CompleteKarmarkarKarp { tolerance } - } -} - -impl Partitioner for CompleteKarmarkarKarp { - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - ) -> Partition<'a, PointND, f64> { - let mut ids = vec![0; weights.len()]; - { - let weights = weights.iter().map(Real::from); - algorithms::ckk::ckk_bipart(&mut ids, weights, self.tolerance); - } - Partition::from_ids(points.to_points_nd(), weights, ids) - } -} - -/// Greedy Vector-of-Numbers algorithms. -/// -/// This algorithm greedily moves weights from parts to parts in such a way that -/// the imbalance gain is maximized on each step. -/// -/// # Example -/// -/// ```rust -/// use coupe::Partitioner; -/// use coupe::PartitionImprover; -/// use coupe::Point2D; -/// use coupe::Random; -/// use coupe::VnBest; -/// use rand; -/// -/// let num_parts = 3; -/// let points: &[Point2D] = &[Point2D::new(1., 0.), Point2D::new(0., 1.)]; -/// -/// let r = Random::new(rand::thread_rng(), num_parts); -/// let partition = r.partition(points, &[1., 1.]); -/// let vn = VnBest::new(num_parts); -/// vn.improve_partition(partition); -/// ``` -pub struct VnBest { - num_parts: usize, -} - -impl VnBest { - pub fn new(num_parts: usize) -> VnBest { - VnBest { num_parts } - } -} - -impl PartitionImprover for VnBest { - fn improve_partition<'a>( - &self, - partition: Partition<'a, PointND, f64>, - ) -> Partition<'a, PointND, f64> { - let (points, weights, mut ids) = partition.into_raw(); - let weights_real: Vec = weights.iter().map(Real::from).collect(); - algorithms::vn::best::vn_best_mono::(&mut ids, &weights_real, self.num_parts); - Partition::from_ids(points, weights, ids) - } -} - -/// K-means algorithm -/// -/// An implementation of the balanced k-means algorithm inspired from -/// "Balanced k-means for Parallel Geometric Partitioning" by Moritz von Looz, -/// Charilaos Tzovas and Henning Meyerhenke (2018, University of Cologne). 
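The k-means example removed below translates to the struct-literal style this PR introduces (as in the reworked benchmarks); only `part_count` and `delta_threshold` are overridden here, the rest coming from `Default`:

```rust
use coupe::Partition as _;
use coupe::Point2D;

let points = vec![
    Point2D::new(0., 0.),
    Point2D::new(1., 0.),
    Point2D::new(2., 0.),
    Point2D::new(0., 5.),
    Point2D::new(1., 5.),
    Point2D::new(2., 5.),
];
let weights = vec![1.0; 6];
let mut partition = vec![0; 6];

coupe::KMeans {
    part_count: 2,
    delta_threshold: 0.0,
    ..Default::default()
}
.partition(&mut partition, (points.as_slice(), weights.as_slice()))
.unwrap();
```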
-/// -/// From an initial partition, the K-means algorithm will generate points clusters that will, -/// at each iteration, exchage points with other clusters that are "closer", and move by recomputing the clusters position (defined as -/// the centroid of the points assigned to the cluster). Eventually the clusters will stop moving, yielding a new partition. -/// -/// # Example -/// -/// ```rust -/// use coupe::Point2D; -/// use coupe::PartitionImprover; -/// use coupe::partition::Partition; -/// -/// // create ids for initial partition -/// -/// let points = vec![ -/// Point2D::new(0., 0.), -/// Point2D::new(1., 0.), -/// Point2D::new(2., 0.), -/// Point2D::new(0., 5.), -/// Point2D::new(1., 5.), -/// Point2D::new(2., 5.), -/// Point2D::new(0., 10.), -/// Point2D::new(1., 10.), -/// Point2D::new(2., 10.), -/// ]; -/// -/// let weights = vec![1.; 9]; -/// -/// // create an unbalanced partition: -/// // - p1: total weight = 1 -/// // - p2: total weight = 7 -/// // - p3: total weight = 1 -/// let ids = vec![1, 2, 2, 2, 2, 2, 2, 2, 3]; -/// let partition = Partition::from_ids(&points, &weights, ids); +/// The `Partition` trait allows for partitioning data. /// -/// let mut k_means = coupe::KMeans::default(); -/// k_means.num_partitions = 3; -/// k_means.delta_threshold = 0.; +/// Partitioning algorithms implement this trait. /// -/// let partition = k_means.improve_partition(partition); -/// let ids = partition.ids(); +/// The generic argument `M` defines the input of the algorithms (e.g. an +/// adjacency matrix or a 2D set of points). /// -/// assert_eq!(ids[0], ids[1]); -/// assert_eq!(ids[0], ids[2]); -/// -/// assert_eq!(ids[3], ids[4]); -/// assert_eq!(ids[3], ids[5]); -/// -/// assert_eq!(ids[6], ids[7]); -/// assert_eq!(ids[6], ids[8]); -/// ``` -#[derive(Debug, Clone, Copy)] -pub struct KMeans { - pub num_partitions: usize, - pub imbalance_tol: f64, - pub delta_threshold: f64, - pub max_iter: usize, - pub max_balance_iter: usize, - pub erode: bool, - pub hilbert: bool, - pub mbr_early_break: bool, -} - -// KMeans builder pattern -// to reduce construction boilerplate -// e.g. -// ```rust -// let k_means = KMeansBuilder::default() -// .imbalance_tol(5.) -// .max_balance_iter(12) -// .build(); -// ``` -#[derive(Debug, Default, Clone, Copy)] -pub struct KMeansBuilder { - inner: KMeans, -} - -impl KMeansBuilder { - pub fn build(self) -> KMeans { - self.inner - } - - pub fn num_partitions(mut self, num_partitions: usize) -> Self { - self.inner.num_partitions = num_partitions; - self - } - - pub fn imbalance_tol(mut self, imbalance_tol: f64) -> Self { - self.inner.imbalance_tol = imbalance_tol; - self - } +/// The input partition must be of the correct size and its contents may or may +/// not be used by the algorithms. +pub trait Partition { + /// Diagnostic data returned for a specific run of the algorithm. 
+ type Metadata; - pub fn delta_threshold(mut self, delta_threshold: f64) -> Self { - self.inner.delta_threshold = delta_threshold; - self - } - - pub fn max_iter(mut self, max_iter: usize) -> Self { - self.inner.max_iter = max_iter; - self - } - - pub fn max_balance_iter(mut self, max_balance_iter: usize) -> Self { - self.inner.max_balance_iter = max_balance_iter; - self - } - - pub fn erode(mut self, erode: bool) -> Self { - self.inner.erode = erode; - self - } - - pub fn hilbert(mut self, hilbert: bool) -> Self { - self.inner.hilbert = hilbert; - self - } - - pub fn mbr_early_break(mut self, mbr_early_break: bool) -> Self { - self.inner.mbr_early_break = mbr_early_break; - self - } -} - -impl KMeans { - #[allow(clippy::too_many_arguments)] - pub fn new( - num_partitions: usize, - imbalance_tol: f64, - delta_threshold: f64, - max_iter: usize, - max_balance_iter: usize, - erode: bool, - hilbert: bool, - mbr_early_break: bool, - ) -> Self { - Self { - num_partitions, - imbalance_tol, - delta_threshold, - max_iter, - max_balance_iter, - erode, - hilbert, - mbr_early_break, - } - } -} - -impl Default for KMeans { - fn default() -> Self { - Self { - num_partitions: 7, - imbalance_tol: 5., - delta_threshold: 0.01, - max_iter: 500, - max_balance_iter: 20, // for now, `max_balance_iter > 1` yields poor convergence time - erode: false, // for now, `erode` yields` enabled yields wrong results - hilbert: true, - mbr_early_break: false, // for now, `mbr_early_break` enabled yields wrong results - } - } -} - -impl PartitionImprover for KMeans -where - Const: DimSub>, - DefaultAllocator: Allocator, Const, Buffer = ArrayStorage> - + Allocator, Const<1>>>, -{ - fn improve_partition<'a>( - &self, - partition: Partition<'a, PointND, f64>, - ) -> Partition<'a, PointND, f64> { - let settings = crate::algorithms::k_means::BalancedKmeansSettings { - num_partitions: self.num_partitions, - imbalance_tol: self.imbalance_tol, - delta_threshold: self.delta_threshold, - max_iter: self.max_iter, - max_balance_iter: self.max_balance_iter, - erode: self.erode, - hilbert: self.hilbert, - mbr_early_break: self.mbr_early_break, - }; - let (points, weights, mut ids) = partition.into_raw(); - crate::algorithms::k_means::balanced_k_means_with_initial_partition( - points, weights, settings, &mut ids, - ); - Partition::from_ids(points, weights, ids) - } -} - -/// KernighanLin algorithm -/// -/// An implementation of the Kernighan Lin topologic algorithm -/// for graph partitioning. The current implementation currently only handles -/// partitioning a graph into two parts, as described in the original algorithm in -/// "An efficient heuristic procedure for partitioning graphs" by W. Kernighan and S. Lin. -/// -/// The algorithms repeats an iterative pass during which several pairs of nodes have -/// their part assignment swapped in order to reduce the cutsize of the partition. -/// If all the nodes are equally weighted, the algorithm preserves the partition balance. 
-/// -/// # Example -/// -/// ```rust -/// use coupe::Point2D; -/// use coupe::TopologicPartitionImprover; -/// use coupe::partition::Partition; -/// use sprs::CsMat; -/// -/// // swap -/// // 0 1 0 1 -/// // +--+--+--+ -/// // | | | | -/// // +--+--+--+ -/// // 0 0 1 1 -/// let points = vec![ -/// Point2D::new(0., 0.), -/// Point2D::new(1., 0.), -/// Point2D::new(2., 0.), -/// Point2D::new(3., 0.), -/// Point2D::new(0., 1.), -/// Point2D::new(1., 1.), -/// Point2D::new(2., 1.), -/// Point2D::new(3., 1.), -/// ]; -/// -/// let ids = vec![0, 0, 1, 1, 0, 1, 0, 1]; -/// let weights = vec![1.; 8]; -/// -/// let mut partition = Partition::from_ids(&points, &weights, ids); -/// -/// let mut adjacency = CsMat::empty(sprs::CSR, 8); -/// adjacency.reserve_outer_dim(8); -/// eprintln!("shape: {:?}", adjacency.shape()); -/// adjacency.insert(0, 1, 1.); -/// adjacency.insert(1, 2, 1.); -/// adjacency.insert(2, 3, 1.); -/// adjacency.insert(4, 5, 1.); -/// adjacency.insert(5, 6, 1.); -/// adjacency.insert(6, 7, 1.); -/// adjacency.insert(0, 4, 1.); -/// adjacency.insert(1, 5, 1.); -/// adjacency.insert(2, 6, 1.); -/// adjacency.insert(3, 7, 1.); -/// -/// // symmetry -/// adjacency.insert(1, 0, 1.); -/// adjacency.insert(2, 1, 1.); -/// adjacency.insert(3, 2, 1.); -/// adjacency.insert(5, 4, 1.); -/// adjacency.insert(6, 5, 1.); -/// adjacency.insert(7, 6, 1.); -/// adjacency.insert(4, 0, 1.); -/// adjacency.insert(5, 1, 1.); -/// adjacency.insert(6, 2, 1.); -/// adjacency.insert(7, 3, 1.); -/// -/// // 1 iteration -/// let algo = coupe::KernighanLin::new(1, 1, None, 1); -/// -/// let partition = algo.improve_partition(partition, adjacency.view()); -/// -/// let new_ids = partition.into_ids(); -/// assert_eq!(new_ids[5], 0); -/// assert_eq!(new_ids[6], 1); -/// ``` -#[derive(Debug, Clone, Copy)] -pub struct KernighanLin { - max_passes: Option, - max_flips_per_pass: Option, - max_imbalance_per_flip: Option, - max_bad_move_in_a_row: usize, -} - -impl KernighanLin { - pub fn new( - max_passes: impl Into>, - max_flips_per_pass: impl Into>, - max_imbalance_per_flip: impl Into>, - max_bad_move_in_a_row: usize, - ) -> Self { - Self { - max_passes: max_passes.into(), - max_flips_per_pass: max_flips_per_pass.into(), - max_imbalance_per_flip: max_imbalance_per_flip.into(), - max_bad_move_in_a_row, - } - } -} - -impl TopologicPartitionImprover for KernighanLin { - fn improve_partition<'a>( - &self, - mut partition: Partition<'a, PointND, f64>, - adjacency: CsMatView, - ) -> Partition<'a, PointND, f64> { - crate::algorithms::kernighan_lin::kernighan_lin( - &mut partition, - adjacency, - self.max_passes, - self.max_flips_per_pass, - self.max_imbalance_per_flip, - self.max_bad_move_in_a_row, - ); - partition - } -} - -/// FiducciaMattheyses -/// -/// An implementation of the Fiduccia Mattheyses topologic algorithm -/// for graph partitioning. This implementation is an extension of the -/// original algorithm to handle partitioning into more than two parts. -/// -/// This algorithm repeats an iterative pass during which a set of graph nodes are assigned to -/// a new part, reducing the overall cutsize of the partition. As opposed to the -/// Kernighan-Lin algorithm, during each pass iteration, only one node is flipped at a time. -/// The algorithm thus does not preserve partition weights balance and may produce an unbalanced -/// partition. -/// -/// Original algorithm from "A Linear-Time Heuristic for Improving Network Partitions" -/// by C.M. Fiduccia and R.M. Mattheyses. 
-/// -/// # Example -/// -/// ```rust -/// use coupe::Point2D; -/// use coupe::TopologicPartitionImprover; -/// use coupe::partition::Partition; -/// use sprs::CsMat; -/// -/// // swap -/// // 0 1 0 1 -/// // +--+--+--+ -/// // | | | | -/// // +--+--+--+ -/// // 0 0 1 1 -/// let points = vec![ -/// Point2D::new(0., 0.), -/// Point2D::new(1., 0.), -/// Point2D::new(2., 0.), -/// Point2D::new(3., 0.), -/// Point2D::new(0., 1.), -/// Point2D::new(1., 1.), -/// Point2D::new(2., 1.), -/// Point2D::new(3., 1.), -/// ]; -/// -/// let ids = vec![0, 0, 1, 1, 0, 1, 0, 1]; -/// let weights = vec![1.; 8]; -/// -/// let mut partition = Partition::from_ids(&points, &weights, ids); -/// -/// let mut adjacency = CsMat::empty(sprs::CSR, 8); -/// adjacency.reserve_outer_dim(8); -/// eprintln!("shape: {:?}", adjacency.shape()); -/// adjacency.insert(0, 1, 1.); -/// adjacency.insert(1, 2, 1.); -/// adjacency.insert(2, 3, 1.); -/// adjacency.insert(4, 5, 1.); -/// adjacency.insert(5, 6, 1.); -/// adjacency.insert(6, 7, 1.); -/// adjacency.insert(0, 4, 1.); -/// adjacency.insert(1, 5, 1.); -/// adjacency.insert(2, 6, 1.); -/// adjacency.insert(3, 7, 1.); -/// -/// // symmetry -/// adjacency.insert(1, 0, 1.); -/// adjacency.insert(2, 1, 1.); -/// adjacency.insert(3, 2, 1.); -/// adjacency.insert(5, 4, 1.); -/// adjacency.insert(6, 5, 1.); -/// adjacency.insert(7, 6, 1.); -/// adjacency.insert(4, 0, 1.); -/// adjacency.insert(5, 1, 1.); -/// adjacency.insert(6, 2, 1.); -/// adjacency.insert(7, 3, 1.); -/// -/// // 1 iteration -/// let algo = coupe::FiducciaMattheyses::new(None, None, None, 1); -/// -/// let partition = algo.improve_partition(partition, adjacency.view()); -/// -/// let new_ids = partition.into_ids(); -/// assert_eq!(new_ids[5], 0); -/// assert_eq!(new_ids[6], 1); -/// ``` -#[derive(Debug, Clone, Copy)] -pub struct FiducciaMattheyses { - max_passes: Option, - max_flips_per_pass: Option, - max_imbalance_per_flip: Option, - max_bad_move_in_a_row: usize, -} - -impl FiducciaMattheyses { - pub fn new( - max_passes: impl Into>, - max_flips_per_pass: impl Into>, - max_imbalance_per_flip: impl Into>, - max_bad_move_in_a_row: usize, - ) -> Self { - Self { - max_passes: max_passes.into(), - max_flips_per_pass: max_flips_per_pass.into(), - max_imbalance_per_flip: max_imbalance_per_flip.into(), - max_bad_move_in_a_row, - } - } -} - -impl TopologicPartitionImprover for FiducciaMattheyses { - fn improve_partition<'a>( - &self, - mut partition: Partition<'a, PointND, f64>, - adjacency: CsMatView, - ) -> Partition<'a, PointND, f64> { - crate::algorithms::fiduccia_mattheyses::fiduccia_mattheyses( - &mut partition, - adjacency, - self.max_passes, - self.max_flips_per_pass, - self.max_imbalance_per_flip, - self.max_bad_move_in_a_row, - ); - partition - } -} - -/// Graph Growth algorithm -/// -/// A topologic algorithm that generates a partition from a topologic mesh. -/// Given a number k of parts, the algorithm selects k nodes randomly and assigns them to a different part. 
-/// Then, at each iteration, each part is expanded to neighbor nodes that are not yet assigned to a part -/// -/// # Example -/// -/// ```rust -/// use coupe::Point2D; -/// use coupe::TopologicPartitioner; -/// use coupe::partition::Partition; -/// use sprs::CsMat; -/// -/// // +--+--+--+ -/// // | | | | -/// // +--+--+--+ -/// -/// let points = vec![ -/// Point2D::new(0., 0.), -/// Point2D::new(1., 0.), -/// Point2D::new(2., 0.), -/// Point2D::new(3., 0.), -/// Point2D::new(0., 1.), -/// Point2D::new(1., 1.), -/// Point2D::new(2., 1.), -/// Point2D::new(3., 1.), -/// ]; -/// -/// let weights = vec![1.; 8]; -/// -/// let mut adjacency = CsMat::empty(sprs::CSR, 8); -/// adjacency.reserve_outer_dim(8); -/// eprintln!("shape: {:?}", adjacency.shape()); -/// adjacency.insert(0, 1, 1.); -/// adjacency.insert(1, 2, 1.); -/// adjacency.insert(2, 3, 1.); -/// adjacency.insert(4, 5, 1.); -/// adjacency.insert(5, 6, 1.); -/// adjacency.insert(6, 7, 1.); -/// adjacency.insert(0, 4, 1.); -/// adjacency.insert(1, 5, 1.); -/// adjacency.insert(2, 6, 1.); -/// adjacency.insert(3, 7, 1.); -/// -/// // symmetry -/// adjacency.insert(1, 0, 1.); -/// adjacency.insert(2, 1, 1.); -/// adjacency.insert(3, 2, 1.); -/// adjacency.insert(5, 4, 1.); -/// adjacency.insert(6, 5, 1.); -/// adjacency.insert(7, 6, 1.); -/// adjacency.insert(4, 0, 1.); -/// adjacency.insert(5, 1, 1.); -/// adjacency.insert(6, 2, 1.); -/// adjacency.insert(7, 3, 1.); -/// -/// let gg = coupe::GraphGrowth::new(2); -/// -/// let _partition = gg.partition(points.as_slice(), &weights, adjacency.view()); -/// ``` -#[derive(Debug, Clone, Copy)] -pub struct GraphGrowth { - num_partitions: usize, -} - -impl GraphGrowth { - pub fn new(num_partitions: usize) -> Self { - Self { num_partitions } - } -} - -impl TopologicPartitioner for GraphGrowth { - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - adjacency: CsMatView, - ) -> Partition<'a, PointND, f64> { - let ids = crate::algorithms::graph_growth::graph_growth( - weights, - adjacency.view(), - self.num_partitions, - ); - Partition::from_ids(points.to_points_nd(), weights, ids) - } -} - -/// # Represents the composition algorithm. -/// -/// This structure is created by calling the [`compose`](trait.Compose.html#tymethod.compose) -/// of the [`Compose`](trait.Compose.html) trait. 
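With `Compose` and `Composition` gone, composing algorithms is simply running them one after another on the same part-ID buffer, as the new `VnBest` docs earlier in this diff already demonstrate. Schematically:

```rust
use coupe::Partition as _;

let part_count = 2;
let weights = [3, 5, 8, 1];
let mut partition = [0; 4];

// First generate a partition, then improve it in place.
coupe::Random { rng: rand::thread_rng(), part_count }
    .partition(&mut partition, ())
    .unwrap();
coupe::VnBest { part_count }
    .partition(&mut partition, weights)
    .unwrap();
```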
-pub struct Composition { - first: T, - second: U, -} - -impl Composition { - pub fn new(first: T, second: U) -> Self { - Self { first, second } - } -} - -impl Partitioner for Composition -where - T: Partitioner, - U: PartitionImprover, -{ - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - ) -> Partition<'a, PointND, f64> { - let points = points.to_points_nd(); - let partition = self.first.partition(points, weights); - self.second.improve_partition(partition) - } -} - -impl PartitionImprover for Composition -where - T: PartitionImprover, - U: PartitionImprover, -{ - fn improve_partition<'a>( - &self, - partition: Partition<'a, PointND, f64>, - ) -> Partition<'a, PointND, f64> { - self.second - .improve_partition(self.first.improve_partition(partition)) - } -} - -impl TopologicPartitioner for Composition -where - T: Partitioner, - U: TopologicPartitionImprover, -{ - fn partition<'a>( - &self, - points: impl PointsView<'a, D>, - weights: &'a [f64], - adjacency: CsMatView, - ) -> Partition<'a, PointND, f64> { - let partition = self.first.partition(points, weights); - self.second.improve_partition(partition, adjacency) - } -} - -/// # Compose two algorithms. -/// -/// This trait enables algorithms to be composed together. Doing so allows for instance to improve the partition -/// generated by a [`Partitioner`](trait.Partitioner.html) with a [`PartitionImprover`](trait.PartitionImprover.html). -/// -/// The resulting composition implements either [`Partitioner`](trait.Partitioner.html) or [`PartitionImprover`](trait.PartitionImprover.html) -/// based on the input algorithms. -/// -/// # Example -/// ```rust -/// use coupe::{Compose, Partitioner, PartitionImprover}; -/// use coupe::dimension::U3; -/// -/// let num_partitions = 7; -/// let max_iter = 3; -/// -/// let mut k_means = coupe::KMeans::default(); -/// k_means.num_partitions = 7; -/// let multi_jagged_then_k_means = coupe::MultiJagged::new( -/// num_partitions, -/// max_iter -/// ).compose(k_means); -/// -/// ``` -pub trait Compose { - type Composed; - fn compose(self, other: T) -> Self::Composed; -} + /// Error details, should the algorithm fail to run. + type Error; -impl Compose for U { - type Composed = Composition; - fn compose(self, other: T) -> Self::Composed { - Composition::new(self, other) - } + /// Partition the given data and output the part ID of each element in + /// `part_ids`. + fn partition(&mut self, part_ids: &mut [usize], data: M) + -> Result; } diff --git a/src/partition.rs b/src/partition.rs deleted file mode 100644 index d7b57e12..00000000 --- a/src/partition.rs +++ /dev/null @@ -1,442 +0,0 @@ -//! Utilities to manipulate partitions. - -use itertools::Itertools; -use nalgebra::allocator::Allocator; -use nalgebra::ArrayStorage; -use nalgebra::Const; -use nalgebra::DefaultAllocator; -use nalgebra::DimDiff; -use nalgebra::DimSub; -use num::{Num, Signed}; -use sprs::CsMatView; - -use std::cmp::PartialOrd; -use std::iter::Sum; - -use crate::geometry::Mbr; -use crate::PointND; - -/// Represents a partition. -/// -/// This struct is usually created by a partitioning algorithm. It internally uses unique IDs to represent a partition of a set of points and weights. -/// The `Partition` object exposes a convenient interface to manipulate a partition: -/// - Iterate over parts -/// - Analyze quality (e.g. 
weight imbalance) -/// -/// # Example -/// -/// ```rust -/// use coupe::Point2D; -/// use coupe::partition::Partition; -/// use approx::*; -/// -/// -/// let ids = vec![1, 2, 3, 1, 2, 3, 1, 2, 3]; -/// let weights = vec![1.; 9]; -/// let points = vec![ -/// Point2D::new(2., 2.), -/// Point2D::new(3., 3.), -/// Point2D::new(7., 7.), -/// Point2D::new(8., 8.), -/// Point2D::new(9., 9.), -/// Point2D::new(5., 5.), -/// Point2D::new(1., 1.), -/// Point2D::new(6., 6.), -/// Point2D::new(4., 4.), -/// ]; -/// -/// let partition = Partition::from_ids(&points, &weights, ids); -/// -/// // iterate over each part corresponding to a unique ID -/// for part in partition.parts() { -/// assert_ulps_eq!(part.total_weight(), 3.) -/// } -/// ``` -#[derive(Clone)] -pub struct Partition<'a, P, W> { - points: &'a [P], - weights: &'a [W], - ids: Vec, -} - -impl<'a, P, W> Partition<'a, P, W> { - /// Constructs a default partition from a set of points and weights and initialize it with default IDs (with a single part). - /// - /// # Panics - /// - /// Panics if `points` and `weights` have different lenghts. - pub fn new(points: &'a [P], weights: &'a [W]) -> Self { - if points.len() != weights.len() { - panic!( - "Cannot initialize a partition with points and weights of different sizes. Found {} points and {} weights", - points.len(), weights.len() - ); - } - - Self { - points, - weights, - ids: vec![0; points.len()], - } - } - - /// Constructs a partition from a set of points, weights and IDs. - /// - /// # Panics - /// - /// Panics if `points`, `weights` and `ids` do not have the same length. - pub fn from_ids(points: &'a [P], weights: &'a [W], ids: Vec) -> Self { - if points.len() != weights.len() { - panic!( - "Cannot initialize a partition with points and weights of different sizes. Found {} points and {} weights", - points.len(), weights.len() - ); - } - - if points.len() != ids.len() { - panic!( - "Cannot initialize a partition with points and ids of different sizes. Found {} points and {} ids", - points.len(), ids.len() - ); - } - - Self { - points, - weights, - ids, - } - } - - /// Consumes the partition, returning the internal array of ids representing the partition. - pub fn into_ids(self) -> Vec { - self.ids - } - - /// Consumes the partition, returning the internal components representing the partition - /// - /// # Example - /// - /// ```rust - /// use coupe::partition::Partition; - /// use coupe::Point2D; - /// - /// fn consume_partition(partition: Partition) { - /// let (_points, _weights, mut _ids) = partition.into_raw(); - /// // do something with the fields - /// } - /// ``` - pub fn into_raw(self) -> (&'a [P], &'a [W], Vec) { - (self.points, self.weights, self.ids) - } - - pub fn as_raw(&self) -> (&'a [P], &'a [W], &[usize]) { - (self.points, self.weights, &self.ids) - } - - pub fn as_raw_mut(&mut self) -> (&'a [P], &'a [W], &mut [usize]) { - (self.points, self.weights, &mut self.ids) - } - - /// Returns a slice of the IDs used to represent the partition. - pub fn ids(&self) -> &[usize] { - &self.ids - } - - /// Returns of mutable slice of the IDs used to represent the partition. - pub fn ids_mut(&mut self) -> &mut [usize] { - &mut self.ids - } - - /// Returns a slice of the points used for the partition. - pub fn points(&self) -> &[P] { - self.points - } - - /// Returns a slice of the weights used for the partition. - pub fn weights(&self) -> &[W] { - self.weights - } - - /// Returns an iterator over each part of the partition. 
- /// - /// Each part is a set of points and weights that share the same ID. - pub fn parts(&'a self) -> impl Iterator> { - let indices = (0..self.points.len()).collect::>(); - self.ids.iter().unique().map(move |id| { - let indices = indices - .iter() - .filter(|idx| self.ids[**idx] == *id) - .cloned() - .collect::>(); - Part::<'a, P, W> { - partition: self, - indices, - } - }) - } - - /// Computes the maximum imbalance of the partition. - /// It is defined as the maximum difference between the weights of two different parts. - /// - /// # Example - /// - /// ```rust - /// use coupe::partition::Partition; - /// - /// let dummy_point = coupe::Point2D::new(0., 0.); - /// let points = [dummy_point; 5]; - /// - /// let weights = [1, 2, 3, 4, 5]; - /// - /// let ids = vec![1, 2, 2, 2, 3]; - /// - /// let partition = Partition::from_ids(&points[..], &weights[..], ids); - /// - /// let max_imbalance = partition.max_imbalance(); - /// assert_eq!(max_imbalance, 8); - /// ``` - pub fn max_imbalance(&'a self) -> W - where - W: Num + PartialOrd + Signed + Sum + Clone, - { - let total_weights = self - .parts() - .map(|part| part.total_weight()) - .collect::>(); - total_weights - .iter() - .flat_map(|w1| { - total_weights - .iter() - .map(move |w2| (w1.clone() - w2.clone()).abs()) - }) - .max_by(|a, b| a.partial_cmp(b).unwrap()) - // if the partition is empty, then there is the imbalance is null - .unwrap_or_else(W::zero) - } - - /// Computes the relative imbalance of the partition. - /// It is defined as follows: `relative_imbalance = maximum_imbalance / total_weight_in_partition`. - /// It expresses the imbalance in terms of percentage of the total weight. It may be more meaningful than raw numbers in some cases. - /// - /// # Example - /// - /// ```rust - /// use coupe::partition::Partition; - /// - /// let dummy_point = coupe::Point2D::new(0., 0.); - /// let points = [dummy_point; 5]; - /// - /// let weights = [1000, 1000, 1000, 1000, 6000]; - /// - /// let ids = vec![1, 2, 2, 2, 3]; - /// - /// let partition = Partition::from_ids(&points[..], &weights[..], ids); - /// - /// let relative_imbalance = partition.relative_imbalance(); - /// assert_eq!(relative_imbalance, 0.5); // = 50% of total weight - /// ``` - pub fn relative_imbalance(&'a self) -> f64 - where - W: Num + PartialOrd + Signed + Sum + Clone, - f64: std::convert::From, - { - let total_weights = self - .parts() - .map(|part| part.total_weight()) - .collect::>(); - let max_imbalance = total_weights - .iter() - .flat_map(|w1| { - total_weights - .iter() - .map(move |w2| (w1.clone() - w2.clone()).abs()) - }) - .max_by(|a, b| a.partial_cmp(b).unwrap()) - .unwrap_or_else(W::zero); - - f64::from(max_imbalance) / f64::from(total_weights.into_iter().sum()) - } - - /// Merge two parts of the partition (identified by unique id). - /// - /// If either `id1` or `id2` is not contained in the partition, does nothing. - pub fn merge_parts(&mut self, id1: usize, id2: usize) { - for id in self.ids.iter_mut() { - if *id == id2 { - *id = id1; - } - } - } - - /// Construct pairs of adjacent parts. - /// Two parts are adjacent if there exist one element in the first - /// part and one element in the second part that are linked together - /// in the adjacency graph. 
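Most of the analysis helpers deleted with this file have close equivalents in the retained `imbalance` module, which the earlier hunks reworked to take `impl IntoIterator` weights. A sketch of the replacement for `parts()` plus `total_weight()` and `max_imbalance()`:

```rust
// Per-part loads: part 0 holds weights 1 and 2, part 1 holds weights 3 and 4.
let partition = [1, 0, 0, 1];
let weights = [3_u32, 1, 2, 4];

let loads = coupe::imbalance::compute_parts_load(&partition, 2, weights.iter().cloned());
assert_eq!(loads, vec![3, 7]);

// The maximum imbalance takes the same arguments.
let _worst = coupe::imbalance::max_imbalance(2, &partition, weights.iter().cloned());
```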
- pub fn adjacent_parts( - &'a self, - adjacency: CsMatView, - ) -> Vec<(Part<'a, P, W>, Part<'a, P, W>)> { - // build parts adjacency graph - let parts = self.parts().collect::>(); - - let mut adjacent_parts = Vec::new(); - - for (i, p) in parts.iter().enumerate() { - for (j, q) in parts.iter().enumerate() { - if j < i - && adjacency - .iter() - .any(|(_, (i, j))| p.indices().contains(&i) && q.indices().contains(&j)) - { - adjacent_parts.push((p.clone(), q.clone())) - } - } - } - - adjacent_parts - } - - /// Swaps the parts of two elements - /// - /// Panics if `idx1` or `idx2` is out of range - pub fn swap(&mut self, idx1: usize, idx2: usize) { - self.ids.swap(idx1, idx2); - } -} - -/// Represent a part of a partition. -/// -/// This struct is not meaningful on its own, and is usually constructed by -/// [iterating over a partition](struct.Partition.html#method.parts). -pub struct Part<'a, P, W> { - partition: &'a Partition<'a, P, W>, - indices: Vec, -} - -// Custom Clone impl since we juste clone the indices and not the partition itself -impl<'a, P, W> Clone for Part<'a, P, W> { - fn clone(&self) -> Self { - Self { - partition: self.partition, - indices: self.indices.clone(), - } - } -} - -impl<'a, P, W> Part<'a, P, W> { - /// Computes the total weight (i.e. the sum of all the weights) of the part. - pub fn total_weight(&self) -> W - where - W: Sum + Clone, - { - self.indices - .iter() - .map(|idx| &self.partition.weights[*idx]) - .cloned() - .sum() - } - - pub fn indices(&self) -> &[usize] { - &self.indices - } - - pub fn into_indices(self) -> Vec { - self.indices - } - - pub fn points(&self) -> &'a [P] { - self.partition.points() - } - - pub fn weights(&self) -> &'a [W] { - self.partition.weights() - } - - /// Iterate over the points and weights of the part. - /// - /// # Example - /// - /// ```rust - /// use coupe::Point2D; - /// use coupe::partition::Partition; - /// - /// fn iterate(partition: &Partition) { - /// // iterate over each part - /// for part in partition.parts() { - /// //iterate over each point/weight of the current part - /// for (p, w) in part.iter() { - /// // do something with p, w - /// } - /// } - /// } - /// ``` - pub fn iter(&self) -> PartIter { - PartIter { - partition: self.partition, - indices: &self.indices, - } - } -} - -impl<'a, W, const D: usize> Part<'a, PointND, W> { - /// Computes the aspect ratio of a part. It is defined as the aspect ratio of a minimal bounding rectangle - /// of the set of points contained in the part. - /// - /// # Example - /// - /// ```rust - /// use coupe::Point2D; - /// use coupe::partition::Partition; - /// use approx::*; - /// - /// let points = [ - /// Point2D::new(0., 0.), - /// Point2D::new(6., 0.), - /// Point2D::new(0., 2.), - /// Point2D::new(6., 2.), - /// ]; - /// let weights = [1.; 4]; - /// - /// let partition = Partition::new(&points[..], &weights[..]); // initialized with a single part - /// - /// // only 1 iteration - /// for part in partition.parts() { - /// assert_ulps_eq!(part.aspect_ratio(), 3.); - /// } - /// ``` - pub fn aspect_ratio(&self) -> f64 - where - Const: DimSub>, - DefaultAllocator: Allocator, Const, Buffer = ArrayStorage> - + Allocator, Const<1>>>, - { - if self.indices.len() <= 2 { - panic!("Cannot compute the aspect ratio of a part of less than 2 points"); - } - let points = self - .indices - .iter() - .map(|&idx| self.partition.points()[idx]) - .collect::>(); - Mbr::from_points(&points).aspect_ratio() - } -} - -/// An iterator over points and weights of a part. 
-/// -/// Is struct is usually created when [iterating over a part](struct.Part.html#method.iter). -pub struct PartIter<'a, P, W> { - partition: &'a Partition<'a, P, W>, - indices: &'a [usize], -} - -impl<'a, P, W> Iterator for PartIter<'a, P, W> { - type Item = (&'a P, &'a W); - fn next(&mut self) -> Option { - self.indices.get(0).map(|idx| { - self.indices = &self.indices[1..]; - (&self.partition.points[*idx], &self.partition.weights[*idx]) - }) - } -} diff --git a/src/run_info.rs b/src/run_info.rs deleted file mode 100644 index 457157d3..00000000 --- a/src/run_info.rs +++ /dev/null @@ -1,16 +0,0 @@ -/// Information on an algorithm run. -/// -/// Filled in by algorithms when run on a given input. Gives information about how the run went. -#[derive(Default, Copy, Clone)] -pub struct RunInfo { - /// Number of iterations the algorithm underwent to provide a partition. - pub algo_iterations: Option, -} - -impl RunInfo { - pub fn skip() -> RunInfo { - RunInfo { - algo_iterations: Some(0), - } - } -} diff --git a/tools/Cargo.toml b/tools/Cargo.toml index 4c1133d0..77a41c52 100644 --- a/tools/Cargo.toml +++ b/tools/Cargo.toml @@ -30,3 +30,4 @@ anyhow = { version = "1", default-features = false, features = ["std"] } # Other utilities itertools = { version = "0.10", default-features = false } num = { version = "0.4", default-features = false } +rayon = "1.3" diff --git a/tools/num-part/src/main.rs b/tools/num-part/src/main.rs index db40ade4..9a7a88aa 100644 --- a/tools/num-part/src/main.rs +++ b/tools/num-part/src/main.rs @@ -1,6 +1,5 @@ use anyhow::Context as _; use anyhow::Result; -use coupe::RunInfo; use rand::SeedableRng as _; use std::env; use std::path::PathBuf; @@ -73,7 +72,7 @@ where /// - `Err(msg)`, where `msg` is the reason the algorithm cannot be run with the given arguments /// (e.g. it's a bi-partitioning algorithm but the caller passed a number of parts that is /// different than 2). -type Algorithm = Box Result>; +type Algorithm = Box Result<()>>; fn main() -> Result<()> { let mut options = getopts::Options::new(); diff --git a/tools/src/bin/mesh-part/main.rs b/tools/src/bin/mesh-part/main.rs index 6672aa12..c8fd4ce6 100644 --- a/tools/src/bin/mesh-part/main.rs +++ b/tools/src/bin/mesh-part/main.rs @@ -1,77 +1,127 @@ use anyhow::Context as _; use anyhow::Result; -use coupe::RunInfo; +use coupe::Partition as _; +use coupe::PointND; +use mesh_io::medit::Mesh; use mesh_io::weight; +use rayon::iter::IntoParallelRefIterator as _; +use rayon::iter::ParallelIterator as _; use std::env; use std::fs; use std::io; +use std::mem; -macro_rules! 
diff --git a/tools/Cargo.toml b/tools/Cargo.toml
index 4c1133d0..77a41c52 100644
--- a/tools/Cargo.toml
+++ b/tools/Cargo.toml
@@ -30,3 +30,4 @@ anyhow = { version = "1", default-features = false, features = ["std"] }
 # Other utilities
 itertools = { version = "0.10", default-features = false }
 num = { version = "0.4", default-features = false }
+rayon = "1.3"
diff --git a/tools/num-part/src/main.rs b/tools/num-part/src/main.rs
index db40ade4..9a7a88aa 100644
--- a/tools/num-part/src/main.rs
+++ b/tools/num-part/src/main.rs
@@ -1,6 +1,5 @@
 use anyhow::Context as _;
 use anyhow::Result;
-use coupe::RunInfo;
 use rand::SeedableRng as _;
 use std::env;
 use std::path::PathBuf;
@@ -73,7 +72,7 @@
 /// - `Err(msg)`, where `msg` is the reason the algorithm cannot be run with the given arguments
 ///   (e.g. it's a bi-partitioning algorithm but the caller passed a number of parts that is
 ///   different than 2).
-type Algorithm = Box<dyn FnMut(&mut [usize], &Problem) -> Result<RunInfo>>;
+type Algorithm = Box<dyn FnMut(&mut [usize], &Problem) -> Result<()>>;
 
 fn main() -> Result<()> {
     let mut options = getopts::Options::new();
diff --git a/tools/src/bin/mesh-part/main.rs b/tools/src/bin/mesh-part/main.rs
index 6672aa12..c8fd4ce6 100644
--- a/tools/src/bin/mesh-part/main.rs
+++ b/tools/src/bin/mesh-part/main.rs
@@ -1,77 +1,127 @@
 use anyhow::Context as _;
 use anyhow::Result;
-use coupe::RunInfo;
+use coupe::Partition as _;
+use coupe::PointND;
+use mesh_io::medit::Mesh;
 use mesh_io::weight;
+use rayon::iter::IntoParallelRefIterator as _;
+use rayon::iter::ParallelIterator as _;
 use std::env;
 use std::fs;
 use std::io;
+use std::mem;
 
-macro_rules! coupe_part {
-    ( $algo:expr, $dimension:expr, $points:expr, $weights:expr ) => {{
-        match $dimension {
-            2 => {
-                coupe::Partitioner::<2>::partition(&$algo, $points.as_slice(), $weights).into_ids()
+struct Problem<const D: usize> {
+    points: Vec<PointND<D>>,
+    weights: weight::Array,
+}
+
+trait Algorithm<const D: usize> {
+    fn run(&mut self, partition: &mut [usize], problem: &Problem<D>) -> Result<()>;
+}
+
+impl<const D: usize, R> Algorithm<D> for coupe::Random<R>
+where
+    R: rand::Rng,
+{
+    fn run(&mut self, partition: &mut [usize], _: &Problem<D>) -> Result<()> {
+        self.partition(partition, ())?;
+        Ok(())
+    }
+}
+
+impl<const D: usize> Algorithm<D> for coupe::Greedy {
+    fn run(&mut self, partition: &mut [usize], problem: &Problem<D>) -> Result<()> {
+        use weight::Array::*;
+        match &problem.weights {
+            Integers(is) => {
+                let weights = is.iter().map(|weight| weight[0]);
+                self.partition(partition, weights)?;
             }
-            3 => {
-                coupe::Partitioner::<3>::partition(&$algo, $points.as_slice(), $weights).into_ids()
+            Floats(fs) => {
+                let weights = fs.iter().map(|weight| coupe::Real::from(weight[0]));
+                self.partition(partition, weights)?;
             }
-            _ => anyhow::bail!("only 2D and 3D meshes are supported"),
         }
-    }};
-    ( $algo:expr, $problem:expr ) => {{
-        let weights: Vec<f64> = match &$problem.weights {
-            weight::Array::Floats(ws) => ws.iter().map(|weight| weight[0]).collect(),
-            weight::Array::Integers(_) => anyhow::bail!("integers are not supported by coupe yet"),
-        };
-        coupe_part!($algo, $problem.dimension, $problem.points, &weights)
-    }};
+        Ok(())
+    }
 }
 
-macro_rules! coupe_part_improve {
-    ( $algo:expr, $partition:expr, $dimension:expr, $points:expr, $weights:expr ) => {{
-        let ids = match $dimension {
-            2 => {
-                let points = coupe::PointsView::to_points_nd($points.as_slice());
-                let partition =
-                    coupe::partition::Partition::from_ids(points, $weights, $partition.to_vec());
-                coupe::PartitionImprover::<2>::improve_partition(&$algo, partition).into_ids()
+impl<const D: usize> Algorithm<D> for coupe::KarmarkarKarp {
+    fn run(&mut self, partition: &mut [usize], problem: &Problem<D>) -> Result<()> {
+        use weight::Array::*;
+        match &problem.weights {
+            Integers(is) => {
+                let weights = is.iter().map(|weight| weight[0]);
+                self.partition(partition, weights)?;
             }
-            3 => {
-                let points = coupe::PointsView::to_points_nd($points.as_slice());
-                let partition =
-                    coupe::partition::Partition::from_ids(points, $weights, $partition.to_vec());
-                coupe::PartitionImprover::<3>::improve_partition(&$algo, partition).into_ids()
+            Floats(fs) => {
+                let weights = fs.iter().map(|weight| coupe::Real::from(weight[0]));
+                self.partition(partition, weights)?;
             }
-            _ => anyhow::bail!("only 2D and 3D meshes are supported"),
-        };
-        $partition.copy_from_slice(&ids);
-    }};
-    ( $algo:expr, $partition:expr, $problem:expr ) => {{
-        let weights: Vec<f64> = match &$problem.weights {
-            weight::Array::Floats(ws) => ws.iter().map(|weight| weight[0]).collect(),
-            weight::Array::Integers(_) => anyhow::bail!("integers are not supported by coupe yet"),
-        };
-        coupe_part_improve!(
-            $algo,
-            $partition,
-            $problem.dimension,
-            $problem.points,
-            &weights
-        )
-    }};
+        }
+        Ok(())
+    }
 }
 
-struct Problem {
-    dimension: usize,
-    points: Vec<f64>,
-    weights: weight::Array,
+impl<const D: usize> Algorithm<D> for coupe::VnBest {
+    fn run(&mut self, partition: &mut [usize], problem: &Problem<D>) -> Result<()> {
+        use weight::Array::*;
+        match &problem.weights {
+            Integers(is) => {
+                let weights = is.iter().map(|weight| weight[0]);
+                self.partition(partition, weights)?;
+            }
+            Floats(fs) => {
+                let weights = fs.iter().map(|weight| coupe::Real::from(weight[0]));
+                self.partition(partition, weights)?;
+            }
+        }
+        Ok(())
+    }
 }
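// Sketch (not part of the patch): a further algorithm plugs into the dispatch
// above simply by implementing `Algorithm<D>`. `RoundRobin` is invented for
// illustration; only `Problem<D>`, `Algorithm<D>` and `anyhow::Result` come
// from this file.
struct RoundRobin {
    part_count: usize,
}

impl<const D: usize> Algorithm<D> for RoundRobin {
    fn run(&mut self, partition: &mut [usize], _problem: &Problem<D>) -> Result<()> {
        // Deal elements out to parts like cards: element i goes to part i mod k.
        for (i, part) in partition.iter_mut().enumerate() {
            *part = i % self.part_count;
        }
        Ok(())
    }
}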
 
-type Algorithm = Box<dyn FnMut(&mut [usize], &Problem) -> Result<RunInfo>>;
+impl<const D: usize> Algorithm<D> for coupe::Rcb {
+    fn run(&mut self, partition: &mut [usize], problem: &Problem<D>) -> Result<()> {
+        use weight::Array::*;
+        let points = problem.points.par_iter().cloned();
+        match &problem.weights {
+            Integers(is) => {
+                let weights = is.par_iter().map(|weight| weight[0]);
+                self.partition(partition, (points, weights))?;
+            }
+            Floats(fs) => {
+                let weights = fs.par_iter().map(|weight| weight[0]);
+                self.partition(partition, (points, weights))?;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl<const D: usize> Algorithm<D> for coupe::HilbertCurve {
+    fn run(&mut self, partition: &mut [usize], problem: &Problem<D>) -> Result<()> {
+        use weight::Array::*;
+        if D != 2 {
+            anyhow::bail!("hilbert is only implemented for 2D meshes");
+        }
+        // SAFETY: this transmute is a no-op because we just checked D == 2, so
+        // both sides are `&Vec<PointND<2>>`.
+        let points =
+            unsafe { mem::transmute::<&Vec<PointND<D>>, &Vec<PointND<2>>>(&problem.points) };
+        match &problem.weights {
+            Integers(_) => anyhow::bail!("hilbert is only implemented for floats"),
+            Floats(fs) => {
+                let weights: Vec<f64> = fs.iter().map(|weight| weight[0]).collect();
+                self.partition(partition, (points, weights))?;
+            }
+        }
+        Ok(())
+    }
+}
 
-fn parse_algorithm(spec: String) -> Result<Algorithm> {
+fn parse_algorithm<const D: usize>(spec: &str) -> Result<Box<dyn Algorithm<D>>> {
     let mut args = spec.split(',');
-    let name = args.next().context("empty algorithm spec")?;
+    let name = args.next().context("it's empty")?;
 
     fn optional<T>(maybe_arg: Option<Result<T>>, default: T) -> Result<T> {
         Ok(maybe_arg.transpose()?.unwrap_or(default))
     }
@@ -100,77 +150,75 @@
             bytes.resize(32_usize, 0_u8);
             bytes.try_into().unwrap()
         };
-            let mut rng = rand_pcg::Pcg64::from_seed(seed);
-            Box::new(move |partition, problem| {
-                let algo = coupe::Random::new(&mut rng, part_count);
-                let weights = vec![0.0; problem.points.len()];
-                let ids = coupe_part!(algo, problem.dimension, problem.points, &weights);
-                partition.copy_from_slice(&ids);
-                Ok(RunInfo::default())
-            })
-        }
-        "greedy" => {
-            let part_count = required(usize_arg(args.next()))?;
-            Box::new(move |partition, problem| {
-                let ids = coupe_part!(coupe::Greedy::new(part_count), problem);
-                partition.copy_from_slice(&ids);
-                Ok(RunInfo::default())
-            })
-        }
-        "kk" => {
-            let part_count = required(usize_arg(args.next()))?;
-            Box::new(move |partition, problem| {
-                let ids = coupe_part!(coupe::KarmarkarKarp::new(part_count), problem);
-                partition.copy_from_slice(&ids);
-                Ok(RunInfo::default())
-            })
-        }
-        "vn-best" => {
-            let part_count = required(usize_arg(args.next()))?;
-            Box::new(move |partition, problem| {
-                coupe_part_improve!(coupe::VnBest::new(part_count), partition, problem);
-                Ok(RunInfo::default())
-            })
-        }
-        "rcb" => {
-            let iter_count = required(usize_arg(args.next()))?;
-            Box::new(move |partition, problem| {
-                let ids = coupe_part!(coupe::Rcb::new(iter_count), problem);
-                partition.copy_from_slice(&ids);
-                Ok(RunInfo::default())
-            })
-        }
-        "hilbert" => {
-            let num_partitions = required(usize_arg(args.next()))?;
-            let order = optional(usize_arg(args.next()), 12)? as u32;
-            Box::new(move |partition, problem| {
-                let algo = coupe::HilbertCurve {
-                    num_partitions,
-                    order,
-                };
-                let weights: Vec<f64> = match &problem.weights {
-                    weight::Array::Floats(ws) => ws.iter().map(|weight| weight[0]).collect(),
-                    weight::Array::Integers(_) => {
-                        anyhow::bail!("hilbert is not implemented for integers")
-                    }
-                };
-                let res = match problem.dimension {
-                    2 => coupe::Partitioner::<2>::partition(
-                        &algo,
-                        problem.points.as_slice(),
-                        &weights,
-                    )
-                    .into_ids(),
-                    _ => anyhow::bail!("hilbert is only implemented in 2D"),
-                };
-                partition.copy_from_slice(&res);
-                Ok(RunInfo::default())
-            })
+            let rng = rand_pcg::Pcg64::from_seed(seed);
+            Box::new(coupe::Random { rng, part_count })
         }
+        "greedy" => Box::new(coupe::Greedy {
+            part_count: required(usize_arg(args.next()))?,
+        }),
+        "kk" => Box::new(coupe::KarmarkarKarp {
+            part_count: required(usize_arg(args.next()))?,
+        }),
+        "vn-best" => Box::new(coupe::VnBest {
+            part_count: required(usize_arg(args.next()))?,
+        }),
+        "rcb" => Box::new(coupe::Rcb {
+            iter_count: required(usize_arg(args.next()))?,
+        }),
+        "hilbert" => Box::new(coupe::HilbertCurve {
+            part_count: required(usize_arg(args.next()))?,
+            order: optional(usize_arg(args.next()), 12)? as u32,
+        }),
         _ => anyhow::bail!("unknown algorithm {:?}", name),
     })
 }
 
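// How `-a` specs map through `parse_algorithm` (a sketch; the spec strings are
// illustrative, the field names are the ones used in the match above):
//
//     "greedy,2"   -> coupe::Greedy { part_count: 2 }
//     "rcb,3"      -> coupe::Rcb { iter_count: 3 }
//     "hilbert,4"  -> coupe::HilbertCurve { part_count: 4, order: 12 }
//
// "hilbert"'s second argument is optional and defaults to an order of 12.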
+fn main_d<const D: usize>(
+    matches: getopts::Matches,
+    mesh: Mesh,
+    weights: weight::Array,
+) -> Result<Vec<usize>> {
+    let points: Vec<_> = mesh
+        .elements()
+        .filter_map(|(element_type, nodes, _element_ref)| {
+            if element_type.dimension() != mesh.dimension() {
+                return None;
+            }
+            let mut barycentre = [0.0; D];
+            for node_idx in nodes {
+                let node_coordinates = mesh.node(*node_idx);
+                for (bc_coord, node_coord) in barycentre.iter_mut().zip(node_coordinates) {
+                    *bc_coord += node_coord;
+                }
+            }
+            for bc_coord in &mut barycentre {
+                *bc_coord /= nodes.len() as f64;
+            }
+            Some(PointND::from(barycentre))
+        })
+        .collect();
+
+    let mut partition = vec![0; points.len()];
+    let problem = Problem { points, weights };
+
+    let algorithms: Vec<_> = matches
+        .opt_strs("a")
+        .into_iter()
+        .map(|algorithm_spec| {
+            parse_algorithm(&algorithm_spec)
+                .with_context(|| format!("invalid algorithm {:?}", algorithm_spec))
+        })
+        .collect::<Result<_>>()?;
+
+    for mut algorithm in algorithms {
+        algorithm
+            .run(&mut partition, &problem)
+            .context("failed to apply algorithm")?;
+    }
+
+    Ok(partition)
+}
+
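// Example invocation (a sketch; the mesh and weight file names are made up,
// the flags are the ones declared in `main` below):
//
//     mesh-part -m square.mesh -w square.weights -a rcb,3 -a vn-best,8
//
// Algorithms run in the order given on the command line: each `run` starts
// from the part ids left by the previous one, so an improving algorithm such
// as vn-best refines the partition produced by rcb.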
 fn main() -> Result<()> {
     let mut options = getopts::Options::new();
     options.optflag("h", "help", "print this help menu");
@@ -191,37 +239,10 @@
         return Ok(());
     }
 
-    let algorithms: Vec<_> = matches
-        .opt_strs("a")
-        .into_iter()
-        .map(parse_algorithm)
-        .collect::<Result<_>>()?;
-
     let mesh_file = matches
         .opt_str("m")
         .context("missing required option 'mesh'")?;
-    let mesh = mesh_io::medit::Mesh::from_file(mesh_file).context("failed to read mesh file")?;
-
-    let points: Vec<_> = mesh
-        .elements()
-        .filter_map(|(element_type, nodes, _element_ref)| {
-            if element_type.dimension() != mesh.dimension() {
-                return None;
-            }
-            let mut barycentre = vec![0.0; mesh.dimension()];
-            for node_idx in nodes {
-                let node_coordinates = mesh.node(*node_idx);
-                for (bc_coord, node_coord) in barycentre.iter_mut().zip(node_coordinates) {
-                    *bc_coord += node_coord;
-                }
-            }
-            for bc_coord in &mut barycentre {
-                *bc_coord /= nodes.len() as f64;
-            }
-            Some(barycentre)
-        })
-        .flatten()
-        .collect();
+    let mesh = Mesh::from_file(mesh_file).context("failed to read mesh file")?;
 
     let weight_file = matches
         .opt_str("w")
@@ -230,16 +251,11 @@
     let weight_file = io::BufReader::new(weight_file);
     let weights = weight::read(weight_file).context("failed to read weight file")?;
 
-    let problem = Problem {
-        dimension: mesh.dimension(),
-        points,
-        weights,
+    let partition = match mesh.dimension() {
+        2 => main_d::<2>(matches, mesh, weights)?,
+        3 => main_d::<3>(matches, mesh, weights)?,
+        n => anyhow::bail!("expected 2D or 3D mesh, got a {n}D mesh"),
     };
-    let mut partition = vec![0; problem.points.len() / problem.dimension];
-
-    for mut algorithm in algorithms {
-        algorithm(&mut partition, &problem).context("failed to apply algorithm")?;
-    }
 
     let stdout = io::stdout();
     let stdout = stdout.lock();