diff --git a/Cargo.toml b/Cargo.toml index 8e9fc8c..33a25fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,13 +7,11 @@ description = "Base support for planetary science image processing" repository = "https://github.com/kmgill/sciimg" readme = "README.md" keywords = ["planetary", "astrophotography", "science", "imaging"] -license = "MIT" +license = "MIT" [features] -rayon = [] - -[target.'cfg(rayon)'.dependencies] -rayon = "1.7.0" +rayon = ["dep:rayon"] +default = [] [dependencies] chrono = "0.4.19" @@ -21,8 +19,15 @@ image = "0.24.5" imageproc = "0.23.0" lab = "0.11.0" memmap = "0.7.0" +rayon = { version = "1.7.0", optional = true } serde = { version = "1.0.125", features = ["derive"] } string-builder = "0.2.0" itertools = "0.10.5" anyhow = "1.0.65" +[dev-dependencies] +criterion = "0.5.1" + +[[bench]] +name = "gaussian" +harness = false diff --git a/benches/gaussian.rs b/benches/gaussian.rs new file mode 100644 index 0000000..4014ef1 --- /dev/null +++ b/benches/gaussian.rs @@ -0,0 +1,83 @@ +//! So this is a carbon copy of the two implementations in src/gaussian.rs +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +use sciimg::image::Image; + +use std::time::Duration; + +const INPAINT_TEST_IMAGE: &str = "tests/testdata/MSL_MAHLI_INPAINT_Sol2904_V1.png"; + +// st +fn st(img: &mut Image, sigma: f32) { + use sciimg::gaussianblur::st_gaussian_blur; + + let mut buffers = vec![]; + (0..img.num_bands()).for_each(|b| { + buffers.push(img.get_band(b).to_owned()); + }); + + if let Ok(buffers) = st_gaussian_blur(&mut buffers, sigma) { + buffers.iter().enumerate().for_each(|(b, _)| { + img.set_band(&buffers[b], b); + }); + } +} + +#[cfg(feature = "rayon")] +// rayon +fn rayon(img: &mut Image, sigma: f32) { + use sciimg::gaussianblur::par_gaussian_blur; + + let mut buffers = vec![]; + (0..img.num_bands()).for_each(|b| { + buffers.push(img.get_band(b).to_owned()); + }); + + if let Ok(buffers) = par_gaussian_blur(&mut buffers, sigma) { + buffers.iter().enumerate().for_each(|(b, _)| { + img.set_band(&buffers[b], b); + }); + } +} + +fn setup_benchmark_data() -> Image { + Image::open(&String::from(INPAINT_TEST_IMAGE)).unwrap() +} + +fn benchmark_gaussian_blur(c: &mut Criterion) { + let mut group = c.benchmark_group("GaussianBlur"); + group.measurement_time(Duration::from_secs(10)); + + // Setup benchmark parameters + let sigma_values = [2.0]; + + for sigma in sigma_values.iter() { + // Benchmark original implementation + group.bench_with_input( + BenchmarkId::new("single-threaded", sigma), + sigma, + |b, &sigma| { + b.iter(|| { + let mut img = setup_benchmark_data(); + black_box(st(&mut img, sigma)) + }) + }, + ); + } + + #[cfg(feature = "rayon")] + { + for sigma in sigma_values.iter() { + // Benchmark original implementation + group.bench_with_input(BenchmarkId::new("rayon", sigma), sigma, |b, &sigma| { + b.iter(|| { + let mut img = setup_benchmark_data(); + black_box(rayon(&mut img, sigma)) + }) + }); + } + } + group.finish(); +} + +criterion_group!(benches, benchmark_gaussian_blur); +criterion_main!(benches); diff --git a/src/gaussianblur.rs b/src/gaussianblur.rs new file mode 100644 index 0000000..b43e273 --- /dev/null +++ b/src/gaussianblur.rs @@ -0,0 +1,199 @@ +use crate::max; +use crate::{image::Image, imagebuffer::ImageBuffer, Dn, DnVec, VecMath}; +use anyhow::{anyhow, Result}; + +/// NOTE: this will call the parallelized version if you are using the 'rayon' feature. +pub fn gaussian_blur_nband(buffers: &mut [ImageBuffer], sigma: f32) -> Result> { + #[cfg(not(feature = "rayon"))] + { + st_gaussian_blur(buffers, sigma) + } + #[cfg(feature = "rayon")] + { + par_gaussian_blur(buffers, sigma) + } +} + +pub trait RgbImageBlur { + fn gaussian_blur(&mut self, sigma: f32); +} + +impl RgbImageBlur for Image { + fn gaussian_blur(&mut self, sigma: f32) { + let mut buffers = vec![]; + (0..self.num_bands()).for_each(|b| { + buffers.push(self.get_band(b).to_owned()); + }); + + if let Ok(buffers) = gaussian_blur_nband(&mut buffers, sigma) { + buffers.iter().enumerate().for_each(|(b, _)| { + self.set_band(&buffers[b], b); + }); + } + } +} + +#[cfg(feature = "rayon")] +pub fn par_gaussian_blur( + buffers: &[ImageBuffer], + sigma: f32, +) -> Result, anyhow::Error> { + use rayon::prelude::*; + + if buffers.is_empty() { + return Err(anyhow!("No buffers provided")); + } + + let sig_squared = sigma.powi(2); + let radius = (3.0 * sigma).ceil().max(1.0) as usize; + let kernel_length = radius * 2 + 1; + let mut kernel = DnVec::zeros(kernel_length); + let mut sum = 0.0; + let r = radius as i32; + + (-r..=r).for_each(|i| { + let exponent = -(i * i) as Dn / (2.0 * sig_squared); + let kernel_value = + (std::f32::consts::E as Dn).powf(exponent) / (std::f32::consts::TAU * sig_squared); + kernel[(i + r) as usize] = kernel_value; + sum += kernel_value; + }); + kernel.iter_mut().for_each(|v| *v /= sum); + + let buffer_width = buffers[0].width; + let buffer_height = buffers[0].height; + + // Horizontal pass: each buffer independently. + let mut horizontal_buffers = buffers.iter().cloned().collect::>(); + horizontal_buffers + .par_iter_mut() + .enumerate() + .for_each(|(b, buf)| { + for y in 0..buffer_height { + for x in 0..buffer_width { + let mut acc = 0.0; + for kernel_i in -r..=r { + let x_sample = x as i32 + kernel_i; // using addition + if x_sample < 0 || x_sample >= buffer_width as i32 { + continue; + } + let v = match buffers[b].safe_get(x_sample as usize, y) { + Some(val) => val, + None => continue, // skip if out of bounds + }; + acc += v * kernel[(kernel_i + r) as usize]; + } + buf.put(x, y, acc); + } + } + }); + + // Vertical pass: each buffer independently. + let mut vertical_buffers = horizontal_buffers.clone(); + vertical_buffers + .par_iter_mut() + .enumerate() + .for_each(|(b, buf)| { + for x in 0..buffer_width { + for y in 0..buffer_height { + let mut acc = 0.0; + for kernel_i in -r..=r { + let y_sample = y as i32 + kernel_i; // using addition + if y_sample < 0 || y_sample >= buffer_height as i32 { + continue; + } + let v = match horizontal_buffers[b].safe_get(x, y_sample as usize) { + Some(val) => val, + None => continue, + }; + acc += v * kernel[(kernel_i + r) as usize]; + } + buf.put(x, y, acc); + } + } + }); + + Ok(vertical_buffers) +} + +/// SLOW (according to Kevin...) +pub fn st_gaussian_blur(buffers: &mut [ImageBuffer], sigma: f32) -> Result> { + if buffers.is_empty() { + return Err(anyhow!("No buffers provided")); + } + + let sig_squared = sigma.powi(2); + let radius = max!((3.0 * sigma).ceil(), 1.0) as usize; + + let kernel_length = radius * 2 + 1; + + let mut kernel = DnVec::zeros(kernel_length); + let mut sum = 0.0; + + let r = radius as i32; + + (-r..r).for_each(|i| { + let exponent_numerator = -(i * i) as Dn; + let exponent_denominator = 2.0 * sig_squared; + + let e_expression = + (std::f32::consts::E as Dn).powf(exponent_numerator / exponent_denominator); + let kernel_value = e_expression / std::f32::consts::TAU * sig_squared; + + kernel[(i + r) as usize] = kernel_value; + sum += kernel_value; + }); + + // Normalize kernel + kernel.iter_mut().for_each(|i| { + *i /= sum; + }); + + let buffer_width = buffers[0].width; + let buffer_height = buffers[0].height; + let buff_len: usize = buffers.len(); + + // Horizontal pass + (0..buffer_width).for_each(|x| { + (0..buffer_height).for_each(|y| { + let mut values = DnVec::zeros(buff_len); + 'h_kernel: for kernel_i in -r..=r { + let x_sample = x as i32 - kernel_i; + let k = kernel[(kernel_i + r) as usize]; + for b in 0..buff_len { + let v = match buffers[b].safe_get(x_sample as usize, y) { + Some(val) => val, + None => continue 'h_kernel, + }; + values[b] += v * k; + } + } + for i in 0..buff_len { + buffers[i].put(x, y, values[i]); + } + }); + }); + + // Vertical pass + (0..buffer_width).for_each(|x| { + (0..buffer_height).for_each(|y| { + let mut values = DnVec::zeros(buff_len); + 'v_kernel: for kernel_i in -r..=r { + let y_sample = y as i32 - kernel_i; + let k = kernel[(kernel_i + r) as usize]; + for b in 0..buff_len { + let v = match buffers[b].safe_get(x, y_sample as usize) { + Some(val) => val, + None => continue 'v_kernel, + }; + values[b] += v * k; + } + } + for i in 0..buff_len { + buffers[i].put(x, y, values[i]); + } + }); + }); + + Ok(buffers.into()) +} diff --git a/src/guassianblur.rs b/src/guassianblur.rs deleted file mode 100644 index afc52db..0000000 --- a/src/guassianblur.rs +++ /dev/null @@ -1,228 +0,0 @@ -use crate::image::Image; -use crate::imagebuffer::ImageBuffer; -use crate::max; -use crate::Dn; -use crate::DnVec; -use crate::VecMath; -use anyhow::anyhow; -use anyhow::Result; - -#[cfg(rayon)] -use rayon::prelude::*; -#[cfg(rayon)] -use std::sync::Arc; -#[cfg(rayon)] -use std::sync::Mutex; - -#[cfg(not(rayon))] -// SSSSSLLLLOOOOOOWWWWWWW..... -pub fn guassian_blur_nband(buffers: &mut [ImageBuffer], sigma: f32) -> Result> { - if buffers.is_empty() { - return Err(anyhow!("No buffers provided")); - } - - let sig_squared = sigma.powi(2); - let radius = max!((3.0 * sigma).ceil(), 1.0) as usize; - - let kernel_length = radius * 2 + 1; - - let mut kernel = DnVec::zeros(kernel_length); - let mut sum = 0.0; - - let r = radius as i32; - - (-r..r).for_each(|i| { - let exponent_numerator = -(i * i) as Dn; - let exponent_denominator = 2.0 * sig_squared; - - let e_expression = - (std::f32::consts::E as Dn).powf(exponent_numerator / exponent_denominator); - let kernel_value = e_expression / std::f32::consts::TAU * sig_squared; - - kernel[(i + r) as usize] = kernel_value; - sum += kernel_value; - }); - - // Normalize kernel - kernel.iter_mut().for_each(|i| { - *i /= sum; - }); - - let buffer_width = buffers[0].width; - let buffer_height = buffers[0].height; - let buff_len: usize = buffers.len(); - - // 1st pass: Horizontal Blur - (0..buffer_width).for_each(|x| { - (0..buffer_height).for_each(|y| { - let mut values = DnVec::zeros(buff_len); - - (-r..r).for_each(|kernel_i| { - // Protect image bounds - if x as i32 - kernel_i < 0 || x as i32 - kernel_i >= buffer_width as i32 { - let kernel_value = kernel[(kernel_i + r) as usize]; - - (0..buff_len).for_each(|b| { - values[b] += buffers[b].get(x - kernel_i as usize, y) * kernel_value; - }); - } - }); - - (0..buff_len).for_each(|i| { - buffers[i].put(x, y, values[i]); - }); - }); - }); - - // 2nd pass: Vertical Blur - (0..buffer_width).for_each(|x| { - (0..buffer_height).for_each(|y| { - let mut values = DnVec::zeros(buff_len); - - (-r..r).for_each(|kernel_i| { - // Protect image bounds - if y as i32 - kernel_i < 0 || y as i32 - kernel_i >= buffer_height as i32 { - let kernel_value = kernel[(kernel_i + r) as usize]; - (0..buff_len).for_each(|b| { - //FIXME: unsafe unwrap - values[b] += buffers[b].get(x, y - kernel_i as usize) * kernel_value; - }); - } - }); - - (0..buff_len).for_each(|i| { - buffers[i].put(x, y, values[i]); - }); - }); - }); - Ok(buffers.into()) -} - -#[cfg(rayon)] -/* -Hopefully a little faster?, naive optimisations. -There's no test for this one so, we'll see how we go.. -*/ -pub fn guassian_blur_nband( - buffers: &mut [ImageBuffer], - sigma: f32, -) -> error::Result> { - if buffers.is_empty() { - return Err("No buffers provided"); - } - - let sig_squared = sigma.powi(2); - let radius = max!((3.0 * sigma).ceil(), 1.0) as usize; - - let kernel_length = radius.powi(2) + 1; - - let mut kernel = DnVec::zeros(kernel_length); - - let r = radius as i32; - - let sum: Dn = (-r..r) - .par_iter() - .map(|i| { - let exponent_numerator = -(i * i) as Dn; - let exponent_denominator = sig_squared.powi(2); - - let e_expression = - (std::f32::consts::E as Dn).powf(exponent_numerator / exponent_denominator); - - let kernel_value = e_expression / std::f32::consts::TAU * sig_squared; - - kernel[(i + r) as usize] = kernel_value; - - kernel_value - }) - .sum(); - - // Normalize kernel - kernel.par_iter_mut().for_each(|i| { - *i /= sum; - }); - - // Setup some paralelle iterators, reusable epsilons and locks. - let buffer_width = buffers[0].width; - let buffer_height = buffers[0].height; - let buff_len: usize = buffers.len(); - - let width_iter = (0..buffer_width).into_par_iter(); - let height_iter = (0..buffer_height).into_iter(); - - // Smart mutually exclusive smart pointer to allow for us to mutate the buffer across threads. - let m_buffers = Arc::new(Mutex::new(buffers.to_vec())); - - // Without a test to run against it's hard to know if this will help, or just thrash heaps of mutex contention. - // in theory if the indexes into the buffer are guaranteed to be unique we shouldn't need locks at all.. - // I will probs refactor this to an mpsc pattern to try that next. - - // 1st pass: Horizontal Blur - width_iter.clone().for_each(|x| { - let m_c_buffers = m_buffers.clone(); // These are only clones of the pointer - - height_iter.clone().for_each(|y| { - let values = (-r..r) - .filter(|&kernel_i| { - x as i32 - kernel_i < 0 || x as i32 - kernel_i >= buffer_width as i32 - }) - .flat_map(|kernel_i| { - let kernel_value = kernel[(kernel_i + r) as usize]; - (0..buff_len).map(move |b| { - buffers[b].get(x - kernel_i as usize, y).unwrap() * kernel_value - }) - }) - .fold(DnVec::zeros(buff_len), |acc, v| acc + v); - - (0..buff_len).for_each(|i| { - let mut buffers = m_c_buffers.lock().unwrap(); - buffers[i].put(x, y, values[i]); - }); - }); - }); - - // 2nd pass: Vertical Blur - width_iter.for_each(|x| { - let m_c_buffers = m_buffers.clone(); - height_iter.clone().for_each(|y| { - //TODO: the logic is basically the same as the above so break out into a helper and pass in the variables. - let values = (-r..r) - .filter(|&kernel_i| { - y as i32 - kernel_i < 0 || y as i32 - kernel_i >= buffer_height as i32 - }) - .flat_map(|kernel_i| { - let kernel_value = kernel[(kernel_i + r) as usize]; - (0..buff_len).map(move |b| { - buffers[b].get(y - kernel_i as usize, y).unwrap() * kernel_value - }) - }) - .fold(DnVec::zeros(buff_len), |acc, v| acc + v); - - (0..buff_len).for_each(|i| { - let mut buffers = m_c_buffers.lock().unwrap(); // Move mutable borrow outside of inner closure - buffers[i].put(x, y, values[i]); - }); - }); - }); - - Ok(buffers.into()) -} - -pub trait RgbImageBlur { - fn guassian_blur(&mut self, sigma: f32); -} - -impl RgbImageBlur for Image { - fn guassian_blur(&mut self, sigma: f32) { - let mut buffers = vec![]; - (0..self.num_bands()).for_each(|b| { - buffers.push(self.get_band(b).to_owned()); - }); - - if let Ok(buffers) = guassian_blur_nband(&mut buffers, sigma) { - buffers.iter().enumerate().for_each(|(b, _)| { - self.set_band(&buffers[b], b); - }); - } - } -} diff --git a/src/image.rs b/src/image.rs index d4a3c5c..9e85e4a 100644 --- a/src/image.rs +++ b/src/image.rs @@ -26,6 +26,12 @@ macro_rules! check_band_in_bounds { }; } +impl Image { + pub fn buffers(&self) -> &[ImageBuffer] { + &self.bands + } +} + fn image_uses_alpha(buffer: &DynamicImage) -> bool { matches!(buffer.color(), La8 | Rgba8 | La16 | Rgba16 | Rgba32F) } diff --git a/src/imagebuffer.rs b/src/imagebuffer.rs index be63cf8..a87f256 100644 --- a/src/imagebuffer.rs +++ b/src/imagebuffer.rs @@ -458,7 +458,17 @@ impl ImageBuffer { let index = y * self.width + x; self.buffer[index] } else { - panic!("Invalid pixel coordinates"); + panic!("Invalid pixel coordinates, x={}, y={}", x, y); + } + } + + #[inline(always)] + pub fn safe_get(&self, x: usize, y: usize) -> Option { + if x < self.width && y < self.height { + let index = y * self.width + x; + Some(self.buffer[index]) + } else { + None } } diff --git a/src/inpaint.rs b/src/inpaint.rs index bc9162d..dbc3c0a 100644 --- a/src/inpaint.rs +++ b/src/inpaint.rs @@ -1,12 +1,7 @@ // https://www.researchgate.net/publication/238183352_An_Image_Inpainting_Technique_Based_on_the_Fast_Marching_Method use crate::{enums, image::Image, imagebuffer::ImageBuffer, stats}; -use anyhow::anyhow; -use anyhow::Result; -use itertools::iproduct; - -#[cfg(rayon)] -use rayon::prelude::*; +use anyhow::{anyhow, Result}; #[derive(Debug, Clone)] pub struct Point { @@ -80,21 +75,8 @@ fn get_num_good_neighbors(mask: &ImageBuffer, x: i32, y: i32) -> u32 { s } -#[cfg(rayon)] -pub fn find_starting_point(mask: &ImageBuffer) -> Option { - let height_iter = (0..mask.height.clone()).into_par_iter(); - - for (y, x) in iproduct!(height_iter, (0..mask.width)) { - if mask.get(x, y) > 0.0 { - return Some(Point { x, y, score: 0 }); - } - } - None -} - -#[cfg(not(rayon))] pub fn find_starting_point(mask: &ImageBuffer) -> Option { - for (y, x) in iproduct!((0..mask.height), (0..mask.width)) { + for (y, x) in itertools::iproduct!((0..mask.height), (0..mask.width)) { if mask.get(x, y) > 0.0 { return Some(Point { x, y, score: 0 }); } diff --git a/src/lib.rs b/src/lib.rs index 5b08196..a30eb5a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,7 +18,7 @@ pub mod enums; #[deprecated] pub mod error; -pub mod guassianblur; +pub mod gaussianblur; pub mod hotpixel; pub mod image; pub mod imagebuffer; diff --git a/src/unsharp.rs b/src/unsharp.rs index 96cf0e8..5ee7054 100644 --- a/src/unsharp.rs +++ b/src/unsharp.rs @@ -1,4 +1,4 @@ -use crate::guassianblur::guassian_blur_nband; +use crate::gaussianblur::gaussian_blur_nband; use crate::image::Image; use crate::imagebuffer::ImageBuffer; use anyhow::Result; @@ -9,7 +9,7 @@ pub fn unsharp_mask_nbands( amount: f32, ) -> Result> { //FIXME: Unwraps :( - match guassian_blur_nband(buffers, sigma) { + match gaussian_blur_nband(buffers, sigma) { Ok(blurred) => Ok((0..blurred.len()) .map(|b| { buffers[b]