diff --git a/Cargo.lock b/Cargo.lock index 7b7bcac2..3726a585 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -46,6 +46,22 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arrayfire" +version = "3.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c02d832c30a1d99b71e4a6dcd5d888155ce030dd8d9b501357e60b87a60d5d3b" +dependencies = [ + "half", + "lazy_static", + "libc", + "num", + "rustc_version", + "serde", + "serde_derive", + "serde_json", +] + [[package]] name = "atty" version = "0.2.14" @@ -178,6 +194,7 @@ version = "0.1.0" dependencies = [ "affinity", "approx", + "arrayfire", "criterion", "itertools", "ittapi", @@ -674,8 +691,8 @@ checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff" dependencies = [ "approx", "matrixmultiply", - "num-complex", - "num-rational", + "num-complex 0.4.2", + "num-rational 0.4.1", "num-traits", "rand", "rand_distr", @@ -690,7 +707,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" dependencies = [ "matrixmultiply", - "num-complex", + "num-complex 0.4.2", "num-integer", "num-traits", "rawpointer", @@ -725,6 +742,41 @@ dependencies = [ "winapi", ] +[[package]] +name = "num" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8536030f9fea7127f841b45bb6243b27255787fb4eb83958aa1ef9d2fdc0c36" +dependencies = [ + "num-bigint", + "num-complex 0.2.4", + "num-integer", + "num-iter", + "num-rational 0.2.4", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "090c7f9998ee0ff65aa5b723e4009f7b217707f1fb5ea551329cc4d6231fb304" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6b19411a9719e753aff12e5187b74d60d3dc449ec3f4dc21e3989c3f554bc95" +dependencies = [ + "autocfg", + "num-traits", +] + [[package]] name = "num-complex" version = "0.4.2" @@ -755,6 +807,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-part" version = "0.1.0" @@ -771,6 +834,18 @@ dependencies = [ "rusqlite", ] +[[package]] +name = "num-rational" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c000134b5dbf44adc5cb772486d335293351644b801551abe8f75c84cfa4aef" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-rational" version = "0.4.1" @@ -1077,6 +1152,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + [[package]] name = "rusty-fork" version = "0.3.0" @@ -1137,6 +1221,21 @@ dependencies = [ "bindgen", ] +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + [[package]] name = "serde" version = "1.0.147" @@ -1197,7 +1296,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f" dependencies = [ "approx", - "num-complex", + "num-complex 0.4.2", "num-traits", "paste", "wide", @@ -1216,7 +1315,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea71e48b3eab4c4b153e8e35dcaeac132720809ef68359097b8cb54a18edd70" dependencies = [ "ndarray", - "num-complex", + "num-complex 0.4.2", "num-traits", "num_cpus", "rayon", diff --git a/Cargo.toml b/Cargo.toml index c7e66447..7218a3ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ tracing = { version = "0.1", default-features = false, features = ["std"] } rand = "0.8" sprs = { version = "0.11", optional = true, default-features = false, features = ["multi_thread"] } ittapi = "0.3" +arrayfire = { version = "3", default-features = false, features = ["algorithm", "arithmetic", "data", "indexing", "macros"] } [dev-dependencies] affinity = { version = "0.1", default-features = false } diff --git a/build.rs b/build.rs new file mode 100644 index 00000000..948212ef --- /dev/null +++ b/build.rs @@ -0,0 +1,3 @@ +fn main() { + println!("cargo:rustc-link-lib=afcuda"); +} diff --git a/src/cartesian/mod.rs b/src/cartesian/mod.rs index 9b50f33f..e44a43db 100644 --- a/src/cartesian/mod.rs +++ b/src/cartesian/mod.rs @@ -1,4 +1,8 @@ use crate::topology::Topology; +use arrayfire::Array; +use arrayfire::Dim4; +use arrayfire::Fromf64; +use arrayfire::HasAfEnum; use num_traits::AsPrimitive; use num_traits::Num; use num_traits::One; @@ -10,6 +14,7 @@ use std::iter::Sum; use std::marker::PhantomData; use std::num::NonZeroUsize; use std::ops::Range; +use arrayfire::ConstGenerator; mod rcb; @@ -131,12 +136,18 @@ impl Grid<2> { where W: Send + Sync + PartialOrd + Num + Sum + AsPrimitive, f64: AsPrimitive, + W: HasAfEnum + Fromf64 + ConstGenerator, { - let total_weight: W = weights.par_iter().cloned().sum(); + let [width, height] = self.size; + let dim = [usize::from(width) as u64, usize::from(height) as u64, 1, 1]; + let weights = Array::new(weights, Dim4::new(&dim)); + + let (total_weight, _) = arrayfire::sum_all(&weights); + let iters = rcb::recurse_2d( self, self.into_subgrid(), - weights, + &weights, total_weight, iter_count, 1, diff --git a/src/cartesian/rcb.rs b/src/cartesian/rcb.rs index 5734ce1f..815495a6 100644 --- a/src/cartesian/rcb.rs +++ b/src/cartesian/rcb.rs @@ -1,5 +1,13 @@ use super::Grid; use super::SubGrid; +use arrayfire::seq; +use arrayfire::view; +use arrayfire::Array; +use arrayfire::BinaryOp; +use arrayfire::ConstGenerator; +use arrayfire::Dim4; +use arrayfire::Fromf64; +use arrayfire::HasAfEnum; use num_traits::AsPrimitive; use num_traits::Num; use rayon::iter::IndexedParallelIterator; @@ -98,10 +106,46 @@ where } } +fn weighted_median_gpu(weights: &Array, total_weight: W) -> WeightedMedian +where + W: Send + Sync + PartialOrd + Num + Sum + AsPrimitive, + f64: AsPrimitive, + W: HasAfEnum + ConstGenerator + Fromf64, +{ + let ideal_part_weight: W = (total_weight.as_() / 2.0).as_(); + let prefix_sum = arrayfire::scan(weights, 0, BinaryOp::ADD, false); + let greaters = arrayfire::gt( + &prefix_sum, + &arrayfire::constant(ideal_part_weight, prefix_sum.dims()), + false, + ); + // TODO get best index + let locations = arrayfire::locate(&greaters); + let position; + let left_weight; + if locations.dims()[0] > 0 { + // TODO find how to index arrays... + let p = view!(locations[0:0:1]); + (position, _) = arrayfire::sum_all(&p); + assert!((position as u64) < weights.dims()[0]); + let s = seq!(0, position as i32, 1); + (left_weight, _) = arrayfire::sum_all(&view!(weights[s])); + } else { + position = weights.dims()[0] as u32 - 1; + assert!((position as u64) < weights.dims()[0]); + left_weight = total_weight; // TODO + } + + WeightedMedian { + position: position as usize, + left_weight, + } +} + pub(super) fn recurse_2d( grid: Grid<2>, subgrid: SubGrid<2>, - weights: &[W], + weights: &Array, total_weight: W, iter_count: usize, coord: usize, @@ -109,65 +153,48 @@ pub(super) fn recurse_2d( where W: Send + Sync + PartialOrd + Num + Sum + AsPrimitive, f64: AsPrimitive, + W: HasAfEnum + ConstGenerator + Fromf64, { - if subgrid.size[coord] == 0 || iter_count == 0 { + if subgrid.size.contains(&0) || iter_count == 0 { return IterationResult::Whole; } - let axis_weights: Vec = if coord == 0 { - subgrid - .axis(0) - .into_par_iter() - .map(|x| { - let s: W = subgrid - .axis(1) - .map(|y| weights[grid.index_of([x, y])]) - .sum(); - s - }) - .collect() - } else { - subgrid - .axis(1) - .into_par_iter() - .map(|y| { - let s: W = subgrid - .axis(0) - .map(|x| weights[grid.index_of([x, y])]) - .sum(); - s - }) - .collect() - }; + let sub_x = seq!( + subgrid.offset[0] as i32, + (subgrid.offset[0] + subgrid.size[0]) as i32 - 1, + 1 + ); + let sub_y = seq!( + subgrid.offset[1] as i32, + (subgrid.offset[1] + subgrid.size[1]) as i32 - 1, + 1 + ); + let sub_weights = view!(weights[sub_x, sub_y]); + let axis_weights = arrayfire::sum(&sub_weights, 1 - coord as i32); + let axis_weights = arrayfire::flat(&axis_weights); - let split = weighted_median(&axis_weights, total_weight); + let split = weighted_median_gpu(&axis_weights, total_weight); let split_position = split.position + subgrid.offset[coord]; let left_weight = split.left_weight; let right_weight = total_weight - left_weight; let (left_grid, right_grid) = subgrid.split_at(coord, split_position); - let (left, right) = rayon::join( - || { - recurse_2d( - grid, - left_grid, - weights, - left_weight, - iter_count - 1, - (coord + 1) % 2, - ) - }, - || { - recurse_2d( - grid, - right_grid, - weights, - right_weight, - iter_count - 1, - (coord + 1) % 2, - ) - }, + let left = recurse_2d( + grid, + left_grid, + &weights, + left_weight, + iter_count - 1, + (coord + 1) % 2, + ); + let right = recurse_2d( + grid, + right_grid, + &weights, + right_weight, + iter_count - 1, + (coord + 1) % 2, ); IterationResult::Split { diff --git a/src/main.rs b/src/main.rs index ca4ac288..3164f288 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,8 @@ fn main() { + rayon::ThreadPoolBuilder::new() + .start_handler(|_| arrayfire::set_device(0)) + .build_global() + .unwrap(); let x = std::env::args().nth(1).unwrap().parse().unwrap(); let y = std::env::args().nth(2).unwrap().parse().unwrap(); let iter = std::env::args() @@ -12,6 +16,10 @@ fn main() { let weights: Vec = (0..n).map(|i| i as f64).collect(); let mut partition = vec![0; n]; + eprintln!("warming up..."); + grid.rcb(&mut partition, &weights, iter); + eprintln!("actual run..."); + let domain = ittapi::Domain::new("MyIncredibleDomain"); let before = std::time::Instant::now(); let task = ittapi::Task::begin(&domain, "MyIncredibleTask");