Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
423a60f
Prepare for benchmark of grid loop
w1th0utnam3 Sep 11, 2025
5bdb32e
Add benchmark
w1th0utnam3 Sep 11, 2025
445326f
Start implementing NEON variant of dense grid loop
w1th0utnam3 Sep 11, 2025
3958326
Implement NEON kernel
w1th0utnam3 Sep 11, 2025
ece74e4
Improve remainder processing
w1th0utnam3 Sep 11, 2025
0405f22
Small optimizations
w1th0utnam3 Sep 11, 2025
58ea549
Fix limits of evaluation
w1th0utnam3 Sep 11, 2025
e954fcb
Add benchmark data
w1th0utnam3 Sep 12, 2025
9783573
Update bounds
w1th0utnam3 Sep 12, 2025
788126b
Reduce code duplication
w1th0utnam3 Sep 12, 2025
cdbd2a6
Reformulate kernel
w1th0utnam3 Sep 12, 2025
0963e75
Improve performance
w1th0utnam3 Sep 12, 2025
8c94486
Fix use of Neon features
w1th0utnam3 Sep 12, 2025
9b93f47
Fix errors with Canyon benchmarks
w1th0utnam3 Sep 12, 2025
398fcce
Add AVX implementation
w1th0utnam3 Sep 12, 2025
79e88a4
Use FMA
w1th0utnam3 Sep 12, 2025
05fb89d
Auto dispatch for vectorization
w1th0utnam3 Sep 12, 2025
6b24b2e
Formatting
w1th0utnam3 Sep 12, 2025
5c8d4ae
Use vectorization in surface reconstruction
w1th0utnam3 Sep 12, 2025
d54ca42
Add CLI flags to enable SIMD
w1th0utnam3 Sep 12, 2025
8d77b11
Show vectorization type in timings
w1th0utnam3 Sep 12, 2025
64d8591
Refactor, enable vectorization on 32bit as well
w1th0utnam3 Sep 15, 2025
11b61a4
Refactoring, add some safety checks
w1th0utnam3 Sep 15, 2025
f90b5fc
Log detected SIMD features
w1th0utnam3 Sep 15, 2025
e9aaad8
Move code for SIMD kernels
w1th0utnam3 Sep 15, 2025
30ce6e6
Implement test for NEON cubic spline kernel
w1th0utnam3 Sep 15, 2025
0f2fbba
Fixes on x86
w1th0utnam3 Sep 15, 2025
1f45728
Rename CLI arg for SIMD
w1th0utnam3 Sep 15, 2025
c17f8e6
Add AVX cubic kernel test
w1th0utnam3 Sep 15, 2025
931b64a
Py: Add SIMD arguments
w1th0utnam3 Sep 15, 2025
095f808
Add serde-serialize feature, move code to benchmark
w1th0utnam3 Sep 15, 2025
264cfab
Update documentation
w1th0utnam3 Sep 15, 2025
c3962bd
Update to main
w1th0utnam3 Sep 15, 2025
d14f817
Fix imports of Scalar
w1th0utnam3 Sep 15, 2025
a6cd0cb
Reduce code duplication
w1th0utnam3 Sep 15, 2025
f208d35
Reduce code duplication dense/sparse
w1th0utnam3 Sep 15, 2025
008251e
Build & test on more targets
w1th0utnam3 Sep 15, 2025
3ec707a
Ignore kernel tests based on cfg
w1th0utnam3 Sep 15, 2025
18e7427
Fix cache keys
w1th0utnam3 Sep 15, 2025
9fef8ae
Add warning for SIMD + non-f32
w1th0utnam3 Sep 15, 2025
fc56ce3
Specify minimum Rust version
w1th0utnam3 Sep 15, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 64 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ jobs:
- uses: actions/checkout@v4
with:
lfs: true
- name: Update rust toolchain
run: |
rustup update stable
rustup default stable
rustup --version
- name: Cache rust dependencies
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-dev-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: Check (default members)
run: cargo check
- name: Check with examples
Expand All @@ -40,13 +53,30 @@ jobs:
run: cargo test

build_release:
name: Build & test release mode
runs-on: ubuntu-latest
name: Build & test release mode (${{ matrix.runner }})
runs-on: ${{ matrix.runner }}
needs: build_workspace
strategy:
fail-fast: false
matrix:
runner: [ubuntu-latest, ubuntu-24.04-arm, macos-14, macos-13, windows-latest]
steps:
- uses: actions/checkout@v4
with:
lfs: true
- name: Update rust toolchain
run: |
rustup update stable
rustup default stable
rustup --version
- name: Cache rust dependencies
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ matrix.runner }}-release-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: Build (release)
run: cargo build --release
- name: Test (release)
Expand All @@ -59,6 +89,19 @@ jobs:
- uses: actions/checkout@v4
with:
lfs: true
- name: Update rust toolchain
run: |
rustup update stable
rustup default stable
rustup --version
- name: Cache rust dependencies
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-lib-all-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: Build
run: |
cargo build --manifest-path="splashsurf_lib/Cargo.toml" --all-targets --all-features
Expand All @@ -76,6 +119,19 @@ jobs:
- uses: actions/checkout@v4
with:
lfs: true
- name: Update rust toolchain
run: |
rustup update stable
rustup default stable
rustup --version
- name: Cache rust dependencies
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-lib-no-default-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: Build
run: |
cargo build --manifest-path="splashsurf_lib/Cargo.toml" --all-targets --no-default-features
Expand All @@ -87,9 +143,14 @@ jobs:
name: Publish to crates.io
runs-on: ubuntu-latest
if: ${{ startsWith(github.ref, 'refs/tags/v') || (github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/main') }}
needs: [check_format, build_workspace, build_lib_all_features, build_lib_no_default_features]
needs: [check_format, build_workspace, build_lib_all_features, build_lib_no_default_features, build_release]
steps:
- uses: actions/checkout@v4
- name: Update rust toolchain
run: |
rustup update stable
rustup default stable
rustup --version
- name: Publish splashsurf_lib
run: |
cargo publish --package splashsurf_lib
Expand Down
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions data/density_grid_loop_subdomain_33.json

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion pysplashsurf/src/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ use crate::utils::{IndexT, pyerr_unsupported_scalar};
/// Upper corner [x,y,z] of the AABB of particles to consider in the reconstruction.
/// multi_threading
/// Flag to enable multi-threading for the reconstruction and post-processing steps.
/// simd
/// Flag to enable SIMD vectorization for the reconstruction if supported by the CPU architecture.
/// subdomain_grid
/// Flag to enable spatial decomposition by dividing the domain into subdomains with dense marching cube grids for efficient multi-threading.
/// subdomain_grid_auto_disable
Expand Down Expand Up @@ -108,7 +110,7 @@ use crate::utils::{IndexT, pyerr_unsupported_scalar};
#[pyo3(name = "reconstruction_pipeline")]
#[pyo3(signature = (particles, *, attributes_to_interpolate = None,
particle_radius, rest_density = 1000.0, smoothing_length, cube_size, iso_surface_threshold = 0.6,
aabb_min = None, aabb_max = None, multi_threading = true,
aabb_min = None, aabb_max = None, multi_threading = true, simd = true,
subdomain_grid = true, subdomain_grid_auto_disable = true, subdomain_num_cubes_per_dim = 64,
check_mesh_closed = false, check_mesh_manifold = false, check_mesh_orientation = false, check_mesh_debug = false,
mesh_cleanup = false, mesh_cleanup_snap_dist = None, decimate_barnacles = false, keep_vertices = false, compute_normals = false, sph_normals = false,
Expand All @@ -128,6 +130,7 @@ pub fn reconstruction_pipeline<'py>(
aabb_min: Option<[f64; 3]>,
aabb_max: Option<[f64; 3]>,
multi_threading: bool,
simd: bool,
subdomain_grid: bool,
subdomain_grid_auto_disable: bool,
subdomain_num_cubes_per_dim: u32,
Expand Down Expand Up @@ -192,6 +195,7 @@ pub fn reconstruction_pipeline<'py>(
iso_surface_threshold,
particle_aabb,
enable_multi_threading: multi_threading,
enable_simd: simd,
spatial_decomposition,
global_neighborhood_list: false,
};
Expand Down
6 changes: 5 additions & 1 deletion pysplashsurf/src/reconstruction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ impl PySurfaceReconstruction {
/// Upper corner of the AABB of particles to consider in the reconstruction.
/// multi_threading
/// Flag to enable multi-threading for the reconstruction and post-processing steps.
/// simd
/// Flag to enable SIMD vectorization for the reconstruction if supported by the CPU architecture.
/// subdomain_grid
/// Flag to enable spatial decomposition by dividing the domain into subdomains with dense marching cube grids for efficient multi-threading.
/// subdomain_grid_auto_disable
Expand All @@ -134,7 +136,7 @@ impl PySurfaceReconstruction {
#[pyo3(signature = (particles, *,
particle_radius, rest_density = 1000.0, smoothing_length, cube_size, iso_surface_threshold = 0.6,
aabb_min = None, aabb_max = None,
multi_threading = true, global_neighborhood_list = false,
multi_threading = true, simd = true, global_neighborhood_list = false,
subdomain_grid = true, subdomain_grid_auto_disable = true, subdomain_num_cubes_per_dim = 64
))]
pub fn reconstruct_surface<'py>(
Expand All @@ -147,6 +149,7 @@ pub fn reconstruct_surface<'py>(
aabb_min: Option<[f64; 3]>,
aabb_max: Option<[f64; 3]>,
multi_threading: bool,
simd: bool,
global_neighborhood_list: bool,
subdomain_grid: bool,
subdomain_grid_auto_disable: bool,
Expand Down Expand Up @@ -175,6 +178,7 @@ pub fn reconstruct_surface<'py>(
iso_surface_threshold,
particle_aabb,
enable_multi_threading: multi_threading,
enable_simd: simd,
spatial_decomposition,
global_neighborhood_list,
};
Expand Down
1 change: 1 addition & 0 deletions splashsurf/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ keywords = ["sph", "particle", "surface", "reconstruction", "marching-cubes"]
categories = ["command-line-utilities", "graphics", "science", "simulation", "visualization"]
readme = "README.md"
edition = "2024"
rust-version = "1.88"

authors.workspace = true
license.workspace = true
Expand Down
12 changes: 12 additions & 0 deletions splashsurf/src/reconstruct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,17 @@ pub(crate) struct ReconstructSubcommandArgs {
/// Set the number of threads for the worker thread pool
#[arg(help_heading = ARGS_ADV, long, short = 'n')]
pub num_threads: Option<usize>,
/// Enable vectorization of some computations using SIMD instructions (requires CPU with AVX2 or NEON support).
/// Note that vectorization is currently only available in single precision (f32) mode.
#[arg(
help_heading = ARGS_ADV,
long,
default_value = "on",
value_name = "off|on",
ignore_case = true,
require_equals = true
)]
pub simd: Switch,

/// Enable automatic spatial decomposition using a regular grid-based approach (for efficient multithreading) if the domain is large enough
#[arg(
Expand Down Expand Up @@ -637,6 +648,7 @@ pub(crate) mod arguments {
iso_surface_threshold: args.surface_threshold,
particle_aabb,
enable_multi_threading: args.parallelize_over_particles.into_bool(),
enable_simd: args.simd.into_bool(),
spatial_decomposition,
global_neighborhood_list: args.mesh_smoothing_weights.into_bool(),
};
Expand Down
20 changes: 16 additions & 4 deletions splashsurf_lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ keywords = ["sph", "particle", "surface", "reconstruction", "marching-cubes"]
categories = ["graphics", "science", "simulation", "visualization", "rendering"]
readme = "README.md"
edition = "2024"
rust-version = "1.88"

documentation = "https://docs.rs/splashsurf_lib"
authors.workspace = true
Expand All @@ -17,9 +18,15 @@ repository.workspace = true
# RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features --no-deps --open
# Build with all features to properly document everything
all-features = true
# Build only for a single target as this crate does not have any platform specific behavior
default-target = "x86_64-unknown-linux-gnu"
targets = []
targets = [
"x86_64-unknown-linux-gnu",
"i686-unknown-linux-gnu",
"aarch64-unknown-linux-gnu",
"aarch64-apple-darwin",
"x86_64-apple-darwin",
"x86_64-pc-windows-msvc",
"i686-pc-windows-msvc"
]

# Ignore the tests (especially the test mesh files) for publishing
exclude = ["tests/*", "benches/*"]
Expand All @@ -29,6 +36,7 @@ default = []
vtk_extras = ["vtkio"]
profiling = []
io = ["vtk_extras", "vtkio", "ply-rs", "nom", "serde_json", "flate2"]
serde-serialize = ["serde", "serde_derive", "serde_json", "nalgebra/serde-serialize"]

[dependencies]
log = "0.4"
Expand Down Expand Up @@ -59,6 +67,10 @@ flate2 = { version = "1.0", optional = true }
nom = { version = "8.0", optional = true }
serde_json = { version = "1.0", optional = true }

# Serialization
serde = { version = "1.0", optional = true }
serde_derive = { version = "1.0", optional = true }

[dev-dependencies]
criterion = "0.7"
ultraviolet = "0.10"
Expand All @@ -83,4 +95,4 @@ required-features = ["profiling", "io"]
name = "splashsurf_lib_benches"
path = "benches/splashsurf_lib_benches.rs"
harness = false
required-features = ["io"]
required-features = ["io", "serde-serialize"]
3 changes: 3 additions & 0 deletions splashsurf_lib/benches/benches/bench_full.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ pub fn surface_reconstruction_dam_break(c: &mut Criterion) {
iso_surface_threshold: 0.6,
particle_aabb: None,
enable_multi_threading: true,
enable_simd: true,
spatial_decomposition: SpatialDecomposition::None,
global_neighborhood_list: false,
};
Expand Down Expand Up @@ -161,6 +162,7 @@ pub fn surface_reconstruction_double_dam_break(c: &mut Criterion) {
iso_surface_threshold: 0.6,
particle_aabb: None,
enable_multi_threading: true,
enable_simd: true,
spatial_decomposition: SpatialDecomposition::None,
global_neighborhood_list: false,
};
Expand Down Expand Up @@ -220,6 +222,7 @@ pub fn surface_reconstruction_double_dam_break_inplace(c: &mut Criterion) {
iso_surface_threshold: 0.6,
particle_aabb: None,
enable_multi_threading: true,
enable_simd: true,
spatial_decomposition: SpatialDecomposition::None,
global_neighborhood_list: false,
};
Expand Down
Loading
Loading