Skip to content

Commit 57055e3

Browse files
committed
Add AVX2x2, AVX2x4 and AVX2x8 SIMD implementations
These new implementations and array4096 are gated behind an `all-simd` feature, which is enabled by default, so that compile times can be reduced during solution development by disabling it. Benchmarks on my system when limited to 1 thread: compiled without any flags, avx2x4 is ~65% faster than the previous fastest implementation (array256); compiled with `RUSTFLAGS='-C target_cpu=native'`, avx2x4 is ~10% faster than the previous fastest implementation (array4096). This change doesn't impact the performance of the avx2 implementation.
1 parent bc5b314 commit 57055e3

File tree

13 files changed

+262
-64
lines changed

13 files changed

+262
-64
lines changed

crates/aoc/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,17 @@ repository = { workspace = true }
88
rust-version = { workspace = true }
99

1010
[dependencies]
11-
utils = { path = "../utils" }
11+
utils = { path = "../utils", default-features = false }
1212
# xtask update dependencies
1313
year2015 = { path = "../year2015", optional = true }
1414
year2016 = { path = "../year2016", optional = true }
1515
year2017 = { path = "../year2017", optional = true }
1616
year2024 = { path = "../year2024", optional = true }
1717

1818
[features]
19-
default = ["all-years", "unsafe"]
19+
default = ["all-years", "all-simd", "unsafe"]
2020
const_lut = ["year2024?/const_lut"]
21+
all-simd = ["utils/all-simd"]
2122
# xtask update features
2223
all-years = ["year2015", "year2016", "year2017", "year2024"]
2324
unsafe = ["year2015?/unsafe", "year2016?/unsafe", "year2017?/unsafe", "year2024?/unsafe", "utils/unsafe"]

crates/aoc_wasm/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ repository = { workspace = true }
88
rust-version = { workspace = true }
99

1010
[dependencies]
11-
aoc = { path = "../aoc" }
12-
utils = { path = "../utils", optional = true }
11+
# Don't enable all-simd feature to avoid including array4096 implementation
12+
aoc = { path = "../aoc", default-features = false, features = ["unsafe", "all-years"] }
13+
utils = { path = "../utils", default-features = false }
1314

1415
[features]
1516
multithreading = ["utils/wasm-multithreading"]

crates/aoc_wasm/src/multithreading.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use aoc::utils::wasm::scoped_tasks::worker;
21
use std::alloc::{Layout, alloc_zeroed};
32

43
/// Allocate stack for worker threads.
@@ -13,5 +12,9 @@ extern "C" fn allocate_stack(size: usize, align: usize) -> *mut u8 {
1312
/// Run worker thread.
1413
#[unsafe(no_mangle)]
1514
extern "C" fn worker_thread() {
16-
worker();
15+
#[cfg(target_family = "wasm")]
16+
aoc::utils::wasm::scoped_tasks::worker();
17+
18+
#[cfg(not(target_family = "wasm"))]
19+
panic!("worker_thread is not supported on this target");
1720
}

crates/utils/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ rust-version = { workspace = true }
1010
[dependencies]
1111

1212
[features]
13+
default = ["unsafe", "all-simd"]
1314
unsafe = []
15+
all-simd = []
1416
wasm-multithreading = ["unsafe"]
1517

1618
[lints]

crates/utils/src/multiversion.rs

Lines changed: 134 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,16 @@ macro_rules! multiversion {
7979
Scalar => $name::scalar::$name($($arg_name),*),
8080
Array128 => $name::array128::$name($($arg_name),*),
8181
Array256 => $name::array256::$name($($arg_name),*),
82-
#[cfg(not(target_family = "wasm"))]
82+
#[cfg(feature = "all-simd")]
8383
Array4096 => $name::array4096::$name($($arg_name),*),
84-
#[cfg(all(feature="unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
84+
#[cfg(all(feature = "unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
8585
AVX2 => unsafe { $name::avx2::$name($($arg_name),*) },
86+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
87+
AVX2x2 => unsafe { $name::avx2x2::$name($($arg_name),*) },
88+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
89+
AVX2x4 => unsafe { $name::avx2x4::$name($($arg_name),*) },
90+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
91+
AVX2x8 => unsafe { $name::avx2x8::$name($($arg_name),*) },
8692
}
8793
}
8894
};
@@ -125,7 +131,7 @@ macro_rules! multiversion {
125131
}
126132

127133
/// [`multiversion!`] array4096 implementation.
128-
#[cfg(not(target_family = "wasm"))]
134+
#[cfg(feature="all-simd")]
129135
pub mod array4096 {
130136
#![allow(clippy::large_types_passed_by_value)]
131137

@@ -136,7 +142,7 @@ macro_rules! multiversion {
136142
}
137143

138144
/// [`multiversion!`] avx2 implementation.
139-
#[cfg(all(feature="unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
145+
#[cfg(all(feature = "unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
140146
pub mod avx2 {
141147
#![allow(clippy::missing_safety_doc)]
142148

@@ -145,6 +151,39 @@ macro_rules! multiversion {
145151

146152
$crate::multiversion!{@helper target_feature(enable = "avx2") $($tail)*}
147153
}
154+
155+
/// [`multiversion!`] avx2x2 implementation.
156+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
157+
pub mod avx2x2 {
158+
#![allow(clippy::missing_safety_doc)]
159+
160+
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
161+
use {super::*, $($($path::)+avx2x2::*),*};
162+
163+
$crate::multiversion!{@helper target_feature(enable = "avx2") $($tail)*}
164+
}
165+
166+
/// [`multiversion!`] avx2x4 implementation.
167+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
168+
pub mod avx2x4 {
169+
#![allow(clippy::missing_safety_doc)]
170+
171+
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
172+
use {super::*, $($($path::)+avx2x4::*),*};
173+
174+
$crate::multiversion!{@helper target_feature(enable = "avx2") $($tail)*}
175+
}
176+
177+
/// [`multiversion!`] avx2x8 implementation.
178+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
179+
pub mod avx2x8 {
180+
#![allow(clippy::missing_safety_doc)]
181+
182+
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
183+
use {super::*, $($($path::)+avx2x8::*),*};
184+
185+
$crate::multiversion!{@helper target_feature(enable = "avx2") $($tail)*}
186+
}
148187
};
149188

150189
// Microbenchmark for dynamic dispatch
@@ -170,10 +209,16 @@ macro_rules! multiversion {
170209
Scalar => scalar::$name(),
171210
Array128 => array128::$name(),
172211
Array256 => array256::$name(),
173-
#[cfg(not(target_family = "wasm"))]
212+
#[cfg(feature = "all-simd")]
174213
Array4096 => array4096::$name(),
175-
#[cfg(all(feature="unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
214+
#[cfg(all(feature = "unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
176215
AVX2 => unsafe { avx2::$name() },
216+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
217+
AVX2x2 => unsafe { avx2x2::$name() },
218+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
219+
AVX2x4 => unsafe { avx2x4::$name() },
220+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
221+
AVX2x8 => unsafe { avx2x8::$name() },
177222
});
178223
(start.elapsed(), x)
179224
})
@@ -278,6 +323,7 @@ macro_rules! multiversion_test {
278323
}
279324

280325
#[test]
326+
#[cfg(feature = "all-simd")]
281327
$(#[$m])*
282328
fn array4096() {
283329
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
@@ -287,7 +333,7 @@ macro_rules! multiversion_test {
287333
}
288334

289335
#[test]
290-
#[cfg(all(feature="unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
336+
#[cfg(all(feature = "unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
291337
$(#[$m])*
292338
fn avx2() {
293339
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
@@ -301,6 +347,54 @@ macro_rules! multiversion_test {
301347

302348
unsafe { $body }
303349
}
350+
351+
#[test]
352+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
353+
$(#[$m])*
354+
fn avx2x2() {
355+
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
356+
use {$($($path::)+avx2x2::*),*};
357+
358+
if !$crate::multiversion::Version::AVX2x2.supported() {
359+
use std::io::{stdout, Write};
360+
let _ = writeln!(&mut stdout(), "warning: skipping test in {}::avx2x2 due to missing avx2 support", module_path!());
361+
return;
362+
}
363+
364+
unsafe { $body }
365+
}
366+
367+
#[test]
368+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
369+
$(#[$m])*
370+
fn avx2x4() {
371+
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
372+
use {$($($path::)+avx2x4::*),*};
373+
374+
if !$crate::multiversion::Version::AVX2x4.supported() {
375+
use std::io::{stdout, Write};
376+
let _ = writeln!(&mut stdout(), "warning: skipping test in {}::avx2x4 due to missing avx2 support", module_path!());
377+
return;
378+
}
379+
380+
unsafe { $body }
381+
}
382+
383+
#[test]
384+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
385+
$(#[$m])*
386+
fn avx2x8() {
387+
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
388+
use {$($($path::)+avx2x8::*),*};
389+
390+
if !$crate::multiversion::Version::AVX2x8.supported() {
391+
use std::io::{stdout, Write};
392+
let _ = writeln!(&mut stdout(), "warning: skipping test in {}::avx2x8 due to missing avx2 support", module_path!());
393+
return;
394+
}
395+
396+
unsafe { $body }
397+
}
304398
};
305399

306400
(
@@ -335,7 +429,7 @@ macro_rules! multiversion_test {
335429
$crate::multiversion_test!(@expr { $($tail)+ });
336430
}
337431

338-
#[cfg(not(target_family = "wasm"))]
432+
#[cfg(feature = "all-simd")]
339433
#[allow(clippy::large_types_passed_by_value)]
340434
{
341435
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
@@ -344,14 +438,38 @@ macro_rules! multiversion_test {
344438
$crate::multiversion_test!(@expr { $($tail)+ });
345439
}
346440

347-
#[cfg(all(feature="unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
441+
#[cfg(all(feature = "unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
348442
if $crate::multiversion::Version::AVX2.supported() {
349443
unsafe {
350444
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
351445
use {$($($path::)+avx2::*),*};
352446

353447
$crate::multiversion_test!(@expr { $($tail)+ });
354448
}
449+
450+
#[cfg(feature = "all-simd")]
451+
unsafe {
452+
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
453+
use {$($($path::)+avx2x2::*),*};
454+
455+
$crate::multiversion_test!(@expr { $($tail)+ });
456+
}
457+
458+
#[cfg(feature = "all-simd")]
459+
unsafe {
460+
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
461+
use {$($($path::)+avx2x4::*),*};
462+
463+
$crate::multiversion_test!(@expr { $($tail)+ });
464+
}
465+
466+
#[cfg(feature = "all-simd")]
467+
unsafe {
468+
#[allow(clippy::allow_attributes, unused_imports, clippy::wildcard_imports)]
469+
use {$($($path::)+avx2x8::*),*};
470+
471+
$crate::multiversion_test!(@expr { $($tail)+ });
472+
}
355473
}
356474
};
357475
(@expr $e:expr) => { $e }
@@ -408,10 +526,16 @@ versions_impl! {
408526
Scalar,
409527
Array128,
410528
Array256,
411-
#[cfg(not(target_family = "wasm"))]
529+
#[cfg(feature = "all-simd")]
412530
Array4096,
413531
#[cfg(all(feature = "unsafe", any(target_arch = "x86", target_arch = "x86_64")))]
414532
AVX2 if std::arch::is_x86_feature_detected!("avx2"),
533+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
534+
AVX2x2 if std::arch::is_x86_feature_detected!("avx2"),
535+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
536+
AVX2x4 if std::arch::is_x86_feature_detected!("avx2"),
537+
#[cfg(all(feature = "unsafe", feature = "all-simd", any(target_arch = "x86", target_arch = "x86_64")))]
538+
AVX2x8 if std::arch::is_x86_feature_detected!("avx2"),
415539
}
416540

417541
static OVERRIDE: OnceLock<Option<Version>> = OnceLock::new();

crates/utils/src/simd/array.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ pub mod array256 {
113113
}
114114

115115
/// 4096-bit wide vector implementations using arrays.
116+
#[cfg(feature = "all-simd")]
116117
pub mod array4096 {
117118
/// The name of this backend.
118119
pub const SIMD_BACKEND: &str = "array4096";

0 commit comments

Comments
 (0)