Skip to content

Commit d9ce3cb

Browse files
authored
Merge pull request #28 from HadrienG2/safe_arch-avx512
Safe arch + avx512
2 parents 0c889fd + 3ee229d commit d9ce3cb

File tree

6 files changed

+83
-54
lines changed

6 files changed

+83
-54
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ env:
2222
CARGO_INCREMENTAL: 0
2323
RUSTFLAGS: -D warnings
2424
RUSTDOCFLAGS: -D warnings
25-
MINIMAL_RUST: 1.79.0 # Minimal Supported Rust Version
25+
MINIMAL_RUST: 1.89.0 # Minimal Supported Rust Version
2626

2727
jobs:
2828
# Workaround for github CI dropping env var expansion in matrix strategy

CHANGELOG.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
_There are no unreleased changes in the pipeline at the moment._
1212

1313

14+
## [3.0.0] - 2025-10-04
15+
16+
### Changed
17+
18+
- AVX-512 types do not require the `nightly` feature anymore.
19+
- Bumped MSRV to 1.89.0.
20+
21+
1422
## [2.0.1] - 2025-05-04
1523

1624
### Fixed
@@ -34,7 +42,7 @@ _There are no unreleased changes in the pipeline at the moment._
3442

3543
### Fixed
3644

37-
- Adapt nightly feature to latest nightly changes.
45+
- Adapt `nightly` feature to latest nightly changes.
3846

3947

4048
## [1.0.0] - 2024-01-05
@@ -44,7 +52,8 @@ _There are no unreleased changes in the pipeline at the moment._
4452
- First tagged release of pessimize.
4553

4654

47-
[Unreleased]: https://github.com/HadrienG2/pessimize/compare/v2.0.1...HEAD
55+
[Unreleased]: https://github.com/HadrienG2/pessimize/compare/v3.0.0...HEAD
56+
[3.0.0]: https://github.com/HadrienG2/pessimize/compare/v2.0.1...v3.0.0
4857
[2.0.1]: https://github.com/HadrienG2/pessimize/compare/v2.0.0...v2.0.1
4958
[2.0.0]: https://github.com/HadrienG2/pessimize/compare/v1.0.1...v2.0.0
5059
[1.0.1]: https://github.com/HadrienG2/pessimize/compare/v1.0.0...v1.0.1

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ name = "pessimize"
1414
version = "2.0.1"
1515
authors = ["Hadrien G. <[email protected]>"]
1616
edition = "2021"
17-
rust-version = "1.79.0"
17+
rust-version = "1.89.0"
1818
description = "More efficient Rust compiler optimization barriers"
1919
repository = "https://github.com/HadrienG2/pessimize/"
2020
license = "MPL-2.0"
@@ -36,10 +36,10 @@ nightly = []
3636
default_impl = ["nightly"]
3737

3838
[dependencies]
39-
safe_arch = { version = "0.7", optional = true }
39+
safe_arch = { version = "0.9", optional = true }
4040

4141
[dev-dependencies]
42-
safe_arch = "0.7"
42+
safe_arch = "0.9"
4343
tempfile = "3.20"
4444

4545
[package.metadata.docs.rs]

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
[![Continuous
77
Integration](https://img.shields.io/github/actions/workflow/status/HadrienG2/pessimize/ci.yml?branch=master)](https://github.com/HadrienG2/pessimize/actions?query=workflow%3A%22Continuous+Integration%22)
88
![Requires rustc
9-
1.79.0+](https://img.shields.io/badge/rustc-1.79.0+-lightgray.svg)
9+
1.89.0+](https://img.shields.io/badge/rustc-1.89.0+-lightgray.svg)
1010

1111
Microbenchmarking is a subtle exercise to begin with, and the lack of
1212
lightweight optimization barriers on stable Rust makes it even more difficult.

src/arch/x86_family.rs

Lines changed: 65 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,12 @@ use target_arch::CpuidResult;
1111
use target_arch::__m128;
1212
#[cfg(any(target_feature = "avx2", doc))]
1313
use target_arch::__m256i;
14-
#[cfg(all(
15-
feature = "nightly",
16-
any(all(target_feature = "avx512vl", target_feature = "avx512bf16"), doc)
17-
))]
18-
use target_arch::{__m128bh, __m256bh};
1914
#[cfg(any(target_feature = "sse2", doc))]
2015
use target_arch::{__m128d, __m128i};
2116
#[cfg(any(target_feature = "avx", doc))]
2217
use target_arch::{__m256, __m256d};
18+
#[cfg(any(target_feature = "avx512f", doc))]
19+
use target_arch::{__m512, __m512d, __m512i};
2320

2421
pessimize_asm_values!(allow(missing_docs) { reg_byte: (i8, u8), reg: (i16, u16, i32, u32, isize, usize) });
2522

@@ -98,18 +95,16 @@ pessimize_asm_values!(
9895
);
9996

10097
/// AVX-512 specific functionality
101-
#[cfg_attr(
102-
feature = "nightly",
103-
doc(cfg(all(feature = "nightly", target_feature = "avx512f")))
104-
)]
105-
#[cfg(all(feature = "nightly", any(target_feature = "avx512f", doc)))]
98+
#[cfg(any(target_feature = "avx512f", doc))]
99+
#[cfg_attr(feature = "nightly", doc(cfg(target_feature = "avx512f")))]
106100
pub mod avx512 {
107101
use super::*;
108102
use crate::Pessimize;
109103
use core::arch::asm;
110104
#[cfg(any(target_feature = "avx512bf16", doc))]
111105
use target_arch::__m512bh;
112-
use target_arch::{__m512, __m512d, __m512i};
106+
#[cfg(any(all(target_feature = "avx512vl", target_feature = "avx512bf16"), doc))]
107+
use target_arch::{__m128bh, __m256bh};
113108

114109
// Basic register type support
115110
pessimize_asm_values!(
@@ -152,7 +147,7 @@ pub mod avx512 {
152147
$(
153148
// This is one of the primitive Pessimize impls on which
154149
// the PessimizeCast/BorrowPessimize stack is built
155-
#[$doc_cfg]
150+
#[cfg_attr(feature = "nightly", $doc_cfg)]
156151
unsafe impl Pessimize for Mask<$mask_impl> {
157152
#[inline]
158153
fn hide(mut self) -> Self {
@@ -206,6 +201,8 @@ mod safe_arch_types {
206201
use safe_arch::{m128d, m128i};
207202
#[cfg(any(target_feature = "avx", doc))]
208203
use safe_arch::{m256, m256d};
204+
#[cfg(any(target_feature = "avx512f", doc))]
205+
use safe_arch::{m512, m512d, m512i};
209206

210207
#[cfg(any(target_feature = "sse", doc))]
211208
pessimize_newtypes!(
@@ -236,6 +233,16 @@ mod safe_arch_types {
236233
doc(cfg(all(feature = "safe_arch", target_feature = "avx2")))
237234
{ m256i{ __m256i } }
238235
);
236+
237+
#[cfg(any(target_feature = "avx512f", doc))]
238+
pessimize_newtypes!(
239+
doc(cfg(all(feature = "safe_arch", target_feature = "avx512f")))
240+
{
241+
m512{ __m512 },
242+
m512d{ __m512d },
243+
m512i{ __m512i }
244+
}
245+
);
239246
}
240247

241248
// Support portable_simd if enabled
@@ -731,12 +738,18 @@ mod tests {
731738
}
732739
}
733740

734-
#[cfg(all(feature = "nightly", target_feature = "avx512f"))]
735-
mod avx512 {
736-
use super::*;
737-
738-
#[test]
739-
fn avx512f() {
741+
#[cfg(target_feature = "avx512f")]
742+
#[test]
743+
fn avx512f() {
744+
use safe_arch::{m512, m512d, m512i};
745+
test_simd::<i32, 16, m512i>(i32::MIN, i32::MAX);
746+
test_simd::<u32, 16, m512i>(u32::MIN, u32::MAX);
747+
test_simd::<f32, 16, m512>(f32::MIN, f32::MAX);
748+
test_simd::<i64, 8, m512i>(i64::MIN, i64::MAX);
749+
test_simd::<u64, 8, m512i>(u64::MIN, u64::MAX);
750+
test_simd::<f64, 8, m512d>(f64::MIN, f64::MAX);
751+
#[cfg(feature = "nightly")]
752+
{
740753
portable_simd_tests!(
741754
(i32, 16),
742755
(u32, 16),
@@ -746,21 +759,21 @@ mod tests {
746759
(f64, 8)
747760
);
748761
portable_mask_tests!((i32, 16), (i64, 8));
749-
#[cfg(target_arch = "x86")]
750-
{
751-
portable_simd_tests!((isize, 16), (usize, 16));
752-
portable_mask_tests!((isize, 16));
753-
}
754-
#[cfg(target_arch = "x86_64")]
755-
{
756-
portable_simd_tests!((isize, 8), (usize, 8));
757-
portable_mask_tests!((isize, 8));
758-
}
762+
portable_simd_tests!((isize, 8), (usize, 8));
763+
portable_mask_tests!((isize, 8));
759764
}
765+
}
760766

761-
#[test]
762-
#[ignore]
763-
fn avx512f_optim() {
767+
#[cfg(target_feature = "avx512f")]
768+
#[test]
769+
#[ignore]
770+
fn avx512f_optim() {
771+
use safe_arch::{m512, m512d, m512i};
772+
test_unoptimized_value_type::<m512>();
773+
test_unoptimized_value_type::<m512d>();
774+
test_unoptimized_value_type::<m512i>();
775+
#[cfg(feature = "nightly")]
776+
{
764777
portable_simd_tests_optim!(
765778
(i32, 16),
766779
(u32, 16),
@@ -770,26 +783,33 @@ mod tests {
770783
(f64, 8)
771784
);
772785
portable_mask_tests_optim!((i32, 16), (i64, 8));
773-
#[cfg(target_arch = "x86")]
774-
{
775-
portable_simd_tests_optim!((isize, 16), (usize, 16));
776-
portable_mask_tests_optim!((isize, 16));
777-
}
778-
#[cfg(target_arch = "x86_64")]
779-
{
780-
portable_simd_tests_optim!((isize, 8), (usize, 8));
781-
portable_mask_tests_optim!((isize, 8));
782-
}
786+
portable_simd_tests_optim!((isize, 8), (usize, 8));
787+
portable_mask_tests_optim!((isize, 8));
783788
}
789+
}
784790

785-
#[cfg(target_feature = "avx512bw")]
786-
#[test]
787-
fn avx512bw() {
791+
#[cfg(target_feature = "avx512bw")]
792+
#[test]
793+
fn avx512bw() {
794+
use safe_arch::m512i;
795+
test_simd::<i8, 64, m512i>(i8::MIN, i8::MAX);
796+
test_simd::<u8, 64, m512i>(u8::MIN, u8::MAX);
797+
test_simd::<i16, 32, m512i>(i16::MIN, i16::MAX);
798+
test_simd::<u16, 32, m512i>(u16::MIN, u16::MAX);
799+
#[cfg(feature = "nightly")]
800+
{
788801
portable_simd_tests!((i8, 64), (u8, 64), (i16, 32), (u16, 32));
789802
portable_mask_tests!((i8, 64), (i16, 32));
790803
}
804+
}
805+
806+
// This is nightly-only even though the rest of avx512 is not nightly-only
807+
// anymore because we can't easily test without safe_arch or portable_simd
808+
// and portable_simd doesn't have support for AVX-512 yet.
809+
#[cfg(all(feature = "nightly", target_feature = "avx512bw"))]
810+
mod avx512 {
811+
use super::*;
791812

792-
#[cfg(target_feature = "avx512bw")]
793813
#[test]
794814
#[ignore]
795815
fn avx512bw_optim() {

0 commit comments

Comments
 (0)