Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ env:
CARGO_INCREMENTAL: 0
RUSTFLAGS: -D warnings
RUSTDOCFLAGS: -D warnings
MINIMAL_RUST: 1.79.0 # Minimal Supported Rust Version
MINIMAL_RUST: 1.89.0 # Minimal Supported Rust Version

jobs:
# Workaround for GitHub CI dropping env var expansion in matrix strategy
Expand Down
13 changes: 11 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
_There are no unreleased changes in the pipeline at the moment._


## [3.0.0] - 2025-10-04

### Changed

- AVX-512 types no longer require the `nightly` feature.
- Bumped MSRV to 1.89.0.


## [2.0.1] - 2025-05-04

### Fixed
Expand All @@ -34,7 +42,7 @@ _There are no unreleased changes in the pipeline at the moment._

### Fixed

- Adapt nightly feature to latest nightly changes.
- Adapt `nightly` feature to latest nightly changes.


## [1.0.0] - 2024-01-05
Expand All @@ -44,7 +52,8 @@ _There are no unreleased changes in the pipeline at the moment._
- First tagged release of pessimize.


[Unreleased]: https://github.com/HadrienG2/pessimize/compare/v2.0.1...HEAD
[Unreleased]: https://github.com/HadrienG2/pessimize/compare/v3.0.0...HEAD
[3.0.0]: https://github.com/HadrienG2/pessimize/compare/v2.0.1...v3.0.0
[2.0.1]: https://github.com/HadrienG2/pessimize/compare/v2.0.0...v2.0.1
[2.0.0]: https://github.com/HadrienG2/pessimize/compare/v1.0.1...v2.0.0
[1.0.1]: https://github.com/HadrienG2/pessimize/compare/v1.0.0...v1.0.1
Expand Down
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ name = "pessimize"
version = "2.0.1"
authors = ["Hadrien G. <[email protected]>"]
edition = "2021"
rust-version = "1.79.0"
rust-version = "1.89.0"
description = "More efficient Rust compiler optimization barriers"
repository = "https://github.com/HadrienG2/pessimize/"
license = "MPL-2.0"
Expand All @@ -36,10 +36,10 @@ nightly = []
default_impl = ["nightly"]

[dependencies]
safe_arch = { version = "0.7", optional = true }
safe_arch = { version = "0.9", optional = true }

[dev-dependencies]
safe_arch = "0.7"
safe_arch = "0.9"
tempfile = "3.20"

[package.metadata.docs.rs]
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[![Continuous
Integration](https://img.shields.io/github/actions/workflow/status/HadrienG2/pessimize/ci.yml?branch=master)](https://github.com/HadrienG2/pessimize/actions?query=workflow%3A%22Continuous+Integration%22)
![Requires rustc
1.79.0+](https://img.shields.io/badge/rustc-1.79.0+-lightgray.svg)
1.89.0+](https://img.shields.io/badge/rustc-1.89.0+-lightgray.svg)

Microbenchmarking is a subtle exercise to begin with, and the lack of
lightweight optimization barriers on stable Rust makes it even more difficult.
Expand Down
110 changes: 65 additions & 45 deletions src/arch/x86_family.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,12 @@ use target_arch::CpuidResult;
use target_arch::__m128;
#[cfg(any(target_feature = "avx2", doc))]
use target_arch::__m256i;
#[cfg(all(
feature = "nightly",
any(all(target_feature = "avx512vl", target_feature = "avx512bf16"), doc)
))]
use target_arch::{__m128bh, __m256bh};
#[cfg(any(target_feature = "sse2", doc))]
use target_arch::{__m128d, __m128i};
#[cfg(any(target_feature = "avx", doc))]
use target_arch::{__m256, __m256d};
#[cfg(any(target_feature = "avx512f", doc))]
use target_arch::{__m512, __m512d, __m512i};

pessimize_asm_values!(allow(missing_docs) { reg_byte: (i8, u8), reg: (i16, u16, i32, u32, isize, usize) });

Expand Down Expand Up @@ -98,18 +95,16 @@ pessimize_asm_values!(
);

/// AVX-512 specific functionality
#[cfg_attr(
feature = "nightly",
doc(cfg(all(feature = "nightly", target_feature = "avx512f")))
)]
#[cfg(all(feature = "nightly", any(target_feature = "avx512f", doc)))]
#[cfg(any(target_feature = "avx512f", doc))]
#[cfg_attr(feature = "nightly", doc(cfg(target_feature = "avx512f")))]
pub mod avx512 {
use super::*;
use crate::Pessimize;
use core::arch::asm;
#[cfg(any(target_feature = "avx512bf16", doc))]
use target_arch::__m512bh;
use target_arch::{__m512, __m512d, __m512i};
#[cfg(any(all(target_feature = "avx512vl", target_feature = "avx512bf16"), doc))]
use target_arch::{__m128bh, __m256bh};

// Basic register type support
pessimize_asm_values!(
Expand Down Expand Up @@ -152,7 +147,7 @@ pub mod avx512 {
$(
// This is one of the primitive Pessimize impls on which
// the PessimizeCast/BorrowPessimize stack is built
#[$doc_cfg]
#[cfg_attr(feature = "nightly", $doc_cfg)]
unsafe impl Pessimize for Mask<$mask_impl> {
#[inline]
fn hide(mut self) -> Self {
Expand Down Expand Up @@ -206,6 +201,8 @@ mod safe_arch_types {
use safe_arch::{m128d, m128i};
#[cfg(any(target_feature = "avx", doc))]
use safe_arch::{m256, m256d};
#[cfg(any(target_feature = "avx512f", doc))]
use safe_arch::{m512, m512d, m512i};

#[cfg(any(target_feature = "sse", doc))]
pessimize_newtypes!(
Expand Down Expand Up @@ -236,6 +233,16 @@ mod safe_arch_types {
doc(cfg(all(feature = "safe_arch", target_feature = "avx2")))
{ m256i{ __m256i } }
);

// Hand the 512-bit `safe_arch` vector types to `pessimize_newtypes!`, each
// paired with the raw `__m512*` type named inside its braces. Only compiled
// when the `avx512f` target feature is enabled, or when building docs.
// NOTE(review): the leading `doc(cfg(...))` argument is presumably forwarded
// as a rustdoc annotation on the generated impls — confirm against the macro
// definition, which is not visible here.
#[cfg(any(target_feature = "avx512f", doc))]
pessimize_newtypes!(
doc(cfg(all(feature = "safe_arch", target_feature = "avx512f")))
{
m512{ __m512 },
m512d{ __m512d },
m512i{ __m512i }
}
);
}

// Support portable_simd if enabled
Expand Down Expand Up @@ -731,12 +738,18 @@ mod tests {
}
}

#[cfg(all(feature = "nightly", target_feature = "avx512f"))]
mod avx512 {
use super::*;

#[test]
fn avx512f() {
#[cfg(target_feature = "avx512f")]
#[test]
fn avx512f() {
use safe_arch::{m512, m512d, m512i};
test_simd::<i32, 16, m512i>(i32::MIN, i32::MAX);
test_simd::<u32, 16, m512i>(u32::MIN, u32::MAX);
test_simd::<f32, 16, m512>(f32::MIN, f32::MAX);
test_simd::<i64, 8, m512i>(i64::MIN, i64::MAX);
test_simd::<u64, 8, m512i>(u64::MIN, u64::MAX);
test_simd::<f64, 8, m512d>(f64::MIN, f64::MAX);
#[cfg(feature = "nightly")]
{
portable_simd_tests!(
(i32, 16),
(u32, 16),
Expand All @@ -746,21 +759,21 @@ mod tests {
(f64, 8)
);
portable_mask_tests!((i32, 16), (i64, 8));
#[cfg(target_arch = "x86")]
{
portable_simd_tests!((isize, 16), (usize, 16));
portable_mask_tests!((isize, 16));
}
#[cfg(target_arch = "x86_64")]
{
portable_simd_tests!((isize, 8), (usize, 8));
portable_mask_tests!((isize, 8));
}
portable_simd_tests!((isize, 8), (usize, 8));
portable_mask_tests!((isize, 8));
}
}

#[test]
#[ignore]
fn avx512f_optim() {
#[cfg(target_feature = "avx512f")]
#[test]
#[ignore]
fn avx512f_optim() {
use safe_arch::{m512, m512d, m512i};
test_unoptimized_value_type::<m512>();
test_unoptimized_value_type::<m512d>();
test_unoptimized_value_type::<m512i>();
#[cfg(feature = "nightly")]
{
portable_simd_tests_optim!(
(i32, 16),
(u32, 16),
Expand All @@ -770,26 +783,33 @@ mod tests {
(f64, 8)
);
portable_mask_tests_optim!((i32, 16), (i64, 8));
#[cfg(target_arch = "x86")]
{
portable_simd_tests_optim!((isize, 16), (usize, 16));
portable_mask_tests_optim!((isize, 16));
}
#[cfg(target_arch = "x86_64")]
{
portable_simd_tests_optim!((isize, 8), (usize, 8));
portable_mask_tests_optim!((isize, 8));
}
portable_simd_tests_optim!((isize, 8), (usize, 8));
portable_mask_tests_optim!((isize, 8));
}
}

#[cfg(target_feature = "avx512bw")]
#[test]
fn avx512bw() {
#[cfg(target_feature = "avx512bw")]
#[test]
fn avx512bw() {
use safe_arch::m512i;
test_simd::<i8, 64, m512i>(i8::MIN, i8::MAX);
test_simd::<u8, 64, m512i>(u8::MIN, u8::MAX);
test_simd::<i16, 32, m512i>(i16::MIN, i16::MAX);
test_simd::<u16, 32, m512i>(u16::MIN, u16::MAX);
#[cfg(feature = "nightly")]
{
portable_simd_tests!((i8, 64), (u8, 64), (i16, 32), (u16, 32));
portable_mask_tests!((i8, 64), (i16, 32));
}
}

// This module is still nightly-only, even though the rest of the AVX-512
// support no longer is, because we can't easily test it without safe_arch or
// portable_simd, and portable_simd doesn't have support for AVX-512 yet.
#[cfg(all(feature = "nightly", target_feature = "avx512bw"))]
mod avx512 {
use super::*;

#[cfg(target_feature = "avx512bw")]
#[test]
#[ignore]
fn avx512bw_optim() {
Expand Down