Commit 5262484

cmov: add support for NonZero and Ordering (#1404)
Adds `Cmov` and `CmovEq` impls for the following types (and slices thereof) from `core::num`:

- NonZeroI8
- NonZeroI16
- NonZeroI32
- NonZeroI64
- NonZeroI128
- NonZeroU8
- NonZeroU16
- NonZeroU32
- NonZeroU64
- NonZeroU128

Also adds support for `core::cmp::Ordering`.

This should be sufficient to eliminate unsafe code from `ctutils`, and also improve performance.
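For illustration (this sketch is not part of the commit), the new impls are used like the crate's existing ones, assuming the established `Cmov`/`CmovEq` trait signatures: `cmovnz` moves `src` into `self` iff the condition is non-zero, and `cmoveq` sets `output` to `input` iff the operands are equal.

use cmov::{Cmov, CmovEq};
use core::{cmp::Ordering, num::NonZeroU64};

fn main() {
    let mut a = NonZeroU64::new(1).unwrap();
    let b = NonZeroU64::new(2).unwrap();

    // Move `b` into `a` because the condition is non-zero.
    a.cmovnz(&b, 1);
    assert_eq!(a.get(), 2);

    // Select between `Ordering` values the same way.
    let mut ord = Ordering::Less;
    ord.cmovnz(&Ordering::Greater, 1);
    assert_eq!(ord, Ordering::Greater);

    // `cmoveq` sets `output` to `input` iff the operands are equal.
    let mut eq: u8 = 0;
    a.cmoveq(&b, 1, &mut eq);
    assert_eq!(eq, 1);
}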
1 parent 3524aa8 commit 5262484

File tree

3 files changed: +240 -56 lines changed

cmov/src/lib.rs

Lines changed: 93 additions & 5 deletions
@@ -38,7 +38,19 @@ mod array;
 mod backends;
 mod slice;
 
-/// Condition
+use core::{
+    cmp,
+    num::{
+        NonZeroI8, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI128, NonZeroU8, NonZeroU16,
+        NonZeroU32, NonZeroU64, NonZeroU128,
+    },
+};
+
+/// Condition: the argument given to [`Cmov`] and [`CmovEq`] representing an effective boolean
+/// condition by virtue of being zero or non-zero.
+///
+/// Using a `u8` for this type helps prevent rustc optimizers from speculating about it as if it
+/// were a boolean value.
 pub type Condition = u8;
 
 /// Conditional move
@@ -192,9 +204,15 @@ macro_rules! impl_cmov_traits_for_signed_ints {
     };
 }
 
-impl_cmov_traits_for_signed_ints!(i8 => u8, i16 => u16, i32 => u32, i64 => u64, i128 => u128);
+impl_cmov_traits_for_signed_ints!(
+    i8 => u8,
+    i16 => u16,
+    i32 => u32,
+    i64 => u64,
+    i128 => u128
+);
 
-macro_rules! impl_cmov_traits_for_size_type {
+macro_rules! impl_cmov_traits_for_size_int {
     ($size:ty, $int16:ty, $int32:ty, $int64:ty) => {
         #[cfg(any(
             target_pointer_width = "16",
@@ -256,5 +274,75 @@ macro_rules! impl_cmov_traits_for_size_type {
     };
 }
 
-impl_cmov_traits_for_size_type!(isize, i16, i32, i64);
-impl_cmov_traits_for_size_type!(usize, u16, u32, u64);
+impl_cmov_traits_for_size_int!(isize, i16, i32, i64);
+impl_cmov_traits_for_size_int!(usize, u16, u32, u64);
+
+/// Impl `Cmov` for `NonZero<T>` by calling the `Cmov` impl for `T`.
+macro_rules! impl_cmov_traits_for_nonzero_integers {
+    ( $($nzint:ident),+ ) => {
+        $(
+            impl Cmov for $nzint {
+                #[inline]
+                fn cmovnz(&mut self, src: &Self, condition: Condition) {
+                    let mut n = self.get();
+                    n.cmovnz(&src.get(), condition);
+
+                    // SAFETY: we are constructing `NonZero` from a value we obtained from
+                    // `NonZero::get`, which ensures it's non-zero.
+                    #[allow(unsafe_code)]
+                    unsafe { *self = $nzint::new_unchecked(n) }
+                }
+            }
+
+            impl CmovEq for $nzint {
+                #[inline]
+                fn cmoveq(&self, rhs: &Self, input: Condition, output: &mut Condition) {
+                    self.get().cmoveq(&rhs.get(), input, output);
+                }
+            }
+        )+
+    };
+}
+
+impl_cmov_traits_for_nonzero_integers!(
+    NonZeroI8,
+    NonZeroI16,
+    NonZeroI32,
+    NonZeroI64,
+    NonZeroI128,
+    NonZeroU8,
+    NonZeroU16,
+    NonZeroU32,
+    NonZeroU64,
+    NonZeroU128
+);
+
+impl Cmov for cmp::Ordering {
+    #[inline]
+    fn cmovnz(&mut self, src: &Self, condition: Condition) {
+        // `Ordering` is `#[repr(i8)]` where:
+        //
+        // - `Less` => -1
+        // - `Equal` => 0
+        // - `Greater` => 1
+        //
+        // Given this, it's possible to operate on orderings as if they're `i8`, which allows us
+        // to use the `Cmov` impl on `i8` to select between them.
+        let mut n = *self as i8;
+        n.cmovnz(&(*src as i8), condition);
+
+        // SAFETY: `Ordering` is `#[repr(i8)]` and `n` has been assigned
+        // a value which was originally a valid `Ordering` then cast to `i8`
+        #[allow(trivial_casts, unsafe_code)]
+        unsafe {
+            *self = *(&raw const n).cast::<Self>();
+        }
+    }
+}
+
+impl CmovEq for cmp::Ordering {
+    #[inline]
+    fn cmoveq(&self, rhs: &Self, input: Condition, output: &mut Condition) {
+        (*self as i8).cmoveq(&(*rhs as i8), input, output);
+    }
+}
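To make the `Ordering` round-trip above concrete outside the crate, here is a standalone sketch of the same `#[repr(i8)]` technique. Note the assumption: a plain bitmask select stands in for the crate's assembly-backed `cmovnz`, so this is readable rather than the actual constant-time backend the commit relies on.

use core::cmp::Ordering;

/// Overwrite `dst` with `src` iff `condition` is non-zero.
fn select_ordering(dst: &mut Ordering, src: Ordering, condition: u8) {
    // Operate on the `i8` discriminants (-1, 0, 1).
    let mut n = *dst as i8;

    // All-ones mask when `condition` is non-zero, all-zeroes otherwise.
    let mask = -((condition != 0) as i8);
    n = (n & !mask) | ((src as i8) & mask);

    // SAFETY: `Ordering` is `#[repr(i8)]` and `n` is always one of the three
    // valid discriminant values, so casting the pointer back is sound.
    #[allow(unsafe_code)]
    unsafe {
        *dst = *(&raw const n).cast::<Ordering>();
    }
}

fn main() {
    let mut ord = Ordering::Less;
    select_ordering(&mut ord, Ordering::Greater, 1);
    assert_eq!(ord, Ordering::Greater);
    select_ordering(&mut ord, Ordering::Equal, 0);
    assert_eq!(ord, Ordering::Greater); // condition was zero: unchanged
}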

cmov/src/slice.rs

Lines changed: 97 additions & 49 deletions
@@ -2,6 +2,11 @@
 
 use crate::{Cmov, CmovEq, Condition};
 use core::{
+    cmp,
+    num::{
+        NonZeroI8, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI128, NonZeroU8, NonZeroU16,
+        NonZeroU32, NonZeroU64, NonZeroU128,
+    },
     ops::{BitOrAssign, Shl},
     ptr, slice,
 };
@@ -12,7 +17,6 @@ type Word = u32;
 #[cfg(target_pointer_width = "64")]
 type Word = u64;
 const WORD_SIZE: usize = size_of::<Word>();
-const _: () = assert!(size_of::<usize>() <= WORD_SIZE, "unexpected word size");
 
 /// Assert the lengths of the two slices are equal.
 macro_rules! assert_lengths_eq {
@@ -25,6 +29,10 @@ macro_rules! assert_lengths_eq {
     };
 }
 
+//
+// `Cmov` trait impls
+//
+
 // Optimized implementation for byte slices which coalesces them into word-sized chunks first,
 // then performs [`Cmov`] at the word-level to cut down on the total number of instructions.
 impl Cmov for [u8] {
@@ -125,8 +133,10 @@ macro_rules! impl_cmov_with_loop {
     };
 }
 
+// These types are large enough we don't need to use anything more complex than a simple loop
 impl_cmov_with_loop!(u32, u64, u128);
 
+/// Ensure the two provided types have the same size and alignment.
 macro_rules! assert_size_and_alignment_eq {
     ($int:ty, $uint:ty) => {
         const {
@@ -143,65 +153,53 @@ macro_rules! assert_size_and_alignment_eq {
     };
 }
 
-/// Implement [`Cmov`] for a signed type by invoking the corresponding unsigned impl.
-macro_rules! impl_cmov_for_signed_with_unsigned {
-    ($int:ty, $uint:ty) => {
-        impl Cmov for [$int] {
-            #[inline]
-            #[track_caller]
-            #[allow(unsafe_code)]
-            fn cmovnz(&mut self, value: &Self, condition: Condition) {
-                assert_size_and_alignment_eq!($int, $uint);
-
-                // SAFETY:
-                // - Slices being constructed are of same-sized integers as asserted above.
-                // - We source the slice length directly from the other valid slice.
-
-                let self_unsigned = unsafe { cast_slice_mut::<$int, $uint>(self) };
-                let value_unsigned = unsafe { cast_slice::<$int, $uint>(value) };
-                self_unsigned.cmovnz(value_unsigned, condition);
-            }
-        }
-    };
-}
-
-/// Implement [`CmovEq`] for a signed type by invoking the corresponding unsigned impl.
-macro_rules! impl_cmoveq_for_signed_with_unsigned {
-    ($int:ty, $uint:ty) => {
-        impl CmovEq for [$int] {
-            #[inline]
-            #[allow(unsafe_code)]
-            fn cmovne(&self, rhs: &Self, input: Condition, output: &mut Condition) {
-                // SAFETY:
-                // - Slices being constructed are of same-sized integers as asserted above.
-                // - We source the slice length directly from the other valid slice.
-                let self_unsigned = unsafe { cast_slice::<$int, $uint>(self) };
-                let rhs_unsigned = unsafe { cast_slice::<$int, $uint>(rhs) };
-                self_unsigned.cmovne(rhs_unsigned, input, output);
-            }
-        }
-    };
-}
-
-/// Implement [`Cmov`] and [`CmovEq`] for the given signed/unsigned type pair.
-// TODO(tarcieri): use `cast_unsigned`/`cast_signed` to get rid of the `=> u*`
-macro_rules! impl_cmov_traits_for_signed_with_unsigned {
-    ( $($int:ty => $uint:ty),+ ) => {
+/// Implement [`Cmov`] and [`CmovEq`] traits by casting to a different type that impls the traits.
+macro_rules! impl_cmov_with_cast {
+    ( $($src:ty => $dst:ty),+ ) => {
         $(
-            impl_cmov_for_signed_with_unsigned!($int, $uint);
-            impl_cmoveq_for_signed_with_unsigned!($int, $uint);
+            impl Cmov for [$src] {
+                #[inline]
+                #[track_caller]
+                #[allow(unsafe_code)]
+                fn cmovnz(&mut self, value: &Self, condition: Condition) {
+                    assert_size_and_alignment_eq!($src, $dst);
+
+                    // SAFETY:
+                    // - Slices being constructed are of same-sized integers as asserted above.
+                    // - We source the slice length directly from the other valid slice.
+                    let self_unsigned = unsafe { cast_slice_mut::<$src, $dst>(self) };
+                    let value_unsigned = unsafe { cast_slice::<$src, $dst>(value) };
+                    self_unsigned.cmovnz(value_unsigned, condition);
+                }
+            }
         )+
     };
 }
 
-impl_cmov_traits_for_signed_with_unsigned!(
+// These types are all safe to cast between each other
+impl_cmov_with_cast!(
     i8 => u8,
     i16 => u16,
     i32 => u32,
     i64 => u64,
-    i128 => u128
+    i128 => u128,
+    NonZeroI8 => i8,
+    NonZeroI16 => i16,
+    NonZeroI32 => i32,
+    NonZeroI64 => i64,
+    NonZeroI128 => i128,
+    NonZeroU8 => u8,
+    NonZeroU16 => u16,
+    NonZeroU32 => u32,
+    NonZeroU64 => u64,
+    NonZeroU128 => u128,
+    cmp::Ordering => i8 // #[repr(i8)]
 );
 
+//
+// `CmovEq` impls
+//
+
 // Optimized implementation for byte slices which coalesces them into word-sized chunks first,
 // then performs [`CmovEq`] at the word-level to cut down on the total number of instructions.
 impl CmovEq for [u8] {
@@ -253,6 +251,52 @@
 // TODO(tarcieri): investigate word-coalescing impls
 impl_cmoveq_with_loop!(u16, u32, u64, u128);
 
+/// Implement [`CmovEq`] traits by casting to a different type that impls the traits.
+macro_rules! impl_cmoveq_with_cast {
+    ( $($src:ty => $dst:ty),+ ) => {
+        $(
+            impl CmovEq for [$src] {
+                #[inline]
+                #[allow(unsafe_code)]
+                fn cmovne(&self, rhs: &Self, input: Condition, output: &mut Condition) {
+                    assert_size_and_alignment_eq!($src, $dst);
+
+                    // SAFETY:
+                    // - Slices being constructed are of same-sized types as asserted above.
+                    // - We source the slice length directly from the other valid slice.
+                    let self_unsigned = unsafe { cast_slice::<$src, $dst>(self) };
+                    let rhs_unsigned = unsafe { cast_slice::<$src, $dst>(rhs) };
+                    self_unsigned.cmovne(rhs_unsigned, input, output);
+                }
+            }
+        )+
+    };
+}
+
+// These types are all safe to cast between each other
+impl_cmoveq_with_cast!(
+    i8 => u8,
+    i16 => u16,
+    i32 => u32,
+    i64 => u64,
+    i128 => u128,
+    NonZeroI8 => i8,
+    NonZeroI16 => i16,
+    NonZeroI32 => i32,
+    NonZeroI64 => i64,
+    NonZeroI128 => i128,
+    NonZeroU8 => u8,
+    NonZeroU16 => u16,
+    NonZeroU32 => u32,
+    NonZeroU64 => u64,
+    NonZeroU128 => u128,
+    cmp::Ordering => i8 // #[repr(i8)]
+);
+
+//
+// Helper functions
+//
+
 /// Performs an unsafe pointer cast from one slice type to the other.
 ///
 /// # Compile-time panics
@@ -355,6 +399,10 @@ where
     }
 }
 
+//
+// Vendored `core` functions to allow a 1.85 MSRV
+//
+
 /// Rust core `[T]::as_chunks` vendored because of its 1.88 MSRV.
 /// TODO(tarcieri): use upstream function when we bump MSRV
 #[inline]
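The slice impls generated above are usable directly; here is a brief usage sketch (not from the commit). Per the `assert_lengths_eq!` macro in this file, both slices must be the same length or the call panics, and the comparison visits every element rather than exiting at the first mismatch.

use cmov::CmovEq;
use core::num::NonZeroU32;

fn main() {
    let a = [NonZeroU32::new(1).unwrap(), NonZeroU32::new(2).unwrap()];
    let b = [NonZeroU32::new(1).unwrap(), NonZeroU32::new(3).unwrap()];

    // `cmoveq` sets `output` to `input` iff every element is equal.
    let mut eq: u8 = 0;
    a.as_slice().cmoveq(b.as_slice(), 1, &mut eq);
    assert_eq!(eq, 0); // slices differ in the second element
}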

0 commit comments
