@@ -107,21 +107,50 @@ impl_nzint!(NonZeroI64, NonZeroI64::new);
107107impl_nzint ! ( NonZeroI128 , NonZeroI128 :: new) ;
108108
109109#[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
110- macro_rules! x86_intrinsic_impl {
111- ( $meta: meta, $( $intrinsic: ident) ,+) => { $(
112- #[ cfg( $meta) ]
113- impl Distribution <$intrinsic> for StandardUniform {
114- #[ inline]
115- fn sample<R : Rng + ?Sized >( & self , rng: & mut R ) -> $intrinsic {
116- // On proper hardware, this should compile to SIMD instructions
117- // Verified on x86 Haswell with __m128i, __m256i
118- let mut buf = [ 0_u8 ; core:: mem:: size_of:: <$intrinsic>( ) ] ;
119- rng. fill_bytes( & mut buf) ;
120- // x86 is little endian so no need for conversion
121- zerocopy:: transmute!( buf)
122- }
123- }
124- ) +} ;
110+ impl Distribution < __m128i > for StandardUniform {
111+ #[ inline]
112+ fn sample < R : Rng + ?Sized > ( & self , rng : & mut R ) -> __m128i {
113+ // NOTE: It's tempting to use the u128 impl here, but confusingly this
114+ // results in different code (return via rdx, r10 instead of rax, rdx
115+ // with u128 impl) and is much slower (+130 time). This version calls
116+ // impls::fill_bytes_via_next but performs well.
117+
118+ let mut buf = [ 0_u8 ; core:: mem:: size_of :: < __m128i > ( ) ] ;
119+ rng. fill_bytes ( & mut buf) ;
120+ // x86 is little-endian, so the filled bytes need no byte-order conversion.
121+
122+ // SAFETY: `buf` is sized to `__m128i`; all byte sequences of `buf` represent values of that type.
123+ unsafe { core:: mem:: transmute ( buf) }
124+ }
125+ }
126+
127+ #[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
128+ impl Distribution < __m256i > for StandardUniform {
129+ #[ inline]
130+ fn sample < R : Rng + ?Sized > ( & self , rng : & mut R ) -> __m256i {
131+ let mut buf = [ 0_u8 ; core:: mem:: size_of :: < __m256i > ( ) ] ;
132+ rng. fill_bytes ( & mut buf) ;
133+ // x86 is little-endian, so the filled bytes need no byte-order conversion.
134+
135+ // SAFETY: `buf` is sized to `__m256i`; all byte sequences of `buf` represent values of that type.
136+ unsafe { core:: mem:: transmute ( buf) }
137+ }
138+ }
139+
140+ #[ cfg( all(
141+ any( target_arch = "x86" , target_arch = "x86_64" ) ,
142+ feature = "simd_support"
143+ ) ) ]
144+ impl Distribution < __m512i > for StandardUniform {
145+ #[ inline]
146+ fn sample < R : Rng + ?Sized > ( & self , rng : & mut R ) -> __m512i {
147+ let mut buf = [ 0_u8 ; core:: mem:: size_of :: < __m512i > ( ) ] ;
148+ rng. fill_bytes ( & mut buf) ;
149+ // x86 is little-endian, so the filled bytes need no byte-order conversion.
150+
151+ // SAFETY: `buf` is sized to `__m512i`; all byte sequences of `buf` represent values of that type.
152+ unsafe { core:: mem:: transmute ( buf) }
153+ }
125154}
126155
127156#[ cfg( feature = "simd_support" ) ]
@@ -148,24 +177,6 @@ macro_rules! simd_impl {
148177#[ cfg( feature = "simd_support" ) ]
149178simd_impl ! ( u8 , i8 , u16 , i16 , u32 , i32 , u64 , i64 ) ;
150179
151- #[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
152- x86_intrinsic_impl ! (
153- any( target_arch = "x86" , target_arch = "x86_64" ) ,
154- __m128i,
155- __m256i
156- ) ;
157- #[ cfg( all(
158- any( target_arch = "x86" , target_arch = "x86_64" ) ,
159- feature = "simd_support"
160- ) ) ]
161- x86_intrinsic_impl ! (
162- all(
163- any( target_arch = "x86" , target_arch = "x86_64" ) ,
164- feature = "simd_support"
165- ) ,
166- __m512i
167- ) ;
168-
169180#[ cfg( test) ]
170181mod tests {
171182 use super :: * ;
0 commit comments