1+ use core:: mem:: MaybeUninit ;
2+
13use libc:: size_t;
24
35const fn const_max ( a : usize , b : usize ) -> usize {
@@ -68,12 +70,28 @@ pub(crate) static OF_defaultNorm: [i16; 29] = [
6870pub ( crate ) const OF_DEFAULTNORMLOG : u32 = 5 ;
6971pub ( crate ) static OF_defaultNormLog : u32 = OF_DEFAULTNORMLOG ;
7072
73+ #[ inline]
7174pub ( crate ) unsafe fn ZSTD_copy16 ( dst : * mut u8 , src : * const u8 ) {
7275 // We use `copy` instead of `copy_nonoverlapping` here because the literal buffer can now
7376 // be located within the dst buffer. In circumstances where the op "catches up" to where the
7477 // literal buffer is, there can be partial overlaps in this call on the final
7578 // copy if the literal is being shifted by less than 16 bytes.
76- core:: ptr:: copy ( src, dst, 16 )
79+ crate :: cfg_select!(
80+ target_arch = "x86_64" => {
81+ #[ repr( C ) ]
82+ struct V {
83+ x: i64 ,
84+ y: i64 ,
85+ }
86+
87+ // This generates the same instructions, but surrounding code optimizes better.
88+ let v = core:: ptr:: read_unaligned( src as * const MaybeUninit <V >) ;
89+ core:: ptr:: write_unaligned( dst as * mut MaybeUninit <V >, v) ;
90+ }
91+ _ => {
92+ core:: ptr:: copy( src, dst, 16 ) ;
93+ }
94+ ) ;
7795}
7896
7997pub ( crate ) const WILDCOPY_OVERLENGTH : usize = 32 ;
@@ -126,7 +144,7 @@ pub(crate) unsafe fn ZSTD_wildcopy(
126144 // probabilities. Since it is almost certain to be short, only do
127145 // one 16-byte copy in the first call. Then, do two calls per loop since
128146 // at that point it is more likely to have a high trip count.
129- core :: ptr :: copy ( ip , op, 16 ) ;
147+ ZSTD_copy16 ( op, ip ) ;
130148
131149 if 16 >= length {
132150 return ;
@@ -136,11 +154,11 @@ pub(crate) unsafe fn ZSTD_wildcopy(
136154 ip = ip. add ( 16 ) ;
137155
138156 loop {
139- core :: ptr :: copy ( ip , op, 16 ) ;
157+ ZSTD_copy16 ( op, ip ) ;
140158 op = op. add ( 16 ) ;
141159 ip = ip. add ( 16 ) ;
142160
143- core :: ptr :: copy ( ip , op, 16 ) ;
161+ ZSTD_copy16 ( op, ip ) ;
144162 op = op. add ( 16 ) ;
145163 ip = ip. add ( 16 ) ;
146164
0 commit comments