@@ -41,6 +41,65 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     core::mem::transmute(x_read)
 }
 
+/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
+/// read with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+
+    let mut i = 0;
+    let mut out = 0usize;
+    macro_rules! load_prefix {
+        ($($ty:ty)+) => {$(
+            let chunk_sz = core::mem::size_of::<$ty>();
+            if (load_sz & chunk_sz) != 0 {
+                // Since we are doing the large reads first, this must still be aligned to `chunk_sz`.
+                *(&raw mut out).byte_add(i).cast::<$ty>() = *src.byte_add(i).cast::<$ty>();
+                i |= chunk_sz;
+            }
+        )+};
+    }
+    // We can read up to 7 bytes here, which is enough for a `WORD_SIZE` of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+    load_prefix!(u32 u16 u8);
+    debug_assert!(i == load_sz);
+    out
+}
+
+/// Load `load_sz` many bytes from `src.byte_add(WORD_SIZE - load_sz)`. `src` must be `usize`-aligned.
+/// The bytes are returned as the *last* bytes of the return value, i.e., this acts as if we had done
+/// a `usize` read from `src`, with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+
+    let mut i = 0;
+    let mut out = 0usize;
+    let start_shift = WORD_SIZE - load_sz;
+    macro_rules! load_prefix {
+        ($($ty:ty)+) => {$(
+            let chunk_sz = core::mem::size_of::<$ty>();
+            if (load_sz & chunk_sz) != 0 {
+                // Since we are doing the small reads first, `start_shift + i` has in the meantime
+                // become aligned to `chunk_sz`.
+                *(&raw mut out).byte_add(start_shift + i).cast::<$ty>() = *src.byte_add(start_shift + i).cast::<$ty>();
+                i |= chunk_sz;
+            }
+        )+};
+    }
+    // We can read up to 7 bytes here, which is enough for a `WORD_SIZE` of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+    load_prefix!(u8 u16 u32);
+    debug_assert!(i == load_sz);
+    out
+}
+
 #[inline(always)]
 pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
     #[inline(always)]
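To make the behavior of the new `load_aligned_partial` helper concrete, here is a small standalone sketch (not part of the patch) that mirrors its largest-chunk-first decomposition on a plain byte array instead of raw pointers, assuming a little-endian 64-bit word; the name `load_partial_le` and the slice-based signature are illustrative only.

```rust
// Sketch of `load_aligned_partial` semantics on a byte buffer (illustrative only).
fn load_partial_le(bytes: &[u8; 8], load_sz: usize) -> u64 {
    assert!(load_sz < 8);
    let mut out = [0u8; 8];
    let mut i = 0;
    // Largest chunks first: 4, then 2, then 1, like `load_prefix!(u32 u16 u8)`.
    for chunk_sz in [4usize, 2, 1] {
        if load_sz & chunk_sz != 0 {
            out[i..i + chunk_sz].copy_from_slice(&bytes[i..i + chunk_sz]);
            i |= chunk_sz;
        }
    }
    assert_eq!(i, load_sz);
    u64::from_le_bytes(out)
}

fn main() {
    let word = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88];
    // load_sz = 5 = 4 + 1: one u32 read at offset 0, one u8 read at offset 4;
    // bytes 5..8 of the result stay zero, as if the out-of-bounds part were 0.
    assert_eq!(load_partial_le(&word, 5), 0x0000005544332211);
}
```

For `load_sz = 5` this performs one `u32` read and one `u8` read and never touches bytes 5..8, which is exactly why the real helper can be applied to an aligned word that is partially out-of-bounds.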
@@ -66,40 +125,55 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
         }
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(not(feature = "mem-unaligned"))]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+        debug_assert!(src.addr() % WORD_SIZE != 0);
+
         let mut dest_usize = dest as *mut usize;
         let dest_end = dest.wrapping_add(n) as *mut usize;
 
         // Calculate the misalignment offset and shift needed to reassemble value.
+        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
         let offset = src as usize & WORD_MASK;
         let shift = offset * 8;
 
         // Realign src
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        let mut src_aligned = src.byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
 
-        while dest_usize < dest_end {
+        while dest_usize.wrapping_add(1) < dest_end {
             src_aligned = src_aligned.wrapping_add(1);
             let cur_word = *src_aligned;
             #[cfg(target_endian = "little")]
-            let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+            let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
             #[cfg(target_endian = "big")]
-            let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+            let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
             prev_word = cur_word;
 
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
             dest_usize = dest_usize.wrapping_add(1);
         }
+
+        // There's one more element left to go, and we can't use the loop for that because,
+        // on the `src` side, it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_add(1);
+        let cur_word = load_aligned_partial(src_aligned, offset);
+        #[cfg(target_endian = "little")]
+        let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+        #[cfg(target_endian = "big")]
+        let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+        // prev_word does not matter any more
+
+        *dest_usize = reassembled;
+        // dest_usize does not matter any more
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(feature = "mem-unaligned")]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
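The word-reassembly step that both copy loops rely on can be checked in isolation. The following standalone sketch (not part of the patch) replays the little-endian branch of `copy_forward_misaligned_words` for a 64-bit word and a misalignment of `offset = 3`: the upper `WORD_SIZE - offset` bytes of `prev_word` become the low bytes of the result, and the low `offset` bytes of `cur_word` fill the top.

```rust
// Sketch of the little-endian reassembly arithmetic (illustrative values only).
fn main() {
    const WORD_SIZE: usize = 8;
    let offset: usize = 3; // src % WORD_SIZE, i.e. the misalignment in bytes
    let shift = offset * 8;

    // Two consecutive aligned words; the misaligned source data begins 3 bytes
    // into `prev_word` (the 0xB? bytes), so the 0xA? bytes are not part of the copy.
    let prev_word = u64::from_le_bytes([0xA0, 0xA1, 0xA2, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4]);
    let cur_word = u64::from_le_bytes([0xB5, 0xB6, 0xB7, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4]);

    // Little-endian reassembly, as in the loop body: the upper 5 bytes of `prev_word`
    // and the lower 3 bytes of `cur_word` form one contiguous word of the source stream.
    let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
    assert_eq!(
        reassembled,
        u64::from_le_bytes([0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7])
    );
}
```

The big-endian branch is the mirror image, with the shift directions swapped.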
@@ -164,40 +238,55 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
         }
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(not(feature = "mem-unaligned"))]
     #[inline(always)]
     unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+        debug_assert!(src.addr() % WORD_SIZE != 0);
+
         let mut dest_usize = dest as *mut usize;
-        let dest_start = dest.wrapping_sub(n) as *mut usize;
+        let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start
 
         // Calculate the misalignment offset and shift needed to reassemble value.
+        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
         let offset = src as usize & WORD_MASK;
         let shift = offset * 8;
 
-        // Realign src_aligned
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        // Realign src
+        let mut src_aligned = src.byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_partial(src_aligned, offset);
 
-        while dest_start < dest_usize {
+        while dest_start.wrapping_add(1) < dest_usize {
             src_aligned = src_aligned.wrapping_sub(1);
             let cur_word = *src_aligned;
             #[cfg(target_endian = "little")]
-            let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+            let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
             #[cfg(target_endian = "big")]
-            let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+            let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
             prev_word = cur_word;
 
             dest_usize = dest_usize.wrapping_sub(1);
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
         }
+
+        // There's one more element left to go, and we can't use the loop for that because,
+        // on the `src` side, it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_sub(1);
+        let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
+        #[cfg(target_endian = "little")]
+        let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+        #[cfg(target_endian = "big")]
+        let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+        // prev_word does not matter any more
+
+        dest_usize = dest_usize.wrapping_sub(1);
+        *dest_usize = reassembled;
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(feature = "mem-unaligned")]
     #[inline(always)]
     unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
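`load_aligned_end_partial` is the counterpart used for the first word of the forward copy and the last word of the backward copy: it reads the *last* `load_sz` bytes of an aligned word, going smallest-chunk-first so that every read stays naturally aligned. Here is a standalone sketch of that behavior (not part of the patch), again on a byte array with an illustrative name and a little-endian 64-bit word.

```rust
// Sketch of `load_aligned_end_partial` semantics on a byte buffer (illustrative only).
fn load_end_partial_le(bytes: &[u8; 8], load_sz: usize) -> u64 {
    assert!(load_sz < 8);
    let start_shift = 8 - load_sz;
    let mut out = [0u8; 8];
    let mut i = 0;
    // Smallest chunks first: 1, then 2, then 4, like `load_prefix!(u8 u16 u32)`,
    // so `start_shift + i` is aligned to each chunk size by the time it is read.
    for chunk_sz in [1usize, 2, 4] {
        if load_sz & chunk_sz != 0 {
            let at = start_shift + i;
            out[at..at + chunk_sz].copy_from_slice(&bytes[at..at + chunk_sz]);
            i |= chunk_sz;
        }
    }
    assert_eq!(i, load_sz);
    u64::from_le_bytes(out)
}

fn main() {
    let word = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88];
    // load_sz = 5 is what a misalignment of offset = 3 produces (WORD_SIZE - offset).
    // Bytes 0..3 stay zero; bytes 3..8 come from the source word.
    assert_eq!(load_end_partial_le(&word, 5), 0x8877665544000000);
}
```

With `offset = 3` the in-bounds part is `WORD_SIZE - offset = 5` bytes, so the low 3 bytes of the loaded word are zero, and the `>> shift` in the reassembly step discards exactly those bytes.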