Skip to content

Commit 68e1cc6

Browse files
committed
Optimize copy_to/from() for u8/i8 slice
It's a common case to use copy_to()/copy_from() to copy a byte slice, so optimize it by avoiding an explicit loop. This optimization reduces byte stream copy time by roughly 90%.

Benchmark results (criterion):

VolatileSlice::copy_to_u8
    time:   [70.407 ns 70.728 ns 71.212 ns]
    change: [-93.751% -93.706% -93.667%] (p = 0.00 < 0.05)
    Performance has improved.
    Found 16 outliers among 200 measurements (8.00%): 7 (3.50%) high mild, 9 (4.50%) high severe

VolatileSlice::copy_to_u16
    time:   [560.10 ns 562.15 ns 565.42 ns]
    change: [-1.9847% -1.4879% -0.9829%] (p = 0.00 < 0.05)
    Change within noise threshold.
    Found 11 outliers among 200 measurements (5.50%): 4 (2.00%) high mild, 7 (3.50%) high severe

VolatileSlice::copy_from_u8
    time:   [69.704 ns 69.925 ns 70.285 ns]
    change: [-94.918% -94.873% -94.819%] (p = 0.00 < 0.05)
    Performance has improved.
    Found 14 outliers among 200 measurements (7.00%): 7 (3.50%) high mild, 7 (3.50%) high severe

VolatileSlice::copy_from_u16
    time:   [682.23 ns 685.20 ns 690.26 ns]
    change: [-0.7430% -0.0145% +0.8123%] (p = 0.97 > 0.05)
    No change in performance detected.
    Found 13 outliers among 200 measurements (6.50%): 4 (2.00%) high mild, 9 (4.50%) high severe

Signed-off-by: Liu Jiang <[email protected]>
1 parent 90e340c commit 68e1cc6

File tree

1 file changed

+94
-20
lines changed

1 file changed

+94
-20
lines changed

src/volatile_memory.rs

Lines changed: 94 additions & 20 deletions
Original file line number · Diff line number · Diff line change
@@ -368,9 +368,19 @@ impl<'a> VolatileSlice<'a> {
368368
where
369369
T: ByteValued,
370370
{
371-
let count = self.size / size_of::<T>();
372-
let source = self.get_array_ref::<T>(0, count).unwrap();
373-
source.copy_to(buf)
371+
// A fast path for u8/i8
372+
if size_of::<T>() == 1 {
373+
// It is safe because the pointers are range-checked when the slices are created,
374+
// and they never escape the VolatileSlices.
375+
let source = unsafe { self.as_slice() };
376+
// Safe because `T` is a one-byte data structure.
377+
let dst = unsafe { from_raw_parts_mut(buf.as_mut_ptr() as *mut u8, buf.len()) };
378+
copy_slice(dst, source)
379+
} else {
380+
let count = self.size / size_of::<T>();
381+
let source = self.get_array_ref::<T>(0, count).unwrap();
382+
source.copy_to(buf)
383+
}
374384
}
375385

376386
/// Copies as many bytes as possible from this slice to the provided `slice`.
@@ -425,9 +435,19 @@ impl<'a> VolatileSlice<'a> {
425435
where
426436
T: ByteValued,
427437
{
428-
let count = self.size / size_of::<T>();
429-
let dest = self.get_array_ref::<T>(0, count).unwrap();
430-
dest.copy_from(buf)
438+
// A fast path for u8/i8
439+
if size_of::<T>() == 1 {
440+
// It is safe because the pointers are range-checked when the slices are created,
441+
// and they never escape the VolatileSlices.
442+
let dst = unsafe { self.as_mut_slice() };
443+
// Safe because `T` is a one-byte data structure.
444+
let src = unsafe { from_raw_parts(buf.as_ptr() as *const u8, buf.len()) };
445+
copy_slice(dst, src);
446+
} else {
447+
let count = self.size / size_of::<T>();
448+
let dest = self.get_array_ref::<T>(0, count).unwrap();
449+
dest.copy_from(buf)
450+
}
431451
}
432452

433453
/// Returns a slice corresponding to the data in the underlying memory.
@@ -939,6 +959,17 @@ impl<'a, T: ByteValued> VolatileArrayRef<'a, T> {
939959
/// # }
940960
/// ```
941961
pub fn copy_to(&self, buf: &mut [T]) -> usize {
962+
// A fast path for u8/i8
963+
if size_of::<T>() == 1 {
964+
let source = self.to_slice();
965+
// It is safe because the pointers are range-checked when the slices are created,
966+
// and they never escape the VolatileSlices.
967+
let src = unsafe { source.as_slice() };
968+
// Safe because `T` is a one-byte data structure.
969+
let dst = unsafe { from_raw_parts_mut(buf.as_mut_ptr() as *mut u8, buf.len()) };
970+
return copy_slice(dst, src);
971+
}
972+
942973
let mut addr = self.addr;
943974
let mut i = 0;
944975
for v in buf.iter_mut().take(self.len()) {
@@ -1010,15 +1041,26 @@ impl<'a, T: ByteValued> VolatileArrayRef<'a, T> {
10101041
/// # }
10111042
/// ```
10121043
pub fn copy_from(&self, buf: &[T]) {
1013-
let mut addr = self.addr;
1014-
for &v in buf.iter().take(self.len()) {
1015-
unsafe {
1016-
// write_volatile is safe because the pointers are range-checked when
1017-
// the slices are created, and they never escape the VolatileSlices.
1018-
// ptr::add is safe because get_array_ref() validated that
1019-
// size_of::<T>() * self.len() fits in an isize.
1020-
write_volatile(addr as *mut Packed<T>, Packed::<T>(v));
1021-
addr = addr.add(self.element_size());
1044+
// A fast path for u8/i8
1045+
if size_of::<T>() == 1 {
1046+
// It is safe because the pointers are range-checked when the slices are created,
1047+
// and they never escape the VolatileSlices.
1048+
let destination = self.to_slice();
1049+
let dst = unsafe { destination.as_mut_slice() };
1050+
// Safe because `T` is a one-byte data structure.
1051+
let src = unsafe { from_raw_parts(buf.as_ptr() as *const u8, buf.len()) };
1052+
copy_slice(dst, src);
1053+
} else {
1054+
let mut addr = self.addr;
1055+
for &v in buf.iter().take(self.len()) {
1056+
unsafe {
1057+
// write_volatile is safe because the pointers are range-checked when
1058+
// the slices are created, and they never escape the VolatileSlices.
1059+
// ptr::add is safe because get_array_ref() validated that
1060+
// size_of::<T>() * self.len() fits in an isize.
1061+
write_volatile(addr as *mut Packed<T>, Packed::<T>(v));
1062+
addr = addr.add(self.element_size());
1063+
}
10221064
}
10231065
}
10241066
}
@@ -1362,8 +1404,8 @@ mod tests {
13621404
}
13631405

13641406
#[test]
1365-
fn slice_copy_to() {
1366-
let mut a = [2, 4, 6, 8, 10];
1407+
fn slice_copy_to_u8() {
1408+
let mut a = [2u8, 4, 6, 8, 10];
13671409
let mut b = [0u8; 4];
13681410
let mut c = [0u8; 6];
13691411
let a_ref = &mut a[..];
@@ -1375,8 +1417,23 @@ mod tests {
13751417
}
13761418

13771419
#[test]
1378-
fn slice_copy_from() {
1379-
let a = [2, 4, 6, 8, 10];
1420+
fn slice_copy_to_u16() {
1421+
let mut a = [0x01u16, 0x2, 0x03, 0x4, 0x5];
1422+
let mut b = [0u16; 4];
1423+
let mut c = [0u16; 6];
1424+
let a_ref = &mut a[..];
1425+
let v_ref = unsafe { VolatileSlice::new(a_ref.as_mut_ptr() as *mut u8, 9) };
1426+
1427+
v_ref.copy_to(&mut b[..]);
1428+
v_ref.copy_to(&mut c[..]);
1429+
assert_eq!(b[0..4], a_ref[0..4]);
1430+
assert_eq!(c[0..4], a_ref[0..4]);
1431+
assert_eq!(c[4], 0);
1432+
}
1433+
1434+
#[test]
1435+
fn slice_copy_from_u8() {
1436+
let a = [2u8, 4, 6, 8, 10];
13801437
let mut b = [0u8; 4];
13811438
let mut c = [0u8; 6];
13821439
let b_ref = &mut b[..];
@@ -1390,9 +1447,26 @@ mod tests {
13901447
assert_eq!(c_ref[0..5], a[0..5]);
13911448
}
13921449

1450+
#[test]
1451+
fn slice_copy_from_u16() {
1452+
let a = [2u16, 4, 6, 8, 10];
1453+
let mut b = [0u16; 4];
1454+
let mut c = [0u16; 6];
1455+
let b_ref = &mut b[..];
1456+
let v_ref = unsafe { VolatileSlice::new(b_ref.as_mut_ptr() as *mut u8, 8) };
1457+
v_ref.copy_from(&a[..]);
1458+
assert_eq!(b_ref[0..4], a[0..4]);
1459+
1460+
let c_ref = &mut c[..];
1461+
let v_ref = unsafe { VolatileSlice::new(c_ref.as_mut_ptr() as *mut u8, 9) };
1462+
v_ref.copy_from(&a[..]);
1463+
assert_eq!(c_ref[0..4], a[0..4]);
1464+
assert_eq!(c_ref[4], 0);
1465+
}
1466+
13931467
#[test]
13941468
fn slice_copy_to_volatile_slice() {
1395-
let mut a = [2, 4, 6, 8, 10];
1469+
let mut a = [2u8, 4, 6, 8, 10];
13961470
let a_ref = &mut a[..];
13971471
let a_slice = a_ref.get_slice(0, a_ref.len()).unwrap();
13981472

0 commit comments

Comments
 (0)