@@ -72,28 +72,35 @@ static void CopyBlockOverlap(char *__restrict dst, const char *__restrict src,
72
72
73
73
// Copies `count` bytes by blocks of `kBlockSize` bytes.
74
74
// Copies at the start and end of the buffer are unaligned.
75
- // Copies in the middle of the buffer are aligned to `kBlockSize `.
75
+ // Copies in the middle of the buffer are aligned to `kAlignment`.
76
76
//
77
77
// e.g. with
78
78
// [12345678123456781234567812345678]
79
- // [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
80
- // [__XXXXXXXX______________________]
81
- // [________XXXXXXXX________________]
82
- // [________________XXXXXXXX________]
83
- // [_____________________XXXXXXXX___]
79
+ // [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
80
+ // [__XXXX__________________________]
81
+ // [____XXXXXXXX____________________]
82
+ // [____________XXXXXXXX____________]
83
+ // [____________________XXXXXXXX____]
84
+ // [_____________________XXXXXXXX___]
84
85
//
85
- // Precondition: `count > 2 * kBlockSize` for efficiency.
86
- // `count >= kBlockSize` for correctness.
87
- template <size_t kBlockSize >
86
+ // Precondition: `kAlignment <= kBlockSize`
87
+ // `count > 2 * kBlockSize` for efficiency.
88
+ // `count >= kBlockSize` for correctness.
89
+ template <size_t kBlockSize , size_t kAlignment = kBlockSize >
88
90
static void CopyAlignedBlocks (char *__restrict dst, const char *__restrict src,
89
91
size_t count) {
90
- CopyBlock<kBlockSize >(dst, src); // Copy first block
92
+ static_assert (is_power2 (kAlignment ), " kAlignment must be a power of two" );
93
+ static_assert (is_power2 (kBlockSize ), " kBlockSize must be a power of two" );
94
+ static_assert (kAlignment <= kBlockSize ,
95
+ " kAlignment must be less or equal to block size" );
96
+ CopyBlock<kAlignment >(dst, src); // Copy first block
91
97
92
98
// Copy aligned blocks
93
- const size_t ofla = offset_from_last_aligned<kBlockSize >(src);
99
+ const size_t ofla = offset_from_last_aligned<kAlignment >(src);
94
100
const size_t limit = count + ofla - kBlockSize ;
95
- for (size_t offset = kBlockSize ; offset < limit; offset += kBlockSize )
96
- CopyBlock<kBlockSize >(dst - ofla + offset, src - ofla + offset);
101
+ for (size_t offset = kAlignment ; offset < limit; offset += kBlockSize )
102
+ CopyBlock<kBlockSize >(dst - ofla + offset,
103
+ assume_aligned<kAlignment >(src - ofla + offset));
97
104
98
105
CopyLastBlock<kBlockSize >(dst, src, count); // Copy last block
99
106
}
0 commit comments