1010
1111#include " src/__support/macros/attributes.h" // LIBC_INLINE
1212#include " src/__support/macros/optimization.h" // LIBC_LOOP_NOUNROLL
13+ #include " src/string/memory_utils/arm/common.h" // LIBC_ATTR_LIKELY, LIBC_ATTR_UNLIKELY
1314#include " src/string/memory_utils/utils.h" // memcpy_inline, distance_to_align
1415
1516#include < stddef.h> // size_t
1617
17- // https://libc.llvm.org/compiler_support.html
18- // Support for [[likely]] / [[unlikely]]
19- // [X] GCC 12.2
20- // [X] Clang 12
21- // [ ] Clang 11
22- #define LIBC_ATTR_LIKELY [[likely]]
23- #define LIBC_ATTR_UNLIKELY [[unlikely]]
24-
25- #if defined(LIBC_COMPILER_IS_CLANG)
26- #if LIBC_COMPILER_CLANG_VER < 1200
27- #undef LIBC_ATTR_LIKELY
28- #undef LIBC_ATTR_UNLIKELY
29- #define LIBC_ATTR_LIKELY
30- #define LIBC_ATTR_UNLIKELY
31- #endif
32- #endif
33-
3418namespace LIBC_NAMESPACE_DECL {
3519
3620namespace {
3721
38- LIBC_INLINE_VAR constexpr size_t kWordSize = sizeof (uint32_t );
39-
40- enum Strategy {
41- ForceWordLdStChain,
42- AssumeWordAligned,
43- AssumeUnaligned,
44- };
22+ template <size_t bytes>
23+ LIBC_INLINE void copy_assume_aligned (void *dst, const void *src) {
24+ constexpr size_t alignment = bytes > kWordSize ? kWordSize : bytes;
25+ memcpy_inline<bytes>(assume_aligned<alignment>(dst),
26+ assume_aligned<alignment>(src));
27+ }
4528
46- template <size_t bytes, Strategy strategy = AssumeUnaligned>
47- LIBC_INLINE void copy_and_bump_pointers (Ptr &dst, CPtr &src) {
48- if constexpr (strategy == AssumeUnaligned) {
49- memcpy_inline<bytes>(assume_aligned<1 >(dst), assume_aligned<1 >(src));
50- } else if constexpr (strategy == AssumeWordAligned) {
51- static_assert (bytes >= kWordSize );
52- memcpy_inline<bytes>(assume_aligned<kWordSize >(dst),
53- assume_aligned<kWordSize >(src));
54- } else if constexpr (strategy == ForceWordLdStChain) {
29+ template <size_t bytes, BlockOp block_op = BlockOp::kFull >
30+ LIBC_INLINE void copy_block_and_bump_pointers (Ptr &dst, CPtr &src) {
31+ if constexpr (block_op == BlockOp::kFull ) {
32+ copy_assume_aligned<bytes>(dst, src);
33+ } else {
5534 // We restrict loads/stores to 4 byte to prevent the use of load/store
56- // multiple (LDM, STM) and load/store double (LDRD, STRD). First, they may
57- // fault (see notes below) and second, they use more registers which in turn
58- // adds push/pop instructions in the hot path.
59- static_assert (( bytes % kWordSize == 0 ) && (bytes >= kWordSize ) );
35+ // multiple (LDM, STM) and load/store double (LDRD, STRD). First, they
36+ // may fault (see notes below) and second, they use more registers which
37+ // in turn adds push/pop instructions in the hot path.
38+ static_assert (bytes >= kWordSize );
6039 LIBC_LOOP_UNROLL
61- for (size_t i = 0 ; i < bytes / kWordSize ; ++i) {
62- const size_t offset = i * kWordSize ;
63- memcpy_inline<kWordSize >(dst + offset, src + offset);
40+ for (size_t offset = 0 ; offset < bytes; offset += kWordSize ) {
41+ copy_assume_aligned<kWordSize >(dst + offset, src + offset);
6442 }
6543 }
6644 // In the 1, 2, 4 byte copy case, the compiler can fold pointer offsetting
@@ -72,30 +50,19 @@ LIBC_INLINE void copy_and_bump_pointers(Ptr &dst, CPtr &src) {
7250 src += bytes;
7351}
7452
75- LIBC_INLINE void copy_bytes_and_bump_pointers (Ptr &dst, CPtr &src,
76- const size_t size) {
53+ template < size_t bytes, BlockOp block_op, BumpSize bump_size = BumpSize:: kYes >
54+ LIBC_INLINE void consume_by_aligned_block (Ptr &dst, CPtr &src, size_t & size) {
7755 LIBC_LOOP_NOUNROLL
78- for (size_t i = 0 ; i < size; ++i)
79- *dst++ = *src++;
80- }
81-
82- template <size_t block_size, Strategy strategy>
83- LIBC_INLINE void copy_blocks_and_update_args (Ptr &dst, CPtr &src,
84- size_t &size) {
85- LIBC_LOOP_NOUNROLL
86- for (size_t i = 0 ; i < size / block_size; ++i)
87- copy_and_bump_pointers<block_size, strategy>(dst, src);
88- // Update `size` once at the end instead of once per iteration.
89- size %= block_size;
90- }
91-
92- LIBC_INLINE CPtr bitwise_or (CPtr a, CPtr b) {
93- return cpp::bit_cast<CPtr>(cpp::bit_cast<uintptr_t >(a) |
94- cpp::bit_cast<uintptr_t >(b));
56+ for (size_t i = 0 ; i < size / bytes; ++i)
57+ copy_block_and_bump_pointers<bytes, block_op>(dst, src);
58+ if constexpr (bump_size == BumpSize::kYes ) {
59+ size %= bytes;
60+ }
9561}
9662
97- LIBC_INLINE auto misaligned (CPtr a) {
98- return distance_to_align_down<kWordSize >(a);
63+ LIBC_INLINE void copy_bytes_and_bump_pointers (Ptr &dst, CPtr &src,
64+ size_t size) {
65+ consume_by_aligned_block<1 , BlockOp::kFull , BumpSize::kNo >(dst, src, size);
9966}
10067
10168} // namespace
@@ -125,20 +92,21 @@ LIBC_INLINE auto misaligned(CPtr a) {
12592 if (src_alignment == 0 )
12693 LIBC_ATTR_LIKELY {
12794 // Both `src` and `dst` are now word-aligned.
128- copy_blocks_and_update_args <64 , AssumeWordAligned >(dst, src, size);
129- copy_blocks_and_update_args <16 , AssumeWordAligned >(dst, src, size);
130- copy_blocks_and_update_args <4 , AssumeWordAligned >(dst, src, size);
95+ consume_by_aligned_block <64 , BlockOp:: kFull >(dst, src, size);
96+ consume_by_aligned_block <16 , BlockOp:: kFull >(dst, src, size);
97+ consume_by_aligned_block <4 , BlockOp:: kFull >(dst, src, size);
13198 }
13299 else {
133100 // `dst` is aligned but `src` is not.
134101 LIBC_LOOP_NOUNROLL
135102 while (size >= kWordSize ) {
136- // Recompose word from multiple loads depending on the alignment.
103+ // Recompose word from multiple loads depending on the
104+ // alignment.
137105 const uint32_t value =
138106 src_alignment == 2
139107 ? load_aligned<uint32_t , uint16_t , uint16_t >(src)
140108 : load_aligned<uint32_t , uint8_t , uint16_t , uint8_t >(src);
141- memcpy_inline <kWordSize >(assume_aligned< kWordSize >( dst) , &value);
109+ copy_assume_aligned <kWordSize >(dst, &value);
142110 dst += kWordSize ;
143111 src += kWordSize ;
144112 size -= kWordSize ;
@@ -169,31 +137,33 @@ LIBC_INLINE auto misaligned(CPtr a) {
169137 if (size < 8 )
170138 LIBC_ATTR_UNLIKELY {
171139 if (size & 1 )
172- copy_and_bump_pointers <1 >(dst, src);
140+ copy_block_and_bump_pointers <1 >(dst, src);
173141 if (size & 2 )
174- copy_and_bump_pointers <2 >(dst, src);
142+ copy_block_and_bump_pointers <2 >(dst, src);
175143 if (size & 4 )
176- copy_and_bump_pointers <4 >(dst, src);
144+ copy_block_and_bump_pointers <4 >(dst, src);
177145 return ;
178146 }
179147 if (misaligned (src))
180148 LIBC_ATTR_UNLIKELY {
181149 const size_t offset = distance_to_align_up<kWordSize >(dst);
182150 if (offset & 1 )
183- copy_and_bump_pointers <1 >(dst, src);
151+ copy_block_and_bump_pointers <1 >(dst, src);
184152 if (offset & 2 )
185- copy_and_bump_pointers <2 >(dst, src);
153+ copy_block_and_bump_pointers <2 >(dst, src);
186154 size -= offset;
187155 }
188156 }
189- copy_blocks_and_update_args<64 , ForceWordLdStChain>(dst, src, size);
190- copy_blocks_and_update_args<16 , ForceWordLdStChain>(dst, src, size);
191- copy_blocks_and_update_args<4 , AssumeUnaligned>(dst, src, size);
157+ // `dst` and `src` are not necessarily both aligned at that point but this
158+ // implementation assumes hardware support for unaligned loads and stores.
159+ consume_by_aligned_block<64 , BlockOp::kByWord >(dst, src, size);
160+ consume_by_aligned_block<16 , BlockOp::kByWord >(dst, src, size);
161+ consume_by_aligned_block<4 , BlockOp::kFull >(dst, src, size);
192162 if (size & 1 )
193- copy_and_bump_pointers <1 >(dst, src);
163+ copy_block_and_bump_pointers <1 >(dst, src);
194164 if (size & 2 )
195165 LIBC_ATTR_UNLIKELY
196- copy_and_bump_pointers <2 >(dst, src);
166+ copy_block_and_bump_pointers <2 >(dst, src);
197167}
198168
199169[[maybe_unused]] LIBC_INLINE void inline_memcpy_arm (void *__restrict dst_,
@@ -210,8 +180,4 @@ LIBC_INLINE auto misaligned(CPtr a) {
210180
211181} // namespace LIBC_NAMESPACE_DECL
212182
213- // Cleanup local macros
214- #undef LIBC_ATTR_LIKELY
215- #undef LIBC_ATTR_UNLIKELY
216-
217183#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_INLINE_MEMCPY_H
0 commit comments