Skip to content

Commit fcb2e5d

Browse files
committed
[libc] refactor Cortex memcpy code in preparation of memset
1 parent da8d7f4 commit fcb2e5d

File tree

4 files changed

+100
-80
lines changed

4 files changed

+100
-80
lines changed

libc/src/string/memory_utils/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ add_header_library(
77
aarch64/inline_memcpy.h
88
aarch64/inline_memmove.h
99
aarch64/inline_memset.h
10+
arm/common.h
1011
arm/inline_memcpy.h
1112
generic/aligned_access.h
1213
generic/byte_per_byte.h
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
//===-- Common constants and defines for arm --------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_COMMON_H
10+
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_COMMON_H
11+
12+
#include "src/__support/macros/attributes.h" // LIBC_INLINE_VAR
13+
#include "src/string/memory_utils/utils.h" // CPtr, Ptr, distance_to_align_down
14+
15+
#include <stddef.h> // size_t
16+
17+
// https://libc.llvm.org/compiler_support.html
18+
// Support for [[likely]] / [[unlikely]]
19+
// [X] GCC 12.2
20+
// [X] Clang 12
21+
// [ ] Clang 11
22+
#define LIBC_ATTR_LIKELY [[likely]]
23+
#define LIBC_ATTR_UNLIKELY [[unlikely]]
24+
25+
#if defined(LIBC_COMPILER_IS_CLANG)
26+
#if LIBC_COMPILER_CLANG_VER < 1200
27+
#undef LIBC_ATTR_LIKELY
28+
#undef LIBC_ATTR_UNLIKELY
29+
#define LIBC_ATTR_LIKELY
30+
#define LIBC_ATTR_UNLIKELY
31+
#endif
32+
#endif
33+
34+
namespace LIBC_NAMESPACE_DECL {
35+
36+
// Size in bytes of the unit used for word-wise accesses (a `uint32_t`).
LIBC_INLINE_VAR constexpr size_t kWordSize = sizeof(uint32_t);
37+
38+
// Selects whether a block-consuming helper also reduces its `size` argument
// (`size %= bytes`) once the blocks have been processed.
enum class BumpSize : bool { kNo = false, kYes = true };
39+
// Selects how one block is processed: `kFull` handles the whole block as a
// single operation, `kByWord` as a chain of `kWordSize`-byte operations.
enum class BlockOp : bool { kFull = false, kByWord = true };
40+
41+
// Returns `distance_to_align_down<kWordSize>(ptr)`. Callers test the result
// for non-zero to detect a pointer that is not word-aligned.
// NOTE(review): presumably the byte offset of `ptr` past the previous word
// boundary -- confirm against `distance_to_align_down` in utils.h.
LIBC_INLINE auto misaligned(CPtr ptr) {
42+
return distance_to_align_down<kWordSize>(ptr);
43+
}
44+
45+
// Returns a pointer whose integer representation is the bitwise OR of the
// integer representations of `a` and `b`.
// NOTE(review): presumably used so that a single alignment test covers both
// pointers; the call site is not visible here -- confirm.
LIBC_INLINE CPtr bitwise_or(CPtr a, CPtr b) {
46+
return cpp::bit_cast<CPtr>(cpp::bit_cast<uintptr_t>(a) |
47+
cpp::bit_cast<uintptr_t>(b));
48+
}
49+
50+
} // namespace LIBC_NAMESPACE_DECL
51+
52+
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_COMMON_H

libc/src/string/memory_utils/arm/inline_memcpy.h

Lines changed: 46 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -10,57 +10,35 @@
1010

1111
#include "src/__support/macros/attributes.h" // LIBC_INLINE
1212
#include "src/__support/macros/optimization.h" // LIBC_LOOP_NOUNROLL
13+
#include "src/string/memory_utils/arm/common.h" // LIBC_ATTR_LIKELY, LIBC_ATTR_UNLIKELY
1314
#include "src/string/memory_utils/utils.h" // memcpy_inline, distance_to_align_up
1415

1516
#include <stddef.h> // size_t
1617

17-
// https://libc.llvm.org/compiler_support.html
18-
// Support for [[likely]] / [[unlikely]]
19-
// [X] GCC 12.2
20-
// [X] Clang 12
21-
// [ ] Clang 11
22-
#define LIBC_ATTR_LIKELY [[likely]]
23-
#define LIBC_ATTR_UNLIKELY [[unlikely]]
24-
25-
#if defined(LIBC_COMPILER_IS_CLANG)
26-
#if LIBC_COMPILER_CLANG_VER < 1200
27-
#undef LIBC_ATTR_LIKELY
28-
#undef LIBC_ATTR_UNLIKELY
29-
#define LIBC_ATTR_LIKELY
30-
#define LIBC_ATTR_UNLIKELY
31-
#endif
32-
#endif
33-
3418
namespace LIBC_NAMESPACE_DECL {
3519

3620
namespace {
3721

38-
LIBC_INLINE_VAR constexpr size_t kWordSize = sizeof(uint32_t);
39-
40-
enum Strategy {
41-
ForceWordLdStChain,
42-
AssumeWordAligned,
43-
AssumeUnaligned,
44-
};
22+
// Copies exactly `bytes` bytes from `src` to `dst` with `memcpy_inline`,
// passing both pointers through `assume_aligned` with an alignment of
// min(`bytes`, `kWordSize`).
// NOTE(review): some call sites pass pointers that the surrounding comments
// describe as not necessarily aligned; confirm that relying on hardware
// support for unaligned accesses is intended there.
template <size_t bytes>
23+
LIBC_INLINE void copy_assume_aligned(void *dst, const void *src) {
24+
// Never claim more than word alignment, nor more than the block size.
constexpr size_t alignment = bytes > kWordSize ? kWordSize : bytes;
25+
memcpy_inline<bytes>(assume_aligned<alignment>(dst),
26+
assume_aligned<alignment>(src));
27+
}
4528

46-
template <size_t bytes, Strategy strategy = AssumeUnaligned>
47-
LIBC_INLINE void copy_and_bump_pointers(Ptr &dst, CPtr &src) {
48-
if constexpr (strategy == AssumeUnaligned) {
49-
memcpy_inline<bytes>(assume_aligned<1>(dst), assume_aligned<1>(src));
50-
} else if constexpr (strategy == AssumeWordAligned) {
51-
static_assert(bytes >= kWordSize);
52-
memcpy_inline<bytes>(assume_aligned<kWordSize>(dst),
53-
assume_aligned<kWordSize>(src));
54-
} else if constexpr (strategy == ForceWordLdStChain) {
29+
template <size_t bytes, BlockOp block_op = BlockOp::kFull>
30+
LIBC_INLINE void copy_block_and_bump_pointers(Ptr &dst, CPtr &src) {
31+
if constexpr (block_op == BlockOp::kFull) {
32+
copy_assume_aligned<bytes>(dst, src);
33+
} else {
5534
// We restrict loads/stores to 4 byte to prevent the use of load/store
56-
// multiple (LDM, STM) and load/store double (LDRD, STRD). First, they may
57-
// fault (see notes below) and second, they use more registers which in turn
58-
// adds push/pop instructions in the hot path.
59-
static_assert((bytes % kWordSize == 0) && (bytes >= kWordSize));
35+
// multiple (LDM, STM) and load/store double (LDRD, STRD). First, they
36+
// may fault (see notes below) and second, they use more registers which
37+
// in turn adds push/pop instructions in the hot path.
38+
static_assert(bytes >= kWordSize);
6039
LIBC_LOOP_UNROLL
61-
for (size_t i = 0; i < bytes / kWordSize; ++i) {
62-
const size_t offset = i * kWordSize;
63-
memcpy_inline<kWordSize>(dst + offset, src + offset);
40+
for (size_t offset = 0; offset < bytes; offset += kWordSize) {
41+
copy_assume_aligned<kWordSize>(dst + offset, src + offset);
6442
}
6543
}
6644
// In the 1, 2, 4 byte copy case, the compiler can fold pointer offsetting
@@ -72,30 +50,19 @@ LIBC_INLINE void copy_and_bump_pointers(Ptr &dst, CPtr &src) {
7250
src += bytes;
7351
}
7452

75-
LIBC_INLINE void copy_bytes_and_bump_pointers(Ptr &dst, CPtr &src,
76-
const size_t size) {
53+
template <size_t bytes, BlockOp block_op, BumpSize bump_size = BumpSize::kYes>
54+
LIBC_INLINE void consume_by_aligned_block(Ptr &dst, CPtr &src, size_t &size) {
7755
LIBC_LOOP_NOUNROLL
78-
for (size_t i = 0; i < size; ++i)
79-
*dst++ = *src++;
80-
}
81-
82-
template <size_t block_size, Strategy strategy>
83-
LIBC_INLINE void copy_blocks_and_update_args(Ptr &dst, CPtr &src,
84-
size_t &size) {
85-
LIBC_LOOP_NOUNROLL
86-
for (size_t i = 0; i < size / block_size; ++i)
87-
copy_and_bump_pointers<block_size, strategy>(dst, src);
88-
// Update `size` once at the end instead of once per iteration.
89-
size %= block_size;
90-
}
91-
92-
LIBC_INLINE CPtr bitwise_or(CPtr a, CPtr b) {
93-
return cpp::bit_cast<CPtr>(cpp::bit_cast<uintptr_t>(a) |
94-
cpp::bit_cast<uintptr_t>(b));
56+
for (size_t i = 0; i < size / bytes; ++i)
57+
copy_block_and_bump_pointers<bytes, block_op>(dst, src);
58+
if constexpr (bump_size == BumpSize::kYes) {
59+
size %= bytes;
60+
}
9561
}
9662

97-
LIBC_INLINE auto misaligned(CPtr a) {
98-
return distance_to_align_down<kWordSize>(a);
63+
// Copies `size` bytes from `src` to `dst` one byte at a time by delegating to
// `consume_by_aligned_block` with a 1-byte block, which bumps the pointers
// through `copy_block_and_bump_pointers`. `size` is taken by value and
// `BumpSize::kNo` is selected, so no size update is performed and the
// caller's size is left untouched.
LIBC_INLINE void copy_bytes_and_bump_pointers(Ptr &dst, CPtr &src,
64+
size_t size) {
65+
consume_by_aligned_block<1, BlockOp::kFull, BumpSize::kNo>(dst, src, size);
9966
}
10067

10168
} // namespace
@@ -125,20 +92,21 @@ LIBC_INLINE auto misaligned(CPtr a) {
12592
if (src_alignment == 0)
12693
LIBC_ATTR_LIKELY {
12794
// Both `src` and `dst` are now word-aligned.
128-
copy_blocks_and_update_args<64, AssumeWordAligned>(dst, src, size);
129-
copy_blocks_and_update_args<16, AssumeWordAligned>(dst, src, size);
130-
copy_blocks_and_update_args<4, AssumeWordAligned>(dst, src, size);
95+
consume_by_aligned_block<64, BlockOp::kFull>(dst, src, size);
96+
consume_by_aligned_block<16, BlockOp::kFull>(dst, src, size);
97+
consume_by_aligned_block<4, BlockOp::kFull>(dst, src, size);
13198
}
13299
else {
133100
// `dst` is aligned but `src` is not.
134101
LIBC_LOOP_NOUNROLL
135102
while (size >= kWordSize) {
136-
// Recompose word from multiple loads depending on the alignment.
103+
// Recompose word from multiple loads depending on the
104+
// alignment.
137105
const uint32_t value =
138106
src_alignment == 2
139107
? load_aligned<uint32_t, uint16_t, uint16_t>(src)
140108
: load_aligned<uint32_t, uint8_t, uint16_t, uint8_t>(src);
141-
memcpy_inline<kWordSize>(assume_aligned<kWordSize>(dst), &value);
109+
copy_assume_aligned<kWordSize>(dst, &value);
142110
dst += kWordSize;
143111
src += kWordSize;
144112
size -= kWordSize;
@@ -169,31 +137,33 @@ LIBC_INLINE auto misaligned(CPtr a) {
169137
if (size < 8)
170138
LIBC_ATTR_UNLIKELY {
171139
if (size & 1)
172-
copy_and_bump_pointers<1>(dst, src);
140+
copy_block_and_bump_pointers<1>(dst, src);
173141
if (size & 2)
174-
copy_and_bump_pointers<2>(dst, src);
142+
copy_block_and_bump_pointers<2>(dst, src);
175143
if (size & 4)
176-
copy_and_bump_pointers<4>(dst, src);
144+
copy_block_and_bump_pointers<4>(dst, src);
177145
return;
178146
}
179147
if (misaligned(src))
180148
LIBC_ATTR_UNLIKELY {
181149
const size_t offset = distance_to_align_up<kWordSize>(dst);
182150
if (offset & 1)
183-
copy_and_bump_pointers<1>(dst, src);
151+
copy_block_and_bump_pointers<1>(dst, src);
184152
if (offset & 2)
185-
copy_and_bump_pointers<2>(dst, src);
153+
copy_block_and_bump_pointers<2>(dst, src);
186154
size -= offset;
187155
}
188156
}
189-
copy_blocks_and_update_args<64, ForceWordLdStChain>(dst, src, size);
190-
copy_blocks_and_update_args<16, ForceWordLdStChain>(dst, src, size);
191-
copy_blocks_and_update_args<4, AssumeUnaligned>(dst, src, size);
157+
// `dst` and `src` are not necessarily both aligned at this point, but this
158+
// implementation assumes hardware support for unaligned loads and stores.
159+
consume_by_aligned_block<64, BlockOp::kByWord>(dst, src, size);
160+
consume_by_aligned_block<16, BlockOp::kByWord>(dst, src, size);
161+
consume_by_aligned_block<4, BlockOp::kFull>(dst, src, size);
192162
if (size & 1)
193-
copy_and_bump_pointers<1>(dst, src);
163+
copy_block_and_bump_pointers<1>(dst, src);
194164
if (size & 2)
195165
LIBC_ATTR_UNLIKELY
196-
copy_and_bump_pointers<2>(dst, src);
166+
copy_block_and_bump_pointers<2>(dst, src);
197167
}
198168

199169
[[maybe_unused]] LIBC_INLINE void inline_memcpy_arm(void *__restrict dst_,
@@ -210,8 +180,4 @@ LIBC_INLINE auto misaligned(CPtr a) {
210180

211181
} // namespace LIBC_NAMESPACE_DECL
212182

213-
// Cleanup local macros
214-
#undef LIBC_ATTR_LIKELY
215-
#undef LIBC_ATTR_UNLIKELY
216-
217183
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ARM_INLINE_MEMCPY_H

utils/bazel/llvm-project-overlay/libc/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4268,6 +4268,7 @@ libc_support_library(
42684268
"src/string/memory_utils/aarch64/inline_memcpy.h",
42694269
"src/string/memory_utils/aarch64/inline_memmove.h",
42704270
"src/string/memory_utils/aarch64/inline_memset.h",
4271+
"src/string/memory_utils/arm/common.h",
42714272
"src/string/memory_utils/arm/inline_memcpy.h",
42724273
"src/string/memory_utils/generic/aligned_access.h",
42734274
"src/string/memory_utils/generic/byte_per_byte.h",

0 commit comments

Comments
 (0)