99#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
1010#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
1111
12- #include " src/__support/macros/config .h" // LIBC_INLINE
13- #include " src/__support/macros/optimization.h" // LIBC_LOOP_NOUNROLL
12+ #include " src/__support/common .h"
13+ #include " src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
1414#include " src/__support/macros/properties/architectures.h"
15+ #include " src/string/memory_utils/op_aarch64.h"
1516#include " src/string/memory_utils/op_builtin.h"
17+ #include " src/string/memory_utils/op_generic.h"
18+ #include " src/string/memory_utils/op_x86.h"
1619#include " src/string/memory_utils/utils.h"
1720
1821#include < stddef.h> // size_t
1922
20- #if defined(LIBC_TARGET_ARCH_IS_X86)
21- #include " src/string/memory_utils/x86_64/memcpy_implementations.h"
22- #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
23- #include " src/string/memory_utils/aarch64/memcpy_implementations.h"
24- #endif
25-
2623namespace __llvm_libc {
2724
2825[[maybe_unused]] LIBC_INLINE void
@@ -33,6 +30,98 @@ inline_memcpy_embedded_tiny(Ptr __restrict dst, CPtr __restrict src,
3330 builtin::Memcpy<1 >::block (dst + offset, src + offset);
3431}
3532
33+ #if defined(LIBC_TARGET_ARCH_IS_X86)
34+ [[maybe_unused]] LIBC_INLINE void
35+ inline_memcpy_x86 (Ptr __restrict dst, CPtr __restrict src, size_t count) {
36+ if (count == 0 )
37+ return ;
38+ if (count == 1 )
39+ return builtin::Memcpy<1 >::block (dst, src);
40+ if (count == 2 )
41+ return builtin::Memcpy<2 >::block (dst, src);
42+ if (count == 3 )
43+ return builtin::Memcpy<3 >::block (dst, src);
44+ if (count == 4 )
45+ return builtin::Memcpy<4 >::block (dst, src);
46+ if (count < 8 )
47+ return builtin::Memcpy<4 >::head_tail (dst, src, count);
48+ if (count < 16 )
49+ return builtin::Memcpy<8 >::head_tail (dst, src, count);
50+ if (count < 32 )
51+ return builtin::Memcpy<16 >::head_tail (dst, src, count);
52+ if (count < 64 )
53+ return builtin::Memcpy<32 >::head_tail (dst, src, count);
54+ if (count < 128 )
55+ return builtin::Memcpy<64 >::head_tail (dst, src, count);
56+ if (x86::kAvx && count < 256 )
57+ return builtin::Memcpy<128 >::head_tail (dst, src, count);
58+ builtin::Memcpy<32 >::block (dst, src);
59+ align_to_next_boundary<32 , Arg::Dst>(dst, src, count);
60+ static constexpr size_t kBlockSize = x86::kAvx ? 64 : 32 ;
61+ return builtin::Memcpy<kBlockSize >::loop_and_tail (dst, src, count);
62+ }
63+
64+ [[maybe_unused]] LIBC_INLINE void
65+ inline_memcpy_x86_maybe_interpose_repmovsb (Ptr __restrict dst,
66+ CPtr __restrict src, size_t count) {
67+ // Whether to use rep;movsb exclusively, not at all, or only above a certain
68+ // threshold.
69+ #ifndef LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
70+ #define LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE -1
71+ #endif
72+
73+ #ifdef LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB
74+ #error LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB is deprecated use LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE=0 instead.
75+ #endif // LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB
76+
77+ #ifdef LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
78+ #error LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE is deprecated use LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE=0 instead.
79+ #endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
80+
81+ static constexpr size_t kRepMovsbThreshold =
82+ LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
83+ if constexpr (kRepMovsbThreshold == 0 ) {
84+ return x86::Memcpy::repmovsb (dst, src, count);
85+ } else if constexpr (kRepMovsbThreshold == size_t (-1 )) {
86+ return inline_memcpy_x86 (dst, src, count);
87+ } else {
88+ if (LIBC_UNLIKELY (count >= kRepMovsbThreshold ))
89+ return x86::Memcpy::repmovsb (dst, src, count);
90+ else
91+ return inline_memcpy_x86 (dst, src, count);
92+ }
93+ }
94+ #endif // defined(LIBC_TARGET_ARCH_IS_X86)
95+
96+ #if defined(LIBC_TARGET_ARCH_IS_AARCH64)
97+ [[maybe_unused]] LIBC_INLINE void
98+ inline_memcpy_aarch64 (Ptr __restrict dst, CPtr __restrict src, size_t count) {
99+ if (count == 0 )
100+ return ;
101+ if (count == 1 )
102+ return builtin::Memcpy<1 >::block (dst, src);
103+ if (count == 2 )
104+ return builtin::Memcpy<2 >::block (dst, src);
105+ if (count == 3 )
106+ return builtin::Memcpy<3 >::block (dst, src);
107+ if (count == 4 )
108+ return builtin::Memcpy<4 >::block (dst, src);
109+ if (count < 8 )
110+ return builtin::Memcpy<4 >::head_tail (dst, src, count);
111+ if (count < 16 )
112+ return builtin::Memcpy<8 >::head_tail (dst, src, count);
113+ if (count < 32 )
114+ return builtin::Memcpy<16 >::head_tail (dst, src, count);
115+ if (count < 64 )
116+ return builtin::Memcpy<32 >::head_tail (dst, src, count);
117+ if (count < 128 )
118+ return builtin::Memcpy<64 >::head_tail (dst, src, count);
119+ builtin::Memcpy<16 >::block (dst, src);
120+ align_to_next_boundary<16 , Arg::Src>(dst, src, count);
121+ return builtin::Memcpy<64 >::loop_and_tail (dst, src, count);
122+ }
123+ #endif // defined(LIBC_TARGET_ARCH_IS_AARCH64)
124+
36125LIBC_INLINE void inline_memcpy (Ptr __restrict dst, CPtr __restrict src,
37126 size_t count) {
38127 using namespace __llvm_libc ::builtin;
0 commit comments