@@ -5,24 +5,169 @@ const builtin = @import("builtin");
comptime {
    // Export both routines under their libc symbol names, except when the
    // object format is C source.
    if (builtin.object_format != .c) {
        @export(&memcpy, .{ .name = "memcpy", .linkage = common.linkage, .visibility = common.visibility });
        @export(&memmove, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
    }
}
1011
11- pub fn memcpy (noalias dest : ? [* ]u8 , noalias src : ? [* ]const u8 , len : usize ) callconv (.C ) ? [* ]u8 {
12- @setRuntimeSafety (false );
/// Selects the plain byte-loop fallbacks in `memcpy`/`memmove`.
/// True only for arm/armeb/thumb/thumbeb targets built with the LLVM backend.
/// NOTE(review): the name suggests LLVM fails to lower the vectorized paths on
/// these targets — confirm against the upstream issue before relying on that.
const llvm_cannot_lower = switch (builtin.cpu.arch) {
    .arm, .armeb, .thumb, .thumbeb => builtin.zig_backend == .stage2_llvm,
    else => false,
};
1316
14- if (len != 0 ) {
15- var d = dest .? ;
16- var s = src .? ;
17- var n = len ;
18- while (true ) {
19- d [0 ] = s [0 ];
20- n -= 1 ;
21- if (n == 0 ) break ;
22- d += 1 ;
23- s += 1 ;
/// Copies `len` bytes from `opt_src` to `opt_dest` and returns `opt_dest`.
///
/// The pointers are declared `noalias`. Both may be null when `len` is zero:
/// the fallback loop then runs no iterations, and `memmove` returns before
/// unwrapping either pointer.
fn memcpy(noalias opt_dest: ?[*]u8, noalias opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
    if (llvm_cannot_lower) {
        // Byte-wise fallback; the optionals are unwrapped per element so a
        // zero-length call never touches a null pointer.
        for (0..len) |i| opt_dest.?[i] = opt_src.?[i];
        return opt_dest;
    } else {
        // The overlap-tolerant implementation is shared with `memmove`.
        return memmove(opt_dest, opt_src, len);
    }
}
25+
/// A port of https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
///
/// Copies `len` bytes from `opt_src` to `opt_dest` and returns `opt_dest`.
/// Both pointers may be null when `len` is zero.
///
/// Dispatch by size:
///   * 1 byte          – single byte store
///   * 2..3, 4..7      – two 2-byte / 4-byte blocks (`blockCopy`)
///   * 8..16, 17..32   – two 8-byte / 16-byte blocks (`blockCopy`)
///   * 33..256         – `copyLong`
///   * > 256           – `copyMove`, which picks a direction from the pointers
fn memmove(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
    if (llvm_cannot_lower) {
        // Byte-wise fallback. Copy front-to-back when the destination lies
        // below the source, back-to-front otherwise, so bytes are read before
        // they can be overwritten.
        if (@intFromPtr(opt_dest) < @intFromPtr(opt_src)) {
            for (0..len) |i| opt_dest.?[i] = opt_src.?[i];
            return opt_dest;
        } else {
            var index = len;
            while (index != 0) {
                index -= 1;
                opt_dest.?[index] = opt_src.?[index];
            }
            return opt_dest;
        }
    }

    if (len == 0) {
        @branchHint(.unlikely);
        return opt_dest;
    }

    // From here on len >= 1, so both pointers are unwrapped.
    const dest = opt_dest.?;
    const src = opt_src.?;

    if (len < 8) {
        @branchHint(.unlikely);
        if (len == 1) {
            @branchHint(.unlikely);
            dest[0] = src[0];
        } else if (len >= 4) {
            @branchHint(.unlikely);
            // 4..7 bytes: two 4-byte blocks.
            blockCopy(dest, src, 4, len);
        } else {
            // 2..3 bytes: two 2-byte blocks.
            blockCopy(dest, src, 2, len);
        }
        return dest;
    }

    if (len > 32) {
        @branchHint(.unlikely);
        if (len > 256) {
            @branchHint(.unlikely);
            copyMove(dest, src, len);
            return dest;
        }
        // 33..256 bytes.
        copyLong(dest, src, len);
        return dest;
    }

    if (len > 16) {
        @branchHint(.unlikely);
        // 17..32 bytes: two 16-byte blocks.
        blockCopy(dest, src, 16, len);
        return dest;
    }

    // 8..16 bytes: two 8-byte blocks.
    blockCopy(dest, src, 8, len);

    return dest;
}
85+
/// Copies `len` bytes from `src` to `dest` using two `block_size`-byte vector
/// moves: one anchored at the start of the range and one at its end.
///
/// Requires `block_size <= len <= 2 * block_size`: a smaller `len` underflows
/// `len - block_size`, a larger one leaves the middle of `dest` unwritten.
/// Both blocks are loaded before either is stored, so `src` and `dest` may
/// overlap.
inline fn blockCopy(dest: [*]u8, src: [*]const u8, comptime block_size: comptime_int, len: usize) void {
    const head: @Vector(block_size, u8) = src[0..block_size].*;
    const tail: @Vector(block_size, u8) = src[len - block_size ..][0..block_size].*;
    dest[0..block_size].* = head;
    dest[len - block_size ..][0..block_size].* = tail;
}
92+
/// Copies `len` bytes from `src` to `dest` for `32 <= len <= 256`.
///
/// Works in unrolled steps of 64 bytes of capacity. Step `k` loads one more
/// 32-byte vector from the front of `src` and one more from the back, then, if
/// `len` fits in the capacity reached so far, stores every loaded vector and
/// returns. All loads therefore precede all stores, so the regions may overlap.
/// If `len` is greater than 256 the function returns without writing anything;
/// if it is smaller than 32 the `len - 32` offset underflows.
inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
    // Even slots hold front vectors, odd slots hold back vectors.
    var chunks: [8]@Vector(32, u8) = undefined;

    inline for (.{ 64, 128, 192, 256 }, 0..) |limit, step| {
        chunks[step * 2] = src[(limit / 2) - 32 ..][0..32].*;
        chunks[(step * 2) + 1] = src[len - limit / 2 ..][0..32].*;

        if (len <= limit) {
            @branchHint(.unlikely);
            for (0..step + 1) |j| {
                dest[j * 32 ..][0..32].* = chunks[j * 2];
                dest[len - ((j * 32) + 32) ..][0..32].* = chunks[(j * 2) + 1];
            }
            return;
        }
    }
}
110+
/// Copies `len` bytes from `src` to `dest`, choosing the copy direction from
/// the pointer order. Called by `memmove` for `len > 256`.
///
/// * `src >= dest`: forward copy.
/// * `src < dest` and the source range reaches into `dest`: backward copy.
/// * otherwise (source range ends at or before `dest`): forward copy.
inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
    if (@intFromPtr(src) >= @intFromPtr(dest)) {
        @branchHint(.unlikely);
        copyForward(dest, src, len);
    } else if (@intFromPtr(src) + len > @intFromPtr(dest)) {
        @branchHint(.unlikely);
        // Destination starts inside the source range: copy back-to-front.
        overlapBwd(dest, src, len);
    } else {
        copyForward(dest, src, len);
    }
}
122+
/// Copies `len` bytes from `src` to `dest` front-to-back in 128-byte strides.
///
/// Requires `len >= 32` (the last 32 source bytes are loaded up front);
/// `copyMove` calls it only for `len > 256`. What is left after the strided
/// loop is finished with the saved tail vector when at most 32 bytes remain,
/// and handed to `copyLong` otherwise.
inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
    // Saved before any store so it still holds source data if the loop
    // overwrites the end of `src`.
    const tail: @Vector(32, u8) = src[len - 32 ..][0..32].*;

    // Largest multiple of 128 not exceeding `len`.
    const bulk_len: usize = len & ~@as(usize, 127);
    var i: usize = 0;

    while (i < bulk_len) : (i += 128) {
        // Each 32-byte chunk goes through a vector temporary, like the sibling
        // helpers, so its load completes before the store to `dest`.
        inline for (.{ 0, 32, 64, 96 }) |offset| {
            const chunk: @Vector(32, u8) = src[i + offset ..][0..32].*;
            dest[i + offset ..][0..32].* = chunk;
        }
    }

    if (len - i <= 32) {
        @branchHint(.unlikely);
        dest[len - 32 ..][0..32].* = tail;
    } else {
        // 33..127 bytes remain.
        copyLong(dest[i..], src[i..], len - i);
    }
}
143+
/// Copies `len` bytes from `src` to `dest` back-to-front, for the case where
/// `dest` starts inside the source range.
///
/// Requires `len >= 128` (the first 128 source bytes are loaded
/// unconditionally); `copyMove` calls it only for `len > 256`.
///
/// The last 32 and the first 128 source bytes are loaded before anything is
/// written and stored last. The main loop in between walks downwards in
/// 128-byte strides with 32-byte-aligned stores.
inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
    // saved[0] is the final 32 bytes, saved[1..5] are the first 128 bytes.
    var saved: [5]@Vector(32, u8) = undefined;
    saved[0] = src[len - 32 ..][0..32].*;
    inline for (1..5) |i| saved[i] = src[(i - 1) << 5 ..][0..32].*;

    // Misalignment of `dest + len` modulo 32 (subtracting 32 does not change
    // the remainder). Starting the loop at `len - misalign` makes every store
    // address a multiple of 32; the skipped bytes are covered by saved[0].
    const misalign: usize = (@intFromPtr(dest) + len - 32) & 31;
    const start = len - misalign;
    var s = src + start;
    var d = dest + start;

    while (@intFromPtr(s) > @intFromPtr(src + 128)) {
        // zig fmt: off
        const first  = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 32)).*;
        const second = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 64)).*;
        const third  = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 96)).*;
        const fourth = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 128)).*;

        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 32))).*  = first;
        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 64))).*  = second;
        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 96))).*  = third;
        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 128))).* = fourth;
        // zig fmt: on

        s -= 128;
        d -= 128;
    }

    // At most 128 bytes remain at the front; the saved head vectors cover them.
    inline for (saved[1..], 0..) |vec, i| dest[i * 32 ..][0..32].* = vec;
    dest[len - 32 ..][0..32].* = saved[0];
}
0 commit comments