@@ -5,24 +5,141 @@ const builtin = @import("builtin");
// Export the single `memcpy` implementation under both libc symbol names,
// except when emitting C source (the C object format).
comptime {
    if (builtin.object_format != .c) {
        const symbol_names = [_][]const u8{ "memcpy", "memmove" };
        for (symbol_names) |symbol_name| {
            @export(&memcpy, .{ .name = symbol_name, .linkage = common.linkage, .visibility = common.visibility });
        }
    }
}
1011
// a port of https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
/// Copies `len` bytes from `maybe_src` to `maybe_dest` and returns `maybe_dest`.
///
/// When `len` is zero the destination is returned immediately and neither
/// pointer is unwrapped, so both may be null in that case. Otherwise both
/// pointers must be non-null.
///
/// The copy strategy is selected by size:
///   * 1 byte:          a single byte store,
///   * 2..=3 bytes:     two 2-byte blocks,
///   * 4..=7 bytes:     two 4-byte blocks,
///   * 8..=16 bytes:    two 8-byte blocks,
///   * 17..=32 bytes:   two 16-byte blocks,
///   * 33..=256 bytes:  `copyLong`,
///   * above 256 bytes: `copyMove`, which picks a copy direction from the
///     relative position of the two buffers.
///
/// The parameters are intentionally not `noalias`.
pub fn memcpy(maybe_dest: ?[*]u8, maybe_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
    if (len == 0) {
        @branchHint(.unlikely);
        return maybe_dest;
    }

    const dest = maybe_dest.?;
    const src = maybe_src.?;

    if (len < 8) {
        @branchHint(.unlikely);
        if (len == 1) {
            @branchHint(.unlikely);
            dest[0] = src[0];
        } else if (len >= 4) {
            @branchHint(.unlikely);
            blockCopy(dest, src, 4, len);
        } else {
            // len is 2 or 3 here.
            blockCopy(dest, src, 2, len);
        }
    } else if (len > 32) {
        @branchHint(.unlikely);
        if (len > 256) {
            @branchHint(.unlikely);
            copyMove(dest, src, len);
        } else {
            copyLong(dest, src, len);
        }
    } else if (len > 16) {
        @branchHint(.unlikely);
        blockCopy(dest, src, 16, len);
    } else {
        // len is in 8..=16 here.
        blockCopy(dest, src, 8, len);
    }

    return dest;
}
57+
/// Copies `len` bytes using exactly two `block_size`-byte blocks: one anchored
/// at the start of the range and one anchored at its end.
///
/// The two blocks together cover the whole range only when
/// `block_size <= len <= 2 * block_size`; the blocks overlap in the middle
/// whenever `len < 2 * block_size`.
///
/// Both blocks are loaded from `src` before either is stored to `dest`.
inline fn blockCopy(dest: [*]u8, src: [*]const u8, block_size: comptime_int, len: usize) void {
    const Block = @Vector(block_size, u8);
    const tail_offset = len - block_size;

    const head: Block = src[0..block_size].*;
    const tail: Block = src[tail_offset..][0..block_size].*;

    dest[0..block_size].* = head;
    dest[tail_offset..][0..block_size].* = tail;
}
64+
/// Copies `len` bytes using up to four 32-byte chunks taken from the front of
/// the range and up to four taken from the back.
///
/// Chunks are loaded in pairs (one from each end); loading stops at the first
/// step `k` for which `len <= 64 * (k + 1)`, so no more source bytes are read
/// than that step requires. Only then are the collected chunks stored, which
/// means every load precedes every store.
///
/// Requires `len >= 32`. If `len` exceeds 256 no step matches and nothing is
/// written; callers in this file only pass smaller lengths.
inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);
    var heads: [4]Chunk = undefined;
    var tails: [4]Chunk = undefined;

    inline for (0..4) |k| {
        const offset = k * 32;
        heads[k] = src[offset..][0..32].*;
        tails[k] = src[len - (offset + 32) ..][0..32].*;

        if (len <= 2 * (offset + 32)) {
            @branchHint(.unlikely);
            for (0..k + 1) |j| {
                dest[j * 32 ..][0..32].* = heads[j];
                dest[len - ((j * 32) + 32) ..][0..32].* = tails[j];
            }
            return;
        }
    }
}
82+
/// Chooses a copy direction from the relative position of the two buffers.
///
/// * `src` at or above `dest`: copy forward.
/// * `src` below `dest` with `src + len` reaching past `dest` (the source's
///   end overlaps the destination's start): copy backward via `overlapBwd`.
/// * `src` below `dest` without overlap: copy forward.
inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
    const dest_addr = @intFromPtr(dest);
    const src_addr = @intFromPtr(src);

    if (src_addr >= dest_addr) {
        @branchHint(.unlikely);
        copyForward(dest, src, len);
        return;
    }

    if (src_addr + len > dest_addr) {
        @branchHint(.unlikely);
        overlapBwd(dest, src, len);
        return;
    }

    copyForward(dest, src, len);
}
94+
/// Copies `len` bytes from low to high addresses, 128 bytes per iteration.
///
/// The final 32 source bytes are loaded before any store is issued, so they
/// keep their original contents even if the main loop writes over them.
/// After the loop, fewer than 128 bytes remain: a remainder of at most 32
/// bytes is finished by storing the saved tail, anything larger is handed to
/// `copyLong`.
///
/// Requires `len >= 32`.
inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);
    const last: Chunk = src[len - 32 ..][0..32].*;

    // Largest multiple of 128 that does not exceed `len`.
    const bulk_len: usize = len - (len % 128);
    var done: usize = 0;

    while (done < bulk_len) : (done += 128) {
        // Four 32-byte load/store pairs, lowest address first.
        inline for (.{ 0, 32, 64, 96 }) |lane| {
            dest[done + lane ..][0..32].* = src[done + lane ..][0..32].*;
        }
    }

    const remaining = len - done;
    if (remaining <= 32) {
        @branchHint(.unlikely);
        dest[len - 32 ..][0..32].* = last;
    } else {
        copyLong(dest[done..], src[done..], remaining);
    }
}
115+
/// Copies `len` bytes from high to low addresses, for the case where `dest`
/// lies above `src` and the two ranges overlap.
///
/// The first 128 and the last 32 source bytes are saved up front. The main
/// loop then walks downward in 128-byte steps from a point chosen so that
/// every destination store is 32-byte aligned; each step performs all four
/// loads before its four stores. The loop stops once at most 128 source bytes
/// remain below the cursor; those, and the unaligned bytes at the very end,
/// are covered by storing the saved head and tail.
///
/// Requires `len >= 128`.
inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);

    const tail: Chunk = src[len - 32 ..][0..32].*;
    var head: [4]Chunk = undefined;
    inline for (0..4) |k| head[k] = src[k * 32 ..][0..32].*;

    // Number of trailing bytes past the last 32-byte boundary of `dest + len`.
    const misaligned: usize = (@intFromPtr(dest) + len - 32) & 31;
    const aligned_len = len - misaligned;
    var s = src + aligned_len;
    var d = dest + aligned_len;

    const stop_addr = @intFromPtr(src + 128);
    while (@intFromPtr(s) > stop_addr) {
        var block: [4]Chunk = undefined;

        // Load all four chunks (highest address first) before storing any.
        inline for (0..4) |k| {
            block[k] = @as(*align(1) const Chunk, @ptrCast(s - 32 * (k + 1))).*;
        }
        inline for (0..4) |k| {
            @as(*align(32) Chunk, @alignCast(@ptrCast(d - 32 * (k + 1)))).* = block[k];
        }

        s -= 128;
        d -= 128;
    }

    inline for (0..4) |k| dest[k * 32 ..][0..32].* = head[k];
    dest[len - 32 ..][0..32].* = tail;
}
0 commit comments