Skip to content

Commit f278038

Browse files
Rexicon226 authored and andrewrk committed
replace compiler_rt memcpy with a better version
1 parent b07958e commit f278038

File tree

3 files changed

+130
-39
lines changed

3 files changed

+130
-39
lines changed

lib/compiler_rt.zig

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,6 @@ comptime {
233233

234234
_ = @import("compiler_rt/memcpy.zig");
235235
_ = @import("compiler_rt/memset.zig");
236-
_ = @import("compiler_rt/memmove.zig");
237236
_ = @import("compiler_rt/memcmp.zig");
238237
_ = @import("compiler_rt/bcmp.zig");
239238
_ = @import("compiler_rt/ssp.zig");

lib/compiler_rt/memcpy.zig

Lines changed: 130 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,141 @@ const builtin = @import("builtin");
55
comptime {
    // The C object format gets no exported symbols from this file; every
    // other object format exports the same routine under both names.
    switch (builtin.object_format) {
        .c => {},
        else => {
            @export(&memcpy, .{ .name = "memcpy", .linkage = common.linkage, .visibility = common.visibility });
            @export(&memcpy, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
        },
    }
}
1011

11-
pub fn memcpy(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
12-
@setRuntimeSafety(false);
13-
14-
if (len != 0) {
15-
var d = dest.?;
16-
var s = src.?;
17-
var n = len;
18-
while (true) {
19-
d[0] = s[0];
20-
n -= 1;
21-
if (n == 0) break;
22-
d += 1;
23-
s += 1;
12+
// a port of https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
13+
/// Copies `len` bytes from `maybe_src` to `maybe_dest` and returns the
/// destination pointer.
///
/// When `len` is zero the destination is returned untouched and neither
/// pointer is unwrapped. Otherwise both pointers are unwrapped with `.?`
/// and the copy strategy is picked from the length:
///   * 1          -> single byte store
///   * 2..3       -> two 2-byte blocks
///   * 4..7       -> two 4-byte blocks
///   * 8..16      -> two 8-byte blocks
///   * 17..32     -> two 16-byte blocks
///   * 33..256    -> `copyLong`
///   * above 256  -> `copyMove`
pub fn memcpy(maybe_dest: ?[*]u8, maybe_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
    if (len == 0) {
        @branchHint(.unlikely);
        return maybe_dest;
    }

    const dest = maybe_dest.?;
    const src = maybe_src.?;

    if (len < 8) {
        @branchHint(.unlikely);
        if (len == 1) {
            @branchHint(.unlikely);
            dest[0] = src[0];
        } else if (len >= 4) {
            @branchHint(.unlikely);
            blockCopy(dest, src, 4, len);
        } else {
            // Only lengths 2 and 3 reach this branch.
            blockCopy(dest, src, 2, len);
        }
    } else if (len > 32) {
        @branchHint(.unlikely);
        if (len > 256) {
            @branchHint(.unlikely);
            copyMove(dest, src, len);
        } else {
            copyLong(dest, src, len);
        }
    } else if (len > 16) {
        @branchHint(.unlikely);
        blockCopy(dest, src, 16, len);
    } else {
        // Remaining lengths are 8..16.
        blockCopy(dest, src, 8, len);
    }

    return dest;
}
57+
58+
/// Copies `len` bytes using two `block_size`-byte blocks: one anchored at the
/// start of the range and one anchored at its end. The blocks may overlap in
/// the middle, so `len` must satisfy `block_size <= len <= 2 * block_size`
/// for the whole range to be covered.
///
/// Both blocks are read from `src` before anything is written to `dest`.
inline fn blockCopy(dest: [*]u8, src: [*]const u8, block_size: comptime_int, len: usize) void {
    const Block = @Vector(block_size, u8);
    const tail_offset = len - block_size;

    const head = @as(*align(1) const Block, src[0..block_size]).*;
    const tail = @as(*align(1) const Block, src[tail_offset..][0..block_size]).*;

    dest[0..block_size].* = head;
    dest[tail_offset..][0..block_size].* = tail;
}
64+
65+
/// Copies `len` bytes by pairing 32-byte chunks taken from the front of the
/// range with 32-byte chunks taken from its back.
///
/// Step `k` (0..3) reads `src[k*32..]` and `src[len-(k+1)*32..]`; once
/// `len <= (k+1)*64` all chunks read so far are written out and the function
/// returns. Every read therefore happens before the first write.
///
/// `len` must be at least 32 (the first step reads `src[len - 32 ..]`).
/// A `len` above 256 falls out of the loop without writing anything.
inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);
    var heads: [4]Chunk = undefined;
    var tails: [4]Chunk = undefined;

    inline for (0..4) |step| {
        const front = step * 32;
        const back = front + 32;

        heads[step] = src[front..][0..32].*;
        tails[step] = src[len - back ..][0..32].*;

        if (len <= 2 * back) {
            @branchHint(.unlikely);
            for (0..step + 1) |k| {
                dest[k * 32 ..][0..32].* = heads[k];
                dest[len - (k * 32 + 32) ..][0..32].* = tails[k];
            }
            return;
        }
    }
}
82+
83+
/// Chooses a copy direction from the relative addresses of the two buffers.
///
/// * `src` at or above `dest`: forward copy.
/// * `src` below `dest` with `src + len` reaching past `dest` (the ranges
///   overlap): backward copy via `overlapBwd`.
/// * otherwise (`src` below `dest`, no overlap): forward copy.
inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
    const src_addr = @intFromPtr(src);
    const dest_addr = @intFromPtr(dest);

    if (src_addr >= dest_addr) {
        @branchHint(.unlikely);
        return copyForward(dest, src, len);
    }

    if (src_addr + len > dest_addr) {
        @branchHint(.unlikely);
        return overlapBwd(dest, src, len);
    }

    copyForward(dest, src, len);
}
94+
95+
/// Copies `len` bytes from low addresses to high addresses.
///
/// The last 32 source bytes are read up front, before any store. The bulk of
/// the range is then copied in 128-byte strides (four 32-byte chunks each).
/// Whatever is left after the last full stride is finished either by writing
/// the saved 32-byte tail (when at most 32 bytes remain) or by handing the
/// remainder to `copyLong`.
///
/// `len` must be at least 32 because of the initial tail read.
inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
    const saved_tail: @Vector(32, u8) = src[len - 32 ..][0..32].*;

    // `len` rounded down to a multiple of 128.
    const bulk_len: usize = len & ~@as(usize, 127);
    var offset: usize = 0;

    while (offset < bulk_len) : (offset += 128) {
        inline for (0..4) |lane| {
            const at = offset + lane * 32;
            dest[at..][0..32].* = src[at..][0..32].*;
        }
    }

    const remaining = len - offset;
    if (remaining <= 32) {
        @branchHint(.unlikely);
        dest[len - 32 ..][0..32].* = saved_tail;
    } else {
        copyLong(dest[offset..], src[offset..], remaining);
    }
}
115+
116+
/// Copies `len` bytes from high addresses to low addresses, for the case
/// where `dest` lies above `src` and the ranges overlap.
///
/// The last 32 source bytes and the first 128 source bytes are read before
/// any store. The main loop then walks downwards in 128-byte strides,
/// starting from an offset chosen so that every destination store inside the
/// loop is 32-byte aligned. Finally the saved head (128 bytes) and tail
/// (32 bytes) are written, covering what the loop did not reach.
///
/// `len` must be at least 128 because of the initial head read.
inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);

    const saved_tail: Chunk = src[len - 32 ..][0..32].*;
    var saved_head: [4]Chunk = undefined;
    inline for (0..4) |k| saved_head[k] = src[k * 32 ..][0..32].*;

    // Number of trailing bytes by which the end of `dest` misses a 32-byte
    // boundary; the loop starts below them and the saved tail covers them.
    const misalign: usize = (@intFromPtr(dest) + len - 32) & 31;
    const start = len - misalign;
    var src_cursor = src + start;
    var dest_cursor = dest + start;

    while (@intFromPtr(src_cursor) > @intFromPtr(src + 128)) {
        // Read the whole stride before writing any of it.
        var quad: [4]Chunk = undefined;
        inline for (0..4) |k| {
            quad[k] = @as(*align(1) const Chunk, @ptrCast(src_cursor - (k + 1) * 32)).*;
        }
        inline for (0..4) |k| {
            @as(*align(32) Chunk, @alignCast(@ptrCast(dest_cursor - (k + 1) * 32))).* = quad[k];
        }

        src_cursor -= 128;
        dest_cursor -= 128;
    }

    inline for (0..4) |k| dest[k * 32 ..][0..32].* = saved_head[k];
    dest[len - 32 ..][0..32].* = saved_tail;
}

lib/compiler_rt/memmove.zig

Lines changed: 0 additions & 25 deletions
This file was deleted.

0 commit comments

Comments
 (0)