Skip to content

Commit ba61f00

Browse files
mitchellh, nikneym
authored and committed
Swap read on timerfd/eventfd to poll to avoid Kernel 6.15.4 bug
This commit changes our operations on timerfd and eventfd from using read to using poll. This is a workaround for a bug in Kernel 6.15.4 that causes read operations on these file descriptors to hang indefinitely. Polling is equivalent to what we're trying to achieve here, and for the epoll backend nothing really changes: epoll is already polling, and its `read` operation was just a wrapper that subsequently called `read` anyway. Prior to this commit, tests would hang on 6.15.4; now they pass.
1 parent f0cf883 commit ba61f00

File tree

2 files changed

+79
-7
lines changed

2 files changed

+79
-7
lines changed

src/backend/io_uring.zig

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,9 +1182,12 @@ test "io_uring: timerfd" {
11821182
var called = false;
11831183
var c: Completion = .{
11841184
.op = .{
1185-
.read = .{
1185+
// Note: we should be able to use `read` here but on
1186+
// Kernel 6.15.4 there is a bug that prevents the read
1187+
// from ever firing with io_uring. I don't know why. I changed
1188+
// this to a poll so tests pass, which should also be fine!
1189+
.poll = .{
11861190
.fd = t.fd,
1187-
.buffer = .{ .array = undefined },
11881191
},
11891192
},
11901193

src/watcher/async.zig

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,14 @@ fn AsyncEventFd(comptime xev: type) type {
5353
// TODO: error handling
5454
.freebsd => eventfd(
5555
0,
56-
0x100000, // EFD_CLOEXEC
56+
0x100000 | 0x4, // EFD_CLOEXEC | EFD_NONBLOCK
5757
),
5858

5959
// Use std.posix if we can.
6060
else => try std.posix.eventfd(
6161
0,
62-
std.os.linux.EFD.CLOEXEC,
62+
std.os.linux.EFD.CLOEXEC |
63+
std.os.linux.EFD.NONBLOCK,
6364
),
6465
},
6566
};
@@ -81,7 +82,13 @@ fn AsyncEventFd(comptime xev: type) type {
8182
/// You should NOT register an async with multiple loops (the same loop
8283
/// is fine -- but unnecessary). The behavior when waiting on multiple
8384
/// loops is undefined.
84-
pub fn wait(
85+
pub const wait = switch (xev.backend) {
86+
.io_uring, .epoll => waitPoll,
87+
.kqueue => waitRead,
88+
.iocp, .wasi_poll => @compileError("AsyncEventFd does not support wait for this backend"),
89+
};
90+
91+
fn waitRead(
8592
self: Self,
8693
loop: *xev.Loop,
8794
c: *xev.Completion,
@@ -122,6 +129,63 @@ fn AsyncEventFd(comptime xev: type) type {
122129
loop.add(c);
123130
}
124131

132+
fn waitPoll(
133+
self: Self,
134+
loop: *xev.Loop,
135+
c: *xev.Completion,
136+
comptime Userdata: type,
137+
userdata: ?*Userdata,
138+
comptime cb: *const fn (
139+
ud: ?*Userdata,
140+
l: *xev.Loop,
141+
c: *xev.Completion,
142+
r: WaitError!void,
143+
) xev.CallbackAction,
144+
) void {
145+
c.* = .{
146+
.op = .{
147+
// We use a poll operation instead of a read operation
148+
// because in Kernel 6.15.4, read was regressed for
149+
// io_uring on eventfd/timerfd and would block forever.
150+
// However, poll works fine.
151+
.poll = .{
152+
.fd = self.fd,
153+
.events = posix.POLL.IN,
154+
},
155+
},
156+
157+
.userdata = userdata,
158+
.callback = (struct {
159+
fn callback(
160+
ud: ?*anyopaque,
161+
l_inner: *xev.Loop,
162+
c_inner: *xev.Completion,
163+
r: xev.Result,
164+
) xev.CallbackAction {
165+
if (r.poll) |_| {
166+
// We need to read so that we can consume the
167+
// eventfd value. We only read 8 bytes because
168+
// we only write up to 8 bytes and we own the fd.
169+
// We ignore errors here because we expect the
170+
// read to succeed given we just polled it.
171+
var buf: [8]u8 = undefined;
172+
_ = posix.read(c_inner.op.poll.fd, &buf) catch {};
173+
} else |_| {
174+
// We'll call the callback with the error later.
175+
}
176+
177+
return @call(.always_inline, cb, .{
178+
common.userdataValue(Userdata, ud),
179+
l_inner,
180+
c_inner,
181+
if (r.poll) |_| {} else |err| err,
182+
});
183+
}
184+
}).callback,
185+
};
186+
loop.add(c);
187+
}
188+
125189
/// Notify a loop to wake up synchronously. This should never block forever
126190
/// (it will always EVENTUALLY succeed regardless of if the loop is currently
127191
/// ticking or not).
@@ -670,16 +734,21 @@ fn AsyncTests(comptime xev: type, comptime Impl: type) type {
670734
) xev.CallbackAction {
671735
_ = r catch unreachable;
672736
ud.?.* = true;
673-
return .disarm;
737+
return .rearm;
674738
}
675739
}).callback);
676740

677741
// Send a notification
678742
try notifier.notify();
679743

680744
// Wait for wake
681-
try loop.run(.until_done);
745+
try loop.run(.once);
682746
try testing.expect(wake);
747+
748+
// Make sure it only triggers once
749+
wake = false;
750+
try loop.run(.no_wait);
751+
try testing.expect(!wake);
683752
}
684753

685754
test "async: notify first" {

0 commit comments

Comments
 (0)