Skip to content

Commit 55d4b17

Browse files
mitchellh, Sythivo
authored and committed
Swap read on timerfd/eventfd to poll to avoid Kernel 6.15.4 bug (#171)
This commit changes our operations on timerfd and eventfd from using read to using poll. This is a workaround for a bug in Kernel 6.15.4 that causes read operations on these file descriptors to hang indefinitely. A poll achieves what we are trying to do here, and it is equivalent to what epoll does: epoll is already polling, and `read` was just a wrapper that subsequently called `read` anyway. Prior to this commit, tests would hang on 6.15.4; now they pass.
1 parent b1d5fae commit 55d4b17

File tree

2 files changed

+79
-7
lines changed

2 files changed

+79
-7
lines changed

src/backend/io_uring.zig

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,9 +1197,12 @@ test "io_uring: timerfd" {
11971197
var called = false;
11981198
var c: Completion = .{
11991199
.op = .{
1200-
.read = .{
1200+
// Note: we should be able to use `read` here but on
1201+
// Kernel 6.15.4 there is a bug that prevents the read
1202+
// from ever firing with io_uring. I don't know why. I changed
1203+
// this to a poll so tests pass, which should also be fine!
1204+
.poll = .{
12011205
.fd = t.fd,
1202-
.buffer = .{ .array = undefined },
12031206
},
12041207
},
12051208

src/watcher/async.zig

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,14 @@ fn AsyncEventFd(comptime xev: type) type {
5252
// TODO: error handling
5353
.freebsd => eventfd(
5454
0,
55-
0x100000, // EFD_CLOEXEC
55+
0x100000 | 0x4, // EFD_CLOEXEC | EFD_NONBLOCK
5656
),
5757

5858
// Use std.posix if we can.
5959
else => try std.posix.eventfd(
6060
0,
61-
std.os.linux.EFD.CLOEXEC,
61+
std.os.linux.EFD.CLOEXEC |
62+
std.os.linux.EFD.NONBLOCK,
6263
),
6364
},
6465
};
@@ -80,7 +81,13 @@ fn AsyncEventFd(comptime xev: type) type {
8081
/// You should NOT register an async with multiple loops (the same loop
8182
/// is fine -- but unnecessary). The behavior when waiting on multiple
8283
/// loops is undefined.
83-
pub fn wait(
84+
pub const wait = switch (xev.backend) {
85+
.io_uring, .epoll => waitPoll,
86+
.kqueue => waitRead,
87+
.iocp, .wasi_poll => @compileError("AsyncEventFd does not support wait for this backend"),
88+
};
89+
90+
fn waitRead(
8491
self: Self,
8592
loop: *xev.Loop,
8693
c: *xev.Completion,
@@ -128,6 +135,63 @@ fn AsyncEventFd(comptime xev: type) type {
128135
loop.add(c);
129136
}
130137

138+
/// Poll-based implementation of `wait`.
///
/// Initializes `c` as a poll completion for `POLL.IN` on `self.fd`, stores
/// `userdata` on it, installs a wrapper callback, and adds it to `loop`.
///
/// When the poll completes successfully, the wrapper reads up to 8 bytes
/// from the polled fd (discarding any read error) to consume the eventfd
/// value. It then invokes `cb` inline with the typed userdata, the loop,
/// the completion, and either `{}` or the poll error, and returns whatever
/// `cb` returns.
fn waitPoll(
139+
self: Self,
140+
loop: *xev.Loop,
141+
c: *xev.Completion,
142+
comptime Userdata: type,
143+
userdata: ?*Userdata,
144+
comptime cb: *const fn (
145+
ud: ?*Userdata,
146+
l: *xev.Loop,
147+
c: *xev.Completion,
148+
r: WaitError!void,
149+
) xev.CallbackAction,
150+
) void {
151+
c.* = .{
152+
.op = .{
153+
// We use a poll operation instead of a read operation
154+
// because Kernel 6.15.4 regressed read for io_uring on
155+
// eventfd/timerfd, causing it to block forever.
156+
// Poll is not affected and works fine.
157+
.poll = .{
158+
.fd = self.fd,
159+
.events = posix.POLL.IN,
160+
},
161+
},
162
163+
.userdata = userdata,
164+
.callback = (struct {
165+
fn callback(
166+
ud: ?*anyopaque,
167+
l_inner: *xev.Loop,
168+
c_inner: *xev.Completion,
169+
r: xev.Result,
170+
) xev.CallbackAction {
171+
if (r.poll) |_| {
172+
// We need to read so that we can consume the
173+
// eventfd value. We only read 8 bytes because
174+
// we only write up to 8 bytes and we own the fd.
175+
// Read errors are deliberately discarded: we expect
176+
// the read to succeed given we just polled it.
177+
var buf: [8]u8 = undefined;
178+
_ = posix.read(c_inner.op.poll.fd, &buf) catch {};
179+
} else |_| {
180+
// Nothing to consume; the poll error is forwarded to `cb` below.
181+
}
182
183+
return @call(.always_inline, cb, .{
184+
common.userdataValue(Userdata, ud),
185+
l_inner,
186+
c_inner,
187+
// Poll success becomes `{}`; a poll error is passed through as-is.
if (r.poll) |_| {} else |err| err,
188+
});
189+
}
190+
}).callback,
191+
};
192+
loop.add(c);
193+
}
194+
131195
/// Notify a loop to wake up synchronously. This should never block forever
132196
/// (it will always EVENTUALLY succeed regardless of if the loop is currently
133197
/// ticking or not).
@@ -676,16 +740,21 @@ fn AsyncTests(comptime xev: type, comptime Impl: type) type {
676740
) xev.CallbackAction {
677741
_ = r catch unreachable;
678742
ud.?.* = true;
679-
return .disarm;
743+
return .rearm;
680744
}
681745
}).callback);
682746

683747
// Send a notification
684748
try notifier.notify();
685749

686750
// Wait for wake
687-
try loop.run(.until_done);
751+
try loop.run(.once);
688752
try testing.expect(wake);
753+
754+
// Make sure it only triggers once
755+
wake = false;
756+
try loop.run(.no_wait);
757+
try testing.expect(!wake);
689758
}
690759

691760
test "async: notify first" {

0 commit comments

Comments
 (0)