From 2ac9b2088a933b3f67135c1eb5e2bf1f42e8801b Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Tue, 2 Sep 2025 19:45:49 +0800 Subject: [PATCH 1/4] Always monitor the CDP client socket, even on page.wait --- src/browser/ScriptManager.zig | 2 +- src/browser/page.zig | 10 +++++++-- src/cdp/cdp.zig | 2 +- src/http/Client.zig | 41 ++++++++++++++++++++++++----------- src/http/Http.zig | 17 ++++++++++----- src/server.zig | 6 ++++- 6 files changed, 54 insertions(+), 24 deletions(-) diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 7a17cdc30..190d8f13d 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -285,7 +285,7 @@ pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult { // rely on http's timeout settings to avoid an endless/long loop. while (true) { - _ = try client.tick(.{ .timeout_ms = 200 }); + _ = try client.tick(200); switch (blocking.state) { .running => {}, .done => |result| return result, diff --git a/src/browser/page.zig b/src/browser/page.zig index 211ed5987..05430281e 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -313,7 +313,10 @@ pub const Page = struct { } // There should only be 1 active http transfer, the main page - _ = try http_client.tick(.{ .timeout_ms = ms_remaining }); + if (try http_client.tick(ms_remaining) == .extra_socket) { + // data on a socket we aren't handling, return to caller + return; + } }, .html, .parsed => { // The HTML page was parsed. We now either have JS scripts to @@ -374,7 +377,10 @@ pub const Page = struct { // inflight requests else @min(ms_remaining, ms_to_next_task orelse 1000); - _ = try http_client.tick(.{ .timeout_ms = ms_to_wait }); + if (try http_client.tick(ms_to_wait) == .extra_socket) { + // data on a socket we aren't handling, return to caller + return; + } if (request_intercepted) { // Again, proritizing intercepted requests. Exit this diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index a9d932f06..a6b645d19 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -103,7 +103,7 @@ pub fn CDPT(comptime TypeProvider: type) type { pub fn handleMessage(self: *Self, msg: []const u8) bool { // if there's an error, it's already been logged self.processMessage(msg) catch return false; - self.pageWait(); + // self.pageWait(); return true; } diff --git a/src/http/Client.zig b/src/http/Client.zig index 0b42ff807..cec046ffe 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -87,6 +87,11 @@ notification: ?*Notification = null, // restoring, this originally-configured value is what it goes to. http_proxy: ?[:0]const u8 = null, +// libcurl can monitor arbitrary sockets. Currently, we ever [maybe] want to +// monitor the CDP client socket, so we've done the simplest thing possible +// by having this single optional field +extra_socket: ?posix.socket_t = null, + const TransferQueue = std.DoublyLinkedList; pub fn init(allocator: Allocator, ca_blob: ?c.curl_blob, opts: Http.Opts) !*Client { @@ -162,11 +167,7 @@ pub fn abort(self: *Client) void { } } -const TickOpts = struct { - timeout_ms: i32 = 0, - poll_socket: ?posix.socket_t = null, -}; -pub fn tick(self: *Client, opts: TickOpts) !bool { +pub fn tick(self: *Client, timeout_ms: i32) !PerformStatus { while (true) { if (self.handles.hasAvailable() == false) { break; @@ -178,7 +179,7 @@ pub fn tick(self: *Client, opts: TickOpts) !bool { const handle = self.handles.getFreeHandle().?; try self.makeRequest(handle, transfer); } - return self.perform(opts.timeout_ms, opts.poll_socket); + return self.perform(timeout_ms); } pub fn request(self: *Client, req: Request) !void { @@ -342,15 +343,25 @@ fn makeRequest(self: *Client, handle: *Handle, transfer: *Transfer) !void { } self.active += 1; - _ = try self.perform(0, null); + _ = try self.perform(0); } -fn perform(self: *Client, timeout_ms: c_int, socket: ?posix.socket_t) !bool { +pub const PerformStatus = enum{ + extra_socket, + normal, +}; + +fn perform(self: *Client, timeout_ms: c_int) !PerformStatus { const multi = self.multi; var running: c_int = undefined; try errorMCheck(c.curl_multi_perform(multi, &running)); - if (socket) |s| { + // We're potentially going to block for a while until we get data. Process + // whatever messages we have waiting ahead of time. + try self.processMessages(); + + var status = PerformStatus.normal; + if (self.extra_socket) |s| { var wait_fd = c.curl_waitfd{ .fd = s, .events = c.CURL_WAIT_POLLIN, @@ -359,12 +370,18 @@ fn perform(self: *Client, timeout_ms: c_int, socket: ?posix.socket_t) !bool { try errorMCheck(c.curl_multi_poll(multi, &wait_fd, 1, timeout_ms, null)); if (wait_fd.revents != 0) { // the extra socket we passed in is ready, let's signal our caller - return true; + status = .extra_socket; } - } else if (running > 0 and timeout_ms > 0) { + } else if (running > 0) { try errorMCheck(c.curl_multi_poll(multi, null, 0, timeout_ms, null)); } + try self.processMessages(); + return status; +} + +fn processMessages(self: *Client) !void { + const multi = self.multi; var messages_count: c_int = 0; while (c.curl_multi_info_read(multi, &messages_count)) |msg_| { const msg: *c.CURLMsg = @ptrCast(msg_); @@ -422,8 +439,6 @@ fn perform(self: *Client, timeout_ms: c_int, socket: ?posix.socket_t) !bool { self.requestFailed(transfer, err); } } - - return false; } fn endTransfer(self: *Client, transfer: *Transfer) void { diff --git a/src/http/Http.zig b/src/http/Http.zig index ebd1e476e..6f1f1fdc3 100644 --- a/src/http/Http.zig +++ b/src/http/Http.zig @@ -83,16 +83,21 @@ pub fn deinit(self: *Http) void { self.arena.deinit(); } -pub fn poll(self: *Http, timeout_ms: i32, socket: posix.socket_t) bool { - return self.client.tick(.{ - .timeout_ms = timeout_ms, - .poll_socket = socket, - }) catch |err| { +pub fn poll(self: *Http, timeout_ms: i32) Client.PerformStatus { + return self.client.tick(timeout_ms) catch |err| { log.err(.app, "http poll", .{ .err = err }); - return false; + return .normal; }; } +pub fn monitorSocket(self: *Http, socket: posix.socket_t) void { + self.client.extra_socket = socket; +} + +pub fn unmonitorSocket(self: *Http) void { + self.client.extra_socket = null; +} + pub fn newConnection(self: *Http) !Connection { return Connection.init(self.ca_blob, &self.opts); } diff --git a/src/server.zig b/src/server.zig index 2a16bd8d0..0bb48f65b 100644 --- a/src/server.zig +++ b/src/server.zig @@ -126,8 +126,12 @@ pub const Server = struct { var last_message = timestamp(); var http = &self.app.http; + + http.monitorSocket(socket); + defer http.unmonitorSocket(); + while (true) { - if (http.poll(20, socket)) { + if (http.poll(10) == .extra_socket) { const n = posix.read(socket, client.readBuf()) catch |err| { log.warn(.app, "CDP read", .{ .err = err }); return; From e237e709b6c58b9b42b2bb15dd242f1c260a4885 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Tue, 2 Sep 2025 21:05:22 +0800 Subject: [PATCH 2/4] Change loader id on navigation This appears to be what chrome is doing. I don't know why we weren't before. --- src/cdp/cdp.zig | 1 - src/cdp/domains/page.zig | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index a6b645d19..ae95e5707 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -103,7 +103,6 @@ pub fn CDPT(comptime TypeProvider: type) type { pub fn handleMessage(self: *Self, msg: []const u8) bool { // if there's an error, it's already been logged self.processMessage(msg) catch return false; - // self.pageWait(); return true; } diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index 19ef1b776..1f7e44940 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -164,10 +164,7 @@ pub fn pageNavigate(arena: Allocator, bc: anytype, event: *const Notification.Pa var cdp = bc.cdp; - if (event.opts.reason != .address_bar) { - bc.loader_id = bc.cdp.loader_id_gen.next(); - } - + bc.loader_id = bc.cdp.loader_id_gen.next(); const loader_id = bc.loader_id; const target_id = bc.target_id orelse unreachable; const session_id = bc.session_id orelse unreachable; From b6137b03cde488072e8998b68b8a1429e017fd52 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Wed, 3 Sep 2025 19:37:09 +0800 Subject: [PATCH 3/4] Rework page wait again Further reducing bouncing between page and server for loop polling. If there is a page, the page polls. If there isn't a page, the server polls. Simpler. --- src/browser/Scheduler.zig | 5 +- src/browser/ScriptManager.zig | 2 +- src/browser/html/window.zig | 12 ++- src/browser/page.zig | 194 ++++++++++++++++------------------ src/browser/session.zig | 17 ++- src/cdp/cdp.zig | 12 ++- src/cdp/domains/fetch.zig | 24 ----- src/http/Client.zig | 2 +- src/main.zig | 4 +- src/main_wpt.zig | 2 +- src/server.zig | 90 ++++++++++------ src/testing.zig | 4 +- 12 files changed, 187 insertions(+), 181 deletions(-) diff --git a/src/browser/Scheduler.zig b/src/browser/Scheduler.zig index d79ca14c1..f2a43e7f4 100644 --- a/src/browser/Scheduler.zig +++ b/src/browser/Scheduler.zig @@ -44,6 +44,7 @@ pub fn reset(self: *Scheduler) void { const AddOpts = struct { name: []const u8 = "", + low_priority: bool = false, }; pub fn add(self: *Scheduler, ctx: *anyopaque, func: Task.Func, ms: u32, opts: AddOpts) !void { if (ms > 5_000) { @@ -51,7 +52,9 @@ pub fn add(self: *Scheduler, ctx: *anyopaque, func: Task.Func, ms: u32, opts: Ad // ignore any task that we're almost certainly never going to run return; } - return self.primary.add(.{ + + var q = if (opts.low_priority) &self.secondary else &self.primary; + return q.add(.{ .ms = std.time.milliTimestamp() + ms, .ctx = ctx, .func = func, diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 190d8f13d..36d47778b 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -428,7 +428,7 @@ fn errorCallback(ctx: *anyopaque, err: anyerror) void { // It could be pending because: // (a) we're still downloading its content or // (b) this is a non-async script that has to be executed in order -const PendingScript = struct { +pub const PendingScript = struct { script: Script, complete: bool, node: OrderList.Node, diff --git a/src/browser/html/window.zig b/src/browser/html/window.zig index 37a1dda25..06953c156 100644 --- a/src/browser/html/window.zig +++ b/src/browser/html/window.zig @@ -215,7 +215,11 @@ pub const Window = struct { } pub fn _requestAnimationFrame(self: *Window, cbk: Function, page: *Page) !u32 { - return self.createTimeout(cbk, 5, page, .{ .animation_frame = true, .name = "animationFrame" }); + return self.createTimeout(cbk, 5, page, .{ + .animation_frame = true, + .name = "animationFrame", + .low_priority = true, + }); } pub fn _cancelAnimationFrame(self: *Window, id: u32) !void { @@ -269,6 +273,7 @@ pub const Window = struct { args: []Env.JsObject = &.{}, repeat: bool = false, animation_frame: bool = false, + low_priority: bool = false, }; fn createTimeout(self: *Window, cbk: Function, delay_: ?u32, page: *Page, opts: CreateTimeoutOpts) !u32 { const delay = delay_ orelse 0; @@ -319,7 +324,10 @@ pub const Window = struct { .repeat = if (opts.repeat) delay + 1 else null, }; - try page.scheduler.add(callback, TimerCallback.run, delay, .{ .name = opts.name }); + try page.scheduler.add(callback, TimerCallback.run, delay, .{ + .name = opts.name, + .low_priority = opts.low_priority, + }); return timer_id; } diff --git a/src/browser/page.zig b/src/browser/page.zig index 05430281e..f8dd08d70 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -90,16 +90,6 @@ pub const Page = struct { load_state: LoadState = .parsing, - // Page.wait balances waiting for resources / tasks and producing an output. - // Up until a timeout, Page.wait will always wait for inflight or pending - // HTTP requests, via the Http.Client.active counter. However, intercepted - // requests (via CDP, but it could be anything), aren't considered "active" - // connection. So it's possible that we have intercepted requests (which are - // pending on some driver to continue/abort) while Http.Client.active == 0. - // This boolean exists to supplment Http.Client.active and inform Page.wait - // of pending connections. - request_intercepted: bool = false, - const Mode = union(enum) { pre: void, err: anyerror, @@ -262,23 +252,26 @@ pub const Page = struct { return self.script_manager.blockingGet(src); } - pub fn wait(self: *Page, wait_sec: u16) void { - self._wait(wait_sec) catch |err| switch (err) { - error.JsError => {}, // already logged (with hopefully more context) - else => { - // There may be errors from the http/client or ScriptManager - // that we should not treat as an error like this. Will need - // to run this through more real-world sites and see if we need - // to expand the switch (err) to have more customized logs for - // specific messages. - log.err(.browser, "page wait", .{ .err = err }); - }, + pub fn wait(self: *Page, wait_ms: i32) Session.WaitResult { + return self._wait(wait_ms) catch |err| { + switch (err) { + error.JsError => {}, // already logged (with hopefully more context) + else => { + // There may be errors from the http/client or ScriptManager + // that we should not treat as an error like this. Will need + // to run this through more real-world sites and see if we need + // to expand the switch (err) to have more customized logs for + // specific messages. + log.err(.browser, "page wait", .{ .err = err }); + }, + } + return .done; }; } - fn _wait(self: *Page, wait_sec: u16) !void { + fn _wait(self: *Page, wait_ms: i32) !Session.WaitResult { var timer = try std.time.Timer.start(); - var ms_remaining: i32 = @intCast(wait_sec * 1000); + var ms_remaining = wait_ms; var try_catch: Env.TryCatch = undefined; try_catch.init(self.main_context); @@ -287,40 +280,42 @@ pub const Page = struct { var scheduler = &self.scheduler; var http_client = self.http_client; + // I'd like the page to know NOTHING about extra_socket / CDP, but the + // fact is that the behavior of wait changes depending on whether or + // not we're using CDP. + // If we aren't using CDP, as soon as we think there's nothing left + // to do, we can exit - we'de done. + // But if we are using CDP, we should wait for the whole `wait_ms` + // because the http_click.tick() also monitors the CDP socket. And while + // we could let CDP poll http (like it does for HTTP requests), the fact + // is that we know more about the timing of stuff (e.g. how long to + // poll/sleep) in the page. + const exit_when_done = http_client.extra_socket == null; + // for debugging // defer self.printWaitAnalysis(); while (true) { - SW: switch (self.mode) { + switch (self.mode) { .pre, .raw, .text => { - if (self.request_intercepted) { - // the page request was intercepted. - - // there shouldn't be any active requests; - std.debug.assert(http_client.active == 0); - - // nothing we can do for this, need to kick the can up - // the chain and wait for activity (e.g. a CDP message) - // to unblock this. - return; - } - // The main page hasn't started/finished navigating. // There's no JS to run, and no reason to run the scheduler. - if (http_client.active == 0) { + if (http_client.active == 0 and exit_when_done) { // haven't started navigating, I guess. - return; + return .done; } - // There should only be 1 active http transfer, the main page + // Either we have active http connections, or we're in CDP + // mode with an extra socket. Either way, we're waiting + // for http traffic if (try http_client.tick(ms_remaining) == .extra_socket) { // data on a socket we aren't handling, return to caller - return; + return .extra_socket; } }, .html, .parsed => { // The HTML page was parsed. We now either have JS scripts to - // download, or timeouts to execute, or both. + // download, or scheduled tasks to execute, or both. // scheduler.run could trigger new http transfers, so do not // store http_client.active BEFORE this call and then use @@ -333,72 +328,56 @@ pub const Page = struct { return error.JsError; } - if (http_client.active == 0) { - if (ms_to_next_task) |ms| { - // There are no HTTP transfers, so there's no point calling - // http_client.tick. - // TODO: should we just force-run the scheduler?? - - if (ms > ms_remaining) { - // we'd wait to long, might as well exit early. - return; - } - _ = try scheduler.runLowPriority(); - - // We must use a u64 here b/c ms is a u32 and the - // conversion to ns can generate an integer - // overflow. - const _ms: u64 = @intCast(ms); - - std.Thread.sleep(std.time.ns_per_ms * _ms); - break :SW; + if (http_client.active == 0 and exit_when_done) { + const ms = ms_to_next_task orelse { + // no http transfers, no cdp extra socket, no + // scheduled tasks, we're done. + return .done; + }; + + if (ms > ms_remaining) { + // same as above, except we have a scheduled task, + // it just happens to be too far into the future.s + return .done; } - // We have no active http transfer and no pending - // schedule tasks. We're done - return; - } - - _ = try scheduler.runLowPriority(); - - const request_intercepted = self.request_intercepted; - - // We want to prioritize processing intercepted requests - // because, the sooner they get unblocked, the sooner we - // can start the HTTP request. But we still want to advanced - // existing HTTP requests, if possible. So, if we have - // intercepted requests, we'll still look at existing HTTP - // requests, but we won't block waiting for more data. - const ms_to_wait = - if (request_intercepted) 0 - - // But if we have no intercepted requests, we'll wait - // for as long as we can for data to our existing - // inflight requests - else @min(ms_remaining, ms_to_next_task orelse 1000); - - if (try http_client.tick(ms_to_wait) == .extra_socket) { - // data on a socket we aren't handling, return to caller - return; - } - - if (request_intercepted) { - // Again, proritizing intercepted requests. Exit this - // loop so that our caller can hopefully resolve them - // (i.e. continue or abort them); - return; + _ = try scheduler.runLowPriority(); + // we have a task to run in the not-so-distant future. + // You might think we can just sleep until that task is + // ready, but we should continue to run lowPriority tasks + // in the meantime, and that could unblock things. So + // we'll just sleep for a bit, and then restart our wait + // loop to see what's changed + std.Thread.sleep(std.time.ns_per_ms * @as(u64, @intCast(@min(ms, 20)))); + } else { + // We're here because we either have active HTTP + // connections, of exit_when_done == false (aka, there's + // an extra_socket registered with the http client). + _ = try scheduler.runLowPriority(); + const ms_to_wait = @min(ms_remaining, ms_to_next_task orelse 100); + if (try http_client.tick(ms_to_wait) == .extra_socket) { + // data on a socket we aren't handling, return to caller + return .extra_socket; + } } }, .err => |err| { self.mode = .{ .raw_done = @errorName(err) }; return err; }, - .raw_done => return, + .raw_done => { + if (exit_when_done) { + return .done; + } + // we _could_ http_client.tick(ms_to_wait), but this has + // the same result, and I feel is more correct. + return .no_page; + } } const ms_elapsed = timer.lap() / 1_000_000; if (ms_elapsed >= ms_remaining) { - return; + return .done; } ms_remaining -= @intCast(ms_elapsed); } @@ -411,48 +390,53 @@ pub const Page = struct { std.debug.print("\nactive requests: {d}\n", .{self.http_client.active}); var n_ = self.http_client.handles.in_use.first; while (n_) |n| { - const transfer = Http.Transfer.fromEasy(n.data.conn.easy) catch |err| { + const handle: *Http.Client.Handle = @fieldParentPtr("node", n); + const transfer = Http.Transfer.fromEasy(handle.conn.easy) catch |err| { std.debug.print(" - failed to load transfer: {any}\n", .{err}); break; }; - std.debug.print(" - {s}\n", .{transfer}); + std.debug.print(" - {f}\n", .{transfer}); n_ = n.next; } } { - std.debug.print("\nqueued requests: {d}\n", .{self.http_client.queue.len}); + std.debug.print("\nqueued requests: {d}\n", .{self.http_client.queue.len()}); var n_ = self.http_client.queue.first; while (n_) |n| { - std.debug.print(" - {s}\n", .{n.data.url}); + const transfer: *Http.Transfer = @fieldParentPtr("_node", n); + std.debug.print(" - {f}\n", .{transfer.uri}); n_ = n.next; } } { - std.debug.print("\nscripts: {d}\n", .{self.script_manager.scripts.len}); + std.debug.print("\nscripts: {d}\n", .{self.script_manager.scripts.len()}); var n_ = self.script_manager.scripts.first; while (n_) |n| { - std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete }); + const ps: *ScriptManager.PendingScript = @fieldParentPtr("node", n); + std.debug.print(" - {s} complete: {any}\n", .{ ps.script.url, ps.complete }); n_ = n.next; } } { - std.debug.print("\ndeferreds: {d}\n", .{self.script_manager.deferreds.len}); + std.debug.print("\ndeferreds: {d}\n", .{self.script_manager.deferreds.len()}); var n_ = self.script_manager.deferreds.first; while (n_) |n| { - std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete }); + const ps: *ScriptManager.PendingScript = @fieldParentPtr("node", n); + std.debug.print(" - {s} complete: {any}\n", .{ ps.script.url, ps.complete }); n_ = n.next; } } const now = std.time.milliTimestamp(); { - std.debug.print("\nasyncs: {d}\n", .{self.script_manager.asyncs.len}); + std.debug.print("\nasyncs: {d}\n", .{self.script_manager.asyncs.len()}); var n_ = self.script_manager.asyncs.first; while (n_) |n| { - std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete }); + const ps: *ScriptManager.PendingScript = @fieldParentPtr("node", n); + std.debug.print(" - {s} complete: {any}\n", .{ ps.script.url, ps.complete }); n_ = n.next; } } diff --git a/src/browser/session.zig b/src/browser/session.zig index 0ca39cce0..a9c590a4d 100644 --- a/src/browser/session.zig +++ b/src/browser/session.zig @@ -137,7 +137,13 @@ pub const Session = struct { return &(self.page orelse return null); } - pub fn wait(self: *Session, wait_sec: u16) void { + pub const WaitResult = enum { + done, + no_page, + extra_socket, + }; + + pub fn wait(self: *Session, wait_ms: i32) WaitResult { if (self.queued_navigation) |qn| { // This was already aborted on the page, but it would be pretty // bad if old requests went to the new page, so let's make double sure @@ -154,21 +160,24 @@ pub const Session = struct { .err = err, .url = qn.url, }); - return; + return .done; }; page.navigate(qn.url, qn.opts) catch |err| { log.err(.browser, "queued navigation error", .{ .err = err, .url = qn.url }); - return; + return .done; }; } if (self.page) |*page| { - page.wait(wait_sec); + return page.wait(wait_ms); } + return .no_page; } }; + + const QueuedNavigation = struct { url: []const u8, opts: NavigateOpts, diff --git a/src/cdp/cdp.zig b/src/cdp/cdp.zig index ae95e5707..55c489d32 100644 --- a/src/cdp/cdp.zig +++ b/src/cdp/cdp.zig @@ -116,11 +116,13 @@ pub fn CDPT(comptime TypeProvider: type) type { // A bit hacky right now. The main server loop doesn't unblock for // scheduled task. So we run this directly in order to process any // timeouts (or http events) which are ready to be processed. - pub fn pageWait(self: *Self) void { - const session = &(self.browser.session orelse return); - // exits early if there's nothing to do, so a large value like - // 5 seconds should be ok - session.wait(5); + + pub fn hasPage() bool { + + } + pub fn pageWait(self: *Self, ms: i32) Session.WaitResult { + const session = &(self.browser.session orelse return .no_page); + return session.wait(ms); } // Called from above, in processMessage which handles client messages diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 9ff261024..c299e05c9 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -182,7 +182,6 @@ pub fn requestIntercept(arena: Allocator, bc: anytype, intercept: *const Notific // unreachable because we _have_ to have a page. const session_id = bc.session_id orelse unreachable; const target_id = bc.target_id orelse unreachable; - const page = bc.session.currentPage() orelse unreachable; // We keep it around to wait for modifications to the request. // NOTE: we assume whomever created the request created it with a lifetime of the Page. @@ -211,7 +210,6 @@ pub fn requestIntercept(arena: Allocator, bc: anytype, intercept: *const Notific // Await either continueRequest, failRequest or fulfillRequest intercept.wait_for_interception.* = true; - page.request_intercepted = true; } fn continueRequest(cmd: anytype) !void { @@ -229,8 +227,6 @@ fn continueRequest(cmd: anytype) !void { return error.NotImplemented; } - const page = bc.session.currentPage() orelse return error.PageNotLoaded; - var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; @@ -266,11 +262,6 @@ fn continueRequest(cmd: anytype) !void { } try bc.cdp.browser.http_client.process(transfer); - - if (intercept_state.empty()) { - page.request_intercepted = false; - } - return cmd.sendResult(null, .{}); } @@ -292,8 +283,6 @@ fn continueWithAuth(cmd: anytype) !void { }, })) orelse return error.InvalidParams; - const page = bc.session.currentPage() orelse return error.PageNotLoaded; - var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); const transfer = intercept_state.remove(request_id) orelse return error.RequestNotFound; @@ -323,11 +312,6 @@ fn continueWithAuth(cmd: anytype) !void { transfer.reset(); try bc.cdp.browser.http_client.process(transfer); - - if (intercept_state.empty()) { - page.request_intercepted = false; - } - return cmd.sendResult(null, .{}); } @@ -380,8 +364,6 @@ fn failRequest(cmd: anytype) !void { errorReason: ErrorReason, })) orelse return error.InvalidParams; - const page = bc.session.currentPage() orelse return error.PageNotLoaded; - var intercept_state = &bc.intercept_state; const request_id = try idFromRequestId(params.requestId); @@ -394,10 +376,6 @@ fn failRequest(cmd: anytype) !void { .url = transfer.uri, .reason = params.errorReason, }); - - if (intercept_state.empty()) { - page.request_intercepted = false; - } return cmd.sendResult(null, .{}); } @@ -405,7 +383,6 @@ pub fn requestAuthRequired(arena: Allocator, bc: anytype, intercept: *const Noti // unreachable because we _have_ to have a page. const session_id = bc.session_id orelse unreachable; const target_id = bc.target_id orelse unreachable; - const page = bc.session.currentPage() orelse unreachable; // We keep it around to wait for modifications to the request. // NOTE: we assume whomever created the request created it with a lifetime of the Page. @@ -442,7 +419,6 @@ pub fn requestAuthRequired(arena: Allocator, bc: anytype, intercept: *const Noti // Await continueWithAuth intercept.wait_for_interception.* = true; - page.request_intercepted = true; } // Get u64 from requestId which is formatted as: "INTERCEPT-{d}" diff --git a/src/http/Client.zig b/src/http/Client.zig index cec046ffe..5ab99c309 100644 --- a/src/http/Client.zig +++ b/src/http/Client.zig @@ -523,7 +523,7 @@ const Handles = struct { }; // wraps a c.CURL (an easy handle) -const Handle = struct { +pub const Handle = struct { client: *Client, conn: Http.Connection, node: Handles.HandleList.Node, diff --git a/src/main.zig b/src/main.zig index 916b3c9f4..3aac69074 100644 --- a/src/main.zig +++ b/src/main.zig @@ -137,7 +137,7 @@ fn run(alloc: Allocator) !void { const server = &_server.?; defer server.deinit(); - server.run(address, opts.timeout) catch |err| { + server.run(address, opts.timeout * 1000) catch |err| { log.fatal(.app, "server run error", .{ .err = err }); return err; }; @@ -166,7 +166,7 @@ fn run(alloc: Allocator) !void { }, }; - session.wait(5); // 5 seconds + _ = session.wait(5000); // 5 seconds // dump if (opts.dump) { diff --git a/src/main_wpt.zig b/src/main_wpt.zig index 7a4f9d8ef..9632bca8b 100644 --- a/src/main_wpt.zig +++ b/src/main_wpt.zig @@ -109,7 +109,7 @@ fn run( const url = try std.fmt.allocPrint(arena, "http://localhost:9582/{s}", .{test_file}); try page.navigate(url, .{}); - page.wait(2); + _ = page.wait(2000); const js_context = page.main_context; var try_catch: Env.TryCatch = undefined; diff --git a/src/server.zig b/src/server.zig index 0bb48f65b..790a30f91 100644 --- a/src/server.zig +++ b/src/server.zig @@ -124,48 +124,58 @@ pub const Server = struct { client.* = try Client.init(socket, self); defer client.deinit(); - var last_message = timestamp(); var http = &self.app.http; - http.monitorSocket(socket); defer http.unmonitorSocket(); + std.debug.assert(client.mode == .http); while (true) { - if (http.poll(10) == .extra_socket) { - const n = posix.read(socket, client.readBuf()) catch |err| { - log.warn(.app, "CDP read", .{ .err = err }); - return; - }; - if (n == 0) { - log.info(.app, "CDP disconnect", .{}); - return; - } - const more = client.processData(n) catch false; - if (!more) { - return; - } - last_message = timestamp(); - } else if (timestamp() - last_message > timeout_ms) { + if (http.poll(timeout_ms) != .extra_socket) { log.info(.app, "CDP timeout", .{}); return; } - // We have 3 types of "events": - // - Incoming CDP messages - // - Network events from the browser - // - Timeouts from the browser - - // The call to http.poll above handles the first two (which is why - // we pass the client socket to it). But browser timeouts aren't - // hooked into that. So we need to go to the browser page (if there - // is one), and ask it to process any pending events. That action - // doesn't starve #2 (Network events from the browser), because - // page.wait() handles that too. But it does starve #1 (Incoming CDP - // messages). The good news is that, while the Page is mostly - // unaware of CDP, it will only block if it actually has something to - // do AND it knows if we're waiting on an intercept request, and will - // eagerly return control here in those cases. + + if (try client.readSocket() == false) { + return; + } + if (client.mode == .cdp) { - client.mode.cdp.pageWait(); + break; // switch to our CDP loop + } + } + + var cdp = &client.mode.cdp; + var last_message = timestamp(); + var ms_remaining = timeout_ms; + while (true) { + switch (cdp.pageWait(ms_remaining)) { + .extra_socket => { + if (try client.readSocket() == false) { + return; + } + last_message = timestamp(); + ms_remaining = timeout_ms; + }, + .no_page => { + if (http.poll(ms_remaining) != .extra_socket) { + log.info(.app, "CDP timeout", .{}); + return; + } + if (try client.readSocket() == false) { + return; + } + last_message = timestamp(); + ms_remaining = timeout_ms; + }, + .done => { + std.debug.print("ok\n", .{}); + const elapsed = timestamp() - last_message; + if (elapsed > ms_remaining) { + log.info(.app, "CDP timeout", .{}); + return; + } + ms_remaining -= @as(i32, @intCast(elapsed)); + }, } } } @@ -222,6 +232,20 @@ pub const Client = struct { self.send_arena.deinit(); } + fn readSocket(self: *Client) !bool { + const n = posix.read(self.socket, self.readBuf()) catch |err| { + log.warn(.app, "CDP read", .{ .err = err }); + return false; + }; + + if (n == 0) { + log.info(.app, "CDP disconnect", .{}); + return false; + } + + return self.processData(n) catch false; + } + fn readBuf(self: *Client) []u8 { return self.reader.readBuf(); } diff --git a/src/testing.zig b/src/testing.zig index 7bc430afa..c6055cc3e 100644 --- a/src/testing.zig +++ b/src/testing.zig @@ -424,7 +424,7 @@ pub const JsRunner = struct { } return err; }; - self.page.session.wait(1); + self.page.session.wait(1000); @import("root").js_runner_duration += std.time.Instant.since(try std.time.Instant.now(), start); if (case.@"1") |expected| { @@ -518,7 +518,7 @@ pub fn htmlRunner(file: []const u8) !void { const url = try std.fmt.allocPrint(arena_allocator, "http://localhost:9582/src/tests/{s}", .{file}); try page.navigate(url, .{}); - page.wait(2); + _ = page.wait(2000); const value = js_context.exec("testing.getStatus()", "testing.getStatus()") catch |err| { const msg = try_catch.err(arena_allocator) catch @errorName(err) orelse "unknown"; From 78285d7b1e06ce89d46a10cba7630777d5de282c Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Wed, 3 Sep 2025 20:23:59 +0800 Subject: [PATCH 4/4] fix tests --- src/browser/page.zig | 15 ++++++++++++--- src/server.zig | 1 - src/testing.zig | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/browser/page.zig b/src/browser/page.zig index f8dd08d70..c9a9df0a4 100644 --- a/src/browser/page.zig +++ b/src/browser/page.zig @@ -321,6 +321,7 @@ pub const Page = struct { // store http_client.active BEFORE this call and then use // it AFTER. const ms_to_next_task = try scheduler.runHighPriority(); + _ = try scheduler.runLowPriority(); if (try_catch.hasCaught()) { const msg = (try try_catch.err(self.arena)) orelse "unknown"; @@ -329,7 +330,17 @@ pub const Page = struct { } if (http_client.active == 0 and exit_when_done) { - const ms = ms_to_next_task orelse { + const ms = ms_to_next_task orelse blk: { + // TODO: when jsRunner is fully replaced with the + // htmlRunner, we can remove the first part of this + // condition. jsRunner calls `page.wait` far too + // often to enforce this. + if (wait_ms > 100 and wait_ms - ms_remaining < 100) { + // Look, we want to exit ASAP, but we don't want + // to exit so fast that we've run none of the + // background jobs. + break :blk 50; + } // no http transfers, no cdp extra socket, no // scheduled tasks, we're done. return .done; @@ -341,7 +352,6 @@ pub const Page = struct { return .done; } - _ = try scheduler.runLowPriority(); // we have a task to run in the not-so-distant future. // You might think we can just sleep until that task is // ready, but we should continue to run lowPriority tasks @@ -353,7 +363,6 @@ pub const Page = struct { // We're here because we either have active HTTP // connections, of exit_when_done == false (aka, there's // an extra_socket registered with the http client). - _ = try scheduler.runLowPriority(); const ms_to_wait = @min(ms_remaining, ms_to_next_task orelse 100); if (try http_client.tick(ms_to_wait) == .extra_socket) { // data on a socket we aren't handling, return to caller diff --git a/src/server.zig b/src/server.zig index 790a30f91..f4f169d11 100644 --- a/src/server.zig +++ b/src/server.zig @@ -168,7 +168,6 @@ pub const Server = struct { ms_remaining = timeout_ms; }, .done => { - std.debug.print("ok\n", .{}); const elapsed = timestamp() - last_message; if (elapsed > ms_remaining) { log.info(.app, "CDP timeout", .{}); diff --git a/src/testing.zig b/src/testing.zig index c6055cc3e..d8c894ce5 100644 --- a/src/testing.zig +++ b/src/testing.zig @@ -424,7 +424,7 @@ pub const JsRunner = struct { } return err; }; - self.page.session.wait(1000); + _ = self.page.session.wait(100); @import("root").js_runner_duration += std.time.Instant.since(try std.time.Instant.now(), start); if (case.@"1") |expected| {