Skip to content

Commit f393eb7

Browse files
authored
Merge pull request #1003 from lightpanda-io/http_always_monitor_cdp
Http always monitor cdp
2 parents a791212 + 78285d7 commit f393eb7

File tree

14 files changed

+241
-201
lines changed

14 files changed

+241
-201
lines changed

src/browser/Scheduler.zig

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,17 @@ pub fn reset(self: *Scheduler) void {
4444

4545
const AddOpts = struct {
4646
name: []const u8 = "",
47+
low_priority: bool = false,
4748
};
4849
pub fn add(self: *Scheduler, ctx: *anyopaque, func: Task.Func, ms: u32, opts: AddOpts) !void {
4950
if (ms > 5_000) {
5051
log.warn(.user_script, "long timeout ignored", .{ .delay = ms });
5152
// ignore any task that we're almost certainly never going to run
5253
return;
5354
}
54-
return self.primary.add(.{
55+
56+
var q = if (opts.low_priority) &self.secondary else &self.primary;
57+
return q.add(.{
5558
.ms = std.time.milliTimestamp() + ms,
5659
.ctx = ctx,
5760
.func = func,

src/browser/ScriptManager.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ pub fn blockingGet(self: *ScriptManager, url: [:0]const u8) !BlockingResult {
285285

286286
// rely on http's timeout settings to avoid an endless/long loop.
287287
while (true) {
288-
_ = try client.tick(.{ .timeout_ms = 200 });
288+
_ = try client.tick(200);
289289
switch (blocking.state) {
290290
.running => {},
291291
.done => |result| return result,
@@ -428,7 +428,7 @@ fn errorCallback(ctx: *anyopaque, err: anyerror) void {
428428
// It could be pending because:
429429
// (a) we're still downloading its content or
430430
// (b) this is a non-async script that has to be executed in order
431-
const PendingScript = struct {
431+
pub const PendingScript = struct {
432432
script: Script,
433433
complete: bool,
434434
node: OrderList.Node,

src/browser/html/window.zig

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,11 @@ pub const Window = struct {
215215
}
216216

217217
pub fn _requestAnimationFrame(self: *Window, cbk: Function, page: *Page) !u32 {
218-
return self.createTimeout(cbk, 5, page, .{ .animation_frame = true, .name = "animationFrame" });
218+
return self.createTimeout(cbk, 5, page, .{
219+
.animation_frame = true,
220+
.name = "animationFrame",
221+
.low_priority = true,
222+
});
219223
}
220224

221225
pub fn _cancelAnimationFrame(self: *Window, id: u32) !void {
@@ -269,6 +273,7 @@ pub const Window = struct {
269273
args: []Env.JsObject = &.{},
270274
repeat: bool = false,
271275
animation_frame: bool = false,
276+
low_priority: bool = false,
272277
};
273278
fn createTimeout(self: *Window, cbk: Function, delay_: ?u32, page: *Page, opts: CreateTimeoutOpts) !u32 {
274279
const delay = delay_ orelse 0;
@@ -319,7 +324,10 @@ pub const Window = struct {
319324
.repeat = if (opts.repeat) delay + 1 else null,
320325
};
321326

322-
try page.scheduler.add(callback, TimerCallback.run, delay, .{ .name = opts.name });
327+
try page.scheduler.add(callback, TimerCallback.run, delay, .{
328+
.name = opts.name,
329+
.low_priority = opts.low_priority,
330+
});
323331

324332
return timer_id;
325333
}

src/browser/page.zig

Lines changed: 100 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,6 @@ pub const Page = struct {
9090

9191
load_state: LoadState = .parsing,
9292

93-
// Page.wait balances waiting for resources / tasks and producing an output.
94-
// Up until a timeout, Page.wait will always wait for inflight or pending
95-
// HTTP requests, via the Http.Client.active counter. However, intercepted
96-
// requests (via CDP, but it could be anything), aren't considered "active"
97-
// connection. So it's possible that we have intercepted requests (which are
98-
// pending on some driver to continue/abort) while Http.Client.active == 0.
99-
// This boolean exists to supplment Http.Client.active and inform Page.wait
100-
// of pending connections.
101-
request_intercepted: bool = false,
102-
10393
const Mode = union(enum) {
10494
pre: void,
10595
err: anyerror,
@@ -262,23 +252,26 @@ pub const Page = struct {
262252
return self.script_manager.blockingGet(src);
263253
}
264254

265-
pub fn wait(self: *Page, wait_sec: u16) void {
266-
self._wait(wait_sec) catch |err| switch (err) {
267-
error.JsError => {}, // already logged (with hopefully more context)
268-
else => {
269-
// There may be errors from the http/client or ScriptManager
270-
// that we should not treat as an error like this. Will need
271-
// to run this through more real-world sites and see if we need
272-
// to expand the switch (err) to have more customized logs for
273-
// specific messages.
274-
log.err(.browser, "page wait", .{ .err = err });
275-
},
255+
pub fn wait(self: *Page, wait_ms: i32) Session.WaitResult {
256+
return self._wait(wait_ms) catch |err| {
257+
switch (err) {
258+
error.JsError => {}, // already logged (with hopefully more context)
259+
else => {
260+
// There may be errors from the http/client or ScriptManager
261+
// that we should not treat as an error like this. Will need
262+
// to run this through more real-world sites and see if we need
263+
// to expand the switch (err) to have more customized logs for
264+
// specific messages.
265+
log.err(.browser, "page wait", .{ .err = err });
266+
},
267+
}
268+
return .done;
276269
};
277270
}
278271

279-
fn _wait(self: *Page, wait_sec: u16) !void {
272+
fn _wait(self: *Page, wait_ms: i32) !Session.WaitResult {
280273
var timer = try std.time.Timer.start();
281-
var ms_remaining: i32 = @intCast(wait_sec * 1000);
274+
var ms_remaining = wait_ms;
282275

283276
var try_catch: Env.TryCatch = undefined;
284277
try_catch.init(self.main_context);
@@ -287,112 +280,113 @@ pub const Page = struct {
287280
var scheduler = &self.scheduler;
288281
var http_client = self.http_client;
289282

283+
// I'd like the page to know NOTHING about extra_socket / CDP, but the
284+
// fact is that the behavior of wait changes depending on whether or
285+
// not we're using CDP.
286+
// If we aren't using CDP, as soon as we think there's nothing left
287+
// to do, we can exit - we'de done.
288+
// But if we are using CDP, we should wait for the whole `wait_ms`
289+
// because the http_click.tick() also monitors the CDP socket. And while
290+
// we could let CDP poll http (like it does for HTTP requests), the fact
291+
// is that we know more about the timing of stuff (e.g. how long to
292+
// poll/sleep) in the page.
293+
const exit_when_done = http_client.extra_socket == null;
294+
290295
// for debugging
291296
// defer self.printWaitAnalysis();
292297

293298
while (true) {
294-
SW: switch (self.mode) {
299+
switch (self.mode) {
295300
.pre, .raw, .text => {
296-
if (self.request_intercepted) {
297-
// the page request was intercepted.
298-
299-
// there shouldn't be any active requests;
300-
std.debug.assert(http_client.active == 0);
301-
302-
// nothing we can do for this, need to kick the can up
303-
// the chain and wait for activity (e.g. a CDP message)
304-
// to unblock this.
305-
return;
306-
}
307-
308301
// The main page hasn't started/finished navigating.
309302
// There's no JS to run, and no reason to run the scheduler.
310-
if (http_client.active == 0) {
303+
if (http_client.active == 0 and exit_when_done) {
311304
// haven't started navigating, I guess.
312-
return;
305+
return .done;
313306
}
314307

315-
// There should only be 1 active http transfer, the main page
316-
_ = try http_client.tick(.{ .timeout_ms = ms_remaining });
308+
// Either we have active http connections, or we're in CDP
309+
// mode with an extra socket. Either way, we're waiting
310+
// for http traffic
311+
if (try http_client.tick(ms_remaining) == .extra_socket) {
312+
// data on a socket we aren't handling, return to caller
313+
return .extra_socket;
314+
}
317315
},
318316
.html, .parsed => {
319317
// The HTML page was parsed. We now either have JS scripts to
320-
// download, or timeouts to execute, or both.
318+
// download, or scheduled tasks to execute, or both.
321319

322320
// scheduler.run could trigger new http transfers, so do not
323321
// store http_client.active BEFORE this call and then use
324322
// it AFTER.
325323
const ms_to_next_task = try scheduler.runHighPriority();
324+
_ = try scheduler.runLowPriority();
326325

327326
if (try_catch.hasCaught()) {
328327
const msg = (try try_catch.err(self.arena)) orelse "unknown";
329328
log.warn(.user_script, "page wait", .{ .err = msg, .src = "scheduler" });
330329
return error.JsError;
331330
}
332331

333-
if (http_client.active == 0) {
334-
if (ms_to_next_task) |ms| {
335-
// There are no HTTP transfers, so there's no point calling
336-
// http_client.tick.
337-
// TODO: should we just force-run the scheduler??
338-
339-
if (ms > ms_remaining) {
340-
// we'd wait to long, might as well exit early.
341-
return;
332+
if (http_client.active == 0 and exit_when_done) {
333+
const ms = ms_to_next_task orelse blk: {
334+
// TODO: when jsRunner is fully replaced with the
335+
// htmlRunner, we can remove the first part of this
336+
// condition. jsRunner calls `page.wait` far too
337+
// often to enforce this.
338+
if (wait_ms > 100 and wait_ms - ms_remaining < 100) {
339+
// Look, we want to exit ASAP, but we don't want
340+
// to exit so fast that we've run none of the
341+
// background jobs.
342+
break :blk 50;
342343
}
343-
_ = try scheduler.runLowPriority();
344-
345-
// We must use a u64 here b/c ms is a u32 and the
346-
// conversion to ns can generate an integer
347-
// overflow.
348-
const _ms: u64 = @intCast(ms);
349-
350-
std.Thread.sleep(std.time.ns_per_ms * _ms);
351-
break :SW;
344+
// no http transfers, no cdp extra socket, no
345+
// scheduled tasks, we're done.
346+
return .done;
347+
};
348+
349+
if (ms > ms_remaining) {
350+
// same as above, except we have a scheduled task,
351+
// it just happens to be too far into the future.s
352+
return .done;
352353
}
353354

354-
// We have no active http transfer and no pending
355-
// schedule tasks. We're done
356-
return;
357-
}
358-
359-
_ = try scheduler.runLowPriority();
360-
361-
const request_intercepted = self.request_intercepted;
362-
363-
// We want to prioritize processing intercepted requests
364-
// because, the sooner they get unblocked, the sooner we
365-
// can start the HTTP request. But we still want to advanced
366-
// existing HTTP requests, if possible. So, if we have
367-
// intercepted requests, we'll still look at existing HTTP
368-
// requests, but we won't block waiting for more data.
369-
const ms_to_wait =
370-
if (request_intercepted) 0
371-
372-
// But if we have no intercepted requests, we'll wait
373-
// for as long as we can for data to our existing
374-
// inflight requests
375-
else @min(ms_remaining, ms_to_next_task orelse 1000);
376-
377-
_ = try http_client.tick(.{ .timeout_ms = ms_to_wait });
378-
379-
if (request_intercepted) {
380-
// Again, proritizing intercepted requests. Exit this
381-
// loop so that our caller can hopefully resolve them
382-
// (i.e. continue or abort them);
383-
return;
355+
// we have a task to run in the not-so-distant future.
356+
// You might think we can just sleep until that task is
357+
// ready, but we should continue to run lowPriority tasks
358+
// in the meantime, and that could unblock things. So
359+
// we'll just sleep for a bit, and then restart our wait
360+
// loop to see what's changed
361+
std.Thread.sleep(std.time.ns_per_ms * @as(u64, @intCast(@min(ms, 20))));
362+
} else {
363+
// We're here because we either have active HTTP
364+
// connections, of exit_when_done == false (aka, there's
365+
// an extra_socket registered with the http client).
366+
const ms_to_wait = @min(ms_remaining, ms_to_next_task orelse 100);
367+
if (try http_client.tick(ms_to_wait) == .extra_socket) {
368+
// data on a socket we aren't handling, return to caller
369+
return .extra_socket;
370+
}
384371
}
385372
},
386373
.err => |err| {
387374
self.mode = .{ .raw_done = @errorName(err) };
388375
return err;
389376
},
390-
.raw_done => return,
377+
.raw_done => {
378+
if (exit_when_done) {
379+
return .done;
380+
}
381+
// we _could_ http_client.tick(ms_to_wait), but this has
382+
// the same result, and I feel is more correct.
383+
return .no_page;
384+
}
391385
}
392386

393387
const ms_elapsed = timer.lap() / 1_000_000;
394388
if (ms_elapsed >= ms_remaining) {
395-
return;
389+
return .done;
396390
}
397391
ms_remaining -= @intCast(ms_elapsed);
398392
}
@@ -405,48 +399,53 @@ pub const Page = struct {
405399
std.debug.print("\nactive requests: {d}\n", .{self.http_client.active});
406400
var n_ = self.http_client.handles.in_use.first;
407401
while (n_) |n| {
408-
const transfer = Http.Transfer.fromEasy(n.data.conn.easy) catch |err| {
402+
const handle: *Http.Client.Handle = @fieldParentPtr("node", n);
403+
const transfer = Http.Transfer.fromEasy(handle.conn.easy) catch |err| {
409404
std.debug.print(" - failed to load transfer: {any}\n", .{err});
410405
break;
411406
};
412-
std.debug.print(" - {s}\n", .{transfer});
407+
std.debug.print(" - {f}\n", .{transfer});
413408
n_ = n.next;
414409
}
415410
}
416411

417412
{
418-
std.debug.print("\nqueued requests: {d}\n", .{self.http_client.queue.len});
413+
std.debug.print("\nqueued requests: {d}\n", .{self.http_client.queue.len()});
419414
var n_ = self.http_client.queue.first;
420415
while (n_) |n| {
421-
std.debug.print(" - {s}\n", .{n.data.url});
416+
const transfer: *Http.Transfer = @fieldParentPtr("_node", n);
417+
std.debug.print(" - {f}\n", .{transfer.uri});
422418
n_ = n.next;
423419
}
424420
}
425421

426422
{
427-
std.debug.print("\nscripts: {d}\n", .{self.script_manager.scripts.len});
423+
std.debug.print("\nscripts: {d}\n", .{self.script_manager.scripts.len()});
428424
var n_ = self.script_manager.scripts.first;
429425
while (n_) |n| {
430-
std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
426+
const ps: *ScriptManager.PendingScript = @fieldParentPtr("node", n);
427+
std.debug.print(" - {s} complete: {any}\n", .{ ps.script.url, ps.complete });
431428
n_ = n.next;
432429
}
433430
}
434431

435432
{
436-
std.debug.print("\ndeferreds: {d}\n", .{self.script_manager.deferreds.len});
433+
std.debug.print("\ndeferreds: {d}\n", .{self.script_manager.deferreds.len()});
437434
var n_ = self.script_manager.deferreds.first;
438435
while (n_) |n| {
439-
std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
436+
const ps: *ScriptManager.PendingScript = @fieldParentPtr("node", n);
437+
std.debug.print(" - {s} complete: {any}\n", .{ ps.script.url, ps.complete });
440438
n_ = n.next;
441439
}
442440
}
443441

444442
const now = std.time.milliTimestamp();
445443
{
446-
std.debug.print("\nasyncs: {d}\n", .{self.script_manager.asyncs.len});
444+
std.debug.print("\nasyncs: {d}\n", .{self.script_manager.asyncs.len()});
447445
var n_ = self.script_manager.asyncs.first;
448446
while (n_) |n| {
449-
std.debug.print(" - {s} complete: {any}\n", .{ n.data.script.url, n.data.complete });
447+
const ps: *ScriptManager.PendingScript = @fieldParentPtr("node", n);
448+
std.debug.print(" - {s} complete: {any}\n", .{ ps.script.url, ps.complete });
450449
n_ = n.next;
451450
}
452451
}

0 commit comments

Comments
 (0)